xref: /original-bsd/sys/ufs/ufs/ufs_lookup.c (revision 764d752b)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)ufs_lookup.c	7.27 (Berkeley) 03/11/91
8  */
9 
10 #include "param.h"
11 #include "user.h"
12 #include "buf.h"
13 #include "file.h"
14 #include "vnode.h"
15 #include "../ufs/quota.h"
16 #include "../ufs/inode.h"
17 #include "../ufs/fs.h"
18 
19 struct	nchstats nchstats;
20 int	dirchk = 1;
21 
22 /*
23  * Convert a component of a pathname into a pointer to a locked inode.
24  * This is a very central and rather complicated routine.
25  * If the file system is not maintained in a strict tree hierarchy,
26  * this can result in a deadlock situation (see comments in code below).
27  *
28  * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
29  * whether the name is to be looked up, created, renamed, or deleted.
30  * When CREATE, RENAME, or DELETE is specified, information usable in
31  * creating, renaming, or deleting a directory entry may be calculated.
32  * If flag has LOCKPARENT or'ed into it and the target of the pathname
33  * exists, lookup returns both the target and its parent directory locked.
34  * When creating or renaming and LOCKPARENT is specified, the target may
35  * not be ".".  When deleting and LOCKPARENT is specified, the target may
36  * be "."., but the caller must check to ensure it does an vrele and iput
37  * instead of two iputs.
38  *
39  * Overall outline of ufs_lookup:
40  *
41  *	check accessibility of directory
42  *	look for name in cache, if found, then if at end of path
43  *	  and deleting or creating, drop it, else return name
44  *	search for name in directory, to found or notfound
45  * notfound:
46  *	if creating, return locked directory, leaving info on available slots
47  *	else return error
48  * found:
49  *	if at end of path and deleting, return information to allow delete
50  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
51  *	  inode and return info to allow rewrite
52  *	if not at end, add name to cache; if at end and neither creating
53  *	  nor deleting, add name to cache
54  *
55  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
56  */
57 ufs_lookup(vdp, ndp)
58 	register struct vnode *vdp;
59 	register struct nameidata *ndp;
60 {
61 	register struct inode *dp;	/* the directory we are searching */
62 	register struct fs *fs;		/* file system that directory is in */
63 	struct buf *bp = 0;		/* a buffer of directory entries */
64 	register struct direct *ep;	/* the current directory entry */
65 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
66 	enum {NONE, COMPACT, FOUND} slotstatus;
67 	int slotoffset = -1;		/* offset of area with free space */
68 	int slotsize;			/* size of area at slotoffset */
69 	int slotfreespace;		/* amount of space free in slot */
70 	int slotneeded;			/* size of the entry we're seeking */
71 	int numdirpasses;		/* strategy for directory search */
72 	int endsearch;			/* offset to end directory search */
73 	int prevoff;			/* ndp->ni_offset of previous entry */
74 	struct inode *pdp;		/* saved dp during symlink work */
75 	struct inode *tdp;		/* returned by iget */
76 	off_t enduseful;		/* pointer past last used dir slot */
77 	int flag;			/* LOOKUP, CREATE, RENAME, or DELETE */
78 	int lockparent;			/* 1 => lockparent flag is set */
79 	int wantparent;			/* 1 => wantparent or lockparent flag */
80 	int error;
81 
82 	ndp->ni_dvp = vdp;
83 	ndp->ni_vp = NULL;
84 	dp = VTOI(vdp);
85 	fs = dp->i_fs;
86 	lockparent = ndp->ni_nameiop & LOCKPARENT;
87 	flag = ndp->ni_nameiop & OPMASK;
88 	wantparent = ndp->ni_nameiop & (LOCKPARENT|WANTPARENT);
89 
90 	/*
91 	 * Check accessiblity of directory.
92 	 */
93 	if ((dp->i_mode&IFMT) != IFDIR)
94 		return (ENOTDIR);
95 	if (error = ufs_access(vdp, VEXEC, ndp->ni_cred))
96 		return (error);
97 
98 	/*
99 	 * We now have a segment name to search for, and a directory to search.
100 	 *
101 	 * Before tediously performing a linear scan of the directory,
102 	 * check the name cache to see if the directory/name pair
103 	 * we are looking for is known already.
104 	 */
105 	if (error = cache_lookup(ndp)) {
106 		int vpid;	/* capability number of vnode */
107 
108 		if (error == ENOENT)
109 			return (error);
110 #ifdef PARANOID
111 		if (vdp == ndp->ni_rdir && ndp->ni_isdotdot)
112 			panic("ufs_lookup: .. through root");
113 #endif
114 		/*
115 		 * Get the next vnode in the path.
116 		 * See comment below starting `Step through' for
117 		 * an explaination of the locking protocol.
118 		 */
119 		pdp = dp;
120 		dp = VTOI(ndp->ni_vp);
121 		vdp = ndp->ni_vp;
122 		vpid = vdp->v_id;
123 		if (pdp == dp) {
124 			VREF(vdp);
125 			error = 0;
126 		} else if (ndp->ni_isdotdot) {
127 			IUNLOCK(pdp);
128 			error = vget(vdp);
129 			if (!error && lockparent && *ndp->ni_next == '\0')
130 				ILOCK(pdp);
131 		} else {
132 			error = vget(vdp);
133 			if (!lockparent || error || *ndp->ni_next != '\0')
134 				IUNLOCK(pdp);
135 		}
136 		/*
137 		 * Check that the capability number did not change
138 		 * while we were waiting for the lock.
139 		 */
140 		if (!error) {
141 			if (vpid == vdp->v_id)
142 				return (0);
143 			iput(dp);
144 			if (lockparent && pdp != dp && *ndp->ni_next == '\0')
145 				IUNLOCK(pdp);
146 		}
147 		ILOCK(pdp);
148 		dp = pdp;
149 		vdp = ITOV(dp);
150 		ndp->ni_vp = NULL;
151 	}
152 
153 	/*
154 	 * Suppress search for slots unless creating
155 	 * file and at end of pathname, in which case
156 	 * we watch for a place to put the new file in
157 	 * case it doesn't already exist.
158 	 */
159 	slotstatus = FOUND;
160 	if ((flag == CREATE || flag == RENAME) && *ndp->ni_next == 0) {
161 		slotstatus = NONE;
162 		slotfreespace = 0;
163 		slotneeded = DIRSIZ(&ndp->ni_dent);
164 	}
165 
166 	/*
167 	 * If there is cached information on a previous search of
168 	 * this directory, pick up where we last left off.
169 	 * We cache only lookups as these are the most common
170 	 * and have the greatest payoff. Caching CREATE has little
171 	 * benefit as it usually must search the entire directory
172 	 * to determine that the entry does not exist. Caching the
173 	 * location of the last DELETE or RENAME has not reduced
174 	 * profiling time and hence has been removed in the interest
175 	 * of simplicity.
176 	 */
177 	if (flag != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) {
178 		ndp->ni_offset = 0;
179 		numdirpasses = 1;
180 	} else {
181 		ndp->ni_offset = dp->i_diroff;
182 		entryoffsetinblock = blkoff(fs, ndp->ni_offset);
183 		if (entryoffsetinblock != 0) {
184 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
185 			if (error)
186 				return (error);
187 		}
188 		numdirpasses = 2;
189 		nchstats.ncs_2passes++;
190 	}
191 	endsearch = roundup(dp->i_size, DIRBLKSIZ);
192 	enduseful = 0;
193 
194 searchloop:
195 	while (ndp->ni_offset < endsearch) {
196 		/*
197 		 * If offset is on a block boundary,
198 		 * read the next directory block.
199 		 * Release previous if it exists.
200 		 */
201 		if (blkoff(fs, ndp->ni_offset) == 0) {
202 			if (bp != NULL)
203 				brelse(bp);
204 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
205 			if (error)
206 				return (error);
207 			entryoffsetinblock = 0;
208 		}
209 		/*
210 		 * If still looking for a slot, and at a DIRBLKSIZE
211 		 * boundary, have to start looking for free space again.
212 		 */
213 		if (slotstatus == NONE &&
214 		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
215 			slotoffset = -1;
216 			slotfreespace = 0;
217 		}
218 		/*
219 		 * Get pointer to next entry.
220 		 * Full validation checks are slow, so we only check
221 		 * enough to insure forward progress through the
222 		 * directory. Complete checks can be run by patching
223 		 * "dirchk" to be true.
224 		 */
225 		ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
226 		if (ep->d_reclen == 0 ||
227 		    dirchk && dirbadentry(ep, entryoffsetinblock)) {
228 			int i;
229 
230 			dirbad(dp, ndp->ni_offset, "mangled entry");
231 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
232 			ndp->ni_offset += i;
233 			entryoffsetinblock += i;
234 			continue;
235 		}
236 
237 		/*
238 		 * If an appropriate sized slot has not yet been found,
239 		 * check to see if one is available. Also accumulate space
240 		 * in the current block so that we can determine if
241 		 * compaction is viable.
242 		 */
243 		if (slotstatus != FOUND) {
244 			int size = ep->d_reclen;
245 
246 			if (ep->d_ino != 0)
247 				size -= DIRSIZ(ep);
248 			if (size > 0) {
249 				if (size >= slotneeded) {
250 					slotstatus = FOUND;
251 					slotoffset = ndp->ni_offset;
252 					slotsize = ep->d_reclen;
253 				} else if (slotstatus == NONE) {
254 					slotfreespace += size;
255 					if (slotoffset == -1)
256 						slotoffset = ndp->ni_offset;
257 					if (slotfreespace >= slotneeded) {
258 						slotstatus = COMPACT;
259 						slotsize = ndp->ni_offset +
260 						      ep->d_reclen - slotoffset;
261 					}
262 				}
263 			}
264 		}
265 
266 		/*
267 		 * Check for a name match.
268 		 */
269 		if (ep->d_ino) {
270 			if (ep->d_namlen == ndp->ni_dent.d_namlen &&
271 			    !bcmp(ndp->ni_ptr, ep->d_name,
272 				(unsigned)ep->d_namlen)) {
273 				/*
274 				 * Save directory entry's inode number and
275 				 * reclen in ndp->ni_dent, and release
276 				 * directory buffer.
277 				 */
278 				ndp->ni_dent.d_ino = ep->d_ino;
279 				ndp->ni_dent.d_reclen = ep->d_reclen;
280 				brelse(bp);
281 				goto found;
282 			}
283 		}
284 		prevoff = ndp->ni_offset;
285 		ndp->ni_offset += ep->d_reclen;
286 		entryoffsetinblock += ep->d_reclen;
287 		if (ep->d_ino)
288 			enduseful = ndp->ni_offset;
289 	}
290 /* notfound: */
291 	/*
292 	 * If we started in the middle of the directory and failed
293 	 * to find our target, we must check the beginning as well.
294 	 */
295 	if (numdirpasses == 2) {
296 		numdirpasses--;
297 		ndp->ni_offset = 0;
298 		endsearch = dp->i_diroff;
299 		goto searchloop;
300 	}
301 	if (bp != NULL)
302 		brelse(bp);
303 	/*
304 	 * If creating, and at end of pathname and current
305 	 * directory has not been removed, then can consider
306 	 * allowing file to be created.
307 	 */
308 	if ((flag == CREATE || flag == RENAME) &&
309 	    *ndp->ni_next == 0 && dp->i_nlink != 0) {
310 		/*
311 		 * Access for write is interpreted as allowing
312 		 * creation of files in the directory.
313 		 */
314 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred))
315 			return (error);
316 		/*
317 		 * Return an indication of where the new directory
318 		 * entry should be put.  If we didn't find a slot,
319 		 * then set ndp->ni_count to 0 indicating that the new
320 		 * slot belongs at the end of the directory. If we found
321 		 * a slot, then the new entry can be put in the range
322 		 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
323 		 */
324 		if (slotstatus == NONE) {
325 			ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
326 			ndp->ni_count = 0;
327 			enduseful = ndp->ni_offset;
328 		} else {
329 			ndp->ni_offset = slotoffset;
330 			ndp->ni_count = slotsize;
331 			if (enduseful < slotoffset + slotsize)
332 				enduseful = slotoffset + slotsize;
333 		}
334 		ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
335 		dp->i_flag |= IUPD|ICHG;
336 		/*
337 		 * We return with the directory locked, so that
338 		 * the parameters we set up above will still be
339 		 * valid if we actually decide to do a direnter().
340 		 * We return ni_vp == NULL to indicate that the entry
341 		 * does not currently exist; we leave a pointer to
342 		 * the (locked) directory inode in ndp->ni_dvp.
343 		 *
344 		 * NB - if the directory is unlocked, then this
345 		 * information cannot be used.
346 		 */
347 		if (!lockparent)
348 			IUNLOCK(dp);
349 	}
350 	/*
351 	 * Insert name into cache (as non-existent) if appropriate.
352 	 */
353 	if (ndp->ni_makeentry && flag != CREATE)
354 		cache_enter(ndp);
355 	return (ENOENT);
356 
357 found:
358 	if (numdirpasses == 2)
359 		nchstats.ncs_pass2++;
360 	/*
361 	 * Check that directory length properly reflects presence
362 	 * of this entry.
363 	 */
364 	if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
365 		dirbad(dp, ndp->ni_offset, "i_size too small");
366 		dp->i_size = entryoffsetinblock + DIRSIZ(ep);
367 		dp->i_flag |= IUPD|ICHG;
368 	}
369 
370 	/*
371 	 * Found component in pathname.
372 	 * If the final component of path name, save information
373 	 * in the cache as to where the entry was found.
374 	 */
375 	if (*ndp->ni_next == '\0' && flag == LOOKUP)
376 		dp->i_diroff = ndp->ni_offset &~ (DIRBLKSIZ - 1);
377 
378 	/*
379 	 * If deleting, and at end of pathname, return
380 	 * parameters which can be used to remove file.
381 	 * If the wantparent flag isn't set, we return only
382 	 * the directory (in ndp->ni_dvp), otherwise we go
383 	 * on and lock the inode, being careful with ".".
384 	 */
385 	if (flag == DELETE && *ndp->ni_next == 0) {
386 		/*
387 		 * Write access to directory required to delete files.
388 		 */
389 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred))
390 			return (error);
391 		/*
392 		 * Return pointer to current entry in ndp->ni_offset,
393 		 * and distance past previous entry (if there
394 		 * is a previous entry in this block) in ndp->ni_count.
395 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
396 		 */
397 		if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
398 			ndp->ni_count = 0;
399 		else
400 			ndp->ni_count = ndp->ni_offset - prevoff;
401 		if (dp->i_number == ndp->ni_dent.d_ino) {
402 			VREF(vdp);
403 			ndp->ni_vp = vdp;
404 			return (0);
405 		}
406 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
407 			return (error);
408 		/*
409 		 * If directory is "sticky", then user must own
410 		 * the directory, or the file in it, else she
411 		 * may not delete it (unless she's root). This
412 		 * implements append-only directories.
413 		 */
414 		if ((dp->i_mode & ISVTX) &&
415 		    ndp->ni_cred->cr_uid != 0 &&
416 		    ndp->ni_cred->cr_uid != dp->i_uid &&
417 		    tdp->i_uid != ndp->ni_cred->cr_uid) {
418 			iput(tdp);
419 			return (EPERM);
420 		}
421 		ndp->ni_vp = ITOV(tdp);
422 		if (!lockparent)
423 			IUNLOCK(dp);
424 		return (0);
425 	}
426 
427 	/*
428 	 * If rewriting (RENAME), return the inode and the
429 	 * information required to rewrite the present directory
430 	 * Must get inode of directory entry to verify it's a
431 	 * regular file, or empty directory.
432 	 */
433 	if (flag == RENAME && wantparent && *ndp->ni_next == 0) {
434 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred))
435 			return (error);
436 		/*
437 		 * Careful about locking second inode.
438 		 * This can only occur if the target is ".".
439 		 */
440 		if (dp->i_number == ndp->ni_dent.d_ino)
441 			return (EISDIR);
442 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
443 			return (error);
444 		ndp->ni_vp = ITOV(tdp);
445 		if (!lockparent)
446 			IUNLOCK(dp);
447 		return (0);
448 	}
449 
450 	/*
451 	 * Step through the translation in the name.  We do not `iput' the
452 	 * directory because we may need it again if a symbolic link
453 	 * is relative to the current directory.  Instead we save it
454 	 * unlocked as "pdp".  We must get the target inode before unlocking
455 	 * the directory to insure that the inode will not be removed
456 	 * before we get it.  We prevent deadlock by always fetching
457 	 * inodes from the root, moving down the directory tree. Thus
458 	 * when following backward pointers ".." we must unlock the
459 	 * parent directory before getting the requested directory.
460 	 * There is a potential race condition here if both the current
461 	 * and parent directories are removed before the `iget' for the
462 	 * inode associated with ".." returns.  We hope that this occurs
463 	 * infrequently since we cannot avoid this race condition without
464 	 * implementing a sophisticated deadlock detection algorithm.
465 	 * Note also that this simple deadlock detection scheme will not
466 	 * work if the file system has any hard links other than ".."
467 	 * that point backwards in the directory structure.
468 	 */
469 	pdp = dp;
470 	if (ndp->ni_isdotdot) {
471 		IUNLOCK(pdp);	/* race to get the inode */
472 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp)) {
473 			ILOCK(pdp);
474 			return (error);
475 		}
476 		if (lockparent && *ndp->ni_next == '\0')
477 			ILOCK(pdp);
478 		ndp->ni_vp = ITOV(tdp);
479 	} else if (dp->i_number == ndp->ni_dent.d_ino) {
480 		VREF(vdp);	/* we want ourself, ie "." */
481 		ndp->ni_vp = vdp;
482 	} else {
483 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
484 			return (error);
485 		if (!lockparent || *ndp->ni_next != '\0')
486 			IUNLOCK(pdp);
487 		ndp->ni_vp = ITOV(tdp);
488 	}
489 
490 	/*
491 	 * Insert name into cache if appropriate.
492 	 */
493 	if (ndp->ni_makeentry)
494 		cache_enter(ndp);
495 	return (0);
496 }
497 
498 
499 dirbad(ip, offset, how)
500 	struct inode *ip;
501 	off_t offset;
502 	char *how;
503 {
504 
505 	printf("%s: bad dir ino %d at offset %d: %s\n",
506 	    ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
507 	panic("bad dir");
508 }
509 
510 /*
511  * Do consistency checking on a directory entry:
512  *	record length must be multiple of 4
513  *	entry must fit in rest of its DIRBLKSIZ block
514  *	record must be large enough to contain entry
515  *	name is not longer than MAXNAMLEN
516  *	name must be as long as advertised, and null terminated
517  */
518 dirbadentry(ep, entryoffsetinblock)
519 	register struct direct *ep;
520 	int entryoffsetinblock;
521 {
522 	register int i;
523 
524 	if ((ep->d_reclen & 0x3) != 0 ||
525 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
526 	    ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
527 		return (1);
528 	for (i = 0; i < ep->d_namlen; i++)
529 		if (ep->d_name[i] == '\0')
530 			return (1);
531 	return (ep->d_name[i]);
532 }
533 
534 /*
535  * Write a directory entry after a call to namei, using the parameters
536  * which it left in nameidata.  The argument ip is the inode which the
537  * new directory entry will refer to.  The nameidata field ndp->ni_dvp
538  * is a pointer to the directory to be written, which was left locked by
539  * namei.  Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
540  * how the space for the new entry is to be gotten.
541  */
542 direnter(ip, ndp)
543 	struct inode *ip;
544 	register struct nameidata *ndp;
545 {
546 	register struct direct *ep, *nep;
547 	register struct inode *dp = VTOI(ndp->ni_dvp);
548 	struct buf *bp;
549 	int loc, spacefree, error = 0;
550 	u_int dsize;
551 	int newentrysize;
552 	char *dirbuf;
553 
554 	ndp->ni_dent.d_ino = ip->i_number;
555 	newentrysize = DIRSIZ(&ndp->ni_dent);
556 	if (ndp->ni_count == 0) {
557 		/*
558 		 * If ndp->ni_count is 0, then namei could find no space in the
559 		 * directory. In this case ndp->ni_offset will be on a directory
560 		 * block boundary and we will write the new entry into a fresh
561 		 * block.
562 		 */
563 		if (ndp->ni_offset&(DIRBLKSIZ-1))
564 			panic("wdir: newblk");
565 		ndp->ni_dent.d_reclen = DIRBLKSIZ;
566 		ndp->ni_count = newentrysize;
567 		ndp->ni_resid = newentrysize;
568 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
569 		ndp->ni_iov = &ndp->ni_nd.nd_iovec;
570 		ndp->ni_iovcnt = 1;
571 		ndp->ni_rw = UIO_WRITE;
572 		ndp->ni_uioseg = UIO_SYSSPACE;
573 		error =
574 		    ufs_write(ndp->ni_dvp, &ndp->ni_uio, IO_SYNC, ndp->ni_cred);
575 		if (DIRBLKSIZ > dp->i_fs->fs_fsize) {
576 			panic("wdir: blksize"); /* XXX - should grow w/balloc */
577 		} else {
578 			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
579 			dp->i_flag |= ICHG;
580 		}
581 		return (error);
582 	}
583 
584 	/*
585 	 * If ndp->ni_count is non-zero, then namei found space for the new
586 	 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
587 	 * in the directory.  To use this space, we may have to compact
588 	 * the entries located there, by copying them together towards
589 	 * the beginning of the block, leaving the free space in
590 	 * one usable chunk at the end.
591 	 */
592 
593 	/*
594 	 * Increase size of directory if entry eats into new space.
595 	 * This should never push the size past a new multiple of
596 	 * DIRBLKSIZE.
597 	 *
598 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
599 	 */
600 	if (ndp->ni_offset + ndp->ni_count > dp->i_size)
601 		dp->i_size = ndp->ni_offset + ndp->ni_count;
602 	/*
603 	 * Get the block containing the space for the new directory entry.
604 	 */
605 	if (error = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf, &bp))
606 		return (error);
607 	/*
608 	 * Find space for the new entry.  In the simple case, the
609 	 * entry at offset base will have the space.  If it does
610 	 * not, then namei arranged that compacting the region
611 	 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
612 	 */
613 	ep = (struct direct *)dirbuf;
614 	dsize = DIRSIZ(ep);
615 	spacefree = ep->d_reclen - dsize;
616 	for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
617 		nep = (struct direct *)(dirbuf + loc);
618 		if (ep->d_ino) {
619 			/* trim the existing slot */
620 			ep->d_reclen = dsize;
621 			ep = (struct direct *)((char *)ep + dsize);
622 		} else {
623 			/* overwrite; nothing there; header is ours */
624 			spacefree += dsize;
625 		}
626 		dsize = DIRSIZ(nep);
627 		spacefree += nep->d_reclen - dsize;
628 		loc += nep->d_reclen;
629 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
630 	}
631 	/*
632 	 * Update the pointer fields in the previous entry (if any),
633 	 * copy in the new entry, and write out the block.
634 	 */
635 	if (ep->d_ino == 0) {
636 		if (spacefree + dsize < newentrysize)
637 			panic("wdir: compact1");
638 		ndp->ni_dent.d_reclen = spacefree + dsize;
639 	} else {
640 		if (spacefree < newentrysize)
641 			panic("wdir: compact2");
642 		ndp->ni_dent.d_reclen = spacefree;
643 		ep->d_reclen = dsize;
644 		ep = (struct direct *)((char *)ep + dsize);
645 	}
646 	bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
647 	error = bwrite(bp);
648 	dp->i_flag |= IUPD|ICHG;
649 	if (!error && ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
650 		error = itrunc(dp, (u_long)ndp->ni_endoff, IO_SYNC);
651 	return (error);
652 }
653 
654 /*
655  * Remove a directory entry after a call to namei, using
656  * the parameters which it left in nameidata. The entry
657  * ni_offset contains the offset into the directory of the
658  * entry to be eliminated.  The ni_count field contains the
659  * size of the previous record in the directory.  If this
660  * is 0, the first entry is being deleted, so we need only
661  * zero the inode number to mark the entry as free.  If the
662  * entry isn't the first in the directory, we must reclaim
663  * the space of the now empty record by adding the record size
664  * to the size of the previous entry.
665  */
666 dirremove(ndp)
667 	register struct nameidata *ndp;
668 {
669 	register struct inode *dp = VTOI(ndp->ni_dvp);
670 	struct direct *ep;
671 	struct buf *bp;
672 	int error;
673 
674 	if (ndp->ni_count == 0) {
675 		/*
676 		 * First entry in block: set d_ino to zero.
677 		 */
678 		ndp->ni_dent.d_ino = 0;
679 		ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
680 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
681 		ndp->ni_iov = &ndp->ni_nd.nd_iovec;
682 		ndp->ni_iovcnt = 1;
683 		ndp->ni_rw = UIO_WRITE;
684 		ndp->ni_uioseg = UIO_SYSSPACE;
685 		error =
686 		    ufs_write(ndp->ni_dvp, &ndp->ni_uio, IO_SYNC, ndp->ni_cred);
687 	} else {
688 		/*
689 		 * Collapse new free space into previous entry.
690 		 */
691 		if (error = blkatoff(dp, ndp->ni_offset - ndp->ni_count,
692 		    (char **)&ep, &bp)) {
693 			return (error);
694 		}
695 		ep->d_reclen += ndp->ni_dent.d_reclen;
696 		error = bwrite(bp);
697 		dp->i_flag |= IUPD|ICHG;
698 	}
699 	return (error);
700 }
701 
702 /*
703  * Rewrite an existing directory entry to point at the inode
704  * supplied.  The parameters describing the directory entry are
705  * set up by a call to namei.
706  */
707 dirrewrite(dp, ip, ndp)
708 	struct inode *dp, *ip;
709 	struct nameidata *ndp;
710 {
711 
712 	ndp->ni_dent.d_ino = ip->i_number;
713 	ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
714 	ndp->ni_base = (caddr_t)&ndp->ni_dent;
715 	ndp->ni_iov = &ndp->ni_nd.nd_iovec;
716 	ndp->ni_iovcnt = 1;
717 	ndp->ni_rw = UIO_WRITE;
718 	ndp->ni_uioseg = UIO_SYSSPACE;
719 	return (ufs_write(ITOV(dp), &ndp->ni_uio, IO_SYNC, ndp->ni_cred));
720 }
721 
722 /*
723  * Return buffer with contents of block "offset"
724  * from the beginning of directory "ip".  If "res"
725  * is non-zero, fill it in with a pointer to the
726  * remaining space in the directory.
727  */
728 blkatoff(ip, offset, res, bpp)
729 	struct inode *ip;
730 	off_t offset;
731 	char **res;
732 	struct buf **bpp;
733 {
734 	register struct fs *fs = ip->i_fs;
735 	daddr_t lbn = lblkno(fs, offset);
736 	int bsize = blksize(fs, ip, lbn);
737 	struct buf *bp;
738 	daddr_t bn;
739 	int error;
740 
741 	*bpp = 0;
742 	if (error = bread(ITOV(ip), lbn, bsize, NOCRED, &bp)) {
743 		brelse(bp);
744 		return (error);
745 	}
746 	if (res)
747 		*res = bp->b_un.b_addr + blkoff(fs, offset);
748 	*bpp = bp;
749 	return (0);
750 }
751 
752 /*
753  * Check if a directory is empty or not.
754  * Inode supplied must be locked.
755  *
756  * Using a struct dirtemplate here is not precisely
757  * what we want, but better than using a struct direct.
758  *
759  * NB: does not handle corrupted directories.
760  */
761 dirempty(ip, parentino, cred)
762 	register struct inode *ip;
763 	ino_t parentino;
764 	struct ucred *cred;
765 {
766 	register off_t off;
767 	struct dirtemplate dbuf;
768 	register struct direct *dp = (struct direct *)&dbuf;
769 	int error, count;
770 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
771 
772 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
773 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ,
774 		    off, UIO_SYSSPACE, IO_NODELOCKED, cred, &count);
775 		/*
776 		 * Since we read MINDIRSIZ, residual must
777 		 * be 0 unless we're at end of file.
778 		 */
779 		if (error || count != 0)
780 			return (0);
781 		/* avoid infinite loops */
782 		if (dp->d_reclen == 0)
783 			return (0);
784 		/* skip empty entries */
785 		if (dp->d_ino == 0)
786 			continue;
787 		/* accept only "." and ".." */
788 		if (dp->d_namlen > 2)
789 			return (0);
790 		if (dp->d_name[0] != '.')
791 			return (0);
792 		/*
793 		 * At this point d_namlen must be 1 or 2.
794 		 * 1 implies ".", 2 implies ".." if second
795 		 * char is also "."
796 		 */
797 		if (dp->d_namlen == 1)
798 			continue;
799 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
800 			continue;
801 		return (0);
802 	}
803 	return (1);
804 }
805 
806 /*
807  * Check if source directory is in the path of the target directory.
808  * Target is supplied locked, source is unlocked.
809  * The target is always iput() before returning.
810  */
811 checkpath(source, target, cred)
812 	struct inode *source, *target;
813 	struct ucred *cred;
814 {
815 	struct dirtemplate dirbuf;
816 	struct inode *ip;
817 	int error = 0;
818 
819 	ip = target;
820 	if (ip->i_number == source->i_number) {
821 		error = EEXIST;
822 		goto out;
823 	}
824 	if (ip->i_number == ROOTINO)
825 		goto out;
826 
827 	for (;;) {
828 		if ((ip->i_mode&IFMT) != IFDIR) {
829 			error = ENOTDIR;
830 			break;
831 		}
832 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)&dirbuf,
833 			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
834 			IO_NODELOCKED, cred, (int *)0);
835 		if (error != 0)
836 			break;
837 		if (dirbuf.dotdot_namlen != 2 ||
838 		    dirbuf.dotdot_name[0] != '.' ||
839 		    dirbuf.dotdot_name[1] != '.') {
840 			error = ENOTDIR;
841 			break;
842 		}
843 		if (dirbuf.dotdot_ino == source->i_number) {
844 			error = EINVAL;
845 			break;
846 		}
847 		if (dirbuf.dotdot_ino == ROOTINO)
848 			break;
849 		iput(ip);
850 		if (error = iget(ip, dirbuf.dotdot_ino, &ip))
851 			break;
852 	}
853 
854 out:
855 	if (error == ENOTDIR)
856 		printf("checkpath: .. not a directory\n");
857 	if (ip != NULL)
858 		iput(ip);
859 	return (error);
860 }
861