xref: /original-bsd/sys/ufs/ffs/ufs_lookup.c (revision bdc0a208)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)ufs_lookup.c	7.31 (Berkeley) 04/16/91
8  */
9 
10 #include "param.h"
11 #include "namei.h"
12 #include "buf.h"
13 #include "file.h"
14 #include "vnode.h"
15 
16 #include "quota.h"
17 #include "inode.h"
18 #include "fs.h"
19 
20 struct	nchstats nchstats;
21 #ifdef DIAGNOSTIC
22 int	dirchk = 1;
23 #else
24 int	dirchk = 0;
25 #endif
26 
27 /*
28  * Convert a component of a pathname into a pointer to a locked inode.
29  * This is a very central and rather complicated routine.
30  * If the file system is not maintained in a strict tree hierarchy,
31  * this can result in a deadlock situation (see comments in code below).
32  *
33  * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
34  * whether the name is to be looked up, created, renamed, or deleted.
35  * When CREATE, RENAME, or DELETE is specified, information usable in
36  * creating, renaming, or deleting a directory entry may be calculated.
37  * If flag has LOCKPARENT or'ed into it and the target of the pathname
38  * exists, lookup returns both the target and its parent directory locked.
39  * When creating or renaming and LOCKPARENT is specified, the target may
40  * not be ".".  When deleting and LOCKPARENT is specified, the target may
41  * be "."., but the caller must check to ensure it does an vrele and iput
42  * instead of two iputs.
43  *
44  * Overall outline of ufs_lookup:
45  *
46  *	check accessibility of directory
47  *	look for name in cache, if found, then if at end of path
48  *	  and deleting or creating, drop it, else return name
49  *	search for name in directory, to found or notfound
50  * notfound:
51  *	if creating, return locked directory, leaving info on available slots
52  *	else return error
53  * found:
54  *	if at end of path and deleting, return information to allow delete
55  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
56  *	  inode and return info to allow rewrite
57  *	if not at end, add name to cache; if at end and neither creating
58  *	  nor deleting, add name to cache
59  *
60  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
61  */
62 ufs_lookup(vdp, ndp, p)
63 	register struct vnode *vdp;
64 	register struct nameidata *ndp;
65 	struct proc *p;
66 {
67 	register struct inode *dp;	/* the directory we are searching */
68 	register struct fs *fs;		/* file system that directory is in */
69 	struct buf *bp = 0;		/* a buffer of directory entries */
70 	register struct direct *ep;	/* the current directory entry */
71 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
72 	enum {NONE, COMPACT, FOUND} slotstatus;
73 	int slotoffset = -1;		/* offset of area with free space */
74 	int slotsize;			/* size of area at slotoffset */
75 	int slotfreespace;		/* amount of space free in slot */
76 	int slotneeded;			/* size of the entry we're seeking */
77 	int numdirpasses;		/* strategy for directory search */
78 	int endsearch;			/* offset to end directory search */
79 	int prevoff;			/* ndp->ni_offset of previous entry */
80 	struct inode *pdp;		/* saved dp during symlink work */
81 	struct inode *tdp;		/* returned by iget */
82 	off_t enduseful;		/* pointer past last used dir slot */
83 	int flag;			/* LOOKUP, CREATE, RENAME, or DELETE */
84 	int lockparent;			/* 1 => lockparent flag is set */
85 	int wantparent;			/* 1 => wantparent or lockparent flag */
86 	int error;
87 
88 	ndp->ni_dvp = vdp;
89 	ndp->ni_vp = NULL;
90 	dp = VTOI(vdp);
91 	fs = dp->i_fs;
92 	lockparent = ndp->ni_nameiop & LOCKPARENT;
93 	flag = ndp->ni_nameiop & OPMASK;
94 	wantparent = ndp->ni_nameiop & (LOCKPARENT|WANTPARENT);
95 
96 	/*
97 	 * Check accessiblity of directory.
98 	 */
99 	if ((dp->i_mode&IFMT) != IFDIR)
100 		return (ENOTDIR);
101 	if (error = ufs_access(vdp, VEXEC, ndp->ni_cred, p))
102 		return (error);
103 
104 	/*
105 	 * We now have a segment name to search for, and a directory to search.
106 	 *
107 	 * Before tediously performing a linear scan of the directory,
108 	 * check the name cache to see if the directory/name pair
109 	 * we are looking for is known already.
110 	 */
111 	if (error = cache_lookup(ndp)) {
112 		int vpid;	/* capability number of vnode */
113 
114 		if (error == ENOENT)
115 			return (error);
116 #ifdef PARANOID
117 		if (vdp == ndp->ni_rdir && ndp->ni_isdotdot)
118 			panic("ufs_lookup: .. through root");
119 #endif
120 		/*
121 		 * Get the next vnode in the path.
122 		 * See comment below starting `Step through' for
123 		 * an explaination of the locking protocol.
124 		 */
125 		pdp = dp;
126 		dp = VTOI(ndp->ni_vp);
127 		vdp = ndp->ni_vp;
128 		vpid = vdp->v_id;
129 		if (pdp == dp) {
130 			VREF(vdp);
131 			error = 0;
132 		} else if (ndp->ni_isdotdot) {
133 			IUNLOCK(pdp);
134 			error = vget(vdp);
135 			if (!error && lockparent && *ndp->ni_next == '\0')
136 				ILOCK(pdp);
137 		} else {
138 			error = vget(vdp);
139 			if (!lockparent || error || *ndp->ni_next != '\0')
140 				IUNLOCK(pdp);
141 		}
142 		/*
143 		 * Check that the capability number did not change
144 		 * while we were waiting for the lock.
145 		 */
146 		if (!error) {
147 			if (vpid == vdp->v_id)
148 				return (0);
149 			iput(dp);
150 			if (lockparent && pdp != dp && *ndp->ni_next == '\0')
151 				IUNLOCK(pdp);
152 		}
153 		ILOCK(pdp);
154 		dp = pdp;
155 		vdp = ITOV(dp);
156 		ndp->ni_vp = NULL;
157 	}
158 
159 	/*
160 	 * Suppress search for slots unless creating
161 	 * file and at end of pathname, in which case
162 	 * we watch for a place to put the new file in
163 	 * case it doesn't already exist.
164 	 */
165 	slotstatus = FOUND;
166 	if ((flag == CREATE || flag == RENAME) && *ndp->ni_next == 0) {
167 		slotstatus = NONE;
168 		slotfreespace = 0;
169 		slotneeded = DIRSIZ(&ndp->ni_dent);
170 	}
171 
172 	/*
173 	 * If there is cached information on a previous search of
174 	 * this directory, pick up where we last left off.
175 	 * We cache only lookups as these are the most common
176 	 * and have the greatest payoff. Caching CREATE has little
177 	 * benefit as it usually must search the entire directory
178 	 * to determine that the entry does not exist. Caching the
179 	 * location of the last DELETE or RENAME has not reduced
180 	 * profiling time and hence has been removed in the interest
181 	 * of simplicity.
182 	 */
183 	if (flag != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) {
184 		ndp->ni_offset = 0;
185 		numdirpasses = 1;
186 	} else {
187 		ndp->ni_offset = dp->i_diroff;
188 		entryoffsetinblock = blkoff(fs, ndp->ni_offset);
189 		if (entryoffsetinblock != 0) {
190 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
191 			if (error)
192 				return (error);
193 		}
194 		numdirpasses = 2;
195 		nchstats.ncs_2passes++;
196 	}
197 	endsearch = roundup(dp->i_size, DIRBLKSIZ);
198 	enduseful = 0;
199 
200 searchloop:
201 	while (ndp->ni_offset < endsearch) {
202 		/*
203 		 * If offset is on a block boundary,
204 		 * read the next directory block.
205 		 * Release previous if it exists.
206 		 */
207 		if (blkoff(fs, ndp->ni_offset) == 0) {
208 			if (bp != NULL)
209 				brelse(bp);
210 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
211 			if (error)
212 				return (error);
213 			entryoffsetinblock = 0;
214 		}
215 		/*
216 		 * If still looking for a slot, and at a DIRBLKSIZE
217 		 * boundary, have to start looking for free space again.
218 		 */
219 		if (slotstatus == NONE &&
220 		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
221 			slotoffset = -1;
222 			slotfreespace = 0;
223 		}
224 		/*
225 		 * Get pointer to next entry.
226 		 * Full validation checks are slow, so we only check
227 		 * enough to insure forward progress through the
228 		 * directory. Complete checks can be run by patching
229 		 * "dirchk" to be true.
230 		 */
231 		ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
232 		if (ep->d_reclen == 0 ||
233 		    dirchk && dirbadentry(ep, entryoffsetinblock)) {
234 			int i;
235 
236 			dirbad(dp, ndp->ni_offset, "mangled entry");
237 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
238 			ndp->ni_offset += i;
239 			entryoffsetinblock += i;
240 			continue;
241 		}
242 
243 		/*
244 		 * If an appropriate sized slot has not yet been found,
245 		 * check to see if one is available. Also accumulate space
246 		 * in the current block so that we can determine if
247 		 * compaction is viable.
248 		 */
249 		if (slotstatus != FOUND) {
250 			int size = ep->d_reclen;
251 
252 			if (ep->d_ino != 0)
253 				size -= DIRSIZ(ep);
254 			if (size > 0) {
255 				if (size >= slotneeded) {
256 					slotstatus = FOUND;
257 					slotoffset = ndp->ni_offset;
258 					slotsize = ep->d_reclen;
259 				} else if (slotstatus == NONE) {
260 					slotfreespace += size;
261 					if (slotoffset == -1)
262 						slotoffset = ndp->ni_offset;
263 					if (slotfreespace >= slotneeded) {
264 						slotstatus = COMPACT;
265 						slotsize = ndp->ni_offset +
266 						      ep->d_reclen - slotoffset;
267 					}
268 				}
269 			}
270 		}
271 
272 		/*
273 		 * Check for a name match.
274 		 */
275 		if (ep->d_ino) {
276 			if (ep->d_namlen == ndp->ni_dent.d_namlen &&
277 			    !bcmp(ndp->ni_ptr, ep->d_name,
278 				(unsigned)ep->d_namlen)) {
279 				/*
280 				 * Save directory entry's inode number and
281 				 * reclen in ndp->ni_dent, and release
282 				 * directory buffer.
283 				 */
284 				ndp->ni_dent.d_ino = ep->d_ino;
285 				ndp->ni_dent.d_reclen = ep->d_reclen;
286 				brelse(bp);
287 				goto found;
288 			}
289 		}
290 		prevoff = ndp->ni_offset;
291 		ndp->ni_offset += ep->d_reclen;
292 		entryoffsetinblock += ep->d_reclen;
293 		if (ep->d_ino)
294 			enduseful = ndp->ni_offset;
295 	}
296 /* notfound: */
297 	/*
298 	 * If we started in the middle of the directory and failed
299 	 * to find our target, we must check the beginning as well.
300 	 */
301 	if (numdirpasses == 2) {
302 		numdirpasses--;
303 		ndp->ni_offset = 0;
304 		endsearch = dp->i_diroff;
305 		goto searchloop;
306 	}
307 	if (bp != NULL)
308 		brelse(bp);
309 	/*
310 	 * If creating, and at end of pathname and current
311 	 * directory has not been removed, then can consider
312 	 * allowing file to be created.
313 	 */
314 	if ((flag == CREATE || flag == RENAME) &&
315 	    *ndp->ni_next == 0 && dp->i_nlink != 0) {
316 		/*
317 		 * Access for write is interpreted as allowing
318 		 * creation of files in the directory.
319 		 */
320 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred, p))
321 			return (error);
322 		/*
323 		 * Return an indication of where the new directory
324 		 * entry should be put.  If we didn't find a slot,
325 		 * then set ndp->ni_count to 0 indicating that the new
326 		 * slot belongs at the end of the directory. If we found
327 		 * a slot, then the new entry can be put in the range
328 		 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
329 		 */
330 		if (slotstatus == NONE) {
331 			ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
332 			ndp->ni_count = 0;
333 			enduseful = ndp->ni_offset;
334 		} else {
335 			ndp->ni_offset = slotoffset;
336 			ndp->ni_count = slotsize;
337 			if (enduseful < slotoffset + slotsize)
338 				enduseful = slotoffset + slotsize;
339 		}
340 		ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
341 		dp->i_flag |= IUPD|ICHG;
342 		/*
343 		 * We return with the directory locked, so that
344 		 * the parameters we set up above will still be
345 		 * valid if we actually decide to do a direnter().
346 		 * We return ni_vp == NULL to indicate that the entry
347 		 * does not currently exist; we leave a pointer to
348 		 * the (locked) directory inode in ndp->ni_dvp.
349 		 *
350 		 * NB - if the directory is unlocked, then this
351 		 * information cannot be used.
352 		 */
353 		if (!lockparent)
354 			IUNLOCK(dp);
355 	}
356 	/*
357 	 * Insert name into cache (as non-existent) if appropriate.
358 	 */
359 	if (ndp->ni_makeentry && flag != CREATE)
360 		cache_enter(ndp);
361 	return (ENOENT);
362 
363 found:
364 	if (numdirpasses == 2)
365 		nchstats.ncs_pass2++;
366 	/*
367 	 * Check that directory length properly reflects presence
368 	 * of this entry.
369 	 */
370 	if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
371 		dirbad(dp, ndp->ni_offset, "i_size too small");
372 		dp->i_size = entryoffsetinblock + DIRSIZ(ep);
373 		dp->i_flag |= IUPD|ICHG;
374 	}
375 
376 	/*
377 	 * Found component in pathname.
378 	 * If the final component of path name, save information
379 	 * in the cache as to where the entry was found.
380 	 */
381 	if (*ndp->ni_next == '\0' && flag == LOOKUP)
382 		dp->i_diroff = ndp->ni_offset &~ (DIRBLKSIZ - 1);
383 
384 	/*
385 	 * If deleting, and at end of pathname, return
386 	 * parameters which can be used to remove file.
387 	 * If the wantparent flag isn't set, we return only
388 	 * the directory (in ndp->ni_dvp), otherwise we go
389 	 * on and lock the inode, being careful with ".".
390 	 */
391 	if (flag == DELETE && *ndp->ni_next == 0) {
392 		/*
393 		 * Write access to directory required to delete files.
394 		 */
395 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred, p))
396 			return (error);
397 		/*
398 		 * Return pointer to current entry in ndp->ni_offset,
399 		 * and distance past previous entry (if there
400 		 * is a previous entry in this block) in ndp->ni_count.
401 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
402 		 */
403 		if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
404 			ndp->ni_count = 0;
405 		else
406 			ndp->ni_count = ndp->ni_offset - prevoff;
407 		if (dp->i_number == ndp->ni_dent.d_ino) {
408 			VREF(vdp);
409 			ndp->ni_vp = vdp;
410 			return (0);
411 		}
412 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
413 			return (error);
414 		/*
415 		 * If directory is "sticky", then user must own
416 		 * the directory, or the file in it, else she
417 		 * may not delete it (unless she's root). This
418 		 * implements append-only directories.
419 		 */
420 		if ((dp->i_mode & ISVTX) &&
421 		    ndp->ni_cred->cr_uid != 0 &&
422 		    ndp->ni_cred->cr_uid != dp->i_uid &&
423 		    tdp->i_uid != ndp->ni_cred->cr_uid) {
424 			iput(tdp);
425 			return (EPERM);
426 		}
427 		ndp->ni_vp = ITOV(tdp);
428 		if (!lockparent)
429 			IUNLOCK(dp);
430 		return (0);
431 	}
432 
433 	/*
434 	 * If rewriting (RENAME), return the inode and the
435 	 * information required to rewrite the present directory
436 	 * Must get inode of directory entry to verify it's a
437 	 * regular file, or empty directory.
438 	 */
439 	if (flag == RENAME && wantparent && *ndp->ni_next == 0) {
440 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred, p))
441 			return (error);
442 		/*
443 		 * Careful about locking second inode.
444 		 * This can only occur if the target is ".".
445 		 */
446 		if (dp->i_number == ndp->ni_dent.d_ino)
447 			return (EISDIR);
448 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
449 			return (error);
450 		ndp->ni_vp = ITOV(tdp);
451 		if (!lockparent)
452 			IUNLOCK(dp);
453 		return (0);
454 	}
455 
456 	/*
457 	 * Step through the translation in the name.  We do not `iput' the
458 	 * directory because we may need it again if a symbolic link
459 	 * is relative to the current directory.  Instead we save it
460 	 * unlocked as "pdp".  We must get the target inode before unlocking
461 	 * the directory to insure that the inode will not be removed
462 	 * before we get it.  We prevent deadlock by always fetching
463 	 * inodes from the root, moving down the directory tree. Thus
464 	 * when following backward pointers ".." we must unlock the
465 	 * parent directory before getting the requested directory.
466 	 * There is a potential race condition here if both the current
467 	 * and parent directories are removed before the `iget' for the
468 	 * inode associated with ".." returns.  We hope that this occurs
469 	 * infrequently since we cannot avoid this race condition without
470 	 * implementing a sophisticated deadlock detection algorithm.
471 	 * Note also that this simple deadlock detection scheme will not
472 	 * work if the file system has any hard links other than ".."
473 	 * that point backwards in the directory structure.
474 	 */
475 	pdp = dp;
476 	if (ndp->ni_isdotdot) {
477 		IUNLOCK(pdp);	/* race to get the inode */
478 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp)) {
479 			ILOCK(pdp);
480 			return (error);
481 		}
482 		if (lockparent && *ndp->ni_next == '\0')
483 			ILOCK(pdp);
484 		ndp->ni_vp = ITOV(tdp);
485 	} else if (dp->i_number == ndp->ni_dent.d_ino) {
486 		VREF(vdp);	/* we want ourself, ie "." */
487 		ndp->ni_vp = vdp;
488 	} else {
489 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
490 			return (error);
491 		if (!lockparent || *ndp->ni_next != '\0')
492 			IUNLOCK(pdp);
493 		ndp->ni_vp = ITOV(tdp);
494 	}
495 
496 	/*
497 	 * Insert name into cache if appropriate.
498 	 */
499 	if (ndp->ni_makeentry)
500 		cache_enter(ndp);
501 	return (0);
502 }
503 
504 
505 dirbad(ip, offset, how)
506 	struct inode *ip;
507 	off_t offset;
508 	char *how;
509 {
510 
511 	printf("%s: bad dir ino %d at offset %d: %s\n",
512 	    ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
513 	if (ip->i_fs->fs_ronly == 0)
514 		panic("bad dir");
515 }
516 
517 /*
518  * Do consistency checking on a directory entry:
519  *	record length must be multiple of 4
520  *	entry must fit in rest of its DIRBLKSIZ block
521  *	record must be large enough to contain entry
522  *	name is not longer than MAXNAMLEN
523  *	name must be as long as advertised, and null terminated
524  */
525 dirbadentry(ep, entryoffsetinblock)
526 	register struct direct *ep;
527 	int entryoffsetinblock;
528 {
529 	register int i;
530 
531 	if ((ep->d_reclen & 0x3) != 0 ||
532 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
533 	    ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
534 		return (1);
535 	for (i = 0; i < ep->d_namlen; i++)
536 		if (ep->d_name[i] == '\0')
537 			return (1);
538 	return (ep->d_name[i]);
539 }
540 
541 /*
542  * Write a directory entry after a call to namei, using the parameters
543  * which it left in nameidata.  The argument ip is the inode which the
544  * new directory entry will refer to.  The nameidata field ndp->ni_dvp
545  * is a pointer to the directory to be written, which was left locked by
546  * namei.  Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
547  * how the space for the new entry is to be gotten.
548  */
549 direnter(ip, ndp)
550 	struct inode *ip;
551 	register struct nameidata *ndp;
552 {
553 	register struct direct *ep, *nep;
554 	register struct inode *dp = VTOI(ndp->ni_dvp);
555 	struct buf *bp;
556 	int loc, spacefree, error = 0;
557 	u_int dsize;
558 	int newentrysize;
559 	char *dirbuf;
560 
561 	ndp->ni_dent.d_ino = ip->i_number;
562 	newentrysize = DIRSIZ(&ndp->ni_dent);
563 	if (ndp->ni_count == 0) {
564 		/*
565 		 * If ndp->ni_count is 0, then namei could find no space in the
566 		 * directory. In this case ndp->ni_offset will be on a directory
567 		 * block boundary and we will write the new entry into a fresh
568 		 * block.
569 		 */
570 		if (ndp->ni_offset&(DIRBLKSIZ-1))
571 			panic("wdir: newblk");
572 		ndp->ni_dent.d_reclen = DIRBLKSIZ;
573 		ndp->ni_count = newentrysize;
574 		ndp->ni_resid = newentrysize;
575 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
576 		ndp->ni_iov = &ndp->ni_nd.nd_iovec;
577 		ndp->ni_iovcnt = 1;
578 		ndp->ni_rw = UIO_WRITE;
579 		ndp->ni_uioseg = UIO_SYSSPACE;
580 		error =
581 		    ufs_write(ndp->ni_dvp, &ndp->ni_uio, IO_SYNC, ndp->ni_cred);
582 		if (DIRBLKSIZ > dp->i_fs->fs_fsize) {
583 			panic("wdir: blksize"); /* XXX - should grow w/balloc */
584 		} else {
585 			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
586 			dp->i_flag |= ICHG;
587 		}
588 		return (error);
589 	}
590 
591 	/*
592 	 * If ndp->ni_count is non-zero, then namei found space for the new
593 	 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
594 	 * in the directory.  To use this space, we may have to compact
595 	 * the entries located there, by copying them together towards
596 	 * the beginning of the block, leaving the free space in
597 	 * one usable chunk at the end.
598 	 */
599 
600 	/*
601 	 * Increase size of directory if entry eats into new space.
602 	 * This should never push the size past a new multiple of
603 	 * DIRBLKSIZE.
604 	 *
605 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
606 	 */
607 	if (ndp->ni_offset + ndp->ni_count > dp->i_size)
608 		dp->i_size = ndp->ni_offset + ndp->ni_count;
609 	/*
610 	 * Get the block containing the space for the new directory entry.
611 	 */
612 	if (error = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf, &bp))
613 		return (error);
614 	/*
615 	 * Find space for the new entry.  In the simple case, the
616 	 * entry at offset base will have the space.  If it does
617 	 * not, then namei arranged that compacting the region
618 	 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
619 	 */
620 	ep = (struct direct *)dirbuf;
621 	dsize = DIRSIZ(ep);
622 	spacefree = ep->d_reclen - dsize;
623 	for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
624 		nep = (struct direct *)(dirbuf + loc);
625 		if (ep->d_ino) {
626 			/* trim the existing slot */
627 			ep->d_reclen = dsize;
628 			ep = (struct direct *)((char *)ep + dsize);
629 		} else {
630 			/* overwrite; nothing there; header is ours */
631 			spacefree += dsize;
632 		}
633 		dsize = DIRSIZ(nep);
634 		spacefree += nep->d_reclen - dsize;
635 		loc += nep->d_reclen;
636 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
637 	}
638 	/*
639 	 * Update the pointer fields in the previous entry (if any),
640 	 * copy in the new entry, and write out the block.
641 	 */
642 	if (ep->d_ino == 0) {
643 		if (spacefree + dsize < newentrysize)
644 			panic("wdir: compact1");
645 		ndp->ni_dent.d_reclen = spacefree + dsize;
646 	} else {
647 		if (spacefree < newentrysize)
648 			panic("wdir: compact2");
649 		ndp->ni_dent.d_reclen = spacefree;
650 		ep->d_reclen = dsize;
651 		ep = (struct direct *)((char *)ep + dsize);
652 	}
653 	bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
654 	error = bwrite(bp);
655 	dp->i_flag |= IUPD|ICHG;
656 	if (!error && ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
657 		error = itrunc(dp, (u_long)ndp->ni_endoff, IO_SYNC);
658 	return (error);
659 }
660 
661 /*
662  * Remove a directory entry after a call to namei, using
663  * the parameters which it left in nameidata. The entry
664  * ni_offset contains the offset into the directory of the
665  * entry to be eliminated.  The ni_count field contains the
666  * size of the previous record in the directory.  If this
667  * is 0, the first entry is being deleted, so we need only
668  * zero the inode number to mark the entry as free.  If the
669  * entry isn't the first in the directory, we must reclaim
670  * the space of the now empty record by adding the record size
671  * to the size of the previous entry.
672  */
673 dirremove(ndp)
674 	register struct nameidata *ndp;
675 {
676 	register struct inode *dp = VTOI(ndp->ni_dvp);
677 	struct direct *ep;
678 	struct buf *bp;
679 	int error;
680 
681 	if (ndp->ni_count == 0) {
682 		/*
683 		 * First entry in block: set d_ino to zero.
684 		 */
685 		ndp->ni_dent.d_ino = 0;
686 		ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
687 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
688 		ndp->ni_iov = &ndp->ni_nd.nd_iovec;
689 		ndp->ni_iovcnt = 1;
690 		ndp->ni_rw = UIO_WRITE;
691 		ndp->ni_uioseg = UIO_SYSSPACE;
692 		error =
693 		    ufs_write(ndp->ni_dvp, &ndp->ni_uio, IO_SYNC, ndp->ni_cred);
694 	} else {
695 		/*
696 		 * Collapse new free space into previous entry.
697 		 */
698 		if (error = blkatoff(dp, ndp->ni_offset - ndp->ni_count,
699 		    (char **)&ep, &bp)) {
700 			return (error);
701 		}
702 		ep->d_reclen += ndp->ni_dent.d_reclen;
703 		error = bwrite(bp);
704 		dp->i_flag |= IUPD|ICHG;
705 	}
706 	return (error);
707 }
708 
709 /*
710  * Rewrite an existing directory entry to point at the inode
711  * supplied.  The parameters describing the directory entry are
712  * set up by a call to namei.
713  */
714 dirrewrite(dp, ip, ndp)
715 	struct inode *dp, *ip;
716 	struct nameidata *ndp;
717 {
718 
719 	ndp->ni_dent.d_ino = ip->i_number;
720 	ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
721 	ndp->ni_base = (caddr_t)&ndp->ni_dent;
722 	ndp->ni_iov = &ndp->ni_nd.nd_iovec;
723 	ndp->ni_iovcnt = 1;
724 	ndp->ni_rw = UIO_WRITE;
725 	ndp->ni_uioseg = UIO_SYSSPACE;
726 	return (ufs_write(ITOV(dp), &ndp->ni_uio, IO_SYNC, ndp->ni_cred));
727 }
728 
729 /*
730  * Return buffer with contents of block "offset"
731  * from the beginning of directory "ip".  If "res"
732  * is non-zero, fill it in with a pointer to the
733  * remaining space in the directory.
734  */
735 blkatoff(ip, offset, res, bpp)
736 	struct inode *ip;
737 	off_t offset;
738 	char **res;
739 	struct buf **bpp;
740 {
741 	register struct fs *fs = ip->i_fs;
742 	daddr_t lbn = lblkno(fs, offset);
743 	int bsize = blksize(fs, ip, lbn);
744 	struct buf *bp;
745 	daddr_t bn;
746 	int error;
747 
748 	*bpp = 0;
749 	if (error = bread(ITOV(ip), lbn, bsize, NOCRED, &bp)) {
750 		brelse(bp);
751 		return (error);
752 	}
753 	if (res)
754 		*res = bp->b_un.b_addr + blkoff(fs, offset);
755 	*bpp = bp;
756 	return (0);
757 }
758 
759 /*
760  * Check if a directory is empty or not.
761  * Inode supplied must be locked.
762  *
763  * Using a struct dirtemplate here is not precisely
764  * what we want, but better than using a struct direct.
765  *
766  * NB: does not handle corrupted directories.
767  */
768 dirempty(ip, parentino, cred)
769 	register struct inode *ip;
770 	ino_t parentino;
771 	struct ucred *cred;
772 {
773 	register off_t off;
774 	struct dirtemplate dbuf;
775 	register struct direct *dp = (struct direct *)&dbuf;
776 	int error, count;
777 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
778 
779 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
780 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
781 		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0);
782 		/*
783 		 * Since we read MINDIRSIZ, residual must
784 		 * be 0 unless we're at end of file.
785 		 */
786 		if (error || count != 0)
787 			return (0);
788 		/* avoid infinite loops */
789 		if (dp->d_reclen == 0)
790 			return (0);
791 		/* skip empty entries */
792 		if (dp->d_ino == 0)
793 			continue;
794 		/* accept only "." and ".." */
795 		if (dp->d_namlen > 2)
796 			return (0);
797 		if (dp->d_name[0] != '.')
798 			return (0);
799 		/*
800 		 * At this point d_namlen must be 1 or 2.
801 		 * 1 implies ".", 2 implies ".." if second
802 		 * char is also "."
803 		 */
804 		if (dp->d_namlen == 1)
805 			continue;
806 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
807 			continue;
808 		return (0);
809 	}
810 	return (1);
811 }
812 
813 /*
814  * Check if source directory is in the path of the target directory.
815  * Target is supplied locked, source is unlocked.
816  * The target is always iput() before returning.
817  */
818 checkpath(source, target, cred)
819 	struct inode *source, *target;
820 	struct ucred *cred;
821 {
822 	struct dirtemplate dirbuf;
823 	struct inode *ip;
824 	int error = 0;
825 
826 	ip = target;
827 	if (ip->i_number == source->i_number) {
828 		error = EEXIST;
829 		goto out;
830 	}
831 	if (ip->i_number == ROOTINO)
832 		goto out;
833 
834 	for (;;) {
835 		if ((ip->i_mode&IFMT) != IFDIR) {
836 			error = ENOTDIR;
837 			break;
838 		}
839 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)&dirbuf,
840 			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
841 			IO_NODELOCKED, cred, (int *)0, (struct proc *)0);
842 		if (error != 0)
843 			break;
844 		if (dirbuf.dotdot_namlen != 2 ||
845 		    dirbuf.dotdot_name[0] != '.' ||
846 		    dirbuf.dotdot_name[1] != '.') {
847 			error = ENOTDIR;
848 			break;
849 		}
850 		if (dirbuf.dotdot_ino == source->i_number) {
851 			error = EINVAL;
852 			break;
853 		}
854 		if (dirbuf.dotdot_ino == ROOTINO)
855 			break;
856 		iput(ip);
857 		if (error = iget(ip, dirbuf.dotdot_ino, &ip))
858 			break;
859 	}
860 
861 out:
862 	if (error == ENOTDIR)
863 		printf("checkpath: .. not a directory\n");
864 	if (ip != NULL)
865 		iput(ip);
866 	return (error);
867 }
868