xref: /original-bsd/sys/ufs/ffs/ufs_lookup.c (revision cde495fc)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)ufs_lookup.c	7.28 (Berkeley) 03/19/91
8  */
9 
10 #include "param.h"
11 #include "namei.h"
12 #include "buf.h"
13 #include "file.h"
14 #include "vnode.h"
15 
16 #include "quota.h"
17 #include "inode.h"
18 #include "fs.h"
19 
/*
 * Name cache statistics.  In this file ufs_lookup() increments
 * ncs_2passes when it starts a scan at the cached directory offset
 * and ncs_pass2 when a name is found while numdirpasses is still 2.
 */
20 struct	nchstats nchstats;
/*
 * When nonzero, ufs_lookup() runs dirbadentry() on every directory
 * entry it scans; when zero only a zero d_reclen is rejected.
 */
21 int	dirchk = 1;
22 
23 /*
24  * Convert a component of a pathname into a pointer to a locked inode.
25  * This is a very central and rather complicated routine.
26  * If the file system is not maintained in a strict tree hierarchy,
27  * this can result in a deadlock situation (see comments in code below).
28  *
29  * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
30  * whether the name is to be looked up, created, renamed, or deleted.
31  * When CREATE, RENAME, or DELETE is specified, information usable in
32  * creating, renaming, or deleting a directory entry may be calculated.
33  * If flag has LOCKPARENT or'ed into it and the target of the pathname
34  * exists, lookup returns both the target and its parent directory locked.
35  * When creating or renaming and LOCKPARENT is specified, the target may
36  * not be ".".  When deleting and LOCKPARENT is specified, the target may
37  * be "."., but the caller must check to ensure it does an vrele and iput
38  * instead of two iputs.
39  *
40  * Overall outline of ufs_lookup:
41  *
42  *	check accessibility of directory
43  *	look for name in cache, if found, then if at end of path
44  *	  and deleting or creating, drop it, else return name
45  *	search for name in directory, to found or notfound
46  * notfound:
47  *	if creating, return locked directory, leaving info on available slots
48  *	else return error
49  * found:
50  *	if at end of path and deleting, return information to allow delete
51  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
52  *	  inode and return info to allow rewrite
53  *	if not at end, add name to cache; if at end and neither creating
54  *	  nor deleting, add name to cache
55  *
56  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
57  */
58 ufs_lookup(vdp, ndp)
59 	register struct vnode *vdp;
60 	register struct nameidata *ndp;
61 {
62 	register struct inode *dp;	/* the directory we are searching */
63 	register struct fs *fs;		/* file system that directory is in */
64 	struct buf *bp = 0;		/* a buffer of directory entries */
65 	register struct direct *ep;	/* the current directory entry */
66 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
67 	enum {NONE, COMPACT, FOUND} slotstatus;
68 	int slotoffset = -1;		/* offset of area with free space */
69 	int slotsize;			/* size of area at slotoffset */
70 	int slotfreespace;		/* amount of space free in slot */
71 	int slotneeded;			/* size of the entry we're seeking */
72 	int numdirpasses;		/* strategy for directory search */
73 	int endsearch;			/* offset to end directory search */
74 	int prevoff;			/* ndp->ni_offset of previous entry */
75 	struct inode *pdp;		/* saved dp during symlink work */
76 	struct inode *tdp;		/* returned by iget */
77 	off_t enduseful;		/* pointer past last used dir slot */
78 	int flag;			/* LOOKUP, CREATE, RENAME, or DELETE */
79 	int lockparent;			/* 1 => lockparent flag is set */
80 	int wantparent;			/* 1 => wantparent or lockparent flag */
81 	int error;
82 
83 	ndp->ni_dvp = vdp;
84 	ndp->ni_vp = NULL;
85 	dp = VTOI(vdp);
86 	fs = dp->i_fs;
87 	lockparent = ndp->ni_nameiop & LOCKPARENT;
88 	flag = ndp->ni_nameiop & OPMASK;
89 	wantparent = ndp->ni_nameiop & (LOCKPARENT|WANTPARENT);
90 
91 	/*
92 	 * Check accessiblity of directory.
93 	 */
94 	if ((dp->i_mode&IFMT) != IFDIR)
95 		return (ENOTDIR);
96 	if (error = ufs_access(vdp, VEXEC, ndp->ni_cred))
97 		return (error);
98 
99 	/*
100 	 * We now have a segment name to search for, and a directory to search.
101 	 *
102 	 * Before tediously performing a linear scan of the directory,
103 	 * check the name cache to see if the directory/name pair
104 	 * we are looking for is known already.
105 	 */
106 	if (error = cache_lookup(ndp)) {
107 		int vpid;	/* capability number of vnode */
108 
109 		if (error == ENOENT)
110 			return (error);
111 #ifdef PARANOID
112 		if (vdp == ndp->ni_rdir && ndp->ni_isdotdot)
113 			panic("ufs_lookup: .. through root");
114 #endif
115 		/*
116 		 * Get the next vnode in the path.
117 		 * See comment below starting `Step through' for
118 		 * an explaination of the locking protocol.
119 		 */
120 		pdp = dp;
121 		dp = VTOI(ndp->ni_vp);
122 		vdp = ndp->ni_vp;
123 		vpid = vdp->v_id;
124 		if (pdp == dp) {
125 			VREF(vdp);
126 			error = 0;
127 		} else if (ndp->ni_isdotdot) {
128 			IUNLOCK(pdp);
129 			error = vget(vdp);
130 			if (!error && lockparent && *ndp->ni_next == '\0')
131 				ILOCK(pdp);
132 		} else {
133 			error = vget(vdp);
134 			if (!lockparent || error || *ndp->ni_next != '\0')
135 				IUNLOCK(pdp);
136 		}
137 		/*
138 		 * Check that the capability number did not change
139 		 * while we were waiting for the lock.
140 		 */
141 		if (!error) {
142 			if (vpid == vdp->v_id)
143 				return (0);
144 			iput(dp);
145 			if (lockparent && pdp != dp && *ndp->ni_next == '\0')
146 				IUNLOCK(pdp);
147 		}
148 		ILOCK(pdp);
149 		dp = pdp;
150 		vdp = ITOV(dp);
151 		ndp->ni_vp = NULL;
152 	}
153 
154 	/*
155 	 * Suppress search for slots unless creating
156 	 * file and at end of pathname, in which case
157 	 * we watch for a place to put the new file in
158 	 * case it doesn't already exist.
159 	 */
160 	slotstatus = FOUND;
161 	if ((flag == CREATE || flag == RENAME) && *ndp->ni_next == 0) {
162 		slotstatus = NONE;
163 		slotfreespace = 0;
164 		slotneeded = DIRSIZ(&ndp->ni_dent);
165 	}
166 
167 	/*
168 	 * If there is cached information on a previous search of
169 	 * this directory, pick up where we last left off.
170 	 * We cache only lookups as these are the most common
171 	 * and have the greatest payoff. Caching CREATE has little
172 	 * benefit as it usually must search the entire directory
173 	 * to determine that the entry does not exist. Caching the
174 	 * location of the last DELETE or RENAME has not reduced
175 	 * profiling time and hence has been removed in the interest
176 	 * of simplicity.
177 	 */
178 	if (flag != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) {
179 		ndp->ni_offset = 0;
180 		numdirpasses = 1;
181 	} else {
182 		ndp->ni_offset = dp->i_diroff;
183 		entryoffsetinblock = blkoff(fs, ndp->ni_offset);
184 		if (entryoffsetinblock != 0) {
185 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
186 			if (error)
187 				return (error);
188 		}
189 		numdirpasses = 2;
190 		nchstats.ncs_2passes++;
191 	}
192 	endsearch = roundup(dp->i_size, DIRBLKSIZ);
193 	enduseful = 0;
194 
195 searchloop:
196 	while (ndp->ni_offset < endsearch) {
197 		/*
198 		 * If offset is on a block boundary,
199 		 * read the next directory block.
200 		 * Release previous if it exists.
201 		 */
202 		if (blkoff(fs, ndp->ni_offset) == 0) {
203 			if (bp != NULL)
204 				brelse(bp);
205 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
206 			if (error)
207 				return (error);
208 			entryoffsetinblock = 0;
209 		}
210 		/*
211 		 * If still looking for a slot, and at a DIRBLKSIZE
212 		 * boundary, have to start looking for free space again.
213 		 */
214 		if (slotstatus == NONE &&
215 		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
216 			slotoffset = -1;
217 			slotfreespace = 0;
218 		}
219 		/*
220 		 * Get pointer to next entry.
221 		 * Full validation checks are slow, so we only check
222 		 * enough to insure forward progress through the
223 		 * directory. Complete checks can be run by patching
224 		 * "dirchk" to be true.
225 		 */
226 		ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
227 		if (ep->d_reclen == 0 ||
228 		    dirchk && dirbadentry(ep, entryoffsetinblock)) {
229 			int i;
230 
231 			dirbad(dp, ndp->ni_offset, "mangled entry");
232 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
233 			ndp->ni_offset += i;
234 			entryoffsetinblock += i;
235 			continue;
236 		}
237 
238 		/*
239 		 * If an appropriate sized slot has not yet been found,
240 		 * check to see if one is available. Also accumulate space
241 		 * in the current block so that we can determine if
242 		 * compaction is viable.
243 		 */
244 		if (slotstatus != FOUND) {
245 			int size = ep->d_reclen;
246 
247 			if (ep->d_ino != 0)
248 				size -= DIRSIZ(ep);
249 			if (size > 0) {
250 				if (size >= slotneeded) {
251 					slotstatus = FOUND;
252 					slotoffset = ndp->ni_offset;
253 					slotsize = ep->d_reclen;
254 				} else if (slotstatus == NONE) {
255 					slotfreespace += size;
256 					if (slotoffset == -1)
257 						slotoffset = ndp->ni_offset;
258 					if (slotfreespace >= slotneeded) {
259 						slotstatus = COMPACT;
260 						slotsize = ndp->ni_offset +
261 						      ep->d_reclen - slotoffset;
262 					}
263 				}
264 			}
265 		}
266 
267 		/*
268 		 * Check for a name match.
269 		 */
270 		if (ep->d_ino) {
271 			if (ep->d_namlen == ndp->ni_dent.d_namlen &&
272 			    !bcmp(ndp->ni_ptr, ep->d_name,
273 				(unsigned)ep->d_namlen)) {
274 				/*
275 				 * Save directory entry's inode number and
276 				 * reclen in ndp->ni_dent, and release
277 				 * directory buffer.
278 				 */
279 				ndp->ni_dent.d_ino = ep->d_ino;
280 				ndp->ni_dent.d_reclen = ep->d_reclen;
281 				brelse(bp);
282 				goto found;
283 			}
284 		}
285 		prevoff = ndp->ni_offset;
286 		ndp->ni_offset += ep->d_reclen;
287 		entryoffsetinblock += ep->d_reclen;
288 		if (ep->d_ino)
289 			enduseful = ndp->ni_offset;
290 	}
291 /* notfound: */
292 	/*
293 	 * If we started in the middle of the directory and failed
294 	 * to find our target, we must check the beginning as well.
295 	 */
296 	if (numdirpasses == 2) {
297 		numdirpasses--;
298 		ndp->ni_offset = 0;
299 		endsearch = dp->i_diroff;
300 		goto searchloop;
301 	}
302 	if (bp != NULL)
303 		brelse(bp);
304 	/*
305 	 * If creating, and at end of pathname and current
306 	 * directory has not been removed, then can consider
307 	 * allowing file to be created.
308 	 */
309 	if ((flag == CREATE || flag == RENAME) &&
310 	    *ndp->ni_next == 0 && dp->i_nlink != 0) {
311 		/*
312 		 * Access for write is interpreted as allowing
313 		 * creation of files in the directory.
314 		 */
315 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred))
316 			return (error);
317 		/*
318 		 * Return an indication of where the new directory
319 		 * entry should be put.  If we didn't find a slot,
320 		 * then set ndp->ni_count to 0 indicating that the new
321 		 * slot belongs at the end of the directory. If we found
322 		 * a slot, then the new entry can be put in the range
323 		 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
324 		 */
325 		if (slotstatus == NONE) {
326 			ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
327 			ndp->ni_count = 0;
328 			enduseful = ndp->ni_offset;
329 		} else {
330 			ndp->ni_offset = slotoffset;
331 			ndp->ni_count = slotsize;
332 			if (enduseful < slotoffset + slotsize)
333 				enduseful = slotoffset + slotsize;
334 		}
335 		ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
336 		dp->i_flag |= IUPD|ICHG;
337 		/*
338 		 * We return with the directory locked, so that
339 		 * the parameters we set up above will still be
340 		 * valid if we actually decide to do a direnter().
341 		 * We return ni_vp == NULL to indicate that the entry
342 		 * does not currently exist; we leave a pointer to
343 		 * the (locked) directory inode in ndp->ni_dvp.
344 		 *
345 		 * NB - if the directory is unlocked, then this
346 		 * information cannot be used.
347 		 */
348 		if (!lockparent)
349 			IUNLOCK(dp);
350 	}
351 	/*
352 	 * Insert name into cache (as non-existent) if appropriate.
353 	 */
354 	if (ndp->ni_makeentry && flag != CREATE)
355 		cache_enter(ndp);
356 	return (ENOENT);
357 
358 found:
359 	if (numdirpasses == 2)
360 		nchstats.ncs_pass2++;
361 	/*
362 	 * Check that directory length properly reflects presence
363 	 * of this entry.
364 	 */
365 	if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
366 		dirbad(dp, ndp->ni_offset, "i_size too small");
367 		dp->i_size = entryoffsetinblock + DIRSIZ(ep);
368 		dp->i_flag |= IUPD|ICHG;
369 	}
370 
371 	/*
372 	 * Found component in pathname.
373 	 * If the final component of path name, save information
374 	 * in the cache as to where the entry was found.
375 	 */
376 	if (*ndp->ni_next == '\0' && flag == LOOKUP)
377 		dp->i_diroff = ndp->ni_offset &~ (DIRBLKSIZ - 1);
378 
379 	/*
380 	 * If deleting, and at end of pathname, return
381 	 * parameters which can be used to remove file.
382 	 * If the wantparent flag isn't set, we return only
383 	 * the directory (in ndp->ni_dvp), otherwise we go
384 	 * on and lock the inode, being careful with ".".
385 	 */
386 	if (flag == DELETE && *ndp->ni_next == 0) {
387 		/*
388 		 * Write access to directory required to delete files.
389 		 */
390 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred))
391 			return (error);
392 		/*
393 		 * Return pointer to current entry in ndp->ni_offset,
394 		 * and distance past previous entry (if there
395 		 * is a previous entry in this block) in ndp->ni_count.
396 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
397 		 */
398 		if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
399 			ndp->ni_count = 0;
400 		else
401 			ndp->ni_count = ndp->ni_offset - prevoff;
402 		if (dp->i_number == ndp->ni_dent.d_ino) {
403 			VREF(vdp);
404 			ndp->ni_vp = vdp;
405 			return (0);
406 		}
407 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
408 			return (error);
409 		/*
410 		 * If directory is "sticky", then user must own
411 		 * the directory, or the file in it, else she
412 		 * may not delete it (unless she's root). This
413 		 * implements append-only directories.
414 		 */
415 		if ((dp->i_mode & ISVTX) &&
416 		    ndp->ni_cred->cr_uid != 0 &&
417 		    ndp->ni_cred->cr_uid != dp->i_uid &&
418 		    tdp->i_uid != ndp->ni_cred->cr_uid) {
419 			iput(tdp);
420 			return (EPERM);
421 		}
422 		ndp->ni_vp = ITOV(tdp);
423 		if (!lockparent)
424 			IUNLOCK(dp);
425 		return (0);
426 	}
427 
428 	/*
429 	 * If rewriting (RENAME), return the inode and the
430 	 * information required to rewrite the present directory
431 	 * Must get inode of directory entry to verify it's a
432 	 * regular file, or empty directory.
433 	 */
434 	if (flag == RENAME && wantparent && *ndp->ni_next == 0) {
435 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred))
436 			return (error);
437 		/*
438 		 * Careful about locking second inode.
439 		 * This can only occur if the target is ".".
440 		 */
441 		if (dp->i_number == ndp->ni_dent.d_ino)
442 			return (EISDIR);
443 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
444 			return (error);
445 		ndp->ni_vp = ITOV(tdp);
446 		if (!lockparent)
447 			IUNLOCK(dp);
448 		return (0);
449 	}
450 
451 	/*
452 	 * Step through the translation in the name.  We do not `iput' the
453 	 * directory because we may need it again if a symbolic link
454 	 * is relative to the current directory.  Instead we save it
455 	 * unlocked as "pdp".  We must get the target inode before unlocking
456 	 * the directory to insure that the inode will not be removed
457 	 * before we get it.  We prevent deadlock by always fetching
458 	 * inodes from the root, moving down the directory tree. Thus
459 	 * when following backward pointers ".." we must unlock the
460 	 * parent directory before getting the requested directory.
461 	 * There is a potential race condition here if both the current
462 	 * and parent directories are removed before the `iget' for the
463 	 * inode associated with ".." returns.  We hope that this occurs
464 	 * infrequently since we cannot avoid this race condition without
465 	 * implementing a sophisticated deadlock detection algorithm.
466 	 * Note also that this simple deadlock detection scheme will not
467 	 * work if the file system has any hard links other than ".."
468 	 * that point backwards in the directory structure.
469 	 */
470 	pdp = dp;
471 	if (ndp->ni_isdotdot) {
472 		IUNLOCK(pdp);	/* race to get the inode */
473 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp)) {
474 			ILOCK(pdp);
475 			return (error);
476 		}
477 		if (lockparent && *ndp->ni_next == '\0')
478 			ILOCK(pdp);
479 		ndp->ni_vp = ITOV(tdp);
480 	} else if (dp->i_number == ndp->ni_dent.d_ino) {
481 		VREF(vdp);	/* we want ourself, ie "." */
482 		ndp->ni_vp = vdp;
483 	} else {
484 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
485 			return (error);
486 		if (!lockparent || *ndp->ni_next != '\0')
487 			IUNLOCK(pdp);
488 		ndp->ni_vp = ITOV(tdp);
489 	}
490 
491 	/*
492 	 * Insert name into cache if appropriate.
493 	 */
494 	if (ndp->ni_makeentry)
495 		cache_enter(ndp);
496 	return (0);
497 }
498 
499 
500 dirbad(ip, offset, how)
501 	struct inode *ip;
502 	off_t offset;
503 	char *how;
504 {
505 
506 	printf("%s: bad dir ino %d at offset %d: %s\n",
507 	    ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
508 	panic("bad dir");
509 }
510 
511 /*
512  * Do consistency checking on a directory entry:
513  *	record length must be multiple of 4
514  *	entry must fit in rest of its DIRBLKSIZ block
515  *	record must be large enough to contain entry
516  *	name is not longer than MAXNAMLEN
517  *	name must be as long as advertised, and null terminated
518  */
519 dirbadentry(ep, entryoffsetinblock)
520 	register struct direct *ep;
521 	int entryoffsetinblock;
522 {
523 	register int i;
524 
525 	if ((ep->d_reclen & 0x3) != 0 ||
526 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
527 	    ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
528 		return (1);
529 	for (i = 0; i < ep->d_namlen; i++)
530 		if (ep->d_name[i] == '\0')
531 			return (1);
532 	return (ep->d_name[i]);
533 }
534 
535 /*
536  * Write a directory entry after a call to namei, using the parameters
537  * which it left in nameidata.  The argument ip is the inode which the
538  * new directory entry will refer to.  The nameidata field ndp->ni_dvp
539  * is a pointer to the directory to be written, which was left locked by
540  * namei.  Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
541  * how the space for the new entry is to be gotten.
542  */
543 direnter(ip, ndp)
544 	struct inode *ip;
545 	register struct nameidata *ndp;
546 {
547 	register struct direct *ep, *nep;
548 	register struct inode *dp = VTOI(ndp->ni_dvp);
549 	struct buf *bp;
550 	int loc, spacefree, error = 0;
551 	u_int dsize;
552 	int newentrysize;
553 	char *dirbuf;
554 
555 	ndp->ni_dent.d_ino = ip->i_number;
556 	newentrysize = DIRSIZ(&ndp->ni_dent);
557 	if (ndp->ni_count == 0) {
558 		/*
559 		 * If ndp->ni_count is 0, then namei could find no space in the
560 		 * directory. In this case ndp->ni_offset will be on a directory
561 		 * block boundary and we will write the new entry into a fresh
562 		 * block.
563 		 */
564 		if (ndp->ni_offset&(DIRBLKSIZ-1))
565 			panic("wdir: newblk");
566 		ndp->ni_dent.d_reclen = DIRBLKSIZ;
567 		ndp->ni_count = newentrysize;
568 		ndp->ni_resid = newentrysize;
569 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
570 		ndp->ni_iov = &ndp->ni_nd.nd_iovec;
571 		ndp->ni_iovcnt = 1;
572 		ndp->ni_rw = UIO_WRITE;
573 		ndp->ni_uioseg = UIO_SYSSPACE;
574 		error =
575 		    ufs_write(ndp->ni_dvp, &ndp->ni_uio, IO_SYNC, ndp->ni_cred);
576 		if (DIRBLKSIZ > dp->i_fs->fs_fsize) {
577 			panic("wdir: blksize"); /* XXX - should grow w/balloc */
578 		} else {
579 			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
580 			dp->i_flag |= ICHG;
581 		}
582 		return (error);
583 	}
584 
585 	/*
586 	 * If ndp->ni_count is non-zero, then namei found space for the new
587 	 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
588 	 * in the directory.  To use this space, we may have to compact
589 	 * the entries located there, by copying them together towards
590 	 * the beginning of the block, leaving the free space in
591 	 * one usable chunk at the end.
592 	 */
593 
594 	/*
595 	 * Increase size of directory if entry eats into new space.
596 	 * This should never push the size past a new multiple of
597 	 * DIRBLKSIZE.
598 	 *
599 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
600 	 */
601 	if (ndp->ni_offset + ndp->ni_count > dp->i_size)
602 		dp->i_size = ndp->ni_offset + ndp->ni_count;
603 	/*
604 	 * Get the block containing the space for the new directory entry.
605 	 */
606 	if (error = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf, &bp))
607 		return (error);
608 	/*
609 	 * Find space for the new entry.  In the simple case, the
610 	 * entry at offset base will have the space.  If it does
611 	 * not, then namei arranged that compacting the region
612 	 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
613 	 */
614 	ep = (struct direct *)dirbuf;
615 	dsize = DIRSIZ(ep);
616 	spacefree = ep->d_reclen - dsize;
617 	for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
618 		nep = (struct direct *)(dirbuf + loc);
619 		if (ep->d_ino) {
620 			/* trim the existing slot */
621 			ep->d_reclen = dsize;
622 			ep = (struct direct *)((char *)ep + dsize);
623 		} else {
624 			/* overwrite; nothing there; header is ours */
625 			spacefree += dsize;
626 		}
627 		dsize = DIRSIZ(nep);
628 		spacefree += nep->d_reclen - dsize;
629 		loc += nep->d_reclen;
630 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
631 	}
632 	/*
633 	 * Update the pointer fields in the previous entry (if any),
634 	 * copy in the new entry, and write out the block.
635 	 */
636 	if (ep->d_ino == 0) {
637 		if (spacefree + dsize < newentrysize)
638 			panic("wdir: compact1");
639 		ndp->ni_dent.d_reclen = spacefree + dsize;
640 	} else {
641 		if (spacefree < newentrysize)
642 			panic("wdir: compact2");
643 		ndp->ni_dent.d_reclen = spacefree;
644 		ep->d_reclen = dsize;
645 		ep = (struct direct *)((char *)ep + dsize);
646 	}
647 	bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
648 	error = bwrite(bp);
649 	dp->i_flag |= IUPD|ICHG;
650 	if (!error && ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
651 		error = itrunc(dp, (u_long)ndp->ni_endoff, IO_SYNC);
652 	return (error);
653 }
654 
655 /*
656  * Remove a directory entry after a call to namei, using
657  * the parameters which it left in nameidata. The entry
658  * ni_offset contains the offset into the directory of the
659  * entry to be eliminated.  The ni_count field contains the
660  * size of the previous record in the directory.  If this
661  * is 0, the first entry is being deleted, so we need only
662  * zero the inode number to mark the entry as free.  If the
663  * entry isn't the first in the directory, we must reclaim
664  * the space of the now empty record by adding the record size
665  * to the size of the previous entry.
666  */
667 dirremove(ndp)
668 	register struct nameidata *ndp;
669 {
670 	register struct inode *dp = VTOI(ndp->ni_dvp);
671 	struct direct *ep;
672 	struct buf *bp;
673 	int error;
674 
675 	if (ndp->ni_count == 0) {
676 		/*
677 		 * First entry in block: set d_ino to zero.
678 		 */
679 		ndp->ni_dent.d_ino = 0;
680 		ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
681 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
682 		ndp->ni_iov = &ndp->ni_nd.nd_iovec;
683 		ndp->ni_iovcnt = 1;
684 		ndp->ni_rw = UIO_WRITE;
685 		ndp->ni_uioseg = UIO_SYSSPACE;
686 		error =
687 		    ufs_write(ndp->ni_dvp, &ndp->ni_uio, IO_SYNC, ndp->ni_cred);
688 	} else {
689 		/*
690 		 * Collapse new free space into previous entry.
691 		 */
692 		if (error = blkatoff(dp, ndp->ni_offset - ndp->ni_count,
693 		    (char **)&ep, &bp)) {
694 			return (error);
695 		}
696 		ep->d_reclen += ndp->ni_dent.d_reclen;
697 		error = bwrite(bp);
698 		dp->i_flag |= IUPD|ICHG;
699 	}
700 	return (error);
701 }
702 
703 /*
704  * Rewrite an existing directory entry to point at the inode
705  * supplied.  The parameters describing the directory entry are
706  * set up by a call to namei.
707  */
708 dirrewrite(dp, ip, ndp)
709 	struct inode *dp, *ip;
710 	struct nameidata *ndp;
711 {
712 
713 	ndp->ni_dent.d_ino = ip->i_number;
714 	ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
715 	ndp->ni_base = (caddr_t)&ndp->ni_dent;
716 	ndp->ni_iov = &ndp->ni_nd.nd_iovec;
717 	ndp->ni_iovcnt = 1;
718 	ndp->ni_rw = UIO_WRITE;
719 	ndp->ni_uioseg = UIO_SYSSPACE;
720 	return (ufs_write(ITOV(dp), &ndp->ni_uio, IO_SYNC, ndp->ni_cred));
721 }
722 
723 /*
724  * Return buffer with contents of block "offset"
725  * from the beginning of directory "ip".  If "res"
726  * is non-zero, fill it in with a pointer to the
727  * remaining space in the directory.
728  */
729 blkatoff(ip, offset, res, bpp)
730 	struct inode *ip;
731 	off_t offset;
732 	char **res;
733 	struct buf **bpp;
734 {
735 	register struct fs *fs = ip->i_fs;
736 	daddr_t lbn = lblkno(fs, offset);
737 	int bsize = blksize(fs, ip, lbn);
738 	struct buf *bp;
739 	daddr_t bn;
740 	int error;
741 
742 	*bpp = 0;
743 	if (error = bread(ITOV(ip), lbn, bsize, NOCRED, &bp)) {
744 		brelse(bp);
745 		return (error);
746 	}
747 	if (res)
748 		*res = bp->b_un.b_addr + blkoff(fs, offset);
749 	*bpp = bp;
750 	return (0);
751 }
752 
753 /*
754  * Check if a directory is empty or not.
755  * Inode supplied must be locked.
756  *
757  * Using a struct dirtemplate here is not precisely
758  * what we want, but better than using a struct direct.
759  *
760  * NB: does not handle corrupted directories.
761  */
762 dirempty(ip, parentino, cred)
763 	register struct inode *ip;
764 	ino_t parentino;
765 	struct ucred *cred;
766 {
767 	register off_t off;
768 	struct dirtemplate dbuf;
769 	register struct direct *dp = (struct direct *)&dbuf;
770 	int error, count;
771 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
772 
773 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
774 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ,
775 		    off, UIO_SYSSPACE, IO_NODELOCKED, cred, &count);
776 		/*
777 		 * Since we read MINDIRSIZ, residual must
778 		 * be 0 unless we're at end of file.
779 		 */
780 		if (error || count != 0)
781 			return (0);
782 		/* avoid infinite loops */
783 		if (dp->d_reclen == 0)
784 			return (0);
785 		/* skip empty entries */
786 		if (dp->d_ino == 0)
787 			continue;
788 		/* accept only "." and ".." */
789 		if (dp->d_namlen > 2)
790 			return (0);
791 		if (dp->d_name[0] != '.')
792 			return (0);
793 		/*
794 		 * At this point d_namlen must be 1 or 2.
795 		 * 1 implies ".", 2 implies ".." if second
796 		 * char is also "."
797 		 */
798 		if (dp->d_namlen == 1)
799 			continue;
800 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
801 			continue;
802 		return (0);
803 	}
804 	return (1);
805 }
806 
807 /*
808  * Check if source directory is in the path of the target directory.
809  * Target is supplied locked, source is unlocked.
810  * The target is always iput() before returning.
811  */
812 checkpath(source, target, cred)
813 	struct inode *source, *target;
814 	struct ucred *cred;
815 {
816 	struct dirtemplate dirbuf;
817 	struct inode *ip;
818 	int error = 0;
819 
820 	ip = target;
821 	if (ip->i_number == source->i_number) {
822 		error = EEXIST;
823 		goto out;
824 	}
825 	if (ip->i_number == ROOTINO)
826 		goto out;
827 
828 	for (;;) {
829 		if ((ip->i_mode&IFMT) != IFDIR) {
830 			error = ENOTDIR;
831 			break;
832 		}
833 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)&dirbuf,
834 			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
835 			IO_NODELOCKED, cred, (int *)0);
836 		if (error != 0)
837 			break;
838 		if (dirbuf.dotdot_namlen != 2 ||
839 		    dirbuf.dotdot_name[0] != '.' ||
840 		    dirbuf.dotdot_name[1] != '.') {
841 			error = ENOTDIR;
842 			break;
843 		}
844 		if (dirbuf.dotdot_ino == source->i_number) {
845 			error = EINVAL;
846 			break;
847 		}
848 		if (dirbuf.dotdot_ino == ROOTINO)
849 			break;
850 		iput(ip);
851 		if (error = iget(ip, dirbuf.dotdot_ino, &ip))
852 			break;
853 	}
854 
855 out:
856 	if (error == ENOTDIR)
857 		printf("checkpath: .. not a directory\n");
858 	if (ip != NULL)
859 		iput(ip);
860 	return (error);
861 }
862