xref: /original-bsd/sys/ufs/ffs/ufs_lookup.c (revision b7261a4b)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  *
17  *	@(#)ufs_lookup.c	7.22 (Berkeley) 06/19/90
18  */
19 
20 #include "param.h"
21 #include "user.h"
22 #include "buf.h"
23 #include "file.h"
24 #include "vnode.h"
25 #include "../ufs/quota.h"
26 #include "../ufs/inode.h"
27 #include "../ufs/fs.h"
28 
29 struct	nchstats nchstats;
30 int	dirchk = 1;
31 
32 /*
33  * Convert a component of a pathname into a pointer to a locked inode.
34  * This is a very central and rather complicated routine.
35  * If the file system is not maintained in a strict tree hierarchy,
36  * this can result in a deadlock situation (see comments in code below).
37  *
38  * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
39  * whether the name is to be looked up, created, renamed, or deleted.
40  * When CREATE, RENAME, or DELETE is specified, information usable in
41  * creating, renaming, or deleting a directory entry may be calculated.
42  * If flag has LOCKPARENT or'ed into it and the target of the pathname
43  * exists, lookup returns both the target and its parent directory locked.
44  * When creating or renaming and LOCKPARENT is specified, the target may
45  * not be ".".  When deleting and LOCKPARENT is specified, the target may
46  * be "."., but the caller must check to ensure it does an vrele and iput
47  * instead of two iputs.
48  *
49  * Overall outline of ufs_lookup:
50  *
51  *	check accessibility of directory
52  *	look for name in cache, if found, then if at end of path
53  *	  and deleting or creating, drop it, else return name
54  *	search for name in directory, to found or notfound
55  * notfound:
56  *	if creating, return locked directory, leaving info on available slots
57  *	else return error
58  * found:
59  *	if at end of path and deleting, return information to allow delete
60  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
61  *	  inode and return info to allow rewrite
62  *	if not at end, add name to cache; if at end and neither creating
63  *	  nor deleting, add name to cache
64  *
65  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
66  */
67 ufs_lookup(vdp, ndp)
68 	register struct vnode *vdp;
69 	register struct nameidata *ndp;
70 {
71 	register struct inode *dp;	/* the directory we are searching */
72 	register struct fs *fs;		/* file system that directory is in */
73 	struct buf *bp = 0;		/* a buffer of directory entries */
74 	register struct direct *ep;	/* the current directory entry */
75 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
76 	enum {NONE, COMPACT, FOUND} slotstatus;
77 	int slotoffset = -1;		/* offset of area with free space */
78 	int slotsize;			/* size of area at slotoffset */
79 	int slotfreespace;		/* amount of space free in slot */
80 	int slotneeded;			/* size of the entry we're seeking */
81 	int numdirpasses;		/* strategy for directory search */
82 	int endsearch;			/* offset to end directory search */
83 	int prevoff;			/* ndp->ni_offset of previous entry */
84 	struct inode *pdp;		/* saved dp during symlink work */
85 	struct inode *tdp;		/* returned by iget */
86 	off_t enduseful;		/* pointer past last used dir slot */
87 	int flag;			/* LOOKUP, CREATE, RENAME, or DELETE */
88 	int lockparent;			/* 1 => lockparent flag is set */
89 	int wantparent;			/* 1 => wantparent or lockparent flag */
90 	int error;
91 
92 	ndp->ni_dvp = vdp;
93 	ndp->ni_vp = NULL;
94 	dp = VTOI(vdp);
95 	fs = dp->i_fs;
96 	lockparent = ndp->ni_nameiop & LOCKPARENT;
97 	flag = ndp->ni_nameiop & OPFLAG;
98 	wantparent = ndp->ni_nameiop & (LOCKPARENT|WANTPARENT);
99 
100 	/*
101 	 * Check accessiblity of directory.
102 	 */
103 	if ((dp->i_mode&IFMT) != IFDIR)
104 		return (ENOTDIR);
105 	if (error = ufs_access(vdp, VEXEC, ndp->ni_cred))
106 		return (error);
107 
108 	/*
109 	 * We now have a segment name to search for, and a directory to search.
110 	 *
111 	 * Before tediously performing a linear scan of the directory,
112 	 * check the name cache to see if the directory/name pair
113 	 * we are looking for is known already.
114 	 */
115 	if (error = cache_lookup(ndp)) {
116 		int vpid;	/* capability number of vnode */
117 
118 		if (error == ENOENT)
119 			return (error);
120 		if (vdp == ndp->ni_rdir && ndp->ni_isdotdot)
121 			panic("ufs_lookup: .. through root");
122 		/*
123 		 * Get the next vnode in the path.
124 		 * See comment below starting `Step through' for
125 		 * an explaination of the locking protocol.
126 		 */
127 		pdp = dp;
128 		dp = VTOI(ndp->ni_vp);
129 		vdp = ndp->ni_vp;
130 		vpid = vdp->v_id;
131 		if (pdp == dp) {
132 			VREF(vdp);
133 			error = 0;
134 		} else if (ndp->ni_isdotdot) {
135 			IUNLOCK(pdp);
136 			error = vget(vdp);
137 		} else {
138 			error = vget(vdp);
139 			IUNLOCK(pdp);
140 		}
141 		/*
142 		 * Check that the capability number did not change
143 		 * while we were waiting for the lock.
144 		 */
145 		if (!error) {
146 			if (vpid == vdp->v_id)
147 				return (0);
148 			else
149 				iput(dp);
150 		}
151 		ILOCK(pdp);
152 		dp = pdp;
153 		vdp = ITOV(dp);
154 		ndp->ni_vp = NULL;
155 	}
156 
157 	/*
158 	 * Suppress search for slots unless creating
159 	 * file and at end of pathname, in which case
160 	 * we watch for a place to put the new file in
161 	 * case it doesn't already exist.
162 	 */
163 	slotstatus = FOUND;
164 	if ((flag == CREATE || flag == RENAME) && *ndp->ni_next == 0) {
165 		slotstatus = NONE;
166 		slotfreespace = 0;
167 		slotneeded = DIRSIZ(&ndp->ni_dent);
168 	}
169 
170 	/*
171 	 * If there is cached information on a previous search of
172 	 * this directory, pick up where we last left off.
173 	 * We cache only lookups as these are the most common
174 	 * and have the greatest payoff. Caching CREATE has little
175 	 * benefit as it usually must search the entire directory
176 	 * to determine that the entry does not exist. Caching the
177 	 * location of the last DELETE or RENAME has not reduced
178 	 * profiling time and hence has been removed in the interest
179 	 * of simplicity.
180 	 */
181 	if (flag != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) {
182 		ndp->ni_offset = 0;
183 		numdirpasses = 1;
184 	} else {
185 		ndp->ni_offset = dp->i_diroff;
186 		entryoffsetinblock = blkoff(fs, ndp->ni_offset);
187 		if (entryoffsetinblock != 0) {
188 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
189 			if (error)
190 				return (error);
191 		}
192 		numdirpasses = 2;
193 		nchstats.ncs_2passes++;
194 	}
195 	endsearch = roundup(dp->i_size, DIRBLKSIZ);
196 	enduseful = 0;
197 
198 searchloop:
199 	while (ndp->ni_offset < endsearch) {
200 		/*
201 		 * If offset is on a block boundary,
202 		 * read the next directory block.
203 		 * Release previous if it exists.
204 		 */
205 		if (blkoff(fs, ndp->ni_offset) == 0) {
206 			if (bp != NULL)
207 				brelse(bp);
208 			error = blkatoff(dp, ndp->ni_offset, (char **)0, &bp);
209 			if (error)
210 				return (error);
211 			entryoffsetinblock = 0;
212 		}
213 		/*
214 		 * If still looking for a slot, and at a DIRBLKSIZE
215 		 * boundary, have to start looking for free space again.
216 		 */
217 		if (slotstatus == NONE &&
218 		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
219 			slotoffset = -1;
220 			slotfreespace = 0;
221 		}
222 		/*
223 		 * Get pointer to next entry.
224 		 * Full validation checks are slow, so we only check
225 		 * enough to insure forward progress through the
226 		 * directory. Complete checks can be run by patching
227 		 * "dirchk" to be true.
228 		 */
229 		ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
230 		if (ep->d_reclen == 0 ||
231 		    dirchk && dirbadentry(ep, entryoffsetinblock)) {
232 			int i;
233 
234 			dirbad(dp, ndp->ni_offset, "mangled entry");
235 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
236 			ndp->ni_offset += i;
237 			entryoffsetinblock += i;
238 			continue;
239 		}
240 
241 		/*
242 		 * If an appropriate sized slot has not yet been found,
243 		 * check to see if one is available. Also accumulate space
244 		 * in the current block so that we can determine if
245 		 * compaction is viable.
246 		 */
247 		if (slotstatus != FOUND) {
248 			int size = ep->d_reclen;
249 
250 			if (ep->d_ino != 0)
251 				size -= DIRSIZ(ep);
252 			if (size > 0) {
253 				if (size >= slotneeded) {
254 					slotstatus = FOUND;
255 					slotoffset = ndp->ni_offset;
256 					slotsize = ep->d_reclen;
257 				} else if (slotstatus == NONE) {
258 					slotfreespace += size;
259 					if (slotoffset == -1)
260 						slotoffset = ndp->ni_offset;
261 					if (slotfreespace >= slotneeded) {
262 						slotstatus = COMPACT;
263 						slotsize = ndp->ni_offset +
264 						      ep->d_reclen - slotoffset;
265 					}
266 				}
267 			}
268 		}
269 
270 		/*
271 		 * Check for a name match.
272 		 */
273 		if (ep->d_ino) {
274 			if (ep->d_namlen == ndp->ni_dent.d_namlen &&
275 			    !bcmp(ndp->ni_ptr, ep->d_name,
276 				(unsigned)ep->d_namlen)) {
277 				/*
278 				 * Save directory entry's inode number and
279 				 * reclen in ndp->ni_dent, and release
280 				 * directory buffer.
281 				 */
282 				ndp->ni_dent.d_ino = ep->d_ino;
283 				ndp->ni_dent.d_reclen = ep->d_reclen;
284 				brelse(bp);
285 				goto found;
286 			}
287 		}
288 		prevoff = ndp->ni_offset;
289 		ndp->ni_offset += ep->d_reclen;
290 		entryoffsetinblock += ep->d_reclen;
291 		if (ep->d_ino)
292 			enduseful = ndp->ni_offset;
293 	}
294 /* notfound: */
295 	/*
296 	 * If we started in the middle of the directory and failed
297 	 * to find our target, we must check the beginning as well.
298 	 */
299 	if (numdirpasses == 2) {
300 		numdirpasses--;
301 		ndp->ni_offset = 0;
302 		endsearch = dp->i_diroff;
303 		goto searchloop;
304 	}
305 	if (bp != NULL)
306 		brelse(bp);
307 	/*
308 	 * If creating, and at end of pathname and current
309 	 * directory has not been removed, then can consider
310 	 * allowing file to be created.
311 	 */
312 	if ((flag == CREATE || flag == RENAME) &&
313 	    *ndp->ni_next == 0 && dp->i_nlink != 0) {
314 		/*
315 		 * Access for write is interpreted as allowing
316 		 * creation of files in the directory.
317 		 */
318 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred))
319 			return (error);
320 		/*
321 		 * Return an indication of where the new directory
322 		 * entry should be put.  If we didn't find a slot,
323 		 * then set ndp->ni_count to 0 indicating that the new
324 		 * slot belongs at the end of the directory. If we found
325 		 * a slot, then the new entry can be put in the range
326 		 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
327 		 */
328 		if (slotstatus == NONE) {
329 			ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
330 			ndp->ni_count = 0;
331 			enduseful = ndp->ni_offset;
332 		} else {
333 			ndp->ni_offset = slotoffset;
334 			ndp->ni_count = slotsize;
335 			if (enduseful < slotoffset + slotsize)
336 				enduseful = slotoffset + slotsize;
337 		}
338 		ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
339 		dp->i_flag |= IUPD|ICHG;
340 		/*
341 		 * We return with the directory locked, so that
342 		 * the parameters we set up above will still be
343 		 * valid if we actually decide to do a direnter().
344 		 * We return ni_vp == NULL to indicate that the entry
345 		 * does not currently exist; we leave a pointer to
346 		 * the (locked) directory inode in ndp->ni_dvp.
347 		 *
348 		 * NB - if the directory is unlocked, then this
349 		 * information cannot be used.
350 		 */
351 		if (!lockparent)
352 			IUNLOCK(dp);
353 	}
354 	/*
355 	 * Insert name into cache (as non-existent) if appropriate.
356 	 */
357 	if (ndp->ni_makeentry)
358 		cache_enter(ndp);
359 	return (ENOENT);
360 
361 found:
362 	if (numdirpasses == 2)
363 		nchstats.ncs_pass2++;
364 	/*
365 	 * Check that directory length properly reflects presence
366 	 * of this entry.
367 	 */
368 	if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
369 		dirbad(dp, ndp->ni_offset, "i_size too small");
370 		dp->i_size = entryoffsetinblock + DIRSIZ(ep);
371 		dp->i_flag |= IUPD|ICHG;
372 	}
373 
374 	/*
375 	 * Found component in pathname.
376 	 * If the final component of path name, save information
377 	 * in the cache as to where the entry was found.
378 	 */
379 	if (*ndp->ni_next == '\0' && flag == LOOKUP)
380 		dp->i_diroff = ndp->ni_offset &~ (DIRBLKSIZ - 1);
381 
382 	/*
383 	 * If deleting, and at end of pathname, return
384 	 * parameters which can be used to remove file.
385 	 * If the wantparent flag isn't set, we return only
386 	 * the directory (in ndp->ni_dvp), otherwise we go
387 	 * on and lock the inode, being careful with ".".
388 	 */
389 	if (flag == DELETE && *ndp->ni_next == 0) {
390 		/*
391 		 * Write access to directory required to delete files.
392 		 */
393 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred))
394 			return (error);
395 		/*
396 		 * Return pointer to current entry in ndp->ni_offset,
397 		 * and distance past previous entry (if there
398 		 * is a previous entry in this block) in ndp->ni_count.
399 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
400 		 */
401 		if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
402 			ndp->ni_count = 0;
403 		else
404 			ndp->ni_count = ndp->ni_offset - prevoff;
405 		if (dp->i_number == ndp->ni_dent.d_ino) {
406 			VREF(vdp);
407 			ndp->ni_vp = vdp;
408 			return (0);
409 		}
410 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
411 			return (error);
412 		/*
413 		 * If directory is "sticky", then user must own
414 		 * the directory, or the file in it, else she
415 		 * may not delete it (unless she's root). This
416 		 * implements append-only directories.
417 		 */
418 		if ((dp->i_mode & ISVTX) &&
419 		    ndp->ni_cred->cr_uid != 0 &&
420 		    ndp->ni_cred->cr_uid != dp->i_uid &&
421 		    tdp->i_uid != ndp->ni_cred->cr_uid) {
422 			iput(tdp);
423 			return (EPERM);
424 		}
425 		ndp->ni_vp = ITOV(tdp);
426 		if (!lockparent)
427 			IUNLOCK(dp);
428 		return (0);
429 	}
430 
431 	/*
432 	 * If rewriting (RENAME), return the inode and the
433 	 * information required to rewrite the present directory
434 	 * Must get inode of directory entry to verify it's a
435 	 * regular file, or empty directory.
436 	 */
437 	if (flag == RENAME && wantparent && *ndp->ni_next == 0) {
438 		if (error = ufs_access(vdp, VWRITE, ndp->ni_cred))
439 			return (error);
440 		/*
441 		 * Careful about locking second inode.
442 		 * This can only occur if the target is ".".
443 		 */
444 		if (dp->i_number == ndp->ni_dent.d_ino)
445 			return (EISDIR);
446 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
447 			return (error);
448 		ndp->ni_vp = ITOV(tdp);
449 		if (!lockparent)
450 			IUNLOCK(dp);
451 		return (0);
452 	}
453 
454 	/*
455 	 * Step through the translation in the name.  We do not `iput' the
456 	 * directory because we may need it again if a symbolic link
457 	 * is relative to the current directory.  Instead we save it
458 	 * unlocked as "pdp".  We must get the target inode before unlocking
459 	 * the directory to insure that the inode will not be removed
460 	 * before we get it.  We prevent deadlock by always fetching
461 	 * inodes from the root, moving down the directory tree. Thus
462 	 * when following backward pointers ".." we must unlock the
463 	 * parent directory before getting the requested directory.
464 	 * There is a potential race condition here if both the current
465 	 * and parent directories are removed before the `iget' for the
466 	 * inode associated with ".." returns.  We hope that this occurs
467 	 * infrequently since we cannot avoid this race condition without
468 	 * implementing a sophisticated deadlock detection algorithm.
469 	 * Note also that this simple deadlock detection scheme will not
470 	 * work if the file system has any hard links other than ".."
471 	 * that point backwards in the directory structure.
472 	 */
473 	pdp = dp;
474 	if (ndp->ni_isdotdot) {
475 		IUNLOCK(pdp);	/* race to get the inode */
476 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp)) {
477 			ILOCK(pdp);
478 			return (error);
479 		}
480 		if (lockparent && *ndp->ni_next == '\0')
481 			ILOCK(pdp);
482 		ndp->ni_vp = ITOV(tdp);
483 	} else if (dp->i_number == ndp->ni_dent.d_ino) {
484 		VREF(vdp);	/* we want ourself, ie "." */
485 		ndp->ni_vp = vdp;
486 	} else {
487 		if (error = iget(dp, ndp->ni_dent.d_ino, &tdp))
488 			return (error);
489 		if (!lockparent || *ndp->ni_next != '\0')
490 			IUNLOCK(pdp);
491 		ndp->ni_vp = ITOV(tdp);
492 	}
493 
494 	/*
495 	 * Insert name into cache if appropriate.
496 	 */
497 	if (ndp->ni_makeentry)
498 		cache_enter(ndp);
499 	return (0);
500 }
501 
502 
503 dirbad(ip, offset, how)
504 	struct inode *ip;
505 	off_t offset;
506 	char *how;
507 {
508 
509 	printf("%s: bad dir ino %d at offset %d: %s\n",
510 	    ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
511 	panic("bad dir");
512 }
513 
514 /*
515  * Do consistency checking on a directory entry:
516  *	record length must be multiple of 4
517  *	entry must fit in rest of its DIRBLKSIZ block
518  *	record must be large enough to contain entry
519  *	name is not longer than MAXNAMLEN
520  *	name must be as long as advertised, and null terminated
521  */
522 dirbadentry(ep, entryoffsetinblock)
523 	register struct direct *ep;
524 	int entryoffsetinblock;
525 {
526 	register int i;
527 
528 	if ((ep->d_reclen & 0x3) != 0 ||
529 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
530 	    ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
531 		return (1);
532 	for (i = 0; i < ep->d_namlen; i++)
533 		if (ep->d_name[i] == '\0')
534 			return (1);
535 	return (ep->d_name[i]);
536 }
537 
538 /*
539  * Write a directory entry after a call to namei, using the parameters
540  * which it left in nameidata.  The argument ip is the inode which the
541  * new directory entry will refer to.  The nameidata field ndp->ni_dvp
542  * is a pointer to the directory to be written, which was left locked by
543  * namei.  Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
544  * how the space for the new entry is to be gotten.
545  */
546 direnter(ip, ndp)
547 	struct inode *ip;
548 	register struct nameidata *ndp;
549 {
550 	register struct direct *ep, *nep;
551 	register struct inode *dp = VTOI(ndp->ni_dvp);
552 	struct buf *bp;
553 	int loc, spacefree, error = 0;
554 	u_int dsize;
555 	int newentrysize;
556 	char *dirbuf;
557 
558 	ndp->ni_dent.d_ino = ip->i_number;
559 	newentrysize = DIRSIZ(&ndp->ni_dent);
560 	if (ndp->ni_count == 0) {
561 		/*
562 		 * If ndp->ni_count is 0, then namei could find no space in the
563 		 * directory. In this case ndp->ni_offset will be on a directory
564 		 * block boundary and we will write the new entry into a fresh
565 		 * block.
566 		 */
567 		if (ndp->ni_offset&(DIRBLKSIZ-1))
568 			panic("wdir: newblk");
569 		ndp->ni_dent.d_reclen = DIRBLKSIZ;
570 		ndp->ni_count = newentrysize;
571 		ndp->ni_resid = newentrysize;
572 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
573 		ndp->ni_uioseg = UIO_SYSSPACE;
574 		error =
575 		    ufs_write(ndp->ni_dvp, &ndp->ni_uio, IO_SYNC, ndp->ni_cred);
576 		if (DIRBLKSIZ > dp->i_fs->fs_fsize) {
577 			panic("wdir: blksize"); /* XXX - should grow w/balloc */
578 		} else {
579 			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
580 			dp->i_flag |= ICHG;
581 		}
582 		iput(dp);
583 		return (error);
584 	}
585 
586 	/*
587 	 * If ndp->ni_count is non-zero, then namei found space for the new
588 	 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
589 	 * in the directory.  To use this space, we may have to compact
590 	 * the entries located there, by copying them together towards
591 	 * the beginning of the block, leaving the free space in
592 	 * one usable chunk at the end.
593 	 */
594 
595 	/*
596 	 * Increase size of directory if entry eats into new space.
597 	 * This should never push the size past a new multiple of
598 	 * DIRBLKSIZE.
599 	 *
600 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
601 	 */
602 	if (ndp->ni_offset + ndp->ni_count > dp->i_size)
603 		dp->i_size = ndp->ni_offset + ndp->ni_count;
604 	/*
605 	 * Get the block containing the space for the new directory entry.
606 	 */
607 	if (error = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf, &bp)) {
608 		iput(dp);
609 		return (error);
610 	}
611 	/*
612 	 * Find space for the new entry.  In the simple case, the
613 	 * entry at offset base will have the space.  If it does
614 	 * not, then namei arranged that compacting the region
615 	 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
616 	 */
617 	ep = (struct direct *)dirbuf;
618 	dsize = DIRSIZ(ep);
619 	spacefree = ep->d_reclen - dsize;
620 	for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
621 		nep = (struct direct *)(dirbuf + loc);
622 		if (ep->d_ino) {
623 			/* trim the existing slot */
624 			ep->d_reclen = dsize;
625 			ep = (struct direct *)((char *)ep + dsize);
626 		} else {
627 			/* overwrite; nothing there; header is ours */
628 			spacefree += dsize;
629 		}
630 		dsize = DIRSIZ(nep);
631 		spacefree += nep->d_reclen - dsize;
632 		loc += nep->d_reclen;
633 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
634 	}
635 	/*
636 	 * Update the pointer fields in the previous entry (if any),
637 	 * copy in the new entry, and write out the block.
638 	 */
639 	if (ep->d_ino == 0) {
640 		if (spacefree + dsize < newentrysize)
641 			panic("wdir: compact1");
642 		ndp->ni_dent.d_reclen = spacefree + dsize;
643 	} else {
644 		if (spacefree < newentrysize)
645 			panic("wdir: compact2");
646 		ndp->ni_dent.d_reclen = spacefree;
647 		ep->d_reclen = dsize;
648 		ep = (struct direct *)((char *)ep + dsize);
649 	}
650 	bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
651 	error = bwrite(bp);
652 	dp->i_flag |= IUPD|ICHG;
653 	if (!error && ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
654 		error = itrunc(dp, (u_long)ndp->ni_endoff, IO_SYNC);
655 	iput(dp);
656 	return (error);
657 }
658 
659 /*
660  * Remove a directory entry after a call to namei, using
661  * the parameters which it left in nameidata. The entry
662  * ni_offset contains the offset into the directory of the
663  * entry to be eliminated.  The ni_count field contains the
664  * size of the previous record in the directory.  If this
665  * is 0, the first entry is being deleted, so we need only
666  * zero the inode number to mark the entry as free.  If the
667  * entry isn't the first in the directory, we must reclaim
668  * the space of the now empty record by adding the record size
669  * to the size of the previous entry.
670  */
671 dirremove(ndp)
672 	register struct nameidata *ndp;
673 {
674 	register struct inode *dp = VTOI(ndp->ni_dvp);
675 	struct direct *ep;
676 	struct buf *bp;
677 	int error;
678 
679 	if (ndp->ni_count == 0) {
680 		/*
681 		 * First entry in block: set d_ino to zero.
682 		 */
683 		ndp->ni_dent.d_ino = 0;
684 		ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
685 		ndp->ni_base = (caddr_t)&ndp->ni_dent;
686 		ndp->ni_uioseg = UIO_SYSSPACE;
687 		error =
688 		    ufs_write(ndp->ni_dvp, &ndp->ni_uio, IO_SYNC, ndp->ni_cred);
689 	} else {
690 		/*
691 		 * Collapse new free space into previous entry.
692 		 */
693 		if (error = blkatoff(dp, ndp->ni_offset - ndp->ni_count,
694 		    (char **)&ep, &bp)) {
695 			return (error);
696 		}
697 		ep->d_reclen += ndp->ni_dent.d_reclen;
698 		error = bwrite(bp);
699 		dp->i_flag |= IUPD|ICHG;
700 	}
701 	return (error);
702 }
703 
704 /*
705  * Rewrite an existing directory entry to point at the inode
706  * supplied.  The parameters describing the directory entry are
707  * set up by a call to namei.
708  */
709 dirrewrite(dp, ip, ndp)
710 	struct inode *dp, *ip;
711 	struct nameidata *ndp;
712 {
713 
714 	ndp->ni_dent.d_ino = ip->i_number;
715 	ndp->ni_count = ndp->ni_resid = DIRSIZ(&ndp->ni_dent);
716 	ndp->ni_base = (caddr_t)&ndp->ni_dent;
717 	ndp->ni_uioseg = UIO_SYSSPACE;
718 	return (ufs_write(ITOV(dp), &ndp->ni_uio, IO_SYNC, ndp->ni_cred));
719 }
720 
721 /*
722  * Return buffer with contents of block "offset"
723  * from the beginning of directory "ip".  If "res"
724  * is non-zero, fill it in with a pointer to the
725  * remaining space in the directory.
726  */
727 blkatoff(ip, offset, res, bpp)
728 	struct inode *ip;
729 	off_t offset;
730 	char **res;
731 	struct buf **bpp;
732 {
733 	register struct fs *fs = ip->i_fs;
734 	daddr_t lbn = lblkno(fs, offset);
735 	int bsize = blksize(fs, ip, lbn);
736 	struct buf *bp;
737 	daddr_t bn;
738 	int error;
739 
740 	*bpp = 0;
741 	if (error = bread(ITOV(ip), lbn, bsize, NOCRED, &bp)) {
742 		brelse(bp);
743 		return (error);
744 	}
745 	if (res)
746 		*res = bp->b_un.b_addr + blkoff(fs, offset);
747 	*bpp = bp;
748 	return (0);
749 }
750 
751 /*
752  * Check if a directory is empty or not.
753  * Inode supplied must be locked.
754  *
755  * Using a struct dirtemplate here is not precisely
756  * what we want, but better than using a struct direct.
757  *
758  * NB: does not handle corrupted directories.
759  */
760 dirempty(ip, parentino, cred)
761 	register struct inode *ip;
762 	ino_t parentino;
763 	struct ucred *cred;
764 {
765 	register off_t off;
766 	struct dirtemplate dbuf;
767 	register struct direct *dp = (struct direct *)&dbuf;
768 	int error, count;
769 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
770 
771 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
772 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ,
773 		    off, UIO_SYSSPACE, IO_NODELOCKED, cred, &count);
774 		/*
775 		 * Since we read MINDIRSIZ, residual must
776 		 * be 0 unless we're at end of file.
777 		 */
778 		if (error || count != 0)
779 			return (0);
780 		/* avoid infinite loops */
781 		if (dp->d_reclen == 0)
782 			return (0);
783 		/* skip empty entries */
784 		if (dp->d_ino == 0)
785 			continue;
786 		/* accept only "." and ".." */
787 		if (dp->d_namlen > 2)
788 			return (0);
789 		if (dp->d_name[0] != '.')
790 			return (0);
791 		/*
792 		 * At this point d_namlen must be 1 or 2.
793 		 * 1 implies ".", 2 implies ".." if second
794 		 * char is also "."
795 		 */
796 		if (dp->d_namlen == 1)
797 			continue;
798 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
799 			continue;
800 		return (0);
801 	}
802 	return (1);
803 }
804 
805 /*
806  * Check if source directory is in the path of the target directory.
807  * Target is supplied locked, source is unlocked.
808  * The target is always iput() before returning.
809  */
810 checkpath(source, target, cred)
811 	struct inode *source, *target;
812 	struct ucred *cred;
813 {
814 	struct dirtemplate dirbuf;
815 	struct inode *ip;
816 	int error = 0;
817 
818 	ip = target;
819 	if (ip->i_number == source->i_number) {
820 		error = EEXIST;
821 		goto out;
822 	}
823 	if (ip->i_number == ROOTINO)
824 		goto out;
825 
826 	for (;;) {
827 		if ((ip->i_mode&IFMT) != IFDIR) {
828 			error = ENOTDIR;
829 			break;
830 		}
831 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)&dirbuf,
832 			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
833 			IO_NODELOCKED, cred, (int *)0);
834 		if (error != 0)
835 			break;
836 		if (dirbuf.dotdot_namlen != 2 ||
837 		    dirbuf.dotdot_name[0] != '.' ||
838 		    dirbuf.dotdot_name[1] != '.') {
839 			error = ENOTDIR;
840 			break;
841 		}
842 		if (dirbuf.dotdot_ino == source->i_number) {
843 			error = EINVAL;
844 			break;
845 		}
846 		if (dirbuf.dotdot_ino == ROOTINO)
847 			break;
848 		iput(ip);
849 		if (error = iget(ip, dirbuf.dotdot_ino, &ip))
850 			break;
851 	}
852 
853 out:
854 	if (error == ENOTDIR)
855 		printf("checkpath: .. not a directory\n");
856 	if (ip != NULL)
857 		iput(ip);
858 	return (error);
859 }
860