xref: /openbsd/sys/ufs/ufs/ufs_lookup.c (revision 81fb472f)
1 /*	$OpenBSD: ufs_lookup.c,v 1.61 2024/02/03 18:51:58 beck Exp $	*/
2 /*	$NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)ufs_lookup.c	8.9 (Berkeley) 8/11/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/namei.h>
44 #include <sys/buf.h>
45 #include <sys/stat.h>
46 #include <sys/mount.h>
47 #include <sys/proc.h>
48 #include <sys/vnode.h>
49 
50 #include <ufs/ufs/quota.h>
51 #include <ufs/ufs/inode.h>
52 #include <ufs/ufs/dir.h>
53 #ifdef UFS_DIRHASH
54 #include <ufs/ufs/dirhash.h>
55 #endif
56 #include <ufs/ufs/ufsmount.h>
57 #include <ufs/ufs/ufs_extern.h>
58 
/*
 * Name-cache statistics, defined elsewhere.  ufs_lookup() bumps the
 * ncs_2passes and ncs_pass2 counters in it.
 */
extern	struct nchstats nchstats;

/*
 * When non-zero, ufs_lookup() runs ufs_dirbadentry() on every directory
 * entry it scans.  Enabled by default only in DIAGNOSTIC kernels.
 */
#ifdef DIAGNOSTIC
int	dirchk = 1;
#else
int	dirchk = 0;
#endif
66 
67 /*
68  * Convert a component of a pathname into a pointer to a locked inode.
69  * This is a very central and rather complicated routine.
70  * If the file system is not maintained in a strict tree hierarchy,
71  * this can result in a deadlock situation (see comments in code below).
72  *
73  * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
74  * on whether the name is to be looked up, created, renamed, or deleted.
75  * When CREATE, RENAME, or DELETE is specified, information usable in
76  * creating, renaming, or deleting a directory entry may be calculated.
77  * If flag has LOCKPARENT or'ed into it and the target of the pathname
78  * exists, lookup returns both the target and its parent directory locked.
79  * When creating or renaming and LOCKPARENT is specified, the target may
80  * not be ".".  When deleting and LOCKPARENT is specified, the target may
81  * be "."., but the caller must check to ensure it does an vrele and vput
82  * instead of two vputs.
83  *
84  * Overall outline of ufs_lookup:
85  *
86  *	check accessibility of directory
87  *	look for name in cache, if found, then if at end of path
88  *	  and deleting or creating, drop it, else return name
89  *	search for name in directory, to found or notfound
90  * notfound:
91  *	if creating, return locked directory, leaving info on available slots
92  *	else return error
93  * found:
94  *	if at end of path and deleting, return information to allow delete
95  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
96  *	  inode and return info to allow rewrite
97  *	if not at end, add name to cache; if at end and neither creating
98  *	  nor deleting, add name to cache
99  */
100 int
ufs_lookup(void * v)101 ufs_lookup(void *v)
102 {
103 	struct vop_lookup_args *ap = v;
104 	struct vnode *vdp;		/* vnode for directory being searched */
105 	struct inode *dp;		/* inode for directory being searched */
106 	struct buf *bp;			/* a buffer of directory entries */
107 	struct direct *ep;		/* the current directory entry */
108 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
109 	enum {NONE, COMPACT, FOUND} slotstatus;
110 	doff_t slotoffset;		/* offset of area with free space */
111 	int slotsize;			/* size of area at slotoffset */
112 	int slotfreespace;		/* amount of space free in slot */
113 	int slotneeded;			/* size of the entry we're seeking */
114 	int numdirpasses;		/* strategy for directory search */
115 	doff_t endsearch;		/* offset to end directory search */
116 	doff_t prevoff;			/* prev entry dp->i_offset */
117 	struct vnode *pdp;		/* saved dp during symlink work */
118 	struct vnode *tdp;		/* returned by VFS_VGET */
119 	doff_t enduseful;		/* pointer past last used dir slot */
120 	u_long bmask;			/* block offset mask */
121 	int lockparent;			/* 1 => lockparent flag is set */
122 	int wantparent;			/* 1 => wantparent or lockparent flag */
123 	int namlen, error;
124 	struct vnode **vpp = ap->a_vpp;
125 	struct componentname *cnp = ap->a_cnp;
126 	struct ucred *cred = cnp->cn_cred;
127 	int flags;
128 	int nameiop = cnp->cn_nameiop;
129 
130 	cnp->cn_flags &= ~PDIRUNLOCK;
131 	flags = cnp->cn_flags;
132 
133 	bp = NULL;
134 	slotoffset = -1;
135 	*vpp = NULL;
136 	vdp = ap->a_dvp;
137 	dp = VTOI(vdp);
138 	lockparent = flags & LOCKPARENT;
139 	wantparent = flags & (LOCKPARENT|WANTPARENT);
140 
141 	/*
142 	 * Check accessibility of directory.
143 	 */
144 	if ((DIP(dp, mode) & IFMT) != IFDIR)
145 		return (ENOTDIR);
146 	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
147 		return (error);
148 
149 	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
150 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
151 		return (EROFS);
152 
153 	/*
154 	 * We now have a segment name to search for, and a directory to search.
155 	 *
156 	 * Before tediously performing a linear scan of the directory,
157 	 * check the name cache to see if the directory/name pair
158 	 * we are looking for is known already.
159 	 */
160 	if ((error = cache_lookup(vdp, vpp, cnp)) >= 0)
161 		return (error);
162 
163 	/*
164 	 * Suppress search for slots unless creating
165 	 * file and at end of pathname, in which case
166 	 * we watch for a place to put the new file in
167 	 * case it doesn't already exist.
168 	 */
169 	slotstatus = FOUND;
170 	slotfreespace = slotsize = slotneeded = 0;
171 	if ((nameiop == CREATE || nameiop == RENAME) &&
172 	    (flags & ISLASTCN)) {
173 		slotstatus = NONE;
174 		slotneeded = (sizeof(struct direct) - MAXNAMLEN +
175 			cnp->cn_namelen + 3) &~ 3;
176 	}
177 
178 	/*
179 	 * If there is cached information on a previous search of
180 	 * this directory, pick up where we last left off.
181 	 * We cache only lookups as these are the most common
182 	 * and have the greatest payoff. Caching CREATE has little
183 	 * benefit as it usually must search the entire directory
184 	 * to determine that the entry does not exist. Caching the
185 	 * location of the last DELETE or RENAME has not reduced
186 	 * profiling time and hence has been removed in the interest
187 	 * of simplicity.
188 	 */
189 	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
190 
191 #ifdef UFS_DIRHASH
192 	/*
193 	 * Use dirhash for fast operations on large directories. The logic
194 	 * to determine whether to hash the directory is contained within
195 	 * ufsdirhash_build(); a zero return means that it decided to hash
196 	 * this directory and it successfully built up the hash table.
197 	 */
198 	if (ufsdirhash_build(dp) == 0) {
199 		/* Look for a free slot if needed. */
200 		enduseful = DIP(dp, size);
201 		if (slotstatus != FOUND) {
202 			slotoffset = ufsdirhash_findfree(dp, slotneeded,
203 			    &slotsize);
204 			if (slotoffset >= 0) {
205 				slotstatus = COMPACT;
206 				enduseful = ufsdirhash_enduseful(dp);
207 				if (enduseful < 0)
208 					enduseful = DIP(dp, size);
209 			}
210 		}
211 		/* Look up the component. */
212 		numdirpasses = 1;
213 		entryoffsetinblock = 0; /* silence compiler warning */
214 		switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
215 		    &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
216 		case 0:
217 			ep = (struct direct *)((char *)bp->b_data +
218 			    (dp->i_offset & bmask));
219 			goto foundentry;
220 		case ENOENT:
221 #define roundup2(x, y)	(((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */
222 			dp->i_offset = roundup2(DIP(dp, size), DIRBLKSIZ);
223 			goto notfound;
224 		default:
225 			/* Something failed; just do a linear search. */
226 			break;
227 		}
228 	}
229 #endif /* UFS_DIRHASH */
230 
231 	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
232 	    dp->i_diroff >= DIP(dp, size)) {
233 		entryoffsetinblock = 0;
234 		dp->i_offset = 0;
235 		numdirpasses = 1;
236 	} else {
237 		dp->i_offset = dp->i_diroff;
238 		if ((entryoffsetinblock = dp->i_offset & bmask) &&
239 		    (error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL, &bp)))
240 			return (error);
241 		numdirpasses = 2;
242 		nchstats.ncs_2passes++;
243 	}
244 	prevoff = dp->i_offset;
245 	endsearch = roundup(DIP(dp, size), DIRBLKSIZ);
246 	enduseful = 0;
247 
248 searchloop:
249 	while (dp->i_offset < endsearch) {
250 		/*
251 		 * If necessary, get the next directory block.
252 		 */
253 		if ((dp->i_offset & bmask) == 0) {
254 			if (bp != NULL)
255 				brelse(bp);
256 			error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL,
257 					     &bp);
258 			if (error)
259 				return (error);
260 			entryoffsetinblock = 0;
261 		}
262 		/*
263 		 * If still looking for a slot, and at a DIRBLKSIZE
264 		 * boundary, have to start looking for free space again.
265 		 */
266 		if (slotstatus == NONE &&
267 		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
268 			slotoffset = -1;
269 			slotfreespace = 0;
270 		}
271 		/*
272 		 * Get pointer to next entry.
273 		 * Full validation checks are slow, so we only check
274 		 * enough to insure forward progress through the
275 		 * directory. Complete checks can be run by patching
276 		 * "dirchk" to be true.
277 		 */
278 		ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
279 		if (ep->d_reclen == 0 ||
280 		    (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) {
281 			int i;
282 
283 			ufs_dirbad(dp, dp->i_offset, "mangled entry");
284 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
285 			dp->i_offset += i;
286 			entryoffsetinblock += i;
287 			continue;
288 		}
289 
290 		/*
291 		 * If an appropriate sized slot has not yet been found,
292 		 * check to see if one is available. Also accumulate space
293 		 * in the current block so that we can determine if
294 		 * compaction is viable.
295 		 */
296 		if (slotstatus != FOUND) {
297 			int size = ep->d_reclen;
298 
299 			if (ep->d_ino != 0)
300 				size -= DIRSIZ(ep);
301 			if (size > 0) {
302 				if (size >= slotneeded) {
303 					slotstatus = FOUND;
304 					slotoffset = dp->i_offset;
305 					slotsize = ep->d_reclen;
306 				} else if (slotstatus == NONE) {
307 					slotfreespace += size;
308 					if (slotoffset == -1)
309 						slotoffset = dp->i_offset;
310 					if (slotfreespace >= slotneeded) {
311 						slotstatus = COMPACT;
312 						slotsize = dp->i_offset +
313 						      ep->d_reclen - slotoffset;
314 					}
315 				}
316 			}
317 		}
318 
319 		/*
320 		 * Check for a name match.
321 		 */
322 		if (ep->d_ino) {
323 			namlen = ep->d_namlen;
324 			if (namlen == cnp->cn_namelen &&
325 			    !memcmp(cnp->cn_nameptr, ep->d_name, namlen)) {
326 #ifdef UFS_DIRHASH
327 foundentry:
328 #endif
329 				/*
330 				 * Save directory entry's inode number and
331 				 * reclen in ndp->ni_ufs area, and release
332 				 * directory buffer.
333 				 */
334 				dp->i_ino = ep->d_ino;
335 				dp->i_reclen = ep->d_reclen;
336 				goto found;
337 			}
338 		}
339 		prevoff = dp->i_offset;
340 		dp->i_offset += ep->d_reclen;
341 		entryoffsetinblock += ep->d_reclen;
342 		if (ep->d_ino)
343 			enduseful = dp->i_offset;
344 	}
345 #ifdef UFS_DIRHASH
346 notfound:
347 #endif
348 	/*
349 	 * If we started in the middle of the directory and failed
350 	 * to find our target, we must check the beginning as well.
351 	 */
352 	if (numdirpasses == 2) {
353 		numdirpasses--;
354 		dp->i_offset = 0;
355 		endsearch = dp->i_diroff;
356 		goto searchloop;
357 	}
358 	if (bp != NULL)
359 		brelse(bp);
360 	/*
361 	 * If creating, and at end of pathname and current
362 	 * directory has not been removed, then can consider
363 	 * allowing file to be created.
364 	 */
365 	if ((nameiop == CREATE || nameiop == RENAME) &&
366 	    (flags & ISLASTCN) && dp->i_effnlink != 0) {
367 		/*
368 		 * Access for write is interpreted as allowing
369 		 * creation of files in the directory.
370 		 */
371 		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
372 		if (error)
373 			return (error);
374 		/*
375 		 * Return an indication of where the new directory
376 		 * entry should be put.  If we didn't find a slot,
377 		 * then set dp->i_count to 0 indicating
378 		 * that the new slot belongs at the end of the
379 		 * directory. If we found a slot, then the new entry
380 		 * can be put in the range from dp->i_offset to
381 		 * dp->i_offset + dp->i_count.
382 		 */
383 		if (slotstatus == NONE) {
384 			dp->i_offset = roundup(DIP(dp, size), DIRBLKSIZ);
385 			dp->i_count = 0;
386 			enduseful = dp->i_offset;
387 		} else if (nameiop == DELETE) {
388 			dp->i_offset = slotoffset;
389 			if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
390 				dp->i_count = 0;
391 			else
392 				dp->i_count = dp->i_offset - prevoff;
393 		} else {
394 			dp->i_offset = slotoffset;
395 			dp->i_count = slotsize;
396 			if (enduseful < slotoffset + slotsize)
397 				enduseful = slotoffset + slotsize;
398 		}
399 		dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
400 		/*
401 		 * We return with the directory locked, so that
402 		 * the parameters we set up above will still be
403 		 * valid if we actually decide to do a direnter().
404 		 * We return ni_vp == NULL to indicate that the entry
405 		 * does not currently exist; we leave a pointer to
406 		 * the (locked) directory inode in ndp->ni_dvp.
407 		 * The pathname buffer is saved so that the name
408 		 * can be obtained later.
409 		 *
410 		 * NB - if the directory is unlocked, then this
411 		 * information cannot be used.
412 		 */
413 		cnp->cn_flags |= SAVENAME;
414 		if (!lockparent) {
415 			VOP_UNLOCK(vdp);
416 			cnp->cn_flags |= PDIRUNLOCK;
417 		}
418 		return (EJUSTRETURN);
419 	}
420 	/*
421 	 * Insert name into cache (as non-existent) if appropriate.
422 	 */
423 	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
424 		cache_enter(vdp, *vpp, cnp);
425 	return (ENOENT);
426 
427 found:
428 	if (numdirpasses == 2)
429 		nchstats.ncs_pass2++;
430 	/*
431 	 * Check that directory length properly reflects presence
432 	 * of this entry.
433 	 */
434 	if (dp->i_offset + DIRSIZ(ep) > DIP(dp, size)) {
435 		ufs_dirbad(dp, dp->i_offset, "i_ffs_size too small");
436 		DIP_ASSIGN(dp, size, dp->i_offset + DIRSIZ(ep));
437 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
438 	}
439 	brelse(bp);
440 
441 	/*
442 	 * Found component in pathname.
443 	 * If the final component of path name, save information
444 	 * in the cache as to where the entry was found.
445 	 */
446 	if ((flags & ISLASTCN) && nameiop == LOOKUP)
447 		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);
448 
449 	/*
450 	 * If deleting, and at end of pathname, return
451 	 * parameters which can be used to remove file.
452 	 * If the wantparent flag isn't set, we return only
453 	 * the directory (in ndp->ni_dvp), otherwise we go
454 	 * on and lock the inode, being careful with ".".
455 	 */
456 	if (nameiop == DELETE && (flags & ISLASTCN)) {
457 		/*
458 		 * Write access to directory required to delete files.
459 		 */
460 		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
461 		if (error)
462 			return (error);
463 		/*
464 		 * Return pointer to current entry in dp->i_offset,
465 		 * and distance past previous entry (if there
466 		 * is a previous entry in this block) in dp->i_count.
467 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
468 		 */
469 		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
470 			dp->i_count = 0;
471 		else
472 			dp->i_count = dp->i_offset - prevoff;
473 		if (dp->i_number == dp->i_ino) {
474 			vref(vdp);
475 			*vpp = vdp;
476 			return (0);
477 		}
478 		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
479 		if (error)
480 			return (error);
481 		/*
482 		 * If directory is "sticky", then user must own
483 		 * the directory, or the file in it, else she
484 		 * may not delete it (unless she's root). This
485 		 * implements append-only directories.
486 		 */
487 		if ((DIP(dp, mode) & ISVTX) &&
488 		    cred->cr_uid != 0 &&
489 		    cred->cr_uid != DIP(dp, uid) &&
490 		    !vnoperm(vdp) &&
491 		    DIP(VTOI(tdp), uid) != cred->cr_uid) {
492 			vput(tdp);
493 			return (EPERM);
494 		}
495 		*vpp = tdp;
496 		if (!lockparent) {
497 			VOP_UNLOCK(vdp);
498 			cnp->cn_flags |= PDIRUNLOCK;
499 		}
500 		return (0);
501 	}
502 
503 	/*
504 	 * If rewriting (RENAME), return the inode and the
505 	 * information required to rewrite the present directory
506 	 * Must get inode of directory entry to verify it's a
507 	 * regular file, or empty directory.
508 	 */
509 	if (nameiop == RENAME && wantparent &&
510 	    (flags & ISLASTCN)) {
511 		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
512 		if (error)
513 			return (error);
514 		/*
515 		 * Careful about locking second inode.
516 		 * This can only occur if the target is ".".
517 		 */
518 		if (dp->i_number == dp->i_ino)
519 			return (EISDIR);
520 		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
521 		if (error)
522 			return (error);
523 		*vpp = tdp;
524 		cnp->cn_flags |= SAVENAME;
525 		if (!lockparent) {
526 			VOP_UNLOCK(vdp);
527 			cnp->cn_flags |= PDIRUNLOCK;
528 		}
529 		return (0);
530 	}
531 
532 	/*
533 	 * Step through the translation in the name.  We do not `vput' the
534 	 * directory because we may need it again if a symbolic link
535 	 * is relative to the current directory.  Instead we save it
536 	 * unlocked as "pdp".  We must get the target inode before unlocking
537 	 * the directory to insure that the inode will not be removed
538 	 * before we get it.  We prevent deadlock by always fetching
539 	 * inodes from the root, moving down the directory tree. Thus
540 	 * when following backward pointers ".." we must unlock the
541 	 * parent directory before getting the requested directory.
542 	 * There is a potential race condition here if both the current
543 	 * and parent directories are removed before the VFS_VGET for the
544 	 * inode associated with ".." returns.  We hope that this occurs
545 	 * infrequently since we cannot avoid this race condition without
546 	 * implementing a sophisticated deadlock detection algorithm.
547 	 * Note also that this simple deadlock detection scheme will not
548 	 * work if the file system has any hard links other than ".."
549 	 * that point backwards in the directory structure.
550 	 */
551 	pdp = vdp;
552 	if (flags & ISDOTDOT) {
553 		VOP_UNLOCK(pdp);	/* race to get the inode */
554 		cnp->cn_flags |= PDIRUNLOCK;
555 		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
556 		if (error) {
557 			if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY) == 0)
558 				cnp->cn_flags &= ~PDIRUNLOCK;
559 			return (error);
560 		}
561 		if (lockparent && (flags & ISLASTCN)) {
562 			if ((error = vn_lock(pdp, LK_EXCLUSIVE))) {
563 				vput(tdp);
564 				return (error);
565 			}
566 			cnp->cn_flags &= ~PDIRUNLOCK;
567 		}
568 		*vpp = tdp;
569 	} else if (dp->i_number == dp->i_ino) {
570 		vref(vdp);	/* we want ourself, ie "." */
571 		*vpp = vdp;
572 	} else {
573 		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
574 		if (error)
575 			return (error);
576 		if (!lockparent || !(flags & ISLASTCN)) {
577 			VOP_UNLOCK(pdp);
578 			cnp->cn_flags |= PDIRUNLOCK;
579 		}
580 		*vpp = tdp;
581 	}
582 
583 	/*
584 	 * Insert name into cache if appropriate.
585 	 */
586 	if (cnp->cn_flags & MAKEENTRY)
587 		cache_enter(vdp, *vpp, cnp);
588 	return (0);
589 }
590 
591 void
ufs_dirbad(struct inode * ip,doff_t offset,char * how)592 ufs_dirbad(struct inode *ip, doff_t offset, char *how)
593 {
594 	struct mount *mp;
595 
596 	mp = ITOV(ip)->v_mount;
597 	(void)printf("%s: bad dir ino %u at offset %d: %s\n",
598 	    mp->mnt_stat.f_mntonname, ip->i_number, offset, how);
599 	if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0)
600 		panic("bad dir");
601 }
602 
603 /*
604  * Do consistency checking on a directory entry:
605  *	record length must be multiple of 4
606  *	entry must fit in rest of its DIRBLKSIZ block
607  *	record must be large enough to contain entry
608  *	name is not longer than MAXNAMLEN
609  *	name must be as long as advertised, and null terminated
610  */
611 int
ufs_dirbadentry(struct vnode * vdp,struct direct * ep,int entryoffsetinblock)612 ufs_dirbadentry(struct vnode *vdp, struct direct *ep, int entryoffsetinblock)
613 {
614 	struct inode *dp;
615 	int i;
616 	int namlen;
617 
618 	dp = VTOI(vdp);
619 
620 	namlen = ep->d_namlen;
621 	if ((ep->d_reclen & 0x3) != 0 ||
622 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
623 	    ep->d_reclen < DIRSIZ(ep) || namlen > MAXNAMLEN) {
624 		/*return (1); */
625 		printf("First bad\n");
626 		goto bad;
627 	}
628 	if (ep->d_ino == 0)
629 		return (0);
630 	for (i = 0; i < namlen; i++)
631 		if (ep->d_name[i] == '\0') {
632 			/*return (1); */
633 			printf("Second bad\n");
634 			goto bad;
635 	}
636 	if (ep->d_name[i])
637 		goto bad;
638 	return (0);
639 bad:
640 	return (1);
641 }
642 
643 /*
644  * Construct a new directory entry after a call to namei, using the
645  * parameters that it left in the componentname argument cnp. The
646  * argument ip is the inode to which the new directory entry will refer.
647  */
648 void
ufs_makedirentry(struct inode * ip,struct componentname * cnp,struct direct * newdirp)649 ufs_makedirentry(struct inode *ip, struct componentname *cnp,
650     struct direct *newdirp)
651 {
652 #ifdef DIAGNOSTIC
653   	if ((cnp->cn_flags & SAVENAME) == 0)
654 		panic("ufs_makedirentry: missing name");
655 #endif
656 	newdirp->d_ino = ip->i_number;
657 	newdirp->d_namlen = cnp->cn_namelen;
658 	memset(newdirp->d_name + (cnp->cn_namelen & ~(DIR_ROUNDUP-1)),
659 	    0, DIR_ROUNDUP);
660 	memcpy(newdirp->d_name, cnp->cn_nameptr, cnp->cn_namelen);
661 	newdirp->d_type = IFTODT(DIP(ip, mode));
662 }
663 
664 /*
665  * Write a directory entry after a call to namei, using the parameters
666  * that it left in nameidata. The argument dirp is the new directory
667  * entry contents. Dvp is a pointer to the directory to be written,
668  * which was left locked by namei. Remaining parameters (dp->i_offset,
669  * dp->i_count) indicate how the space for the new entry is to be obtained.
670  * Non-null bp indicates that a directory is being created (for the
671  * soft dependency code).
672  */
673 int
ufs_direnter(struct vnode * dvp,struct vnode * tvp,struct direct * dirp,struct componentname * cnp,struct buf * newdirbp)674 ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
675     struct componentname *cnp, struct buf *newdirbp)
676 {
677 	struct ucred *cr;
678 	struct proc *p;
679 	int newentrysize;
680 	struct inode *dp;
681 	struct buf *bp;
682 	u_int dsize;
683 	struct direct *ep, *nep;
684 	int error, ret, blkoff, loc, spacefree, flags;
685 	char *dirbuf;
686 
687 	error = 0;
688 	cr = cnp->cn_cred;
689 	p = cnp->cn_proc;
690 	dp = VTOI(dvp);
691 	newentrysize = DIRSIZ(dirp);
692 
693 	if (dp->i_count == 0) {
694 		/*
695 		 * If dp->i_count is 0, then namei could find no
696 		 * space in the directory. Here, dp->i_offset will
697 		 * be on a directory block boundary and we will write the
698 		 * new entry into a fresh block.
699 		 */
700 		if (dp->i_offset & (DIRBLKSIZ - 1))
701 			panic("ufs_direnter: newblk");
702 		flags = B_CLRBUF;
703 		flags |= B_SYNC;
704 		if ((error = UFS_BUF_ALLOC(dp, (off_t)dp->i_offset, DIRBLKSIZ,
705 		    cr, flags, &bp)) != 0) {
706 			return (error);
707 		}
708 		DIP_ASSIGN(dp, size, dp->i_offset + DIRBLKSIZ);
709 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
710 		uvm_vnp_setsize(dvp, DIP(dp, size));
711 		dirp->d_reclen = DIRBLKSIZ;
712 		blkoff = dp->i_offset &
713 		    (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
714 		memcpy(bp->b_data + blkoff, dirp, newentrysize);
715 
716 #ifdef UFS_DIRHASH
717 		if (dp->i_dirhash != NULL) {
718 			ufsdirhash_newblk(dp, dp->i_offset);
719 			ufsdirhash_add(dp, dirp, dp->i_offset);
720 			ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff,
721 			dp->i_offset);
722 		}
723 #endif
724 
725 		error = VOP_BWRITE(bp);
726 		ret = UFS_UPDATE(dp, 1);
727 		if (error == 0)
728 			return (ret);
729 		return (error);
730 	}
731 
732 	/*
733 	 * If dp->i_count is non-zero, then namei found space for the new
734 	 * entry in the range dp->i_offset to dp->i_offset + dp->i_count
735 	 * in the directory. To use this space, we may have to compact
736 	 * the entries located there, by copying them together towards the
737 	 * beginning of the block, leaving the free space in one usable
738 	 * chunk at the end.
739 	 */
740 
741 	/*
742 	 * Increase size of directory if entry eats into new space.
743 	 * This should never push the size past a new multiple of
744 	 * DIRBLKSIZE.
745 	 *
746 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
747 	 */
748 	if (dp->i_offset + dp->i_count > DIP(dp, size))
749 		DIP_ASSIGN(dp, size, dp->i_offset + dp->i_count);
750 	/*
751 	 * Get the block containing the space for the new directory entry.
752 	 */
753 	if ((error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, &dirbuf, &bp))
754 	    != 0) {
755 		return (error);
756 	}
757 	/*
758 	 * Find space for the new entry. In the simple case, the entry at
759 	 * offset base will have the space. If it does not, then namei
760 	 * arranged that compacting the region dp->i_offset to
761 	 * dp->i_offset + dp->i_count would yield the space.
762 	 */
763 	ep = (struct direct *)dirbuf;
764 	dsize = ep->d_ino ? DIRSIZ(ep) : 0;
765 	spacefree = ep->d_reclen - dsize;
766 	for (loc = ep->d_reclen; loc < dp->i_count; ) {
767 		nep = (struct direct *)(dirbuf + loc);
768 
769 		/* Trim the existing slot (NB: dsize may be zero). */
770 		ep->d_reclen = dsize;
771 		ep = (struct direct *)((char *)ep + dsize);
772 
773 		/* Read nep->d_reclen now as the memmove() may clobber it. */
774 		loc += nep->d_reclen;
775 		if (nep->d_ino == 0) {
776 			/*
777 			 * A mid-block unused entry. Such entries are
778 			 * never created by the kernel, but fsck_ffs
779 			 * can create them (and it doesn't fix them).
780 			 *
781 			 * Add up the free space, and initialise the
782 			 * relocated entry since we don't memmove it.
783 			 */
784 			spacefree += nep->d_reclen;
785 			ep->d_ino = 0;
786 			dsize = 0;
787 			continue;
788 		}
789 		dsize = DIRSIZ(nep);
790 		spacefree += nep->d_reclen - dsize;
791 #ifdef UFS_DIRHASH
792 		if (dp->i_dirhash != NULL)
793 			ufsdirhash_move(dp, nep,
794 			    dp->i_offset + ((char *)nep - dirbuf),
795 			    dp->i_offset + ((char *)ep - dirbuf));
796 #endif
797 		memmove(ep, nep, dsize);
798 	}
799 	/*
800 	 * Here, `ep' points to a directory entry containing `dsize' in-use
801 	 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
802 	 * then the entry is completely unused (dsize == 0). The value
803 	 * of ep->d_reclen is always indeterminate.
804 	 *
805 	 * Update the pointer fields in the previous entry (if any),
806 	 * copy in the new entry, and write out the block.
807 	 */
808 	if (ep->d_ino == 0) {
809 		if (spacefree + dsize < newentrysize)
810 			panic("ufs_direnter: compact1");
811 		dirp->d_reclen = spacefree + dsize;
812 	} else {
813 		if (spacefree < newentrysize)
814 			panic("ufs_direnter: compact2");
815 		dirp->d_reclen = spacefree;
816 		ep->d_reclen = dsize;
817 		ep = (struct direct *)((char *)ep + dsize);
818 	}
819 
820 #ifdef UFS_DIRHASH
821 	if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
822 	    dirp->d_reclen == spacefree))
823 		ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf));
824 #endif
825 	memcpy(ep, dirp, newentrysize);
826 #ifdef UFS_DIRHASH
827 	if (dp->i_dirhash != NULL)
828 		ufsdirhash_checkblock(dp, dirbuf -
829 		    (dp->i_offset & (DIRBLKSIZ - 1)),
830 		    dp->i_offset & ~(DIRBLKSIZ - 1));
831 #endif
832 
833 	error = VOP_BWRITE(bp);
834 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
835 
836 	/*
837 	 * If all went well, and the directory can be shortened, proceed
838 	 * with the truncation. Note that we have to unlock the inode for
839 	 * the entry that we just entered, as the truncation may need to
840 	 * lock other inodes which can lead to deadlock if we also hold a
841 	 * lock on the newly entered node.
842 	 */
843 
844 	if (error == 0 && dp->i_endoff && dp->i_endoff < DIP(dp, size)) {
845 		if (tvp != NULL)
846 			VOP_UNLOCK(tvp);
847 		error = UFS_TRUNCATE(dp, (off_t)dp->i_endoff, IO_SYNC, cr);
848 #ifdef UFS_DIRHASH
849 		if (error == 0 && dp->i_dirhash != NULL)
850 			ufsdirhash_dirtrunc(dp, dp->i_endoff);
851 #endif
852 		if (tvp != NULL)
853 			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
854 	}
855 	return (error);
856 }
857 
858 /*
859  * Remove a directory entry after a call to namei, using
860  * the parameters which it left in nameidata. The entry
861  * dp->i_offset contains the offset into the directory of the
862  * entry to be eliminated.  The dp->i_count field contains the
863  * size of the previous record in the directory.  If this
864  * is 0, the first entry is being deleted, so we need only
865  * zero the inode number to mark the entry as free.  If the
866  * entry is not the first in the directory, we must reclaim
867  * the space of the now empty record by adding the record size
868  * to the size of the previous entry.
869  */
870 int
ufs_dirremove(struct vnode * dvp,struct inode * ip,int flags,int isrmdir)871 ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
872 {
873 	struct inode *dp;
874 	struct direct *ep;
875 	struct buf *bp;
876 	int error;
877 
878 	dp = VTOI(dvp);
879 
880 	if ((error = UFS_BUFATOFF(dp,
881 	    (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0)
882 		return (error);
883 #ifdef UFS_DIRHASH
884 	/*
885 	 * Remove the dirhash entry. This is complicated by the fact
886 	 * that `ep' is the previous entry when dp->i_count != 0.
887 	 */
888 	if (dp->i_dirhash != NULL)
889 		ufsdirhash_remove(dp, (dp->i_count == 0) ? ep :
890 		(struct direct *)((char *)ep + ep->d_reclen), dp->i_offset);
891 #endif
892 
893 	if (dp->i_count == 0) {
894 		/*
895 		 * First entry in block: set d_ino to zero.
896 		 */
897 		ep->d_ino = 0;
898 	} else {
899 		/*
900 		 * Collapse new free space into previous entry.
901 		 */
902 		ep->d_reclen += dp->i_reclen;
903 	}
904 #ifdef UFS_DIRHASH
905 	if (dp->i_dirhash != NULL)
906 		ufsdirhash_checkblock(dp, (char *)ep -
907 		    ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)),
908 		    dp->i_offset & ~(DIRBLKSIZ - 1));
909 #endif
910 	if (ip) {
911 		ip->i_effnlink--;
912 		DIP_ADD(ip, nlink, -1);
913 		ip->i_flag |= IN_CHANGE;
914 	}
915 	if (DOINGASYNC(dvp) && dp->i_count != 0) {
916 		bdwrite(bp);
917 		error = 0;
918 	} else
919 		error = bwrite(bp);
920 
921 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
922 	return (error);
923 }
924 
925 /*
926  * Rewrite an existing directory entry to point at the inode
927  * supplied.  The parameters describing the directory entry are
928  * set up by a call to namei.
929  */
930 int
ufs_dirrewrite(struct inode * dp,struct inode * oip,ufsino_t newinum,int newtype,int isrmdir)931 ufs_dirrewrite(struct inode *dp, struct inode *oip, ufsino_t newinum,
932     int newtype, int isrmdir)
933 {
934 	struct buf *bp;
935 	struct direct *ep;
936 	struct vnode *vdp = ITOV(dp);
937 	int error;
938 
939 	error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, (char **)&ep, &bp);
940 	if (error)
941 		return (error);
942 	ep->d_ino = newinum;
943 	ep->d_type = newtype;
944 	oip->i_effnlink--;
945 	DIP_ADD(oip, nlink, -1);
946 	oip->i_flag |= IN_CHANGE;
947 	if (DOINGASYNC(vdp)) {
948 		bdwrite(bp);
949 		error = 0;
950 	} else {
951 		error = VOP_BWRITE(bp);
952 	}
953 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
954 	return (error);
955 }
956 
957 /*
958  * Check if a directory is empty or not.
959  * Inode supplied must be locked.
960  *
961  * Using a struct dirtemplate here is not precisely
962  * what we want, but better than using a struct direct.
963  *
964  * NB: does not handle corrupted directories.
965  */
966 int
ufs_dirempty(struct inode * ip,ufsino_t parentino,struct ucred * cred)967 ufs_dirempty(struct inode *ip, ufsino_t parentino, struct ucred *cred)
968 {
969 	off_t off, m;
970 	struct dirtemplate dbuf;
971 	struct direct *dp = (struct direct *)&dbuf;
972 	int error, namlen;
973 	size_t count;
974 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
975 
976 	m = DIP(ip, size);
977 	for (off = 0; off < m; off += dp->d_reclen) {
978 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off,
979 		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, curproc);
980 		/*
981 		 * Since we read MINDIRSIZ, residual must
982 		 * be 0 unless we're at end of file.
983 		 */
984 		if (error || count != 0)
985 			return (0);
986 		/* avoid infinite loops */
987 		if (dp->d_reclen == 0)
988 			return (0);
989 		/* skip empty entries */
990 		if (dp->d_ino == 0)
991 			continue;
992 		/* accept only "." and ".." */
993 		namlen = dp->d_namlen;
994 		if (namlen > 2)
995 			return (0);
996 		if (dp->d_name[0] != '.')
997 			return (0);
998 		/*
999 		 * At this point namlen must be 1 or 2.
1000 		 * 1 implies ".", 2 implies ".." if second
1001 		 * char is also "."
1002 		 */
1003 		if (namlen == 1 && dp->d_ino == ip->i_number)
1004 			continue;
1005 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
1006 			continue;
1007 		return (0);
1008 	}
1009 	return (1);
1010 }
1011 
1012 /*
1013  * Check if source directory is in the path of the target directory.
1014  * Target is supplied locked, source is unlocked.
1015  * The target is always vput before returning.
1016  */
1017 int
ufs_checkpath(struct inode * source,struct inode * target,struct ucred * cred)1018 ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred)
1019 {
1020 	struct vnode *nextvp, *vp;
1021 	int error, rootino, namlen;
1022 	struct dirtemplate dirbuf;
1023 
1024 	vp = ITOV(target);
1025 	if (target->i_number == source->i_number) {
1026 		error = EEXIST;
1027 		goto out;
1028 	}
1029 	rootino = ROOTINO;
1030 	error = 0;
1031 	if (target->i_number == rootino)
1032 		goto out;
1033 
1034 	for (;;) {
1035 		if (vp->v_type != VDIR) {
1036 			error = ENOTDIR;
1037 			break;
1038 		}
1039 		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
1040 			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
1041 			IO_NODELOCKED, cred, NULL, curproc);
1042 		if (error != 0)
1043 			break;
1044 		namlen = dirbuf.dotdot_namlen;
1045 		if (namlen != 2 ||
1046 		    dirbuf.dotdot_name[0] != '.' ||
1047 		    dirbuf.dotdot_name[1] != '.') {
1048 			error = ENOTDIR;
1049 			break;
1050 		}
1051 		if (dirbuf.dotdot_ino == source->i_number) {
1052 			error = EINVAL;
1053 			break;
1054 		}
1055 		if (dirbuf.dotdot_ino == rootino)
1056 			break;
1057 		VOP_UNLOCK(vp);
1058 		error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &nextvp);
1059 		vrele(vp);
1060 		if (error) {
1061 			vp = NULL;
1062 			break;
1063 		}
1064 		vp = nextvp;
1065 	}
1066 
1067 out:
1068 	if (error == ENOTDIR)
1069 		printf("checkpath: .. not a directory\n");
1070 	if (vp != NULL)
1071 		vput(vp);
1072 	return (error);
1073 }
1074