xref: /dragonfly/sys/kern/vfs_nlookup.c (revision f9993810)
1 /*
2  * Copyright (c) 2004-2022 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * nlookup() is the 'new' namei interface.  Rather than return directory and
36  * leaf vnodes (in various lock states) the new interface instead deals in
37  * namecache records.  Namecache records may represent both a positive or
38  * a negative hit.  The namespace is locked via the namecache record instead
39  * of via the vnode, and only the leaf namecache record (representing the
40  * filename) needs to be locked.
41  *
42  * This greatly improves filesystem parallelism and is a huge simplification
43  * of the API versus the old vnode locking / namei scheme.
44  *
45  * Filesystems must actively control the caching aspects of the namecache,
46  * and since namecache pointers are used as handles they are non-optional
47  * even for filesystems which do not generally wish to cache things.  It is
48  * intended that a separate cache coherency API will be constructed to handle
49  * these issues.
50  */
51 
52 #include "opt_ktrace.h"
53 
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/uio.h>
57 #include <sys/kernel.h>
58 #include <sys/vnode.h>
59 #include <sys/mount.h>
60 #include <sys/filedesc.h>
61 #include <sys/proc.h>
62 #include <sys/namei.h>
63 #include <sys/nlookup.h>
64 #include <sys/malloc.h>
65 #include <sys/stat.h>
66 #include <sys/objcache.h>
67 #include <sys/file.h>
68 #include <sys/kcollect.h>
69 #include <sys/sysctl.h>
70 
71 #ifdef KTRACE
72 #include <sys/ktrace.h>
73 #endif
74 
75 __read_mostly static int nlookup_max_retries = 4;
76 SYSCTL_INT(_debug, OID_AUTO, nlookup_max_retries, CTLFLAG_RW,
77 	&nlookup_max_retries, 0,
78 	"retries on generation mismatch");
79 __read_mostly static int nlookup_debug;
80 SYSCTL_INT(_debug, OID_AUTO, nlookup_debug, CTLFLAG_RW,
81 	&nlookup_debug, 0,
82 	"Force retry test");
83 
84 static int naccess(struct nchandle *nch, u_int *genp, int vmode,
85 			struct ucred *cred, int *stickyp, int nchislocked);
86 
87 /*
88  * unmount operations flag NLC_IGNBADDIR in order to allow the
89  * umount to successfully issue a nlookup() on the path in order
90  * to extract the mount point.  Allow certain errors through.
91  */
92 static __inline
93 int
94 keeperror(struct nlookupdata *nd, int error)
95 {
96 	if (error) {
97 		if ((nd->nl_flags & NLC_IGNBADDIR) == 0 ||
98 		   (error != EIO && error != EBADRPC && error != ESTALE)) {
99 			return 1;
100 		}
101 	}
102 	return 0;
103 }
104 
105 /*
106  * Initialize a nlookup() structure, early error return for copyin faults
107  * or a degenerate empty string (which is not allowed).
108  *
109  * The first process proc0's credentials are used if the calling thread
110  * is not associated with a process context.
111  *
112  * MPSAFE
113  */
114 int
115 nlookup_init(struct nlookupdata *nd,
116 	     const char *path, enum uio_seg seg, int flags)
117 {
118     size_t pathlen;
119     struct proc *p;
120     thread_t td;
121     int error;
122 
123     td = curthread;
124     p = td->td_proc;
125 
126     /*
127      * note: the pathlen set by copy*str() includes the terminating \0.
128      */
129     bzero(nd, sizeof(struct nlookupdata));
130     nd->nl_path = objcache_get(namei_oc, M_WAITOK);
131     nd->nl_flags |= NLC_HASBUF;
132     if (seg == UIO_SYSSPACE)
133 	error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen);
134     else
135 	error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen);
136 
137     /*
138      * Don't allow empty pathnames.
139      * POSIX.1 requirement: "" is not a vaild file name.
140      */
141     if (error == 0 && pathlen <= 1)
142 	error = ENOENT;
143 
144     if (error == 0) {
145 	if (p && p->p_fd) {
146 	    if (nd->nl_path[0] == '/') {
147 		    if ((flags & NLC_NLNCH_NOINIT) == 0) {
148 			    nd->nl_basench = &p->p_fd->fd_nrdir;
149 			    cache_copy(nd->nl_basench, &nd->nl_nch);
150 		    }
151 		    cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch);
152 		    if (p->p_fd->fd_njdir.ncp)
153 			cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch);
154 		    nd->nl_cred = td->td_ucred;
155 		    nd->nl_flags |= NLC_BORROWCRED;
156 	    } else {
157 		    if ((flags & NLC_NLNCH_NOINIT) == 0) {
158 			    nd->nl_basench = &p->p_fd->fd_ncdir;
159 			    cache_copy(nd->nl_basench, &nd->nl_nch);
160 		    }
161 		    cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch);
162 		    if (p->p_fd->fd_njdir.ncp)
163 			cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch);
164 		    nd->nl_cred = td->td_ucred;
165 		    nd->nl_flags |= NLC_BORROWCRED;
166 	    }
167 	} else {
168 	    if ((flags & NLC_NLNCH_NOINIT) == 0) {
169 		    nd->nl_basench = &rootnch;
170 		    cache_copy(nd->nl_basench, &nd->nl_nch);
171 	    }
172 	    cache_copy(&rootnch, &nd->nl_rootnch);
173 	    cache_copy(&rootnch, &nd->nl_jailnch);
174 	    nd->nl_cred = proc0.p_ucred;
175 	    nd->nl_flags |= NLC_BORROWCRED;
176 	}
177 	nd->nl_td = td;
178 	nd->nl_flags |= flags & ~NLC_NLNCH_NOINIT;
179     } else {
180 	nlookup_done(nd);
181     }
182     return(error);
183 }
184 
185 
186 /*
187  * nlookup_init() for "at" family of syscalls.
188  *
189  * Similar to nlookup_init() but if the path is relative and fd is not
190  * AT_FDCWD, the path will be interpreted relative to the directory pointed
191 *  to by fd.  In this case, the file entry pointed to by fd is ref'ed and
192 *  returned in *fpp.
193  *
194  * If the call succeeds, nlookup_done_at() must be called to clean-up the nd
195  * and release the ref to the file entry.
196  */
197 int
198 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd,
199 		const char *path, enum uio_seg seg, int flags)
200 {
201 	struct thread *td = curthread;
202 	struct file* fp;
203 	struct vnode *vp;
204 	int error;
205 
206 	*fpp = NULL;
207 
208 	/*
209 	 * Resolve the path, we might have to copy it in from userland,
210 	 * but don't initialize nl_basench, or nl_nch.
211 	 */
212 	error = nlookup_init(nd, path, seg, flags | NLC_NLNCH_NOINIT);
213 	if (__predict_false(error))
214 		return (error);
215 
216 	/*
217 	 * Setup nl_basench (a pointer only not refd), and copy+ref
218 	 * to initialize nl_nch.  Only applicable to relative paths.
219 	 * For absolute paths, or if (fd) is degenerate, just use the
220 	 * normal path.
221 	 */
222 	if (nd->nl_path[0] == '/') {
223 		struct proc *p = curproc;
224 		nd->nl_basench = &p->p_fd->fd_nrdir;
225 	} else if (fd == AT_FDCWD) {
226 		struct proc *p = curproc;
227 		nd->nl_basench = &p->p_fd->fd_ncdir;
228 	} else {
229 		if ((error = holdvnode(td, fd, &fp)) != 0)
230 			goto done;
231 		vp = (struct vnode*)fp->f_data;
232 		if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) {
233 			fdrop(fp);
234 			fp = NULL;
235 			error = ENOTDIR;
236 			goto done;
237 		}
238 		nd->nl_basench = &fp->f_nchandle;
239 		*fpp = fp;
240 	}
241 	cache_copy(nd->nl_basench, &nd->nl_nch);
242 done:
243 	if (error)
244 		nlookup_done(nd);
245 	return (error);
246 }
247 
248 /*
249  * This works similarly to nlookup_init() but does not assume a process
250  * context.  rootnch is always chosen for the root directory and the cred
251  * and starting directory are supplied in arguments.
252  */
253 int
254 nlookup_init_raw(struct nlookupdata *nd,
255 	     const char *path, enum uio_seg seg, int flags,
256 	     struct ucred *cred, struct nchandle *ncstart)
257 {
258     size_t pathlen;
259     thread_t td;
260     int error;
261 
262     td = curthread;
263 
264     bzero(nd, sizeof(struct nlookupdata));
265     nd->nl_path = objcache_get(namei_oc, M_WAITOK);
266     nd->nl_flags |= NLC_HASBUF;
267     if (seg == UIO_SYSSPACE)
268 	error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen);
269     else
270 	error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen);
271 
272     /*
273      * Don't allow empty pathnames.
274      * POSIX.1 requirement: "" is not a vaild file name.
275      */
276     if (error == 0 && pathlen <= 1)
277 	error = ENOENT;
278 
279     if (error == 0) {
280 	cache_copy(ncstart, &nd->nl_nch);
281 	cache_copy(&rootnch, &nd->nl_rootnch);
282 	cache_copy(&rootnch, &nd->nl_jailnch);
283 	nd->nl_cred = crhold(cred);
284 	nd->nl_td = td;
285 	nd->nl_flags |= flags;
286     } else {
287 	nlookup_done(nd);
288     }
289     return(error);
290 }
291 
292 /*
293  * This works similarly to nlookup_init_raw() but does not rely
294  * on rootnch being initialized yet.
295  */
296 int
297 nlookup_init_root(struct nlookupdata *nd,
298 	     const char *path, enum uio_seg seg, int flags,
299 	     struct ucred *cred, struct nchandle *ncstart,
300 	     struct nchandle *ncroot)
301 {
302     size_t pathlen;
303     thread_t td;
304     int error;
305 
306     td = curthread;
307 
308     bzero(nd, sizeof(struct nlookupdata));
309     nd->nl_path = objcache_get(namei_oc, M_WAITOK);
310     nd->nl_flags |= NLC_HASBUF;
311     if (seg == UIO_SYSSPACE)
312 	error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen);
313     else
314 	error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen);
315 
316     /*
317      * Don't allow empty pathnames.
318      * POSIX.1 requirement: "" is not a vaild file name.
319      */
320     if (error == 0 && pathlen <= 1)
321 	error = ENOENT;
322 
323     if (error == 0) {
324 	cache_copy(ncstart, &nd->nl_nch);
325 	cache_copy(ncroot, &nd->nl_rootnch);
326 	cache_copy(ncroot, &nd->nl_jailnch);
327 	nd->nl_cred = crhold(cred);
328 	nd->nl_td = td;
329 	nd->nl_flags |= flags;
330     } else {
331 	nlookup_done(nd);
332     }
333     return(error);
334 }
335 
#if 0
/*
 * (Compiled out.)  Replace the credential held in nd with (cred); the
 * new credential is meant to be used by later operations on
 * nd.nl_open_vp and on the nlookupdata structure.
 *
 * The new cred is referenced with crhold().  The previous cred is only
 * released when it was not borrowed (NLC_BORROWCRED clear), and the
 * borrowed flag is cleared since nd now owns a reference.  Nothing is
 * done when (cred) is already the one in use.
 */
void
nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred)
{
	struct ucred *newcred;

	KKASSERT(nd->nl_cred != NULL);

	if (nd->nl_cred == cred)
		return;

	newcred = crhold(cred);
	if ((nd->nl_flags & NLC_BORROWCRED) == 0)
		crfree(nd->nl_cred);
	nd->nl_flags &= ~NLC_BORROWCRED;
	nd->nl_cred = newcred;
}
#endif
355 
356 /*
357  * Cleanup a nlookupdata structure after we are through with it.  This may
358  * be called on any nlookupdata structure initialized with nlookup_init().
359  * Calling nlookup_done() is mandatory in all cases except where nlookup_init()
360  * returns an error, even if as a consumer you believe you have taken all
361  * dynamic elements out of the nlookupdata structure.
362  */
363 void
364 nlookup_done(struct nlookupdata *nd)
365 {
366     if (nd->nl_nch.ncp) {
367 	if (nd->nl_flags & NLC_NCPISLOCKED)
368 	    cache_unlock(&nd->nl_nch);
369 	cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno);
370     }
371     nd->nl_flags &= ~NLC_NCPISLOCKED;
372     if (nd->nl_rootnch.ncp)
373 	cache_drop_and_cache(&nd->nl_rootnch, 0);
374     if (nd->nl_jailnch.ncp)
375 	cache_drop_and_cache(&nd->nl_jailnch, 0);
376     if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) {
377 	objcache_put(namei_oc, nd->nl_path);
378 	nd->nl_path = NULL;
379     }
380     if (nd->nl_cred) {
381 	if ((nd->nl_flags & NLC_BORROWCRED) == 0)
382 	    crfree(nd->nl_cred);
383 	nd->nl_cred = NULL;
384 	nd->nl_flags &= ~NLC_BORROWCRED;
385     }
386     if (nd->nl_open_vp) {
387 	if (nd->nl_flags & NLC_LOCKVP) {
388 		vn_unlock(nd->nl_open_vp);
389 		nd->nl_flags &= ~NLC_LOCKVP;
390 	}
391 	vn_close(nd->nl_open_vp, nd->nl_vp_fmode, NULL);
392 	nd->nl_open_vp = NULL;
393     }
394     if (nd->nl_dvp) {
395 	vrele(nd->nl_dvp);
396 	nd->nl_dvp = NULL;
397     }
398     nd->nl_flags = 0;	/* clear remaining flags (just clear everything) */
399     nd->nl_basench = NULL;
400 }
401 
/*
 * Counterpart of nlookup_done() for a structure that was initialized
 * with nlookup_init_at(): cleans up nd and then drops the file entry
 * reference, if one was returned (fp may be NULL).
 */
void
nlookup_done_at(struct nlookupdata *nd, struct file *fp)
{
	nlookup_done(nd);
	if (fp)
		fdrop(fp);
}
413 
414 void
415 nlookup_zero(struct nlookupdata *nd)
416 {
417 	bzero(nd, sizeof(struct nlookupdata));
418 }
419 
420 /*
421  * Simple all-in-one nlookup.  Returns a locked namecache structure or NULL
422  * if an error occured.
423  *
424  * Note that the returned ncp is not checked for permissions, though VEXEC
425  * is checked on the directory path leading up to the result.  The caller
426  * must call naccess() to check the permissions of the returned leaf.
427  */
428 struct nchandle
429 nlookup_simple(const char *str, enum uio_seg seg,
430 	       int niflags, int *error)
431 {
432     struct nlookupdata nd;
433     struct nchandle nch;
434 
435     *error = nlookup_init(&nd, str, seg, niflags);
436     if (*error == 0) {
437 	    if ((*error = nlookup(&nd)) == 0) {
438 		    nch = nd.nl_nch;	/* keep hold ref from structure */
439 		    cache_zero(&nd.nl_nch); /* and NULL out */
440 	    } else {
441 		    cache_zero(&nch);
442 	    }
443 	    nlookup_done(&nd);
444     } else {
445 	    cache_zero(&nch);
446     }
447     return(nch);
448 }
449 
/*
 * Returns non-zero if nothing but '/' characters (or nothing at all)
 * remains at ptr, i.e. the path element just parsed was the last one.
 */
static
int
islastelement(const char *ptr)
{
	const char *scan;

	for (scan = ptr; *scan == '/'; ++scan)
		;
	return (*scan == '\0');
}
461 
462 /*
463  * Returns non-zero if we need to lock the namecache element
464  * exclusively.  Unless otherwise requested by NLC_SHAREDLOCK,
465  * the last element of the namecache lookup will be locked
466  * exclusively.
467  *
468  * O_CREAT or O_TRUNC need the last element to be locked exlcusively.
469  * Intermediate elements are always locked shared.
470  *
471  * NOTE: Even if we return on-zero, an unresolved namecache record
472  *	 will always be locked exclusively.
473  */
474 static __inline
475 int
476 wantsexcllock(struct nlookupdata *nd, int last_element)
477 {
478 	if ((nd->nl_flags & NLC_SHAREDLOCK) == 0)
479 		return(last_element);
480 	return 0;
481 }
482 
483 
484 /*
485  * Do a generic nlookup.  Note that the passed nd is not nlookup_done()'d
486  * on return, even if an error occurs.  If no error occurs or NLC_CREATE
487  * is flagged and ENOENT is returned, then the returned nl_nch is always
488  * referenced and locked exclusively.
489  *
490  * WARNING: For any general error other than ENOENT w/NLC_CREATE, the
491  *	    resulting nl_nch may or may not be locked and if locked
492  *	    might be locked either shared or exclusive.
493  *
494  * Intermediate directory elements, including the current directory, require
495  * execute (search) permission.  nlookup does not examine the access
496  * permissions on the returned element.
497  *
498  * If NLC_CREATE is set the last directory must allow node creation,
499  * and an error code of 0 will be returned for a non-existent
500  * target (not ENOENT).
501  *
502  * If NLC_RENAME_DST is set the last directory must allow node deletion,
503  * plus the sticky check is made, and an error code of 0 will be returned
504  * for a non-existent target (not ENOENT).
505  *
506  * If NLC_DELETE is set the last directory must allow node deletion,
507  * plus the sticky check is made.
508  *
509  * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode
510  * of the returned entry.  The vnode will be referenced but not locked.
511  *
512  * IF THE PATH REPRESENTS A MOUNT POINT CROSSING THEN NLC_REFDVP WILL SET
513  * NL_DVP TO NULL AND RETURN NO ERROR (ERROR == 0), allowing the operation
514  * to return up the stack.  The nch will only be referenced and not locked.
515  * High level code must check this case and do the right thing since,
516  * typically, it means things like 'mkdir' should fail with EEXIST.  For
517  * example 'mkdir /var/cache' where /var/cache is a null-mount from
518  * /build/var.cache, needs to return EEXIST rather than a mount-crossing
519  * failure.
520  *
521  * NOTE: As an optimization we attempt to obtain a shared namecache lock
522  *	 on any intermediate elements.  On success, the returned element
523  *	 is ALWAYS locked exclusively.
524  *
525  * NOTE: If for any reason the nc_generation number of the ncp's being
526  *	 evaluated changes, the lookup is retried.
527  */
528 int
529 nlookup(struct nlookupdata *nd)
530 {
531     globaldata_t gd = mycpu;
532     struct nlcomponent nlc;
533     struct nchandle nch;
534     struct nchandle nctmp;
535     struct mount *mp;
536     int wasdotordotdot;
537     char *path_reset;
538     char *ptr;
539     char *nptr;
540     int error;
541     int len;
542     int dflags;
543     int hit = 1;
544     int saveflag = nd->nl_flags;
545     int max_retries = nlookup_max_retries;
546     u_int nl_gen;
547     u_int nch_gen;
548     int gen_changed;
549     boolean_t doretry = FALSE;
550     boolean_t inretry = FALSE;
551 
552     path_reset = NULL;
553 
554 nlookup_start:
555 
556 #ifdef KTRACE
557     if (KTRPOINT(nd->nl_td, KTR_NAMEI))
558 	ktrnamei(nd->nl_td->td_lwp, nd->nl_path);
559 #endif
560     bzero(&nlc, sizeof(nlc));
561 
562     /*
563      * Setup for the loop.  The current working namecache element is
564      * always at least referenced.  We lock it as required, but always
565      * return a locked, resolved namecache entry.
566      */
567     nd->nl_loopcnt = 0;
568     if (nd->nl_dvp) {
569 	vrele(nd->nl_dvp);
570 	nd->nl_dvp = NULL;
571     }
572     ptr = nd->nl_path;
573 
574     nl_gen = nd->nl_nch.ncp ? nd->nl_nch.ncp->nc_generation : 0;
575     nl_gen &= ~3;
576     gen_changed = 0;
577 
578     /*
579      * Loop on the path components.  At the top of the loop nd->nl_nch
580      * is ref'd and unlocked and represents our current position.
581      */
582     for (;;) {
583 	int last_element;
584 
585 	++nd->nl_elmno;
586 	KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
587 
588 	/*
589 	 * Check if the root directory should replace the current
590 	 * directory.  This is done at the start of a translation
591 	 * or after a symbolic link has been found.  In other cases
592 	 * ptr will never be pointing at a '/'.
593 	 */
594 	if (*ptr == '/') {
595 	    do {
596 		++ptr;
597 	    } while (*ptr == '/');
598 
599 	    /*
600 	     * We might already be at the root as a pre-optimization
601 	     */
602 	    if (nd->nl_nch.mount != nd->nl_rootnch.mount ||
603 		nd->nl_nch.ncp != nd->nl_rootnch.ncp) {
604 		cache_drop_and_cache(&nd->nl_nch, 0);
605 		cache_copy(&nd->nl_rootnch, &nd->nl_nch);
606 		nl_gen = nd->nl_nch.ncp->nc_generation & ~3;
607 	    }
608 
609 	    /*
610 	     * Fast-track termination.  There is no parent directory of
611 	     * the root in the same mount from the point of view of
612 	     * the caller so return EACCES if NLC_REFDVP is specified,
613 	     * and EEXIST if NLC_CREATE is also specified.
614 	     * e.g. 'rmdir /' or 'mkdir /' are not allowed.
615 	     */
616 	    if (*ptr == 0) {
617 		if (nd->nl_flags & NLC_REFDVP)
618 			error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES;
619 		else
620 			error = 0;
621 		nd->nl_flags |= NLC_NCPISLOCKED;
622 		cache_lock_maybe_shared(&nd->nl_nch,
623 					wantsexcllock(nd, islastelement(ptr)));
624 		break;
625 	    }
626 	    continue;
627 	}
628 
629 	/*
630 	 * Pre-calculate next path component so we can check whether the
631 	 * current component directory is the last directory in the path
632 	 * or not.
633 	 */
634 	for (nptr = ptr; *nptr && *nptr != '/'; ++nptr)
635 		;
636 
637 	/*
638 	 * nd->nl_nch is referenced and not locked here.
639 	 *
640 	 * Check directory search permissions.  This will load dflags to
641 	 * obtain directory-special permissions to be checked along with the
642 	 * last component.
643 	 *
644 	 * We only need to pass-in &dflags for the second-to-last component.
645 	 * Optimize by passing-in NULL for any prior components, which may
646 	 * allow the code to bypass the naccess() call.
647 	 *
648 	 * naccess() is optimized to avoid having to lock the nch or get
649 	 * the related vnode if cached perms are sufficient.
650 	 */
651 	dflags = 0;
652 	if (*nptr == '/' || (saveflag & NLC_MODIFYING_MASK) == 0) {
653 	    error = naccess(&nd->nl_nch, &nl_gen, NLC_EXEC,
654 			    nd->nl_cred, NULL, 0);
655 	} else {
656 	    error = naccess(&nd->nl_nch, &nl_gen, NLC_EXEC,
657 			    nd->nl_cred, &dflags, 0);
658 	}
659 	if (error) {
660 	    if (keeperror(nd, error))
661 		break;
662 	    error = 0;
663 	}
664 
665 	/*
666 	 * Extract the next (or last) path component.  Path components are
667 	 * limited to 255 characters.
668 	 */
669 	nlc.nlc_nameptr = ptr;
670 	nlc.nlc_namelen = nptr - ptr;
671 	ptr = nptr;
672 	if (nlc.nlc_namelen >= 256) {
673 	    error = ENAMETOOLONG;
674 	    break;
675 	}
676 	last_element = islastelement(nptr);
677 
678 	/*
679 	 * Lookup the path component in the cache, creating an unresolved
680 	 * entry if necessary.  We have to handle "." and ".." as special
681 	 * cases.
682 	 *
683 	 * When handling ".." we have to detect a traversal back through a
684 	 * mount point.   If we are at the root, ".." just returns the root.
685 	 *
686 	 * When handling "." or ".." we also have to recalculate dflags
687 	 * since our dflags will be for some sub-directory instead of the
688 	 * parent dir.
689 	 *
690 	 * This subsection returns a referenced and possibly locked 'nch'.
691 	 * The locking status is based on the last_element flag.
692 	 *
693 	 * The namecache topology is not allowed to be disconnected, so
694 	 * encountering a NULL parent will generate EINVAL.  This typically
695 	 * occurs when a directory is removed out from under a process.
696 	 *
697 	 * WARNING! The unlocking of nd->nl_nch is sensitive code.
698 	 */
699 	KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
700 
701 	if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') {
702 	    if (last_element) {
703 		cache_get_maybe_shared(&nd->nl_nch, &nch,
704 				       wantsexcllock(nd, 1));
705 	    } else {
706 		cache_copy(&nd->nl_nch, &nch);
707 	    }
708 	    nch_gen = nch.ncp->nc_generation & ~3;
709 	    wasdotordotdot = 1;
710 	} else if (nlc.nlc_namelen == 2 &&
711 		   nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') {
712 	    if (nd->nl_nch.mount == nd->nl_rootnch.mount &&
713 		nd->nl_nch.ncp == nd->nl_rootnch.ncp
714 	    ) {
715 		/*
716 		 * ".." at the root returns the root
717 		 */
718 		if (last_element) {
719 		    cache_get_maybe_shared(&nd->nl_nch, &nch,
720 					   wantsexcllock(nd, 1));
721 		} else {
722 		    cache_copy(&nd->nl_nch, &nch);
723 		}
724 	    } else {
725 		/*
726 		 * Locate the parent ncp.  If we are at the root of a
727 		 * filesystem mount we have to skip to the mounted-on
728 		 * point in the underlying filesystem.
729 		 *
730 		 * Expect the parent to always be good since the
731 		 * mountpoint doesn't go away.  XXX hack.  cache_get()
732 		 * requires the ncp to already have a ref as a safety.
733 		 *
734 		 * However, a process which has been broken out of a chroot
735 		 * will wind up with a NULL parent if it tries to '..' above
736 		 * the real root, deal with the case.  Note that this does
737 		 * not protect us from a jail breakout, it just stops a panic
738 		 * if the jail-broken process tries to '..' past the real
739 		 * root.
740 		 */
741 		nctmp = nd->nl_nch;
742 		while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) {
743 			nctmp = nctmp.mount->mnt_ncmounton;
744 			if (nctmp.ncp == NULL)
745 				break;
746 		}
747 		if (nctmp.ncp == NULL) {
748 			if (curthread->td_proc) {
749 				kprintf("vfs_nlookup: '..' traverse broke "
750 					"jail: pid %d (%s)\n",
751 					curthread->td_proc->p_pid,
752 					curthread->td_comm);
753 			}
754 			nctmp = nd->nl_rootnch;
755 		} else {
756 			nctmp.ncp = nctmp.ncp->nc_parent;
757 		}
758 		if (last_element) {
759 		    cache_get_maybe_shared(&nctmp, &nch,
760 					   wantsexcllock(nd, 1));
761 		} else {
762 		    cache_copy(&nctmp, &nch);
763 		}
764 	    }
765 	    nch_gen = nch.ncp->nc_generation & ~3;
766 	    wasdotordotdot = 2;
767 	} else {
768 	    /*
769 	     * Quickly lookup the component.  If we can't find it, then
770 	     * slowly lookup and resolve the component.
771 	     */
772 	    if (last_element) {
773 		    error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc,
774 						       wantsexcllock(nd, 1),
775 						       &nch);
776 	    } else {
777 		    nch = cache_nlookup_nonlocked(&nd->nl_nch, &nlc);
778 		    if (nch.ncp == NULL)
779 			error = EWOULDBLOCK;
780 	    }
781 
782 	    /*
783 	     * At this point the only possible error is EWOULDBLOCK.
784 	     *
785 	     * If no error nch is set and referenced, and then also locked
786 	     * according to last_element.  For EWOULDBLOCK nch is not set.
787 	     * For any other error nch is set and referenced, but not locked.
788 	     *
789 	     * On EWOULDBLOCK the ncp may be unresolved (if not locked it can
790 	     * become unresolved at any time, but we don't care at this time).
791 	     */
792 	    if (error == EWOULDBLOCK) {
793 		nch = cache_nlookup(&nd->nl_nch, &nlc);
794 		if (nch.ncp->nc_flag & NCF_UNRESOLVED)
795 		    hit = 0;
796 		for (;;) {
797 		    error = cache_resolve(&nch, &nch_gen, nd->nl_cred);
798 		    if (error != EAGAIN &&
799 			(nch.ncp->nc_flag & NCF_DESTROYED) == 0) {
800 			    if (error == ESTALE) {
801 				if (!inretry)
802 				    error = ENOENT;
803 				doretry = TRUE;
804 			    }
805 			    if (last_element == 0)
806 				    cache_unlock(&nch);
807 			    break;
808 		    }
809 		    kprintf("[diagnostic] nlookup: relookup %*.*s\n",
810 			    nch.ncp->nc_nlen, nch.ncp->nc_nlen,
811 			    nch.ncp->nc_name);
812 		    cache_put(&nch);
813 		    nch = cache_nlookup(&nd->nl_nch, &nlc);
814 		}
815 	    }
816 	    nch_gen = nch.ncp->nc_generation & ~3;
817 	    wasdotordotdot = 0;
818 	}
819 
820 	/*
821 	 * If the component is "." or ".." our dflags no longer represents
822 	 * the parent directory and we have to explicitly look it up.
823 	 *
824 	 * Expect the parent to be good since nch is locked.
825 	 *
826 	 * nch will continue to be valid even if an error occurs after this
827 	 * point.
828 	 */
829 	if (wasdotordotdot && error == 0) {
830 	    struct nchandle par;
831 
832 	    dflags = 0;
833 	    if (last_element == 0)
834 		cache_lock_maybe_shared(&nch, wantsexcllock(nd, 0));
835 
836 	    if ((par.ncp = nch.ncp->nc_parent) != NULL) {
837 		u_int dummy_gen = 0;
838 
839 		par.mount = nch.mount;
840 		cache_hold(&par);
841 		error = naccess(&par, &dummy_gen, 0, nd->nl_cred, &dflags, 0);
842 		cache_drop_and_cache(&par, nd->nl_elmno - 1);
843 		if (error) {
844 		    if (!keeperror(nd, error))
845 			error = 0;
846 		    if (error == EINVAL) {
847 			kprintf("nlookup (%s): trailing . or .. retry on %s\n",
848 				curthread->td_comm, nd->nl_path);
849 			doretry = TRUE;
850 		    }
851 		}
852 	    }
853 
854 	    if (last_element == 0)
855 		cache_unlock(&nch);
856 	}
857 
858 	/*
859 	 * [end of subsection]
860 	 *
861 	 * nch is referenced and locked according to (last_element).
862 	 * nd->nl_nch is unlocked and referenced.
863 	 * nl_gen and nch_gen are both set.
864 	 */
865 	KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
866 
867 	/*
868 	 * Resolve the namespace if necessary.  The ncp returned by
869 	 * cache_nlookup() is referenced, and also locked according
870 	 * to last_element.
871 	 *
872 	 * XXX neither '.' nor '..' should return EAGAIN since they were
873 	 * previously resolved and thus cannot be newly created ncp's.
874 	 */
875 	if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
876 	    if (last_element == 0)
877 		cache_lock(&nch);
878 	    hit = 0;
879 	    error = cache_resolve(&nch, &nch_gen, nd->nl_cred);
880 	    if (error == ESTALE) {
881 		if (!inretry)
882 		    error = ENOENT;
883 		doretry = TRUE;
884 	    }
885 	    if (last_element == 0)
886 		cache_unlock(&nch);
887 	    KKASSERT(error != EAGAIN);
888 	} else {
889 	    error = nch.ncp->nc_error;
890 	}
891 
892 	/*
893 	 * Early completion.  ENOENT is not an error if this is the last
894 	 * component and NLC_CREATE or NLC_RENAME (rename target) was
895 	 * requested.  Note that ncp->nc_error is left as ENOENT in that
896 	 * case, which we check later on.
897 	 *
898 	 * Also handle invalid '.' or '..' components terminating a path
899 	 * for a create/rename/delete.  The standard requires this and pax
900 	 * pretty stupidly depends on it.
901 	 */
902 	if (last_element) {
903 	    if (error == ENOENT &&
904 		(nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST)))
905 	    {
906 		if (nd->nl_flags & NLC_NFS_RDONLY) {
907 			error = EROFS;
908 		} else {
909 			error = naccess(&nch, &nch_gen, nd->nl_flags | dflags,
910 					nd->nl_cred, NULL, last_element);
911 		}
912 	    }
913 	    if (error == 0 && wasdotordotdot &&
914 		(nd->nl_flags & (NLC_CREATE | NLC_DELETE |
915 				 NLC_RENAME_SRC | NLC_RENAME_DST)))
916 	    {
917 		/*
918 		 * POSIX junk
919 		 */
920 		if (nd->nl_flags & NLC_CREATE)
921 			error = EEXIST;
922 		else if (nd->nl_flags & NLC_DELETE)
923 			error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY;
924 		else
925 			error = EINVAL;
926 	    }
927 	}
928 
929 	/*
930 	 * Early completion on error.
931 	 */
932 	if (error) {
933 	    if (last_element)
934 		    cache_unlock(&nch);
935 	    cache_drop_and_cache(&nch, nd->nl_elmno);
936 	    break;
937 	}
938 
939 	/*
940 	 * If the element is a symlink and it is either not the last
941 	 * element or it is the last element and we are allowed to
942 	 * follow symlinks, resolve the symlink.
943 	 */
944 	if ((nch.ncp->nc_flag & NCF_ISSYMLINK) &&
945 	    (*ptr || (nd->nl_flags & NLC_FOLLOW))
946 	) {
947 	    if (nd->nl_loopcnt++ >= MAXSYMLINKS) {
948 		error = ELOOP;
949 		if (last_element)
950 			cache_unlock(&nch);
951 		cache_drop_and_cache(&nch, nd->nl_elmno);
952 		break;
953 	    }
954 
955 	    /*
956 	     * Check for a generation change.
957 	     *
958 	     * NOTE: On generation changes we must at a minimum cycle
959 	     *	     the lock.  Here we get or have the lock so we are
960 	     *	     ok.
961 	     */
962 	    if (last_element == 0)
963 		cache_lock_maybe_shared(&nch, 1);
964 
965 	    if ((nch.ncp->nc_generation - nch_gen) & ~1) {
966 		if (nlookup_debug & 1) {
967 		    kprintf("nlookup: symlink: GEN CHANGE %d\n",
968 			    (nch.ncp->nc_generation - nch_gen));
969 		}
970 		gen_changed = 1;
971 	    }
972 
973 	    error = nreadsymlink(nd, &nch, &nlc);
974 	    cache_put(&nch);
975 	    if (error)
976 		break;
977 
978 	    /*
979 	     * Concatenate trailing path elements onto the returned symlink.
980 	     * Note that if the path component (ptr) is not exhausted, it
981 	     * will begin with a '/', so we do not have to add another one.
982 	     *
983 	     * The symlink may not be empty.
984 	     */
985 	    len = strlen(ptr);
986 	    if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) {
987 		error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT;
988 		objcache_put(namei_oc, nlc.nlc_nameptr);
989 		break;
990 	    }
991 	    bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1);
992 
993 	    if (path_reset) {
994 		if (nd->nl_flags & NLC_HASBUF)
995 		    objcache_put(namei_oc, nd->nl_path);
996 	    } else {
997 		path_reset = nd->nl_path;
998 	    }
999 	    nd->nl_path = nlc.nlc_nameptr;
1000 	    nd->nl_flags |= NLC_HASBUF;
1001 	    ptr = nd->nl_path;
1002 	    /* nl_gen has not changed */
1003 
1004 	    /*
1005 	     * Go back up to the top to resolve any initial '/'s in the
1006 	     * symlink.
1007 	     */
1008 	    continue;
1009 	}
1010 
1011 	/*
1012 	 * If the element is a directory and we are crossing a mount point,
1013 	 * Locate the mount.
1014 	 */
1015 	while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
1016 	    (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 &&
1017 	    (mp = cache_findmount(&nch)) != NULL
1018 	) {
1019 	    struct vnode *tdp;
1020 	    int vfs_do_busy = 0;
1021 
1022 	    /*
1023 	     * VFS must be busied before the namecache entry is locked,
1024 	     * but we don't want to waste time calling vfs_busy() if the
1025 	     * mount point is already resolved.
1026 	     */
1027 again:
1028 	    /*
1029 	     * Check for a generation change.
1030 	     *
1031 	     * NOTE: On generation changes we must at a minimum cycle
1032 	     *	     the lock.  So get and release the lock if we
1033 	     *	     do not have it.
1034 	     */
1035 	    if ((nch.ncp->nc_generation - nch_gen) & ~1) {
1036 		if (last_element == 0) {
1037 		    cache_lock_maybe_shared(&nch, 1);
1038 		    cache_unlock(&nch);
1039 		}
1040 		if (nlookup_debug & 1) {
1041 		    kprintf("nlookup: mountpt: GEN CHANGE %d\n",
1042 			    (nch.ncp->nc_generation - nch_gen));
1043 		}
1044 		gen_changed = 1;
1045 	    }
1046 	    if (last_element)
1047 		    cache_unlock(&nch);
1048 	    cache_drop_and_cache(&nch, nd->nl_elmno);
1049 
1050 	    if (vfs_do_busy) {
1051 		while (vfs_busy(mp, 0)) {
1052 		    if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
1053 			kprintf("nlookup: warning umount race avoided\n");
1054 			cache_dropmount(mp);
1055 			error = EBUSY;
1056 			vfs_do_busy = 0;
1057 			goto double_break;
1058 		    }
1059 		}
1060 	    }
1061 
1062 	    /*
1063 	     * We don't need to lock the nch unless the entry is unresolved
1064 	     * or this is the last element.
1065 	     */
1066 	    if (last_element)
1067 		cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch,
1068 				       wantsexcllock(nd, 1));
1069 	    else
1070 		cache_copy(&mp->mnt_ncmountpt, &nch);
1071 	    nch_gen = nch.ncp->nc_generation & ~3;
1072 
1073 	    if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
1074 		if (last_element == 0)
1075 		    cache_lock(&nch);
1076 		if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
1077 		    if (vfs_do_busy == 0) {
1078 			vfs_do_busy = 1;
1079 			if (last_element == 0)
1080 			    cache_unlock(&nch);
1081 			goto again;
1082 		    }
1083 		    error = VFS_ROOT(mp, &tdp);
1084 		    vfs_unbusy(mp);
1085 		    vfs_do_busy = 0;
1086 		    if (keeperror(nd, error)) {
1087 			cache_dropmount(mp);
1088 			if (last_element == 0)
1089 			    cache_unlock(&nch);
1090 			break;
1091 		    }
1092 		    if (error == 0) {
1093 			cache_setvp(&nch, tdp);
1094 			nch_gen = nch.ncp->nc_generation & ~3;
1095 			vput(tdp);
1096 		    }
1097 		}
1098 		if (last_element == 0)
1099 		    cache_unlock(&nch);
1100 	    }
1101 	    if (vfs_do_busy)
1102 		vfs_unbusy(mp);
1103 	    cache_dropmount(mp);
1104 	}
1105 
1106 	/*
1107 	 * Break out on error
1108 	 */
1109 	if (keeperror(nd, error)) {
1110 	    if (last_element)
1111 		    cache_unlock(&nch);
1112 	    cache_drop_and_cache(&nch, nd->nl_elmno);
1113 double_break:
1114 	    break;
1115 	}
1116 
1117 	/*
1118 	 * Skip any slashes to get to the next element.  If there
1119 	 * are any slashes at all the current element must be a
1120 	 * directory or, in the create case, intended to become a directory.
1121 	 * If it isn't we break without incrementing ptr and fall through
1122 	 * to the failure case below.
1123 	 */
1124 	while (*ptr == '/') {
1125 	    if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 &&
1126 		!(nd->nl_flags & NLC_WILLBEDIR)
1127 	    ) {
1128 		break;
1129 	    }
1130 	    ++ptr;
1131 	}
1132 
1133 	/*
1134 	 * Continuation case: additional elements and the current
1135 	 * element is a directory.
1136 	 */
1137 	if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) {
1138 	    /*
1139 	     * Check for a generation change.
1140 	     *
1141 	     * NOTE: On generation changes we must at a minimum cycle
1142 	     *	     the lock.  So get and release the lock if we
1143 	     *	     do not have it.
1144 	     */
1145 	    if ((nch.ncp->nc_generation - nch_gen) & ~1) {
1146 		if (last_element == 0) {
1147 		    cache_lock_maybe_shared(&nch, 1);
1148 		    cache_unlock(&nch);
1149 		}
1150 		if (nlookup_debug & 1) {
1151 		    kprintf("nlookup: next: GEN CHANGE %d\n",
1152 			    (nch.ncp->nc_generation - nch_gen));
1153 		}
1154 		gen_changed = 1;
1155 	    }
1156 	    cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno);
1157 	    if (last_element)
1158 		    cache_unlock(&nch);
1159 	    /*nchislocked = 0; not needed */
1160 	    KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
1161 	    nd->nl_nch = nch;
1162 	    nl_gen = nch_gen;
1163 	    continue;
1164 	}
1165 
1166 	/*
1167 	 * Check for a generation change.
1168 	 *
1169 	 * NOTE: On generation changes we must at a minimum cycle
1170 	 *	     the lock.  So get and release the lock if we
1171 	 *	     do not have it.
1172 	 */
1173 	if ((nch.ncp->nc_generation - nch_gen) & ~1) {
1174 	    if (nlookup_debug & 1) {
1175 		if (last_element == 0) {
1176 		    cache_lock_maybe_shared(&nch, 1);
1177 		    cache_unlock(&nch);
1178 		}
1179 		kprintf("nlookup: final: GEN CHANGE %d\n",
1180 			(nch.ncp->nc_generation - nch_gen));
1181 		gen_changed = 1;
1182 	    }
1183 	}
1184 
1185 	/*
1186 	 * Failure case: additional elements and the current element
1187 	 * is not a directory
1188 	 */
1189 	if (*ptr) {
1190 	    if (last_element)
1191 		    cache_unlock(&nch);
1192 	    cache_drop_and_cache(&nch, nd->nl_elmno);
1193 	    error = ENOTDIR;
1194 	    break;
1195 	}
1196 
1197 	/*
1198 	 * Successful lookup of last element.
1199 	 *
1200 	 * Check permissions if the target exists.  If the target does not
1201 	 * exist directory permissions were already tested in the early
1202 	 * completion code above.
1203 	 *
1204 	 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY
1205 	 * if the file is marked append-only, and NLC_STICKY if the directory
1206 	 * containing the file is sticky.
1207 	 */
1208 	KKASSERT(last_element);
1209 
1210 	if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) {
1211 	    error = naccess(&nch, &nch_gen, nd->nl_flags | dflags,
1212 			    nd->nl_cred, NULL, 1);
1213 	    if (keeperror(nd, error)) {
1214 		cache_put(&nch);
1215 		break;
1216 	    }
1217 	}
1218 
1219 	/*
1220 	 * Termination: no more elements.
1221 	 *
1222 	 * Check to see if the immediate parent has been destroyed.  This race
1223 	 * can occur because the element lookup must temporarily unlock
1224 	 * the parent.  If so, do a retry.
1225 	 */
1226 	if (nch.ncp->nc_parent &&
1227 	    (nch.ncp->nc_parent->nc_flag & NCF_DESTROYED)) {
1228 		doretry = TRUE;
1229 	}
1230 
1231 	/*
1232 	 * Termination: no more elements.
1233 	 *
1234 	 * If NLC_REFDVP is set acquire a referenced parent dvp.  Typically
1235 	 * used for mkdir/mknod/ncreate/nremove/unlink/rename.
1236 	 *
1237 	 * If a mount-point transition occurs due to ncp being a mount point,
1238 	 * or a null-mount, nl_dvp will be set to NULL and an error code of
1239 	 * 0 will be returned.  A NULL nc_parent is not necessarily the only
1240 	 * indication of a mount-point as null-mounts will also tend to have
1241 	 * a non-null nc_parent.
1242 	 *
1243 	 * nch is locked, standard lock order for the namecache is
1244 	 * child-to-parent so we can safely lock its parent.  We can
1245 	 * just use cache_dvpref().
1246 	 */
1247 	if ((nd->nl_flags & NLC_REFDVP) &&
1248 	    (doretry == FALSE || inretry == TRUE)) {
1249 		if (nch.ncp->nc_parent) {
1250 			error = cache_resolve_dvp(&nch, nd->nl_cred,
1251 						  &nd->nl_dvp);
1252 			if (error) {
1253 				if (nlookup_debug & 1) {
1254 				    kprintf("Parent directory lost during "
1255 					    "nlookup: %s/%s (%08x/%08x)\n",
1256 					    nch.ncp->nc_parent->nc_name,
1257 					    nch.ncp->nc_name,
1258 					    nch.ncp->nc_parent->nc_flag,
1259 					    nch.ncp->nc_flag);
1260 				}
1261 				cache_put(&nch);
1262 				error = EINVAL;
1263 				break;
1264 			}
1265 
1266 			/*
1267 			 * Mount-point, nl_dvp should remain NULL, error 0,
1268 			 * caller won't be able to use the results so leave
1269 			 * the ncp referenced but unlocked.
1270 			 */
1271 			if (nd->nl_dvp == NULL) {
1272 				cache_put(&nch);
1273 				break;
1274 			}
1275 
1276 			/*
1277 			 * Good directory, fall through to drop-and-cache
1278 			 * below
1279 			 */
1280 			/* */
1281 		} else {
1282 			/*
1283 			 * Mount-point, nl_dvp should remain NULL, error 0,
1284 			 * caller won't be able to use the results so leave
1285 			 * the ncp referenced but unlocked.
1286 			 */
1287 			error = 0;
1288 			cache_put(&nch);
1289 			break;
1290 		}
1291 	}
1292 
1293 	/*
1294 	 * ncp left with lock+ref on break, set NLC_NCPISLOCKED flag
1295 	 */
1296 	cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno);
1297 	nd->nl_nch = nch;
1298 	nd->nl_flags |= NLC_NCPISLOCKED;
1299 	nl_gen = nch_gen;
1300 	error = 0;
1301 	break;
1302     }
1303 
1304     /*
1305      * Force a retry (up to max_retries) if nl_gen is incorrect
1306      *
1307      * NOTE: On generation changes we must at a minimum cycle
1308      *	     the lock.   In this case we have one so we are ok.
1309      */
1310     if (nd->nl_nch.ncp && (nd->nl_nch.ncp->nc_generation - nl_gen) & ~1) {
1311 	if (nlookup_debug & 1) {
1312 	    kprintf("nlookup: DONE error %d: GEN CHANGE ON \"%s\" "
1313 		    "%d (retries %d)\n",
1314 		    error,
1315 		    nd->nl_nch.ncp->nc_name,
1316 		    (nd->nl_nch.ncp->nc_generation - nl_gen),
1317 		    max_retries);
1318 	}
1319 	gen_changed = 1;
1320     }
1321     if (gen_changed) {
1322 	if (max_retries) {
1323 	    --max_retries;
1324 	    doretry = TRUE;
1325 	    inretry = FALSE;
1326 	} else {
1327 	    error = EINVAL;
1328 	}
1329     }
1330 
1331     /*
1332      * We are done / or possibly retry
1333      */
1334     if (hit)
1335 	++gd->gd_nchstats->ncs_longhits;
1336     else
1337 	++gd->gd_nchstats->ncs_longmiss;
1338 
1339     if (nd->nl_flags & NLC_NCPISLOCKED)
1340 	KKASSERT(cache_lockstatus(&nd->nl_nch) > 0);
1341 
1342     /*
1343      * Reset nd->nl_path if necessary (due to softlinks).  We want to return
1344      * nl_path to its original state before retrying or returning.
1345      */
1346     if (path_reset) {
1347 	if (nd->nl_flags & NLC_HASBUF) {
1348 	    objcache_put(namei_oc, nd->nl_path);
1349 	    nd->nl_flags &= ~NLC_HASBUF;
1350 	}
1351 	nd->nl_path = path_reset;
1352 	nd->nl_flags |= saveflag & NLC_HASBUF;
1353 	path_reset = NULL;
1354     }
1355 
1356     /*
1357      * Retry the whole thing if doretry flag is set, but only once.
1358      *
1359      * autofs(5) may mount another filesystem under its root directory
1360      * while resolving a path.
1361      *
1362      * NFS might return ESTALE
1363      */
1364     if (doretry && !inretry) {
1365 	if (nlookup_debug & 2)
1366 	    kprintf("nlookup: errno %d retry %s\n", error, nd->nl_path);
1367 	inretry = TRUE;
1368 
1369 	/*
1370 	 * Clean up nd->nl_nch and reset to base directory
1371 	 */
1372 	if (nd->nl_flags & NLC_NCPISLOCKED) {
1373 		cache_unlock(&nd->nl_nch);
1374 		nd->nl_flags &= ~NLC_NCPISLOCKED;
1375 	}
1376 	cache_drop(&nd->nl_nch);
1377 	cache_copy(nd->nl_basench, &nd->nl_nch);
1378 
1379 	nd->nl_elmno = 0;
1380 	nd->nl_flags |= saveflag;
1381 
1382 	goto nlookup_start;
1383     }
1384 
1385     /*
1386      * NOTE: If NLC_CREATE was set the ncp may represent a negative hit
1387      * (ncp->nc_error will be ENOENT), but we will still return an error
1388      * code of 0.
1389      */
1390     return(error);
1391 }
1392 
1393 /*
1394  * Resolve a mount point's glue ncp.  This ncp connects creates the illusion
1395  * of continuity in the namecache tree by connecting the ncp related to the
1396  * vnode under the mount to the ncp related to the mount's root vnode.
1397  *
1398  * If no error occured a locked, ref'd ncp is stored in *ncpp.
1399  */
1400 int
1401 nlookup_mp(struct mount *mp, struct nchandle *nch)
1402 {
1403     struct vnode *vp;
1404     int error;
1405 
1406     error = 0;
1407     cache_get(&mp->mnt_ncmountpt, nch);
1408     if (nch->ncp->nc_flag & NCF_UNRESOLVED) {
1409 	while (vfs_busy(mp, 0))
1410 	    ;
1411 	error = VFS_ROOT(mp, &vp);
1412 	vfs_unbusy(mp);
1413 	if (error) {
1414 	    cache_put(nch);
1415 	} else {
1416 	    cache_setvp(nch, vp);
1417 	    vput(vp);
1418 	}
1419     }
1420     return(error);
1421 }
1422 
1423 /*
1424  * Read the contents of a symlink, allocate a path buffer out of the
1425  * namei_oc and initialize the supplied nlcomponent with the result.
1426  *
1427  * If an error occurs no buffer will be allocated or returned in the nlc.
1428  */
1429 int
1430 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch,
1431 		struct nlcomponent *nlc)
1432 {
1433     struct vnode *vp;
1434     struct iovec aiov;
1435     struct uio auio;
1436     int linklen;
1437     int error;
1438     char *cp;
1439 
1440     nlc->nlc_nameptr = NULL;
1441     nlc->nlc_namelen = 0;
1442     if (nch->ncp->nc_vp == NULL)
1443 	return(ENOENT);
1444     if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0)
1445 	return(error);
1446     cp = objcache_get(namei_oc, M_WAITOK);
1447     aiov.iov_base = cp;
1448     aiov.iov_len = MAXPATHLEN;
1449     auio.uio_iov = &aiov;
1450     auio.uio_iovcnt = 1;
1451     auio.uio_offset = 0;
1452     auio.uio_rw = UIO_READ;
1453     auio.uio_segflg = UIO_SYSSPACE;
1454     auio.uio_td = nd->nl_td;
1455     auio.uio_resid = MAXPATHLEN - 1;
1456     error = VOP_READLINK(vp, &auio, nd->nl_cred);
1457     if (error)
1458 	goto fail;
1459     linklen = MAXPATHLEN - 1 - auio.uio_resid;
1460     if (varsym_enable) {
1461 	linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1);
1462 	if (linklen < 0) {
1463 	    error = ENAMETOOLONG;
1464 	    goto fail;
1465 	}
1466     }
1467     cp[linklen] = 0;
1468     nlc->nlc_nameptr = cp;
1469     nlc->nlc_namelen = linklen;
1470     vput(vp);
1471     return(0);
1472 fail:
1473     objcache_put(namei_oc, cp);
1474     vput(vp);
1475     return(error);
1476 }
1477 
/*
 * Check access [XXX cache vattr!] [XXX quota]
 *
 * Generally check the NLC_* access bits.   All specified bits must pass
 * for this function to return 0.
 *
 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST
 * access, otherwise it must exist.  No error is returned in this case.
 *
 * The file must not exist if NLC_EXCL is specified.
 *
 * Directory permissions in general are tested for NLC_CREATE if the file
 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST
 * whether the file exists or not.
 *
 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST,
 * the latter is only tested if the target exists.
 *
 * The passed ncp must be referenced and locked.  If it is already resolved
 * it may be locked shared but otherwise should be locked exclusively.
 *
 * Arguments:
 *	nch		namecache handle being checked.
 *	genp		passed through to cache_resolve() when the entry
 *			has to be resolved here.
 *	nflags		NLC_* bits describing the requested access.
 *	cred		credentials the access is checked against.
 *	nflagsp		optional.  When non-NULL, NLC_STICKY, NLC_APPENDONLY
 *			and NLC_IMMUTABLE are OR'd into *nflagsp based on the
 *			attributes of the target.  A non-NULL nflagsp also
 *			disables the NCF_WXOK short-cut.
 *	nchislocked	non-zero if the caller already holds a lock on nch.
 *			When zero this function takes the lock itself at the
 *			points where it needs one, records that by setting
 *			nchislocked to 2, and releases the lock again before
 *			returning.
 *
 * Returns 0 if all requested checks pass, otherwise an errno.
 */

/*
 * S_WXOK_MASK: read and search bits for user, group and other.  A directory
 *		with all of them set gets NCF_WXOK in naccess().
 * S_XOK_MASK:	the three execute bits.  An entry with none of them set gets
 *		NCF_NOTX in naccess().
 */
#define S_WXOK_MASK	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
#define S_XOK_MASK	(S_IXUSR|S_IXGRP|S_IXOTH)

static int
naccess(struct nchandle *nch, u_int *genp, int nflags,
	struct ucred *cred, int *nflagsp, int nchislocked)
{
    struct vnode *vp;
    struct vattr_lite lva;
    struct namecache *ncp;
    int error;
    int cflags;

    ncp = nch->ncp;

again:
    /*
     * We need a resolved entry.  If the entry is not resolved we need
     * to lock and resolve it.  If it is already resolved, our ref should
     * prevent normal evictions (as long as we tested the lock race above).
     *
     * If the ncp was locked by the caller and left unresolved, it must
     * have been locked exclusively.
     */
    if (ncp->nc_flag & NCF_UNRESOLVED) {
	if (nchislocked == 0) {
	    cache_lock(nch);
	    nchislocked = 2;
	}
	cache_resolve(nch, genp, cred);
	ncp = nch->ncp;
    }
    error = ncp->nc_error;

    /*
     * Only unresolved entries should return this error (though maybe
     * in-filesystem sockets can too).   XXX check filetype for VSOCK.
     *
     * If we do not hold the lock yet, take it and start over so the
     * entry is re-examined (and resolved if necessary) under the lock.
     */
    if (error == ENOTCONN) {
	if (nchislocked == 0) {
	    if (nlookup_debug & 4) {
		kprintf("ncp %p %08x %d %s: Warning, unexpected state, "
			"forcing lock\n",
			ncp, ncp->nc_flag, ncp->nc_error, ncp->nc_name);
		print_backtrace(-1);
	    }
	    cache_lock(nch);
	    nchislocked = 2;
	    goto again;
	}
	if (nlookup_debug & 4) {
	    kprintf("ncp %p %08x %d %s: Warning, unexpected state\n",
		    ncp, ncp->nc_flag, ncp->nc_error, ncp->nc_name);
	    print_backtrace(-1);
	}
    }

    /*
     * Directory permissions checks.  Silently ignore ENOENT if these
     * tests pass.  It isn't an error.
     *
     * We can safely resolve ncp->nc_parent because ncp is currently
     * locked.
     */
    if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) {
	if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) ||
	    ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) ||
	    ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) ||
	    (nflags & NLC_RENAME_DST)
	) {
	    struct nchandle par;

	    /*
	     * The parent is only examined with ncp locked.  If we had
	     * to acquire the lock here, restart from the top so the
	     * state tested above is re-read under the lock.
	     */
	    if (nchislocked == 0) {
		cache_lock_maybe_shared(nch, 0);
		nchislocked = 2;
		goto again;
	    }
	    if ((par.ncp = ncp->nc_parent) == NULL) {
		if (error != EAGAIN)
		    error = EINVAL;
	    } else if (error == 0 || error == ENOENT) {
		u_int dummy_gen = 0;

		/*
		 * Require NLC_WRITE access on the parent.  The result
		 * replaces error, so a passing check turns an ENOENT
		 * on the target into 0.
		 */
		par.mount = nch->mount;
		cache_hold(&par);
		cache_lock_maybe_shared(&par, 0);
		error = naccess(&par, &dummy_gen, NLC_WRITE, cred, NULL, 1);
		cache_put(&par);
	    }
	}
    }

    /*
     * NLC_EXCL check.  Target file must not exist.
     */
    if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL)
	error = EEXIST;

    /*
     * Try to short-cut the vnode operation for intermediate directory
     * components.  This is a major SMP win because it avoids having
     * to execute a lot of code for intermediate directory components,
     * including shared refs and locks on intermediate directory vnodes.
     *
     * We can only do this if the caller does not need nflagsp.
     */
    if (error == 0 && nflagsp == NULL &&
	nflags == NLC_EXEC && (ncp->nc_flag & NCF_WXOK)) {
	if (nchislocked == 2)
		cache_unlock(nch);
	return 0;
    }

    /*
     * Get the vnode attributes so we can do the rest of our checks.
     *
     * NOTE: We only call naccess_lva() if the target exists.
     */
    if (error == 0) {
	if (nchislocked == 0) {
	    cache_lock_maybe_shared(nch, 0);
	    nchislocked = 2;
	}
#if 0
	error = cache_vget(nch, cred, LK_SHARED, &vp);
#else
	error = cache_vref(nch, cred, &vp);
#endif
	if (error == ENOENT) {
	    /*
	     * Silently zero-out ENOENT if creating or renaming
	     * (rename target).  It isn't an error.
	     */
	    if (nflags & (NLC_CREATE | NLC_RENAME_DST))
		error = 0;
	} else if (error == 0) {
	    /*
	     * Get the vnode attributes and check for illegal O_TRUNC
	     * requests and read-only mounts.
	     *
	     * NOTE: You can still open devices on read-only mounts for
	     * 	     writing.
	     *
	     * NOTE: creates/deletes/renames are handled by the NLC_WRITE
	     *	     check on the parent directory above.
	     *
	     * XXX cache the va in the namecache or in the vnode
	     */
	    error = VOP_GETATTR_LITE(vp, &lva);
	    if (error == 0 && (nflags & NLC_TRUNCATE)) {
		switch(lva.va_type) {
		case VREG:
		case VDATABASE:
		case VCHR:
		case VBLK:
		case VFIFO:
		    break;
		case VDIR:
		    error = EISDIR;
		    break;
		default:
		    error = EINVAL;
		    break;
		}
	    }
	    if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount &&
		(vp->v_mount->mnt_flag & MNT_RDONLY)
	    ) {
		switch(lva.va_type) {
		case VDIR:
		case VLNK:
		case VREG:
		case VDATABASE:
		    error = EROFS;
		    break;
		default:
		    break;
		}
	    }
#if 0
	    vput(vp);
#else
	    vrele(vp);
#endif

	    /*
	     * Check permissions based on file attributes.  The passed
	     * flags (*nflagsp) are modified with feedback based on
	     * special attributes and requirements.
	     */
	    if (error == 0) {
		/*
		 * Adjust the returned (*nflagsp) if non-NULL.
		 */
		if (nflagsp) {
		    if ((lva.va_mode & VSVTX) && lva.va_uid != cred->cr_uid)
			*nflagsp |= NLC_STICKY;
		    if (lva.va_flags & APPEND)
			*nflagsp |= NLC_APPENDONLY;
		    if (lva.va_flags & IMMUTABLE)
			*nflagsp |= NLC_IMMUTABLE;
		}

		/*
		 * NCF_WXOK can be set for world-searchable directories.
		 *
		 * XXX When we implement capabilities this code would also
		 * need a cap check, or only set the flag if there are no
		 * capabilities.
		 */
		cflags = 0;
		if (lva.va_type == VDIR &&
		    (lva.va_mode & S_WXOK_MASK) == S_WXOK_MASK) {
			cflags |= NCF_WXOK;
		}
		if ((lva.va_mode & S_XOK_MASK) == 0)
			cflags |= NCF_NOTX;

		/*
		 * Track swapcache management flags in the namecache.
		 *
		 * Calculate the flags based on the current vattr_lite info
		 * and recalculate the inherited flags from the parent
		 * (the original cache linkage may have occurred without
		 * getattrs and thus have stale flags).
		 */
		if (lva.va_flags & SF_NOCACHE)
			cflags |= NCF_SF_NOCACHE;
		if (lva.va_flags & UF_CACHE)
			cflags |= NCF_UF_CACHE;
		if (ncp->nc_parent) {
			if (ncp->nc_parent->nc_flag &
			    (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) {
				cflags |= NCF_SF_PNOCACHE;
			}
			if (ncp->nc_parent->nc_flag &
			    (NCF_UF_CACHE | NCF_UF_PCACHE)) {
				cflags |= NCF_UF_PCACHE;
			}
		}

		/*
		 * We're not supposed to update nc_flag when holding a shared
		 * lock, but we allow the case for certain flags.  Note that
		 * holding an exclusive lock allows updating nc_flag without
		 * atomics.  nc_flag is not allowed to be updated at all unless
		 * a shared or exclusive lock is held.
		 *
		 * Clear whichever of the managed flags are not in cflags,
		 * then set the ones that are.
		 */
		atomic_clear_short(&ncp->nc_flag,
				   (NCF_SF_NOCACHE | NCF_UF_CACHE |
				   NCF_SF_PNOCACHE | NCF_UF_PCACHE |
				   NCF_WXOK | NCF_NOTX) & ~cflags);
		atomic_set_short(&ncp->nc_flag, cflags);

		/*
		 * Process general access.
		 */
		error = naccess_lva(&lva, nflags, cred);
	    }
	}
    }

    /*
     * Drop the lock only if it was acquired by this function.
     */
    if (nchislocked == 2)
	cache_unlock(nch);
    return(error);
}
1765 
1766 /*
1767  * Check the requested access against the given vattr using cred.
1768  */
1769 int
1770 naccess_lva(struct vattr_lite *lvap, int nflags, struct ucred *cred)
1771 {
1772     int i;
1773     int vmode;
1774 
1775     /*
1776      * Test the immutable bit.  Creations, deletions, renames (source
1777      * or destination) are not allowed.  chown/chmod/other is also not
1778      * allowed but is handled by SETATTR.  Hardlinks to the immutable
1779      * file are allowed.
1780      *
1781      * If the directory is set to immutable then creations, deletions,
1782      * renames (source or dest) and hardlinks to files within the directory
1783      * are not allowed, and regular files opened through the directory may
1784      * not be written to or truncated (unless a special device).
1785      *
1786      * NOTE!  New hardlinks to immutable files work but new hardlinks to
1787      * files, immutable or not, sitting inside an immutable directory are
1788      * not allowed.  As always if the file is hardlinked via some other
1789      * path additional hardlinks may be possible even if the file is marked
1790      * immutable.  The sysop needs to create a closure by checking the hard
1791      * link count.  Once closure is achieved you are good, and security
1792      * scripts should check link counts anyway.
1793      *
1794      * Writes and truncations are only allowed on special devices.
1795      */
1796     if ((lvap->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) {
1797 	if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK))
1798 	    return (EPERM);
1799 	if (nflags & (NLC_CREATE | NLC_DELETE |
1800 		      NLC_RENAME_SRC | NLC_RENAME_DST)) {
1801 	    return (EPERM);
1802 	}
1803 	if (nflags & (NLC_WRITE | NLC_TRUNCATE)) {
1804 	    switch(lvap->va_type) {
1805 	    case VDIR:
1806 		return (EISDIR);
1807 	    case VLNK:
1808 	    case VREG:
1809 	    case VDATABASE:
1810 		return (EPERM);
1811 	    default:
1812 		break;
1813 	    }
1814 	}
1815     }
1816 
1817     /*
1818      * Test the no-unlink and append-only bits for opens, rename targets,
1819      * and deletions.  These bits are not tested for creations or
1820      * rename sources.
1821      *
1822      * Unlike FreeBSD we allow a file with APPEND set to be renamed.
1823      * If you do not wish this you must also set NOUNLINK.
1824      *
1825      * If the governing directory is marked APPEND-only it implies
1826      * NOUNLINK for all entries in the directory.
1827      */
1828     if (((lvap->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) &&
1829 	(nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST))
1830     ) {
1831 	return (EPERM);
1832     }
1833 
1834     /*
1835      * A file marked append-only may not be deleted but can be renamed.
1836      */
1837     if ((lvap->va_flags & APPEND) &&
1838 	(nflags & (NLC_DELETE | NLC_RENAME_DST))
1839     ) {
1840 	return (EPERM);
1841     }
1842 
1843     /*
1844      * A file marked append-only which is opened for writing must also
1845      * be opened O_APPEND.
1846      */
1847     if ((lvap->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) {
1848 	if (nflags & NLC_TRUNCATE)
1849 	    return (EPERM);
1850 	if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) {
1851 	    if ((nflags & NLC_APPEND) == 0)
1852 		return (EPERM);
1853 	}
1854     }
1855 
1856     /*
1857      * root gets universal access
1858      */
1859     if (cred->cr_uid == 0)
1860 	return(0);
1861 
1862     /*
1863      * Check owner perms.
1864      *
1865      * If NLC_OWN is set the owner of the file is allowed no matter when
1866      * the owner-mode bits say (utimes).
1867      */
1868     vmode = 0;
1869     if (nflags & NLC_READ)
1870 	vmode |= S_IRUSR;
1871     if (nflags & NLC_WRITE)
1872 	vmode |= S_IWUSR;
1873     if (nflags & NLC_EXEC)
1874 	vmode |= S_IXUSR;
1875 
1876     if (cred->cr_uid == lvap->va_uid) {
1877 	if ((nflags & NLC_OWN) == 0) {
1878 	    if ((vmode & lvap->va_mode) != vmode)
1879 		return(EACCES);
1880 	}
1881 	return(0);
1882     }
1883 
1884     /*
1885      * If NLC_STICKY is set only the owner may delete or rename a file.
1886      * This bit is typically set on /tmp.
1887      *
1888      * Note that the NLC_READ/WRITE/EXEC bits are not typically set in
1889      * the specific delete or rename case.  For deletions and renames we
1890      * usually just care about directory permissions, not file permissions.
1891      */
1892     if ((nflags & NLC_STICKY) &&
1893 	(nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) {
1894 	return(EACCES);
1895     }
1896 
1897     /*
1898      * Check group perms
1899      */
1900     vmode >>= 3;
1901     for (i = 0; i < cred->cr_ngroups; ++i) {
1902 	if (lvap->va_gid == cred->cr_groups[i]) {
1903 	    if ((vmode & lvap->va_mode) != vmode)
1904 		return(EACCES);
1905 	    return(0);
1906 	}
1907     }
1908 
1909     /*
1910      * Check world perms
1911      */
1912     vmode >>= 3;
1913     if ((vmode & lvap->va_mode) != vmode)
1914 	return(EACCES);
1915     return(0);
1916 }
1917 
1918 /*
1919  * Long-term (10-second interval) statistics collection
1920  */
1921 static
1922 uint64_t
1923 collect_nlookup_callback(int n)
1924 {
1925 	static uint64_t last_total;
1926 	uint64_t save;
1927 	uint64_t total;
1928 
1929 	total = 0;
1930 	for (n = 0; n < ncpus; ++n) {
1931 		globaldata_t gd = globaldata_find(n);
1932 		struct nchstats *sp;
1933 
1934 		if ((sp = gd->gd_nchstats) != NULL)
1935 			total += sp->ncs_longhits + sp->ncs_longmiss;
1936 	}
1937 	save = total;
1938 	total = total - last_total;
1939 	last_total = save;
1940 
1941 	return total;
1942 }
1943 
/*
 * Register collect_nlookup_callback() with kcollect under the
 * KCOLLECT_NLOOKUP id and the label "nlookup".  Run once via the
 * SYSINIT below; the dummy argument is unused.
 */
static
void
nlookup_collect_init(void *dummy __unused)
{
	kcollect_register(KCOLLECT_NLOOKUP, "nlookup", collect_nlookup_callback,
			  KCOLLECT_SCALE(KCOLLECT_NLOOKUP_FORMAT, 0));
}
SYSINIT(collect_nlookup, SI_SUB_PROP, SI_ORDER_ANY, nlookup_collect_init, 0);
1952