xref: /openbsd/sys/kern/vfs_cache.c (revision 0d280c5f)
1 /*	$OpenBSD: vfs_cache.c,v 1.58 2022/08/14 01:58:28 jsg Exp $	*/
2 /*	$NetBSD: vfs_cache.c,v 1.13 1996/02/04 02:18:09 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)vfs_cache.c	8.3 (Berkeley) 8/22/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/vnode.h>
38 #include <sys/lock.h>
39 #include <sys/namei.h>
40 #include <sys/errno.h>
41 #include <sys/pool.h>
42 
43 /*
44  * TODO: namecache access should really be locked.
45  */
46 
47 /*
48  * For simplicity (and economy of storage), names longer than
49  * a maximum length of NAMECACHE_MAXLEN are not cached; they occur
50  * infrequently in any case, and are almost never of interest.
51  *
52  * Upon reaching the last segment of a path, if the reference
53  * is for DELETE, or NOCACHE is set (rewrite), and the
54  * name is located in the cache, it will be dropped.
55  */
56 
57 /*
58  * Structures associated with name caching.
59  */
long	numcache;	/* total number of cache entries allocated */
long	numneg;		/* number of negative cache entries */

TAILQ_HEAD(, namecache) nclruhead;	/* Regular Entry LRU chain */
TAILQ_HEAD(, namecache) nclruneghead;	/* Negative Entry LRU chain */
struct	nchstats nchstats;		/* cache effectiveness statistics */

int doingcache = 1;			/* 1 => enable the cache */

struct pool nch_pool;			/* allocator for struct namecache */

void cache_zap(struct namecache *);	/* forward declaration */
u_long nextvnodeid;	/* source of fresh v_id values; see cache_purge() */
73 
74 static inline int
namecache_compare(const struct namecache * n1,const struct namecache * n2)75 namecache_compare(const struct namecache *n1, const struct namecache *n2)
76 {
77 	if (n1->nc_nlen == n2->nc_nlen)
78 		return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen));
79 	else
80 		return (n1->nc_nlen - n2->nc_nlen);
81 }
82 
83 RBT_PROTOTYPE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
84 RBT_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
85 
/*
 * Initialize the per-directory-vnode red-black tree of namecache
 * entries; called when a vnode's cache state is set up.
 */
void
cache_tree_init(struct namecache_rb_cache *tree)
{
	RBT_INIT(namecache_rb_cache, tree);
}
91 
/*
 * blow away a namecache entry
 *
 * Unlinks ncp from its LRU chain, from its directory's red-black tree,
 * and (when applicable) from the reverse map, then frees it.  If this
 * was the last entry hanging off the directory vnode, the hold taken in
 * cache_enter() is released with vdrop() — after the pool_put(), so the
 * entry is gone before the vnode can be reclaimed.
 */
void
cache_zap(struct namecache *ncp)
{
	struct vnode *dvp = NULL;

	/* A NULL nc_vp marks a negative entry; each kind has its own LRU. */
	if (ncp->nc_vp != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		numcache--;
	} else {
		TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
		numneg--;
	}
	if (ncp->nc_dvp) {
		RBT_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp);
		/* Last name cached under this directory: drop its hold. */
		if (RBT_EMPTY(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree))
			dvp = ncp->nc_dvp;
	}
	/*
	 * Remove from the target vnode's reverse map, but only if the
	 * entry was actually put there: cache_enter() skips "." and ".."
	 * (the name tests below mirror the ones there), and a stale
	 * nc_vpid means the vnode was recycled since we were entered.
	 */
	if (ncp->nc_vp && (ncp->nc_vpid == ncp->nc_vp->v_id)) {
		if (ncp->nc_vp != ncp->nc_dvp &&
		    ncp->nc_vp->v_type == VDIR &&
		    (ncp->nc_nlen > 2 ||
			(ncp->nc_nlen > 1 &&
			    ncp->nc_name[1] != '.') ||
			(ncp->nc_nlen > 0 &&
			    ncp->nc_name[0] != '.'))) {
			TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me);
		}
	}
	pool_put(&nch_pool, ncp);
	if (dvp)
		vdrop(dvp);
}
127 
/*
 * Look for a name in the cache.
 * dvp points to the directory to search. The componentname cnp holds
 * the information on the entry being sought, such as its length
 * and its name. If the lookup succeeds, vpp is set to point to the vnode
 * and an error of 0 is returned. If the lookup determines the name does
 * not exist (negative caching) an error of ENOENT is returned. If the
 * lookup fails, an error of -1 is returned.
 *
 * Locking: dvp is expected locked on entry.  On the ".." and plain-hit
 * paths dvp may be unlocked to respect lock ordering; when that happens
 * PDIRUNLOCK is set in cnp->cn_flags so the caller knows.
 */
int
cache_lookup(struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp)
{
	struct namecache *ncp;
	struct namecache n;	/* stack-local search key for RBT_FIND */
	struct vnode *vp;
	u_long vpid;		/* vp's capability number at hit time */
	int error;

	*vpp = NULL;

	if (!doingcache) {
		cnp->cn_flags &= ~MAKEENTRY;
		return (-1);
	}
	/* Overlong names are never cached; count them and miss. */
	if (cnp->cn_namelen > NAMECACHE_MAXLEN) {
		nchstats.ncs_long++;
		cnp->cn_flags &= ~MAKEENTRY;
		return (-1);
	}

	/* lookup in directory vnode's redblack tree */
	n.nc_nlen = cnp->cn_namelen;
	memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen);
	ncp = RBT_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n);

	if (ncp == NULL) {
		nchstats.ncs_miss++;
		return (-1);
	}
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		/* Caller wants no cache entry (delete/rename): drop it. */
		nchstats.ncs_badhits++;
		goto remove;
	} else if (ncp->nc_vp == NULL) {
		/* Negative entry: the name is cached as nonexistent. */
		if (cnp->cn_nameiop != CREATE ||
		    (cnp->cn_flags & ISLASTCN) == 0) {
			nchstats.ncs_neghits++;
			/*
			 * Move this slot to end of the negative LRU chain,
			 */
			if (TAILQ_NEXT(ncp, nc_neg) != NULL) {
				TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
				TAILQ_INSERT_TAIL(&nclruneghead, ncp,
				    nc_neg);
			}
			return (ENOENT);
		} else {
			/*
			 * Creating the last component: the negative
			 * entry is about to become wrong, so drop it.
			 */
			nchstats.ncs_badhits++;
			goto remove;
		}
	} else if (ncp->nc_vpid != ncp->nc_vp->v_id) {
		/* Cached vnode was recycled since entry: stale hit. */
		nchstats.ncs_falsehits++;
		goto remove;
	}

	/*
	 * Move this slot to end of the regular LRU chain.
	 */
	if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
		TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
	}

	vp = ncp->nc_vp;
	vpid = vp->v_id;	/* snapshot: re-checked after locking vp */
	if (vp == dvp) {	/* lookup on "." */
		vref(dvp);
		error = 0;
	} else if (cnp->cn_flags & ISDOTDOT) {
		/* Lock order: parent-before-child, so unlock dvp first. */
		VOP_UNLOCK(dvp);
		cnp->cn_flags |= PDIRUNLOCK;
		error = vget(vp, LK_EXCLUSIVE);
		/*
		 * If the above vget() succeeded and both LOCKPARENT and
		 * ISLASTCN is set, lock the directory vnode as well.
		 */
		if (!error && (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) == 0) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0) {
				vput(vp);
				return (error);
			}
			cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
		error = vget(vp, LK_EXCLUSIVE);
		/*
		 * If the above vget() failed or either of LOCKPARENT or
		 * ISLASTCN is set, unlock the directory vnode.
		 */
		if (error || (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
			VOP_UNLOCK(dvp);
			cnp->cn_flags |= PDIRUNLOCK;
		}
	}

	/*
	 * Check that the lock succeeded, and that the capability number did
	 * not change while we were waiting for the lock.
	 */
	if (error || vpid != vp->v_id) {
		if (!error) {
			vput(vp);
			nchstats.ncs_falsehits++;
		} else
			nchstats.ncs_badhits++;
		/*
		 * The parent needs to be locked when we return to VOP_LOOKUP().
		 * The `.' case here should be extremely rare (if it can happen
		 * at all), so we don't bother optimizing out the unlock/relock.
		 */
		if (vp == dvp || error ||
		    (~cnp->cn_flags & (LOCKPARENT|ISLASTCN)) != 0) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE)) != 0)
				return (error);
			cnp->cn_flags &= ~PDIRUNLOCK;
		}
		return (-1);
	}

	nchstats.ncs_goodhits++;
	*vpp = vp;
	return (0);

remove:
	/*
	 * Last component and we are renaming or deleting,
	 * the cache entry is invalid, or otherwise don't
	 * want cache entry to exist.
	 */
	cache_zap(ncp);
	return (-1);
}
270 
/*
 * Scan cache looking for name of directory entry pointing at vp.
 *
 * Fill in dvpp.
 *
 * If bufp is non-NULL, also place the name in the buffer which starts
 * at bufp, immediately before *bpp, and move bpp backwards to point
 * at the start of it.  (Yes, this is a little baroque, but it's done
 * this way to cater to the whims of getcwd).
 *
 * Returns 0 on success, -1 on cache miss, positive errno on failure.
 *
 * TODO: should we return *dvpp locked?
 */

int
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
	struct namecache *ncp;
	struct vnode *dvp = NULL;
	char *bp;

	if (!doingcache)
		goto out;
	/*
	 * Walk vp's reverse map for an entry whose parent vnode is still
	 * valid (nc_dvpid matching v_id means it has not been recycled).
	 */
	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) {
		dvp = ncp->nc_dvp;
		if (dvp && dvp != vp && ncp->nc_dvpid == dvp->v_id)
			goto found;
	}
	goto miss;
found:
#ifdef DIAGNOSTIC
	/* "." and ".." are never placed in the reverse map. */
	if (ncp->nc_nlen == 1 &&
	    ncp->nc_name[0] == '.')
		panic("cache_revlookup: found entry for .");
	if (ncp->nc_nlen == 2 &&
	    ncp->nc_name[0] == '.' &&
	    ncp->nc_name[1] == '.')
		panic("cache_revlookup: found entry for ..");
#endif
	nchstats.ncs_revhits++;

	if (bufp != NULL) {
		/* Copy the name backwards into the caller's buffer. */
		bp = *bpp;
		bp -= ncp->nc_nlen;
		if (bp <= bufp) {
			/* No room left (bufp itself is reserved). */
			*dvpp = NULL;
			return (ERANGE);
		}
		memcpy(bp, ncp->nc_name, ncp->nc_nlen);
		*bpp = bp;
	}

	*dvpp = dvp;

	/*
	 * XXX: Should we vget() here to have more
	 * consistent semantics with cache_lookup()?
	 */
	return (0);

miss:
	nchstats.ncs_revmiss++;
out:
	*dvpp = NULL;
	return (-1);
}
338 
/*
 * Add an entry to the cache
 *
 * A NULL vp enters a negative entry ("name known not to exist" under
 * dvp).  The first entry cached under a directory takes a vhold() on
 * it, released by cache_zap() when the last entry goes away.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
	struct namecache *ncp, *lncp;

	if (!doingcache || cnp->cn_namelen > NAMECACHE_MAXLEN)
		return;

	/*
	 * allocate, or recycle (free and allocate) an ncp.
	 */
	if (numcache >= initialvnodes) {
		/* Cache full: evict the oldest regular entry, else the
		 * oldest negative entry. */
		if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL)
			cache_zap(ncp);
		else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
			cache_zap(ncp);
		else
			panic("wtf? leak?");
	}
	ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);

	/* grab the vnode we just found */
	ncp->nc_vp = vp;
	if (vp)
		ncp->nc_vpid = vp->v_id;

	/* fill in cache info */
	ncp->nc_dvp = dvp;
	ncp->nc_dvpid = dvp->v_id;
	ncp->nc_nlen = cnp->cn_namelen;
	memcpy(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen);
	/* First entry under this directory: hold it while we cache names. */
	if (RBT_EMPTY(namecache_rb_cache, &dvp->v_nc_tree)) {
		vhold(dvp);
	}
	if ((lncp = RBT_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp))
	    != NULL) {
		/* someone has raced us and added a different entry
		 * for the same vnode (different ncp) - we don't need
		 * this entry, so free it and we are done.
		 */
		pool_put(&nch_pool, ncp);
		/* we know now dvp->v_nc_tree is not empty, no need
		 * to vdrop here
		 */
		goto done;
	}
	if (vp) {
		TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
		numcache++;
		/* don't put . or .. in the reverse map */
		if (vp != dvp && vp->v_type == VDIR &&
		    (ncp->nc_nlen > 2 ||
			(ncp->nc_nlen > 1 &&
			    ncp->nc_name[1] != '.') ||
			(ncp->nc_nlen > 0 &&
			    ncp->nc_name[0] != '.')))
			TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp,
			    nc_me);
	} else {
		/* Negative entry: goes on its own LRU chain. */
		TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
		numneg++;
	}
	/* Keep the negative-entry population bounded. */
	if (numneg  > initialvnodes) {
		if ((ncp = TAILQ_FIRST(&nclruneghead))
		    != NULL)
			cache_zap(ncp);
	}
done:
	return;
}
412 
413 
414 /*
415  * Name cache initialization, from vfs_init() when we are booting
416  */
417 void
nchinit(void)418 nchinit(void)
419 {
420 	TAILQ_INIT(&nclruhead);
421 	TAILQ_INIT(&nclruneghead);
422 	pool_init(&nch_pool, sizeof(struct namecache), 0, IPL_NONE, PR_WAITOK,
423 	    "nchpl", NULL);
424 }
425 
/*
 * Cache flush, a particular vnode; called when a vnode is renamed to
 * hide entries that would now be invalid
 */
void
cache_purge(struct vnode *vp)
{
	struct namecache *ncp;

	/* We should never have destinations cached for a non-VDIR vnode. */
	KASSERT(vp->v_type == VDIR || TAILQ_EMPTY(&vp->v_cache_dst));

	/* Zap every entry that names vp (reverse map) ... */
	while ((ncp = TAILQ_FIRST(&vp->v_cache_dst)))
		cache_zap(ncp);
	/* ... and every entry cached under vp as a directory. */
	while ((ncp = RBT_ROOT(namecache_rb_cache, &vp->v_nc_tree)))
		cache_zap(ncp);

	/* XXX this blows goats */
	/*
	 * Bump v_id so any entries still holding the old id (checked
	 * via nc_vpid/nc_dvpid elsewhere) become stale; skip 0 so a
	 * zeroed field never matches.
	 */
	vp->v_id = ++nextvnodeid;
	if (vp->v_id == 0)
		vp->v_id = ++nextvnodeid;
}
448 
449 /*
450  * Cache flush, a whole filesystem; called when filesys is umounted to
451  * remove entries that would now be invalid
452  */
453 void
cache_purgevfs(struct mount * mp)454 cache_purgevfs(struct mount *mp)
455 {
456 	struct namecache *ncp, *nxtcp;
457 
458 	/* whack the regular entries */
459 	TAILQ_FOREACH_SAFE(ncp, &nclruhead, nc_lru, nxtcp) {
460 		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp)
461 			continue;
462 		/* free the resources we had */
463 		cache_zap(ncp);
464 	}
465 	/* whack the negative entries */
466 	TAILQ_FOREACH_SAFE(ncp, &nclruneghead, nc_neg, nxtcp) {
467 		if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp)
468 			continue;
469 		/* free the resources we had */
470 		cache_zap(ncp);
471 	}
472 }
473