xref: /dragonfly/sys/kern/vfs_init.c (revision e8364298)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed
6  * to Berkeley by John Heidemann of the UCLA Ficus project.
7  *
8  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)vfs_init.c	8.3 (Berkeley) 1/4/94
39  * $FreeBSD: src/sys/kern/vfs_init.c,v 1.59 2002/04/30 18:44:32 dillon Exp $
40  * $DragonFly: src/sys/kern/vfs_init.c,v 1.4 2004/03/16 18:42:35 hmp Exp $
41  */
42 
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/mount.h>
48 #include <sys/sysctl.h>
49 #include <sys/vnode.h>
50 #include <sys/malloc.h>
51 #include <vm/vm_zone.h>
52 
53 
54 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
55 
56 /*
57  * Zone for namei
58  */
59 struct vm_zone *namei_zone;
60 
61 /*
62  * vfs_init() will set maxvfsconf
63  * to the highest defined type number.
64  */
65 int maxvfsconf;
66 struct vfsconf *vfsconf;
67 
68 /*
69  * vfs_init.c
70  *
71  * Allocate and fill in operations vectors.
72  *
73  * An undocumented feature of this approach to defining operations is that
74  * there can be multiple entries in vfs_opv_descs for the same operations
75  * vector. This allows third parties to extend the set of operations
76  * supported by another layer in a binary compatible way. For example,
77  * assume that NFS needed to be modified to support Ficus. NFS has an entry
78  * (probably nfs_vnodeop_decls) declaring all the operations NFS supports by
79  * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions)
80  * listing those new operations Ficus adds to NFS, all without modifying the
81  * NFS code. (Of course, the OTW NFS protocol still needs to be munged, but
82  * that is a(whole)nother story.) This is a feature.
83  */
84 
85 /* Table of known vnodeop vectors (list of VFS vnode vectors) */
86 static const struct vnodeopv_desc **vnodeopv_descs;
87 static int vnodeopv_num;
88 
89 /* Table of known descs (list of vnode op handlers "vop_access_desc") */
90 static struct vnodeop_desc **vfs_op_descs;
91 static int *vfs_op_desc_refs;			/* reference counts */
92 static int num_op_descs;
93 
94 /*
95  * Allocate at least vfs_opv_numops number of VFS operation
96  * vectors.  The vector is never freed or deallocated once
97  * it is initialised, so the vnodes might safely reference
98  * it through their v_op pointer without the vector changing
99  * suddenly from under them.
100  *
101  * Allow this number to be tuned at boot.
102  */
103 static int vfs_opv_numops = 96;
104 TUNABLE_INT("vfs.opv_numops", &vfs_opv_numops);
105 SYSCTL_INT(_vfs, OID_AUTO, opv_numops, CTLFLAG_RD, &vfs_opv_numops,
106 	0, "Maximum number of operations in vop_t vector");
107 
/*
 * qsort() comparator for int elements.
 *
 * Returns a negative value, zero, or a positive value when *a is less
 * than, equal to, or greater than *b.  The result is produced by
 * comparison rather than subtraction so that operands of large opposite
 * sign cannot overflow.
 */
static __inline int
int_cmp(const void *a, const void *b)
{
	const int lhs = *(const int *)a;
	const int rhs = *(const int *)b;

	return ((lhs > rhs) - (lhs < rhs));
}
113 
114 /*
115  * Recalculate parts of the VFS operations vector (vfsops).
116  */
117 static void
118 vfs_opv_recalc(void)
119 {
120 	int i, j, k;
121 	int *vfs_op_offsets;
122 	vop_t ***opv_desc_vector_p;
123 	vop_t **opv_desc_vector;
124 	struct vnodeopv_entry_desc *opve_descp;
125 	const struct vnodeopv_desc *opv;
126 
127 	if (vfs_op_descs == NULL)
128 		panic("vfs_opv_recalc called with null vfs_op_descs");
129 
130 	/*
131 	 * Allocate and initialize temporary array to store
132 	 * offsets. Sort it to put all uninitialized entries
133 	 * first and to make holes in existing offset sequence
134 	 * detectable.
135 	 */
136 	MALLOC(vfs_op_offsets, int *,
137 		num_op_descs * sizeof(int), M_VNODE, M_WAITOK);
138 	if (vfs_op_offsets == NULL)
139 		panic("vfs_opv_recals: no memory");
140 	for (i = 0; i < num_op_descs; i++)
141 		vfs_op_offsets[i] = vfs_op_descs[i]->vdesc_offset;
142 	qsort(vfs_op_offsets, num_op_descs, sizeof(int), int_cmp);
143 
144 	/*
145 	 * Run through and make sure all known descs have an offset.
146 	 * Use vfs_op_offsets to locate the holes in offset sequence
147 	 * and reuse them.
148 	 *
149 	 * vop_default_desc is at hardwired at offset 1, and offset
150 	 * 0 is a panic sanity check.
151 	 */
152 	j = k = 1;
153 	for (i = 0; i < num_op_descs; i++) {
154 		if (vfs_op_descs[i]->vdesc_offset != 0)
155 			continue;
156 		/*
157 		 * Look at two adjacent entries vfs_op_offsets[j - 1] and
158 		 * vfs_op_offsets[j] and see if we can fit a new offset
159 		 * number in between. If not, look at the next pair until
160 		 * hole is found or the end of the vfs_op_offsets vector is
161 		 * reached. j has been initialized to 1 above so that
162 		 * referencing (j-1)-th element is safe and the loop will
163 		 * never execute if num_op_descs is 1. For each new value s
164 		 * of i the j loop pick up from where previous iteration has
165 		 * left off. When the last hole has been consumed or if no
166 		 * hole has been found, we will start allocating new numbers
167 		 * starting from the biggest already available offset + 1.
168 		 */
169 		for (; j < num_op_descs; j++) {
170 			if (vfs_op_offsets[j - 1] < k && vfs_op_offsets[j] > k)
171 				break;
172 			k = vfs_op_offsets[j] + 1;
173 		}
174 		vfs_op_descs[i]->vdesc_offset = k++;
175 	}
176 	FREE(vfs_op_offsets, M_VNODE);
177 
178 	/* Panic if new VFSops will cause vector overflow */
179 	if (k > vfs_opv_numops)
180 		panic("VFS: Ran out of vop_t vector entries. "
181 		      "%d entries required, only %d available.",
182 		      k, vfs_opv_numops);
183 
184 	/*
185 	 * Allocate and fill in the vectors
186 	 */
187 	for (i = 0; i < vnodeopv_num; i++) {
188 		opv = vnodeopv_descs[i];
189 		opv_desc_vector_p = opv->opv_desc_vector_p;
190 		if (*opv_desc_vector_p == NULL)
191 			MALLOC(*opv_desc_vector_p, vop_t **,
192 				vfs_opv_numops * sizeof(vop_t *), M_VNODE,
193 				M_WAITOK | M_ZERO);
194 		if (*opv_desc_vector_p == NULL)
195 			panic("no memory for vop_t ** vector");
196 
197 		/* Fill in, with slot 0 being to return EOPNOTSUPP */
198 		opv_desc_vector = *opv_desc_vector_p;
199 		opv_desc_vector[0] = (vop_t *)vop_eopnotsupp;
200 		for (j = 0; opv->opv_desc_ops[j].opve_op; j++) {
201 			opve_descp = &(opv->opv_desc_ops[j]);
202 			opv_desc_vector[opve_descp->opve_op->vdesc_offset] =
203 				opve_descp->opve_impl;
204 		}
205 
206 		/* Replace unfilled routines with their default (slot 1). */
207 		opv_desc_vector = *(opv->opv_desc_vector_p);
208 		if (opv_desc_vector[1] == NULL)
209 			panic("vfs_opv_recalc: vector without a default.");
210 		for (j = 0; j < vfs_opv_numops; j++)
211 			if (opv_desc_vector[j] == NULL)
212 				opv_desc_vector[j] = opv_desc_vector[1];
213 	}
214 }
215 
216 /*
217  * Add a vnode operations (vnops) vector to the global list.
218  */
219 void
220 vfs_add_vnodeops(const void *data)
221 {
222 	const struct vnodeopv_desc *opv;
223 	const struct vnodeopv_desc **newopv;
224 	struct vnodeop_desc **newop;
225 	int *newref;
226 	struct vnodeop_desc *desc;
227 	int i, j;
228 
229 	opv = (const struct vnodeopv_desc *)data;
230 	MALLOC(newopv, const struct vnodeopv_desc **,
231 	       (vnodeopv_num + 1) * sizeof(*newopv), M_VNODE, M_WAITOK);
232 	if (newopv == NULL)
233 		panic("vfs_add_vnodeops: no memory");
234 	if (vnodeopv_descs) {
235 		bcopy(vnodeopv_descs, newopv, vnodeopv_num * sizeof(*newopv));
236 		FREE(vnodeopv_descs, M_VNODE);
237 	}
238 	newopv[vnodeopv_num] = opv;
239 	vnodeopv_descs = newopv;
240 	vnodeopv_num++;
241 
242 	/* See if we have turned up a new vnode op desc */
243 	for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) {
244 		for (j = 0; j < num_op_descs; j++) {
245 			if (desc == vfs_op_descs[j]) {
246 				/* found it, increase reference count */
247 				vfs_op_desc_refs[j]++;
248 				break;
249 			}
250 		}
251 		if (j == num_op_descs) {
252 			/* not found, new entry */
253 			MALLOC(newop, struct vnodeop_desc **,
254 			       (num_op_descs + 1) * sizeof(*newop),
255 			       M_VNODE, M_WAITOK);
256 			if (newop == NULL)
257 				panic("vfs_add_vnodeops: no memory for desc");
258 			/* new reference count (for unload) */
259 			MALLOC(newref, int *,
260 				(num_op_descs + 1) * sizeof(*newref),
261 				M_VNODE, M_WAITOK);
262 			if (newref == NULL)
263 				panic("vfs_add_vnodeops: no memory for refs");
264 			if (vfs_op_descs) {
265 				bcopy(vfs_op_descs, newop,
266 					num_op_descs * sizeof(*newop));
267 				FREE(vfs_op_descs, M_VNODE);
268 			}
269 			if (vfs_op_desc_refs) {
270 				bcopy(vfs_op_desc_refs, newref,
271 					num_op_descs * sizeof(*newref));
272 				FREE(vfs_op_desc_refs, M_VNODE);
273 			}
274 			newop[num_op_descs] = desc;
275 			newref[num_op_descs] = 1;
276 			vfs_op_descs = newop;
277 			vfs_op_desc_refs = newref;
278 			num_op_descs++;
279 		}
280 	}
281 	vfs_opv_recalc();
282 }
283 
284 /*
285  * Unlink previously added vnode operations vector.
286  */
287 void
288 vfs_rm_vnodeops(const void *data)
289 {
290 	const struct vnodeopv_desc *opv;
291 	const struct vnodeopv_desc **newopv;
292 	struct vnodeop_desc **newop;
293 	int *newref;
294 	vop_t **opv_desc_vector;
295 	struct vnodeop_desc *desc;
296 	int i, j, k;
297 
298 	opv = (const struct vnodeopv_desc *)data;
299 	/* Lower ref counts on descs in the table and release if zero */
300 	for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) {
301 		for (j = 0; j < num_op_descs; j++) {
302 			if (desc == vfs_op_descs[j]) {
303 				/* found it, decrease reference count */
304 				vfs_op_desc_refs[j]--;
305 				break;
306 			}
307 		}
308 		for (j = 0; j < num_op_descs; j++) {
309 			if (vfs_op_desc_refs[j] > 0)
310 				continue;
311 			if (vfs_op_desc_refs[j] < 0)
312 				panic("vfs_remove_vnodeops: negative refcnt");
313 			/* Entry is going away - replace it with defaultop */
314 			for (k = 0; k < vnodeopv_num; k++) {
315 				opv_desc_vector =
316 					*(vnodeopv_descs[k]->opv_desc_vector_p);
317 				if (opv_desc_vector != NULL)
318 					opv_desc_vector[desc->vdesc_offset] =
319 						opv_desc_vector[1];
320 			}
321 			MALLOC(newop, struct vnodeop_desc **,
322 			       (num_op_descs - 1) * sizeof(*newop),
323 			       M_VNODE, M_WAITOK);
324 			if (newop == NULL)
325 				panic("vfs_remove_vnodeops: no memory for desc");
326 			/* new reference count (for unload) */
327 			MALLOC(newref, int *,
328 				(num_op_descs - 1) * sizeof(*newref),
329 				M_VNODE, M_WAITOK);
330 			if (newref == NULL)
331 				panic("vfs_remove_vnodeops: no memory for refs");
332 			for (k = j; k < (num_op_descs - 1); k++) {
333 				vfs_op_descs[k] = vfs_op_descs[k + 1];
334 				vfs_op_desc_refs[k] = vfs_op_desc_refs[k + 1];
335 			}
336 			bcopy(vfs_op_descs, newop,
337 				(num_op_descs - 1) * sizeof(*newop));
338 			bcopy(vfs_op_desc_refs, newref,
339 				(num_op_descs - 1) * sizeof(*newref));
340 			FREE(vfs_op_descs, M_VNODE);
341 			FREE(vfs_op_desc_refs, M_VNODE);
342 			vfs_op_descs = newop;
343 			vfs_op_desc_refs = newref;
344 			num_op_descs--;
345 		}
346 	}
347 
348 	for (i = 0; i < vnodeopv_num; i++) {
349 		if (vnodeopv_descs[i] == opv) {
350 			for (j = i; j < (vnodeopv_num - 1); j++)
351 				vnodeopv_descs[j] = vnodeopv_descs[j + 1];
352 			break;
353 		}
354 	}
355 	if (i == vnodeopv_num)
356 		panic("vfs_remove_vnodeops: opv not found");
357 	opv_desc_vector = *(opv->opv_desc_vector_p);
358 	if (opv_desc_vector != NULL)
359 		FREE(opv_desc_vector, M_VNODE);
360 	MALLOC(newopv, const struct vnodeopv_desc **,
361 	       (vnodeopv_num - 1) * sizeof(*newopv), M_VNODE, M_WAITOK);
362 	if (newopv == NULL)
363 		panic("vfs_remove_vnodeops: no memory");
364 	bcopy(vnodeopv_descs, newopv, (vnodeopv_num - 1) * sizeof(*newopv));
365 	FREE(vnodeopv_descs, M_VNODE);
366 	vnodeopv_descs = newopv;
367 	vnodeopv_num--;
368 
369 	vfs_opv_recalc();
370 }
371 
372 /*
373  * Routines having to do with the management of the vnode table.
374  */
375 struct vattr va_null;
376 
377 /*
378  * Initialize the vnode structures and initialize each file system type.
379  */
380 /* ARGSUSED*/
381 static void
382 vfsinit(void *dummy)
383 {
384 
385 	namei_zone = zinit("NAMEI", MAXPATHLEN, 0, 0, 2);
386 
387 	/*
388 	 * Initialize the vnode table
389 	 */
390 	vntblinit();
391 	/*
392 	 * Initialize the vnode name cache
393 	 */
394 	nchinit();
395 	/*
396 	 * Initialize each file system type.
397 	 * Vfs type numbers must be distinct from VFS_GENERIC (and VFS_VFSCONF).
398 	 */
399 	vattr_null(&va_null);
400 	maxvfsconf = VFS_GENERIC + 1;
401 }
402 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vfsinit, NULL)
403 
404 /*
405  * Register a VFS.
406  *
407  * After doing general initialisation, this function will
408  * call the filesystem specific initialisation vector op,
409  * i.e. vfsops->vfs_init().
410  */
411 int
412 vfs_register(struct vfsconf *vfc)
413 {
414 	struct sysctl_oid *oidp;
415 	struct vfsconf *vfsp;
416 
417 	vfsp = NULL;
418 	if (vfsconf)
419 		for (vfsp = vfsconf; vfsp->vfc_next; vfsp = vfsp->vfc_next)
420 			if (strcmp(vfc->vfc_name, vfsp->vfc_name) == 0)
421 				return EEXIST;
422 
423 	vfc->vfc_typenum = maxvfsconf++;
424 	if (vfsp)
425 		vfsp->vfc_next = vfc;
426 	else
427 		vfsconf = vfc;
428 	vfc->vfc_next = NULL;
429 
430 	/*
431 	 * If this filesystem has a sysctl node under vfs
432 	 * (i.e. vfs.xxfs), then change the oid number of that node to
433 	 * match the filesystem's type number.  This allows user code
434 	 * which uses the type number to read sysctl variables defined
435 	 * by the filesystem to continue working. Since the oids are
436 	 * in a sorted list, we need to make sure the order is
437 	 * preserved by re-registering the oid after modifying its
438 	 * number.
439 	 */
440 	SLIST_FOREACH(oidp, &sysctl__vfs_children, oid_link)
441 		if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) {
442 			sysctl_unregister_oid(oidp);
443 			oidp->oid_number = vfc->vfc_typenum;
444 			sysctl_register_oid(oidp);
445 		}
446 
447 	/*
448 	 * Call init function for this VFS...
449 	 */
450 	(*(vfc->vfc_vfsops->vfs_init))(vfc);
451 
452 	return 0;
453 }
454 
455 
456 /*
457  * Remove previously registered VFS.
458  *
459  * After doing general de-registration like removing sysctl
460  * nodes etc, it will call the filesystem specific vector
461  * op, i.e. vfsops->vfs_uninit().
462  *
463  */
464 int
465 vfs_unregister(struct vfsconf *vfc)
466 {
467 	struct vfsconf *vfsp, *prev_vfsp;
468 	int error, i, maxtypenum;
469 
470 	i = vfc->vfc_typenum;
471 
472 	prev_vfsp = NULL;
473 	for (vfsp = vfsconf; vfsp;
474 			prev_vfsp = vfsp, vfsp = vfsp->vfc_next) {
475 		if (!strcmp(vfc->vfc_name, vfsp->vfc_name))
476 			break;
477 	}
478 	if (vfsp == NULL)
479 		return EINVAL;
480 	if (vfsp->vfc_refcount)
481 		return EBUSY;
482 	if (vfc->vfc_vfsops->vfs_uninit != NULL) {
483 		error = (*vfc->vfc_vfsops->vfs_uninit)(vfsp);
484 		if (error)
485 			return (error);
486 	}
487 	if (prev_vfsp)
488 		prev_vfsp->vfc_next = vfsp->vfc_next;
489 	else
490 		vfsconf = vfsp->vfc_next;
491 	maxtypenum = VFS_GENERIC;
492 	for (vfsp = vfsconf; vfsp != NULL; vfsp = vfsp->vfc_next)
493 		if (maxtypenum < vfsp->vfc_typenum)
494 			maxtypenum = vfsp->vfc_typenum;
495 	maxvfsconf = maxtypenum + 1;
496 	return 0;
497 }
498 
499 int
500 vfs_modevent(module_t mod, int type, void *data)
501 {
502 	struct vfsconf *vfc;
503 	int error = 0;
504 
505 	vfc = (struct vfsconf *)data;
506 
507 	switch (type) {
508 	case MOD_LOAD:
509 		if (vfc)
510 			error = vfs_register(vfc);
511 		break;
512 
513 	case MOD_UNLOAD:
514 		if (vfc)
515 			error = vfs_unregister(vfc);
516 		break;
517 	default:	/* including MOD_SHUTDOWN */
518 		break;
519 	}
520 	return (error);
521 }
522