1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed 6 * to Berkeley by John Heidemann of the UCLA Ficus project. 7 * 8 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_init.c 8.3 (Berkeley) 1/4/94 39 * $FreeBSD: src/sys/kern/vfs_init.c,v 1.59 2002/04/30 18:44:32 dillon Exp $ 40 * $DragonFly: src/sys/kern/vfs_init.c,v 1.4 2004/03/16 18:42:35 hmp Exp $ 41 */ 42 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/mount.h> 48 #include <sys/sysctl.h> 49 #include <sys/vnode.h> 50 #include <sys/malloc.h> 51 #include <vm/vm_zone.h> 52 53 54 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes"); 55 56 /* 57 * Zone for namei 58 */ 59 struct vm_zone *namei_zone; 60 61 /* 62 * vfs_init() will set maxvfsconf 63 * to the highest defined type number. 64 */ 65 int maxvfsconf; 66 struct vfsconf *vfsconf; 67 68 /* 69 * vfs_init.c 70 * 71 * Allocate and fill in operations vectors. 72 * 73 * An undocumented feature of this approach to defining operations is that 74 * there can be multiple entries in vfs_opv_descs for the same operations 75 * vector. This allows third parties to extend the set of operations 76 * supported by another layer in a binary compatible way. For example, 77 * assume that NFS needed to be modified to support Ficus. NFS has an entry 78 * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by 79 * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions) 80 * listing those new operations Ficus adds to NFS, all without modifying the 81 * NFS code. 
(Of couse, the OTW NFS protocol still needs to be munged, but 82 * that is a(whole)nother story.) This is a feature. 83 */ 84 85 /* Table of known vnodeop vectors (list of VFS vnode vectors) */ 86 static const struct vnodeopv_desc **vnodeopv_descs; 87 static int vnodeopv_num; 88 89 /* Table of known descs (list of vnode op handlers "vop_access_desc") */ 90 static struct vnodeop_desc **vfs_op_descs; 91 static int *vfs_op_desc_refs; /* reference counts */ 92 static int num_op_descs; 93 94 /* 95 * Allocate at least vfs_opv_numops number of VFS operation 96 * vectors. The vector is never freed or deallocted once 97 * it is initalised, so the vnodes might safely reference 98 * it through their v_op pointer without the vector changing 99 * suddenly from under them. 100 * 101 * Allow this number to be tuned at boot. 102 */ 103 static int vfs_opv_numops = 96; 104 TUNABLE_INT("vfs.opv_numops", &vfs_opv_numops); 105 SYSCTL_INT(_vfs, OID_AUTO, opv_numops, CTLFLAG_RD, &vfs_opv_numops, 106 0, "Maximum number of operations in vop_t vector"); 107 108 static __inline int 109 int_cmp(const void *a, const void *b) 110 { 111 return(*(const int *)a - *(const int *)b); 112 } 113 114 /* 115 * Recalculate parts of the VFS operations vector (vfsops). 116 */ 117 static void 118 vfs_opv_recalc(void) 119 { 120 int i, j, k; 121 int *vfs_op_offsets; 122 vop_t ***opv_desc_vector_p; 123 vop_t **opv_desc_vector; 124 struct vnodeopv_entry_desc *opve_descp; 125 const struct vnodeopv_desc *opv; 126 127 if (vfs_op_descs == NULL) 128 panic("vfs_opv_recalc called with null vfs_op_descs"); 129 130 /* 131 * Allocate and initialize temporary array to store 132 * offsets. Sort it to put all uninitialized entries 133 * first and to make holes in existing offset sequence 134 * detectable. 
135 */ 136 MALLOC(vfs_op_offsets, int *, 137 num_op_descs * sizeof(int), M_VNODE, M_WAITOK); 138 if (vfs_op_offsets == NULL) 139 panic("vfs_opv_recals: no memory"); 140 for (i = 0; i < num_op_descs; i++) 141 vfs_op_offsets[i] = vfs_op_descs[i]->vdesc_offset; 142 qsort(vfs_op_offsets, num_op_descs, sizeof(int), int_cmp); 143 144 /* 145 * Run through and make sure all known descs have an offset. 146 * Use vfs_op_offsets to locate the holes in offset sequence 147 * and reuse them. 148 * 149 * vop_default_desc is at hardwired at offset 1, and offset 150 * 0 is a panic sanity check. 151 */ 152 j = k = 1; 153 for (i = 0; i < num_op_descs; i++) { 154 if (vfs_op_descs[i]->vdesc_offset != 0) 155 continue; 156 /* 157 * Look at two adjacent entries vfs_op_offsets[j - 1] and 158 * vfs_op_offsets[j] and see if we can fit a new offset 159 * number in between. If not, look at the next pair until 160 * hole is found or the end of the vfs_op_offsets vector is 161 * reached. j has been initialized to 1 above so that 162 * referencing (j-1)-th element is safe and the loop will 163 * never execute if num_op_descs is 1. For each new value s 164 * of i the j loop pick up from where previous iteration has 165 * left off. When the last hole has been consumed or if no 166 * hole has been found, we will start allocating new numbers 167 * starting from the biggest already available offset + 1. 168 */ 169 for (; j < num_op_descs; j++) { 170 if (vfs_op_offsets[j - 1] < k && vfs_op_offsets[j] > k) 171 break; 172 k = vfs_op_offsets[j] + 1; 173 } 174 vfs_op_descs[i]->vdesc_offset = k++; 175 } 176 FREE(vfs_op_offsets, M_VNODE); 177 178 /* Panic if new VFSops will cause vector overflow */ 179 if (k > vfs_opv_numops) 180 panic("VFS: Ran out of vop_t vector entries. 
" 181 "%d entries required, only %d available.", 182 k, vfs_opv_numops); 183 184 /* 185 * Allocate and fill in the vectors 186 */ 187 for (i = 0; i < vnodeopv_num; i++) { 188 opv = vnodeopv_descs[i]; 189 opv_desc_vector_p = opv->opv_desc_vector_p; 190 if (*opv_desc_vector_p == NULL) 191 MALLOC(*opv_desc_vector_p, vop_t **, 192 vfs_opv_numops * sizeof(vop_t *), M_VNODE, 193 M_WAITOK | M_ZERO); 194 if (*opv_desc_vector_p == NULL) 195 panic("no memory for vop_t ** vector"); 196 197 /* Fill in, with slot 0 being to return EOPNOTSUPP */ 198 opv_desc_vector = *opv_desc_vector_p; 199 opv_desc_vector[0] = (vop_t *)vop_eopnotsupp; 200 for (j = 0; opv->opv_desc_ops[j].opve_op; j++) { 201 opve_descp = &(opv->opv_desc_ops[j]); 202 opv_desc_vector[opve_descp->opve_op->vdesc_offset] = 203 opve_descp->opve_impl; 204 } 205 206 /* Replace unfilled routines with their default (slot 1). */ 207 opv_desc_vector = *(opv->opv_desc_vector_p); 208 if (opv_desc_vector[1] == NULL) 209 panic("vfs_opv_recalc: vector without a default."); 210 for (j = 0; j < vfs_opv_numops; j++) 211 if (opv_desc_vector[j] == NULL) 212 opv_desc_vector[j] = opv_desc_vector[1]; 213 } 214 } 215 216 /* 217 * Add a vnode operations (vnops) vector to the global list. 
218 */ 219 void 220 vfs_add_vnodeops(const void *data) 221 { 222 const struct vnodeopv_desc *opv; 223 const struct vnodeopv_desc **newopv; 224 struct vnodeop_desc **newop; 225 int *newref; 226 struct vnodeop_desc *desc; 227 int i, j; 228 229 opv = (const struct vnodeopv_desc *)data; 230 MALLOC(newopv, const struct vnodeopv_desc **, 231 (vnodeopv_num + 1) * sizeof(*newopv), M_VNODE, M_WAITOK); 232 if (newopv == NULL) 233 panic("vfs_add_vnodeops: no memory"); 234 if (vnodeopv_descs) { 235 bcopy(vnodeopv_descs, newopv, vnodeopv_num * sizeof(*newopv)); 236 FREE(vnodeopv_descs, M_VNODE); 237 } 238 newopv[vnodeopv_num] = opv; 239 vnodeopv_descs = newopv; 240 vnodeopv_num++; 241 242 /* See if we have turned up a new vnode op desc */ 243 for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) { 244 for (j = 0; j < num_op_descs; j++) { 245 if (desc == vfs_op_descs[j]) { 246 /* found it, increase reference count */ 247 vfs_op_desc_refs[j]++; 248 break; 249 } 250 } 251 if (j == num_op_descs) { 252 /* not found, new entry */ 253 MALLOC(newop, struct vnodeop_desc **, 254 (num_op_descs + 1) * sizeof(*newop), 255 M_VNODE, M_WAITOK); 256 if (newop == NULL) 257 panic("vfs_add_vnodeops: no memory for desc"); 258 /* new reference count (for unload) */ 259 MALLOC(newref, int *, 260 (num_op_descs + 1) * sizeof(*newref), 261 M_VNODE, M_WAITOK); 262 if (newref == NULL) 263 panic("vfs_add_vnodeops: no memory for refs"); 264 if (vfs_op_descs) { 265 bcopy(vfs_op_descs, newop, 266 num_op_descs * sizeof(*newop)); 267 FREE(vfs_op_descs, M_VNODE); 268 } 269 if (vfs_op_desc_refs) { 270 bcopy(vfs_op_desc_refs, newref, 271 num_op_descs * sizeof(*newref)); 272 FREE(vfs_op_desc_refs, M_VNODE); 273 } 274 newop[num_op_descs] = desc; 275 newref[num_op_descs] = 1; 276 vfs_op_descs = newop; 277 vfs_op_desc_refs = newref; 278 num_op_descs++; 279 } 280 } 281 vfs_opv_recalc(); 282 } 283 284 /* 285 * Unlink previously added vnode operations vector. 
286 */ 287 void 288 vfs_rm_vnodeops(const void *data) 289 { 290 const struct vnodeopv_desc *opv; 291 const struct vnodeopv_desc **newopv; 292 struct vnodeop_desc **newop; 293 int *newref; 294 vop_t **opv_desc_vector; 295 struct vnodeop_desc *desc; 296 int i, j, k; 297 298 opv = (const struct vnodeopv_desc *)data; 299 /* Lower ref counts on descs in the table and release if zero */ 300 for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) { 301 for (j = 0; j < num_op_descs; j++) { 302 if (desc == vfs_op_descs[j]) { 303 /* found it, decrease reference count */ 304 vfs_op_desc_refs[j]--; 305 break; 306 } 307 } 308 for (j = 0; j < num_op_descs; j++) { 309 if (vfs_op_desc_refs[j] > 0) 310 continue; 311 if (vfs_op_desc_refs[j] < 0) 312 panic("vfs_remove_vnodeops: negative refcnt"); 313 /* Entry is going away - replace it with defaultop */ 314 for (k = 0; k < vnodeopv_num; k++) { 315 opv_desc_vector = 316 *(vnodeopv_descs[k]->opv_desc_vector_p); 317 if (opv_desc_vector != NULL) 318 opv_desc_vector[desc->vdesc_offset] = 319 opv_desc_vector[1]; 320 } 321 MALLOC(newop, struct vnodeop_desc **, 322 (num_op_descs - 1) * sizeof(*newop), 323 M_VNODE, M_WAITOK); 324 if (newop == NULL) 325 panic("vfs_remove_vnodeops: no memory for desc"); 326 /* new reference count (for unload) */ 327 MALLOC(newref, int *, 328 (num_op_descs - 1) * sizeof(*newref), 329 M_VNODE, M_WAITOK); 330 if (newref == NULL) 331 panic("vfs_remove_vnodeops: no memory for refs"); 332 for (k = j; k < (num_op_descs - 1); k++) { 333 vfs_op_descs[k] = vfs_op_descs[k + 1]; 334 vfs_op_desc_refs[k] = vfs_op_desc_refs[k + 1]; 335 } 336 bcopy(vfs_op_descs, newop, 337 (num_op_descs - 1) * sizeof(*newop)); 338 bcopy(vfs_op_desc_refs, newref, 339 (num_op_descs - 1) * sizeof(*newref)); 340 FREE(vfs_op_descs, M_VNODE); 341 FREE(vfs_op_desc_refs, M_VNODE); 342 vfs_op_descs = newop; 343 vfs_op_desc_refs = newref; 344 num_op_descs--; 345 } 346 } 347 348 for (i = 0; i < vnodeopv_num; i++) { 349 if (vnodeopv_descs[i] == opv) { 350 
for (j = i; j < (vnodeopv_num - 1); j++) 351 vnodeopv_descs[j] = vnodeopv_descs[j + 1]; 352 break; 353 } 354 } 355 if (i == vnodeopv_num) 356 panic("vfs_remove_vnodeops: opv not found"); 357 opv_desc_vector = *(opv->opv_desc_vector_p); 358 if (opv_desc_vector != NULL) 359 FREE(opv_desc_vector, M_VNODE); 360 MALLOC(newopv, const struct vnodeopv_desc **, 361 (vnodeopv_num - 1) * sizeof(*newopv), M_VNODE, M_WAITOK); 362 if (newopv == NULL) 363 panic("vfs_remove_vnodeops: no memory"); 364 bcopy(vnodeopv_descs, newopv, (vnodeopv_num - 1) * sizeof(*newopv)); 365 FREE(vnodeopv_descs, M_VNODE); 366 vnodeopv_descs = newopv; 367 vnodeopv_num--; 368 369 vfs_opv_recalc(); 370 } 371 372 /* 373 * Routines having to do with the management of the vnode table. 374 */ 375 struct vattr va_null; 376 377 /* 378 * Initialize the vnode structures and initialize each file system type. 379 */ 380 /* ARGSUSED*/ 381 static void 382 vfsinit(void *dummy) 383 { 384 385 namei_zone = zinit("NAMEI", MAXPATHLEN, 0, 0, 2); 386 387 /* 388 * Initialize the vnode table 389 */ 390 vntblinit(); 391 /* 392 * Initialize the vnode name cache 393 */ 394 nchinit(); 395 /* 396 * Initialize each file system type. 397 * Vfs type numbers must be distinct from VFS_GENERIC (and VFS_VFSCONF). 398 */ 399 vattr_null(&va_null); 400 maxvfsconf = VFS_GENERIC + 1; 401 } 402 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vfsinit, NULL) 403 404 /* 405 * Register a VFS. 406 * 407 * After doing general initialisation, this function will 408 * call the filesystem specific initialisation vector op, 409 * i.e. vfsops->vfs_init(). 
410 */ 411 int 412 vfs_register(struct vfsconf *vfc) 413 { 414 struct sysctl_oid *oidp; 415 struct vfsconf *vfsp; 416 417 vfsp = NULL; 418 if (vfsconf) 419 for (vfsp = vfsconf; vfsp->vfc_next; vfsp = vfsp->vfc_next) 420 if (strcmp(vfc->vfc_name, vfsp->vfc_name) == 0) 421 return EEXIST; 422 423 vfc->vfc_typenum = maxvfsconf++; 424 if (vfsp) 425 vfsp->vfc_next = vfc; 426 else 427 vfsconf = vfc; 428 vfc->vfc_next = NULL; 429 430 /* 431 * If this filesystem has a sysctl node under vfs 432 * (i.e. vfs.xxfs), then change the oid number of that node to 433 * match the filesystem's type number. This allows user code 434 * which uses the type number to read sysctl variables defined 435 * by the filesystem to continue working. Since the oids are 436 * in a sorted list, we need to make sure the order is 437 * preserved by re-registering the oid after modifying its 438 * number. 439 */ 440 SLIST_FOREACH(oidp, &sysctl__vfs_children, oid_link) 441 if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) { 442 sysctl_unregister_oid(oidp); 443 oidp->oid_number = vfc->vfc_typenum; 444 sysctl_register_oid(oidp); 445 } 446 447 /* 448 * Call init function for this VFS... 449 */ 450 (*(vfc->vfc_vfsops->vfs_init))(vfc); 451 452 return 0; 453 } 454 455 456 /* 457 * Remove previously registered VFS. 458 * 459 * After doing general de-registration like removing sysctl 460 * nodes etc, it will call the filesystem specific vector 461 * op, i.e. vfsops->vfs_uninit(). 
462 * 463 */ 464 int 465 vfs_unregister(struct vfsconf *vfc) 466 { 467 struct vfsconf *vfsp, *prev_vfsp; 468 int error, i, maxtypenum; 469 470 i = vfc->vfc_typenum; 471 472 prev_vfsp = NULL; 473 for (vfsp = vfsconf; vfsp; 474 prev_vfsp = vfsp, vfsp = vfsp->vfc_next) { 475 if (!strcmp(vfc->vfc_name, vfsp->vfc_name)) 476 break; 477 } 478 if (vfsp == NULL) 479 return EINVAL; 480 if (vfsp->vfc_refcount) 481 return EBUSY; 482 if (vfc->vfc_vfsops->vfs_uninit != NULL) { 483 error = (*vfc->vfc_vfsops->vfs_uninit)(vfsp); 484 if (error) 485 return (error); 486 } 487 if (prev_vfsp) 488 prev_vfsp->vfc_next = vfsp->vfc_next; 489 else 490 vfsconf = vfsp->vfc_next; 491 maxtypenum = VFS_GENERIC; 492 for (vfsp = vfsconf; vfsp != NULL; vfsp = vfsp->vfc_next) 493 if (maxtypenum < vfsp->vfc_typenum) 494 maxtypenum = vfsp->vfc_typenum; 495 maxvfsconf = maxtypenum + 1; 496 return 0; 497 } 498 499 int 500 vfs_modevent(module_t mod, int type, void *data) 501 { 502 struct vfsconf *vfc; 503 int error = 0; 504 505 vfc = (struct vfsconf *)data; 506 507 switch (type) { 508 case MOD_LOAD: 509 if (vfc) 510 error = vfs_register(vfc); 511 break; 512 513 case MOD_UNLOAD: 514 if (vfc) 515 error = vfs_unregister(vfc); 516 break; 517 default: /* including MOD_SHUTDOWN */ 518 break; 519 } 520 return (error); 521 } 522