1 /* MIB service - main.c - request abstraction and first-level tree */ 2 /* 3 * This is the Management Information Base (MIB) service. Its one and only 4 * task is to implement the sysctl(2) system call, which plays a fairly 5 * important role in parts of *BSD userland. 6 * 7 * The sysctl(2) interface is used to access a variety of information. In 8 * order to obtain that information, and possibly modify it, the MIB service 9 * calls into many other services. The MIB service must therefore not be 10 * called directly from other services, with the exception of ProcFS. In fact, 11 * ProcFS is currently the only service that is modeled as logically higher in 12 * the MINIX3 service stack than MIB, something that itself is possible only 13 * due to the nonblocking nature of VFS. MIB may issue blocking calls to VFS. 14 * 15 * The MIB service is in the boot image because even init(8) makes use of 16 * sysctl(2) during its own startup, so launching the MIB service at any later 17 * time would make a proper implementation of sysctl(2) impossible. Also, the 18 * service needs superuser privileges because it may need to issue privileged 19 * calls and obtain privileged information from other services. 20 * 21 * While most of the sysctl tree is maintained locally, the MIB service also 22 * allows other services to register "remote" subtrees which are then handled 23 * entirely by those services. This feature, which works much like file system 24 * mounting, allows 1) sysctl handling code to stay local to its corresponding 25 * service, and 2) parts of the sysctl tree to adapt and expand dynamically as 26 * optional services are started and stopped. Compared to the MIB service's 27 * local handling, remotely handled subtrees are subject to several additional 28 * practical restrictions, hoever. In the current implementation, the MIB 29 * service makes blocking calls to remote services as needed; in the future, 30 * these interactions could be made (more) asynchronous. 31 * 32 * The MIB service was created by David van Moolenbroek <david@minix3.org>. 33 */ 34 35 #include "mib.h" 36 37 /* 38 * Most of these initially empty nodes are filled in by their corresponding 39 * modules' _init calls; see mib_init below. However, some subtrees are not 40 * populated by the MIB service itself. CTL_NET is expected to be populated 41 * through registration of remote subtrees. The libc sysctl(3) wrapper code 42 * takes care of the CTL_USER subtree. It must have an entry here though, or 43 * sysctl(8) will not list it. CTL_VENDOR is also empty, but writable, so that 44 * it may be used by third parties. 45 */ 46 static struct mib_node mib_table[] = { 47 /* 1*/ [CTL_KERN] = MIB_ENODE(_P | _RO, "kern", "High kernel"), 48 /* 2*/ [CTL_VM] = MIB_ENODE(_P | _RO, "vm", "Virtual memory"), 49 /* 4*/ [CTL_NET] = MIB_ENODE(_P | _RO, "net", "Networking"), 50 /* 6*/ [CTL_HW] = MIB_ENODE(_P | _RO, "hw", "Generic CPU, I/O"), 51 /* 8*/ [CTL_USER] = MIB_ENODE(_P | _RO, "user", "User-level"), 52 /*11*/ [CTL_VENDOR] = MIB_ENODE(_P | _RW, "vendor", "Vendor specific"), 53 /*32*/ [CTL_MINIX] = MIB_ENODE(_P | _RO, "minix", "MINIX3 specific"), 54 }; 55 56 /* 57 * The root node of the tree. The root node is used internally only--it is 58 * impossible to access the root node itself from userland in any way. The 59 * node is writable by default, so that programs such as init(8) may create 60 * their own top-level entries. 61 */ 62 struct mib_node mib_root = MIB_NODE(_RW, mib_table, "", ""); 63 64 /* 65 * Structures describing old and new data as provided by userland. The primary 66 * advantage of these opaque structures is that we could in principle use them 67 * to implement storage of small data results in the sysctl reply message, so 68 * as to avoid the kernel copy, without changing any of the handler code. 69 */ 70 struct mib_oldp { 71 endpoint_t oldp_endpt; 72 vir_bytes oldp_addr; 73 size_t oldp_len; 74 }; 75 /* 76 * Same structure, different type: prevent accidental mixups, and avoid the 77 * need to use __restrict everywhere. 78 */ 79 struct mib_newp { 80 endpoint_t newp_endpt; 81 vir_bytes newp_addr; 82 size_t newp_len; 83 }; 84 85 /* 86 * Return TRUE or FALSE indicating whether the given offset is within the range 87 * of data that is to be copied out. This call can be used to test whether 88 * certain bits of data need to be prepared for copying at all. 89 */ 90 int 91 mib_inrange(struct mib_oldp * oldp, size_t off) 92 { 93 94 if (oldp == NULL) 95 return FALSE; 96 97 return (off < oldp->oldp_len); 98 } 99 100 /* 101 * Return the total length of the requested data. This should not be used 102 * directly except in highly unusual cases, such as particular node requests 103 * where the request semantics blatantly violate overall sysctl(2) semantics. 104 */ 105 size_t 106 mib_getoldlen(struct mib_oldp * oldp) 107 { 108 109 if (oldp == NULL) 110 return 0; 111 112 return oldp->oldp_len; 113 } 114 115 /* 116 * Copy out (partial) data to the user. The copy is automatically limited to 117 * the range of data requested by the user. Return the requested length on 118 * success (for the caller's convenience) or an error code on failure. 119 */ 120 ssize_t 121 mib_copyout(struct mib_oldp * __restrict oldp, size_t off, 122 const void * __restrict buf, size_t size) 123 { 124 size_t len; 125 int r; 126 127 len = size; 128 assert(len <= SSIZE_MAX); 129 130 if (oldp == NULL || off >= oldp->oldp_len) 131 return size; /* nothing to do */ 132 133 if (len > oldp->oldp_len - off) 134 len = oldp->oldp_len - off; 135 136 if ((r = sys_datacopy(SELF, (vir_bytes)buf, oldp->oldp_endpt, 137 oldp->oldp_addr + off, len)) != OK) 138 return r; 139 140 return size; 141 } 142 143 /* 144 * Override the oldlen value returned from the call, in situations where an 145 * error is thrown as well. 146 */ 147 void 148 mib_setoldlen(struct mib_call * call, size_t oldlen) 149 { 150 151 call->call_reslen = oldlen; 152 } 153 154 /* 155 * Return the new data length as provided by the user, or 0 if the user did not 156 * supply new data. 157 */ 158 size_t 159 mib_getnewlen(struct mib_newp * newp) 160 { 161 162 if (newp == NULL) 163 return 0; 164 165 return newp->newp_len; 166 } 167 168 /* 169 * Copy in data from the user. The given length must match exactly the length 170 * given by the user. Return OK or an error code. 171 */ 172 int 173 mib_copyin(struct mib_newp * __restrict newp, void * __restrict buf, 174 size_t len) 175 { 176 177 if (newp == NULL || len != newp->newp_len) 178 return EINVAL; 179 180 if (len == 0) 181 return OK; 182 183 return sys_datacopy(newp->newp_endpt, newp->newp_addr, SELF, 184 (vir_bytes)buf, len); 185 } 186 187 /* 188 * Copy in auxiliary data from the user, based on a user pointer obtained from 189 * data copied in earlier through mib_copyin(). 190 */ 191 int 192 mib_copyin_aux(struct mib_newp * __restrict newp, vir_bytes addr, 193 void * __restrict buf, size_t len) 194 { 195 196 assert(newp != NULL); 197 198 if (len == 0) 199 return OK; 200 201 return sys_datacopy(newp->newp_endpt, addr, SELF, (vir_bytes)buf, len); 202 } 203 204 /* 205 * Create a grant for a call's old data region, if not NULL, for the given 206 * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the 207 * length in lenp, and return OK. On error, return an error code that must not 208 * be ENOMEM. 209 */ 210 int 211 mib_relay_oldp(endpoint_t endpt, struct mib_oldp * __restrict oldp, 212 cp_grant_id_t * grantp, size_t * __restrict lenp) 213 { 214 215 if (oldp != NULL) { 216 *grantp = cpf_grant_magic(endpt, oldp->oldp_endpt, 217 oldp->oldp_addr, oldp->oldp_len, CPF_WRITE); 218 if (!GRANT_VALID(*grantp)) 219 return EINVAL; 220 *lenp = oldp->oldp_len; 221 } else { 222 *grantp = GRANT_INVALID; 223 *lenp = 0; 224 } 225 226 return OK; 227 } 228 229 /* 230 * Create a grant for a call's new data region, if not NULL, for the given 231 * endpoint. On success, store the grant (or GRANT_INVALID) in grantp and the 232 * length in lenp, and return OK. On error, return an error code that must not 233 * be ENOMEM. 234 */ 235 int 236 mib_relay_newp(endpoint_t endpt, struct mib_newp * __restrict newp, 237 cp_grant_id_t * grantp, size_t * __restrict lenp) 238 { 239 240 if (newp != NULL) { 241 *grantp = cpf_grant_magic(endpt, newp->newp_endpt, 242 newp->newp_addr, newp->newp_len, CPF_READ); 243 if (!GRANT_VALID(*grantp)) 244 return EINVAL; 245 *lenp = newp->newp_len; 246 } else { 247 *grantp = GRANT_INVALID; 248 *lenp = 0; 249 } 250 251 return OK; 252 } 253 254 /* 255 * Check whether the user is allowed to perform privileged operations. The 256 * function returns a nonzero value if this is the case, and zero otherwise. 257 * Authorization is performed only once per call. 258 */ 259 int 260 mib_authed(struct mib_call * call) 261 { 262 263 if ((call->call_flags & (MIB_FLAG_AUTH | MIB_FLAG_NOAUTH)) == 0) { 264 /* Ask PM if this endpoint has superuser privileges. */ 265 if (getnuid(call->call_endpt) == SUPER_USER) 266 call->call_flags |= MIB_FLAG_AUTH; 267 else 268 call->call_flags |= MIB_FLAG_NOAUTH; 269 } 270 271 return (call->call_flags & MIB_FLAG_AUTH); 272 } 273 274 /* 275 * Implement the sysctl(2) system call. 276 */ 277 static int 278 mib_sysctl(message * __restrict m_in, int ipc_status, 279 message * __restrict m_out) 280 { 281 vir_bytes oldaddr, newaddr; 282 size_t oldlen, newlen; 283 unsigned int namelen; 284 int s, name[CTL_MAXNAME]; 285 endpoint_t endpt; 286 struct mib_oldp oldp, *oldpp; 287 struct mib_newp newp, *newpp; 288 struct mib_call call; 289 ssize_t r; 290 291 /* Only handle blocking calls. Ignore everything else. */ 292 if (IPC_STATUS_CALL(ipc_status) != SENDREC) 293 return EDONTREPLY; 294 295 endpt = m_in->m_source; 296 oldaddr = m_in->m_lc_mib_sysctl.oldp; 297 oldlen = m_in->m_lc_mib_sysctl.oldlen; 298 newaddr = m_in->m_lc_mib_sysctl.newp; 299 newlen = m_in->m_lc_mib_sysctl.newlen; 300 namelen = m_in->m_lc_mib_sysctl.namelen; 301 302 if (namelen == 0 || namelen > CTL_MAXNAME) 303 return EINVAL; 304 305 /* 306 * In most cases, the entire name fits in the request message, so we 307 * can avoid a kernel copy. 308 */ 309 if (namelen > CTL_SHORTNAME) { 310 if ((s = sys_datacopy(endpt, m_in->m_lc_mib_sysctl.namep, SELF, 311 (vir_bytes)&name, sizeof(name[0]) * namelen)) != OK) 312 return s; 313 } else 314 memcpy(name, m_in->m_lc_mib_sysctl.name, 315 sizeof(name[0]) * namelen); 316 317 /* 318 * Set up a structure for the old data, if any. When no old address is 319 * given, be forgiving if oldlen is not zero, as the user may simply 320 * not have initialized the variable before passing a pointer to it. 321 */ 322 if (oldaddr != 0) { 323 oldp.oldp_endpt = endpt; 324 oldp.oldp_addr = oldaddr; 325 oldp.oldp_len = oldlen; 326 oldpp = &oldp; 327 } else 328 oldpp = NULL; 329 330 /* 331 * Set up a structure for the new data, if any. If one of newaddr and 332 * newlen is zero but not the other, we (like NetBSD) disregard both. 333 */ 334 if (newaddr != 0 && newlen != 0) { 335 newp.newp_endpt = endpt; 336 newp.newp_addr = newaddr; 337 newp.newp_len = newlen; 338 newpp = &newp; 339 } else 340 newpp = NULL; 341 342 /* 343 * Set up a structure for other call parameters. Most of these should 344 * be used rarely, and we may want to add more later, so do not pass 345 * all of them around as actual function parameters all the time. 346 */ 347 call.call_endpt = endpt; 348 call.call_name = name; 349 call.call_namelen = namelen; 350 call.call_flags = 0; 351 call.call_reslen = 0; 352 353 r = mib_dispatch(&call, oldpp, newpp); 354 355 /* 356 * From NetBSD: we copy out as much as we can from the old data, while 357 * at the same time computing the full data length. Then, here at the 358 * end, if the entire result did not fit in the destination buffer, we 359 * return ENOMEM instead of success, thus also returning a partial 360 * result and the full data length. 361 * 362 * It is also possible that data are copied out along with a "real" 363 * error. In that case, we must report a nonzero resulting length 364 * along with that error code. This is currently the case when node 365 * creation resulted in a collision, in which case the error code is 366 * EEXIST while the existing node is copied out as well. 367 */ 368 if (r >= 0) { 369 m_out->m_mib_lc_sysctl.oldlen = (size_t)r; 370 371 if (oldaddr != 0 && oldlen < (size_t)r) 372 r = ENOMEM; 373 else 374 r = OK; 375 } else 376 m_out->m_mib_lc_sysctl.oldlen = call.call_reslen; 377 378 return r; 379 } 380 381 /* 382 * Initialize the service. 383 */ 384 static int 385 mib_init(int type __unused, sef_init_info_t * info __unused) 386 { 387 388 /* 389 * Initialize pointers and sizes of subtrees in different modules. 390 * This is needed because we cannot use sizeof on external arrays. 391 * We do initialize the node entry (including any other fields) 392 * statically through MIB_ENODE because that forces the array to be 393 * large enough to store the entry. 394 */ 395 mib_kern_init(&mib_table[CTL_KERN]); 396 mib_vm_init(&mib_table[CTL_VM]); 397 mib_hw_init(&mib_table[CTL_HW]); 398 mib_minix_init(&mib_table[CTL_MINIX]); 399 400 /* 401 * Now that the static tree is complete, go through the entire tree, 402 * initializing miscellaneous fields. 403 */ 404 mib_tree_init(); 405 406 /* Prepare for requests to mount remote subtrees. */ 407 mib_remote_init(); 408 409 return OK; 410 } 411 412 /* 413 * Perform SEF startup. 414 */ 415 static void 416 mib_startup(void) 417 { 418 419 sef_setcb_init_fresh(mib_init); 420 /* 421 * If we restart we lose all dynamic state, which means we lose all 422 * nodes that have been created at run time. However, running with 423 * only the static node tree is still better than not running at all. 424 */ 425 sef_setcb_init_restart(mib_init); 426 427 sef_startup(); 428 } 429 430 /* 431 * The Management Information Base (MIB) service. 432 */ 433 int 434 main(void) 435 { 436 message m_in, m_out; 437 int r, ipc_status; 438 439 /* Perform initialization. */ 440 mib_startup(); 441 442 /* The main message loop. */ 443 for (;;) { 444 /* Receive a request. */ 445 if ((r = sef_receive_status(ANY, &m_in, &ipc_status)) != OK) 446 panic("sef_receive failed: %d", r); 447 448 /* Process the request. */ 449 if (is_ipc_notify(ipc_status)) { 450 /* We are not expecting any notifications. */ 451 printf("MIB: notification from %d\n", m_in.m_source); 452 453 continue; 454 } 455 456 memset(&m_out, 0, sizeof(m_out)); 457 458 switch (m_in.m_type) { 459 case MIB_SYSCTL: 460 r = mib_sysctl(&m_in, ipc_status, &m_out); 461 462 break; 463 464 case MIB_REGISTER: 465 r = mib_register(&m_in, ipc_status); 466 467 break; 468 469 case MIB_DEREGISTER: 470 r = mib_deregister(&m_in, ipc_status); 471 472 break; 473 474 default: 475 if (IPC_STATUS_CALL(ipc_status) == SENDREC) 476 r = ENOSYS; 477 else 478 r = EDONTREPLY; 479 } 480 481 /* Send a reply, if applicable. */ 482 if (r != EDONTREPLY) { 483 m_out.m_type = r; 484 485 if ((r = ipc_sendnb(m_in.m_source, &m_out)) != OK) 486 printf("MIB: ipc_sendnb failed (%d)\n", r); 487 } 488 } 489 490 /* NOTREACHED */ 491 return 0; 492 } 493