xref: /minix/minix/lib/libsys/rmib.c (revision e4dbab1e)
1 /* Service support for remote MIB subtrees - by D.C. van Moolenbroek */
2 /*
3  * In effect, this is a lightweight version of the MIB service's main and tree
4  * code.  Some parts of the code have even been copied almost as is, even
5  * though the copy here operates on slightly different data structures in order
6  * to keep the implementation more lightweight.  For clarification on many
7  * aspects of the source code here, see the source code of the MIB service.
8  * One unique feature here is support for sparse nodes, which is needed for
9  * net.inet/inet6 as those are using subtrees with protocol-based identifiers.
10  *
11  * There is no way for this module to get to know about MIB service deaths
12  * without possibly interfering with the main code of the service this module
13  * is a part of.  As a result, re-registration of mount points after a MIB
14  * service restart is not automatic.  Instead, the main service code should
15  * provide detection of MIB service restarts, and call rmib_reregister() after
16  * such a restart in order to remount any previously mounted subtrees.
17  */
18 
19 #include <minix/drivers.h>
20 #include <minix/sysctl.h>
21 #include <minix/rmib.h>
22 
/* Structures for outgoing and incoming data, deliberately distinctly named. */
/*
 * Descriptor for the "old" (outgoing) data of a call: the region to which
 * this module copies data out through a grant.
 */
struct rmib_oldp {
	cp_grant_id_t oldp_grant;	/* grant used as copy-out target */
	size_t oldp_len;		/* size of the copy-out region, in bytes */
};
28 
/*
 * Descriptor for the "new" (incoming) data of a call: the region from which
 * this module copies data in through a grant.
 */
struct rmib_newp {
	cp_grant_id_t newp_grant;	/* grant used as copy-in source */
	size_t newp_len;		/* exact length of the new data, in bytes */
};
33 
/*
 * The maximum field size, in bytes, for which updates (i.e., writes) to the
 * field do not require dynamic memory allocation.  By policy, non-root users
 * may not update fields exceeding this size at all.  For strings, this size
 * includes an extra byte for adding a null terminator if missing.  As the name
 * indicates, a buffer of this size is placed on the stack.  In this file, the
 * buffer in question is the local one in rmib_write(), which falls back to
 * malloc() when the new data plus one byte would not fit.
 */
#define RMIB_STACKBUF		257
42 
/*
 * The maximum number of subtrees that this service can mount.  This value can
 * be increased without any problems, but it is already quite high in practice.
 * It is the number of slots in the rnodes[] array below, and thus also the
 * upper bound (exclusive) on valid root identifiers.
 */
#define RMIB_MAX_SUBTREES	16
48 
/*
 * The array of subtree root nodes.  Each root node's array index is the root
 * identifier used in communication with the MIB service.  A slot is free when
 * its rno_node pointer is NULL.
 */
static struct {
	struct rmib_node *rno_node;	/* root node of the subtree, or NULL */
	unsigned int rno_namelen;	/* number of components in rno_name */
	int rno_name[CTL_SHORTNAME];	/* MIB name of the mount point */
} rnodes[RMIB_MAX_SUBTREES] = { { NULL, 0, { 0 } } };
58 
59 /*
60  * Return TRUE or FALSE indicating whether the given offset is within the range
61  * of data that is to be copied out.  This call can be used to test whether
62  * certain bits of data need to be prepared for copying at all.
63  */
64 int
65 rmib_inrange(struct rmib_oldp * oldp, size_t off)
66 {
67 
68 	if (oldp == NULL)
69 		return FALSE;
70 
71 	return (off < oldp->oldp_len);
72 }
73 
74 /*
75  * Return the total length of the requested data.  This should not be used
76  * directly except in highly unusual cases, such as particular node requests
77  * where the request semantics blatantly violate overall sysctl(2) semantics.
78  */
79 size_t
80 rmib_getoldlen(struct rmib_oldp * oldp)
81 {
82 
83 	if (oldp == NULL)
84 		return 0;
85 
86 	return oldp->oldp_len;
87 }
88 
89 /*
90  * Copy out (partial) data to the user.  The copy is automatically limited to
91  * the range of data requested by the user.  Return the requested length on
92  * success (for the caller's convenience) or an error code on failure.
93  */
94 ssize_t
95 rmib_copyout(struct rmib_oldp * __restrict oldp, size_t off,
96 	const void * __restrict buf, size_t size)
97 {
98 	size_t len;
99 	int r;
100 
101 	len = size;
102 	assert(len <= SSIZE_MAX);
103 
104 	if (oldp == NULL || off >= oldp->oldp_len)
105 		return size; /* nothing to do */
106 
107 	if (len > oldp->oldp_len - off)
108 		len = oldp->oldp_len - off;
109 
110 	if ((r = sys_safecopyto(MIB_PROC_NR, oldp->oldp_grant, off,
111 	    (vir_bytes)buf, len)) != OK)
112 		return r;
113 
114 	return size;
115 }
116 
117 /*
118  * Copy out (partial) data to the user, from a vector of up to RMIB_IOV_MAX
119  * local buffers.  The copy is automatically limited to the range of data
120  * requested by the user.  Return the total requested length on success or an
121  * error code on failure.
122  */
123 ssize_t
124 rmib_vcopyout(struct rmib_oldp * oldp, size_t off, const iovec_t * iov,
125 	unsigned int iovcnt)
126 {
127 	static struct vscp_vec vec[RMIB_IOV_MAX];
128 	size_t size, chunk;
129 	unsigned int i;
130 	ssize_t r;
131 
132 	assert(iov != NULL);
133 	assert(iovcnt <= __arraycount(vec));
134 
135 	/* Take a shortcut for single-vector elements, saving a kernel copy. */
136 	if (iovcnt == 1)
137 		return rmib_copyout(oldp, off, (const void *)iov->iov_addr,
138 		    iov->iov_size);
139 
140 	/*
141 	 * Iterate through the full vector even if we cannot copy out all of
142 	 * it, because we need to compute the total length.
143 	 */
144 	for (size = i = 0; iovcnt > 0; iov++, iovcnt--) {
145 		if (oldp != NULL && off < oldp->oldp_len) {
146 			chunk = oldp->oldp_len - off;
147 			if (chunk > iov->iov_size)
148 				chunk = iov->iov_size;
149 
150 			vec[i].v_from = SELF;
151 			vec[i].v_to = MIB_PROC_NR;
152 			vec[i].v_gid = oldp->oldp_grant;
153 			vec[i].v_offset = off;
154 			vec[i].v_addr = iov->iov_addr;
155 			vec[i].v_bytes = chunk;
156 
157 			off += chunk;
158 			i++;
159 		}
160 
161 		size += iov->iov_size;
162 	}
163 
164 	/* Perform the copy, if there is anything to copy, that is. */
165 	if (i > 0 && (r = sys_vsafecopy(vec, i)) != OK)
166 		return r;
167 
168 	return size;
169 }
170 
171 /*
172  * Copy in data from the user.  The given length must match exactly the length
173  * given by the user.  Return OK or an error code.
174  */
175 int
176 rmib_copyin(struct rmib_newp * __restrict newp, void * __restrict buf,
177 	size_t len)
178 {
179 
180 	if (newp == NULL || len != newp->newp_len)
181 		return EINVAL;
182 
183 	if (len == 0)
184 		return OK;
185 
186 	return sys_safecopyfrom(MIB_PROC_NR, newp->newp_grant, 0,
187 	    (vir_bytes)buf, len);
188 }
189 
190 /*
191  * Copy out a node to userland, using the exchange format for nodes (namely,
192  * a sysctlnode structure).  Return the size of the object that is (or, if the
193  * node falls outside the requested data range, would be) copied out on
194  * success, or a negative error code on failure.
195  */
196 static ssize_t
197 rmib_copyout_node(struct rmib_call * call, struct rmib_oldp * oldp,
198 	ssize_t off, unsigned int id, const struct rmib_node * rnode)
199 {
200 	struct sysctlnode scn;
201 	int visible;
202 
203 	if (!rmib_inrange(oldp, off))
204 		return sizeof(scn); /* nothing to do */
205 
206 	memset(&scn, 0, sizeof(scn));
207 
208 	/*
209 	 * We use CTLFLAG_SPARSE internally only.  NetBSD uses these flags for
210 	 * different purposes.  Either way, do not expose it to userland.
211 	 * hide any of them from the user.
212 	 */
213 	scn.sysctl_flags = SYSCTL_VERSION |
214 	    (rnode->rnode_flags & ~CTLFLAG_SPARSE);
215 	scn.sysctl_num = id;
216 	strlcpy(scn.sysctl_name, rnode->rnode_name, sizeof(scn.sysctl_name));
217 	scn.sysctl_ver = call->call_rootver;
218 	scn.sysctl_size = rnode->rnode_size;
219 
220 	/* Some information is only visible if the user can access the node. */
221 	visible = (!(rnode->rnode_flags & CTLFLAG_PRIVATE) ||
222 	    (call->call_flags & RMIB_FLAG_AUTH));
223 
224 	/*
225 	 * For immediate types, store the immediate value in the resulting
226 	 * structure, unless the caller is not authorized to obtain the value.
227 	 */
228 	if ((rnode->rnode_flags & CTLFLAG_IMMEDIATE) && visible) {
229 		switch (SYSCTL_TYPE(rnode->rnode_flags)) {
230 		case CTLTYPE_BOOL:
231 			scn.sysctl_bdata = rnode->rnode_bool;
232 			break;
233 		case CTLTYPE_INT:
234 			scn.sysctl_idata = rnode->rnode_int;
235 			break;
236 		case CTLTYPE_QUAD:
237 			scn.sysctl_qdata = rnode->rnode_quad;
238 			break;
239 		}
240 	}
241 
242 	/* Special rules apply to parent nodes. */
243 	if (SYSCTL_TYPE(rnode->rnode_flags) == CTLTYPE_NODE) {
244 		/* Report the node size the way NetBSD does, just in case. */
245 		scn.sysctl_size = sizeof(scn);
246 
247 		/*
248 		 * For real parent nodes, report child information, but only if
249 		 * the node itself is accessible by the caller.  For function-
250 		 * driven nodes, set a nonzero function address, for trace(1).
251 		 */
252 		if (rnode->rnode_func == NULL && visible) {
253 			scn.sysctl_csize = rnode->rnode_size;
254 			scn.sysctl_clen = rnode->rnode_clen;
255 		} else if (rnode->rnode_func != NULL)
256 			scn.sysctl_func = SYSCTL_NODE_FN;
257 	}
258 
259 	/* Copy out the resulting node. */
260 	return rmib_copyout(oldp, off, &scn, sizeof(scn));
261 }
262 
263 /*
264  * Given a query on a non-leaf (parent) node, provide the user with an array of
265  * this node's children.
266  */
267 static ssize_t
268 rmib_query(struct rmib_call * call, struct rmib_node * rparent,
269 	struct rmib_oldp * oldp, struct rmib_newp * newp)
270 {
271 	struct sysctlnode scn;
272 	struct rmib_node *rnode;
273 	unsigned int i, id;
274 	ssize_t r, off;
275 
276 	/* If the user passed in version numbers, check them. */
277 	if (newp != NULL) {
278 		if ((r = rmib_copyin(newp, &scn, sizeof(scn))) != OK)
279 			return r;
280 
281 		if (SYSCTL_VERS(scn.sysctl_flags) != SYSCTL_VERSION)
282 			return EINVAL;
283 
284 		/*
285 		 * If a node version number is given, it must match the version
286 		 * of the subtree or the root of the entire MIB version.
287 		 */
288 		if (scn.sysctl_ver != 0 &&
289 		    scn.sysctl_ver != call->call_rootver &&
290 		    scn.sysctl_ver != call->call_treever)
291 			return EINVAL;
292 	}
293 
294 	/* Enumerate the child nodes of the given parent node. */
295 	off = 0;
296 
297 	for (i = 0; i < rparent->rnode_size; i++) {
298 		if (rparent->rnode_flags & CTLFLAG_SPARSE) {
299 			id = rparent->rnode_icptr[i].rindir_id;
300 			rnode = rparent->rnode_icptr[i].rindir_node;
301 		} else {
302 			id = i;
303 			rnode = &rparent->rnode_cptr[i];
304 
305 			if (rnode->rnode_flags == 0)
306 				continue;
307 		}
308 
309 		if ((r = rmib_copyout_node(call, oldp, off, id, rnode)) < 0)
310 			return r;
311 		off += r;
312 	}
313 
314 	return off;
315 }
316 
317 /*
318  * Copy out a node description to userland, using the exchange format for node
319  * descriptions (namely, a sysctldesc structure).  Return the size of the
320  * object that is (or, if the description falls outside the requested data
321  * range, would be) copied out on success, or a negative error code on failure.
322  * The function may return 0 to indicate that nothing was copied out after all.
323  */
324 static ssize_t
325 rmib_copyout_desc(struct rmib_call * call, struct rmib_oldp * oldp,
326 	ssize_t off, unsigned int id, const struct rmib_node * rnode)
327 {
328 	struct sysctldesc scd;
329 	size_t len, size;
330 	ssize_t r;
331 
332 	/* Descriptions of private nodes are considered private too. */
333 	if ((rnode->rnode_flags & CTLFLAG_PRIVATE) &&
334 	    !(call->call_flags & RMIB_FLAG_AUTH))
335 		return 0;
336 
337 	/*
338 	 * Unfortunately, we do not have a scratch buffer here.  Instead, copy
339 	 * out the description structure and the actual description string
340 	 * separately.  This is more costly, but remote subtrees are already
341 	 * not going to give the best performance ever.  We do optimize for the
342 	 * case that there is no description, because that is relatively easy.
343 	 */
344 	/* The description length includes the null terminator. */
345 	if (rnode->rnode_desc != NULL)
346 		len = strlen(rnode->rnode_desc) + 1;
347 	else
348 		len = 1;
349 
350 	memset(&scd, 0, sizeof(scd));
351 	scd.descr_num = id;
352 	scd.descr_ver = call->call_rootver;
353 	scd.descr_len = len;
354 
355 	size = offsetof(struct sysctldesc, descr_str);
356 
357 	if (len == 1) {
358 		scd.descr_str[0] = '\0'; /* superfluous */
359 		size++;
360 	}
361 
362 	/* Copy out the structure, possibly including a null terminator. */
363 	if ((r = rmib_copyout(oldp, off, &scd, size)) < 0)
364 		return r;
365 
366 	if (len > 1) {
367 		/* Copy out the description itself. */
368 		if ((r = rmib_copyout(oldp, off + size, rnode->rnode_desc,
369 		    len)) < 0)
370 			return r;
371 
372 		size += len;
373 	}
374 
375 	/*
376 	 * By aligning just the size, we may leave garbage between the entries
377 	 * copied out, which is fine because it is userland's own data.
378 	 */
379 	return roundup2(size, sizeof(int32_t));
380 }
381 
382 /*
383  * Look up a child node given a parent node and a child node identifier.
384  * Return a pointer to the child node if found, or NULL otherwise.  The lookup
385  * procedure differs based on whether the parent node is sparse or not.
386  */
387 static struct rmib_node *
388 rmib_lookup(struct rmib_node * rparent, unsigned int id)
389 {
390 	struct rmib_node *rnode;
391 	struct rmib_indir *rindir;
392 	unsigned int i;
393 
394 	if (rparent->rnode_flags & CTLFLAG_SPARSE) {
395 		rindir = rparent->rnode_icptr;
396 		for (i = 0; i < rparent->rnode_size; i++, rindir++)
397 			if (rindir->rindir_id == id)
398 				return rindir->rindir_node;
399 	} else {
400 		if (id >= rparent->rnode_size)
401 			return NULL;
402 		rnode = &rparent->rnode_cptr[id];
403 		if (rnode->rnode_flags != 0)
404 			return rnode;
405 	}
406 
407 	return NULL;
408 }
409 
410 /*
411  * Retrieve node descriptions in bulk, or retrieve a particular node's
412  * description.
413  */
414 static ssize_t
415 rmib_describe(struct rmib_call * call, struct rmib_node * rparent,
416 	struct rmib_oldp * oldp, struct rmib_newp * newp)
417 {
418 	struct sysctlnode scn;
419 	struct rmib_node *rnode;
420 	unsigned int i, id;
421 	ssize_t r, off;
422 
423 	if (newp != NULL) {
424 		if ((r = rmib_copyin(newp, &scn, sizeof(scn))) != OK)
425 			return r;
426 
427 		if (SYSCTL_VERS(scn.sysctl_flags) != SYSCTL_VERSION)
428 			return EINVAL;
429 
430 		/* Locate the child node. */
431 		if ((rnode = rmib_lookup(rparent, scn.sysctl_num)) == NULL)
432 			return ENOENT;
433 
434 		/* Descriptions of private nodes are considered private too. */
435 		if ((rnode->rnode_flags & CTLFLAG_PRIVATE) &&
436 		    !(call->call_flags & RMIB_FLAG_AUTH))
437 			return EPERM;
438 
439 		/*
440 		 * If a description pointer was given, this is a request to
441 		 * set the node's description.  We do not allow this, nor would
442 		 * we be able to support it, since we cannot access the data.
443 		 */
444 		if (scn.sysctl_desc != NULL)
445 			return EPERM;
446 
447 		/*
448 		 * Copy out the requested node's description.  At this point we
449 		 * should be sure that this call does not return zero.
450 		 */
451 		return rmib_copyout_desc(call, oldp, 0, scn.sysctl_num, rnode);
452 	}
453 
454 	/* Describe the child nodes of the given parent node. */
455 	off = 0;
456 
457 	for (i = 0; i < rparent->rnode_size; i++) {
458 		if (rparent->rnode_flags & CTLFLAG_SPARSE) {
459 			id = rparent->rnode_icptr[i].rindir_id;
460 			rnode = rparent->rnode_icptr[i].rindir_node;
461 		} else {
462 			id = i;
463 			rnode = &rparent->rnode_cptr[i];
464 
465 			if (rnode->rnode_flags == 0)
466 				continue;
467 		}
468 
469 		if ((r = rmib_copyout_desc(call, oldp, off, id, rnode)) < 0)
470 			return r;
471 		off += r;
472 	}
473 
474 	return off;
475 }
476 
477 /*
478  * Return a pointer to the data associated with the given node, or NULL if the
479  * node has no associated data.  Actual calls to this function should never
480  * result in NULL - as long as the proper rules are followed elsewhere.
481  */
482 static void *
483 rmib_getptr(struct rmib_node * rnode)
484 {
485 
486 	switch (SYSCTL_TYPE(rnode->rnode_flags)) {
487 	case CTLTYPE_BOOL:
488 		if (rnode->rnode_flags & CTLFLAG_IMMEDIATE)
489 			return &rnode->rnode_bool;
490 		break;
491 	case CTLTYPE_INT:
492 		if (rnode->rnode_flags & CTLFLAG_IMMEDIATE)
493 			return &rnode->rnode_int;
494 		break;
495 	case CTLTYPE_QUAD:
496 		if (rnode->rnode_flags & CTLFLAG_IMMEDIATE)
497 			return &rnode->rnode_quad;
498 		break;
499 	case CTLTYPE_STRING:
500 	case CTLTYPE_STRUCT:
501 		if (rnode->rnode_flags & CTLFLAG_IMMEDIATE)
502 			return NULL;
503 		break;
504 	default:
505 		return NULL;
506 	}
507 
508 	return rnode->rnode_data;
509 }
510 
511 /*
512  * Read current (old) data from a regular data node, if requested.  Return the
513  * old data length.
514  */
515 static ssize_t
516 rmib_read(struct rmib_node * rnode, struct rmib_oldp * oldp)
517 {
518 	void *ptr;
519 	size_t oldlen;
520 	int r;
521 
522 	if ((ptr = rmib_getptr(rnode)) == NULL)
523 		return EINVAL;
524 
525 	if (SYSCTL_TYPE(rnode->rnode_flags) == CTLTYPE_STRING)
526 		oldlen = strlen(rnode->rnode_data) + 1;
527 	else
528 		oldlen = rnode->rnode_size;
529 
530 	if (oldlen > SSIZE_MAX)
531 		return EINVAL;
532 
533 	/* Copy out the current data, if requested at all. */
534 	if (oldp != NULL && (r = rmib_copyout(oldp, 0, ptr, oldlen)) < 0)
535 		return r;
536 
537 	/* Return the current length in any case. */
538 	return (ssize_t)oldlen;
539 }
540 
541 /*
542  * Write new data into a regular data node, if requested.
543  */
544 static int
545 rmib_write(struct rmib_call * call, struct rmib_node * rnode,
546 	struct rmib_newp * newp)
547 {
548 	bool b[(sizeof(bool) == sizeof(char)) ? 1 : -1]; /* for sanitizing */
549 	char *src, *dst, buf[RMIB_STACKBUF];
550 	size_t newlen;
551 	int r;
552 
553 	if (newp == NULL)
554 		return OK; /* nothing to do */
555 
556 	/*
557 	 * When setting a new value, we cannot risk doing an in-place update:
558 	 * the copy from userland may fail halfway through, in which case an
559 	 * in-place update could leave the node value in a corrupted state.
560 	 * Thus, we must first fetch any new data into a temporary buffer.
561 	 */
562 	newlen = newp->newp_len;
563 
564 	if ((dst = rmib_getptr(rnode)) == NULL)
565 		return EINVAL;
566 
567 	switch (SYSCTL_TYPE(rnode->rnode_flags)) {
568 	case CTLTYPE_BOOL:
569 	case CTLTYPE_INT:
570 	case CTLTYPE_QUAD:
571 	case CTLTYPE_STRUCT:
572 		/* Non-string types must have an exact size match. */
573 		if (newlen != rnode->rnode_size)
574 			return EINVAL;
575 		break;
576 	case CTLTYPE_STRING:
577 		/*
578 		 * Strings must not exceed their buffer size.  There is a
579 		 * second check further below, because we allow userland to
580 		 * give us an unterminated string.  In that case we terminate
581 		 * it ourselves, but then the null terminator must fit as well.
582 		 */
583 		if (newlen > rnode->rnode_size)
584 			return EINVAL;
585 		break;
586 	default:
587 		return EINVAL;
588 	}
589 
590 	/*
591 	 * If we cannot fit the data in the small stack buffer, then allocate a
592 	 * temporary buffer.  We add one extra byte so that we can add a null
593 	 * terminator at the end of strings in case userland did not supply
594 	 * one.  Either way, we must free the temporary buffer later!
595 	 */
596 	if (newlen + 1 > sizeof(buf)) {
597 		/*
598 		 * For regular users, we do not want to perform dynamic memory
599 		 * allocation.  Thus, for CTLTYPE_ANYWRITE nodes, only the
600 		 * superuser may set values exceeding the small buffer in size.
601 		 */
602 		if (!(call->call_flags & RMIB_FLAG_AUTH))
603 			return EPERM;
604 
605 		/* Do not return ENOMEM on allocation failure. */
606 		if ((src = malloc(newlen + 1)) == NULL)
607 			return EINVAL;
608 	} else
609 		src = buf;
610 
611 	/* Copy in the data.  Note that the given new length may be zero. */
612 	if ((r = rmib_copyin(newp, src, newlen)) == OK) {
613 		/* Check and, if acceptable, store the new value. */
614 		switch (SYSCTL_TYPE(rnode->rnode_flags)) {
615 		case CTLTYPE_BOOL:
616 			/* Sanitize booleans.  See the MIB code for details. */
617 			b[0] = (bool)src[0];
618 			memcpy(dst, &b[0], sizeof(b[0]));
619 			break;
620 		case CTLTYPE_INT:
621 		case CTLTYPE_QUAD:
622 		case CTLTYPE_STRUCT:
623 			memcpy(dst, src, rnode->rnode_size);
624 			break;
625 		case CTLTYPE_STRING:
626 			if (newlen == rnode->rnode_size &&
627 			    src[newlen - 1] != '\0') {
628 				/* Our null terminator does not fit! */
629 				r = EINVAL;
630 				break;
631 			}
632 			src[newlen] = '\0';
633 			strlcpy(dst, src, rnode->rnode_size);
634 			break;
635 		default:
636 			r = EINVAL;
637 		}
638 	}
639 
640 	if (src != buf)
641 		free(src);
642 
643 	return r;
644 }
645 
646 /*
647  * Read and/or write the value of a regular data node.  A regular data node is
648  * a leaf node.  Typically, a leaf node has no associated function, in which
649  * case this function will be used instead.  In addition, this function may be
650  * used from handler functions as part of their functionality.
651  */
652 ssize_t
653 rmib_readwrite(struct rmib_call * call, struct rmib_node * rnode,
654 	struct rmib_oldp * oldp, struct rmib_newp * newp)
655 {
656 	ssize_t len;
657 	int r;
658 
659 	/* Copy out old data, if requested.  Always get the old data length. */
660 	if ((r = len = rmib_read(rnode, oldp)) < 0)
661 		return r;
662 
663 	/* Copy in new data, if requested. */
664 	if ((r = rmib_write(call, rnode, newp)) != OK)
665 		return r;
666 
667 	/* Return the old data length. */
668 	return len;
669 }
670 
671 /*
672  * Handle a sysctl(2) call from a user process, relayed by the MIB service to
673  * us.  If the call succeeds, return the old length.  The MIB service will
674  * perform a check against the given old length and return ENOMEM to the caller
675  * when applicable, so we do not have to do that here.  If the call fails,
676  * return a negative error code.
677  */
678 static ssize_t
679 rmib_call(const message * m_in)
680 {
681 	struct rmib_node *rnode, *rparent;
682 	struct rmib_call call;
683 	struct rmib_oldp oldp_data, *oldp;
684 	struct rmib_newp newp_data, *newp;
685 	unsigned int root_id, prefixlen, namelen;
686 	int r, id, is_leaf, has_func, name[CTL_MAXNAME];
687 
688 	/*
689 	 * Look up the root of the subtree that is the subject of the call.  If
690 	 * the call is for a subtree that is not registered, return ERESTART to
691 	 * indicate to the MIB service that it should deregister the subtree it
692 	 * thinks we have.  This case may occur in practice if a deregistration
693 	 * request from us crosses a sysctl call request from the MIB service.
694 	 */
695 	root_id = m_in->m_mib_lsys_call.root_id;
696 	if (root_id >= __arraycount(rnodes) ||
697 	    (rnode = rnodes[root_id].rno_node) == NULL)
698 		return ERESTART;
699 
700 	/*
701 	 * Use the name of the mounted subtree as prefix to the given name, so
702 	 * that call_oname will point to the complete name of the node.  This
703 	 * is necessary for the few queries that make use of call_oname.
704 	 */
705 	prefixlen = rnodes[root_id].rno_namelen;
706 	memcpy(name, rnodes[root_id].rno_name, prefixlen * sizeof(name[0]));
707 
708 	/*
709 	 * Set up all data structures that we need to use while handling the
710 	 * call processing.  Start by copying in the remainder of the MIB name.
711 	 */
712 	/* A zero name length is valid and should always yield EISDIR. */
713 	namelen = m_in->m_mib_lsys_call.name_len;
714 	if (prefixlen + namelen > __arraycount(name))
715 		return EINVAL;
716 
717 	if (namelen > 0) {
718 		r = sys_safecopyfrom(m_in->m_source,
719 		    m_in->m_mib_lsys_call.name_grant, 0,
720 		    (vir_bytes)&name[prefixlen], sizeof(name[0]) * namelen);
721 		if (r != OK)
722 			return r;
723 	}
724 
725 	oldp_data.oldp_grant = m_in->m_mib_lsys_call.oldp_grant;
726 	oldp_data.oldp_len = m_in->m_mib_lsys_call.oldp_len;
727 	oldp = (GRANT_VALID(oldp_data.oldp_grant)) ? &oldp_data : NULL;
728 
729 	newp_data.newp_grant = m_in->m_mib_lsys_call.newp_grant;
730 	newp_data.newp_len = m_in->m_mib_lsys_call.newp_len;
731 	newp = (GRANT_VALID(newp_data.newp_grant)) ? &newp_data : NULL;
732 
733 	call.call_endpt = m_in->m_mib_lsys_call.user_endpt;
734 	call.call_oname = name;
735 	call.call_name = &name[prefixlen];
736 	call.call_namelen = namelen;
737 	call.call_flags = m_in->m_mib_lsys_call.flags;
738 	call.call_rootver = m_in->m_mib_lsys_call.root_ver;
739 	call.call_treever = m_in->m_mib_lsys_call.tree_ver;
740 
741 	/*
742 	 * Dispatch the call.
743 	 */
744 	for (rparent = rnode; call.call_namelen > 0; rparent = rnode) {
745 		id = call.call_name[0];
746 		call.call_name++;
747 		call.call_namelen--;
748 
749 		assert(SYSCTL_TYPE(rparent->rnode_flags) == CTLTYPE_NODE);
750 
751 		/* Check for meta-identifiers. */
752 		if (id < 0) {
753 			/*
754 			 * A meta-identifier must always be the last name
755 			 * component.
756 			 */
757 			if (call.call_namelen > 0)
758 				return EINVAL;
759 
760 			switch (id) {
761 			case CTL_QUERY:
762 				return rmib_query(&call, rparent, oldp, newp);
763 			case CTL_DESCRIBE:
764 				return rmib_describe(&call, rparent, oldp,
765 				    newp);
766 			case CTL_CREATE:
767 			case CTL_DESTROY:
768 				/* We support fully static subtrees only. */
769 				return EPERM;
770 			default:
771 				return EOPNOTSUPP;
772 			}
773 		}
774 
775 		/* Locate the child node. */
776 		if ((rnode = rmib_lookup(rparent, id)) == NULL)
777 			return ENOENT;
778 
779 		/* Check if access is permitted at this level. */
780 		if ((rnode->rnode_flags & CTLFLAG_PRIVATE) &&
781 		    !(call.call_flags & RMIB_FLAG_AUTH))
782 			return EPERM;
783 
784 		/*
785 		 * Is this a leaf node, and/or is this node handled by a
786 		 * function?  If either is true, resolution ends at this level.
787 		 */
788 		is_leaf = (SYSCTL_TYPE(rnode->rnode_flags) != CTLTYPE_NODE);
789 		has_func = (rnode->rnode_func != NULL);
790 
791 		/*
792 		 * The name may be longer only if the node is not a leaf.  That
793 		 * also applies to leaves with functions, so check this first.
794 		 */
795 		if (is_leaf && call.call_namelen > 0)
796 			return ENOTDIR;
797 
798 		/*
799 		 * If resolution indeed ends here, and the user supplied new
800 		 * data, check if writing is allowed.
801 		 */
802 		if ((is_leaf || has_func) && newp != NULL) {
803 			if (!(rnode->rnode_flags & CTLFLAG_READWRITE))
804 				return EPERM;
805 
806 			if (!(rnode->rnode_flags & CTLFLAG_ANYWRITE) &&
807 			    !(call.call_flags & RMIB_FLAG_AUTH))
808 				return EPERM;
809 		}
810 
811 		/* If this node has a handler function, let it do the work. */
812 		if (has_func)
813 			return rnode->rnode_func(&call, rnode, oldp, newp);
814 
815 		/* For regular data leaf nodes, handle generic access. */
816 		if (is_leaf)
817 			return rmib_readwrite(&call, rnode, oldp, newp);
818 
819 		/* No function and not a leaf?  Descend further. */
820 	}
821 
822 	/* If we get here, the name refers to a node array. */
823 	return EISDIR;
824 }
825 
826 /*
827  * Initialize the given node and recursively all its node-type children,
828  * assigning the proper child length value to each of them.
829  */
830 static void
831 rmib_init(struct rmib_node * rparent)
832 {
833 	struct rmib_node *rnode;
834 	unsigned int i;
835 
836 	for (i = 0; i < rparent->rnode_size; i++) {
837 		if (rparent->rnode_flags & CTLFLAG_SPARSE) {
838 			/* Indirect lists must be sorted ascending by ID. */
839 			assert(i == 0 || rparent->rnode_icptr[i].rindir_id >
840 			    rparent->rnode_icptr[i - 1].rindir_id);
841 
842 			rnode = rparent->rnode_icptr[i].rindir_node;
843 		} else {
844 			rnode = &rparent->rnode_cptr[i];
845 
846 			if (rnode->rnode_flags == 0)
847 				continue;
848 		}
849 
850 		rparent->rnode_clen++;
851 
852 		if (SYSCTL_TYPE(rnode->rnode_flags) == CTLTYPE_NODE)
853 			rmib_init(rnode); /* recurse */
854 	}
855 }
856 
857 /*
858  * Request that the MIB service (re)mount the subtree identified by the given
859  * identifier.  This is a one-way request, so we never hear whether mounting
860  * succeeds.  There is not that much we can do if it fails anyway though.
861  */
862 static void
863 rmib_send_reg(int id)
864 {
865 	message m;
866 	int r;
867 
868 	memset(&m, 0, sizeof(m));
869 
870 	m.m_type = MIB_REGISTER;
871 	m.m_lsys_mib_register.root_id = id;
872 	m.m_lsys_mib_register.flags = SYSCTL_VERSION |
873 	    (rnodes[id].rno_node->rnode_flags & ~CTLFLAG_SPARSE);
874 	m.m_lsys_mib_register.csize = rnodes[id].rno_node->rnode_size;
875 	m.m_lsys_mib_register.clen = rnodes[id].rno_node->rnode_clen;
876 	m.m_lsys_mib_register.miblen = rnodes[id].rno_namelen;
877 	memcpy(m.m_lsys_mib_register.mib, rnodes[id].rno_name,
878 	    sizeof(rnodes[id].rno_name[0]) * rnodes[id].rno_namelen);
879 
880 	if ((r = asynsend3(MIB_PROC_NR, &m, AMF_NOREPLY)) != OK)
881 		panic("asynsend3 call to MIB service failed: %d", r);
882 }
883 
884 /*
885  * Register a MIB subtree.  Initialize the subtree, add it to the local set,
886  * and send a registration request for it to the MIB service.
887  */
888 int
889 rmib_register(const int * name, unsigned int namelen, struct rmib_node * rnode)
890 {
891 	unsigned int id, free_id;
892 
893 	/* A few basic sanity checks. */
894 	if (namelen == 0 || namelen >= __arraycount(rnodes[0].rno_name))
895 		return EINVAL;
896 	if (SYSCTL_TYPE(rnode->rnode_flags) != CTLTYPE_NODE)
897 		return EINVAL;
898 
899 	/* Make sure this is a new subtree, and find a free slot for it. */
900 	for (id = free_id = 0; id < __arraycount(rnodes); id++) {
901 		if (rnodes[id].rno_node == rnode)
902 			return EEXIST;
903 		else if (rnodes[id].rno_node == NULL &&
904 		    rnodes[free_id].rno_node != NULL)
905 			free_id = id;
906 	}
907 
908 	if (rnodes[free_id].rno_node != NULL)
909 		return ENOMEM;
910 
911 	rnodes[free_id].rno_node = rnode;
912 	rnodes[free_id].rno_namelen = namelen;
913 	memcpy(rnodes[free_id].rno_name, name, sizeof(name[0]) * namelen);
914 
915 	/*
916 	 * Initialize the entire subtree.  This will also compute rnode_clen
917 	 * for the given rnode, so do this before sending the message.
918 	 */
919 	rmib_init(rnode);
920 
921 	/* Send the registration request to the MIB service. */
922 	rmib_send_reg(free_id);
923 
924 	return OK;
925 }
926 
927 /*
928  * Deregister a previously registered subtree, both internally and with the MIB
929  * service.  Return OK if the deregistration procedure has been started, in
930  * which case the given subtree is guaranteed to no longer be accessed.  Return
931  * a negative error code on failure.
932  */
933 int
934 rmib_deregister(struct rmib_node * rnode)
935 {
936 	message m;
937 	unsigned int id;
938 
939 	for (id = 0; id < __arraycount(rnodes); id++)
940 		if (rnodes[id].rno_node == rnode)
941 			break;
942 
943 	if (id == __arraycount(rnodes))
944 		return ENOENT;
945 
946 	rnodes[id].rno_node = NULL;
947 
948 	/*
949 	 * Request that the MIB service unmount the subtree.  We completely
950 	 * ignore failure here, because the caller would not be able to do
951 	 * anything about it anyway.  We may also still receive sysctl call
952 	 * requests for the node we just deregistered, but this is caught
953 	 * during request processing.  Reuse of the rnodes[] slot could be a
954 	 * potential problem though.  We could use sequence numbers in the root
955 	 * identifiers to resolve that problem if it ever occurs in reality.
956 	 */
957 	memset(&m, 0, sizeof(m));
958 
959 	m.m_type = MIB_DEREGISTER;
960 	m.m_lsys_mib_register.root_id = id;
961 
962 	(void)asynsend3(MIB_PROC_NR, &m, AMF_NOREPLY);
963 
964 	return OK;
965 }
966 
967 /*
968  * Reregister all previously registered subtrees.  This routine should be
969  * called after the main program has determined that the MIB service has been
970  * restarted.
971  */
972 void
973 rmib_reregister(void)
974 {
975 	unsigned int id;
976 
977 	for (id = 0; id < __arraycount(rnodes); id++)
978 		if (rnodes[id].rno_node != NULL)
979 			rmib_send_reg(id);
980 }
981 
982 /*
983  * Reset all registrations, without involving MIB communication.  This routine
984  * exists for testing purposes only, and may disappear in the future.
985  */
986 void
987 rmib_reset(void)
988 {
989 
990 	memset(rnodes, 0, sizeof(rnodes));
991 }
992 
993 /*
994  * Process a request from the MIB service for information about the root node
995  * of a subtree, specifically its name and description.
996  */
997 static int
998 rmib_info(const message * m_in)
999 {
1000 	struct rmib_node *rnode;
1001 	unsigned int id;
1002 	const char *ptr;
1003 	size_t size;
1004 	int r;
1005 
1006 	id = m_in->m_mib_lsys_info.root_id;
1007 	if (id >= __arraycount(rnodes) || rnodes[id].rno_node == NULL)
1008 		return ENOENT;
1009 	rnode = rnodes[id].rno_node;
1010 
1011 	/* The name must fit.  If it does not, the service writer messed up. */
1012 	size = strlen(rnode->rnode_name) + 1;
1013 	if (size > m_in->m_mib_lsys_info.name_size)
1014 		return ENAMETOOLONG;
1015 
1016 	r = sys_safecopyto(m_in->m_source, m_in->m_mib_lsys_info.name_grant, 0,
1017 	    (vir_bytes)rnode->rnode_name, size);
1018 	if (r != OK)
1019 		return r;
1020 
1021 	/* If there is no (optional) description, copy out an empty string. */
1022 	ptr = (rnode->rnode_desc != NULL) ? rnode->rnode_desc : "";
1023 	size = strlen(ptr) + 1;
1024 
1025 	if (size > m_in->m_mib_lsys_info.desc_size)
1026 		size = m_in->m_mib_lsys_info.desc_size;
1027 
1028 	return sys_safecopyto(m_in->m_source, m_in->m_mib_lsys_info.desc_grant,
1029 	    0, (vir_bytes)ptr, size);
1030 }
1031 
1032 /*
1033  * Process a request from the MIB service.  The given message should originate
1034  * from the MIB service and have one of the COMMON_MIB_ requests as type.
1035  */
1036 void
1037 rmib_process(const message * m_in, int ipc_status)
1038 {
1039 	message m_out;
1040 	uint32_t req_id;
1041 	ssize_t r;
1042 
1043 	/* Only the MIB service may issue these requests. */
1044 	if (m_in->m_source != MIB_PROC_NR)
1045 		return;
1046 
1047 	/* Process the actual request. */
1048 	switch (m_in->m_type) {
1049 	case COMMON_MIB_INFO:
1050 		req_id = m_in->m_mib_lsys_info.req_id;
1051 
1052 		r = rmib_info(m_in);
1053 
1054 		break;
1055 
1056 	case COMMON_MIB_CALL:
1057 		req_id = m_in->m_mib_lsys_call.req_id;
1058 
1059 		r = rmib_call(m_in);
1060 
1061 		break;
1062 
1063 	default:
1064 		/*
1065 		 * HACK: assume that for all current and future requests, the
1066 		 * request ID field is in the same place.  We could create a
1067 		 * m_mib_lsys_unknown pseudo message type for this, but, eh.
1068 		 */
1069 		req_id = m_in->m_mib_lsys_info.req_id;
1070 
1071 		r = ENOSYS;
1072 	}
1073 
1074 	/* Construct and send a reply message to the MIB service. */
1075 	memset(&m_out, 0, sizeof(m_out));
1076 
1077 	m_out.m_type = COMMON_MIB_REPLY;
1078 	m_out.m_lsys_mib_reply.req_id = req_id;
1079 	m_out.m_lsys_mib_reply.status = r;
1080 
1081 	if (IPC_STATUS_CALL(ipc_status) == SENDREC)
1082 		r = ipc_sendnb(m_in->m_source, &m_out);
1083 	else
1084 		r = asynsend3(m_in->m_source, &m_out, AMF_NOREPLY);
1085 
1086 	if (r != OK)
1087 		printf("lsys:rmib: unable to send reply to %d: %zd\n",
1088 		    m_in->m_source, r);
1089 }
1090