xref: /minix/minix/servers/mib/main.c (revision 08cbf5a0)
1 /* MIB service - main.c - request abstraction and first-level tree */
2 /*
3  * This is the Management Information Base (MIB) service.  Its one and only
4  * task is to implement the sysctl(2) system call, which plays a fairly
5  * important role in parts of *BSD userland.
6  *
7  * The sysctl(2) interface is used to access a variety of information.  In
8  * order to obtain that information, and possibly modify it, the MIB service
9  * calls into many other services.  The MIB service must therefore not be
10  * called directly from other services, with the exception of ProcFS.  In fact,
11  * ProcFS is currently the only service that is modeled as logically higher in
12  * the MINIX3 service stack than MIB, something that itself is possible only
13  * due to the nonblocking nature of VFS.  MIB may issue blocking calls to VFS.
14  *
15  * The MIB service is in the boot image because even init(8) makes use of
16  * sysctl(2) during its own startup, so launching the MIB service at any later
17  * time would make a proper implementation of sysctl(2) impossible.  Also, the
18  * service needs superuser privileges because it may need to issue privileged
19  * calls and obtain privileged information from other services.
20  *
21  * While most of the sysctl tree is maintained locally, the MIB service also
22  * allows other services to register "remote" subtrees which are then handled
23  * entirely by those services.  This feature, which works much like file system
24  * mounting, allows 1) sysctl handling code to stay local to its corresponding
25  * service, and 2) parts of the sysctl tree to adapt and expand dynamically as
26  * optional services are started and stopped.  Compared to the MIB service's
27  * local handling, remotely handled subtrees are subject to several additional
28  * practical restrictions, however.  In the current implementation, the MIB
29  * service makes blocking calls to remote services as needed; in the future,
30  * these interactions could be made (more) asynchronous.
31  *
32  * The MIB service was created by David van Moolenbroek <david@minix3.org>.
33  */
34 
35 #include "mib.h"
36 
37 /*
38  * Most of these initially empty nodes are filled in by their corresponding
39  * modules' _init calls; see mib_init below.  However, some subtrees are not
40  * populated by the MIB service itself.  CTL_NET is expected to be populated
41  * through registration of remote subtrees.  The libc sysctl(3) wrapper code
42  * takes care of the CTL_USER subtree.  It must have an entry here though, or
43  * sysctl(8) will not list it.  CTL_VENDOR is also empty, but writable, so that
44  * it may be used by third parties.
45  */
46 static struct mib_node mib_table[] = {
47 /* 1*/	[CTL_KERN]	= MIB_ENODE(_P | _RO, "kern", "High kernel"),
48 /* 2*/	[CTL_VM]	= MIB_ENODE(_P | _RO, "vm", "Virtual memory"),
49 /* 4*/	[CTL_NET]	= MIB_ENODE(_P | _RO, "net", "Networking"),
50 /* 6*/	[CTL_HW]	= MIB_ENODE(_P | _RO, "hw", "Generic CPU, I/O"),
51 /* 8*/	[CTL_USER]	= MIB_ENODE(_P | _RO, "user", "User-level"),
52 /*11*/	[CTL_VENDOR]	= MIB_ENODE(_P | _RW, "vendor", "Vendor specific"),
53 /*32*/	[CTL_MINIX]	= MIB_ENODE(_P | _RO, "minix", "MINIX3 specific"),
54 };
55 
56 /*
57  * The root node of the tree.  The root node is used internally only--it is
58  * impossible to access the root node itself from userland in any way.  The
59  * node is writable by default, so that programs such as init(8) may create
60  * their own top-level entries.
61  */
62 struct mib_node mib_root = MIB_NODE(_RW, mib_table, "", "");
63 
64 /*
65  * Structures describing old and new data as provided by userland.  The primary
66  * advantage of these opaque structures is that we could in principle use them
67  * to implement storage of small data results in the sysctl reply message, so
68  * as to avoid the kernel copy, without changing any of the handler code.
69  */
70 struct mib_oldp {
71 	endpoint_t oldp_endpt;
72 	vir_bytes oldp_addr;
73 	size_t oldp_len;
74 };
75 /*
76  * Same structure, different type: prevent accidental mixups, and avoid the
77  * need to use __restrict everywhere.
78  */
79 struct mib_newp {
80 	endpoint_t newp_endpt;
81 	vir_bytes newp_addr;
82 	size_t newp_len;
83 };
84 
85 /*
86  * Return TRUE or FALSE indicating whether the given offset is within the range
87  * of data that is to be copied out.  This call can be used to test whether
88  * certain bits of data need to be prepared for copying at all.
89  */
90 int
91 mib_inrange(struct mib_oldp * oldp, size_t off)
92 {
93 
94 	if (oldp == NULL)
95 		return FALSE;
96 
97 	return (off < oldp->oldp_len);
98 }
99 
100 /*
101  * Return the total length of the requested data.  This should not be used
102  * directly except in highly unusual cases, such as particular node requests
103  * where the request semantics blatantly violate overall sysctl(2) semantics.
104  */
105 size_t
106 mib_getoldlen(struct mib_oldp * oldp)
107 {
108 
109 	if (oldp == NULL)
110 		return 0;
111 
112 	return oldp->oldp_len;
113 }
114 
115 /*
116  * Copy out (partial) data to the user.  The copy is automatically limited to
117  * the range of data requested by the user.  Return the requested length on
118  * success (for the caller's convenience) or an error code on failure.
119  */
120 ssize_t
121 mib_copyout(struct mib_oldp * __restrict oldp, size_t off,
122 	const void * __restrict buf, size_t size)
123 {
124 	size_t len;
125 	int r;
126 
127 	len = size;
128 	assert(len <= SSIZE_MAX);
129 
130 	if (oldp == NULL || off >= oldp->oldp_len)
131 		return size; /* nothing to do */
132 
133 	if (len > oldp->oldp_len - off)
134 		len = oldp->oldp_len - off;
135 
136 	if ((r = sys_datacopy(SELF, (vir_bytes)buf, oldp->oldp_endpt,
137 	    oldp->oldp_addr + off, len)) != OK)
138 		return r;
139 
140 	return size;
141 }
142 
143 /*
144  * Override the oldlen value returned from the call, in situations where an
145  * error is thrown as well.
146  */
147 void
148 mib_setoldlen(struct mib_call * call, size_t oldlen)
149 {
150 
151 	call->call_reslen = oldlen;
152 }
153 
154 /*
155  * Return the new data length as provided by the user, or 0 if the user did not
156  * supply new data.
157  */
158 size_t
159 mib_getnewlen(struct mib_newp * newp)
160 {
161 
162 	if (newp == NULL)
163 		return 0;
164 
165 	return newp->newp_len;
166 }
167 
168 /*
169  * Copy in data from the user.  The given length must match exactly the length
170  * given by the user.  Return OK or an error code.
171  */
172 int
173 mib_copyin(struct mib_newp * __restrict newp, void * __restrict buf,
174 	size_t len)
175 {
176 
177 	if (newp == NULL || len != newp->newp_len)
178 		return EINVAL;
179 
180 	if (len == 0)
181 		return OK;
182 
183 	return sys_datacopy(newp->newp_endpt, newp->newp_addr, SELF,
184 	    (vir_bytes)buf, len);
185 }
186 
187 /*
188  * Copy in auxiliary data from the user, based on a user pointer obtained from
189  * data copied in earlier through mib_copyin().
190  */
191 int
192 mib_copyin_aux(struct mib_newp * __restrict newp, vir_bytes addr,
193 	void * __restrict buf, size_t len)
194 {
195 
196 	assert(newp != NULL);
197 
198 	if (len == 0)
199 		return OK;
200 
201 	return sys_datacopy(newp->newp_endpt, addr, SELF, (vir_bytes)buf, len);
202 }
203 
204 /*
205  * Create a grant for a call's old data region, if not NULL, for the given
206  * endpoint.  On success, store the grant (or GRANT_INVALID) in grantp and the
207  * length in lenp, and return OK.  On error, return an error code that must not
208  * be ENOMEM.
209  */
210 int
211 mib_relay_oldp(endpoint_t endpt, struct mib_oldp * __restrict oldp,
212 	cp_grant_id_t * grantp, size_t * __restrict lenp)
213 {
214 
215 	if (oldp != NULL) {
216 		*grantp = cpf_grant_magic(endpt, oldp->oldp_endpt,
217 		    oldp->oldp_addr, oldp->oldp_len, CPF_WRITE);
218 		if (!GRANT_VALID(*grantp))
219 			return EINVAL;
220 		*lenp = oldp->oldp_len;
221 	} else {
222 		*grantp = GRANT_INVALID;
223 		*lenp = 0;
224 	}
225 
226 	return OK;
227 }
228 
229 /*
230  * Create a grant for a call's new data region, if not NULL, for the given
231  * endpoint.  On success, store the grant (or GRANT_INVALID) in grantp and the
232  * length in lenp, and return OK.  On error, return an error code that must not
233  * be ENOMEM.
234  */
235 int
236 mib_relay_newp(endpoint_t endpt, struct mib_newp * __restrict newp,
237 	cp_grant_id_t * grantp, size_t * __restrict lenp)
238 {
239 
240 	if (newp != NULL) {
241 		*grantp = cpf_grant_magic(endpt, newp->newp_endpt,
242 		    newp->newp_addr, newp->newp_len, CPF_READ);
243 		if (!GRANT_VALID(*grantp))
244 			return EINVAL;
245 		*lenp = newp->newp_len;
246 	} else {
247 		*grantp = GRANT_INVALID;
248 		*lenp = 0;
249 	}
250 
251 	return OK;
252 }
253 
254 /*
255  * Check whether the user is allowed to perform privileged operations.  The
256  * function returns a nonzero value if this is the case, and zero otherwise.
257  * Authorization is performed only once per call.
258  */
259 int
260 mib_authed(struct mib_call * call)
261 {
262 
263 	if ((call->call_flags & (MIB_FLAG_AUTH | MIB_FLAG_NOAUTH)) == 0) {
264 		/* Ask PM if this endpoint has superuser privileges. */
265 		if (getnuid(call->call_endpt) == SUPER_USER)
266 			call->call_flags |= MIB_FLAG_AUTH;
267 		else
268 			call->call_flags |= MIB_FLAG_NOAUTH;
269 	}
270 
271 	return (call->call_flags & MIB_FLAG_AUTH);
272 }
273 
274 /*
275  * Implement the sysctl(2) system call.
276  */
277 static int
278 mib_sysctl(message * __restrict m_in, int ipc_status,
279 	message * __restrict m_out)
280 {
281 	vir_bytes oldaddr, newaddr;
282 	size_t oldlen, newlen;
283 	unsigned int namelen;
284 	int s, name[CTL_MAXNAME];
285 	endpoint_t endpt;
286 	struct mib_oldp oldp, *oldpp;
287 	struct mib_newp newp, *newpp;
288 	struct mib_call call;
289 	ssize_t r;
290 
291 	/* Only handle blocking calls.  Ignore everything else. */
292 	if (IPC_STATUS_CALL(ipc_status) != SENDREC)
293 		return EDONTREPLY;
294 
295 	endpt = m_in->m_source;
296 	oldaddr = m_in->m_lc_mib_sysctl.oldp;
297 	oldlen = m_in->m_lc_mib_sysctl.oldlen;
298 	newaddr = m_in->m_lc_mib_sysctl.newp;
299 	newlen = m_in->m_lc_mib_sysctl.newlen;
300 	namelen = m_in->m_lc_mib_sysctl.namelen;
301 
302 	if (namelen == 0 || namelen > CTL_MAXNAME)
303 		return EINVAL;
304 
305 	/*
306 	 * In most cases, the entire name fits in the request message, so we
307 	 * can avoid a kernel copy.
308 	 */
309 	if (namelen > CTL_SHORTNAME) {
310 		if ((s = sys_datacopy(endpt, m_in->m_lc_mib_sysctl.namep, SELF,
311 		    (vir_bytes)&name, sizeof(name[0]) * namelen)) != OK)
312 			return s;
313 	} else
314 		memcpy(name, m_in->m_lc_mib_sysctl.name,
315 		    sizeof(name[0]) * namelen);
316 
317 	/*
318 	 * Set up a structure for the old data, if any.  When no old address is
319 	 * given, be forgiving if oldlen is not zero, as the user may simply
320 	 * not have initialized the variable before passing a pointer to it.
321 	 */
322 	if (oldaddr != 0) {
323 		oldp.oldp_endpt = endpt;
324 		oldp.oldp_addr = oldaddr;
325 		oldp.oldp_len = oldlen;
326 		oldpp = &oldp;
327 	} else
328 		oldpp = NULL;
329 
330 	/*
331 	 * Set up a structure for the new data, if any.  If one of newaddr and
332 	 * newlen is zero but not the other, we (like NetBSD) disregard both.
333 	 */
334 	if (newaddr != 0 && newlen != 0) {
335 		newp.newp_endpt = endpt;
336 		newp.newp_addr = newaddr;
337 		newp.newp_len = newlen;
338 		newpp = &newp;
339 	} else
340 		newpp = NULL;
341 
342 	/*
343 	 * Set up a structure for other call parameters.  Most of these should
344 	 * be used rarely, and we may want to add more later, so do not pass
345 	 * all of them around as actual function parameters all the time.
346 	 */
347 	call.call_endpt = endpt;
348 	call.call_name = name;
349 	call.call_namelen = namelen;
350 	call.call_flags = 0;
351 	call.call_reslen = 0;
352 
353 	r = mib_dispatch(&call, oldpp, newpp);
354 
355 	/*
356 	 * From NetBSD: we copy out as much as we can from the old data, while
357 	 * at the same time computing the full data length.  Then, here at the
358 	 * end, if the entire result did not fit in the destination buffer, we
359 	 * return ENOMEM instead of success, thus also returning a partial
360 	 * result and the full data length.
361 	 *
362 	 * It is also possible that data are copied out along with a "real"
363 	 * error.  In that case, we must report a nonzero resulting length
364 	 * along with that error code.  This is currently the case when node
365 	 * creation resulted in a collision, in which case the error code is
366 	 * EEXIST while the existing node is copied out as well.
367 	 */
368 	if (r >= 0) {
369 		m_out->m_mib_lc_sysctl.oldlen = (size_t)r;
370 
371 		if (oldaddr != 0 && oldlen < (size_t)r)
372 			r = ENOMEM;
373 		else
374 			r = OK;
375 	} else
376 		m_out->m_mib_lc_sysctl.oldlen = call.call_reslen;
377 
378 	return r;
379 }
380 
381 /*
382  * Initialize the service.
383  */
384 static int
385 mib_init(int type __unused, sef_init_info_t * info __unused)
386 {
387 
388 	/*
389 	 * Initialize pointers and sizes of subtrees in different modules.
390 	 * This is needed because we cannot use sizeof on external arrays.
391 	 * We do initialize the node entry (including any other fields)
392 	 * statically through MIB_ENODE because that forces the array to be
393 	 * large enough to store the entry.
394 	 */
395 	mib_kern_init(&mib_table[CTL_KERN]);
396 	mib_vm_init(&mib_table[CTL_VM]);
397 	mib_hw_init(&mib_table[CTL_HW]);
398 	mib_minix_init(&mib_table[CTL_MINIX]);
399 
400 	/*
401 	 * Now that the static tree is complete, go through the entire tree,
402 	 * initializing miscellaneous fields.
403 	 */
404 	mib_tree_init();
405 
406 	/* Prepare for requests to mount remote subtrees. */
407 	mib_remote_init();
408 
409 	return OK;
410 }
411 
412 /*
413  * Perform SEF startup.
414  */
415 static void
416 mib_startup(void)
417 {
418 
419 	sef_setcb_init_fresh(mib_init);
420 	/*
421 	 * If we restart we lose all dynamic state, which means we lose all
422 	 * nodes that have been created at run time.  However, running with
423 	 * only the static node tree is still better than not running at all.
424 	 */
425 	sef_setcb_init_restart(mib_init);
426 
427 	sef_startup();
428 }
429 
430 /*
431  * The Management Information Base (MIB) service.
432  */
433 int
434 main(void)
435 {
436 	message m_in, m_out;
437 	int r, ipc_status;
438 
439 	/* Perform initialization. */
440 	mib_startup();
441 
442 	/* The main message loop. */
443 	for (;;) {
444 		/* Receive a request. */
445 		if ((r = sef_receive_status(ANY, &m_in, &ipc_status)) != OK)
446 			panic("sef_receive failed: %d", r);
447 
448 		/* Process the request. */
449 		if (is_ipc_notify(ipc_status)) {
450 			/* We are not expecting any notifications. */
451 			printf("MIB: notification from %d\n", m_in.m_source);
452 
453 			continue;
454 		}
455 
456 		memset(&m_out, 0, sizeof(m_out));
457 
458 		switch (m_in.m_type) {
459 		case MIB_SYSCTL:
460 			r = mib_sysctl(&m_in, ipc_status, &m_out);
461 
462 			break;
463 
464 		case MIB_REGISTER:
465 			r = mib_register(&m_in, ipc_status);
466 
467 			break;
468 
469 		case MIB_DEREGISTER:
470 			r = mib_deregister(&m_in, ipc_status);
471 
472 			break;
473 
474 		default:
475 			if (IPC_STATUS_CALL(ipc_status) == SENDREC)
476 				r = ENOSYS;
477 			else
478 				r = EDONTREPLY;
479 		}
480 
481 		/* Send a reply, if applicable. */
482 		if (r != EDONTREPLY) {
483 			m_out.m_type = r;
484 
485 			if ((r = ipc_sendnb(m_in.m_source, &m_out)) != OK)
486 				printf("MIB: ipc_sendnb failed (%d)\n", r);
487 		}
488 	}
489 
490 	/* NOTREACHED */
491 	return 0;
492 }
493