xref: /minix/minix/servers/mib/remote.c (revision 045e0ed3)
1 /* MIB service - remote.c - remote service management and communication */
2 
3 #include "mib.h"
4 
5 /*
6  * TODO: the main feature that is missing here is a more active way to
7  * determine that a particular service has died, so that its mount points can
8  * be removed proactively.  Without this, there is a (small) risk that we end
9  * up talking to a recycled endpoint with a service that ignores our request,
10  * resulting in a deadlock of the MIB service.  Right now, the problem is that
11  * there is no proper DS API to subscribe to generic service-down events.
12  *
13  * In the long term, communication to other services should be made
14  * asynchronous, so that the MIB service does not block if there are problems
15  * with the other service.  The protocol should already support this, and some
16  * simplifications are the result of preparing for future asynchrony support
17  * (such as not dynamically querying the remote root node for its properties,
18  * which would be very hard to implement in a nonblocking way).  However,
19  * actual support is missing.  For now we assume that the remote service either
20  * answers the request, or crashes (causing the sendrec to abort), which is
21  * mostly good enough.
22  */
23 
/*
 * This is the maximum number of remote services that may register subtrees.
 * It is also the number of entries in the remote endpoint table below.
 */
#define MIB_ENDPTS	(1U << MIB_EID_BITS)

/*
 * This is the maximum service label size, including '\0'.  It is the size of
 * the label field of each remote endpoint table entry.
 */
#define MIB_LABEL_MAX	16

/*
 * Table of remote endpoints, indexed by mount point nodes' node_eid fields.
 * An entry is in use when its 'endpt' field is not NONE.  The mount points of
 * one endpoint are chained through their node_next fields, starting at
 * 'nodes'.  The label is compared against the label of newly registering
 * services, in order to detect that a service has been restarted under a new
 * endpoint.
 */
static struct {
	endpoint_t endpt;		/* remote endpoint or NONE */
	struct mib_node *nodes;		/* head of list of mount point nodes */
	char label[MIB_LABEL_MAX];	/* label of the remote endpoint */
} endpts[MIB_ENDPTS];
36 
37 /*
38  * Initialize the table of remote endpoints.
39  */
40 void
41 mib_remote_init(void)
42 {
43 	unsigned int i;
44 
45 	for (i = 0; i < __arraycount(endpts); i++) {
46 		endpts[i].endpt = NONE;
47 		endpts[i].nodes = NULL;
48 	}
49 }
50 
51 /*
52  * The remote endpoint with the given table index has been determined to have
53  * died.  Clean up all its mount points.
54  */
55 static void
56 mib_down(unsigned int eid)
57 {
58 	struct mib_node *node, *next_node;
59 
60 	assert(endpts[eid].endpt != NONE);
61 	assert(endpts[eid].nodes != NULL);
62 
63 	/* Unmount each of the remote endpoint's mount points. */
64 	for (node = endpts[eid].nodes; node != NULL; node = next_node) {
65 		/* The unmount call may deallocate the node object. */
66 		next_node = node->node_next;
67 
68 		mib_unmount(node);
69 	}
70 
71 	/* Mark the entry itself as no longer in use. */
72 	endpts[eid].endpt = NONE;
73 	endpts[eid].nodes = NULL;
74 }
75 
76 /*
77  * Obtain the label for the given endpoint.  On success, return OK and store
78  * the label in the given buffer.  If the label cannot be retrieved or does not
79  * fit in the given buffer, return a negative error code.
80  */
81 static int
82 mib_get_label(endpoint_t endpt, char * label, size_t labelsize)
83 {
84 	char key[DS_MAX_KEYLEN];
85 	int r;
86 
87 	/* TODO: init has a label, so this is not a proper is-service test! */
88 	if ((r = ds_retrieve_label_name(key, endpt)) != OK) {
89 		printf("MIB: unable to obtain label for %d\n", endpt);
90 
91 		return r;
92 	}
93 
94 	key[sizeof(key) - 1] = 0;
95 	if (strlen(key) >= labelsize) {
96 		/* This should really never happen. */
97 		printf("MIB: service %d label '%s' is too long\n", endpt, key);
98 
99 		return ENAMETOOLONG;
100 	}
101 
102 	strlcpy(label, key, labelsize);
103 	return OK;
104 }
105 
106 /*
107  * Register a remote subtree, mounting it in the local tree as requested.
108  */
109 static void
110 mib_do_register(endpoint_t endpt, const char * label, uint32_t rid,
111 	uint32_t flags, unsigned int csize, unsigned int clen, const int * mib,
112 	unsigned int miblen)
113 {
114 	struct mib_node *node;
115 	unsigned int eid;
116 	int r, free_eid;
117 
118 	/*
119 	 * See if we already have a remote endpoint for the service's label.
120 	 * If so, we can safely assume that the old endpoint has died and we
121 	 * have to unmount any previous entries.  Also find a free entry for
122 	 * the remote endpoint if it is new.
123 	 */
124 	free_eid = -1;
125 	for (eid = 0; eid < __arraycount(endpts); eid++) {
126 		if (endpts[eid].endpt == endpt)
127 			break;
128 		else if (endpts[eid].endpt != NONE &&
129 		    !strcmp(endpts[eid].label, label)) {
130 			mib_down(eid);
131 
132 			assert(endpts[eid].endpt == NONE);
133 			assert(endpts[eid].nodes == NULL);
134 
135 			break;
136 		} else if (endpts[eid].endpt == NONE && free_eid < 0)
137 			free_eid = eid;
138 	}
139 
140 	if (eid == __arraycount(endpts)) {
141 		if (free_eid < 0) {
142 			printf("MIB: remote endpoints table is full!\n");
143 
144 			return;
145 		}
146 
147 		eid = free_eid;
148 	}
149 
150 	/*
151 	 * Make sure that the caller does not introduce two mount points with
152 	 * the same ID.  Right now we refuse such requests; instead, we could
153 	 * also choose to first deregister the old mount point with this ID.
154 	 */
155 	for (node = endpts[eid].nodes; node != NULL; node = node->node_next) {
156 		if (node->node_rid == rid)
157 			break;
158 	}
159 
160 	if (node != NULL) {
161 		MIB_DEBUG_MOUNT(("MIB: service %d tried to reuse ID %"PRIu32
162 		    "\n", endpt, rid));
163 
164 		return;
165 	}
166 
167 	/*
168 	 * If we did not already have an entry for this endpoint, add one now,
169 	 * because the mib_mount() call will expect it to be there.  If the
170 	 * mount call fails, we may have to invalidate the entry again.
171 	 */
172 	if (endpts[eid].endpt == NONE) {
173 		endpts[eid].endpt = endpt;
174 		endpts[eid].nodes = NULL;
175 		strlcpy(endpts[eid].label, label, sizeof(endpts[eid].label));
176 	}
177 
178 	/* Attempt to mount the remote subtree in the tree. */
179 	r = mib_mount(mib, miblen, eid, rid, flags, csize, clen, &node);
180 
181 	if (r != OK) {
182 		/* If the entry has no other mount points, invalidate it. */
183 		if (endpts[eid].nodes == NULL)
184 			endpts[eid].endpt = NONE;
185 
186 		return;
187 	}
188 
189 	/* Add the new node to the list of mount points of the endpoint. */
190 	node->node_next = endpts[eid].nodes;
191 	endpts[eid].nodes = node;
192 }
193 
194 /*
195  * Process a mount point registration request from another service.
196  */
197 int
198 mib_register(const message * m_in, int ipc_status)
199 {
200 	char label[DS_MAX_KEYLEN];
201 
202 	/*
203 	 * Registration messages must be one-way, or they may cause a deadlock
204 	 * if crossed by a request coming from us.  This case also effectively
205 	 * eliminates the possibility for userland to register nodes.  The
206 	 * return value of ENOSYS effectively tells userland that this call
207 	 * number is not in use, which allows us to repurpose call numbers
208 	 * later.
209 	 */
210 	if (IPC_STATUS_CALL(ipc_status) == SENDREC)
211 		return ENOSYS;
212 
213 	MIB_DEBUG_MOUNT(("MIB: got register request from %d\n",
214 	    m_in->m_source));
215 
216 	/* Double-check if the caller is a service by obtaining its label. */
217 	if (mib_get_label(m_in->m_source, label, sizeof(label)) != OK)
218 		return EDONTREPLY;
219 
220 	/* Perform one message-level bounds check here. */
221 	if (m_in->m_lsys_mib_register.miblen >
222 	    __arraycount(m_in->m_lsys_mib_register.mib))
223 		return EDONTREPLY;
224 
225 	/* The rest of the work is handled by a message-agnostic function. */
226 	mib_do_register(m_in->m_source, label,
227 	    m_in->m_lsys_mib_register.root_id, m_in->m_lsys_mib_register.flags,
228 	    m_in->m_lsys_mib_register.csize, m_in->m_lsys_mib_register.clen,
229 	    m_in->m_lsys_mib_register.mib, m_in->m_lsys_mib_register.miblen);
230 
231 	/* Never reply to this message. */
232 	return EDONTREPLY;
233 }
234 
235 /*
236  * Deregister a previously registered remote subtree, unmounting it from the
237  * local tree.
238  */
239 static void
240 mib_do_deregister(endpoint_t endpt, uint32_t rid)
241 {
242 	struct mib_node *node, **nodep;
243 	unsigned int eid;
244 
245 	for (eid = 0; eid < __arraycount(endpts); eid++) {
246 		if (endpts[eid].endpt == endpt)
247 			break;
248 	}
249 
250 	if (eid == __arraycount(endpts)) {
251 		MIB_DEBUG_MOUNT(("MIB: deregister request from unknown "
252 		    "endpoint %d\n", endpt));
253 
254 		return;
255 	}
256 
257 	for (nodep = &endpts[eid].nodes; *nodep != NULL;
258 	    nodep = &node->node_next) {
259 		node = *nodep;
260 
261 		if (node->node_rid == rid)
262 			break;
263 	}
264 
265 	if (*nodep == NULL) {
266 		MIB_DEBUG_MOUNT(("MIB: deregister request from %d for unknown "
267 		    "ID %"PRIu32"\n", endpt, rid));
268 
269 		return;
270 	}
271 
272 	/*
273 	 * The unmount function may or may not deallocate the node object, so
274 	 * remove it from the linked list first.  If this leaves an empty
275 	 * linked list, also mark the remote endpoint entry itself as free.
276 	 */
277 	*nodep = node->node_next;
278 
279 	if (endpts[eid].nodes == NULL) {
280 		endpts[eid].endpt = NONE;
281 		endpts[eid].nodes = NULL;
282 	}
283 
284 	/* Finally, unmount the remote subtree. */
285 	mib_unmount(node);
286 }
287 
288 /*
289  * Process a mount point deregistration request from another service.
290  */
291 int
292 mib_deregister(const message * m_in, int ipc_status)
293 {
294 
295 	/* Same as for registration messages. */
296 	if (IPC_STATUS_CALL(ipc_status) == SENDREC)
297 		return ENOSYS;
298 
299 	MIB_DEBUG_MOUNT(("MIB: got deregister request from %d\n",
300 	    m_in->m_source));
301 
302 	/* The rest of the work is handled by a message-agnostic function. */
303 	mib_do_deregister(m_in->m_source, m_in->m_lsys_mib_register.root_id);
304 
305 	/* Never reply to this message. */
306 	return EDONTREPLY;
307 }
308 
309 /*
310  * Retrieve information about the root of a remote subtree, specifically its
311  * name and description.  This is done only when there was no corresponding
312  * local node and one has to be created temporarily.  On success, return OK
313  * with the name and description stored in the given buffers.  Otherwise,
314  * return a negative error code.
315  */
316 int
317 mib_remote_info(unsigned int eid, uint32_t rid, char * name, size_t namesize,
318 	char * desc, size_t descsize)
319 {
320 	endpoint_t endpt;
321 	cp_grant_id_t name_grant, desc_grant;
322 	message m;
323 	int r;
324 
325 	if (eid >= __arraycount(endpts) || endpts[eid].endpt == NONE)
326 		return EINVAL;
327 
328 	endpt = endpts[eid].endpt;
329 
330 	if ((name_grant = cpf_grant_direct(endpt, (vir_bytes)name, namesize,
331 	    CPF_WRITE)) == GRANT_INVALID)
332 		return EINVAL;
333 
334 	if ((desc_grant = cpf_grant_direct(endpt, (vir_bytes)desc, descsize,
335 	    CPF_WRITE)) == GRANT_INVALID) {
336 		cpf_revoke(name_grant);
337 
338 		return EINVAL;
339 	}
340 
341 	memset(&m, 0, sizeof(m));
342 
343 	m.m_type = COMMON_MIB_INFO;
344 	m.m_mib_lsys_info.req_id = 0; /* reserved for future async support */
345 	m.m_mib_lsys_info.root_id = rid;
346 	m.m_mib_lsys_info.name_grant = name_grant;
347 	m.m_mib_lsys_info.name_size = namesize;
348 	m.m_mib_lsys_info.desc_grant = desc_grant;
349 	m.m_mib_lsys_info.desc_size = descsize;
350 
351 	r = ipc_sendrec(endpt, &m);
352 
353 	cpf_revoke(desc_grant);
354 	cpf_revoke(name_grant);
355 
356 	if (r != OK)
357 		return r;
358 
359 	if (m.m_type != COMMON_MIB_REPLY)
360 		return EINVAL;
361 	if (m.m_lsys_mib_reply.req_id != 0)
362 		return EINVAL;
363 
364 	return m.m_lsys_mib_reply.status;
365 }
366 
367 /*
368  * Relay a sysctl(2) call from a user process to a remote service, because the
369  * call reached a mount point into a remote subtree.  Return the result code
370  * from the remote service.  Alternatively, return ERESTART if it has been
371  * determined that the remote service is dead, in which case its mount points
372  * will have been removed (possibly including the entire given node), and the
373  * caller should continue the call on the underlying local subtree if there is
374  * any.  Note that the remote service may also return ERESTART to indicate that
375  * the remote subtree does not exist, either because it is being deregistered
376  * or because the remote service was restarted with loss of state.
377  */
378 ssize_t
379 mib_remote_call(struct mib_call * call, struct mib_node * node,
380 	struct mib_oldp * oldp, struct mib_newp * newp)
381 {
382 	cp_grant_id_t name_grant, oldp_grant, newp_grant;
383 	size_t oldp_len, newp_len;
384 	endpoint_t endpt;
385 	message m;
386 	int r;
387 
388 	endpt = endpts[node->node_eid].endpt;
389 	assert(endpt != NONE);
390 
391 	/*
392 	 * Allocate grants.  Since ENOMEM has a special meaning for sysctl(2),
393 	 * never return that code even if it is the most appropriate one.
394 	 * The remainder of the name may be empty; the callee should check.
395 	 */
396 	name_grant = cpf_grant_direct(endpt, (vir_bytes)call->call_name,
397 	    call->call_namelen * sizeof(call->call_name[0]), CPF_READ);
398 	if (!GRANT_VALID(name_grant))
399 		return EINVAL;
400 
401 	if ((r = mib_relay_oldp(endpt, oldp, &oldp_grant, &oldp_len)) != OK) {
402 		cpf_revoke(name_grant);
403 
404 		return r;
405 	}
406 
407 	if ((r = mib_relay_newp(endpt, newp, &newp_grant, &newp_len)) != OK) {
408 		if (GRANT_VALID(oldp_grant))
409 			cpf_revoke(oldp_grant);
410 		cpf_revoke(name_grant);
411 
412 		return r;
413 	}
414 
415 	/*
416 	 * Construct the request message.  We have not optimized this flow for
417 	 * performance.  In particular, we never embed even short names in the
418 	 * message, and we supply a flag indicating whether the caller is root
419 	 * regardless of whether the callee is interested in this.  This is
420 	 * more convenient for the callee, but also more costly.
421 	 */
422 	memset(&m, 0, sizeof(m));
423 
424 	m.m_type = COMMON_MIB_CALL;
425 	m.m_mib_lsys_call.req_id = 0; /* reserved for future async support */
426 	m.m_mib_lsys_call.root_id = node->node_rid;
427 	m.m_mib_lsys_call.name_grant = name_grant;
428 	m.m_mib_lsys_call.name_len = call->call_namelen;
429 	m.m_mib_lsys_call.oldp_grant = oldp_grant;
430 	m.m_mib_lsys_call.oldp_len = oldp_len;
431 	m.m_mib_lsys_call.newp_grant = newp_grant;
432 	m.m_mib_lsys_call.newp_len = newp_len;
433 	m.m_mib_lsys_call.user_endpt = call->call_endpt;
434 	m.m_mib_lsys_call.flags = !!mib_authed(call); /* TODO: define flags */
435 	m.m_mib_lsys_call.root_ver = node->node_ver;
436 	m.m_mib_lsys_call.tree_ver = mib_root.node_ver;
437 
438 	/* Issue a synchronous call to the remove service. */
439 	r = ipc_sendrec(endpt, &m);
440 
441 	/* Then first clean up. */
442 	if (GRANT_VALID(newp_grant))
443 		cpf_revoke(newp_grant);
444 	if (GRANT_VALID(oldp_grant))
445 		cpf_revoke(oldp_grant);
446 	cpf_revoke(name_grant);
447 
448 	/*
449 	 * Treat any IPC-level error as an indication that there is a problem
450 	 * with the remote service.  Declare it dead, remove all its mount
451 	 * points, and return ERESTART to indicate to the caller that it should
452 	 * (carefully) try to continue the request on a local subtree instead.
453 	 * Again: mib_down() may actually deallocate the given 'node' object.
454 	 */
455 	if (r != OK) {
456 		mib_down(node->node_eid);
457 
458 		return ERESTART;
459 	}
460 
461 	if (m.m_type != COMMON_MIB_REPLY)
462 		return EINVAL;
463 	if (m.m_lsys_mib_reply.req_id != 0)
464 		return EINVAL;
465 
466 	/*
467 	 * If a deregister message from the service crosses our call, we'll get
468 	 * the response before we get the deregister request.  In that case,
469 	 * the remote service should return ERESTART to indicate that the mount
470 	 * point does not exist as far as it is concerned, so that we can try
471 	 * the local version of the tree instead.
472 	 */
473 	if (m.m_lsys_mib_reply.status == ERESTART)
474 		mib_do_deregister(endpt, node->node_rid);
475 
476 	return m.m_lsys_mib_reply.status;
477 }
478