1 /* MIB service - remote.c - remote service management and communication */ 2 3 #include "mib.h" 4 5 /* 6 * TODO: the main feature that is missing here is a more active way to 7 * determine that a particular service has died, so that its mount points can 8 * be removed proactively. Without this, there is a (small) risk that we end 9 * up talking to a recycled endpoint with a service that ignores our request, 10 * resulting in a deadlock of the MIB service. Right now, the problem is that 11 * there is no proper DS API to subscribe to generic service-down events. 12 * 13 * In the long term, communication to other services should be made 14 * asynchronous, so that the MIB service does not block if there are problems 15 * with the other service. The protocol should already support this, and some 16 * simplifications are the result of preparing for future asynchrony support 17 * (such as not dynamically querying the remote root node for its properties, 18 * which would be very hard to implement in a nonblocking way). However, 19 * actual support is missing. For now we assume that the remote service either 20 * answers the request, or crashes (causing the sendrec to abort), which is 21 * mostly good enough. 22 */ 23 24 /* This is the maximum number of remote services that may register subtrees. */ 25 #define MIB_ENDPTS (1U << MIB_EID_BITS) 26 27 /* This is the maximum service label size, including '\0'. */ 28 #define MIB_LABEL_MAX 16 29 30 /* Table of remote endpoints, indexed by mount point nodes' node_eid fields. */ 31 static struct { 32 endpoint_t endpt; /* remote endpoint or NONE */ 33 struct mib_node *nodes; /* head of list of mount point nodes */ 34 char label[MIB_LABEL_MAX]; /* label of the remote endpoint */ 35 } endpts[MIB_ENDPTS]; 36 37 /* 38 * Initialize the table of remote endpoints. 39 */ 40 void 41 mib_remote_init(void) 42 { 43 unsigned int i; 44 45 for (i = 0; i < __arraycount(endpts); i++) { 46 endpts[i].endpt = NONE; 47 endpts[i].nodes = NULL; 48 } 49 } 50 51 /* 52 * The remote endpoint with the given table index has been determined to have 53 * died. Clean up all its mount points. 54 */ 55 static void 56 mib_down(unsigned int eid) 57 { 58 struct mib_node *node, *next_node; 59 60 assert(endpts[eid].endpt != NONE); 61 assert(endpts[eid].nodes != NULL); 62 63 /* Unmount each of the remote endpoint's mount points. */ 64 for (node = endpts[eid].nodes; node != NULL; node = next_node) { 65 /* The unmount call may deallocate the node object. */ 66 next_node = node->node_next; 67 68 mib_unmount(node); 69 } 70 71 /* Mark the entry itself as no longer in use. */ 72 endpts[eid].endpt = NONE; 73 endpts[eid].nodes = NULL; 74 } 75 76 /* 77 * Obtain the label for the given endpoint. On success, return OK and store 78 * the label in the given buffer. If the label cannot be retrieved or does not 79 * fit in the given buffer, return a negative error code. 80 */ 81 static int 82 mib_get_label(endpoint_t endpt, char * label, size_t labelsize) 83 { 84 char key[DS_MAX_KEYLEN]; 85 int r; 86 87 /* TODO: init has a label, so this is not a proper is-service test! */ 88 if ((r = ds_retrieve_label_name(key, endpt)) != OK) { 89 printf("MIB: unable to obtain label for %d\n", endpt); 90 91 return r; 92 } 93 94 key[sizeof(key) - 1] = 0; 95 if (strlen(key) >= labelsize) { 96 /* This should really never happen. */ 97 printf("MIB: service %d label '%s' is too long\n", endpt, key); 98 99 return ENAMETOOLONG; 100 } 101 102 strlcpy(label, key, labelsize); 103 return OK; 104 } 105 106 /* 107 * Register a remote subtree, mounting it in the local tree as requested. 108 */ 109 static void 110 mib_do_register(endpoint_t endpt, const char * label, uint32_t rid, 111 uint32_t flags, unsigned int csize, unsigned int clen, const int * mib, 112 unsigned int miblen) 113 { 114 struct mib_node *node; 115 unsigned int eid; 116 int r, free_eid; 117 118 /* 119 * See if we already have a remote endpoint for the service's label. 120 * If so, we can safely assume that the old endpoint has died and we 121 * have to unmount any previous entries. Also find a free entry for 122 * the remote endpoint if it is new. 123 */ 124 free_eid = -1; 125 for (eid = 0; eid < __arraycount(endpts); eid++) { 126 if (endpts[eid].endpt == endpt) 127 break; 128 else if (endpts[eid].endpt != NONE && 129 !strcmp(endpts[eid].label, label)) { 130 mib_down(eid); 131 132 assert(endpts[eid].endpt == NONE); 133 assert(endpts[eid].nodes == NULL); 134 135 break; 136 } else if (endpts[eid].endpt == NONE && free_eid < 0) 137 free_eid = eid; 138 } 139 140 if (eid == __arraycount(endpts)) { 141 if (free_eid < 0) { 142 printf("MIB: remote endpoints table is full!\n"); 143 144 return; 145 } 146 147 eid = free_eid; 148 } 149 150 /* 151 * Make sure that the caller does not introduce two mount points with 152 * the same ID. Right now we refuse such requests; instead, we could 153 * also choose to first deregister the old mount point with this ID. 154 */ 155 for (node = endpts[eid].nodes; node != NULL; node = node->node_next) { 156 if (node->node_rid == rid) 157 break; 158 } 159 160 if (node != NULL) { 161 MIB_DEBUG_MOUNT(("MIB: service %d tried to reuse ID %"PRIu32 162 "\n", endpt, rid)); 163 164 return; 165 } 166 167 /* 168 * If we did not already have an entry for this endpoint, add one now, 169 * because the mib_mount() call will expect it to be there. If the 170 * mount call fails, we may have to invalidate the entry again. 171 */ 172 if (endpts[eid].endpt == NONE) { 173 endpts[eid].endpt = endpt; 174 endpts[eid].nodes = NULL; 175 strlcpy(endpts[eid].label, label, sizeof(endpts[eid].label)); 176 } 177 178 /* Attempt to mount the remote subtree in the tree. */ 179 r = mib_mount(mib, miblen, eid, rid, flags, csize, clen, &node); 180 181 if (r != OK) { 182 /* If the entry has no other mount points, invalidate it. */ 183 if (endpts[eid].nodes == NULL) 184 endpts[eid].endpt = NONE; 185 186 return; 187 } 188 189 /* Add the new node to the list of mount points of the endpoint. */ 190 node->node_next = endpts[eid].nodes; 191 endpts[eid].nodes = node; 192 } 193 194 /* 195 * Process a mount point registration request from another service. 196 */ 197 int 198 mib_register(const message * m_in, int ipc_status) 199 { 200 char label[DS_MAX_KEYLEN]; 201 202 /* 203 * Registration messages must be one-way, or they may cause a deadlock 204 * if crossed by a request coming from us. This case also effectively 205 * eliminates the possibility for userland to register nodes. The 206 * return value of ENOSYS effectively tells userland that this call 207 * number is not in use, which allows us to repurpose call numbers 208 * later. 209 */ 210 if (IPC_STATUS_CALL(ipc_status) == SENDREC) 211 return ENOSYS; 212 213 MIB_DEBUG_MOUNT(("MIB: got register request from %d\n", 214 m_in->m_source)); 215 216 /* Double-check if the caller is a service by obtaining its label. */ 217 if (mib_get_label(m_in->m_source, label, sizeof(label)) != OK) 218 return EDONTREPLY; 219 220 /* Perform one message-level bounds check here. */ 221 if (m_in->m_lsys_mib_register.miblen > 222 __arraycount(m_in->m_lsys_mib_register.mib)) 223 return EDONTREPLY; 224 225 /* The rest of the work is handled by a message-agnostic function. */ 226 mib_do_register(m_in->m_source, label, 227 m_in->m_lsys_mib_register.root_id, m_in->m_lsys_mib_register.flags, 228 m_in->m_lsys_mib_register.csize, m_in->m_lsys_mib_register.clen, 229 m_in->m_lsys_mib_register.mib, m_in->m_lsys_mib_register.miblen); 230 231 /* Never reply to this message. */ 232 return EDONTREPLY; 233 } 234 235 /* 236 * Deregister a previously registered remote subtree, unmounting it from the 237 * local tree. 238 */ 239 static void 240 mib_do_deregister(endpoint_t endpt, uint32_t rid) 241 { 242 struct mib_node *node, **nodep; 243 unsigned int eid; 244 245 for (eid = 0; eid < __arraycount(endpts); eid++) { 246 if (endpts[eid].endpt == endpt) 247 break; 248 } 249 250 if (eid == __arraycount(endpts)) { 251 MIB_DEBUG_MOUNT(("MIB: deregister request from unknown " 252 "endpoint %d\n", endpt)); 253 254 return; 255 } 256 257 for (nodep = &endpts[eid].nodes; *nodep != NULL; 258 nodep = &node->node_next) { 259 node = *nodep; 260 261 if (node->node_rid == rid) 262 break; 263 } 264 265 if (*nodep == NULL) { 266 MIB_DEBUG_MOUNT(("MIB: deregister request from %d for unknown " 267 "ID %"PRIu32"\n", endpt, rid)); 268 269 return; 270 } 271 272 /* 273 * The unmount function may or may not deallocate the node object, so 274 * remove it from the linked list first. If this leaves an empty 275 * linked list, also mark the remote endpoint entry itself as free. 276 */ 277 *nodep = node->node_next; 278 279 if (endpts[eid].nodes == NULL) { 280 endpts[eid].endpt = NONE; 281 endpts[eid].nodes = NULL; 282 } 283 284 /* Finally, unmount the remote subtree. */ 285 mib_unmount(node); 286 } 287 288 /* 289 * Process a mount point deregistration request from another service. 290 */ 291 int 292 mib_deregister(const message * m_in, int ipc_status) 293 { 294 295 /* Same as for registration messages. */ 296 if (IPC_STATUS_CALL(ipc_status) == SENDREC) 297 return ENOSYS; 298 299 MIB_DEBUG_MOUNT(("MIB: got deregister request from %d\n", 300 m_in->m_source)); 301 302 /* The rest of the work is handled by a message-agnostic function. */ 303 mib_do_deregister(m_in->m_source, m_in->m_lsys_mib_register.root_id); 304 305 /* Never reply to this message. */ 306 return EDONTREPLY; 307 } 308 309 /* 310 * Retrieve information about the root of a remote subtree, specifically its 311 * name and description. This is done only when there was no corresponding 312 * local node and one has to be created temporarily. On success, return OK 313 * with the name and description stored in the given buffers. Otherwise, 314 * return a negative error code. 315 */ 316 int 317 mib_remote_info(unsigned int eid, uint32_t rid, char * name, size_t namesize, 318 char * desc, size_t descsize) 319 { 320 endpoint_t endpt; 321 cp_grant_id_t name_grant, desc_grant; 322 message m; 323 int r; 324 325 if (eid >= __arraycount(endpts) || endpts[eid].endpt == NONE) 326 return EINVAL; 327 328 endpt = endpts[eid].endpt; 329 330 if ((name_grant = cpf_grant_direct(endpt, (vir_bytes)name, namesize, 331 CPF_WRITE)) == GRANT_INVALID) 332 return EINVAL; 333 334 if ((desc_grant = cpf_grant_direct(endpt, (vir_bytes)desc, descsize, 335 CPF_WRITE)) == GRANT_INVALID) { 336 cpf_revoke(name_grant); 337 338 return EINVAL; 339 } 340 341 memset(&m, 0, sizeof(m)); 342 343 m.m_type = COMMON_MIB_INFO; 344 m.m_mib_lsys_info.req_id = 0; /* reserved for future async support */ 345 m.m_mib_lsys_info.root_id = rid; 346 m.m_mib_lsys_info.name_grant = name_grant; 347 m.m_mib_lsys_info.name_size = namesize; 348 m.m_mib_lsys_info.desc_grant = desc_grant; 349 m.m_mib_lsys_info.desc_size = descsize; 350 351 r = ipc_sendrec(endpt, &m); 352 353 cpf_revoke(desc_grant); 354 cpf_revoke(name_grant); 355 356 if (r != OK) 357 return r; 358 359 if (m.m_type != COMMON_MIB_REPLY) 360 return EINVAL; 361 if (m.m_lsys_mib_reply.req_id != 0) 362 return EINVAL; 363 364 return m.m_lsys_mib_reply.status; 365 } 366 367 /* 368 * Relay a sysctl(2) call from a user process to a remote service, because the 369 * call reached a mount point into a remote subtree. Return the result code 370 * from the remote service. Alternatively, return ERESTART if it has been 371 * determined that the remote service is dead, in which case its mount points 372 * will have been removed (possibly including the entire given node), and the 373 * caller should continue the call on the underlying local subtree if there is 374 * any. Note that the remote service may also return ERESTART to indicate that 375 * the remote subtree does not exist, either because it is being deregistered 376 * or because the remote service was restarted with loss of state. 377 */ 378 ssize_t 379 mib_remote_call(struct mib_call * call, struct mib_node * node, 380 struct mib_oldp * oldp, struct mib_newp * newp) 381 { 382 cp_grant_id_t name_grant, oldp_grant, newp_grant; 383 size_t oldp_len, newp_len; 384 endpoint_t endpt; 385 message m; 386 int r; 387 388 endpt = endpts[node->node_eid].endpt; 389 assert(endpt != NONE); 390 391 /* 392 * Allocate grants. Since ENOMEM has a special meaning for sysctl(2), 393 * never return that code even if it is the most appropriate one. 394 * The remainder of the name may be empty; the callee should check. 395 */ 396 name_grant = cpf_grant_direct(endpt, (vir_bytes)call->call_name, 397 call->call_namelen * sizeof(call->call_name[0]), CPF_READ); 398 if (!GRANT_VALID(name_grant)) 399 return EINVAL; 400 401 if ((r = mib_relay_oldp(endpt, oldp, &oldp_grant, &oldp_len)) != OK) { 402 cpf_revoke(name_grant); 403 404 return r; 405 } 406 407 if ((r = mib_relay_newp(endpt, newp, &newp_grant, &newp_len)) != OK) { 408 if (GRANT_VALID(oldp_grant)) 409 cpf_revoke(oldp_grant); 410 cpf_revoke(name_grant); 411 412 return r; 413 } 414 415 /* 416 * Construct the request message. We have not optimized this flow for 417 * performance. In particular, we never embed even short names in the 418 * message, and we supply a flag indicating whether the caller is root 419 * regardless of whether the callee is interested in this. This is 420 * more convenient for the callee, but also more costly. 421 */ 422 memset(&m, 0, sizeof(m)); 423 424 m.m_type = COMMON_MIB_CALL; 425 m.m_mib_lsys_call.req_id = 0; /* reserved for future async support */ 426 m.m_mib_lsys_call.root_id = node->node_rid; 427 m.m_mib_lsys_call.name_grant = name_grant; 428 m.m_mib_lsys_call.name_len = call->call_namelen; 429 m.m_mib_lsys_call.oldp_grant = oldp_grant; 430 m.m_mib_lsys_call.oldp_len = oldp_len; 431 m.m_mib_lsys_call.newp_grant = newp_grant; 432 m.m_mib_lsys_call.newp_len = newp_len; 433 m.m_mib_lsys_call.user_endpt = call->call_endpt; 434 m.m_mib_lsys_call.flags = !!mib_authed(call); /* TODO: define flags */ 435 m.m_mib_lsys_call.root_ver = node->node_ver; 436 m.m_mib_lsys_call.tree_ver = mib_root.node_ver; 437 438 /* Issue a synchronous call to the remove service. */ 439 r = ipc_sendrec(endpt, &m); 440 441 /* Then first clean up. */ 442 if (GRANT_VALID(newp_grant)) 443 cpf_revoke(newp_grant); 444 if (GRANT_VALID(oldp_grant)) 445 cpf_revoke(oldp_grant); 446 cpf_revoke(name_grant); 447 448 /* 449 * Treat any IPC-level error as an indication that there is a problem 450 * with the remote service. Declare it dead, remove all its mount 451 * points, and return ERESTART to indicate to the caller that it should 452 * (carefully) try to continue the request on a local subtree instead. 453 * Again: mib_down() may actually deallocate the given 'node' object. 454 */ 455 if (r != OK) { 456 mib_down(node->node_eid); 457 458 return ERESTART; 459 } 460 461 if (m.m_type != COMMON_MIB_REPLY) 462 return EINVAL; 463 if (m.m_lsys_mib_reply.req_id != 0) 464 return EINVAL; 465 466 /* 467 * If a deregister message from the service crosses our call, we'll get 468 * the response before we get the deregister request. In that case, 469 * the remote service should return ERESTART to indicate that the mount 470 * point does not exist as far as it is concerned, so that we can try 471 * the local version of the tree instead. 472 */ 473 if (m.m_lsys_mib_reply.status == ERESTART) 474 mib_do_deregister(endpt, node->node_rid); 475 476 return m.m_lsys_mib_reply.status; 477 } 478