1 /* $NetBSD: rumpuser_sp.c,v 1.38 2011/01/28 19:21:28 pooka Exp $ */ 2 3 /* 4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * Sysproxy routines. This provides system RPC support over host sockets. 30 * The most notable limitation is that the client and server must share 31 * the same ABI. This does not mean that they have to be the same 32 * machine or that they need to run the same version of the host OS, 33 * just that they must agree on the data structures. This even *might* 34 * work correctly from one hardware architecture to another. 35 */ 36 37 #include <sys/cdefs.h> 38 __RCSID("$NetBSD: rumpuser_sp.c,v 1.38 2011/01/28 19:21:28 pooka Exp $"); 39 40 #include <sys/types.h> 41 #include <sys/atomic.h> 42 #include <sys/mman.h> 43 #include <sys/socket.h> 44 45 #include <arpa/inet.h> 46 #include <netinet/in.h> 47 #include <netinet/tcp.h> 48 49 #include <assert.h> 50 #include <errno.h> 51 #include <fcntl.h> 52 #include <poll.h> 53 #include <pthread.h> 54 #include <stdarg.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <unistd.h> 59 60 #include <rump/rump.h> /* XXX: for rfork flags */ 61 #include <rump/rumpuser.h> 62 #include "rumpuser_int.h" 63 64 #include "sp_common.c" 65 66 #ifndef MAXCLI 67 #define MAXCLI 256 68 #endif 69 #ifndef MAXWORKER 70 #define MAXWORKER 128 71 #endif 72 #ifndef IDLEWORKER 73 #define IDLEWORKER 16 74 #endif 75 int rumpsp_maxworker = MAXWORKER; 76 int rumpsp_idleworker = IDLEWORKER; 77 78 static struct pollfd pfdlist[MAXCLI]; 79 static struct spclient spclist[MAXCLI]; 80 static unsigned int disco; 81 static volatile int spfini; 82 83 static struct rumpuser_sp_ops spops; 84 85 static char banner[MAXBANNER]; 86 87 #define PROTOMAJOR 0 88 #define PROTOMINOR 2 89 90 struct prefork { 91 uint32_t pf_auth[AUTHLEN]; 92 struct lwp *pf_lwp; 93 94 LIST_ENTRY(prefork) pf_entries; /* global list */ 95 LIST_ENTRY(prefork) pf_spcentries; /* linked from forking spc */ 96 }; 97 static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks); 98 static pthread_mutex_t pfmtx; 99 100 /* 101 * This version is for the server. It's optimized for multiple threads 102 * and is *NOT* reentrant wrt to signals. 103 */ 104 static int 105 waitresp(struct spclient *spc, struct respwait *rw) 106 { 107 int spcstate; 108 int rv = 0; 109 110 pthread_mutex_lock(&spc->spc_mtx); 111 sendunlockl(spc); 112 while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) { 113 pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx); 114 } 115 TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries); 116 spcstate = spc->spc_state; 117 pthread_mutex_unlock(&spc->spc_mtx); 118 119 pthread_cond_destroy(&rw->rw_cv); 120 121 if (rv) 122 return rv; 123 if (spcstate == SPCSTATE_DYING) 124 return ENOTCONN; 125 return rw->rw_error; 126 } 127 128 /* 129 * Manual wrappers, since librump does not have access to the 130 * user namespace wrapped interfaces. 131 */ 132 133 static void 134 lwproc_switch(struct lwp *l) 135 { 136 137 spops.spop_schedule(); 138 spops.spop_lwproc_switch(l); 139 spops.spop_unschedule(); 140 } 141 142 static void 143 lwproc_release(void) 144 { 145 146 spops.spop_schedule(); 147 spops.spop_lwproc_release(); 148 spops.spop_unschedule(); 149 } 150 151 static int 152 lwproc_rfork(struct spclient *spc, int flags, const char *comm) 153 { 154 int rv; 155 156 spops.spop_schedule(); 157 rv = spops.spop_lwproc_rfork(spc, flags, comm); 158 spops.spop_unschedule(); 159 160 return rv; 161 } 162 163 static int 164 lwproc_newlwp(pid_t pid) 165 { 166 int rv; 167 168 spops.spop_schedule(); 169 rv = spops.spop_lwproc_newlwp(pid); 170 spops.spop_unschedule(); 171 172 return rv; 173 } 174 175 static struct lwp * 176 lwproc_curlwp(void) 177 { 178 struct lwp *l; 179 180 spops.spop_schedule(); 181 l = spops.spop_lwproc_curlwp(); 182 spops.spop_unschedule(); 183 184 return l; 185 } 186 187 static pid_t 188 lwproc_getpid(void) 189 { 190 pid_t p; 191 192 spops.spop_schedule(); 193 p = spops.spop_getpid(); 194 spops.spop_unschedule(); 195 196 return p; 197 } 198 199 static void 200 lwproc_procexit(void) 201 { 202 203 spops.spop_schedule(); 204 spops.spop_procexit(); 205 spops.spop_unschedule(); 206 } 207 208 static int 209 rumpsyscall(int sysnum, void *data, register_t *retval) 210 { 211 int rv; 212 213 spops.spop_schedule(); 214 rv = spops.spop_syscall(sysnum, data, retval); 215 spops.spop_unschedule(); 216 217 return rv; 218 } 219 220 static uint64_t 221 nextreq(struct spclient *spc) 222 { 223 uint64_t nw; 224 225 pthread_mutex_lock(&spc->spc_mtx); 226 nw = spc->spc_nextreq++; 227 pthread_mutex_unlock(&spc->spc_mtx); 228 229 return nw; 230 } 231 232 static void 233 send_error_resp(struct spclient *spc, uint64_t reqno, int error) 234 { 235 struct rsp_hdr rhdr; 236 237 rhdr.rsp_len = sizeof(rhdr); 238 rhdr.rsp_reqno = reqno; 239 rhdr.rsp_class = RUMPSP_ERROR; 240 rhdr.rsp_type = 0; 241 rhdr.rsp_error = error; 242 243 sendlock(spc); 244 (void)dosend(spc, &rhdr, sizeof(rhdr)); 245 sendunlock(spc); 246 } 247 248 static int 249 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error) 250 { 251 struct rsp_hdr rhdr; 252 int rv; 253 254 rhdr.rsp_len = sizeof(rhdr) + sizeof(error); 255 rhdr.rsp_reqno = reqno; 256 rhdr.rsp_class = RUMPSP_RESP; 257 rhdr.rsp_type = RUMPSP_HANDSHAKE; 258 rhdr.rsp_error = 0; 259 260 sendlock(spc); 261 rv = dosend(spc, &rhdr, sizeof(rhdr)); 262 rv = dosend(spc, &error, sizeof(error)); 263 sendunlock(spc); 264 265 return rv; 266 } 267 268 static int 269 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error, 270 register_t *retval) 271 { 272 struct rsp_hdr rhdr; 273 struct rsp_sysresp sysresp; 274 int rv; 275 276 rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp); 277 rhdr.rsp_reqno = reqno; 278 rhdr.rsp_class = RUMPSP_RESP; 279 rhdr.rsp_type = RUMPSP_SYSCALL; 280 rhdr.rsp_sysnum = 0; 281 282 sysresp.rsys_error = error; 283 memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval)); 284 285 sendlock(spc); 286 rv = dosend(spc, &rhdr, sizeof(rhdr)); 287 rv = dosend(spc, &sysresp, sizeof(sysresp)); 288 sendunlock(spc); 289 290 return rv; 291 } 292 293 static int 294 send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth) 295 { 296 struct rsp_hdr rhdr; 297 int rv; 298 299 rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth); 300 rhdr.rsp_reqno = reqno; 301 rhdr.rsp_class = RUMPSP_RESP; 302 rhdr.rsp_type = RUMPSP_PREFORK; 303 rhdr.rsp_sysnum = 0; 304 305 sendlock(spc); 306 rv = dosend(spc, &rhdr, sizeof(rhdr)); 307 rv = dosend(spc, auth, AUTHLEN*sizeof(*auth)); 308 sendunlock(spc); 309 310 return rv; 311 } 312 313 static int 314 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen, 315 int wantstr, void **resp) 316 { 317 struct rsp_hdr rhdr; 318 struct rsp_copydata copydata; 319 struct respwait rw; 320 int rv; 321 322 DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr)); 323 324 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata); 325 rhdr.rsp_class = RUMPSP_REQ; 326 if (wantstr) 327 rhdr.rsp_type = RUMPSP_COPYINSTR; 328 else 329 rhdr.rsp_type = RUMPSP_COPYIN; 330 rhdr.rsp_sysnum = 0; 331 332 copydata.rcp_addr = __UNCONST(remaddr); 333 copydata.rcp_len = *dlen; 334 335 putwait(spc, &rw, &rhdr); 336 rv = dosend(spc, &rhdr, sizeof(rhdr)); 337 rv = dosend(spc, ©data, sizeof(copydata)); 338 if (rv) { 339 unputwait(spc, &rw); 340 return rv; 341 } 342 343 rv = waitresp(spc, &rw); 344 345 DPRINTF(("copyin: response %d\n", rv)); 346 347 *resp = rw.rw_data; 348 if (wantstr) 349 *dlen = rw.rw_dlen; 350 351 return rv; 352 353 } 354 355 static int 356 send_copyout_req(struct spclient *spc, const void *remaddr, 357 const void *data, size_t dlen) 358 { 359 struct rsp_hdr rhdr; 360 struct rsp_copydata copydata; 361 int rv; 362 363 DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr)); 364 365 rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen; 366 rhdr.rsp_reqno = nextreq(spc); 367 rhdr.rsp_class = RUMPSP_REQ; 368 rhdr.rsp_type = RUMPSP_COPYOUT; 369 rhdr.rsp_sysnum = 0; 370 371 copydata.rcp_addr = __UNCONST(remaddr); 372 copydata.rcp_len = dlen; 373 374 sendlock(spc); 375 rv = dosend(spc, &rhdr, sizeof(rhdr)); 376 rv = dosend(spc, ©data, sizeof(copydata)); 377 rv = dosend(spc, data, dlen); 378 sendunlock(spc); 379 380 return rv; 381 } 382 383 static int 384 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp) 385 { 386 struct rsp_hdr rhdr; 387 struct respwait rw; 388 int rv; 389 390 DPRINTF(("anonmmap_req: %zu bytes\n", howmuch)); 391 392 rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch); 393 rhdr.rsp_class = RUMPSP_REQ; 394 rhdr.rsp_type = RUMPSP_ANONMMAP; 395 rhdr.rsp_sysnum = 0; 396 397 putwait(spc, &rw, &rhdr); 398 rv = dosend(spc, &rhdr, sizeof(rhdr)); 399 rv = dosend(spc, &howmuch, sizeof(howmuch)); 400 if (rv) { 401 unputwait(spc, &rw); 402 return rv; 403 } 404 405 rv = waitresp(spc, &rw); 406 407 *resp = rw.rw_data; 408 409 DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp)); 410 411 return rv; 412 } 413 414 static int 415 send_raise_req(struct spclient *spc, int signo) 416 { 417 struct rsp_hdr rhdr; 418 int rv; 419 420 rhdr.rsp_len = sizeof(rhdr); 421 rhdr.rsp_class = RUMPSP_REQ; 422 rhdr.rsp_type = RUMPSP_RAISE; 423 rhdr.rsp_signo = signo; 424 425 sendlock(spc); 426 rv = dosend(spc, &rhdr, sizeof(rhdr)); 427 sendunlock(spc); 428 429 return rv; 430 } 431 432 static void 433 spcref(struct spclient *spc) 434 { 435 436 pthread_mutex_lock(&spc->spc_mtx); 437 spc->spc_refcnt++; 438 pthread_mutex_unlock(&spc->spc_mtx); 439 } 440 441 static void 442 spcrelease(struct spclient *spc) 443 { 444 int ref; 445 446 pthread_mutex_lock(&spc->spc_mtx); 447 ref = --spc->spc_refcnt; 448 pthread_mutex_unlock(&spc->spc_mtx); 449 450 if (ref > 0) 451 return; 452 453 DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd)); 454 455 _DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait)); 456 _DIAGASSERT(spc->spc_buf == NULL); 457 458 if (spc->spc_mainlwp) { 459 lwproc_switch(spc->spc_mainlwp); 460 lwproc_release(); 461 } 462 spc->spc_mainlwp = NULL; 463 464 close(spc->spc_fd); 465 spc->spc_fd = -1; 466 spc->spc_state = SPCSTATE_NEW; 467 468 atomic_inc_uint(&disco); 469 } 470 471 static void 472 serv_handledisco(unsigned int idx) 473 { 474 struct spclient *spc = &spclist[idx]; 475 476 DPRINTF(("rump_sp: disconnecting [%u]\n", idx)); 477 478 pfdlist[idx].fd = -1; 479 pfdlist[idx].revents = 0; 480 pthread_mutex_lock(&spc->spc_mtx); 481 spc->spc_state = SPCSTATE_DYING; 482 kickall(spc); 483 sendunlockl(spc); 484 pthread_mutex_unlock(&spc->spc_mtx); 485 486 if (spc->spc_mainlwp) { 487 lwproc_switch(spc->spc_mainlwp); 488 lwproc_procexit(); 489 lwproc_switch(NULL); 490 } 491 492 /* 493 * Nobody's going to attempt to send/receive anymore, 494 * so reinit info relevant to that. 495 */ 496 /*LINTED:pointer casts may be ok*/ 497 memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF); 498 499 spcrelease(spc); 500 } 501 502 static void 503 serv_shutdown(void) 504 { 505 struct spclient *spc; 506 unsigned int i; 507 508 for (i = 1; i < MAXCLI; i++) { 509 spc = &spclist[i]; 510 if (spc->spc_fd == -1) 511 continue; 512 513 shutdown(spc->spc_fd, SHUT_RDWR); 514 serv_handledisco(i); 515 516 spcrelease(spc); 517 } 518 } 519 520 static unsigned 521 serv_handleconn(int fd, connecthook_fn connhook, int busy) 522 { 523 struct sockaddr_storage ss; 524 socklen_t sl = sizeof(ss); 525 int newfd, flags; 526 unsigned i; 527 528 /*LINTED: cast ok */ 529 newfd = accept(fd, (struct sockaddr *)&ss, &sl); 530 if (newfd == -1) 531 return 0; 532 533 if (busy) { 534 close(newfd); /* EBUSY */ 535 return 0; 536 } 537 538 flags = fcntl(newfd, F_GETFL, 0); 539 if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) { 540 close(newfd); 541 return 0; 542 } 543 544 if (connhook(newfd) != 0) { 545 close(newfd); 546 return 0; 547 } 548 549 /* write out a banner for the client */ 550 if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL) 551 != (ssize_t)strlen(banner)) { 552 close(newfd); 553 return 0; 554 } 555 556 /* find empty slot the simple way */ 557 for (i = 0; i < MAXCLI; i++) { 558 if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW) 559 break; 560 } 561 562 assert(i < MAXCLI); 563 564 pfdlist[i].fd = newfd; 565 spclist[i].spc_fd = newfd; 566 spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */ 567 spclist[i].spc_refcnt = 1; 568 569 TAILQ_INIT(&spclist[i].spc_respwait); 570 571 DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i)); 572 573 return i; 574 } 575 576 static void 577 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data) 578 { 579 register_t retval[2] = {0, 0}; 580 int rv, sysnum; 581 582 sysnum = (int)rhdr->rsp_sysnum; 583 DPRINTF(("rump_sp: handling syscall %d from client %d\n", 584 sysnum, spc->spc_pid)); 585 586 lwproc_newlwp(spc->spc_pid); 587 spc->spc_syscallreq = rhdr->rsp_reqno; 588 rv = rumpsyscall(sysnum, data, retval); 589 spc->spc_syscallreq = 0; 590 lwproc_release(); 591 592 DPRINTF(("rump_sp: got return value %d & %d/%d\n", 593 rv, retval[0], retval[1])); 594 595 send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval); 596 } 597 598 struct sysbouncearg { 599 struct spclient *sba_spc; 600 struct rsp_hdr sba_hdr; 601 uint8_t *sba_data; 602 603 TAILQ_ENTRY(sysbouncearg) sba_entries; 604 }; 605 static pthread_mutex_t sbamtx; 606 static pthread_cond_t sbacv; 607 static int nworker, idleworker; 608 static TAILQ_HEAD(, sysbouncearg) syslist = TAILQ_HEAD_INITIALIZER(syslist); 609 610 /*ARGSUSED*/ 611 static void * 612 serv_syscallbouncer(void *arg) 613 { 614 struct sysbouncearg *sba; 615 616 for (;;) { 617 pthread_mutex_lock(&sbamtx); 618 if (idleworker >= rumpsp_idleworker) { 619 nworker--; 620 pthread_mutex_unlock(&sbamtx); 621 break; 622 } 623 idleworker++; 624 while (TAILQ_EMPTY(&syslist)) { 625 pthread_cond_wait(&sbacv, &sbamtx); 626 } 627 628 sba = TAILQ_FIRST(&syslist); 629 TAILQ_REMOVE(&syslist, sba, sba_entries); 630 idleworker--; 631 pthread_mutex_unlock(&sbamtx); 632 633 serv_handlesyscall(sba->sba_spc, 634 &sba->sba_hdr, sba->sba_data); 635 spcrelease(sba->sba_spc); 636 free(sba->sba_data); 637 free(sba); 638 } 639 640 return NULL; 641 } 642 643 static int 644 sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr) 645 { 646 struct spclient *spc = arg; 647 void *rdata = NULL; /* XXXuninit */ 648 int rv, nlocks; 649 650 rumpuser__kunlock(0, &nlocks, NULL); 651 652 rv = copyin_req(spc, raddr, len, wantstr, &rdata); 653 if (rv) 654 goto out; 655 656 memcpy(laddr, rdata, *len); 657 free(rdata); 658 659 out: 660 rumpuser__klock(nlocks, NULL); 661 if (rv) 662 return EFAULT; 663 return 0; 664 } 665 666 int 667 rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len) 668 { 669 670 return sp_copyin(arg, raddr, laddr, &len, 0); 671 } 672 673 int 674 rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len) 675 { 676 677 return sp_copyin(arg, raddr, laddr, len, 1); 678 } 679 680 static int 681 sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen) 682 { 683 struct spclient *spc = arg; 684 int nlocks, rv; 685 686 rumpuser__kunlock(0, &nlocks, NULL); 687 rv = send_copyout_req(spc, raddr, laddr, dlen); 688 rumpuser__klock(nlocks, NULL); 689 690 if (rv) 691 return EFAULT; 692 return 0; 693 } 694 695 int 696 rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen) 697 { 698 699 return sp_copyout(arg, laddr, raddr, dlen); 700 } 701 702 int 703 rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen) 704 { 705 706 return sp_copyout(arg, laddr, raddr, *dlen); 707 } 708 709 int 710 rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr) 711 { 712 struct spclient *spc = arg; 713 void *resp, *rdata; 714 int nlocks, rv; 715 716 rumpuser__kunlock(0, &nlocks, NULL); 717 718 rv = anonmmap_req(spc, howmuch, &rdata); 719 if (rv) { 720 rv = EFAULT; 721 goto out; 722 } 723 724 resp = *(void **)rdata; 725 free(rdata); 726 727 if (resp == NULL) { 728 rv = ENOMEM; 729 } 730 731 *addr = resp; 732 733 out: 734 rumpuser__klock(nlocks, NULL); 735 736 if (rv) 737 return rv; 738 return 0; 739 } 740 741 int 742 rumpuser_sp_raise(void *arg, int signo) 743 { 744 struct spclient *spc = arg; 745 int rv, nlocks; 746 747 rumpuser__kunlock(0, &nlocks, NULL); 748 rv = send_raise_req(spc, signo); 749 rumpuser__klock(nlocks, NULL); 750 751 return rv; 752 } 753 754 /* 755 * 756 * Startup routines and mainloop for server. 757 * 758 */ 759 760 struct spservarg { 761 int sps_sock; 762 connecthook_fn sps_connhook; 763 }; 764 765 static pthread_attr_t pattr_detached; 766 static void 767 handlereq(struct spclient *spc) 768 { 769 struct sysbouncearg *sba; 770 pthread_t pt; 771 int retries, error, i; 772 773 if (__predict_false(spc->spc_state == SPCSTATE_NEW)) { 774 if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) { 775 send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAUTH); 776 shutdown(spc->spc_fd, SHUT_RDWR); 777 spcfreebuf(spc); 778 return; 779 } 780 781 if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) { 782 char *comm = (char *)spc->spc_buf; 783 size_t commlen = spc->spc_hdr.rsp_len - HDRSZ; 784 785 /* ensure it's 0-terminated */ 786 /* XXX make sure it contains sensible chars? */ 787 comm[commlen] = '\0'; 788 789 if ((error = lwproc_rfork(spc, 790 RUMP_RFCFDG, comm)) != 0) { 791 shutdown(spc->spc_fd, SHUT_RDWR); 792 } 793 794 spcfreebuf(spc); 795 if (error) 796 return; 797 798 spc->spc_mainlwp = lwproc_curlwp(); 799 800 send_handshake_resp(spc, spc->spc_hdr.rsp_reqno, 0); 801 } else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) { 802 struct lwp *tmpmain; 803 struct prefork *pf; 804 struct handshake_fork *rfp; 805 uint64_t reqno; 806 int cancel; 807 808 reqno = spc->spc_hdr.rsp_reqno; 809 if (spc->spc_off-HDRSZ != sizeof(*rfp)) { 810 send_error_resp(spc, reqno, EINVAL); 811 shutdown(spc->spc_fd, SHUT_RDWR); 812 spcfreebuf(spc); 813 return; 814 } 815 816 /*LINTED*/ 817 rfp = (void *)spc->spc_buf; 818 cancel = rfp->rf_cancel; 819 820 pthread_mutex_lock(&pfmtx); 821 LIST_FOREACH(pf, &preforks, pf_entries) { 822 if (memcmp(rfp->rf_auth, pf->pf_auth, 823 sizeof(rfp->rf_auth)) == 0) { 824 LIST_REMOVE(pf, pf_entries); 825 LIST_REMOVE(pf, pf_spcentries); 826 break; 827 } 828 } 829 pthread_mutex_lock(&pfmtx); 830 spcfreebuf(spc); 831 832 if (!pf) { 833 send_error_resp(spc, reqno, ESRCH); 834 shutdown(spc->spc_fd, SHUT_RDWR); 835 return; 836 } 837 838 tmpmain = pf->pf_lwp; 839 free(pf); 840 lwproc_switch(tmpmain); 841 if (cancel) { 842 lwproc_release(); 843 shutdown(spc->spc_fd, SHUT_RDWR); 844 return; 845 } 846 847 /* 848 * So, we forked already during "prefork" to save 849 * the file descriptors from a parent exit 850 * race condition. But now we need to fork 851 * a second time since the initial fork has 852 * the wrong spc pointer. (yea, optimize 853 * interfaces some day if anyone cares) 854 */ 855 if ((error = lwproc_rfork(spc, 0, NULL)) != 0) { 856 send_error_resp(spc, reqno, error); 857 shutdown(spc->spc_fd, SHUT_RDWR); 858 lwproc_release(); 859 return; 860 } 861 spc->spc_mainlwp = lwproc_curlwp(); 862 lwproc_switch(tmpmain); 863 lwproc_release(); 864 lwproc_switch(spc->spc_mainlwp); 865 866 send_handshake_resp(spc, reqno, 0); 867 } 868 869 spc->spc_pid = lwproc_getpid(); 870 871 DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n", 872 spc, spc->spc_pid)); 873 874 lwproc_switch(NULL); 875 spc->spc_state = SPCSTATE_RUNNING; 876 return; 877 } 878 879 if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) { 880 struct prefork *pf; 881 uint64_t reqno; 882 uint32_t auth[AUTHLEN]; 883 884 DPRINTF(("rump_sp: prefork handler executing for %p\n", spc)); 885 reqno = spc->spc_hdr.rsp_reqno; 886 spcfreebuf(spc); 887 888 pf = malloc(sizeof(*pf)); 889 if (pf == NULL) { 890 send_error_resp(spc, reqno, ENOMEM); 891 return; 892 } 893 894 /* 895 * Use client main lwp to fork. this is never used by 896 * worker threads (except if spc refcount goes to 0), 897 * so we can safely use it here. 898 */ 899 lwproc_switch(spc->spc_mainlwp); 900 if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) { 901 DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc)); 902 send_error_resp(spc, reqno, error); 903 lwproc_switch(NULL); 904 free(pf); 905 return; 906 } 907 908 /* Ok, we have a new process context and a new curlwp */ 909 for (i = 0; i < AUTHLEN; i++) { 910 pf->pf_auth[i] = auth[i] = arc4random(); 911 } 912 pf->pf_lwp = lwproc_curlwp(); 913 lwproc_switch(NULL); 914 915 pthread_mutex_lock(&pfmtx); 916 LIST_INSERT_HEAD(&preforks, pf, pf_entries); 917 LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries); 918 pthread_mutex_unlock(&pfmtx); 919 920 DPRINTF(("rump_sp: prefork handler success %p\n", spc)); 921 922 send_prefork_resp(spc, reqno, auth); 923 return; 924 } 925 926 if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) { 927 send_error_resp(spc, spc->spc_hdr.rsp_reqno, EINVAL); 928 spcfreebuf(spc); 929 return; 930 } 931 932 retries = 0; 933 while ((sba = malloc(sizeof(*sba))) == NULL) { 934 if (nworker == 0 || retries > 10) { 935 send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAGAIN); 936 spcfreebuf(spc); 937 return; 938 } 939 /* slim chance of more memory? */ 940 usleep(10000); 941 } 942 943 sba->sba_spc = spc; 944 sba->sba_hdr = spc->spc_hdr; 945 sba->sba_data = spc->spc_buf; 946 spcresetbuf(spc); 947 948 spcref(spc); 949 950 pthread_mutex_lock(&sbamtx); 951 TAILQ_INSERT_TAIL(&syslist, sba, sba_entries); 952 if (idleworker > 0) { 953 /* do we have a daemon's tool (i.e. idle threads)? */ 954 pthread_cond_signal(&sbacv); 955 } else if (nworker < rumpsp_maxworker) { 956 /* 957 * Else, need to create one 958 * (if we can, otherwise just expect another 959 * worker to pick up the syscall) 960 */ 961 if (pthread_create(&pt, &pattr_detached, 962 serv_syscallbouncer, NULL) == 0) 963 nworker++; 964 } 965 pthread_mutex_unlock(&sbamtx); 966 } 967 968 static void * 969 spserver(void *arg) 970 { 971 struct spservarg *sarg = arg; 972 struct spclient *spc; 973 unsigned idx; 974 int seen; 975 int rv; 976 unsigned int nfds, maxidx; 977 978 for (idx = 0; idx < MAXCLI; idx++) { 979 pfdlist[idx].fd = -1; 980 pfdlist[idx].events = POLLIN; 981 982 spc = &spclist[idx]; 983 pthread_mutex_init(&spc->spc_mtx, NULL); 984 pthread_cond_init(&spc->spc_cv, NULL); 985 spc->spc_fd = -1; 986 } 987 pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock; 988 pfdlist[0].events = POLLIN; 989 nfds = 1; 990 maxidx = 0; 991 992 pthread_attr_init(&pattr_detached); 993 pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED); 994 /* XXX: doesn't stacksize currently work on NetBSD */ 995 pthread_attr_setstacksize(&pattr_detached, 32*1024); 996 997 pthread_mutex_init(&sbamtx, NULL); 998 pthread_cond_init(&sbacv, NULL); 999 1000 DPRINTF(("rump_sp: server mainloop\n")); 1001 1002 for (;;) { 1003 int discoed; 1004 1005 /* g/c hangarounds (eventually) */ 1006 discoed = atomic_swap_uint(&disco, 0); 1007 while (discoed--) { 1008 nfds--; 1009 idx = maxidx; 1010 while (idx) { 1011 if (pfdlist[idx].fd != -1) { 1012 maxidx = idx; 1013 break; 1014 } 1015 idx--; 1016 } 1017 DPRINTF(("rump_sp: set maxidx to [%u]\n", 1018 maxidx)); 1019 } 1020 1021 DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1)); 1022 seen = 0; 1023 rv = poll(pfdlist, maxidx+1, INFTIM); 1024 assert(maxidx+1 <= MAXCLI); 1025 assert(rv != 0); 1026 if (rv == -1) { 1027 if (errno == EINTR) 1028 continue; 1029 fprintf(stderr, "rump_spserver: poll returned %d\n", 1030 errno); 1031 break; 1032 } 1033 1034 for (idx = 0; seen < rv && idx < MAXCLI; idx++) { 1035 if ((pfdlist[idx].revents & POLLIN) == 0) 1036 continue; 1037 1038 seen++; 1039 DPRINTF(("rump_sp: activity at [%u] %d/%d\n", 1040 idx, seen, rv)); 1041 if (idx > 0) { 1042 spc = &spclist[idx]; 1043 DPRINTF(("rump_sp: mainloop read [%u]\n", idx)); 1044 switch (readframe(spc)) { 1045 case 0: 1046 break; 1047 case -1: 1048 serv_handledisco(idx); 1049 break; 1050 default: 1051 switch (spc->spc_hdr.rsp_class) { 1052 case RUMPSP_RESP: 1053 kickwaiter(spc); 1054 break; 1055 case RUMPSP_REQ: 1056 handlereq(spc); 1057 break; 1058 default: 1059 send_error_resp(spc, 1060 spc->spc_hdr.rsp_reqno, 1061 ENOENT); 1062 spcfreebuf(spc); 1063 break; 1064 } 1065 break; 1066 } 1067 1068 } else { 1069 DPRINTF(("rump_sp: mainloop new connection\n")); 1070 1071 if (__predict_false(spfini)) { 1072 close(spclist[0].spc_fd); 1073 serv_shutdown(); 1074 goto out; 1075 } 1076 1077 idx = serv_handleconn(pfdlist[0].fd, 1078 sarg->sps_connhook, nfds == MAXCLI); 1079 if (idx) 1080 nfds++; 1081 if (idx > maxidx) 1082 maxidx = idx; 1083 DPRINTF(("rump_sp: maxid now %d\n", maxidx)); 1084 } 1085 } 1086 } 1087 1088 out: 1089 return NULL; 1090 } 1091 1092 static unsigned cleanupidx; 1093 static struct sockaddr *cleanupsa; 1094 int 1095 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp, 1096 const char *ostype, const char *osrelease, const char *machine) 1097 { 1098 pthread_t pt; 1099 struct spservarg *sarg; 1100 struct sockaddr *sap; 1101 char *p; 1102 unsigned idx; 1103 int error, s; 1104 1105 p = strdup(url); 1106 if (p == NULL) 1107 return ENOMEM; 1108 error = parseurl(p, &sap, &idx, 1); 1109 free(p); 1110 if (error) 1111 return error; 1112 1113 snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n", 1114 PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine); 1115 1116 s = socket(parsetab[idx].domain, SOCK_STREAM, 0); 1117 if (s == -1) 1118 return errno; 1119 1120 spops = *spopsp; 1121 sarg = malloc(sizeof(*sarg)); 1122 if (sarg == NULL) { 1123 close(s); 1124 return ENOMEM; 1125 } 1126 1127 sarg->sps_sock = s; 1128 sarg->sps_connhook = parsetab[idx].connhook; 1129 1130 cleanupidx = idx; 1131 cleanupsa = sap; 1132 1133 /* sloppy error recovery */ 1134 1135 /*LINTED*/ 1136 if (bind(s, sap, sap->sa_len) == -1) { 1137 fprintf(stderr, "rump_sp: server bind failed\n"); 1138 return errno; 1139 } 1140 1141 if (listen(s, MAXCLI) == -1) { 1142 fprintf(stderr, "rump_sp: server listen failed\n"); 1143 return errno; 1144 } 1145 1146 if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) { 1147 fprintf(stderr, "rump_sp: cannot create wrkr thread\n"); 1148 return errno; 1149 } 1150 pthread_detach(pt); 1151 1152 return 0; 1153 } 1154 1155 void 1156 rumpuser_sp_fini(void *arg) 1157 { 1158 struct spclient *spc = arg; 1159 register_t retval[2] = {0, 0}; 1160 1161 /* 1162 * stuff response into the socket, since this process is just 1163 * about to exit 1164 */ 1165 if (spc && spc->spc_syscallreq) 1166 send_syscall_resp(spc, spc->spc_syscallreq, 0, retval); 1167 1168 if (spclist[0].spc_fd) { 1169 parsetab[cleanupidx].cleanup(cleanupsa); 1170 shutdown(spclist[0].spc_fd, SHUT_RDWR); 1171 spfini = 1; 1172 } 1173 } 1174