xref: /netbsd/lib/librumpuser/rumpuser_sp.c (revision 6550d01e)
1 /*      $NetBSD: rumpuser_sp.c,v 1.38 2011/01/28 19:21:28 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Sysproxy routines.  This provides system RPC support over host sockets.
30  * The most notable limitation is that the client and server must share
31  * the same ABI.  This does not mean that they have to be the same
32  * machine or that they need to run the same version of the host OS,
33  * just that they must agree on the data structures.  This even *might*
34  * work correctly from one hardware architecture to another.
35  */
36 
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: rumpuser_sp.c,v 1.38 2011/01/28 19:21:28 pooka Exp $");
39 
40 #include <sys/types.h>
41 #include <sys/atomic.h>
42 #include <sys/mman.h>
43 #include <sys/socket.h>
44 
45 #include <arpa/inet.h>
46 #include <netinet/in.h>
47 #include <netinet/tcp.h>
48 
49 #include <assert.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <poll.h>
53 #include <pthread.h>
54 #include <stdarg.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <unistd.h>
59 
60 #include <rump/rump.h> /* XXX: for rfork flags */
61 #include <rump/rumpuser.h>
62 #include "rumpuser_int.h"
63 
64 #include "sp_common.c"
65 
66 #ifndef MAXCLI
67 #define MAXCLI 256
68 #endif
69 #ifndef MAXWORKER
70 #define MAXWORKER 128
71 #endif
72 #ifndef IDLEWORKER
73 #define IDLEWORKER 16
74 #endif
75 int rumpsp_maxworker = MAXWORKER;
76 int rumpsp_idleworker = IDLEWORKER;
77 
78 static struct pollfd pfdlist[MAXCLI];
79 static struct spclient spclist[MAXCLI];
80 static unsigned int disco;
81 static volatile int spfini;
82 
83 static struct rumpuser_sp_ops spops;
84 
85 static char banner[MAXBANNER];
86 
87 #define PROTOMAJOR 0
88 #define PROTOMINOR 2
89 
90 struct prefork {
91 	uint32_t pf_auth[AUTHLEN];
92 	struct lwp *pf_lwp;
93 
94 	LIST_ENTRY(prefork) pf_entries;		/* global list */
95 	LIST_ENTRY(prefork) pf_spcentries;	/* linked from forking spc */
96 };
97 static LIST_HEAD(, prefork) preforks = LIST_HEAD_INITIALIZER(preforks);
98 static pthread_mutex_t pfmtx;
99 
100 /*
101  * This version is for the server.  It's optimized for multiple threads
102  * and is *NOT* reentrant wrt to signals.
103  */
104 static int
105 waitresp(struct spclient *spc, struct respwait *rw)
106 {
107 	int spcstate;
108 	int rv = 0;
109 
110 	pthread_mutex_lock(&spc->spc_mtx);
111 	sendunlockl(spc);
112 	while (!rw->rw_done && spc->spc_state != SPCSTATE_DYING) {
113 		pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
114 	}
115 	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
116 	spcstate = spc->spc_state;
117 	pthread_mutex_unlock(&spc->spc_mtx);
118 
119 	pthread_cond_destroy(&rw->rw_cv);
120 
121 	if (rv)
122 		return rv;
123 	if (spcstate == SPCSTATE_DYING)
124 		return ENOTCONN;
125 	return rw->rw_error;
126 }
127 
128 /*
129  * Manual wrappers, since librump does not have access to the
130  * user namespace wrapped interfaces.
131  */
132 
133 static void
134 lwproc_switch(struct lwp *l)
135 {
136 
137 	spops.spop_schedule();
138 	spops.spop_lwproc_switch(l);
139 	spops.spop_unschedule();
140 }
141 
142 static void
143 lwproc_release(void)
144 {
145 
146 	spops.spop_schedule();
147 	spops.spop_lwproc_release();
148 	spops.spop_unschedule();
149 }
150 
151 static int
152 lwproc_rfork(struct spclient *spc, int flags, const char *comm)
153 {
154 	int rv;
155 
156 	spops.spop_schedule();
157 	rv = spops.spop_lwproc_rfork(spc, flags, comm);
158 	spops.spop_unschedule();
159 
160 	return rv;
161 }
162 
163 static int
164 lwproc_newlwp(pid_t pid)
165 {
166 	int rv;
167 
168 	spops.spop_schedule();
169 	rv = spops.spop_lwproc_newlwp(pid);
170 	spops.spop_unschedule();
171 
172 	return rv;
173 }
174 
175 static struct lwp *
176 lwproc_curlwp(void)
177 {
178 	struct lwp *l;
179 
180 	spops.spop_schedule();
181 	l = spops.spop_lwproc_curlwp();
182 	spops.spop_unschedule();
183 
184 	return l;
185 }
186 
187 static pid_t
188 lwproc_getpid(void)
189 {
190 	pid_t p;
191 
192 	spops.spop_schedule();
193 	p = spops.spop_getpid();
194 	spops.spop_unschedule();
195 
196 	return p;
197 }
198 
199 static void
200 lwproc_procexit(void)
201 {
202 
203 	spops.spop_schedule();
204 	spops.spop_procexit();
205 	spops.spop_unschedule();
206 }
207 
208 static int
209 rumpsyscall(int sysnum, void *data, register_t *retval)
210 {
211 	int rv;
212 
213 	spops.spop_schedule();
214 	rv = spops.spop_syscall(sysnum, data, retval);
215 	spops.spop_unschedule();
216 
217 	return rv;
218 }
219 
220 static uint64_t
221 nextreq(struct spclient *spc)
222 {
223 	uint64_t nw;
224 
225 	pthread_mutex_lock(&spc->spc_mtx);
226 	nw = spc->spc_nextreq++;
227 	pthread_mutex_unlock(&spc->spc_mtx);
228 
229 	return nw;
230 }
231 
232 static void
233 send_error_resp(struct spclient *spc, uint64_t reqno, int error)
234 {
235 	struct rsp_hdr rhdr;
236 
237 	rhdr.rsp_len = sizeof(rhdr);
238 	rhdr.rsp_reqno = reqno;
239 	rhdr.rsp_class = RUMPSP_ERROR;
240 	rhdr.rsp_type = 0;
241 	rhdr.rsp_error = error;
242 
243 	sendlock(spc);
244 	(void)dosend(spc, &rhdr, sizeof(rhdr));
245 	sendunlock(spc);
246 }
247 
248 static int
249 send_handshake_resp(struct spclient *spc, uint64_t reqno, int error)
250 {
251 	struct rsp_hdr rhdr;
252 	int rv;
253 
254 	rhdr.rsp_len = sizeof(rhdr) + sizeof(error);
255 	rhdr.rsp_reqno = reqno;
256 	rhdr.rsp_class = RUMPSP_RESP;
257 	rhdr.rsp_type = RUMPSP_HANDSHAKE;
258 	rhdr.rsp_error = 0;
259 
260 	sendlock(spc);
261 	rv = dosend(spc, &rhdr, sizeof(rhdr));
262 	rv = dosend(spc, &error, sizeof(error));
263 	sendunlock(spc);
264 
265 	return rv;
266 }
267 
268 static int
269 send_syscall_resp(struct spclient *spc, uint64_t reqno, int error,
270 	register_t *retval)
271 {
272 	struct rsp_hdr rhdr;
273 	struct rsp_sysresp sysresp;
274 	int rv;
275 
276 	rhdr.rsp_len = sizeof(rhdr) + sizeof(sysresp);
277 	rhdr.rsp_reqno = reqno;
278 	rhdr.rsp_class = RUMPSP_RESP;
279 	rhdr.rsp_type = RUMPSP_SYSCALL;
280 	rhdr.rsp_sysnum = 0;
281 
282 	sysresp.rsys_error = error;
283 	memcpy(sysresp.rsys_retval, retval, sizeof(sysresp.rsys_retval));
284 
285 	sendlock(spc);
286 	rv = dosend(spc, &rhdr, sizeof(rhdr));
287 	rv = dosend(spc, &sysresp, sizeof(sysresp));
288 	sendunlock(spc);
289 
290 	return rv;
291 }
292 
293 static int
294 send_prefork_resp(struct spclient *spc, uint64_t reqno, uint32_t *auth)
295 {
296 	struct rsp_hdr rhdr;
297 	int rv;
298 
299 	rhdr.rsp_len = sizeof(rhdr) + AUTHLEN*sizeof(*auth);
300 	rhdr.rsp_reqno = reqno;
301 	rhdr.rsp_class = RUMPSP_RESP;
302 	rhdr.rsp_type = RUMPSP_PREFORK;
303 	rhdr.rsp_sysnum = 0;
304 
305 	sendlock(spc);
306 	rv = dosend(spc, &rhdr, sizeof(rhdr));
307 	rv = dosend(spc, auth, AUTHLEN*sizeof(*auth));
308 	sendunlock(spc);
309 
310 	return rv;
311 }
312 
313 static int
314 copyin_req(struct spclient *spc, const void *remaddr, size_t *dlen,
315 	int wantstr, void **resp)
316 {
317 	struct rsp_hdr rhdr;
318 	struct rsp_copydata copydata;
319 	struct respwait rw;
320 	int rv;
321 
322 	DPRINTF(("copyin_req: %zu bytes from %p\n", *dlen, remaddr));
323 
324 	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata);
325 	rhdr.rsp_class = RUMPSP_REQ;
326 	if (wantstr)
327 		rhdr.rsp_type = RUMPSP_COPYINSTR;
328 	else
329 		rhdr.rsp_type = RUMPSP_COPYIN;
330 	rhdr.rsp_sysnum = 0;
331 
332 	copydata.rcp_addr = __UNCONST(remaddr);
333 	copydata.rcp_len = *dlen;
334 
335 	putwait(spc, &rw, &rhdr);
336 	rv = dosend(spc, &rhdr, sizeof(rhdr));
337 	rv = dosend(spc, &copydata, sizeof(copydata));
338 	if (rv) {
339 		unputwait(spc, &rw);
340 		return rv;
341 	}
342 
343 	rv = waitresp(spc, &rw);
344 
345 	DPRINTF(("copyin: response %d\n", rv));
346 
347 	*resp = rw.rw_data;
348 	if (wantstr)
349 		*dlen = rw.rw_dlen;
350 
351 	return rv;
352 
353 }
354 
355 static int
356 send_copyout_req(struct spclient *spc, const void *remaddr,
357 	const void *data, size_t dlen)
358 {
359 	struct rsp_hdr rhdr;
360 	struct rsp_copydata copydata;
361 	int rv;
362 
363 	DPRINTF(("copyout_req (async): %zu bytes to %p\n", dlen, remaddr));
364 
365 	rhdr.rsp_len = sizeof(rhdr) + sizeof(copydata) + dlen;
366 	rhdr.rsp_reqno = nextreq(spc);
367 	rhdr.rsp_class = RUMPSP_REQ;
368 	rhdr.rsp_type = RUMPSP_COPYOUT;
369 	rhdr.rsp_sysnum = 0;
370 
371 	copydata.rcp_addr = __UNCONST(remaddr);
372 	copydata.rcp_len = dlen;
373 
374 	sendlock(spc);
375 	rv = dosend(spc, &rhdr, sizeof(rhdr));
376 	rv = dosend(spc, &copydata, sizeof(copydata));
377 	rv = dosend(spc, data, dlen);
378 	sendunlock(spc);
379 
380 	return rv;
381 }
382 
383 static int
384 anonmmap_req(struct spclient *spc, size_t howmuch, void **resp)
385 {
386 	struct rsp_hdr rhdr;
387 	struct respwait rw;
388 	int rv;
389 
390 	DPRINTF(("anonmmap_req: %zu bytes\n", howmuch));
391 
392 	rhdr.rsp_len = sizeof(rhdr) + sizeof(howmuch);
393 	rhdr.rsp_class = RUMPSP_REQ;
394 	rhdr.rsp_type = RUMPSP_ANONMMAP;
395 	rhdr.rsp_sysnum = 0;
396 
397 	putwait(spc, &rw, &rhdr);
398 	rv = dosend(spc, &rhdr, sizeof(rhdr));
399 	rv = dosend(spc, &howmuch, sizeof(howmuch));
400 	if (rv) {
401 		unputwait(spc, &rw);
402 		return rv;
403 	}
404 
405 	rv = waitresp(spc, &rw);
406 
407 	*resp = rw.rw_data;
408 
409 	DPRINTF(("anonmmap: mapped at %p\n", **(void ***)resp));
410 
411 	return rv;
412 }
413 
414 static int
415 send_raise_req(struct spclient *spc, int signo)
416 {
417 	struct rsp_hdr rhdr;
418 	int rv;
419 
420 	rhdr.rsp_len = sizeof(rhdr);
421 	rhdr.rsp_class = RUMPSP_REQ;
422 	rhdr.rsp_type = RUMPSP_RAISE;
423 	rhdr.rsp_signo = signo;
424 
425 	sendlock(spc);
426 	rv = dosend(spc, &rhdr, sizeof(rhdr));
427 	sendunlock(spc);
428 
429 	return rv;
430 }
431 
432 static void
433 spcref(struct spclient *spc)
434 {
435 
436 	pthread_mutex_lock(&spc->spc_mtx);
437 	spc->spc_refcnt++;
438 	pthread_mutex_unlock(&spc->spc_mtx);
439 }
440 
441 static void
442 spcrelease(struct spclient *spc)
443 {
444 	int ref;
445 
446 	pthread_mutex_lock(&spc->spc_mtx);
447 	ref = --spc->spc_refcnt;
448 	pthread_mutex_unlock(&spc->spc_mtx);
449 
450 	if (ref > 0)
451 		return;
452 
453 	DPRINTF(("rump_sp: spcrelease: spc %p fd %d\n", spc, spc->spc_fd));
454 
455 	_DIAGASSERT(TAILQ_EMPTY(&spc->spc_respwait));
456 	_DIAGASSERT(spc->spc_buf == NULL);
457 
458 	if (spc->spc_mainlwp) {
459 		lwproc_switch(spc->spc_mainlwp);
460 		lwproc_release();
461 	}
462 	spc->spc_mainlwp = NULL;
463 
464 	close(spc->spc_fd);
465 	spc->spc_fd = -1;
466 	spc->spc_state = SPCSTATE_NEW;
467 
468 	atomic_inc_uint(&disco);
469 }
470 
471 static void
472 serv_handledisco(unsigned int idx)
473 {
474 	struct spclient *spc = &spclist[idx];
475 
476 	DPRINTF(("rump_sp: disconnecting [%u]\n", idx));
477 
478 	pfdlist[idx].fd = -1;
479 	pfdlist[idx].revents = 0;
480 	pthread_mutex_lock(&spc->spc_mtx);
481 	spc->spc_state = SPCSTATE_DYING;
482 	kickall(spc);
483 	sendunlockl(spc);
484 	pthread_mutex_unlock(&spc->spc_mtx);
485 
486 	if (spc->spc_mainlwp) {
487 		lwproc_switch(spc->spc_mainlwp);
488 		lwproc_procexit();
489 		lwproc_switch(NULL);
490 	}
491 
492 	/*
493 	 * Nobody's going to attempt to send/receive anymore,
494 	 * so reinit info relevant to that.
495 	 */
496 	/*LINTED:pointer casts may be ok*/
497 	memset((char *)spc + SPC_ZEROFF, 0, sizeof(*spc) - SPC_ZEROFF);
498 
499 	spcrelease(spc);
500 }
501 
502 static void
503 serv_shutdown(void)
504 {
505 	struct spclient *spc;
506 	unsigned int i;
507 
508 	for (i = 1; i < MAXCLI; i++) {
509 		spc = &spclist[i];
510 		if (spc->spc_fd == -1)
511 			continue;
512 
513 		shutdown(spc->spc_fd, SHUT_RDWR);
514 		serv_handledisco(i);
515 
516 		spcrelease(spc);
517 	}
518 }
519 
520 static unsigned
521 serv_handleconn(int fd, connecthook_fn connhook, int busy)
522 {
523 	struct sockaddr_storage ss;
524 	socklen_t sl = sizeof(ss);
525 	int newfd, flags;
526 	unsigned i;
527 
528 	/*LINTED: cast ok */
529 	newfd = accept(fd, (struct sockaddr *)&ss, &sl);
530 	if (newfd == -1)
531 		return 0;
532 
533 	if (busy) {
534 		close(newfd); /* EBUSY */
535 		return 0;
536 	}
537 
538 	flags = fcntl(newfd, F_GETFL, 0);
539 	if (fcntl(newfd, F_SETFL, flags | O_NONBLOCK) == -1) {
540 		close(newfd);
541 		return 0;
542 	}
543 
544 	if (connhook(newfd) != 0) {
545 		close(newfd);
546 		return 0;
547 	}
548 
549 	/* write out a banner for the client */
550 	if (send(newfd, banner, strlen(banner), MSG_NOSIGNAL)
551 	    != (ssize_t)strlen(banner)) {
552 		close(newfd);
553 		return 0;
554 	}
555 
556 	/* find empty slot the simple way */
557 	for (i = 0; i < MAXCLI; i++) {
558 		if (pfdlist[i].fd == -1 && spclist[i].spc_state == SPCSTATE_NEW)
559 			break;
560 	}
561 
562 	assert(i < MAXCLI);
563 
564 	pfdlist[i].fd = newfd;
565 	spclist[i].spc_fd = newfd;
566 	spclist[i].spc_istatus = SPCSTATUS_BUSY; /* dedicated receiver */
567 	spclist[i].spc_refcnt = 1;
568 
569 	TAILQ_INIT(&spclist[i].spc_respwait);
570 
571 	DPRINTF(("rump_sp: added new connection fd %d at idx %u\n", newfd, i));
572 
573 	return i;
574 }
575 
576 static void
577 serv_handlesyscall(struct spclient *spc, struct rsp_hdr *rhdr, uint8_t *data)
578 {
579 	register_t retval[2] = {0, 0};
580 	int rv, sysnum;
581 
582 	sysnum = (int)rhdr->rsp_sysnum;
583 	DPRINTF(("rump_sp: handling syscall %d from client %d\n",
584 	    sysnum, spc->spc_pid));
585 
586 	lwproc_newlwp(spc->spc_pid);
587 	spc->spc_syscallreq = rhdr->rsp_reqno;
588 	rv = rumpsyscall(sysnum, data, retval);
589 	spc->spc_syscallreq = 0;
590 	lwproc_release();
591 
592 	DPRINTF(("rump_sp: got return value %d & %d/%d\n",
593 	    rv, retval[0], retval[1]));
594 
595 	send_syscall_resp(spc, rhdr->rsp_reqno, rv, retval);
596 }
597 
598 struct sysbouncearg {
599 	struct spclient *sba_spc;
600 	struct rsp_hdr sba_hdr;
601 	uint8_t *sba_data;
602 
603 	TAILQ_ENTRY(sysbouncearg) sba_entries;
604 };
605 static pthread_mutex_t sbamtx;
606 static pthread_cond_t sbacv;
607 static int nworker, idleworker;
608 static TAILQ_HEAD(, sysbouncearg) syslist = TAILQ_HEAD_INITIALIZER(syslist);
609 
610 /*ARGSUSED*/
611 static void *
612 serv_syscallbouncer(void *arg)
613 {
614 	struct sysbouncearg *sba;
615 
616 	for (;;) {
617 		pthread_mutex_lock(&sbamtx);
618 		if (idleworker >= rumpsp_idleworker) {
619 			nworker--;
620 			pthread_mutex_unlock(&sbamtx);
621 			break;
622 		}
623 		idleworker++;
624 		while (TAILQ_EMPTY(&syslist)) {
625 			pthread_cond_wait(&sbacv, &sbamtx);
626 		}
627 
628 		sba = TAILQ_FIRST(&syslist);
629 		TAILQ_REMOVE(&syslist, sba, sba_entries);
630 		idleworker--;
631 		pthread_mutex_unlock(&sbamtx);
632 
633 		serv_handlesyscall(sba->sba_spc,
634 		    &sba->sba_hdr, sba->sba_data);
635 		spcrelease(sba->sba_spc);
636 		free(sba->sba_data);
637 		free(sba);
638 	}
639 
640 	return NULL;
641 }
642 
/*
 * Copy data from the client's address raddr into the local buffer
 * laddr.  *len is the number of bytes wanted; for string copies
 * (wantstr) it is the capacity of laddr on entry and is overwritten
 * with the length reported by the client.
 *
 * The kernel lock is dropped around the exchange with the client.
 *
 * Returns 0 on success and EFAULT on any failure, including a client
 * that reports a string longer than the caller's buffer.
 */
static int
sp_copyin(void *arg, const void *raddr, void *laddr, size_t *len, int wantstr)
{
	struct spclient *spc = arg;
	void *rdata = NULL; /* XXXuninit */
	size_t maxlen = *len;
	int rv, nlocks;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = copyin_req(spc, raddr, len, wantstr, &rdata);
	if (rv)
		goto out;

	/*
	 * For string requests *len now comes from the client.  Never
	 * let it exceed what the caller said laddr can hold.
	 */
	if (*len > maxlen) {
		free(rdata);
		rv = EFAULT;
		goto out;
	}

	memcpy(laddr, rdata, *len);
	free(rdata);

 out:
	rumpuser__klock(nlocks, NULL);
	if (rv)
		return EFAULT;
	return 0;
}
665 
/*
 * Copy len bytes from client address raddr to laddr.
 * Returns 0 or EFAULT (see sp_copyin()).
 */
int
rumpuser_sp_copyin(void *arg, const void *raddr, void *laddr, size_t len)
{
	int error;

	error = sp_copyin(arg, raddr, laddr, &len, 0);
	return error;
}
672 
/*
 * String variant of copyin: *len is passed through to sp_copyin()
 * with the wantstr flag set, so it is updated with the reported
 * length.  Returns 0 or EFAULT.
 */
int
rumpuser_sp_copyinstr(void *arg, const void *raddr, void *laddr, size_t *len)
{
	int error;

	error = sp_copyin(arg, raddr, laddr, len, 1);
	return error;
}
679 
/*
 * Copy dlen bytes from local buffer laddr to client address raddr.
 * The kernel lock is dropped while the request is being sent.
 * Returns 0 on success, EFAULT if the send failed.
 */
static int
sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	struct spclient *spc = arg;
	int nlocks, senderr;

	rumpuser__kunlock(0, &nlocks, NULL);
	senderr = send_copyout_req(spc, raddr, laddr, dlen);
	rumpuser__klock(nlocks, NULL);

	return senderr ? EFAULT : 0;
}
694 
/* Public copyout entry point; forwards unchanged to sp_copyout(). */
int
rumpuser_sp_copyout(void *arg, const void *laddr, void *raddr, size_t dlen)
{
	int error;

	error = sp_copyout(arg, laddr, raddr, dlen);
	return error;
}
701 
/*
 * String variant of copyout: sends *dlen bytes via sp_copyout().
 * *dlen itself is only read, never updated.
 */
int
rumpuser_sp_copyoutstr(void *arg, const void *laddr, void *raddr, size_t *dlen)
{
	size_t nbytes = *dlen;

	return sp_copyout(arg, laddr, raddr, nbytes);
}
708 
/*
 * Have the client create an anonymous mapping of howmuch bytes and
 * store the resulting client address in *addr.  The kernel lock is
 * dropped for the duration of the request.
 *
 * Returns 0 on success, EFAULT if the request failed (in which case
 * *addr is left untouched) and ENOMEM if the client reported a NULL
 * mapping (*addr is then set to NULL).
 */
int
rumpuser_sp_anonmmap(void *arg, size_t howmuch, void **addr)
{
	struct spclient *spc = arg;
	void *mapped, *rdata;
	int nlocks, rv;

	rumpuser__kunlock(0, &nlocks, NULL);

	rv = anonmmap_req(spc, howmuch, &rdata);
	if (rv == 0) {
		/* the reply payload holds the mapped address */
		mapped = *(void **)rdata;
		free(rdata);

		if (mapped == NULL)
			rv = ENOMEM;
		*addr = mapped;
	} else {
		rv = EFAULT;
	}

	rumpuser__klock(nlocks, NULL);

	return rv;
}
740 
741 int
742 rumpuser_sp_raise(void *arg, int signo)
743 {
744 	struct spclient *spc = arg;
745 	int rv, nlocks;
746 
747 	rumpuser__kunlock(0, &nlocks, NULL);
748 	rv = send_raise_req(spc, signo);
749 	rumpuser__klock(nlocks, NULL);
750 
751 	return rv;
752 }
753 
754 /*
755  *
756  * Startup routines and mainloop for server.
757  *
758  */
759 
760 struct spservarg {
761 	int sps_sock;
762 	connecthook_fn sps_connhook;
763 };
764 
765 static pthread_attr_t pattr_detached;
766 static void
767 handlereq(struct spclient *spc)
768 {
769 	struct sysbouncearg *sba;
770 	pthread_t pt;
771 	int retries, error, i;
772 
773 	if (__predict_false(spc->spc_state == SPCSTATE_NEW)) {
774 		if (spc->spc_hdr.rsp_type != RUMPSP_HANDSHAKE) {
775 			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAUTH);
776 			shutdown(spc->spc_fd, SHUT_RDWR);
777 			spcfreebuf(spc);
778 			return;
779 		}
780 
781 		if (spc->spc_hdr.rsp_handshake == HANDSHAKE_GUEST) {
782 			char *comm = (char *)spc->spc_buf;
783 			size_t commlen = spc->spc_hdr.rsp_len - HDRSZ;
784 
785 			/* ensure it's 0-terminated */
786 			/* XXX make sure it contains sensible chars? */
787 			comm[commlen] = '\0';
788 
789 			if ((error = lwproc_rfork(spc,
790 			    RUMP_RFCFDG, comm)) != 0) {
791 				shutdown(spc->spc_fd, SHUT_RDWR);
792 			}
793 
794 			spcfreebuf(spc);
795 			if (error)
796 				return;
797 
798 			spc->spc_mainlwp = lwproc_curlwp();
799 
800 			send_handshake_resp(spc, spc->spc_hdr.rsp_reqno, 0);
801 		} else if (spc->spc_hdr.rsp_handshake == HANDSHAKE_FORK) {
802 			struct lwp *tmpmain;
803 			struct prefork *pf;
804 			struct handshake_fork *rfp;
805 			uint64_t reqno;
806 			int cancel;
807 
808 			reqno = spc->spc_hdr.rsp_reqno;
809 			if (spc->spc_off-HDRSZ != sizeof(*rfp)) {
810 				send_error_resp(spc, reqno, EINVAL);
811 				shutdown(spc->spc_fd, SHUT_RDWR);
812 				spcfreebuf(spc);
813 				return;
814 			}
815 
816 			/*LINTED*/
817 			rfp = (void *)spc->spc_buf;
818 			cancel = rfp->rf_cancel;
819 
820 			pthread_mutex_lock(&pfmtx);
821 			LIST_FOREACH(pf, &preforks, pf_entries) {
822 				if (memcmp(rfp->rf_auth, pf->pf_auth,
823 				    sizeof(rfp->rf_auth)) == 0) {
824 					LIST_REMOVE(pf, pf_entries);
825 					LIST_REMOVE(pf, pf_spcentries);
826 					break;
827 				}
828 			}
829 			pthread_mutex_lock(&pfmtx);
830 			spcfreebuf(spc);
831 
832 			if (!pf) {
833 				send_error_resp(spc, reqno, ESRCH);
834 				shutdown(spc->spc_fd, SHUT_RDWR);
835 				return;
836 			}
837 
838 			tmpmain = pf->pf_lwp;
839 			free(pf);
840 			lwproc_switch(tmpmain);
841 			if (cancel) {
842 				lwproc_release();
843 				shutdown(spc->spc_fd, SHUT_RDWR);
844 				return;
845 			}
846 
847 			/*
848 			 * So, we forked already during "prefork" to save
849 			 * the file descriptors from a parent exit
850 			 * race condition.  But now we need to fork
851 			 * a second time since the initial fork has
852 			 * the wrong spc pointer.  (yea, optimize
853 			 * interfaces some day if anyone cares)
854 			 */
855 			if ((error = lwproc_rfork(spc, 0, NULL)) != 0) {
856 				send_error_resp(spc, reqno, error);
857 				shutdown(spc->spc_fd, SHUT_RDWR);
858 				lwproc_release();
859 				return;
860 			}
861 			spc->spc_mainlwp = lwproc_curlwp();
862 			lwproc_switch(tmpmain);
863 			lwproc_release();
864 			lwproc_switch(spc->spc_mainlwp);
865 
866 			send_handshake_resp(spc, reqno, 0);
867 		}
868 
869 		spc->spc_pid = lwproc_getpid();
870 
871 		DPRINTF(("rump_sp: handshake for client %p complete, pid %d\n",
872 		    spc, spc->spc_pid));
873 
874 		lwproc_switch(NULL);
875 		spc->spc_state = SPCSTATE_RUNNING;
876 		return;
877 	}
878 
879 	if (__predict_false(spc->spc_hdr.rsp_type == RUMPSP_PREFORK)) {
880 		struct prefork *pf;
881 		uint64_t reqno;
882 		uint32_t auth[AUTHLEN];
883 
884 		DPRINTF(("rump_sp: prefork handler executing for %p\n", spc));
885 		reqno = spc->spc_hdr.rsp_reqno;
886 		spcfreebuf(spc);
887 
888 		pf = malloc(sizeof(*pf));
889 		if (pf == NULL) {
890 			send_error_resp(spc, reqno, ENOMEM);
891 			return;
892 		}
893 
894 		/*
895 		 * Use client main lwp to fork.  this is never used by
896 		 * worker threads (except if spc refcount goes to 0),
897 		 * so we can safely use it here.
898 		 */
899 		lwproc_switch(spc->spc_mainlwp);
900 		if ((error = lwproc_rfork(spc, RUMP_RFFDG, NULL)) != 0) {
901 			DPRINTF(("rump_sp: fork failed: %d (%p)\n",error, spc));
902 			send_error_resp(spc, reqno, error);
903 			lwproc_switch(NULL);
904 			free(pf);
905 			return;
906 		}
907 
908 		/* Ok, we have a new process context and a new curlwp */
909 		for (i = 0; i < AUTHLEN; i++) {
910 			pf->pf_auth[i] = auth[i] = arc4random();
911 		}
912 		pf->pf_lwp = lwproc_curlwp();
913 		lwproc_switch(NULL);
914 
915 		pthread_mutex_lock(&pfmtx);
916 		LIST_INSERT_HEAD(&preforks, pf, pf_entries);
917 		LIST_INSERT_HEAD(&spc->spc_pflist, pf, pf_spcentries);
918 		pthread_mutex_unlock(&pfmtx);
919 
920 		DPRINTF(("rump_sp: prefork handler success %p\n", spc));
921 
922 		send_prefork_resp(spc, reqno, auth);
923 		return;
924 	}
925 
926 	if (__predict_false(spc->spc_hdr.rsp_type != RUMPSP_SYSCALL)) {
927 		send_error_resp(spc, spc->spc_hdr.rsp_reqno, EINVAL);
928 		spcfreebuf(spc);
929 		return;
930 	}
931 
932 	retries = 0;
933 	while ((sba = malloc(sizeof(*sba))) == NULL) {
934 		if (nworker == 0 || retries > 10) {
935 			send_error_resp(spc, spc->spc_hdr.rsp_reqno, EAGAIN);
936 			spcfreebuf(spc);
937 			return;
938 		}
939 		/* slim chance of more memory? */
940 		usleep(10000);
941 	}
942 
943 	sba->sba_spc = spc;
944 	sba->sba_hdr = spc->spc_hdr;
945 	sba->sba_data = spc->spc_buf;
946 	spcresetbuf(spc);
947 
948 	spcref(spc);
949 
950 	pthread_mutex_lock(&sbamtx);
951 	TAILQ_INSERT_TAIL(&syslist, sba, sba_entries);
952 	if (idleworker > 0) {
953 		/* do we have a daemon's tool (i.e. idle threads)? */
954 		pthread_cond_signal(&sbacv);
955 	} else if (nworker < rumpsp_maxworker) {
956 		/*
957 		 * Else, need to create one
958 		 * (if we can, otherwise just expect another
959 		 * worker to pick up the syscall)
960 		 */
961 		if (pthread_create(&pt, &pattr_detached,
962 		    serv_syscallbouncer, NULL) == 0)
963 			nworker++;
964 	}
965 	pthread_mutex_unlock(&sbamtx);
966 }
967 
968 static void *
969 spserver(void *arg)
970 {
971 	struct spservarg *sarg = arg;
972 	struct spclient *spc;
973 	unsigned idx;
974 	int seen;
975 	int rv;
976 	unsigned int nfds, maxidx;
977 
978 	for (idx = 0; idx < MAXCLI; idx++) {
979 		pfdlist[idx].fd = -1;
980 		pfdlist[idx].events = POLLIN;
981 
982 		spc = &spclist[idx];
983 		pthread_mutex_init(&spc->spc_mtx, NULL);
984 		pthread_cond_init(&spc->spc_cv, NULL);
985 		spc->spc_fd = -1;
986 	}
987 	pfdlist[0].fd = spclist[0].spc_fd = sarg->sps_sock;
988 	pfdlist[0].events = POLLIN;
989 	nfds = 1;
990 	maxidx = 0;
991 
992 	pthread_attr_init(&pattr_detached);
993 	pthread_attr_setdetachstate(&pattr_detached, PTHREAD_CREATE_DETACHED);
994 	/* XXX: doesn't stacksize currently work on NetBSD */
995 	pthread_attr_setstacksize(&pattr_detached, 32*1024);
996 
997 	pthread_mutex_init(&sbamtx, NULL);
998 	pthread_cond_init(&sbacv, NULL);
999 
1000 	DPRINTF(("rump_sp: server mainloop\n"));
1001 
1002 	for (;;) {
1003 		int discoed;
1004 
1005 		/* g/c hangarounds (eventually) */
1006 		discoed = atomic_swap_uint(&disco, 0);
1007 		while (discoed--) {
1008 			nfds--;
1009 			idx = maxidx;
1010 			while (idx) {
1011 				if (pfdlist[idx].fd != -1) {
1012 					maxidx = idx;
1013 					break;
1014 				}
1015 				idx--;
1016 			}
1017 			DPRINTF(("rump_sp: set maxidx to [%u]\n",
1018 			    maxidx));
1019 		}
1020 
1021 		DPRINTF(("rump_sp: loop nfd %d\n", maxidx+1));
1022 		seen = 0;
1023 		rv = poll(pfdlist, maxidx+1, INFTIM);
1024 		assert(maxidx+1 <= MAXCLI);
1025 		assert(rv != 0);
1026 		if (rv == -1) {
1027 			if (errno == EINTR)
1028 				continue;
1029 			fprintf(stderr, "rump_spserver: poll returned %d\n",
1030 			    errno);
1031 			break;
1032 		}
1033 
1034 		for (idx = 0; seen < rv && idx < MAXCLI; idx++) {
1035 			if ((pfdlist[idx].revents & POLLIN) == 0)
1036 				continue;
1037 
1038 			seen++;
1039 			DPRINTF(("rump_sp: activity at [%u] %d/%d\n",
1040 			    idx, seen, rv));
1041 			if (idx > 0) {
1042 				spc = &spclist[idx];
1043 				DPRINTF(("rump_sp: mainloop read [%u]\n", idx));
1044 				switch (readframe(spc)) {
1045 				case 0:
1046 					break;
1047 				case -1:
1048 					serv_handledisco(idx);
1049 					break;
1050 				default:
1051 					switch (spc->spc_hdr.rsp_class) {
1052 					case RUMPSP_RESP:
1053 						kickwaiter(spc);
1054 						break;
1055 					case RUMPSP_REQ:
1056 						handlereq(spc);
1057 						break;
1058 					default:
1059 						send_error_resp(spc,
1060 						    spc->spc_hdr.rsp_reqno,
1061 						    ENOENT);
1062 						spcfreebuf(spc);
1063 						break;
1064 					}
1065 					break;
1066 				}
1067 
1068 			} else {
1069 				DPRINTF(("rump_sp: mainloop new connection\n"));
1070 
1071 				if (__predict_false(spfini)) {
1072 					close(spclist[0].spc_fd);
1073 					serv_shutdown();
1074 					goto out;
1075 				}
1076 
1077 				idx = serv_handleconn(pfdlist[0].fd,
1078 				    sarg->sps_connhook, nfds == MAXCLI);
1079 				if (idx)
1080 					nfds++;
1081 				if (idx > maxidx)
1082 					maxidx = idx;
1083 				DPRINTF(("rump_sp: maxid now %d\n", maxidx));
1084 			}
1085 		}
1086 	}
1087 
1088  out:
1089 	return NULL;
1090 }
1091 
1092 static unsigned cleanupidx;
1093 static struct sockaddr *cleanupsa;
1094 int
1095 rumpuser_sp_init(const char *url, const struct rumpuser_sp_ops *spopsp,
1096 	const char *ostype, const char *osrelease, const char *machine)
1097 {
1098 	pthread_t pt;
1099 	struct spservarg *sarg;
1100 	struct sockaddr *sap;
1101 	char *p;
1102 	unsigned idx;
1103 	int error, s;
1104 
1105 	p = strdup(url);
1106 	if (p == NULL)
1107 		return ENOMEM;
1108 	error = parseurl(p, &sap, &idx, 1);
1109 	free(p);
1110 	if (error)
1111 		return error;
1112 
1113 	snprintf(banner, sizeof(banner), "RUMPSP-%d.%d-%s-%s/%s\n",
1114 	    PROTOMAJOR, PROTOMINOR, ostype, osrelease, machine);
1115 
1116 	s = socket(parsetab[idx].domain, SOCK_STREAM, 0);
1117 	if (s == -1)
1118 		return errno;
1119 
1120 	spops = *spopsp;
1121 	sarg = malloc(sizeof(*sarg));
1122 	if (sarg == NULL) {
1123 		close(s);
1124 		return ENOMEM;
1125 	}
1126 
1127 	sarg->sps_sock = s;
1128 	sarg->sps_connhook = parsetab[idx].connhook;
1129 
1130 	cleanupidx = idx;
1131 	cleanupsa = sap;
1132 
1133 	/* sloppy error recovery */
1134 
1135 	/*LINTED*/
1136 	if (bind(s, sap, sap->sa_len) == -1) {
1137 		fprintf(stderr, "rump_sp: server bind failed\n");
1138 		return errno;
1139 	}
1140 
1141 	if (listen(s, MAXCLI) == -1) {
1142 		fprintf(stderr, "rump_sp: server listen failed\n");
1143 		return errno;
1144 	}
1145 
1146 	if ((error = pthread_create(&pt, NULL, spserver, sarg)) != 0) {
1147 		fprintf(stderr, "rump_sp: cannot create wrkr thread\n");
1148 		return errno;
1149 	}
1150 	pthread_detach(pt);
1151 
1152 	return 0;
1153 }
1154 
1155 void
1156 rumpuser_sp_fini(void *arg)
1157 {
1158 	struct spclient *spc = arg;
1159 	register_t retval[2] = {0, 0};
1160 
1161 	/*
1162 	 * stuff response into the socket, since this process is just
1163 	 * about to exit
1164 	 */
1165 	if (spc && spc->spc_syscallreq)
1166 		send_syscall_resp(spc, spc->spc_syscallreq, 0, retval);
1167 
1168 	if (spclist[0].spc_fd) {
1169 		parsetab[cleanupidx].cleanup(cleanupsa);
1170 		shutdown(spclist[0].spc_fd, SHUT_RDWR);
1171 		spfini = 1;
1172 	}
1173 }
1174