1 /* $NetBSD: rumpclient.c,v 1.70 2023/07/28 18:19:00 christos Exp $ */
2
3 /*
4 * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Client side routines for rump syscall proxy.
30 */
31
32 #include <rump/rumpuser_port.h>
33
34 /*
35 * We use kqueue on the BSDs, poll elsewhere. We
36 * want to use kqueue because it will give us the ability to get signal
37 * notifications but defer their handling to a stage where we do not
38 * hold the communication lock. Taking a signal while holding on to
39 * that lock may cause a deadlock. Therefore, block signals throughout
40 * the RPC when using poll. On Linux, we use signalfd in the same role
41 * as kqueue on NetBSD to be able to take signals while waiting for a
42 * response from the server.
43 */
44
45 #if defined(__NetBSD__) || defined(__FreeBSD__) || \
46 defined(__DragonFly__) || defined(__OpenBSD__)
47 #define USE_KQUEUE
48 #endif
49 #if defined(__linux__)
50 #define USE_SIGNALFD
51 #endif
52
53 __RCSID("$NetBSD: rumpclient.c,v 1.70 2023/07/28 18:19:00 christos Exp $");
54
55 #include <sys/param.h>
56 #include <sys/mman.h>
57 #include <sys/socket.h>
58 #include <sys/time.h>
59
60 #ifdef USE_KQUEUE
61 #include <sys/event.h>
62 #endif
63
64 #include <arpa/inet.h>
65 #include <netinet/in.h>
66 #include <netinet/tcp.h>
67
68 #include <assert.h>
69 #include <dlfcn.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <poll.h>
73 #include <pthread.h>
74 #include <signal.h>
75 #include <stdarg.h>
76 #include <stdbool.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <unistd.h>
81
82 #include <rump/rumpclient.h>
83
/*
 * Pointers to the host's implementations of the calls this file makes
 * on its own behalf.  They are filled in by the FINDSYM lookups in
 * rumpclient_init().  HOSTOPS is defined before sp_common.c is included
 * below -- presumably so that the shared code goes through these
 * pointers too (TODO confirm against sp_common.c).
 */
#define HOSTOPS
int (*host_socket)(int, int, int);
int (*host_close)(int);
int (*host_connect)(int, const struct sockaddr *, socklen_t);
int (*host_fcntl)(int, int, ...);
int (*host_poll)(struct pollfd *, nfds_t, int);
ssize_t (*host_read)(int, void *, size_t);
ssize_t (*host_sendmsg)(int, const struct msghdr *, int);
int (*host_setsockopt)(int, int, int, const void *, socklen_t);
int (*host_dup)(int);

#ifdef USE_KQUEUE
int (*host_kqueue)(void);
/* the count arguments of kevent() are size_t on NetBSD and int elsewhere */
#ifdef __NetBSD__
int (*host_kevent)(int, const struct kevent *, size_t,
    struct kevent *, size_t, const struct timespec *);
#else
int (*host_kevent)(int, const struct kevent *, int,
    struct kevent *, int, const struct timespec *);
#endif
#endif

#ifdef USE_SIGNALFD
#include <sys/signalfd.h>

int (*host_signalfd)(int, const sigset_t *, int);
#endif

/* used by rumpclient_exec() */
int (*host_execve)(const char *, char *const[], char *const[]);
113
114 #include "sp_common.c"
115 #include "rumpuser_sigtrans.c"
116
/* The one connection to the server; spc_fd is -1 while not connected. */
static struct spclient clispc = {
	.spc_fd = -1,
};

/* Descriptor returned by makeholyfd(); -1 when there is none. */
static int holyfd = -1;
/* All signals; filled in by rumpclient_init() and used as the mask during RPCs. */
static sigset_t fullset;

static int doconnect(void);
static int handshake_req(struct spclient *, int, void *, int, bool);

/*
 * Default: don't retry. Most clients can't handle it
 * (consider e.g. fds suddenly going missing).
 * Changed with rumpclient_setconnretry(), consulted by send_with_recon().
 */
static time_t retrytimo = 0;

/* always defined to nothingness for now */
#define ERRLOG(a)
135
/*
 * Send a request, reconnecting to the server if the connection is gone.
 *
 * The iovec is handed to dosend().  Any result other than ENOTCONN or
 * EBADF is returned to the caller unchanged.  For those two results the
 * retry policy stored in retrytimo decides what happens:
 *
 *   0                          give up at once and return ENOTCONN
 *   RUMPCLIENT_RETRYCONN_DIE   _exit(1)
 *   RUMPCLIENT_RETRYCONN_ONCE  return ENOTCONN if the send still fails
 *                              after one reconnect attempt
 *   > 0                        keep retrying for at most that many seconds
 *   other values               no time limit is applied
 *
 * Returns EAGAIN once a reconnect and the guest handshake have succeeded;
 * the caller then has to resend the whole PDU.  Returns ENOTCONN when the
 * policy says to give up.
 */
static int
send_with_recon(struct spclient *spc, struct iovec *iov, size_t iovlen)
{
	struct timeval starttime, curtime;
	time_t prevreconmsg;
	unsigned reconretries;
	int rv;

	for (prevreconmsg = 0, reconretries = 0;;) {
		rv = dosend(spc, iov, iovlen);
		if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
			/* no persistent connections */
			if (retrytimo == 0) {
				rv = ENOTCONN;
				break;
			}
			if (retrytimo == RUMPCLIENT_RETRYCONN_DIE)
				_exit(1);

			/* first failure: remember when the outage started */
			if (!prevreconmsg) {
				prevreconmsg = time(NULL);
				gettimeofday(&starttime, NULL);
			}
			/*
			 * reconretries == 1 means one reconnect attempt has
			 * already been made and the send failed again.
			 */
			if (reconretries == 1) {
				if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
					rv = ENOTCONN;
					break;
				}
				fprintf(stderr, "rump_sp: connection to "
				    "kernel lost, trying to reconnect ...\n");
			} else if (time(NULL) - prevreconmsg > 120) {
				/* remind the user at most every two minutes */
				fprintf(stderr, "rump_sp: still trying to "
				    "reconnect ...\n");
				prevreconmsg = time(NULL);
			}

			/* check that we aren't over the limit */
			if (retrytimo > 0) {
				time_t tdiff;

				gettimeofday(&curtime, NULL);
				tdiff = curtime.tv_sec - starttime.tv_sec;
				/* a full second has not elapsed yet */
				if (starttime.tv_usec > curtime.tv_usec)
					tdiff--;
				if (tdiff >= retrytimo) {
					fprintf(stderr, "rump_sp: reconnect "
					    "failed, %lld second timeout\n",
					    (long long)retrytimo);
					return ENOTCONN;
				}
			}

			/*
			 * adhoc backoff timer: 0.1s steps for the first ten
			 * attempts (no delay on the very first), then whole
			 * seconds capped at 10.
			 */
			if (reconretries < 10) {
				usleep(100000 * reconretries);
			} else {
				sleep(MIN(10, reconretries-9));
			}
			reconretries++;

			/* on failure go around and let dosend() fail again */
			if ((rv = doconnect()) != 0)
				continue;
			if ((rv = handshake_req(&clispc, HANDSHAKE_GUEST,
			    NULL, 0, true)) != 0)
				continue;

			/*
			 * ok, reconnect successful.  we need to return to
			 * the upper layer to get the entire PDU resent.
			 */
			if (reconretries != 1)
				fprintf(stderr, "rump_sp: reconnected!\n");
			rv = EAGAIN;
			break;
		} else {
			_DIAGASSERT(errno != EAGAIN);
			break;
		}
	}

	return rv;
}
218
/*
 * Wait until the request registered in rw has been answered.
 *
 * Only one thread at a time acts as the receiver: the thread that finds
 * spc_istatus FREE marks it BUSY, drops spc_mtx and reads frames with
 * readframe(), dispatching responses through kickwaiter() and server
 * requests through handlereq().  Every other thread marks the receiver
 * WANTED and sleeps on its own rw_cv.
 *
 * spc      connection to wait on
 * rw       wait entry of our request; it is removed from spc_respwait
 *          and its condition variable destroyed before returning
 * mask     signal mask installed momentarily when signal activity was
 *          seen, so that the signals are taken while spc_mtx is not held
 * keeplock if false, the send lock is released via sendunlockl() first
 *
 * Returns rw->rw_error, or ENOTCONN if the connection generation changed
 * while waiting or readframe() returned -1.
 */
static int
cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask,
    bool keeplock)
{
	uint64_t mygen;
	bool imalive = true;

	pthread_mutex_lock(&spc->spc_mtx);
	if (!keeplock)
		sendunlockl(spc);
	/* doconnect() bumps the generation on every reconnect */
	mygen = spc->spc_generation;

	rw->rw_error = 0;
	while (!rw->rw_done && rw->rw_error == 0) {
		if (__predict_false(spc->spc_generation != mygen || !imalive))
			break;

		/* are we free to receive? */
		if (spc->spc_istatus == SPCSTATUS_FREE) {
			int gotresp, dosig, rv;

			spc->spc_istatus = SPCSTATUS_BUSY;
			pthread_mutex_unlock(&spc->spc_mtx);

			dosig = 0;
			for (gotresp = 0; !gotresp; ) {
#ifdef USE_KQUEUE
				struct kevent kev[8];
				int i;

				/*
				 * typically we don't have a frame waiting
				 * when we come in here, so call kevent now
				 */
				rv = host_kevent(holyfd, NULL, 0,
				    kev, __arraycount(kev), NULL);

				/* on error let readframe() decide our fate */
				if (__predict_false(rv == -1)) {
					goto activity;
				}

				/*
				 * XXX: don't know how this can happen
				 * (timeout cannot expire since there
				 * isn't one), but it does happen.
				 * treat it as an exceptional condition
				 * and go through tryread to determine
				 * alive status.
				 */
				if (__predict_false(rv == 0))
					goto activity;

				for (i = 0; i < rv; i++) {
					if (kev[i].filter == EVFILT_SIGNAL)
						dosig++;
				}
				/* signals take priority over reading a frame */
				if (dosig)
					goto cleanup;

				/*
				 * ok, activity.  try to read a frame to
				 * determine what happens next.
				 */
 activity:
#else /* !USE_KQUEUE */
				struct pollfd pfd[2];

				pfd[0].fd = clispc.spc_fd;
				pfd[0].events = POLLIN;
				pfd[1].fd = holyfd;
				pfd[1].events = POLLIN;

				rv = host_poll(pfd, 2, -1);
				/* holyfd readable: a signal is waiting */
				if (rv >= 1 && pfd[1].revents & POLLIN) {
					dosig = 1;
					goto cleanup;
				}
#endif /* !USE_KQUEUE */

				switch (readframe(spc)) {
				case 0:
					/* no complete frame yet, wait for more */
					continue;
				case -1:
					imalive = false;
					goto cleanup;
				default:
					/* case 1 */
					break;
				}

				switch (spc->spc_hdr.rsp_class) {
				case RUMPSP_RESP:
				case RUMPSP_ERROR:
					kickwaiter(spc);
					/* was that the response we wait for? */
					gotresp = spc->spc_hdr.rsp_reqno ==
					    rw->rw_reqno;
					break;
				case RUMPSP_REQ:
					handlereq(spc);
					break;
				default:
					/* panic */
					break;
				}
			}

 cleanup:
			/* give up the receiver role, waking those who want it */
			pthread_mutex_lock(&spc->spc_mtx);
			if (spc->spc_istatus == SPCSTATUS_WANTED)
				kickall(spc);
			spc->spc_istatus = SPCSTATUS_FREE;

			/*
			 * take one for the team: open the caller's signal
			 * mask for a moment, without holding spc_mtx, and
			 * then block everything again.
			 */
			if (dosig) {
				pthread_mutex_unlock(&spc->spc_mtx);
				pthread_sigmask(SIG_SETMASK, mask, NULL);
				pthread_sigmask(SIG_SETMASK, &fullset, NULL);
				pthread_mutex_lock(&spc->spc_mtx);
			}
		} else {
			/* somebody else is receiving; wait to be kicked */
			spc->spc_istatus = SPCSTATUS_WANTED;
			pthread_cond_wait(&rw->rw_cv, &spc->spc_mtx);
		}
	}
	TAILQ_REMOVE(&spc->spc_respwait, rw, rw_entries);
	pthread_mutex_unlock(&spc->spc_mtx);
	pthread_cond_destroy(&rw->rw_cv);

	if (spc->spc_generation != mygen || !imalive) {
		return ENOTCONN;
	}
	return rw->rw_error;
}
352
353 static int
syscall_req(struct spclient * spc,sigset_t * omask,int sysnum,const void * data,size_t dlen,void ** resp)354 syscall_req(struct spclient *spc, sigset_t *omask, int sysnum,
355 const void *data, size_t dlen, void **resp)
356 {
357 struct rsp_hdr rhdr;
358 struct respwait rw;
359 struct iovec iov[2];
360 int rv;
361
362 rhdr.rsp_len = sizeof(rhdr) + dlen;
363 rhdr.rsp_class = RUMPSP_REQ;
364 rhdr.rsp_type = RUMPSP_SYSCALL;
365 rhdr.rsp_sysnum = sysnum;
366
367 IOVPUT(iov[0], rhdr);
368 IOVPUT_WITHSIZE(iov[1], __UNCONST(data), dlen);
369
370 do {
371 putwait(spc, &rw, &rhdr);
372 if ((rv = send_with_recon(spc, iov, __arraycount(iov))) != 0) {
373 unputwait(spc, &rw);
374 continue;
375 }
376
377 rv = cliwaitresp(spc, &rw, omask, false);
378 if (rv == ENOTCONN)
379 rv = EAGAIN;
380 } while (rv == EAGAIN);
381
382 *resp = rw.rw_data;
383 return rv;
384 }
385
386 static int
handshake_req(struct spclient * spc,int type,void * data,int cancel,bool haslock)387 handshake_req(struct spclient *spc, int type, void *data,
388 int cancel, bool haslock)
389 {
390 struct handshake_fork rf;
391 const char *myprogname = NULL; /* XXXgcc */
392 struct rsp_hdr rhdr;
393 struct respwait rw;
394 sigset_t omask;
395 size_t bonus;
396 struct iovec iov[2];
397 int rv;
398
399 if (type == HANDSHAKE_FORK) {
400 bonus = sizeof(rf);
401 } else {
402 #ifdef __NetBSD__
403 /* would procfs work on NetBSD too? */
404 myprogname = getprogname();
405 #else
406 int fd = open("/proc/self/comm", O_RDONLY);
407 if (fd == -1) {
408 myprogname = "???";
409 } else {
410 static char commname[128];
411
412 memset(commname, 0, sizeof(commname));
413 if (read(fd, commname, sizeof(commname)) > 0) {
414 char *n;
415
416 n = strrchr(commname, '\n');
417 if (n)
418 *n = '\0';
419 myprogname = commname;
420 } else {
421 myprogname = "???";
422 }
423 close(fd);
424 }
425 #endif
426 bonus = strlen(myprogname)+1;
427 }
428
429 /* performs server handshake */
430 rhdr.rsp_len = sizeof(rhdr) + bonus;
431 rhdr.rsp_class = RUMPSP_REQ;
432 rhdr.rsp_type = RUMPSP_HANDSHAKE;
433 rhdr.rsp_handshake = type;
434
435 IOVPUT(iov[0], rhdr);
436
437 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
438 if (haslock)
439 putwait_locked(spc, &rw, &rhdr);
440 else
441 putwait(spc, &rw, &rhdr);
442 if (type == HANDSHAKE_FORK) {
443 memcpy(rf.rf_auth, data, sizeof(rf.rf_auth)); /* uh, why? */
444 rf.rf_cancel = cancel;
445 IOVPUT(iov[1], rf);
446 } else {
447 IOVPUT_WITHSIZE(iov[1], __UNCONST(myprogname), bonus);
448 }
449 rv = send_with_recon(spc, iov, __arraycount(iov));
450 if (rv || cancel) {
451 if (haslock)
452 unputwait_locked(spc, &rw);
453 else
454 unputwait(spc, &rw);
455 if (cancel) {
456 goto out;
457 }
458 } else {
459 rv = cliwaitresp(spc, &rw, &omask, haslock);
460 }
461 if (rv)
462 goto out;
463
464 rv = *(int *)rw.rw_data;
465 free(rw.rw_data);
466
467 out:
468 pthread_sigmask(SIG_SETMASK, &omask, NULL);
469 return rv;
470 }
471
472 static int
prefork_req(struct spclient * spc,sigset_t * omask,void ** resp)473 prefork_req(struct spclient *spc, sigset_t *omask, void **resp)
474 {
475 struct rsp_hdr rhdr;
476 struct respwait rw;
477 struct iovec iov[1];
478 int rv;
479
480 rhdr.rsp_len = sizeof(rhdr);
481 rhdr.rsp_class = RUMPSP_REQ;
482 rhdr.rsp_type = RUMPSP_PREFORK;
483 rhdr.rsp_error = 0;
484
485 IOVPUT(iov[0], rhdr);
486
487 do {
488 putwait(spc, &rw, &rhdr);
489 rv = send_with_recon(spc, iov, __arraycount(iov));
490 if (rv != 0) {
491 unputwait(spc, &rw);
492 continue;
493 }
494
495 rv = cliwaitresp(spc, &rw, omask, false);
496 if (rv == ENOTCONN)
497 rv = EAGAIN;
498 } while (rv == EAGAIN);
499
500 *resp = rw.rw_data;
501 return rv;
502 }
503
504 /*
505 * prevent response code from deadlocking with reconnect code
506 */
507 static int
resp_sendlock(struct spclient * spc)508 resp_sendlock(struct spclient *spc)
509 {
510 int rv = 0;
511
512 pthread_mutex_lock(&spc->spc_mtx);
513 while (spc->spc_ostatus != SPCSTATUS_FREE) {
514 if (__predict_false(spc->spc_reconnecting)) {
515 rv = EBUSY;
516 goto out;
517 }
518 spc->spc_ostatus = SPCSTATUS_WANTED;
519 pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx);
520 }
521 spc->spc_ostatus = SPCSTATUS_BUSY;
522
523 out:
524 pthread_mutex_unlock(&spc->spc_mtx);
525 return rv;
526 }
527
528 static void
send_copyin_resp(struct spclient * spc,uint64_t reqno,void * data,size_t dlen,int wantstr)529 send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen,
530 int wantstr)
531 {
532 struct rsp_hdr rhdr;
533 struct iovec iov[2];
534
535 if (wantstr)
536 dlen = MIN(dlen, strlen(data)+1);
537
538 rhdr.rsp_len = sizeof(rhdr) + dlen;
539 rhdr.rsp_reqno = reqno;
540 rhdr.rsp_class = RUMPSP_RESP;
541 rhdr.rsp_type = RUMPSP_COPYIN;
542 rhdr.rsp_sysnum = 0;
543
544 IOVPUT(iov[0], rhdr);
545 IOVPUT_WITHSIZE(iov[1], data, dlen);
546
547 if (resp_sendlock(spc) != 0)
548 return;
549 (void)SENDIOV(spc, iov);
550 sendunlock(spc);
551 }
552
553 static void
send_anonmmap_resp(struct spclient * spc,uint64_t reqno,void * addr)554 send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr)
555 {
556 struct rsp_hdr rhdr;
557 struct iovec iov[2];
558
559 rhdr.rsp_len = sizeof(rhdr) + sizeof(addr);
560 rhdr.rsp_reqno = reqno;
561 rhdr.rsp_class = RUMPSP_RESP;
562 rhdr.rsp_type = RUMPSP_ANONMMAP;
563 rhdr.rsp_sysnum = 0;
564
565 IOVPUT(iov[0], rhdr);
566 IOVPUT(iov[1], addr);
567
568 if (resp_sendlock(spc) != 0)
569 return;
570 (void)SENDIOV(spc, iov);
571 sendunlock(spc);
572 }
573
574 int
rumpclient_syscall(int sysnum,const void * data,size_t dlen,register_t * retval)575 rumpclient_syscall(int sysnum, const void *data, size_t dlen,
576 register_t *retval)
577 {
578 struct rsp_sysresp *resp;
579 sigset_t omask;
580 void *rdata;
581 int rv;
582
583 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
584
585 DPRINTF(("rumpsp syscall_req: syscall %d with %p/%zu\n",
586 sysnum, data, dlen));
587
588 rv = syscall_req(&clispc, &omask, sysnum, data, dlen, &rdata);
589 if (rv)
590 goto out;
591
592 resp = rdata;
593 DPRINTF(("rumpsp syscall_resp: syscall %d error %d, rv: %"
594 PRIxREGISTER"/%"PRIxREGISTER"\n",
595 sysnum, rv, resp->rsys_retval[0], resp->rsys_retval[1]));
596
597 memcpy(retval, &resp->rsys_retval, sizeof(resp->rsys_retval));
598 rv = resp->rsys_error;
599 free(rdata);
600
601 out:
602 pthread_sigmask(SIG_SETMASK, &omask, NULL);
603 return rv;
604 }
605
606 static void
handlereq(struct spclient * spc)607 handlereq(struct spclient *spc)
608 {
609 struct rsp_copydata *copydata;
610 struct rsp_hdr *rhdr = &spc->spc_hdr;
611 void *mapaddr;
612 size_t maplen;
613 int reqtype = spc->spc_hdr.rsp_type;
614 int sig;
615
616 switch (reqtype) {
617 case RUMPSP_COPYIN:
618 case RUMPSP_COPYINSTR:
619 /*LINTED*/
620 copydata = (struct rsp_copydata *)spc->spc_buf;
621 DPRINTF(("rump_sp handlereq: copyin request: %p/%zu\n",
622 copydata->rcp_addr, copydata->rcp_len));
623 send_copyin_resp(spc, spc->spc_hdr.rsp_reqno,
624 copydata->rcp_addr, copydata->rcp_len,
625 reqtype == RUMPSP_COPYINSTR);
626 break;
627 case RUMPSP_COPYOUT:
628 case RUMPSP_COPYOUTSTR:
629 /*LINTED*/
630 copydata = (struct rsp_copydata *)spc->spc_buf;
631 DPRINTF(("rump_sp handlereq: copyout request: %p/%zu\n",
632 copydata->rcp_addr, copydata->rcp_len));
633 /*LINTED*/
634 memcpy(copydata->rcp_addr, copydata->rcp_data,
635 copydata->rcp_len);
636 break;
637 case RUMPSP_ANONMMAP:
638 /*LINTED*/
639 maplen = *(size_t *)spc->spc_buf;
640 mapaddr = mmap(NULL, maplen, PROT_READ|PROT_WRITE,
641 MAP_ANON|MAP_PRIVATE, -1, 0);
642 if (mapaddr == MAP_FAILED)
643 mapaddr = NULL;
644 DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr));
645 send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr);
646 break;
647 case RUMPSP_RAISE:
648 sig = rumpuser__sig_rump2host(rhdr->rsp_signo);
649 DPRINTF(("rump_sp handlereq: raise sig %d\n", sig));
650 raise(sig);
651 /*
652 * We most likely have signals blocked, but the signal
653 * will be handled soon enough when we return.
654 */
655 break;
656 default:
657 printf("PANIC: INVALID TYPE %d\n", reqtype);
658 abort();
659 break;
660 }
661
662 spcfreebuf(spc);
663 }
664
/* index into parsetab[] and address of the server; both set by parseurl() */
static unsigned ptab_idx;
static struct sockaddr *serv_sa;

/*
 * dup until we get a "good" fd which does not collide with stdio
 *
 * myfd        descriptor to start from; -1 is passed through untouched,
 *             which lets callers wrap a call that may have failed
 * mustchange  if non-zero, a different descriptor number is returned
 *             even when myfd is already above 2, and myfd itself is
 *             left open
 *
 * Returns the new descriptor, or -1 with errno describing the failure.
 * The intermediate descriptors created on the way are closed.
 */
static int
dupgood(int myfd, int mustchange)
{
	int ofds[4];
	int sverrno;
	unsigned int i;

	for (i = 0; (myfd <= 2 || mustchange) && myfd != -1; i++) {
		assert(i < __arraycount(ofds));
		/* remember the descriptor we are moving away from */
		ofds[i] = myfd;
		myfd = host_dup(myfd);
		if (mustchange) {
			/*
			 * Undo the loop increment so that this slot is
			 * overwritten (or not counted) and the caller's
			 * descriptor is never closed below.  i is unsigned,
			 * so going below zero and back is well defined.
			 */
			i--; /* prevent closing old fd */
			mustchange = 0;
		}
	}

	/* host_close() below may clobber errno from a failed dup */
	sverrno = 0;
	if (myfd == -1 && i > 0)
		sverrno = errno;

	while (i-- > 0) {
		host_close(ofds[i]);
	}

	if (sverrno)
		errno = sverrno;

	return myfd;
}
699
/*
 * makeholyfd() creates the descriptor which cliwaitresp() waits on to
 * learn about signals.  There is one implementation per mechanism; all
 * return the descriptor, or -1 if none could be created.
 */
#if defined(USE_KQUEUE)

/*
 * kqueue flavour: the queue reports the signals numbered 1..NSIG as
 * well as readability of the server socket, so it is the only
 * descriptor that needs to be waited on.
 */
static int
makeholyfd(void)
{
	struct kevent kev[NSIG+1];
	int i, fd;

	/* setup kqueue, we want all signals and the fd */
	if ((fd = dupgood(host_kqueue(), 0)) == -1) {
		ERRLOG(("rump_sp: cannot setup kqueue"));
		return -1;
	}

	for (i = 0; i < NSIG; i++) {
		EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0);
	}
	/* the last slot watches the connection itself */
	EV_SET(&kev[NSIG], clispc.spc_fd,
	    EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
	if (host_kevent(fd, kev, NSIG+1, NULL, 0, NULL) == -1) {
		ERRLOG(("rump_sp: kevent() failed"));
		host_close(fd);
		return -1;
	}

	return fd;
}

#elif defined(USE_SIGNALFD) /* !USE_KQUEUE */

/*
 * signalfd flavour: a descriptor for the signals in fullset.  It is
 * polled together with the server socket in cliwaitresp().
 */
static int
makeholyfd(void)
{

	return host_signalfd(-1, &fullset, 0);
}

#else /* !USE_KQUEUE && !USE_SIGNALFD */

/* no signal notification mechanism available: there is no holy fd */
static int
makeholyfd(void)
{

	return -1;
}

#endif
747
748 static int
doconnect(void)749 doconnect(void)
750 {
751 struct respwait rw;
752 struct rsp_hdr rhdr;
753 char banner[MAXBANNER];
754 int s, error, flags;
755 ssize_t n;
756
757 if (holyfd != -1)
758 host_close(holyfd);
759 holyfd = -1;
760 s = -1;
761
762 if (clispc.spc_fd != -1)
763 host_close(clispc.spc_fd);
764 clispc.spc_fd = -1;
765
766 /*
767 * for reconnect, gate everyone out of the receiver code
768 */
769 putwait_locked(&clispc, &rw, &rhdr);
770
771 pthread_mutex_lock(&clispc.spc_mtx);
772 clispc.spc_reconnecting = 1;
773 pthread_cond_broadcast(&clispc.spc_cv);
774 clispc.spc_generation++;
775 while (clispc.spc_istatus != SPCSTATUS_FREE) {
776 clispc.spc_istatus = SPCSTATUS_WANTED;
777 pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx);
778 }
779 kickall(&clispc);
780
781 /*
782 * we can release it already since we hold the
783 * send lock during reconnect
784 * XXX: assert it
785 */
786 clispc.spc_istatus = SPCSTATUS_FREE;
787 pthread_mutex_unlock(&clispc.spc_mtx);
788 unputwait_locked(&clispc, &rw);
789
790 free(clispc.spc_buf);
791 clispc.spc_off = 0;
792
793 s = dupgood(host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0), 0);
794 if (s == -1)
795 return -1;
796
797 while (host_connect(s, serv_sa, parsetab[ptab_idx].slen) == -1) {
798 if (errno == EINTR)
799 continue;
800 ERRLOG(("rump_sp: client connect failed: %s\n",
801 strerror(errno)));
802 return -1;
803 }
804
805 if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
806 ERRLOG(("rump_sp: connect hook failed\n"));
807 return -1;
808 }
809
810 if ((n = host_read(s, banner, sizeof(banner)-1)) <= 0) {
811 ERRLOG(("rump_sp: failed to read banner\n"));
812 return -1;
813 }
814
815 if (banner[n-1] != '\n') {
816 ERRLOG(("rump_sp: invalid banner\n"));
817 return -1;
818 }
819 banner[n] = '\0';
820 /* XXX parse the banner some day */
821
822 flags = host_fcntl(s, F_GETFL, 0);
823 if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
824 ERRLOG(("rump_sp: socket fd NONBLOCK: %s\n", strerror(errno)));
825 return -1;
826 }
827 clispc.spc_fd = s;
828 clispc.spc_state = SPCSTATE_RUNNING;
829 clispc.spc_reconnecting = 0;
830 holyfd = makeholyfd();
831
832 return 0;
833 }
834
835 static int
doinit(void)836 doinit(void)
837 {
838
839 TAILQ_INIT(&clispc.spc_respwait);
840 pthread_mutex_init(&clispc.spc_mtx, NULL);
841 pthread_cond_init(&clispc.spc_cv, NULL);
842
843 return 0;
844 }
845
#ifdef RTLD_NEXT
/*
 * Default symbol lookup: plain dlsym().  rumphijack_dlsym is a weak
 * alias of it, so another object may supply its own rumphijack_dlsym;
 * rumpclient_init() compares the two addresses to tell which lookup is
 * in effect.
 */
void *rumpclient__dlsym(void *, const char *);
void *
rumpclient__dlsym(void *handle, const char *symbol)
{

	return dlsym(handle, symbol);
}
void *rumphijack_dlsym(void *, const char *)
    __attribute__((__weak__, alias("rumpclient__dlsym")));
#endif
857
858 static pid_t init_done = 0;
859
860 int
rumpclient_init(void)861 rumpclient_init(void)
862 {
863 char *p;
864 int error;
865 int rv = -1;
866 int hstype;
867 pid_t mypid;
868
869 /*
870 * Make sure we're not riding the context of a previous
871 * host fork. Note: it's *possible* that after n>1 forks
872 * we have the same pid as one of our exited parents, but
873 * I'm pretty sure there are 0 practical implications, since
874 * it means generations would have to skip rumpclient init.
875 */
876 if (init_done == (mypid = getpid()))
877 return 0;
878
879 #ifdef USE_KQUEUE
880 /* kq does not traverse fork() */
881 holyfd = -1;
882 #endif
883 init_done = mypid;
884
885 sigfillset(&fullset);
886
887 /*
888 * sag mir, wo die symbols sind. zogen fort, der krieg beginnt.
889 * wann wird man je verstehen? wann wird man je verstehen?
890 */
891 #ifdef RTLD_NEXT
892 #define FINDSYM2(_name_,_syscall_) \
893 if ((host_##_name_ = rumphijack_dlsym(RTLD_NEXT, \
894 #_syscall_)) == NULL) { \
895 if (rumphijack_dlsym == rumpclient__dlsym) \
896 host_##_name_ = _name_; /* static fallback */ \
897 if (host_##_name_ == NULL) { \
898 fprintf(stderr,"cannot find %s: %s", #_syscall_,\
899 dlerror()); \
900 exit(1); \
901 } \
902 }
903 #else
904 #define FINDSYM2(_name_,_syscall) \
905 host_##_name_ = _name_;
906 #endif
907 #define FINDSYM(_name_) FINDSYM2(_name_,_name_)
908 #ifdef __NetBSD__
909 FINDSYM2(socket,__socket30)
910 #else
911 FINDSYM(socket)
912 #endif
913
914 FINDSYM(close)
915 FINDSYM(connect)
916 FINDSYM(fcntl)
917 FINDSYM(poll)
918 FINDSYM(read)
919 FINDSYM(sendmsg)
920 FINDSYM(setsockopt)
921 FINDSYM(dup)
922 FINDSYM(execve)
923
924 #ifdef USE_KQUEUE
925 FINDSYM(kqueue)
926 #ifdef __NetBSD__
927 #if !__NetBSD_Prereq__(5,99,7)
928 FINDSYM(kevent)
929 #elif !__NetBSD_Prereq__(10,99,4)
930 FINDSYM2(kevent,_sys___kevent50)
931 #else
932 FINDSYM2(kevent,_sys___kevent100)
933 #endif
934 #else
935 FINDSYM(kevent)
936 #endif
937 #endif /* USE_KQUEUE */
938
939 #ifdef USE_SIGNALFD
940 FINDSYM(signalfd)
941 #endif
942
943 #undef FINDSYM
944 #undef FINDSY2
945
946 if ((p = getenv("RUMP__PARSEDSERVER")) == NULL) {
947 if ((p = getenv("RUMP_SERVER")) == NULL) {
948 fprintf(stderr, "error: RUMP_SERVER not set\n");
949 errno = ENOENT;
950 goto out;
951 }
952 }
953
954 if ((error = parseurl(p, &serv_sa, &ptab_idx, 0)) != 0) {
955 errno = error;
956 goto out;
957 }
958
959 if (doinit() == -1)
960 goto out;
961
962 if ((p = getenv("RUMPCLIENT__EXECFD")) != NULL) {
963 sscanf(p, "%d,%d", &clispc.spc_fd, &holyfd);
964 unsetenv("RUMPCLIENT__EXECFD");
965 hstype = HANDSHAKE_EXEC;
966 } else {
967 if (doconnect() == -1)
968 goto out;
969 hstype = HANDSHAKE_GUEST;
970 }
971
972 error = handshake_req(&clispc, hstype, NULL, 0, false);
973 if (error) {
974 pthread_mutex_destroy(&clispc.spc_mtx);
975 pthread_cond_destroy(&clispc.spc_cv);
976 if (clispc.spc_fd != -1)
977 host_close(clispc.spc_fd);
978 errno = error;
979 goto out;
980 }
981 rv = 0;
982
983 out:
984 if (rv == -1)
985 init_done = 0;
986 return rv;
987 }
988
/*
 * State carried across a fork: created by rumpclient_prefork() and
 * consumed by rumpclient_fork_init() and rumpclient_fork_vparent().
 */
struct rumpclient_fork {
	uint32_t fork_auth[AUTHLEN];	/* server's reply to the prefork request */
	struct spclient fork_spc;	/* copy of clispc taken before the fork */
	int fork_holyfd;		/* value of holyfd before the fork */
};
994
995 struct rumpclient_fork *
rumpclient_prefork(void)996 rumpclient_prefork(void)
997 {
998 struct rumpclient_fork *rpf;
999 sigset_t omask;
1000 void *resp;
1001 int rv;
1002
1003 pthread_sigmask(SIG_SETMASK, &fullset, &omask);
1004 rpf = malloc(sizeof(*rpf));
1005 if (rpf == NULL)
1006 goto out;
1007
1008 if ((rv = prefork_req(&clispc, &omask, &resp)) != 0) {
1009 free(rpf);
1010 errno = rv;
1011 rpf = NULL;
1012 goto out;
1013 }
1014
1015 memcpy(rpf->fork_auth, resp, sizeof(rpf->fork_auth));
1016 free(resp);
1017
1018 rpf->fork_spc = clispc;
1019 rpf->fork_holyfd = holyfd;
1020
1021 out:
1022 pthread_sigmask(SIG_SETMASK, &omask, NULL);
1023 return rpf;
1024 }
1025
1026 int
rumpclient_fork_init(struct rumpclient_fork * rpf)1027 rumpclient_fork_init(struct rumpclient_fork *rpf)
1028 {
1029 int error;
1030 int osock;
1031
1032 osock = clispc.spc_fd;
1033 memset(&clispc, 0, sizeof(clispc));
1034 clispc.spc_fd = osock;
1035
1036 #ifdef USE_KQUEUE
1037 holyfd = -1; /* kqueue descriptor is not copied over fork() */
1038 #else
1039 if (holyfd != -1) {
1040 host_close(holyfd);
1041 holyfd = -1;
1042 }
1043 #endif
1044
1045 if (doinit() == -1)
1046 return -1;
1047 if (doconnect() == -1)
1048 return -1;
1049
1050 error = handshake_req(&clispc, HANDSHAKE_FORK, rpf->fork_auth,
1051 0, false);
1052 if (error) {
1053 pthread_mutex_destroy(&clispc.spc_mtx);
1054 pthread_cond_destroy(&clispc.spc_cv);
1055 errno = error;
1056 return -1;
1057 }
1058
1059 return 0;
1060 }
1061
/*
 * Cancel a fork prepared with rumpclient_prefork().  Not implemented:
 * the function does nothing, and in particular neither contacts the
 * server nor frees rpf.
 */
/*ARGSUSED*/
void
rumpclient_fork_cancel(struct rumpclient_fork *rpf)
{

	/* EUNIMPL */
}
1069
1070 void
rumpclient_fork_vparent(struct rumpclient_fork * rpf)1071 rumpclient_fork_vparent(struct rumpclient_fork *rpf)
1072 {
1073
1074 clispc = rpf->fork_spc;
1075 holyfd = rpf->fork_holyfd;
1076 }
1077
1078 void
rumpclient_setconnretry(time_t timeout)1079 rumpclient_setconnretry(time_t timeout)
1080 {
1081
1082 if (timeout < RUMPCLIENT_RETRYCONN_DIE)
1083 return; /* gigo */
1084
1085 retrytimo = timeout;
1086 }
1087
1088 int
rumpclient__closenotify(int * fdp,enum rumpclient_closevariant variant)1089 rumpclient__closenotify(int *fdp, enum rumpclient_closevariant variant)
1090 {
1091 int fd = *fdp;
1092 int untilfd;
1093 int newfd;
1094
1095 switch (variant) {
1096 case RUMPCLIENT_CLOSE_FCLOSEM:
1097 untilfd = MAX(clispc.spc_fd, holyfd);
1098 for (; fd <= untilfd; fd++) {
1099 if (fd == clispc.spc_fd || fd == holyfd)
1100 continue;
1101 (void)host_close(fd);
1102 }
1103 *fdp = fd;
1104 break;
1105
1106 case RUMPCLIENT_CLOSE_CLOSE:
1107 case RUMPCLIENT_CLOSE_DUP2:
1108 if (fd == clispc.spc_fd) {
1109 newfd = dupgood(clispc.spc_fd, 1);
1110 if (newfd == -1)
1111 return -1;
1112
1113 #ifdef USE_KQUEUE
1114 {
1115 struct kevent kev[2];
1116
1117 /*
1118 * now, we have a new socket number, so change
1119 * the file descriptor that kqueue is
1120 * monitoring. remove old and add new.
1121 */
1122 EV_SET(&kev[0], clispc.spc_fd,
1123 EVFILT_READ, EV_DELETE, 0, 0, 0);
1124 EV_SET(&kev[1], newfd,
1125 EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1126 if (host_kevent(holyfd, kev, 2, NULL, 0, NULL) == -1) {
1127 int sverrno = errno;
1128 host_close(newfd);
1129 errno = sverrno;
1130 return -1;
1131 }}
1132 #endif /* !USE_KQUEUE */
1133 clispc.spc_fd = newfd;
1134 }
1135 if (holyfd != -1 && fd == holyfd) {
1136 newfd = dupgood(holyfd, 1);
1137 if (newfd == -1)
1138 return -1;
1139 holyfd = newfd;
1140 }
1141 break;
1142 }
1143
1144 return 0;
1145 }
1146
/*
 * fork() for rump clients: hands the host's fork() to
 * rumpclient__dofork(), which is not defined in this file, and returns
 * whatever that returns.
 */
pid_t
rumpclient_fork(void)
{

	return rumpclient__dofork(fork);
}
1153
1154 /*
1155 * Process is about to exec. Save info about our existing connection
1156 * in the env. rumpclient will check for this info in init().
1157 * This is mostly for the benefit of rumphijack, but regular applications
1158 * may use it as well.
1159 */
1160 int
rumpclient_exec(const char * path,char * const argv[],char * const envp[])1161 rumpclient_exec(const char *path, char *const argv[], char *const envp[])
1162 {
1163 char buf[4096];
1164 char **newenv;
1165 char *envstr, *envstr2;
1166 size_t nelem;
1167 int rv, sverrno;
1168
1169 snprintf(buf, sizeof(buf), "RUMPCLIENT__EXECFD=%d,%d",
1170 clispc.spc_fd, holyfd);
1171 envstr = malloc(strlen(buf)+1);
1172 if (envstr == NULL) {
1173 return ENOMEM;
1174 }
1175 strcpy(envstr, buf);
1176
1177 /* do we have a fully parsed url we want to forward in the env? */
1178 if (*parsedurl != '\0') {
1179 snprintf(buf, sizeof(buf),
1180 "RUMP__PARSEDSERVER=%s", parsedurl);
1181 envstr2 = malloc(strlen(buf)+1);
1182 if (envstr2 == NULL) {
1183 free(envstr);
1184 return ENOMEM;
1185 }
1186 strcpy(envstr2, buf);
1187 } else {
1188 envstr2 = NULL;
1189 }
1190
1191 for (nelem = 0; envp && envp[nelem]; nelem++)
1192 continue;
1193
1194 newenv = malloc(sizeof(*newenv) * (nelem+3));
1195 if (newenv == NULL) {
1196 free(envstr2);
1197 free(envstr);
1198 return ENOMEM;
1199 }
1200 memcpy(&newenv[0], envp, nelem*sizeof(*envp));
1201
1202 newenv[nelem] = envstr;
1203 newenv[nelem+1] = envstr2;
1204 newenv[nelem+2] = NULL;
1205
1206 rv = host_execve(path, argv, newenv);
1207
1208 _DIAGASSERT(rv != 0);
1209 sverrno = errno;
1210 free(envstr2);
1211 free(envstr);
1212 free(newenv);
1213 errno = sverrno;
1214 return rv;
1215 }
1216
/*
 * daemon() written out by hand, for the benefit of platforms which do
 * not have one, with the rump client fork protocol wrapped around the
 * fork: the parent exits, the child becomes a session leader,
 * optionally changes to "/" and points stdio at /dev/null, and then
 * sets up its own connection to the server.
 *
 * Returns 0 in the child on success, -1 on failure.
 */
int
rumpclient_daemon(int nochdir, int noclose)
{
	struct rumpclient_fork *rf;
	pid_t pid;
	int sverrno;

	rf = rumpclient_prefork();
	if (rf == NULL)
		return -1;

	pid = fork();
	if (pid == -1)
		goto daemonerr;
	if (pid != 0) {
		/* the parent is done */
		_exit(0);
	}

	if (setsid() == -1)
		goto daemonerr;
	if (!nochdir && chdir("/") == -1)
		goto daemonerr;
	if (!noclose) {
		int nullfd = open("/dev/null", O_RDWR);
		int target;

		for (target = 0; target <= 2; target++)
			dup2(nullfd, target);
		if (nullfd > 2)
			close(nullfd);
	}

	/* note: fork is either completed or cancelled by the call */
	return rumpclient_fork_init(rf) == -1 ? -1 : 0;

 daemonerr:
	sverrno = errno;
	rumpclient_fork_cancel(rf);
	errno = sverrno;
	return -1;
}
1264