/* hinet.c  -  Hiquu I/O Engine I/O shuffler TCP/IP network connect and accept
 * Copyright (c) 2006,2012 Sampo Kellomaki (sampo@iki.fi), All Rights Reserved.
 * This is confidential unpublished proprietary source code of the author.
 * NO WARRANTY, not even implied warranties. Contains trade secrets.
 * Distribution prohibited unless authorized in writing. See file COPYING.
 * Special grant: hiios.c may be used with zxid open source project under
 * same licensing terms as zxid itself.
 * $Id$
 *
 * 15.4.2006, created over Easter holiday --Sampo
 * 16.8.2012, modified license grant to allow use with ZXID.org --Sampo
 * 6.9.2012,  added support for TLS and SSL --Sampo
 * 17.9.2012, factored net code to its own file --Sampo
 *
 * See http://pl.atyp.us/content/tech/servers.html for inspiration on threading strategy.
 *
 *   MANY ELEMENTS IN QUEUE            ONE ELEMENT IN Q   EMPTY QUEUE
 *   consume             produce       consume  produce   consume  produce
 *    |                   |             | ,-------'         |        |
 *    V                   V             V V                 V        V
 *   qel.n --> qel.n --> qel.n --> 0   qel.n --> 0          0        0
 *
 ****
 * TODO: accept() blocks (after accept returned EAGAIN) - see if this is a blocking socket.
 * See if edge triggered epoll has some special consideration for accept(2).
 */
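
/* Note on the accept() TODO above: with edge triggered event notification a
 * single readiness event may cover several queued connections, so the listener
 * has to be drained until accept(2) returns EAGAIN. This code approximates
 * draining by re-enqueuing the listener after each accept, see the "relisten"
 * hi_todo_produce() at the end of hi_accept(). */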

#include "platform.h"

#include <pthread.h>
#include <memory.h>
#include <stdlib.h>
//#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
#include <string.h>

#include <zx/zxid.h>
#include "akbox.h"
#include "hiproto.h"
#include "hiios.h"
#include "errmac.h"

extern zxid_conf* zxbus_cf;
extern int errmac_debug;
#ifdef MUTEX_DEBUG
extern pthread_mutexattr_t MUTEXATTR_DECL;
#endif

#define SSL_ENCRYPTED_HINT "ERROR\nmessage:tls-needed\n\nTLS or SSL connection wanted but other end did not speak protocol.\n\0"

/*() Verify peer ClientTLS credential.
 * If known peer, eid should be the eid of the peer and is used to look up
 * the metadata of the peer. The general strategy is that verification
 * is done only after the TLS handshake. This is achieved either by supplying
 * SSL_VERIFY_NONE or SSL_VERIFY_PEER with a verify callback that causes any
 * certificate to be accepted. In case of STOMP, the STOMP (or CONNECT) connect
 * message will contain the appropriate eid in the login header. On the client
 * side, the client knows which server it is contacting so it can look up
 * the eid for that server.
 * return:: 0 on error, 1 on success */

/* Called by:  hi_open_tcp, zxbus_login_ent */
int hi_vfy_peer_ssl_cred(struct hi_thr* hit, struct hi_io* io, const char* eid)
{
#ifdef USE_OPENSSL
  X509* peer_cert;
  zxid_entity* meta;
  long vfy_err;

  if (errmac_debug>1) D("SSL_version(%s) cipher(%s)",SSL_get_version(io->ssl),SSL_get_cipher(io->ssl));

  vfy_err = SSL_get_verify_result(io->ssl);
  switch (vfy_err) {
  case X509_V_OK: break;
  case X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT:
    D("TLS/SSL connection to(%s) made, with certificate err that will be ignored. (%ld)", eid, vfy_err);
    zx_report_openssl_err("open_bus_url-verify_res");
    break;
  default:
    ERR("TLS/SSL connection to(%s) made, but certificate not acceptable. (%ld)", eid, vfy_err);
    zx_report_openssl_err("open_bus_url-verify_res");
    return 0;
  }
  if (!(peer_cert = SSL_get_peer_certificate(io->ssl))) {
    ERR("TLS/SSL connection to(%s) made, but peer did not send certificate", eid);
    zx_report_openssl_err("peer_cert");
    return 0;
  }
  meta = zxid_get_ent(zxbus_cf, eid);
  if (!meta) {
    ERR("Unable to find metadata for eid(%s) in verify peer cert", eid);
    X509_free(peer_cert);  /* SSL_get_peer_certificate(3) returns a new reference */
    return 0;
  }
  if (!meta->enc_cert) {
    ERR("Metadata for eid(%s) does not contain enc cert", eid);
    X509_free(peer_cert);
    return 0;
  }
  if (X509_cmp(meta->enc_cert, peer_cert)) {
    ERR("Peer certificate does not match metadata for eid(%s)", eid);
    D("compare: %d", memcmp(meta->enc_cert->sha1_hash, peer_cert->sha1_hash, SHA_DIGEST_LENGTH));
    PEM_write_X509(ERRMAC_DEBUG_LOG, peer_cert);
    X509_free(peer_cert);
    return 0;
  }
  X509_free(peer_cert);
#endif
  return 1;
}
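
/* Usage sketch (hypothetical variable names): on the server side the eid comes
 * from the login header of the STOMP CONNECT frame, e.g.
 *   if (!hi_vfy_peer_ssl_cred(hit, io, login_eid)) { ...reject the connection... }
 * On the client side the caller passes the specstr/eid of the server it chose
 * to contact, as hi_open_tcp() does below. */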

/*() Set socket to be nonblocking.
 * Our I/O strategy (edge triggered epoll or /dev/poll) depends on nonblocking fds. */

/* Called by: */
void nonblock(int fd)
{
#ifdef MINGW
  u_long arg = 1;
  if (ioctlsocket(fd, FIONBIO, &arg) == SOCKET_ERROR) {
    ERR("Unable to ioctlsocket(%d, FIONBIO, 1): %d %s", fd, errno, STRERROR(errno));
    exit(2);
  }
#else
#if 0
  int fflags = fcntl(fd, F_GETFL, 0);
  if (fflags == -1) {
    ERR("Unable to fcntl(F_GETFL) on socket %d: %d %s", fd, errno, STRERROR(errno));
    exit(2);
  }
  fflags |= O_NONBLOCK | O_NDELAY;  /* O_NDELAY is a historical synonym for O_NONBLOCK */
#endif

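  /* N.B. F_SETFL with a constant mask clobbers any other file status flags.
   * The disabled get-modify-set code above would preserve them; for a freshly
   * created socket the simple form is deemed sufficient. */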
  if (fcntl(fd, F_SETFL, O_NONBLOCK | O_NDELAY) == -1) {
    ERR("Unable to fcntl(F_SETFL) on socket %d: %d %s", fd, errno, STRERROR(errno));
    exit(2);
  }

#if 0
  if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1) {
    ERR("fcntl(F_SETFD,FD_CLOEXEC) system call failed for %d: %d %s", fd, errno, STRERROR(errno));
    exit(2);
  }
#endif
#endif
}

/* Tweaking kernel buffers to be smaller can be a win if a massive number
 * of connections are simultaneously open. On many systems the default buffer
 * size is 64KB in each direction, leading to 128KB memory consumption per
 * connection. Tweaking to only, say, 8KB can bring substantial savings
 * (but may hurt TCP performance). */

/* Called by: */
void setkernelbufsizes(int fd, int tx, int rx)
{
  /* See `man 7 tcp' for TCP_CORK, TCP_NODELAY, etc. */
  if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, (char*)&tx, sizeof(tx)) == -1) {
    ERR("setsockopt(SO_SNDBUF, %d) on fd=%d: %d %s", tx, fd, errno, STRERROR(errno));
    exit(2);
  }
  if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, (char*)&rx, sizeof(rx)) == -1) {
    ERR("setsockopt(SO_RCVBUF, %d) on fd=%d: %d %s", rx, fd, errno, STRERROR(errno));
    exit(2);
  }
}
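
/* Worked example: at the common 64KB+64KB defaults, 100,000 concurrent
 * connections pin about 12.8GB of kernel buffer space; calling
 * setkernelbufsizes(fd, 8192, 8192) on each fd cuts that to about 1.6GB. */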

extern int nkbuf;
extern int listen_backlog;

#if defined(MACOSX) || defined(FREEBSD)
/* Zero timeout for nonblocking kevent(2) calls. Defined before first use below. */
const struct timespec zero_timeout = {0,0};
#endif

/* Called by:  zxbusd_main */
struct hi_io* hi_open_listener(struct hiios* shf, struct hi_host_spec* hs, int proto)
{
  struct hi_io* io;
  int fd, tmp;
  /* socket(domain,type,proto): leaving proto as 0 chooses the appropriate
     one given domain and type, see man 2 socket, near middle. */
  if ((fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
    ERR("listen: Unable to create socket(AF_INET, SOCK_STREAM, 0) %d %s", errno, STRERROR(errno));
    return 0;
  }
  if (fd >= shf->max_ios) {
    ERR("listen: File descriptor limit(%d) exceeded fd=%d. Consider increasing the limit with -nfd flag, or figure out if there are any descriptor leaks.", shf->max_ios, fd);
    close(fd);
    return 0;
  }
  nonblock(fd);
  if (nkbuf)
    setkernelbufsizes(fd, nkbuf, nkbuf);

  tmp = 1;
  if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&tmp, sizeof(tmp)) == -1) {
    ERR("listen: Failed to call setsockopt(REUSEADDR) on %d: %d %s", fd, errno, STRERROR(errno));
    exit(2);
  }

  if (bind(fd, (struct sockaddr*)&hs->sin, sizeof(struct sockaddr_in))) {
    ERR("listen: Unable to bind socket %d (%s): %d %s (trying again in 2 secs)",
	fd, hs->specstr, errno, STRERROR(errno));
    /* It appears to be a problem under 2.5.7x series kernels that if you kill a process that
     * was listening to a port, you cannot immediately bind on that same port again. */
    sleep(2);
    if (bind(fd, (struct sockaddr*)&hs->sin, sizeof(struct sockaddr_in))) {
      ERR("listen: Unable to bind socket %d (%s): %d %s (giving up)",
	  fd, hs->specstr, errno, STRERROR(errno));
      close(fd);
      return 0;
    }
  }

  if (listen(fd, listen_backlog)) {
    ERR("Unable to listen(%d, %d) (%s): %d %s",
	fd, listen_backlog, hs->specstr, errno, STRERROR(errno));
    close(fd);
    return 0;
  }

  io = shf->ios + fd;

#ifdef LINUX
  {
    struct epoll_event ev;
    ev.events = EPOLLIN | EPOLLET;  /* ET == EdgeTriggered */
    ev.data.ptr = io;
    if (epoll_ctl(shf->ep, EPOLL_CTL_ADD, fd, &ev)) {
      ERR("Unable to epoll_ctl(%d) (%s): %d %s", fd, hs->specstr, errno, STRERROR(errno));
      close(fd);
      return 0;
    }
  }
#endif
#ifdef SUNOS
  {
    struct pollfd pfd;
    pfd.fd = fd;
    pfd.events = POLLIN | POLLERR;
    if (write(shf->ep, &pfd, sizeof(pfd)) == -1) {
      ERR("Unable to write to /dev/poll fd(%d) (%s): %d %s", fd, hs->specstr, errno, STRERROR(errno));
      close(fd);
      return 0;
    }
  }
#endif
#if defined(MACOSX) || defined(FREEBSD)
  {
    struct kevent kev;
    /* EV_SET(2) takes a pointer to the kevent; udata carries the io pointer,
     * mirroring ev.data.ptr in the Linux branch. */
    EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, io);
    if (kevent(shf->ep, &kev, 1, 0, 0, 0) == -1) {
      ERR("kevent: fd(%d): %d %s", fd, errno, STRERROR(errno));
      close(fd);
      return 0;
    }
  }
#endif

  io->fd = fd;
  io->qel.kind = HI_LISTENT;
  io->qel.proto = proto;
  D("listen(%x) hs(%s)", fd, hs->specstr);
  return io;
}
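
/* N.B. The listener is registered for read events only: a listening socket
 * never becomes writable, whereas worker fds added via hi_add_fd() below are
 * registered for both read and write interest. */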

/*() When the poll marker is consumed from the todo queue, perform the OS dependent
 * epoll(2), /dev/poll, or kevent(2) wait and schedule work for the ready fds. */

/* Called by:  hi_shuffle */
void hi_poll(struct hi_thr* hit)
{
  struct hi_io* io;
  int i;
  D("epoll(%x)", hit->shf->ep);
 retry:
#ifdef LINUX
  hit->shf->n_evs = epoll_wait(hit->shf->ep, hit->shf->evs, hit->shf->max_evs, -1);
  if (hit->shf->n_evs == -1) {
    if (errno == EINTR) {
      D("EINTR fd(%x)", hit->shf->ep);
      goto retry;
    }
    ERR("epoll_wait(%x): %d %s", hit->shf->ep, errno, STRERROR(errno));
    return;
  }
  for (i = 0; i < hit->shf->n_evs; ++i) {
    io = (struct hi_io*)hit->shf->evs[i].data.ptr;
    io->events = hit->shf->evs[i].events;
    /* *** Should the todo_mut lock be batched? The advantage might not be big
     * as we need to either do pthread_cond_signal(3) to wake up one worker
     * or pthread_cond_broadcast(3) to wake them up all, which may be overkill.
     * N.B. hi_todo_produce() has logic to avoid enqueuing io that is closed. */
    hi_todo_produce(hit, &io->qel, "poll", 1);
  }
#endif
#ifdef SUNOS
  {
    struct dvpoll dp;
    dp.dp_timeout = -1;
    dp.dp_nfds = hit->shf->max_evs;
    dp.dp_fds = hit->shf->evs;
    hit->shf->n_evs = ioctl(hit->shf->ep, DP_POLL, &dp);
    if (hit->shf->n_evs < 0) {
      if (errno == EINTR) {
	D("EINTR fd(%x)", hit->shf->ep);
	goto retry;
      }
      ERR("/dev/poll ioctl(%x): %d %s", hit->shf->ep, errno, STRERROR(errno));
      return;
    }
    for (i = 0; i < hit->shf->n_evs; ++i) {
      io = hit->shf->ios + hit->shf->evs[i].fd;
      io->events = hit->shf->evs[i].revents;
      /* Poll says work is possible: sched wk for io if not under wk yet, or cur_pdu needs wk. */
      /*if (!io->cur_pdu || io->cur_pdu->need)   *** cur_pdu is always set */
      hi_todo_produce(hit, &io->qel, "poll", 1); /* *** should the todo_mut lock be batched? */
    }
  }
#endif
#if defined(MACOSX) || defined(FREEBSD)
  hit->shf->n_evs = kevent(hit->shf->ep, 0, 0, hit->shf->evs, hit->shf->max_evs, &zero_timeout);
  if (hit->shf->n_evs == -1) {
    if (errno == EINTR) {
      D("EINTR fd(%x)", hit->shf->ep);
      goto retry;
    }
    ERR("kevent(%x): %d %s", hit->shf->ep, errno, STRERROR(errno));
    return;
  }
  /* *** double check whether the evs (array of kevents) is in filedescriptor order */
  for (i = 0; i < hit->shf->n_evs; ++i) {
    io = (struct hi_io*)hit->shf->evs[i].udata;  /* udata was set to io when the fd was added */
    /* Translate the kevent filter to the poll(2) style bits used elsewhere.
     * *** This branch is less exercised than the Linux one. */
    io->events = (hit->shf->evs[i].filter == EVFILT_WRITE) ? POLLOUT : POLLIN;
    /* *** Should the todo_mut lock be batched? The advantage might not be big
     * as we need to either do pthread_cond_signal(3) to wake up one worker
     * or pthread_cond_broadcast(3) to wake them up all, which may be overkill.
     * N.B. hi_todo_produce() has logic to avoid enqueuing io that is closed. */
    hi_todo_produce(hit, &io->qel, "poll", 1);
  }
#endif
  LOCK(hit->shf->todo_mut, "todo_poll");
  D("POLL LK&UNLK todo_mut.thr=%lx repoll", (long)hit->shf->todo_mut.thr);
  hit->shf->poll_tok.proto = HIPROTO_POLL_ON;  /* special "on" flag - not a real protocol */
  UNLOCK(hit->shf->todo_mut, "todo_poll");
}
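
/* N.B. hi_poll() runs in whichever worker thread consumed the poll marker from
 * the todo queue; re-arming the token (HIPROTO_POLL_ON above) apparently lets
 * the next free worker perform the following poll round, so polling needs no
 * dedicated thread. */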

/*() Add file descriptor to poll
 * locking:: must be called inside io->qel.mut */

/* Called by:  hi_accept_book, hi_open_tcp x2, serial_init */
struct hi_io* hi_add_fd(struct hi_thr* hit, struct hi_io* io, int fd, int kind)
{
  ASSERTOPI(fd, <, hit->shf->max_ios);
  ASSERTOPI(io->n_thr, ==, 0);
  ++io->n_thr;  /* May be returned by poll at any time, thus there is a "virtual thread". */

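  /* The fd is registered once for both read and write interest (edge triggered
   * where available); later readiness changes arrive as events and are cached
   * in io->events by hi_poll(), rather than the registration being modified. */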
#ifdef LINUX
  {
    struct epoll_event ev;
    ev.events = EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP | EPOLLET;  /* ET == EdgeTriggered */
    ev.data.ptr = io;
    if (epoll_ctl(hit->shf->ep, EPOLL_CTL_ADD, fd, &ev)) {
      ERR("Unable to epoll_ctl(%d): %d %s", fd, errno, STRERROR(errno));
#ifdef USE_OPENSSL
      if (io->ssl) {
	SSL_free(io->ssl);
	io->ssl = 0;
      }
#endif
      close(fd);
      return 0;
    }
  }
#endif
#ifdef SUNOS
  {
    struct pollfd pfd;
    pfd.fd = fd;
    pfd.events = POLLIN | POLLOUT | POLLERR | POLLHUP;
    if (write(hit->shf->ep, &pfd, sizeof(pfd)) == -1) {
      ERR("Unable to write to /dev/poll fd(%d): %d %s", fd, errno, STRERROR(errno));
#ifdef USE_OPENSSL
      if (io->ssl) {
	SSL_free(io->ssl);
	io->ssl = 0;
      }
#endif
      close(fd);
      return 0;
    }
  }
#endif

#if defined(MACOSX) || defined(FREEBSD)
  {
    /* kqueue filters are enumerations, not a bitmask: read and write interest
     * have to be registered as two separate change events. */
    struct kevent kev[2];
    EV_SET(&kev[0], fd, EVFILT_READ,  EV_ADD, 0, 0, io);
    EV_SET(&kev[1], fd, EVFILT_WRITE, EV_ADD, 0, 0, io);
    if (kevent(hit->shf->ep, kev, 2, 0, 0, 0) == -1) {
      ERR("kevent: fd(%d): %d %s", fd, errno, STRERROR(errno));
#ifdef USE_OPENSSL
      if (io->ssl) {
	SSL_free(io->ssl);
	io->ssl = 0;
      }
#endif
      close(fd);
      return 0;
    }
  }
#endif

  /* memset(io, 0, sizeof(struct hi_io)); *** MUST NOT clear as there are important
   * fields like cur_pdu and lock initializations already set. All memory was zeroed
   * in hi_new_shuff(). After that all changes should be field by field. */
  ASSERTOPI(io->writing, ==, 0);
  ASSERTOPI(io->reading, ==, 0);
  ASSERTOPI(io->n_to_write, ==, 0);
  ASSERTOPP(io->in_write, ==, 0);
  ASSERTOPP(io->to_write_consume, ==, 0);
  ASSERTOPP(io->to_write_produce, ==, 0);
  ASSERT(io->cur_pdu);  /* cur_pdu is always set to some value */
  ASSERTOPP(io->reqs, ==, 0);
  ASSERTOPP(io->pending, ==, 0);
  ASSERTOPI(io->qel.intodo, ==, HI_INTODO_SHF_FREE);
  io->qel.intodo = HI_INTODO_IOINUSE;
  //io->ap = io->m;       /* Nothing read yet */
  io->ent = 0;          /* Not authenticated yet */
  io->qel.kind = kind;
  io->fd = fd;          /* This change marks the slot as used in the big table. */
  return io;
}

/*() Remove file descriptor from poll. */

/* Called by:  hi_close */
void hi_del_fd(struct hi_thr* hit, int fd)
{
  ASSERTOPI(fd, <, hit->shf->max_ios);
  /* The top bit of fd is apparently used elsewhere to mark a closed fd
   * (see hi_close()), so it is masked off here to recover the real descriptor. */
#ifdef LINUX
  {
    if (epoll_ctl(hit->shf->ep, EPOLL_CTL_DEL, fd&0x7fffffff, 0)) {
      ERR("Unable to epoll_ctl(%x): %d %s", fd, errno, STRERROR(errno));
      /* N.B. Even if it fails, do not close the fd as we depend on that as synchronization. */
    }
  }
#endif
#ifdef SUNOS
  {
    struct pollfd pfd;
    pfd.fd = fd&0x7fffffff;
    pfd.events = POLLREMOVE;  /* /dev/poll removes an fd by writing POLLREMOVE for it */
    if (write(hit->shf->ep, &pfd, sizeof(pfd)) == -1) {
      ERR("Unable to write to /dev/poll fd(%x): %d %s", fd, errno, STRERROR(errno));
    }
  }
#endif
#if defined(MACOSX) || defined(FREEBSD)
  {
    struct kevent kev[2];
    EV_SET(&kev[0], fd&0x7fffffff, EVFILT_READ,  EV_DELETE, 0, 0, 0);
    EV_SET(&kev[1], fd&0x7fffffff, EVFILT_WRITE, EV_DELETE, 0, 0, 0);
    if (kevent(hit->shf->ep, kev, 2, 0, 0, 0) == -1) {
      ERR("kevent: fd(%d): %d %s", fd, errno, STRERROR(errno));
    }
  }
#endif
}

/*() Create client socket. */

/* Called by:  smtp_send, zxbusd_main */
struct hi_io* hi_open_tcp(struct hi_thr* hit, struct hi_host_spec* hs, int proto)
{
  struct hi_io* io;
  int fd;
#if defined(USE_OPENSSL) && defined(SSL_IMMEDIATE)
  int err;
#endif
  if ((fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
    ERR("Unable to create socket(AF_INET, SOCK_STREAM, 0) %d %s", errno, STRERROR(errno));
    return 0;
  }

  if (fd >= hit->shf->max_ios) {
    ERR("File descriptor limit(%d) exceeded fd=%d. Consider increasing the limit with -nfd flag, or figure out if there are any descriptor leaks.", hit->shf->max_ios, fd);
    goto errout;
  }
  io = hit->shf->ios + fd;
  io->qel.proto = proto;

  nonblock(fd);
  if (nkbuf)
    setkernelbufsizes(fd, nkbuf, nkbuf);

  if ((connect(fd, (struct sockaddr*)&hs->sin, sizeof(hs->sin)) == -1)
      && (errno != EINPROGRESS)) {
    ERR("Connection to %s failed: %d %s", hs->specstr, errno, STRERROR(errno));
    goto errout;
  }
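
  /* For a nonblocking socket, connect(2) usually returns -1 with EINPROGRESS;
   * completion or failure is reported later by the poll machinery as
   * writability or an error event on the fd, hence EINPROGRESS is not treated
   * as an error above. */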

  D("connect(%x) hs(%s)", fd, hs->specstr);
  /*SSL_CTX_add_extra_chain_cert(hit->shf->ssl_ctx, ca_cert);*/

#ifdef USE_OPENSSL
  if (hi_prototab[proto].is_tls) {
    --io->qel.proto;  /* Nonssl protocol is always one smaller than SSL variant. */
    io->ssl = SSL_new(hit->shf->ssl_ctx);
    if (!io->ssl) {
      ERR("TLS/SSL connect to(%s): SSL object initialization problem", hs->specstr);
      zx_report_openssl_err("open_tcp-ssl");
      goto errout;
    }
    if (!SSL_set_fd(io->ssl, fd)) {
      ERR("TLS/SSL connect to(%s): SSL fd(%x) initialization problem", hs->specstr, fd);
      zx_report_openssl_err("open_tcp-set_fd");
      goto sslerrout;
    }

#ifdef SSL_IMMEDIATE
    switch (err = SSL_get_error(io->ssl, SSL_connect(io->ssl))) {
    case SSL_ERROR_NONE: break;
      /*case SSL_ERROR_WANT_ACCEPT:  documented, but undeclared */
    case SSL_ERROR_WANT_READ:
    case SSL_ERROR_WANT_CONNECT:
    case SSL_ERROR_WANT_WRITE: break;
    default:
      ERR("TLS/SSL connect to(%s): handshake problem (%d)", hs->specstr, err);
      zx_report_openssl_err("open_tcp-ssl_connect");
      write(fd, SSL_ENCRYPTED_HINT, sizeof(SSL_ENCRYPTED_HINT)-1);
      goto sslerrout;
    }
    if (!hi_vfy_peer_ssl_cred(hit, io, hs->specstr))
      goto sslerrout;
#else
    SSL_set_connect_state(io->ssl);
    /* *** how/when to hi_vfy_peer_ssl_cred() ? */
#endif
  }
  LOCK(io->qel.mut, "hi_open_tcp");
  hi_add_fd(hit, io, fd, HI_TCP_C);
  UNLOCK(io->qel.mut, "hi_open_tcp");
  return io;
 sslerrout:
  if (io->ssl) {
    SSL_free(io->ssl);
    io->ssl = 0;
  }
#else
  io->ssl = 0;
  LOCK(io->qel.mut, "hi_open_tcp-2");
  hi_add_fd(hit, io, fd, HI_TCP_C);
  UNLOCK(io->qel.mut, "hi_open_tcp-2");
  return io;
#endif
 errout:
  close(fd);
  return 0;
}

/*() Process half accepted socket (already accepted at socket layer, but
 * not yet booked in our data structures - perhaps due to delayed
 * booking used to cope with threads that are still looking at
 * the old connection). */

/* Called by:  hi_accept, hi_shuffle */
void hi_accept_book(struct hi_thr* hit, struct hi_io* io, int fd)
{
  int n_thr;
  struct hi_io* nio;
#if defined(USE_OPENSSL) && defined(SSL_IMMEDIATE)
  int err;
#endif

#ifdef USE_OPENSSL
  io->ssl = 0;
  D("proto(%d), is_tls=%d", io->qel.proto, hi_prototab[io->qel.proto].is_tls);
  if (hi_prototab[io->qel.proto].is_tls) {
    --io->qel.proto;  /* Non SSL protocol is always one smaller */
    D("SSL proto(%d)", io->qel.proto);
    io->ssl = SSL_new(hit->shf->ssl_ctx);
    if (!io->ssl) {
      ERR("TLS/SSL accept: SSL object initialization problem %d", 0);
      zx_report_openssl_err("accept-ssl");
      goto errout;
    }
    if (!SSL_set_fd(io->ssl, fd)) {
      ERR("TLS/SSL accept: fd(%x) SSL initialization problem", fd);
      zx_report_openssl_err("accept-set_fd");
      goto sslerrout;
    }

#ifdef SSL_IMMEDIATE
    switch (err = SSL_get_error(io->ssl, SSL_accept(io->ssl))) {
    case SSL_ERROR_NONE: break;
      /*case SSL_ERROR_WANT_ACCEPT:  documented, but undeclared */
    case SSL_ERROR_WANT_READ:
    case SSL_ERROR_WANT_CONNECT:
    case SSL_ERROR_WANT_WRITE: break;
    default:
      ERR("TLS/SSL accept: connect or handshake problem (%d)", err);
      zx_report_openssl_err("accept-ssl_accept");
      write(fd, SSL_ENCRYPTED_HINT, sizeof(SSL_ENCRYPTED_HINT)-1);
      goto sslerrout;
    }
#else
    SSL_set_accept_state(io->ssl);
#endif
  }
#endif

  /* We may accept new connection with same fd as an old one before all references
   * to the old one are gone. We could try reference counting - or we can delay
   * fully closing the fd before every reference has gone away.
   * *** Arguably this should never happen due to our half close strategy
   * keeping the fd occupied until all threads really are gone. */
  LOCK(io->qel.mut, "hi_accept");
  D("ACCEPT LK&UNLK io(%x)->qel.thr=%lx", fd, (long)io->qel.mut.thr);
  n_thr = io->n_thr;
  if (n_thr) {
    D("old fd(%x) n_thr=%d still going", fd, n_thr);
    NEVERNEVER("NOT POSSIBLE due to half close n_thr=%d", n_thr);
    io->qel.kind = HI_HALF_ACCEPT;
    UNLOCK(io->qel.mut, "hi_accept-fail");
    hi_todo_produce(hit, &io->qel, "accept", 0);  /* schedule a new try */
    return;
  }

  nio = hi_add_fd(hit, io, fd, HI_TCP_S);
  UNLOCK(io->qel.mut, "hi_accept");
  if (!nio || nio != io) {
    ERR("Adding fd failed: io=%p nio=%p", io, nio);
    goto sslerrout;
  }
  INFO("ACCEPTed and booked(%x)", io->fd);  /* add IP and port of client */

  switch (io->qel.proto) {
  case HIPROTO_STOMP:
    /* *** Go straight to reading STOMP/CONNECT pdu without passing through TODO */
    break;
  case HIPROTO_SMTP: /* In SMTP, server starts speaking first */
    hi_sendf(hit, io, 0, 0, "220 %s smtp ready\r\n", SMTP_GREET_DOMAIN);
    io->ad.smtp.state = SMTP_START;
    break;
#ifdef ENA_S5066
  case HIPROTO_DTS:
    {
      struct hi_host_spec* hs;
      ZMALLOC(io->ad.dts);
      io->ad.dts->remote_station_addr[0] = 0x61;   /* three nibbles long (padded with zeroes) */
      io->ad.dts->remote_station_addr[1] = 0x45;
      io->ad.dts->remote_station_addr[2] = 0x00;
      io->ad.dts->remote_station_addr[3] = 0x00;
      if (!(hs = hi_prototab[HIPROTO_DTS].specs)) {
	ZMALLOC(hs);
	hs->proto = HIPROTO_DTS;
	hs->specstr = "dts:accepted:connections";
	hs->next = hi_prototab[HIPROTO_DTS].specs;
	hi_prototab[HIPROTO_DTS].specs = hs;
      }
      io->n = hs->conns;
      hs->conns = io;
    }
    break;
#endif
  }
  return;

 sslerrout:
#ifdef USE_OPENSSL
  if (io->ssl) {
    SSL_shutdown(io->ssl);
    SSL_free(io->ssl);
    io->ssl = 0;
  }
#endif
 errout:
  close(fd);
}

/*() Create server side worker socket by accept(2)ing from listener socket. */

/* Called by:  hi_shuffle */
void hi_accept(struct hi_thr* hit, struct hi_io* listener)
{
  struct hi_io* io;
  struct sockaddr_in sa;
  int fd;
  socklen_t size;  /* accept(2) takes socklen_t*, which is not size_t on all platforms */
  size = sizeof(sa);
  D("accept from(%x)", listener->fd);
  if ((fd = accept(listener->fd, (struct sockaddr*)&sa, &size)) == -1) {
    if (errno != EAGAIN)
      ERR("Unable to accept from %x: %d %s", listener->fd, errno, STRERROR(errno));
    else
      D("accept(%x): EAGAIN", listener->fd);
    return;
  }
  if (fd >= hit->shf->max_ios) {
    ERR("accept: File descriptor limit(%d) exceeded fd=%d. Consider increasing the limit with -nfd flag, or figure out if there are any descriptor leaks.", hit->shf->max_ios, fd);
    close(fd);
    return;
  }
  nonblock(fd);
  if (nkbuf)
    setkernelbufsizes(fd, nkbuf, nkbuf);

  ++listener->n_read;  /* n_read counter is used for accounting accepts */
  io = hit->shf->ios + fd;
  io->qel.proto = listener->qel.proto;
  hi_accept_book(hit, io, fd);
  hi_todo_produce(hit, &listener->qel, "relisten", 0); /* Must exhaust accept: reenqueue (could also loop) */
}

/* EOF  --  hinet.c */