1 /*
2  * nghttp2 - HTTP/2 C Library
3  *
4  * Copyright (c) 2015 Tatsuhiro Tsujikawa
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 #include "shrpx_connection.h"
26 
27 #ifdef HAVE_UNISTD_H
28 #  include <unistd.h>
29 #endif // HAVE_UNISTD_H
30 #include <netinet/tcp.h>
31 
32 #include <limits>
33 
34 #include <openssl/err.h>
35 
36 #include "shrpx_tls.h"
37 #include "shrpx_memcached_request.h"
38 #include "shrpx_log.h"
39 #include "memchunk.h"
40 #include "util.h"
41 #include "ssl_compat.h"
42 
43 using namespace nghttp2;
44 
45 namespace shrpx {
46 
47 #if !LIBRESSL_2_7_API && !OPENSSL_1_1_API
48 
BIO_get_data(BIO * bio)49 void *BIO_get_data(BIO *bio) { return bio->ptr; }
BIO_set_data(BIO * bio,void * ptr)50 void BIO_set_data(BIO *bio, void *ptr) { bio->ptr = ptr; }
BIO_set_init(BIO * bio,int init)51 void BIO_set_init(BIO *bio, int init) { bio->init = init; }
52 
53 #endif // !LIBRESSL_2_7_API && !OPENSSL_1_1_API
54 
Connection(struct ev_loop * loop,int fd,SSL * ssl,MemchunkPool * mcpool,ev_tstamp write_timeout,ev_tstamp read_timeout,const RateLimitConfig & write_limit,const RateLimitConfig & read_limit,IOCb writecb,IOCb readcb,TimerCb timeoutcb,void * data,size_t tls_dyn_rec_warmup_threshold,ev_tstamp tls_dyn_rec_idle_timeout,Proto proto)55 Connection::Connection(struct ev_loop *loop, int fd, SSL *ssl,
56                        MemchunkPool *mcpool, ev_tstamp write_timeout,
57                        ev_tstamp read_timeout,
58                        const RateLimitConfig &write_limit,
59                        const RateLimitConfig &read_limit, IOCb writecb,
60                        IOCb readcb, TimerCb timeoutcb, void *data,
61                        size_t tls_dyn_rec_warmup_threshold,
62                        ev_tstamp tls_dyn_rec_idle_timeout, Proto proto)
63     : tls{DefaultMemchunks(mcpool), DefaultPeekMemchunks(mcpool),
64           DefaultMemchunks(mcpool)},
65       wlimit(loop, &wev, write_limit.rate, write_limit.burst),
66       rlimit(loop, &rev, read_limit.rate, read_limit.burst, this),
67       loop(loop),
68       data(data),
69       fd(fd),
70       tls_dyn_rec_warmup_threshold(tls_dyn_rec_warmup_threshold),
71       tls_dyn_rec_idle_timeout(tls_dyn_rec_idle_timeout),
72       proto(proto),
73       last_read(0.),
74       read_timeout(read_timeout) {
75 
76   ev_io_init(&wev, writecb, fd, EV_WRITE);
77   ev_io_init(&rev, readcb, proto == Proto::HTTP3 ? 0 : fd, EV_READ);
78 
79   wev.data = this;
80   rev.data = this;
81 
82   ev_timer_init(&wt, timeoutcb, 0., write_timeout);
83   ev_timer_init(&rt, timeoutcb, 0., read_timeout);
84 
85   wt.data = this;
86   rt.data = this;
87 
88   // set 0. to double field explicitly just in case
89   tls.last_write_idle = 0.;
90 
91   if (ssl) {
92     set_ssl(ssl);
93   }
94 }
95 
~Connection()96 Connection::~Connection() { disconnect(); }
97 
disconnect()98 void Connection::disconnect() {
99   if (tls.ssl) {
100     SSL_set_shutdown(tls.ssl,
101                      SSL_get_shutdown(tls.ssl) | SSL_RECEIVED_SHUTDOWN);
102     ERR_clear_error();
103 
104     if (tls.cached_session) {
105       SSL_SESSION_free(tls.cached_session);
106       tls.cached_session = nullptr;
107     }
108 
109     if (tls.cached_session_lookup_req) {
110       tls.cached_session_lookup_req->canceled = true;
111       tls.cached_session_lookup_req = nullptr;
112     }
113 
114     SSL_shutdown(tls.ssl);
115     SSL_free(tls.ssl);
116     tls.ssl = nullptr;
117 
118     tls.wbuf.reset();
119     tls.rbuf.reset();
120     tls.last_write_idle = 0.;
121     tls.warmup_writelen = 0;
122     tls.last_writelen = 0;
123     tls.last_readlen = 0;
124     tls.handshake_state = TLSHandshakeState::NORMAL;
125     tls.initial_handshake_done = false;
126     tls.reneg_started = false;
127     tls.sct_requested = false;
128     tls.early_data_finish = false;
129   }
130 
131   if (proto != Proto::HTTP3 && fd != -1) {
132     shutdown(fd, SHUT_WR);
133     close(fd);
134     fd = -1;
135   }
136 
137   // Stop watchers here because they could be activated in
138   // SSL_shutdown().
139   ev_timer_stop(loop, &rt);
140   ev_timer_stop(loop, &wt);
141 
142   rlimit.stopw();
143   wlimit.stopw();
144 }
145 
prepare_client_handshake()146 void Connection::prepare_client_handshake() {
147   SSL_set_connect_state(tls.ssl);
148   // This prevents SSL_read_early_data from being called.
149   tls.early_data_finish = true;
150 }
151 
prepare_server_handshake()152 void Connection::prepare_server_handshake() {
153   SSL_set_accept_state(tls.ssl);
154   tls.server_handshake = true;
155 }
156 
157 // BIO implementation is inspired by openldap implementation:
158 // http://www.openldap.org/devel/cvsweb.cgi/~checkout~/libraries/libldap/tls_o.c
159 namespace {
shrpx_bio_write(BIO * b,const char * buf,int len)160 int shrpx_bio_write(BIO *b, const char *buf, int len) {
161   if (buf == nullptr || len <= 0) {
162     return 0;
163   }
164 
165   auto conn = static_cast<Connection *>(BIO_get_data(b));
166   auto &wbuf = conn->tls.wbuf;
167 
168   BIO_clear_retry_flags(b);
169 
170   if (conn->tls.initial_handshake_done) {
171     // After handshake finished, send |buf| of length |len| to the
172     // socket directly.
173 
174     // Only when TLS session was prematurely ended before server sent
175     // all handshake message, this condition is true.  This could be
176     // alert from SSL_shutdown().  Since connection is already down,
177     // just return error.
178     if (wbuf.rleft()) {
179       return -1;
180     }
181     auto nwrite = conn->write_clear(buf, len);
182     if (nwrite < 0) {
183       return -1;
184     }
185 
186     if (nwrite == 0) {
187       BIO_set_retry_write(b);
188       return -1;
189     }
190 
191     return nwrite;
192   }
193 
194   wbuf.append(buf, len);
195 
196   return len;
197 }
198 } // namespace
199 
200 namespace {
shrpx_bio_read(BIO * b,char * buf,int len)201 int shrpx_bio_read(BIO *b, char *buf, int len) {
202   if (buf == nullptr || len <= 0) {
203     return 0;
204   }
205 
206   auto conn = static_cast<Connection *>(BIO_get_data(b));
207   auto &rbuf = conn->tls.rbuf;
208 
209   BIO_clear_retry_flags(b);
210 
211   if (conn->tls.initial_handshake_done && rbuf.rleft() == 0) {
212     auto nread = conn->read_clear(buf, len);
213     if (nread < 0) {
214       return -1;
215     }
216     if (nread == 0) {
217       BIO_set_retry_read(b);
218       return -1;
219     }
220     return nread;
221   }
222 
223   if (rbuf.rleft() == 0) {
224     BIO_set_retry_read(b);
225     return -1;
226   }
227 
228   return rbuf.remove(buf, len);
229 }
230 } // namespace
231 
232 namespace {
shrpx_bio_puts(BIO * b,const char * str)233 int shrpx_bio_puts(BIO *b, const char *str) {
234   return shrpx_bio_write(b, str, strlen(str));
235 }
236 } // namespace
237 
238 namespace {
shrpx_bio_gets(BIO * b,char * buf,int len)239 int shrpx_bio_gets(BIO *b, char *buf, int len) { return -1; }
240 } // namespace
241 
242 namespace {
shrpx_bio_ctrl(BIO * b,int cmd,long num,void * ptr)243 long shrpx_bio_ctrl(BIO *b, int cmd, long num, void *ptr) {
244   switch (cmd) {
245   case BIO_CTRL_FLUSH:
246     return 1;
247   }
248 
249   return 0;
250 }
251 } // namespace
252 
253 namespace {
shrpx_bio_create(BIO * b)254 int shrpx_bio_create(BIO *b) {
255 #if OPENSSL_1_1_API
256   BIO_set_init(b, 1);
257 #else  // !OPENSSL_1_1_API
258   b->init = 1;
259   b->num = 0;
260   b->ptr = nullptr;
261   b->flags = 0;
262 #endif // !OPENSSL_1_1_API
263   return 1;
264 }
265 } // namespace
266 
267 namespace {
shrpx_bio_destroy(BIO * b)268 int shrpx_bio_destroy(BIO *b) {
269   if (b == nullptr) {
270     return 0;
271   }
272 
273 #if !OPENSSL_1_1_API
274   b->ptr = nullptr;
275   b->init = 0;
276   b->flags = 0;
277 #endif // !OPENSSL_1_1_API
278 
279   return 1;
280 }
281 } // namespace
282 
283 #if OPENSSL_1_1_API
284 
create_bio_method()285 BIO_METHOD *create_bio_method() {
286   auto meth = BIO_meth_new(BIO_TYPE_FD, "nghttpx-bio");
287   BIO_meth_set_write(meth, shrpx_bio_write);
288   BIO_meth_set_read(meth, shrpx_bio_read);
289   BIO_meth_set_puts(meth, shrpx_bio_puts);
290   BIO_meth_set_gets(meth, shrpx_bio_gets);
291   BIO_meth_set_ctrl(meth, shrpx_bio_ctrl);
292   BIO_meth_set_create(meth, shrpx_bio_create);
293   BIO_meth_set_destroy(meth, shrpx_bio_destroy);
294 
295   return meth;
296 }
297 
298 #else // !OPENSSL_1_1_API
299 
create_bio_method()300 BIO_METHOD *create_bio_method() {
301   static auto meth = new BIO_METHOD{
302       BIO_TYPE_FD,    "nghttpx-bio",    shrpx_bio_write,
303       shrpx_bio_read, shrpx_bio_puts,   shrpx_bio_gets,
304       shrpx_bio_ctrl, shrpx_bio_create, shrpx_bio_destroy,
305   };
306 
307   return meth;
308 }
309 
310 #endif // !OPENSSL_1_1_API
311 
set_ssl(SSL * ssl)312 void Connection::set_ssl(SSL *ssl) {
313   tls.ssl = ssl;
314 
315   if (proto != Proto::HTTP3) {
316     auto &tlsconf = get_config()->tls;
317     auto bio = BIO_new(tlsconf.bio_method);
318     BIO_set_data(bio, this);
319     SSL_set_bio(tls.ssl, bio, bio);
320   }
321 
322   SSL_set_app_data(tls.ssl, this);
323 }
324 
325 namespace {
326 // We should buffer at least full encrypted TLS record here.
327 // Theoretically, peer can send client hello in several TLS records,
328 // which could exceed this limit, but it is not portable, and we don't
329 // have to handle such exotic behaviour.
read_buffer_full(DefaultPeekMemchunks & rbuf)330 bool read_buffer_full(DefaultPeekMemchunks &rbuf) {
331   return rbuf.rleft_buffered() >= 20_k;
332 }
333 } // namespace
334 
tls_handshake()335 int Connection::tls_handshake() {
336   wlimit.stopw();
337   ev_timer_stop(loop, &wt);
338 
339   std::array<uint8_t, 16_k> buf;
340 
341   if (ev_is_active(&rev)) {
342     auto nread = read_clear(buf.data(), buf.size());
343     if (nread < 0) {
344       if (LOG_ENABLED(INFO)) {
345         LOG(INFO) << "tls: handshake read error";
346       }
347       return -1;
348     }
349     tls.rbuf.append(buf.data(), nread);
350     if (read_buffer_full(tls.rbuf)) {
351       rlimit.stopw();
352     }
353   }
354 
355   if (tls.initial_handshake_done) {
356     return write_tls_pending_handshake();
357   }
358 
359   switch (tls.handshake_state) {
360   case TLSHandshakeState::WAIT_FOR_SESSION_CACHE:
361     return SHRPX_ERR_INPROGRESS;
362   case TLSHandshakeState::GOT_SESSION_CACHE: {
363     // Use the same trick invented by @kazuho in h2o project.
364 
365     // Discard all outgoing data.
366     tls.wbuf.reset();
367     // Rewind buffered incoming data to replay client hello.
368     tls.rbuf.disable_peek(false);
369 
370     auto ssl_ctx = SSL_get_SSL_CTX(tls.ssl);
371     auto ssl_opts = SSL_get_options(tls.ssl);
372     SSL_free(tls.ssl);
373 
374     auto ssl = tls::create_ssl(ssl_ctx);
375     if (!ssl) {
376       return -1;
377     }
378     if (ssl_opts & SSL_OP_NO_TICKET) {
379       SSL_set_options(ssl, SSL_OP_NO_TICKET);
380     }
381 
382     set_ssl(ssl);
383 
384     SSL_set_accept_state(tls.ssl);
385 
386     tls.handshake_state = TLSHandshakeState::NORMAL;
387     break;
388   }
389   case TLSHandshakeState::CANCEL_SESSION_CACHE:
390     tls.handshake_state = TLSHandshakeState::NORMAL;
391     break;
392   default:
393     break;
394   }
395 
396   int rv;
397 
398   ERR_clear_error();
399 
400 #if OPENSSL_1_1_1_API
401   if (!tls.server_handshake || tls.early_data_finish) {
402     rv = SSL_do_handshake(tls.ssl);
403   } else {
404     auto &tlsconf = get_config()->tls;
405     for (;;) {
406       size_t nread;
407 
408       rv = SSL_read_early_data(tls.ssl, buf.data(), buf.size(), &nread);
409       if (rv == SSL_READ_EARLY_DATA_ERROR) {
410         // If we have early data, and server sends ServerHello, assume
411         // that handshake is completed in server side, and start
412         // processing request.  If we don't exit handshake code here,
413         // server waits for EndOfEarlyData and Finished message from
414         // client, which voids the purpose of 0-RTT data.  The left
415         // over of handshake is done through write_tls or read_tls.
416         if (tlsconf.no_postpone_early_data &&
417             (tls.handshake_state == TLSHandshakeState::WRITE_STARTED ||
418              tls.wbuf.rleft()) &&
419             tls.earlybuf.rleft()) {
420           rv = 1;
421         }
422 
423         break;
424       }
425 
426       if (LOG_ENABLED(INFO)) {
427         LOG(INFO) << "tls: read early data " << nread << " bytes";
428       }
429 
430       tls.earlybuf.append(buf.data(), nread);
431 
432       if (rv == SSL_READ_EARLY_DATA_FINISH) {
433         if (LOG_ENABLED(INFO)) {
434           LOG(INFO) << "tls: read all early data; total "
435                     << tls.earlybuf.rleft() << " bytes";
436         }
437         tls.early_data_finish = true;
438         // The same reason stated above.
439         if (tlsconf.no_postpone_early_data &&
440             (tls.handshake_state == TLSHandshakeState::WRITE_STARTED ||
441              tls.wbuf.rleft()) &&
442             tls.earlybuf.rleft()) {
443           rv = 1;
444         } else {
445           ERR_clear_error();
446           rv = SSL_do_handshake(tls.ssl);
447         }
448         break;
449       }
450     }
451   }
452 #else  // !OPENSSL_1_1_1_API
453   rv = SSL_do_handshake(tls.ssl);
454 #endif // !OPENSSL_1_1_1_API
455 
456   if (rv <= 0) {
457     auto err = SSL_get_error(tls.ssl, rv);
458     switch (err) {
459     case SSL_ERROR_WANT_READ:
460       if (read_buffer_full(tls.rbuf)) {
461         if (LOG_ENABLED(INFO)) {
462           LOG(INFO) << "tls: handshake message is too large";
463         }
464         return -1;
465       }
466       break;
467     case SSL_ERROR_WANT_WRITE:
468       break;
469     case SSL_ERROR_SSL: {
470       if (LOG_ENABLED(INFO)) {
471         LOG(INFO) << "tls: handshake libssl error: "
472                   << ERR_error_string(ERR_get_error(), nullptr);
473       }
474 
475       struct iovec iov[1];
476       auto iovcnt = tls.wbuf.riovec(iov, 1);
477       auto nwrite = writev_clear(iov, iovcnt);
478       if (nwrite > 0) {
479         tls.wbuf.drain(nwrite);
480       }
481 
482       return SHRPX_ERR_NETWORK;
483     }
484     default:
485       if (LOG_ENABLED(INFO)) {
486         LOG(INFO) << "tls: handshake libssl error " << err;
487       }
488       return SHRPX_ERR_NETWORK;
489     }
490   }
491 
492   if (tls.handshake_state == TLSHandshakeState::WAIT_FOR_SESSION_CACHE) {
493     if (LOG_ENABLED(INFO)) {
494       LOG(INFO) << "tls: handshake is still in progress";
495     }
496     return SHRPX_ERR_INPROGRESS;
497   }
498 
499   // Don't send handshake data if handshake was completed in OpenSSL
500   // routine.  We have to check HTTP/2 requirement if HTTP/2 was
501   // negotiated before sending finished message to the peer.
502   if (rv != 1 && tls.wbuf.rleft()) {
503     // First write indicates that resumption stuff has done.
504     if (tls.handshake_state != TLSHandshakeState::WRITE_STARTED) {
505       tls.handshake_state = TLSHandshakeState::WRITE_STARTED;
506       // If peek has already disabled, this is noop.
507       tls.rbuf.disable_peek(true);
508     }
509     std::array<struct iovec, 4> iov;
510     auto iovcnt = tls.wbuf.riovec(iov.data(), iov.size());
511     auto nwrite = writev_clear(iov.data(), iovcnt);
512     if (nwrite < 0) {
513       if (LOG_ENABLED(INFO)) {
514         LOG(INFO) << "tls: handshake write error";
515       }
516       return -1;
517     }
518     tls.wbuf.drain(nwrite);
519 
520     if (tls.wbuf.rleft()) {
521       wlimit.startw();
522       ev_timer_again(loop, &wt);
523     }
524   }
525 
526   if (!read_buffer_full(tls.rbuf)) {
527     // We may have stopped reading
528     rlimit.startw();
529   }
530 
531   if (rv != 1) {
532     if (LOG_ENABLED(INFO)) {
533       LOG(INFO) << "tls: handshake is still in progress";
534     }
535     return SHRPX_ERR_INPROGRESS;
536   }
537 
538   // Handshake was done
539 
540   rv = check_http2_requirement();
541   if (rv != 0) {
542     return -1;
543   }
544 
545   // Just in case
546   tls.rbuf.disable_peek(true);
547 
548   tls.initial_handshake_done = true;
549 
550   return write_tls_pending_handshake();
551 }
552 
write_tls_pending_handshake()553 int Connection::write_tls_pending_handshake() {
554   // Send handshake data left in the buffer
555   while (tls.wbuf.rleft()) {
556     std::array<struct iovec, 4> iov;
557     auto iovcnt = tls.wbuf.riovec(iov.data(), iov.size());
558     auto nwrite = writev_clear(iov.data(), iovcnt);
559     if (nwrite < 0) {
560       if (LOG_ENABLED(INFO)) {
561         LOG(INFO) << "tls: handshake write error";
562       }
563       return -1;
564     }
565     if (nwrite == 0) {
566       wlimit.startw();
567       ev_timer_again(loop, &wt);
568 
569       return SHRPX_ERR_INPROGRESS;
570     }
571     tls.wbuf.drain(nwrite);
572   }
573 
574   // We have to start read watcher, since later stage of code expects
575   // this.
576   rlimit.startw();
577 
578   // We may have whole request in tls.rbuf.  This means that we don't
579   // get notified further read event.  This is especially true for
580   // HTTP/1.1.
581   handle_tls_pending_read();
582 
583   if (LOG_ENABLED(INFO)) {
584     LOG(INFO) << "SSL/TLS handshake completed";
585     nghttp2::tls::TLSSessionInfo tls_info{};
586     if (nghttp2::tls::get_tls_session_info(&tls_info, tls.ssl)) {
587       LOG(INFO) << "cipher=" << tls_info.cipher
588                 << " protocol=" << tls_info.protocol
589                 << " resumption=" << (tls_info.session_reused ? "yes" : "no")
590                 << " session_id="
591                 << util::format_hex(tls_info.session_id,
592                                     tls_info.session_id_length);
593     }
594   }
595 
596   return 0;
597 }
598 
check_http2_requirement()599 int Connection::check_http2_requirement() {
600   const unsigned char *next_proto = nullptr;
601   unsigned int next_proto_len;
602 
603 #ifndef OPENSSL_NO_NEXTPROTONEG
604   SSL_get0_next_proto_negotiated(tls.ssl, &next_proto, &next_proto_len);
605 #endif // !OPENSSL_NO_NEXTPROTONEG
606 #if OPENSSL_VERSION_NUMBER >= 0x10002000L
607   if (next_proto == nullptr) {
608     SSL_get0_alpn_selected(tls.ssl, &next_proto, &next_proto_len);
609   }
610 #endif // OPENSSL_VERSION_NUMBER >= 0x10002000L
611   if (next_proto == nullptr ||
612       !util::check_h2_is_selected(StringRef{next_proto, next_proto_len})) {
613     return 0;
614   }
615   if (!nghttp2::tls::check_http2_tls_version(tls.ssl)) {
616     if (LOG_ENABLED(INFO)) {
617       LOG(INFO) << "TLSv1.2 was not negotiated.  HTTP/2 must not be used.";
618     }
619     return -1;
620   }
621 
622   auto check_block_list = false;
623   if (tls.server_handshake) {
624     check_block_list = !get_config()->tls.no_http2_cipher_block_list;
625   } else {
626     check_block_list = !get_config()->tls.client.no_http2_cipher_block_list;
627   }
628 
629   if (check_block_list &&
630       nghttp2::tls::check_http2_cipher_block_list(tls.ssl)) {
631     if (LOG_ENABLED(INFO)) {
632       LOG(INFO) << "The negotiated cipher suite is in HTTP/2 cipher suite "
633                    "block list.  HTTP/2 must not be used.";
634     }
635     return -1;
636   }
637 
638   return 0;
639 }
640 
641 namespace {
642 constexpr size_t SHRPX_SMALL_WRITE_LIMIT = 1300;
643 } // namespace
644 
get_tls_write_limit()645 size_t Connection::get_tls_write_limit() {
646 
647   if (tls_dyn_rec_warmup_threshold == 0) {
648     return std::numeric_limits<ssize_t>::max();
649   }
650 
651   auto t = ev_now(loop);
652 
653   if (tls.last_write_idle >= 0. &&
654       t - tls.last_write_idle > tls_dyn_rec_idle_timeout) {
655     // Time out, use small record size
656     tls.warmup_writelen = 0;
657     return SHRPX_SMALL_WRITE_LIMIT;
658   }
659 
660   if (tls.warmup_writelen >= tls_dyn_rec_warmup_threshold) {
661     return std::numeric_limits<ssize_t>::max();
662   }
663 
664   return SHRPX_SMALL_WRITE_LIMIT;
665 }
666 
update_tls_warmup_writelen(size_t n)667 void Connection::update_tls_warmup_writelen(size_t n) {
668   if (tls.warmup_writelen < tls_dyn_rec_warmup_threshold) {
669     tls.warmup_writelen += n;
670   }
671 }
672 
start_tls_write_idle()673 void Connection::start_tls_write_idle() {
674   if (tls.last_write_idle < 0.) {
675     tls.last_write_idle = ev_now(loop);
676   }
677 }
678 
write_tls(const void * data,size_t len)679 ssize_t Connection::write_tls(const void *data, size_t len) {
680   // SSL_write requires the same arguments (buf pointer and its
681   // length) on SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE.
682   // get_write_limit() may return smaller length than previously
683   // passed to SSL_write, which violates OpenSSL assumption.  To avoid
684   // this, we keep last legnth passed to SSL_write to
685   // tls.last_writelen if SSL_write indicated I/O blocking.
686   if (tls.last_writelen == 0) {
687     len = std::min(len, wlimit.avail());
688     len = std::min(len, get_tls_write_limit());
689     if (len == 0) {
690       return 0;
691     }
692   } else {
693     len = tls.last_writelen;
694     tls.last_writelen = 0;
695   }
696 
697   tls.last_write_idle = -1.;
698 
699   ERR_clear_error();
700 
701 #if OPENSSL_1_1_1_API
702   int rv;
703   if (SSL_is_init_finished(tls.ssl)) {
704     rv = SSL_write(tls.ssl, data, len);
705   } else {
706     size_t nwrite;
707     rv = SSL_write_early_data(tls.ssl, data, len, &nwrite);
708     // Use the same semantics with SSL_write.
709     if (rv == 1) {
710       rv = nwrite;
711     }
712   }
713 #else  // !OPENSSL_1_1_1_API
714   auto rv = SSL_write(tls.ssl, data, len);
715 #endif // !OPENSSL_1_1_1_API
716 
717   if (rv <= 0) {
718     auto err = SSL_get_error(tls.ssl, rv);
719     switch (err) {
720     case SSL_ERROR_WANT_READ:
721       if (LOG_ENABLED(INFO)) {
722         LOG(INFO) << "Close connection due to TLS renegotiation";
723       }
724       return SHRPX_ERR_NETWORK;
725     case SSL_ERROR_WANT_WRITE:
726       tls.last_writelen = len;
727       // starting write watcher and timer is done in write_clear via
728       // bio.
729       return 0;
730     case SSL_ERROR_SSL:
731       if (LOG_ENABLED(INFO)) {
732         LOG(INFO) << "SSL_write: "
733                   << ERR_error_string(ERR_get_error(), nullptr);
734       }
735       return SHRPX_ERR_NETWORK;
736     default:
737       if (LOG_ENABLED(INFO)) {
738         LOG(INFO) << "SSL_write: SSL_get_error returned " << err;
739       }
740       return SHRPX_ERR_NETWORK;
741     }
742   }
743 
744   update_tls_warmup_writelen(rv);
745 
746   return rv;
747 }
748 
read_tls(void * data,size_t len)749 ssize_t Connection::read_tls(void *data, size_t len) {
750   ERR_clear_error();
751 
752 #if OPENSSL_1_1_1_API
753   if (tls.earlybuf.rleft()) {
754     return tls.earlybuf.remove(data, len);
755   }
756 #endif // OPENSSL_1_1_1_API
757 
758   // SSL_read requires the same arguments (buf pointer and its
759   // length) on SSL_ERROR_WANT_READ or SSL_ERROR_WANT_WRITE.
760   // rlimit_.avail() or rlimit_.avail() may return different length
761   // than the length previously passed to SSL_read, which violates
762   // OpenSSL assumption.  To avoid this, we keep last legnth passed
763   // to SSL_read to tls_last_readlen_ if SSL_read indicated I/O
764   // blocking.
765   if (tls.last_readlen == 0) {
766     len = std::min(len, rlimit.avail());
767     if (len == 0) {
768       return 0;
769     }
770   } else {
771     len = tls.last_readlen;
772     tls.last_readlen = 0;
773   }
774 
775 #if OPENSSL_1_1_1_API
776   if (!tls.early_data_finish) {
777     // TLSv1.3 handshake is still going on.
778     size_t nread;
779     auto rv = SSL_read_early_data(tls.ssl, data, len, &nread);
780     if (rv == SSL_READ_EARLY_DATA_ERROR) {
781       auto err = SSL_get_error(tls.ssl, rv);
782       switch (err) {
783       case SSL_ERROR_WANT_READ:
784         tls.last_readlen = len;
785         return 0;
786       case SSL_ERROR_SSL:
787         if (LOG_ENABLED(INFO)) {
788           LOG(INFO) << "SSL_read: "
789                     << ERR_error_string(ERR_get_error(), nullptr);
790         }
791         return SHRPX_ERR_NETWORK;
792       default:
793         if (LOG_ENABLED(INFO)) {
794           LOG(INFO) << "SSL_read: SSL_get_error returned " << err;
795         }
796         return SHRPX_ERR_NETWORK;
797       }
798     }
799 
800     if (LOG_ENABLED(INFO)) {
801       LOG(INFO) << "tls: read early data " << nread << " bytes";
802     }
803 
804     if (rv == SSL_READ_EARLY_DATA_FINISH) {
805       if (LOG_ENABLED(INFO)) {
806         LOG(INFO) << "tls: read all early data";
807       }
808       tls.early_data_finish = true;
809       // We may have stopped write watcher in write_tls.
810       wlimit.startw();
811     }
812     return nread;
813   }
814 #endif // OPENSSL_1_1_1_API
815 
816   auto rv = SSL_read(tls.ssl, data, len);
817 
818   if (rv <= 0) {
819     auto err = SSL_get_error(tls.ssl, rv);
820     switch (err) {
821     case SSL_ERROR_WANT_READ:
822       tls.last_readlen = len;
823       return 0;
824     case SSL_ERROR_WANT_WRITE:
825       if (LOG_ENABLED(INFO)) {
826         LOG(INFO) << "Close connection due to TLS renegotiation";
827       }
828       return SHRPX_ERR_NETWORK;
829     case SSL_ERROR_ZERO_RETURN:
830       return SHRPX_ERR_EOF;
831     case SSL_ERROR_SSL:
832       if (LOG_ENABLED(INFO)) {
833         LOG(INFO) << "SSL_read: " << ERR_error_string(ERR_get_error(), nullptr);
834       }
835       return SHRPX_ERR_NETWORK;
836     default:
837       if (LOG_ENABLED(INFO)) {
838         LOG(INFO) << "SSL_read: SSL_get_error returned " << err;
839       }
840       return SHRPX_ERR_NETWORK;
841     }
842   }
843 
844   return rv;
845 }
846 
write_clear(const void * data,size_t len)847 ssize_t Connection::write_clear(const void *data, size_t len) {
848   len = std::min(len, wlimit.avail());
849   if (len == 0) {
850     return 0;
851   }
852 
853   ssize_t nwrite;
854   while ((nwrite = write(fd, data, len)) == -1 && errno == EINTR)
855     ;
856   if (nwrite == -1) {
857     if (errno == EAGAIN || errno == EWOULDBLOCK) {
858       wlimit.startw();
859       ev_timer_again(loop, &wt);
860       return 0;
861     }
862     return SHRPX_ERR_NETWORK;
863   }
864 
865   wlimit.drain(nwrite);
866 
867   if (ev_is_active(&wt)) {
868     ev_timer_again(loop, &wt);
869   }
870 
871   return nwrite;
872 }
873 
writev_clear(struct iovec * iov,int iovcnt)874 ssize_t Connection::writev_clear(struct iovec *iov, int iovcnt) {
875   iovcnt = limit_iovec(iov, iovcnt, wlimit.avail());
876   if (iovcnt == 0) {
877     return 0;
878   }
879 
880   ssize_t nwrite;
881   while ((nwrite = writev(fd, iov, iovcnt)) == -1 && errno == EINTR)
882     ;
883   if (nwrite == -1) {
884     if (errno == EAGAIN || errno == EWOULDBLOCK) {
885       wlimit.startw();
886       ev_timer_again(loop, &wt);
887       return 0;
888     }
889     return SHRPX_ERR_NETWORK;
890   }
891 
892   wlimit.drain(nwrite);
893 
894   if (ev_is_active(&wt)) {
895     ev_timer_again(loop, &wt);
896   }
897 
898   return nwrite;
899 }
900 
read_clear(void * data,size_t len)901 ssize_t Connection::read_clear(void *data, size_t len) {
902   len = std::min(len, rlimit.avail());
903   if (len == 0) {
904     return 0;
905   }
906 
907   ssize_t nread;
908   while ((nread = read(fd, data, len)) == -1 && errno == EINTR)
909     ;
910   if (nread == -1) {
911     if (errno == EAGAIN || errno == EWOULDBLOCK) {
912       return 0;
913     }
914     return SHRPX_ERR_NETWORK;
915   }
916 
917   if (nread == 0) {
918     return SHRPX_ERR_EOF;
919   }
920 
921   rlimit.drain(nread);
922 
923   return nread;
924 }
925 
handle_tls_pending_read()926 void Connection::handle_tls_pending_read() {
927   if (!ev_is_active(&rev)) {
928     return;
929   }
930   rlimit.handle_tls_pending_read();
931 }
932 
get_tcp_hint(TCPHint * hint) const933 int Connection::get_tcp_hint(TCPHint *hint) const {
934 #if defined(TCP_INFO) && defined(TCP_NOTSENT_LOWAT)
935   struct tcp_info tcp_info;
936   socklen_t tcp_info_len = sizeof(tcp_info);
937   int rv;
938 
939   rv = getsockopt(fd, IPPROTO_TCP, TCP_INFO, &tcp_info, &tcp_info_len);
940 
941   if (rv != 0) {
942     return -1;
943   }
944 
945   auto avail_packets = tcp_info.tcpi_snd_cwnd > tcp_info.tcpi_unacked
946                            ? tcp_info.tcpi_snd_cwnd - tcp_info.tcpi_unacked
947                            : 0;
948 
949   // http://www.slideshare.net/kazuho/programming-tcp-for-responsiveness
950 
951   // TODO 29 (5 (header) + 8 (explicit nonce) + 16 (tag)) is TLS
952   // overhead for AES-GCM.  For CHACHA20_POLY1305, it is 21 since it
953   // does not need 8 bytes explicit nonce.
954   //
955   // For TLSv1.3, AES-GCM and CHACHA20_POLY1305 overhead are now 22
956   // bytes (5 (header) + 1 (ContentType) + 16 (tag)).
957   size_t tls_overhead;
958 #  ifdef TLS1_3_VERSION
959   if (SSL_version(tls.ssl) == TLS1_3_VERSION) {
960     tls_overhead = 22;
961   } else
962 #  endif // TLS1_3_VERSION
963   {
964     tls_overhead = 29;
965   }
966 
967   auto writable_size =
968       (avail_packets + 2) * (tcp_info.tcpi_snd_mss - tls_overhead);
969   if (writable_size > 16_k) {
970     writable_size = writable_size & ~(16_k - 1);
971   } else {
972     if (writable_size < 536) {
973       LOG(INFO) << "writable_size is too small: " << writable_size;
974     }
975     // TODO is this required?
976     writable_size = std::max(writable_size, static_cast<size_t>(536 * 2));
977   }
978 
979   // if (LOG_ENABLED(INFO)) {
980   //   LOG(INFO) << "snd_cwnd=" << tcp_info.tcpi_snd_cwnd
981   //             << ", unacked=" << tcp_info.tcpi_unacked
982   //             << ", snd_mss=" << tcp_info.tcpi_snd_mss
983   //             << ", rtt=" << tcp_info.tcpi_rtt << "us"
984   //             << ", rcv_space=" << tcp_info.tcpi_rcv_space
985   //             << ", writable=" << writable_size;
986   // }
987 
988   hint->write_buffer_size = writable_size;
989   // TODO tcpi_rcv_space is considered as rwin, is that correct?
990   hint->rwin = tcp_info.tcpi_rcv_space;
991 
992   return 0;
993 #else  // !defined(TCP_INFO) || !defined(TCP_NOTSENT_LOWAT)
994   return -1;
995 #endif // !defined(TCP_INFO) || !defined(TCP_NOTSENT_LOWAT)
996 }
997 
again_rt(ev_tstamp t)998 void Connection::again_rt(ev_tstamp t) {
999   read_timeout = t;
1000   rt.repeat = t;
1001   ev_timer_again(loop, &rt);
1002   last_read = ev_now(loop);
1003 }
1004 
again_rt()1005 void Connection::again_rt() {
1006   rt.repeat = read_timeout;
1007   ev_timer_again(loop, &rt);
1008   last_read = ev_now(loop);
1009 }
1010 
expired_rt()1011 bool Connection::expired_rt() {
1012   auto delta = read_timeout - (ev_now(loop) - last_read);
1013   if (delta < 1e-9) {
1014     return true;
1015   }
1016   rt.repeat = delta;
1017   ev_timer_again(loop, &rt);
1018   return false;
1019 }
1020 
1021 } // namespace shrpx
1022