1 /*
2  * Copyright (c) 2014-2016 DeNA Co., Ltd., Kazuho Oku, Fastly, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to
6  * deal in the Software without restriction, including without limitation the
7  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8  * sell copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20  * IN THE SOFTWARE.
21  */
22 #ifndef h2o__socket_h
23 #define h2o__socket_h
24 
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28 
29 #include <stdint.h>
30 #include <sys/socket.h>
31 #include <openssl/ssl.h>
32 #include <openssl/opensslconf.h>
33 #include "picotls.h"
34 #include "h2o/cache.h"
35 #include "h2o/ebpf.h"
36 #include "h2o/memory.h"
37 #include "h2o/openssl_backport.h"
38 #include "h2o/string_.h"
39 
40 #ifndef H2O_USE_LIBUV
41 #if H2O_USE_POLL || H2O_USE_EPOLL || H2O_USE_KQUEUE
42 #define H2O_USE_LIBUV 0
43 #else
44 #define H2O_USE_LIBUV 1
45 #endif
46 #endif
47 
48 #if OPENSSL_VERSION_NUMBER >= 0x10002000L
49 #define H2O_USE_ALPN 1
50 #ifndef OPENSSL_NO_NEXTPROTONEG
51 #define H2O_USE_NPN 1
52 #else
53 #define H2O_USE_NPN 0
54 #endif
55 #elif OPENSSL_VERSION_NUMBER >= 0x10001000L
56 #define H2O_USE_ALPN 0
57 #define H2O_USE_NPN 1
58 #else
59 #define H2O_USE_ALPN 0
60 #define H2O_USE_NPN 0
61 #endif
62 
63 /**
64  * Maximum amount of TLS records to generate at once. Default is 4 full-sized TLS records using 32-byte tag.
65  */
66 #define H2O_SOCKET_DEFAULT_SSL_BUFFER_SIZE ((5 + 16384 + 32) * 4)
67 
/**
 * State operated on by the h2o_sliding_counter_* functions.
 * NOTE(review): `average` is presumably derived from the samples kept in `prev` (a running `sum` over the 8 `slots`, with `index`
 * pointing at the slot to use next) - confirm against the implementation of h2o_sliding_counter_stop.
 */
typedef struct st_h2o_sliding_counter_t {
    uint64_t average;
    struct {
        uint64_t sum;
        uint64_t slots[8];
        size_t index;
    } prev;
    struct {
        /**
         * value supplied to h2o_sliding_counter_start; zero means that the counter is not running
         */
        uint64_t start_at;
    } cur;
} h2o_sliding_counter_t;
79 
80 static int h2o_sliding_counter_is_running(h2o_sliding_counter_t *counter);
81 static void h2o_sliding_counter_start(h2o_sliding_counter_t *counter, uint64_t now);
82 void h2o_sliding_counter_stop(h2o_sliding_counter_t *counter, uint64_t now);
83 
84 #define H2O_SOCKET_INITIAL_INPUT_BUFFER_SIZE 4096
85 
86 #define H2O_SESSID_CTX ((const uint8_t *)"h2o")
87 #define H2O_SESSID_CTX_LEN (sizeof("h2o") - 1)
88 
89 typedef struct st_h2o_socket_t h2o_socket_t;
90 
91 typedef void (*h2o_socket_cb)(h2o_socket_t *sock, const char *err);
92 
93 #if H2O_USE_LIBUV
94 #include "socket/uv-binding.h"
95 #else
96 #include "socket/evloop.h"
97 #endif
98 
/**
 * A socket address paired with its length.
 * NOTE(review): `addr` is declared as a plain `struct sockaddr`; presumably instances are over-allocated so that larger address
 * families fit - confirm against the code that allocates `_peername` / `_sockname`.
 */
struct st_h2o_socket_addr_t {
    /**
     * length of the address stored in `addr`
     */
    socklen_t len;
    /**
     * the address itself
     */
    struct sockaddr addr;
};
103 
/**
 * Values stored in `_latency_optimization.state` of a socket. When the state is TBD or NEEDS_UPDATE,
 * h2o_socket_prepare_for_latency_optimized_write delegates to h2o_socket_do_prepare_for_latency_optimized_write; in any other
 * state it returns the cached `suggested_write_size`.
 */
enum {
    H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_TBD = 0,
    H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_NEEDS_UPDATE = 1,
    H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_DISABLED = 2,
    H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_DETERMINED = 3
};
110 
111 /**
112  * abstraction layer for sockets (SSL vs. TCP)
113  */
114 struct st_h2o_socket_t {
115     void *data;
116     struct st_h2o_socket_ssl_t *ssl;
117     h2o_buffer_t *input;
118     /**
119      * total bytes read (above the TLS layer)
120      */
121     uint64_t bytes_read;
122     /**
123      * total bytes written (above the TLS layer)
124      */
125     uint64_t bytes_written;
126     /**
127      * boolean flag to indicate if sock is NOT being traced
128      */
129     unsigned _skip_tracing : 1;
130     struct {
131         void (*cb)(void *data);
132         void *data;
133     } on_close;
134     struct {
135         h2o_socket_cb read;
136         h2o_socket_cb write;
137     } _cb;
138     struct st_h2o_socket_addr_t *_peername;
139     struct st_h2o_socket_addr_t *_sockname;
140     struct {
141         size_t cnt;
142         h2o_iovec_t *bufs;
143         union {
144             h2o_iovec_t *alloced_ptr;
145             h2o_iovec_t smallbufs[4];
146         };
147     } _write_buf;
148     struct {
149         uint8_t state; /* one of H2O_SOCKET_LATENCY_STATE_* */
150         uint8_t notsent_is_minimized : 1;
151         size_t suggested_tls_payload_size; /* suggested TLS record payload size, or SIZE_MAX when no need to restrict */
152         size_t suggested_write_size;       /* SIZE_MAX if no need to optimize for latency */
153     } _latency_optimization;
154 };
155 
156 typedef struct st_h2o_socket_export_t {
157     int fd;
158     struct st_h2o_socket_ssl_t *ssl;
159     h2o_buffer_t *input;
160 } h2o_socket_export_t;
161 
162 /**
163  * sets the conditions to enable the optimization
164  */
typedef struct st_h2o_socket_latency_optimization_conditions_t {
    /**
     * RTT threshold, expressed in milliseconds
     */
    unsigned int min_rtt;
    /**
     * additional delay threshold, expressed as a percent ratio
     */
    unsigned int max_additional_delay;
    /**
     * congestion window threshold, expressed in number of octets
     */
    unsigned int max_cwnd;
} h2o_socket_latency_optimization_conditions_t;
179 
180 typedef void (*h2o_socket_ssl_resumption_get_async_cb)(h2o_socket_t *sock, h2o_iovec_t session_id);
181 typedef void (*h2o_socket_ssl_resumption_new_cb)(h2o_socket_t *sock, h2o_iovec_t session_id, h2o_iovec_t session_data);
182 typedef void (*h2o_socket_ssl_resumption_remove_cb)(h2o_iovec_t session_id);
183 
184 extern h2o_buffer_mmap_settings_t h2o_socket_buffer_mmap_settings;
185 extern h2o_buffer_prototype_t h2o_socket_buffer_prototype;
186 
187 extern size_t h2o_socket_ssl_buffer_size;
188 extern __thread h2o_mem_recycle_t h2o_socket_ssl_buffer_allocator;
189 
190 extern const char h2o_socket_error_out_of_memory[];
191 extern const char h2o_socket_error_io[];
192 extern const char h2o_socket_error_closed[];
193 extern const char h2o_socket_error_conn_fail[];
194 extern const char h2o_socket_error_conn_refused[];
195 extern const char h2o_socket_error_conn_timed_out[];
196 extern const char h2o_socket_error_network_unreachable[];
197 extern const char h2o_socket_error_host_unreachable[];
198 extern const char h2o_socket_error_socket_fail[];
199 extern const char h2o_socket_error_ssl_no_cert[];
200 extern const char h2o_socket_error_ssl_cert_invalid[];
201 extern const char h2o_socket_error_ssl_cert_name_mismatch[];
202 extern const char h2o_socket_error_ssl_decode[];
203 extern const char h2o_socket_error_ssl_handshake[];
204 
205 /**
206  * returns the loop
207  */
208 h2o_loop_t *h2o_socket_get_loop(h2o_socket_t *sock);
209 /**
210  * detaches a socket from loop.
211  */
212 int h2o_socket_export(h2o_socket_t *sock, h2o_socket_export_t *info);
213 /**
214  * attaches a socket onto a loop.
215  */
216 h2o_socket_t *h2o_socket_import(h2o_loop_t *loop, h2o_socket_export_t *info);
217 /**
218  * destroys an exported socket info.
219  */
220 void h2o_socket_dispose_export(h2o_socket_export_t *info);
221 /**
222  * closes the socket
223  */
224 void h2o_socket_close(h2o_socket_t *sock);
225 /**
226  * Schedules a callback that would be invoked when the socket becomes immediately writable
227  */
228 void h2o_socket_notify_write(h2o_socket_t *sock, h2o_socket_cb cb);
229 /**
230  * Obtain the underlying fd of a sock struct
231  */
232 int h2o_socket_get_fd(h2o_socket_t *sock);
/**
 * Sets or unsets the H2O_SOCKET_FLAG_DONT_READ flag.
 * When the flag is set, the caller is simply notified that data is available, rather than having the data read automatically.
 */
238 void h2o_socket_dont_read(h2o_socket_t *sock, int dont_read);
239 /**
240  * connects to peer
241  */
242 h2o_socket_t *h2o_socket_connect(h2o_loop_t *loop, struct sockaddr *addr, socklen_t addrlen, h2o_socket_cb cb, const char **err);
243 /**
244  * prepares for latency-optimized write and returns the number of octets that should be written, or SIZE_MAX if failed to prepare
245  */
246 static size_t h2o_socket_prepare_for_latency_optimized_write(h2o_socket_t *sock,
247                                                              const h2o_socket_latency_optimization_conditions_t *conditions);
248 size_t h2o_socket_do_prepare_for_latency_optimized_write(h2o_socket_t *sock,
249                                                          const h2o_socket_latency_optimization_conditions_t *conditions);
250 /**
251  * writes given data to socket
252  * @param sock the socket
253  * @param bufs an array of buffers
254  * @param bufcnt length of the buffer array
255  * @param cb callback to be called when write is complete
256  */
257 void h2o_socket_write(h2o_socket_t *sock, h2o_iovec_t *bufs, size_t bufcnt, h2o_socket_cb cb);
258 /**
259  * starts polling on the socket (for read) and calls given callback when data arrives
260  * @param sock the socket
261  * @param cb callback to be called when data arrives
262  * @note callback is called when any data arrives at the TCP level so that the
263  * applications can update their timeout counters.  In other words, there is no
264  * guarantee that _new_ data is available when the callback gets called (e.g.
265  * in cases like receiving a partial SSL record or a corrupt TCP packet).
266  */
267 void h2o_socket_read_start(h2o_socket_t *sock, h2o_socket_cb cb);
268 /**
269  * stops polling on the socket (for read)
270  * @param sock the socket
271  */
272 void h2o_socket_read_stop(h2o_socket_t *sock);
/**
 * returns a boolean value indicating whether a write operation is in progress
 */
276 static int h2o_socket_is_writing(h2o_socket_t *sock);
/**
 * returns a boolean value indicating whether the socket is being polled for read
 */
280 static int h2o_socket_is_reading(h2o_socket_t *sock);
281 /**
282  * returns the length of the local address obtained (or 0 if failed)
283  */
284 socklen_t h2o_socket_getsockname(h2o_socket_t *sock, struct sockaddr *sa);
285 /**
286  * returns the length of the remote address obtained (or 0 if failed)
287  */
288 socklen_t h2o_socket_getpeername(h2o_socket_t *sock, struct sockaddr *sa);
289 /**
290  * sets the remote address (used for overriding the value)
291  */
292 void h2o_socket_setpeername(h2o_socket_t *sock, struct sockaddr *sa, socklen_t len);
293 /**
294  *
295  */
296 ptls_t *h2o_socket_get_ptls(h2o_socket_t *sock);
297 /**
298  *
299  */
300 h2o_iovec_t h2o_socket_log_tcp_congestion_controller(h2o_socket_t *sock, h2o_mem_pool_t *pool);
301 h2o_iovec_t h2o_socket_log_tcp_delivery_rate(h2o_socket_t *sock, h2o_mem_pool_t *pool);
302 const char *h2o_socket_get_ssl_protocol_version(h2o_socket_t *sock);
303 int h2o_socket_get_ssl_session_reused(h2o_socket_t *sock);
304 const char *h2o_socket_get_ssl_cipher(h2o_socket_t *sock);
305 int h2o_socket_get_ssl_cipher_bits(h2o_socket_t *sock);
306 h2o_iovec_t h2o_socket_get_ssl_session_id(h2o_socket_t *sock);
307 const char *h2o_socket_get_ssl_server_name(const h2o_socket_t *sock);
308 static h2o_iovec_t h2o_socket_log_ssl_protocol_version(h2o_socket_t *sock, h2o_mem_pool_t *pool);
309 static h2o_iovec_t h2o_socket_log_ssl_session_reused(h2o_socket_t *sock, h2o_mem_pool_t *pool);
310 static h2o_iovec_t h2o_socket_log_ssl_cipher(h2o_socket_t *sock, h2o_mem_pool_t *pool);
311 h2o_iovec_t h2o_socket_log_ssl_cipher_bits(h2o_socket_t *sock, h2o_mem_pool_t *pool);
312 h2o_iovec_t h2o_socket_log_ssl_session_id(h2o_socket_t *sock, h2o_mem_pool_t *pool);
313 static h2o_iovec_t h2o_socket_log_ssl_server_name(h2o_socket_t *sock, h2o_mem_pool_t *pool);
314 static h2o_iovec_t h2o_socket_log_ssl_negotiated_protocol(h2o_socket_t *sock, h2o_mem_pool_t *pool);
315 int h2o_socket_ssl_new_session_cb(SSL *s, SSL_SESSION *sess);
316 
317 /**
318  * compares socket addresses
319  */
320 int h2o_socket_compare_address(struct sockaddr *x, struct sockaddr *y, int check_port);
321 /**
322  * getnameinfo (buf should be NI_MAXHOST in length), returns SIZE_MAX if failed
323  */
324 size_t h2o_socket_getnumerichost(const struct sockaddr *sa, socklen_t salen, char *buf);
325 /**
326  * returns the port number, or -1 if failed
327  */
328 int32_t h2o_socket_getport(const struct sockaddr *sa);
329 /**
330  * converts given error number to string representation if known, otherwise returns `default_err`
331  */
332 const char *h2o_socket_get_error_string(int errnum, const char *default_err);
333 /**
334  * performs SSL handshake on a socket
335  * @param sock the socket
336  * @param ssl_ctx SSL context
337  * @param handshake_cb callback to be called when handshake is complete
338  */
339 void h2o_socket_ssl_handshake(h2o_socket_t *sock, SSL_CTX *ssl_ctx, const char *server_name, h2o_iovec_t alpn_protos,
340                               h2o_socket_cb handshake_cb);
341 /**
342  * resumes SSL handshake with given session data
343  * @param sock the socket
344  * @param session_data session data (or {NULL,0} if not available)
345  */
346 void h2o_socket_ssl_resume_server_handshake(h2o_socket_t *sock, h2o_iovec_t session_data);
347 /**
348  * registers callbacks to be called for handling session data
349  */
350 void h2o_socket_ssl_async_resumption_init(h2o_socket_ssl_resumption_get_async_cb get_cb, h2o_socket_ssl_resumption_new_cb new_cb);
/**
 * sets up the SSL context to use the async resumption
 */
354 void h2o_socket_ssl_async_resumption_setup_ctx(SSL_CTX *ctx);
355 /**
356  * returns the name of the protocol selected using either NPN or ALPN (ALPN has the precedence).
357  * @param sock the socket
358  */
359 h2o_iovec_t h2o_socket_ssl_get_selected_protocol(h2o_socket_t *sock);
/**
 * returns if the socket is in early-data state (i.e. has not yet seen ClientFinished)
 */
363 int h2o_socket_ssl_is_early_data(h2o_socket_t *sock);
364 /**
365  *
366  */
367 struct st_ptls_context_t *h2o_socket_ssl_get_picotls_context(SSL_CTX *ossl);
368 /**
369  * associates a picotls context to SSL_CTX
370  */
371 void h2o_socket_ssl_set_picotls_context(SSL_CTX *ossl, struct st_ptls_context_t *ptls);
372 /**
373  *
374  */
375 h2o_cache_t *h2o_socket_ssl_get_session_cache(SSL_CTX *ctx);
376 /**
377  *
378  */
379 void h2o_socket_ssl_set_session_cache(SSL_CTX *ctx, h2o_cache_t *cache);
380 /**
381  *
382  */
383 void h2o_socket_ssl_destroy_session_cache_entry(h2o_iovec_t value);
384 /**
385  * registers the protocol list to be used for ALPN
386  */
387 void h2o_ssl_register_alpn_protocols(SSL_CTX *ctx, const h2o_iovec_t *protocols);
388 /**
389  * registers the protocol list to be used for NPN
390  */
391 void h2o_ssl_register_npn_protocols(SSL_CTX *ctx, const char *protocols);
/**
 * Sets the DF bit if possible. Returns true when the operation was successful, or when the operating system does not provide the
 * necessary features. In either case, operation can continue with or without the DF bit being set.
 */
396 int h2o_socket_set_df_bit(int fd, int domain);
/**
 * helper that returns the skip-tracing flag of the socket (i.e. non-zero when the socket is NOT a target of tracing)
 */
400 static int h2o_socket_skip_tracing(h2o_socket_t *sock);
401 /**
402  *
403  */
404 void h2o_socket_set_skip_tracing(h2o_socket_t *sock, int skip_tracing);
405 
406 /**
407  * Prepares eBPF maps. Requires root privileges and thus should be called before dropping the privileges. Returns a boolean
408  * indicating if operation succeeded.
409  */
410 int h2o_socket_ebpf_setup(void);
411 /**
412  * Function to lookup if the connection is tagged for special treatment. The result is a union of `H2O_EBPF_FLAGS_*`.
413  */
414 uint64_t h2o_socket_ebpf_lookup_flags(h2o_loop_t *loop, int (*init_key)(h2o_ebpf_map_key_t *key, void *cbdata), void *cbdata);
415 /**
416  *
417  */
418 uint64_t h2o_socket_ebpf_lookup_flags_sni(h2o_loop_t *loop, uint64_t flags, const char *server_name, size_t server_name_len);
419 /**
420  * function for initializing the ebpf lookup key from raw information
421  */
422 int h2o_socket_ebpf_init_key_raw(h2o_ebpf_map_key_t *key, int sock_type, struct sockaddr *local, struct sockaddr *remote);
423 /**
424  * callback for initializing the ebpf lookup key from `h2o_socket_t`
425  */
426 int h2o_socket_ebpf_init_key(h2o_ebpf_map_key_t *key, void *sock);
427 
428 /* inline defs */
429 
/**
 * Tells if a write is in progress, which is the case while a write callback is registered on the socket.
 * @param sock the socket
 * @return non-zero if `sock->_cb.write` is set, zero otherwise
 */
inline int h2o_socket_is_writing(h2o_socket_t *sock)
{
    h2o_socket_cb pending = sock->_cb.write;

    return pending != NULL;
}
434 
/**
 * Tells if the socket is being polled for read, which is the case while a read callback is registered on the socket.
 * @param sock the socket
 * @return non-zero if `sock->_cb.read` is set, zero otherwise
 */
inline int h2o_socket_is_reading(h2o_socket_t *sock)
{
    h2o_socket_cb registered = sock->_cb.read;

    return registered != NULL;
}
439 
/**
 * Fast path of the latency-optimized write preparation. The out-of-line function is consulted only while the state is yet to be
 * determined or needs to be refreshed; otherwise the previously stored suggestion is reused.
 * @param sock the socket
 * @param conditions conditions passed through to h2o_socket_do_prepare_for_latency_optimized_write
 * @return the result of h2o_socket_do_prepare_for_latency_optimized_write, or the stored `suggested_write_size`
 */
inline size_t h2o_socket_prepare_for_latency_optimized_write(h2o_socket_t *sock,
                                                             const h2o_socket_latency_optimization_conditions_t *conditions)
{
    uint8_t state = sock->_latency_optimization.state;

    if (state == H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_TBD || state == H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_NEEDS_UPDATE) {
        return h2o_socket_do_prepare_for_latency_optimized_write(sock, conditions);
    }
    return sock->_latency_optimization.suggested_write_size;
}
451 
h2o_socket_log_ssl_protocol_version(h2o_socket_t * sock,h2o_mem_pool_t * pool)452 inline h2o_iovec_t h2o_socket_log_ssl_protocol_version(h2o_socket_t *sock, h2o_mem_pool_t *pool)
453 {
454     (void)pool;
455     const char *s = h2o_socket_get_ssl_protocol_version(sock);
456     return s != NULL ? h2o_iovec_init(s, strlen(s)) : h2o_iovec_init(NULL, 0);
457 }
458 
h2o_socket_log_ssl_session_reused(h2o_socket_t * sock,h2o_mem_pool_t * pool)459 inline h2o_iovec_t h2o_socket_log_ssl_session_reused(h2o_socket_t *sock, h2o_mem_pool_t *pool)
460 {
461     (void)pool;
462     switch (h2o_socket_get_ssl_session_reused(sock)) {
463     case 0:
464         return h2o_iovec_init(H2O_STRLIT("0"));
465     case 1:
466         return h2o_iovec_init(H2O_STRLIT("1"));
467     default:
468         return h2o_iovec_init(NULL, 0);
469     }
470 }
471 
h2o_socket_log_ssl_cipher(h2o_socket_t * sock,h2o_mem_pool_t * pool)472 inline h2o_iovec_t h2o_socket_log_ssl_cipher(h2o_socket_t *sock, h2o_mem_pool_t *pool)
473 {
474     (void)pool;
475     const char *s = h2o_socket_get_ssl_cipher(sock);
476     return s != NULL ? h2o_iovec_init(s, strlen(s)) : h2o_iovec_init(NULL, 0);
477 }
478 
h2o_socket_log_ssl_server_name(h2o_socket_t * sock,h2o_mem_pool_t * pool)479 inline h2o_iovec_t h2o_socket_log_ssl_server_name(h2o_socket_t *sock, h2o_mem_pool_t *pool)
480 {
481     (void)pool;
482     const char *s = h2o_socket_get_ssl_server_name(sock);
483     return s != NULL ? h2o_iovec_init(s, strlen(s)) : h2o_iovec_init(NULL, 0);
484 }
485 
h2o_socket_log_ssl_negotiated_protocol(h2o_socket_t * sock,h2o_mem_pool_t * pool)486 inline h2o_iovec_t h2o_socket_log_ssl_negotiated_protocol(h2o_socket_t *sock, h2o_mem_pool_t *pool)
487 {
488     (void)pool;
489     return h2o_socket_ssl_get_selected_protocol(sock);
490 }
491 
/**
 * Tells if the counter is running, i.e. if a non-zero start time is recorded.
 * @param counter the counter
 * @return non-zero if `counter->cur.start_at` is non-zero, zero otherwise
 */
inline int h2o_sliding_counter_is_running(h2o_sliding_counter_t *counter)
{
    uint64_t started_at = counter->cur.start_at;

    return started_at != 0;
}
496 
/**
 * Starts the counter by recording the start time.
 * @param counter the counter
 * @param now value to be recorded as the start time
 * @note h2o_sliding_counter_is_running treats a start time of zero as "not running", so passing zero as `now` leaves the counter
 * indistinguishable from a stopped one.
 */
inline void h2o_sliding_counter_start(h2o_sliding_counter_t *counter, uint64_t now)
{
    counter->cur.start_at = now;
}
501 
/**
 * Returns the skip-tracing flag of the socket.
 * @param sock the socket
 * @return value of the `_skip_tracing` bit; non-zero indicates that the socket is NOT being traced
 */
inline int h2o_socket_skip_tracing(h2o_socket_t *sock)
{
    unsigned skip = sock->_skip_tracing;

    return (int)skip;
}
506 
507 #ifdef __cplusplus
508 }
509 #endif
510 
511 #endif
512