1 /*
2 * Copyright (c) 2014-2016 DeNA Co., Ltd., Kazuho Oku, Fastly, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to
6 * deal in the Software without restriction, including without limitation the
7 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 * sell copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 * IN THE SOFTWARE.
21 */
22 #ifndef h2o__socket_h
23 #define h2o__socket_h
24
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28
29 #include <stdint.h>
30 #include <sys/socket.h>
31 #include <openssl/ssl.h>
32 #include <openssl/opensslconf.h>
33 #include "picotls.h"
34 #include "h2o/cache.h"
35 #include "h2o/ebpf.h"
36 #include "h2o/memory.h"
37 #include "h2o/openssl_backport.h"
38 #include "h2o/string_.h"
39
40 #ifndef H2O_USE_LIBUV
41 #if H2O_USE_POLL || H2O_USE_EPOLL || H2O_USE_KQUEUE
42 #define H2O_USE_LIBUV 0
43 #else
44 #define H2O_USE_LIBUV 1
45 #endif
46 #endif
47
48 #if OPENSSL_VERSION_NUMBER >= 0x10002000L
49 #define H2O_USE_ALPN 1
50 #ifndef OPENSSL_NO_NEXTPROTONEG
51 #define H2O_USE_NPN 1
52 #else
53 #define H2O_USE_NPN 0
54 #endif
55 #elif OPENSSL_VERSION_NUMBER >= 0x10001000L
56 #define H2O_USE_ALPN 0
57 #define H2O_USE_NPN 1
58 #else
59 #define H2O_USE_ALPN 0
60 #define H2O_USE_NPN 0
61 #endif
62
63 /**
64 * Maximum amount of TLS records to generate at once. Default is 4 full-sized TLS records using 32-byte tag.
65 */
66 #define H2O_SOCKET_DEFAULT_SSL_BUFFER_SIZE ((5 + 16384 + 32) * 4)
67
/**
 * State of a sliding counter. Only `cur.start_at` is manipulated by the inline helpers of this header; the remaining fields are
 * maintained by h2o_sliding_counter_stop, whose definition is not part of this header.
 */
typedef struct st_h2o_sliding_counter_t {
    /* presumably the mean derived from `prev` - TODO confirm against h2o_sliding_counter_stop */
    uint64_t average;
    /* presumably a ring of the eight most recent samples together with their running sum - TODO confirm */
    struct {
        uint64_t sum;
        uint64_t slots[8];
        size_t index;
    } prev;
    /* measurement in progress; `start_at` holds the value given to h2o_sliding_counter_start, zero meaning "not running" */
    struct {
        uint64_t start_at;
    } cur;
} h2o_sliding_counter_t;
79
80 static int h2o_sliding_counter_is_running(h2o_sliding_counter_t *counter);
81 static void h2o_sliding_counter_start(h2o_sliding_counter_t *counter, uint64_t now);
82 void h2o_sliding_counter_stop(h2o_sliding_counter_t *counter, uint64_t now);
83
84 #define H2O_SOCKET_INITIAL_INPUT_BUFFER_SIZE 4096
85
86 #define H2O_SESSID_CTX ((const uint8_t *)"h2o")
87 #define H2O_SESSID_CTX_LEN (sizeof("h2o") - 1)
88
89 typedef struct st_h2o_socket_t h2o_socket_t;
90
91 typedef void (*h2o_socket_cb)(h2o_socket_t *sock, const char *err);
92
93 #if H2O_USE_LIBUV
94 #include "socket/uv-binding.h"
95 #else
96 #include "socket/evloop.h"
97 #endif
98
/**
 * a socket address kept together with its length
 * NOTE(review): `addr` is declared as a plain `struct sockaddr`; how addresses larger than that are stored is not visible in this
 * header - confirm against the code that allocates this struct
 */
struct st_h2o_socket_addr_t {
    socklen_t len;        /* length of `addr` */
    struct sockaddr addr; /* the address itself */
};
103
/**
 * values stored in `_latency_optimization.state` of `h2o_socket_t`; TBD and NEEDS_UPDATE are the two states in which
 * h2o_socket_prepare_for_latency_optimized_write recalculates the suggested write size
 */
enum {
    H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_TBD = 0,
    H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_NEEDS_UPDATE = 1,
    H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_DISABLED = 2,
    H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_DETERMINED = 3
};
110
111 /**
112 * abstraction layer for sockets (SSL vs. TCP)
113 */
114 struct st_h2o_socket_t {
115 void *data;
116 struct st_h2o_socket_ssl_t *ssl;
117 h2o_buffer_t *input;
118 /**
119 * total bytes read (above the TLS layer)
120 */
121 uint64_t bytes_read;
122 /**
123 * total bytes written (above the TLS layer)
124 */
125 uint64_t bytes_written;
126 /**
127 * boolean flag to indicate if sock is NOT being traced
128 */
129 unsigned _skip_tracing : 1;
130 struct {
131 void (*cb)(void *data);
132 void *data;
133 } on_close;
134 struct {
135 h2o_socket_cb read;
136 h2o_socket_cb write;
137 } _cb;
138 struct st_h2o_socket_addr_t *_peername;
139 struct st_h2o_socket_addr_t *_sockname;
140 struct {
141 size_t cnt;
142 h2o_iovec_t *bufs;
143 union {
144 h2o_iovec_t *alloced_ptr;
145 h2o_iovec_t smallbufs[4];
146 };
147 } _write_buf;
148 struct {
149 uint8_t state; /* one of H2O_SOCKET_LATENCY_STATE_* */
150 uint8_t notsent_is_minimized : 1;
151 size_t suggested_tls_payload_size; /* suggested TLS record payload size, or SIZE_MAX when no need to restrict */
152 size_t suggested_write_size; /* SIZE_MAX if no need to optimize for latency */
153 } _latency_optimization;
154 };
155
156 typedef struct st_h2o_socket_export_t {
157 int fd;
158 struct st_h2o_socket_ssl_t *ssl;
159 h2o_buffer_t *input;
160 } h2o_socket_export_t;
161
/**
 * sets the conditions to enable the optimization
 */
typedef struct st_h2o_socket_latency_optimization_conditions_t {
    unsigned int min_rtt;              /* in milliseconds */
    unsigned int max_additional_delay; /* percent ratio */
    unsigned int max_cwnd;             /* in number of octets */
} h2o_socket_latency_optimization_conditions_t;
179
180 typedef void (*h2o_socket_ssl_resumption_get_async_cb)(h2o_socket_t *sock, h2o_iovec_t session_id);
181 typedef void (*h2o_socket_ssl_resumption_new_cb)(h2o_socket_t *sock, h2o_iovec_t session_id, h2o_iovec_t session_data);
182 typedef void (*h2o_socket_ssl_resumption_remove_cb)(h2o_iovec_t session_id);
183
184 extern h2o_buffer_mmap_settings_t h2o_socket_buffer_mmap_settings;
185 extern h2o_buffer_prototype_t h2o_socket_buffer_prototype;
186
187 extern size_t h2o_socket_ssl_buffer_size;
188 extern __thread h2o_mem_recycle_t h2o_socket_ssl_buffer_allocator;
189
190 extern const char h2o_socket_error_out_of_memory[];
191 extern const char h2o_socket_error_io[];
192 extern const char h2o_socket_error_closed[];
193 extern const char h2o_socket_error_conn_fail[];
194 extern const char h2o_socket_error_conn_refused[];
195 extern const char h2o_socket_error_conn_timed_out[];
196 extern const char h2o_socket_error_network_unreachable[];
197 extern const char h2o_socket_error_host_unreachable[];
198 extern const char h2o_socket_error_socket_fail[];
199 extern const char h2o_socket_error_ssl_no_cert[];
200 extern const char h2o_socket_error_ssl_cert_invalid[];
201 extern const char h2o_socket_error_ssl_cert_name_mismatch[];
202 extern const char h2o_socket_error_ssl_decode[];
203 extern const char h2o_socket_error_ssl_handshake[];
204
205 /**
206 * returns the loop
207 */
208 h2o_loop_t *h2o_socket_get_loop(h2o_socket_t *sock);
209 /**
210 * detaches a socket from loop.
211 */
212 int h2o_socket_export(h2o_socket_t *sock, h2o_socket_export_t *info);
213 /**
214 * attaches a socket onto a loop.
215 */
216 h2o_socket_t *h2o_socket_import(h2o_loop_t *loop, h2o_socket_export_t *info);
217 /**
218 * destroys an exported socket info.
219 */
220 void h2o_socket_dispose_export(h2o_socket_export_t *info);
221 /**
222 * closes the socket
223 */
224 void h2o_socket_close(h2o_socket_t *sock);
225 /**
226 * Schedules a callback that would be invoked when the socket becomes immediately writable
227 */
228 void h2o_socket_notify_write(h2o_socket_t *sock, h2o_socket_cb cb);
229 /**
230 * Obtain the underlying fd of a sock struct
231 */
232 int h2o_socket_get_fd(h2o_socket_t *sock);
/**
 * Set/Unset the H2O_SOCKET_FLAG_DONT_READ flag.
 * When the flag is set, the caller is merely notified that the socket is readable, instead of the data being read
 * automatically.
 */
238 void h2o_socket_dont_read(h2o_socket_t *sock, int dont_read);
239 /**
240 * connects to peer
241 */
242 h2o_socket_t *h2o_socket_connect(h2o_loop_t *loop, struct sockaddr *addr, socklen_t addrlen, h2o_socket_cb cb, const char **err);
243 /**
244 * prepares for latency-optimized write and returns the number of octets that should be written, or SIZE_MAX if failed to prepare
245 */
246 static size_t h2o_socket_prepare_for_latency_optimized_write(h2o_socket_t *sock,
247 const h2o_socket_latency_optimization_conditions_t *conditions);
248 size_t h2o_socket_do_prepare_for_latency_optimized_write(h2o_socket_t *sock,
249 const h2o_socket_latency_optimization_conditions_t *conditions);
250 /**
251 * writes given data to socket
252 * @param sock the socket
253 * @param bufs an array of buffers
254 * @param bufcnt length of the buffer array
255 * @param cb callback to be called when write is complete
256 */
257 void h2o_socket_write(h2o_socket_t *sock, h2o_iovec_t *bufs, size_t bufcnt, h2o_socket_cb cb);
258 /**
259 * starts polling on the socket (for read) and calls given callback when data arrives
260 * @param sock the socket
261 * @param cb callback to be called when data arrives
262 * @note callback is called when any data arrives at the TCP level so that the
263 * applications can update their timeout counters. In other words, there is no
264 * guarantee that _new_ data is available when the callback gets called (e.g.
265 * in cases like receiving a partial SSL record or a corrupt TCP packet).
266 */
267 void h2o_socket_read_start(h2o_socket_t *sock, h2o_socket_cb cb);
268 /**
269 * stops polling on the socket (for read)
270 * @param sock the socket
271 */
272 void h2o_socket_read_stop(h2o_socket_t *sock);
/**
 * returns a boolean value indicating whether a write is in progress
 */
276 static int h2o_socket_is_writing(h2o_socket_t *sock);
/**
 * returns a boolean value indicating whether the socket is being polled for read
 */
280 static int h2o_socket_is_reading(h2o_socket_t *sock);
281 /**
282 * returns the length of the local address obtained (or 0 if failed)
283 */
284 socklen_t h2o_socket_getsockname(h2o_socket_t *sock, struct sockaddr *sa);
285 /**
286 * returns the length of the remote address obtained (or 0 if failed)
287 */
288 socklen_t h2o_socket_getpeername(h2o_socket_t *sock, struct sockaddr *sa);
289 /**
290 * sets the remote address (used for overriding the value)
291 */
292 void h2o_socket_setpeername(h2o_socket_t *sock, struct sockaddr *sa, socklen_t len);
293 /**
294 *
295 */
296 ptls_t *h2o_socket_get_ptls(h2o_socket_t *sock);
297 /**
298 *
299 */
300 h2o_iovec_t h2o_socket_log_tcp_congestion_controller(h2o_socket_t *sock, h2o_mem_pool_t *pool);
301 h2o_iovec_t h2o_socket_log_tcp_delivery_rate(h2o_socket_t *sock, h2o_mem_pool_t *pool);
302 const char *h2o_socket_get_ssl_protocol_version(h2o_socket_t *sock);
303 int h2o_socket_get_ssl_session_reused(h2o_socket_t *sock);
304 const char *h2o_socket_get_ssl_cipher(h2o_socket_t *sock);
305 int h2o_socket_get_ssl_cipher_bits(h2o_socket_t *sock);
306 h2o_iovec_t h2o_socket_get_ssl_session_id(h2o_socket_t *sock);
307 const char *h2o_socket_get_ssl_server_name(const h2o_socket_t *sock);
308 static h2o_iovec_t h2o_socket_log_ssl_protocol_version(h2o_socket_t *sock, h2o_mem_pool_t *pool);
309 static h2o_iovec_t h2o_socket_log_ssl_session_reused(h2o_socket_t *sock, h2o_mem_pool_t *pool);
310 static h2o_iovec_t h2o_socket_log_ssl_cipher(h2o_socket_t *sock, h2o_mem_pool_t *pool);
311 h2o_iovec_t h2o_socket_log_ssl_cipher_bits(h2o_socket_t *sock, h2o_mem_pool_t *pool);
312 h2o_iovec_t h2o_socket_log_ssl_session_id(h2o_socket_t *sock, h2o_mem_pool_t *pool);
313 static h2o_iovec_t h2o_socket_log_ssl_server_name(h2o_socket_t *sock, h2o_mem_pool_t *pool);
314 static h2o_iovec_t h2o_socket_log_ssl_negotiated_protocol(h2o_socket_t *sock, h2o_mem_pool_t *pool);
315 int h2o_socket_ssl_new_session_cb(SSL *s, SSL_SESSION *sess);
316
317 /**
318 * compares socket addresses
319 */
320 int h2o_socket_compare_address(struct sockaddr *x, struct sockaddr *y, int check_port);
321 /**
322 * getnameinfo (buf should be NI_MAXHOST in length), returns SIZE_MAX if failed
323 */
324 size_t h2o_socket_getnumerichost(const struct sockaddr *sa, socklen_t salen, char *buf);
325 /**
326 * returns the port number, or -1 if failed
327 */
328 int32_t h2o_socket_getport(const struct sockaddr *sa);
329 /**
330 * converts given error number to string representation if known, otherwise returns `default_err`
331 */
332 const char *h2o_socket_get_error_string(int errnum, const char *default_err);
333 /**
334 * performs SSL handshake on a socket
335 * @param sock the socket
336 * @param ssl_ctx SSL context
337 * @param handshake_cb callback to be called when handshake is complete
338 */
339 void h2o_socket_ssl_handshake(h2o_socket_t *sock, SSL_CTX *ssl_ctx, const char *server_name, h2o_iovec_t alpn_protos,
340 h2o_socket_cb handshake_cb);
341 /**
342 * resumes SSL handshake with given session data
343 * @param sock the socket
344 * @param session_data session data (or {NULL,0} if not available)
345 */
346 void h2o_socket_ssl_resume_server_handshake(h2o_socket_t *sock, h2o_iovec_t session_data);
347 /**
348 * registers callbacks to be called for handling session data
349 */
350 void h2o_socket_ssl_async_resumption_init(h2o_socket_ssl_resumption_get_async_cb get_cb, h2o_socket_ssl_resumption_new_cb new_cb);
/**
 * sets up the SSL context to use the async resumption
 */
354 void h2o_socket_ssl_async_resumption_setup_ctx(SSL_CTX *ctx);
355 /**
356 * returns the name of the protocol selected using either NPN or ALPN (ALPN has the precedence).
357 * @param sock the socket
358 */
359 h2o_iovec_t h2o_socket_ssl_get_selected_protocol(h2o_socket_t *sock);
360 /**
361 * returns if the socket is in early-data state (i.e. have not yet seen ClientFinished)
362 */
363 int h2o_socket_ssl_is_early_data(h2o_socket_t *sock);
364 /**
365 *
366 */
367 struct st_ptls_context_t *h2o_socket_ssl_get_picotls_context(SSL_CTX *ossl);
368 /**
369 * associates a picotls context to SSL_CTX
370 */
371 void h2o_socket_ssl_set_picotls_context(SSL_CTX *ossl, struct st_ptls_context_t *ptls);
372 /**
373 *
374 */
375 h2o_cache_t *h2o_socket_ssl_get_session_cache(SSL_CTX *ctx);
376 /**
377 *
378 */
379 void h2o_socket_ssl_set_session_cache(SSL_CTX *ctx, h2o_cache_t *cache);
380 /**
381 *
382 */
383 void h2o_socket_ssl_destroy_session_cache_entry(h2o_iovec_t value);
384 /**
385 * registers the protocol list to be used for ALPN
386 */
387 void h2o_ssl_register_alpn_protocols(SSL_CTX *ctx, const h2o_iovec_t *protocols);
388 /**
389 * registers the protocol list to be used for NPN
390 */
391 void h2o_ssl_register_npn_protocols(SSL_CTX *ctx, const char *protocols);
/**
 * Sets the DF bit if possible. Returns true when the operation was successful, or when the operating system does not provide the
 * necessary features. In either case, operation can continue with or without the DF bit being set.
 */
396 int h2o_socket_set_df_bit(int fd, int domain);
/**
 * helper to check whether tracing is to be skipped for the socket (i.e. returns true when the socket is NOT a target of tracing)
 */
400 static int h2o_socket_skip_tracing(h2o_socket_t *sock);
401 /**
402 *
403 */
404 void h2o_socket_set_skip_tracing(h2o_socket_t *sock, int skip_tracing);
405
406 /**
407 * Prepares eBPF maps. Requires root privileges and thus should be called before dropping the privileges. Returns a boolean
408 * indicating if operation succeeded.
409 */
410 int h2o_socket_ebpf_setup(void);
411 /**
412 * Function to lookup if the connection is tagged for special treatment. The result is a union of `H2O_EBPF_FLAGS_*`.
413 */
414 uint64_t h2o_socket_ebpf_lookup_flags(h2o_loop_t *loop, int (*init_key)(h2o_ebpf_map_key_t *key, void *cbdata), void *cbdata);
415 /**
416 *
417 */
418 uint64_t h2o_socket_ebpf_lookup_flags_sni(h2o_loop_t *loop, uint64_t flags, const char *server_name, size_t server_name_len);
419 /**
420 * function for initializing the ebpf lookup key from raw information
421 */
422 int h2o_socket_ebpf_init_key_raw(h2o_ebpf_map_key_t *key, int sock_type, struct sockaddr *local, struct sockaddr *remote);
423 /**
424 * callback for initializing the ebpf lookup key from `h2o_socket_t`
425 */
426 int h2o_socket_ebpf_init_key(h2o_ebpf_map_key_t *key, void *sock);
427
428 /* inline defs */
429
/**
 * Tells whether a write callback is currently registered on the socket.
 * @param sock the socket
 * @return 1 if `sock->_cb.write` is set, 0 otherwise
 */
inline int h2o_socket_is_writing(h2o_socket_t *sock)
{
    return sock->_cb.write == NULL ? 0 : 1;
}
434
/**
 * Tells whether a read callback is currently registered on the socket.
 * @param sock the socket
 * @return 1 if `sock->_cb.read` is set, 0 otherwise
 */
inline int h2o_socket_is_reading(h2o_socket_t *sock)
{
    return sock->_cb.read == NULL ? 0 : 1;
}
439
/**
 * Returns the write size suggested for latency optimization. The value cached in `_latency_optimization.suggested_write_size` is
 * returned as-is unless the state is TBD or NEEDS_UPDATE, in which case the work is delegated to
 * h2o_socket_do_prepare_for_latency_optimized_write.
 * @param sock the socket
 * @param conditions conditions forwarded to h2o_socket_do_prepare_for_latency_optimized_write
 */
inline size_t h2o_socket_prepare_for_latency_optimized_write(h2o_socket_t *sock,
                                                             const h2o_socket_latency_optimization_conditions_t *conditions)
{
    int needs_recalc = sock->_latency_optimization.state == H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_TBD ||
                       sock->_latency_optimization.state == H2O_SOCKET_LATENCY_OPTIMIZATION_STATE_NEEDS_UPDATE;

    if (needs_recalc)
        return h2o_socket_do_prepare_for_latency_optimized_write(sock, conditions);
    return sock->_latency_optimization.suggested_write_size;
}
451
h2o_socket_log_ssl_protocol_version(h2o_socket_t * sock,h2o_mem_pool_t * pool)452 inline h2o_iovec_t h2o_socket_log_ssl_protocol_version(h2o_socket_t *sock, h2o_mem_pool_t *pool)
453 {
454 (void)pool;
455 const char *s = h2o_socket_get_ssl_protocol_version(sock);
456 return s != NULL ? h2o_iovec_init(s, strlen(s)) : h2o_iovec_init(NULL, 0);
457 }
458
h2o_socket_log_ssl_session_reused(h2o_socket_t * sock,h2o_mem_pool_t * pool)459 inline h2o_iovec_t h2o_socket_log_ssl_session_reused(h2o_socket_t *sock, h2o_mem_pool_t *pool)
460 {
461 (void)pool;
462 switch (h2o_socket_get_ssl_session_reused(sock)) {
463 case 0:
464 return h2o_iovec_init(H2O_STRLIT("0"));
465 case 1:
466 return h2o_iovec_init(H2O_STRLIT("1"));
467 default:
468 return h2o_iovec_init(NULL, 0);
469 }
470 }
471
h2o_socket_log_ssl_cipher(h2o_socket_t * sock,h2o_mem_pool_t * pool)472 inline h2o_iovec_t h2o_socket_log_ssl_cipher(h2o_socket_t *sock, h2o_mem_pool_t *pool)
473 {
474 (void)pool;
475 const char *s = h2o_socket_get_ssl_cipher(sock);
476 return s != NULL ? h2o_iovec_init(s, strlen(s)) : h2o_iovec_init(NULL, 0);
477 }
478
h2o_socket_log_ssl_server_name(h2o_socket_t * sock,h2o_mem_pool_t * pool)479 inline h2o_iovec_t h2o_socket_log_ssl_server_name(h2o_socket_t *sock, h2o_mem_pool_t *pool)
480 {
481 (void)pool;
482 const char *s = h2o_socket_get_ssl_server_name(sock);
483 return s != NULL ? h2o_iovec_init(s, strlen(s)) : h2o_iovec_init(NULL, 0);
484 }
485
h2o_socket_log_ssl_negotiated_protocol(h2o_socket_t * sock,h2o_mem_pool_t * pool)486 inline h2o_iovec_t h2o_socket_log_ssl_negotiated_protocol(h2o_socket_t *sock, h2o_mem_pool_t *pool)
487 {
488 (void)pool;
489 return h2o_socket_ssl_get_selected_protocol(sock);
490 }
491
/**
 * Tells whether a measurement is in progress, i.e. whether `cur.start_at` is non-zero.
 * @param counter the counter
 * @return 1 if running, 0 otherwise
 */
inline int h2o_sliding_counter_is_running(h2o_sliding_counter_t *counter)
{
    return counter->cur.start_at == 0 ? 0 : 1;
}
496
/**
 * Records `now` as the start of the current measurement.
 * @param counter the counter
 * @param now start time; note that passing zero leaves the counter reported as not running by h2o_sliding_counter_is_running
 */
inline void h2o_sliding_counter_start(h2o_sliding_counter_t *counter, uint64_t now)
{
    counter->cur.start_at = now;
}
501
/**
 * Returns the `_skip_tracing` flag of the socket.
 * @param sock the socket
 * @return 1 if the socket is NOT being traced, 0 otherwise
 */
inline int h2o_socket_skip_tracing(h2o_socket_t *sock)
{
    int skip = sock->_skip_tracing;

    return skip;
}
506
507 #ifdef __cplusplus
508 }
509 #endif
510
511 #endif
512