1 #include "first.h"
2
3 #include "base.h"
4 #include "buffer.h"
5 #include "burl.h" /* HTTP_PARSEOPT_HEADER_STRICT */
6 #include "chunk.h"
7 #include "log.h"
8 #include "connections.h"
9 #include "fdevent.h"
10 #include "h2.h"
11 #include "http_header.h"
12
13 #include "reqpool.h"
14 #include "request.h"
15 #include "response.h"
16 #include "network.h"
17 #include "stat_cache.h"
18
19 #include "plugin.h"
20
21 #include "sock_addr_cache.h"
22
23 #include <sys/stat.h>
24
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <errno.h>
28 #include <string.h>
29
30 #include "sys-socket.h"
31
/* Linger period: once the write side has been shut down, the connection is
 * kept in the close state (draining client data) until more than this many
 * log_monotonic_secs ticks have elapsed, and is then closed */
#define HTTP_LINGER_TIMEOUT 5

/* assign request state n to request r (plain assignment; no checks) */
#define connection_set_state(r, n) ((r)->state = (n))
35
36 __attribute_cold__
connection_set_state_error(request_st * const r,const request_state_t state)37 static void connection_set_state_error(request_st * const r, const request_state_t state) {
38 connection_set_state(r, state);
39 }
40
41 __attribute_cold__
42 static connection *connection_init(server *srv);
43
44 static void connection_reset(connection *con);
45
connections_get_new_connection(server * srv)46 static connection *connections_get_new_connection(server *srv) {
47 connection *con;
48 --srv->lim_conns;
49 if (srv->conns_pool) {
50 con = srv->conns_pool;
51 srv->conns_pool = con->next;
52 }
53 else {
54 con = connection_init(srv);
55 connection_reset(con);
56 }
57 /*con->prev = NULL;*//*(already set)*/
58 if ((con->next = srv->conns))
59 con->next->prev = con;
60 return (srv->conns = con);
61 }
62
/* Unlink con from the active list srv->conns and push it onto the pool
 * srv->conns_pool for reuse.  Returns one unit to srv->lim_conns. */
static void connection_del(server *srv, connection *con) {
    connection * const nxt = con->next;
    connection * const prv = con->prev;

    /* remove from doubly-linked active list */
    if (nxt)
        nxt->prev = prv;
    if (prv)
        prv->next = nxt;
    else
        srv->conns = nxt; /* con was the list head */

    /* pool is singly-linked through ->next; ->prev stays NULL */
    con->prev = NULL;
    con->next = srv->conns_pool;
    srv->conns_pool = con;

    ++srv->lim_conns;
}
75
/* Tear down a connection: notify plugins, reset request and per-connection
 * state, unregister and close the socket, then return the connection object
 * to the pool via connection_del(). */
static void connection_close(connection *con) {
    /* a negative fd is flipped back to its positive value before use */
    if (con->fd < 0) con->fd = -con->fd;

    plugins_call_handle_connection_close(con);

    request_st * const r = &con->request;
    server * const srv = con->srv;
    request_reset_ex(r); /*(r->conf.* is still valid below)*/
    connection_set_state(r, CON_STATE_CONNECT);

    /* clear per-connection state before the object goes back to the pool */
    con->revents_err = 0;
    con->is_ssl_sock = 0;
    con->request_count = 0;
    chunkqueue_reset(con->read_queue);

    /* detach from event loop before closing the descriptor */
    fdevent_fdnode_event_del(srv->ev, con->fdn);
    fdevent_unregister(srv->ev, con->fd);
    con->fdn = NULL;

  #ifdef __WIN32
    const int rc = closesocket(con->fd);
  #else
    const int rc = close(con->fd);
  #endif
    if (0 != rc) {
        log_perror(r->conf.errh, __FILE__, __LINE__,
          "(warning) close: %d", con->fd);
    }
    else {
        --srv->cur_fds;
    }

    if (r->conf.log_state_handling) {
        log_error(r->conf.errh, __FILE__, __LINE__,
          "connection closed for fd %d", con->fd);
    }
    con->fd = -1;

    connection_del(srv, con);
}
112
/* Drain and discard pending data on a cleartext socket in the close state.
 *
 * Lingering is done regardless of r->keep_alive: even non-keepalive sockets
 * may still have unread data, and closing before reading it can prevent the
 * client from seeing all of our output.
 *
 * If the read would block, return and wait for more.  On EOF or any other
 * error, backdate close_timeout_ts so that the linger timeout is treated as
 * already expired. */
static void connection_read_for_eos_plain(connection * const con) {
    const int type = sock_addr_get_family(&con->dst_addr);
    char buf[16384];
    ssize_t len;

    for (;;) {
        len = fdevent_socket_read_discard(con->fd, buf, sizeof(buf),
                                          type, SOCK_STREAM);
        if (len > 0) continue;                   /* discarded data; read more */
        if (len < 0 && errno == EINTR) continue; /* interrupted; retry */
        break;
    }

    if (len < 0) {
        if (errno == EAGAIN) return;
      #if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
        if (errno == EWOULDBLOCK) return;
      #endif
    }

    /* 0 == len || (len < 0 && (errno is a non-recoverable error)) */
    con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
}
135
/* Drain pending data on a TLS connection in the close state via the
 * connection's network_read callback, then throw away whatever was read.
 * A negative result from network_read backdates close_timeout_ts so that
 * the linger timeout is treated as already expired. */
static void connection_read_for_eos_ssl(connection * const con) {
    const int rc = con->network_read(con, con->read_queue, MAX_READ_LIMIT);
    if (rc < 0) {
        con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
    }
    chunkqueue_reset(con->read_queue);
}
141
/* Dispatch end-of-stream draining to the TLS or cleartext variant,
 * depending on con->is_ssl_sock. */
static void connection_read_for_eos(connection * const con) {
    if (con->is_ssl_sock)
        connection_read_for_eos_ssl(con);
    else
        connection_read_for_eos_plain(con);
}
147
/* Close-state handler: drain any remaining client data, then close the
 * connection once more than HTTP_LINGER_TIMEOUT has elapsed since
 * con->close_timeout_ts. */
static void connection_handle_close_state(connection *con) {
    connection_read_for_eos(con);

    /* still within linger period; keep waiting */
    if (log_monotonic_secs - con->close_timeout_ts <= HTTP_LINGER_TIMEOUT)
        return;

    connection_close(con);
}
155
/* Begin closing a connection: notify plugins, reset the connection, and
 * either enter the lingering close state (CON_STATE_CLOSE) or, if there is
 * no valid fd or shutdown() of the write side fails on a non-TLS socket,
 * close the connection immediately. */
static void connection_handle_shutdown(connection *con) {
    plugins_call_handle_connection_shut_wr(con);

    connection_reset(con);
    ++con->srv->con_closed;

    /* (shutdown() is attempted only for valid, non-TLS sockets) */
    if (con->fd < 0
        || (!con->is_ssl_sock && 0 != shutdown(con->fd, SHUT_WR))) {
        connection_close(con);
        return;
    }

    /* start linger period */
    con->close_timeout_ts = log_monotonic_secs;

    request_st * const r = &con->request;
    connection_set_state(r, CON_STATE_CLOSE);
    if (r->conf.log_state_handling) {
        log_error(r->conf.errh, __FILE__, __LINE__,
          "shutdown for fd %d", con->fd);
    }
}
177
178
/* Response-end handler: retire HTTP/2 connection state if applicable, run
 * the request_done plugin hook, update counters, and then either prepare
 * the connection for the next keep-alive request or begin shutdown. */
static void connection_handle_response_end_state(request_st * const r, connection * const con) {
    if (r->http_version > HTTP_VERSION_1_1) {
        h2_retire_con(r, con);
        r->keep_alive = 0;
        /* set a status so that mod_accesslog, mod_rrdtool hooks are called
         * in plugins_call_handle_request_done() (XXX: or set to 0 to omit) */
        r->http_status = 100; /* XXX: what if con->state == CON_STATE_ERROR? */
    }

    /* call request_done hook if http_status set (e.g. to log request) */
    /* (even if error, connection dropped, as long as http_status is set) */
    if (r->http_status) plugins_call_handle_request_done(r);

    const int errored = (r->state == CON_STATE_ERROR);
    if (!errored) ++con->srv->con_written;

    if (errored || r->reqbody_length != r->reqbody_queue.bytes_in) {
        /* request body may not have been read completely */
        r->keep_alive = 0;
        /* clean up failed partial write of 1xx intermediate responses*/
        if (con->write_queue != &r->write_queue) { /*(for HTTP/1.1)*/
            chunkqueue_free(con->write_queue);
            con->write_queue = &r->write_queue;
        }
    }

    if (r->keep_alive <= 0) {
        connection_handle_shutdown(con);
        return;
    }

    /* keep-alive: get ready for next request on this connection */
    request_reset(r);
    con->is_readable = 1; /* potentially trigger optimistic read */
    /*(accounting used by mod_accesslog for HTTP/1.0 and HTTP/1.1)*/
    r->bytes_read_ckpt = con->bytes_read;
    r->bytes_written_ckpt = con->bytes_written;
  #if 0
    r->start_hp.tv_sec = log_epoch_secs;
    con->read_idle_ts = log_monotonic_secs;
  #endif
    connection_set_state(r, CON_STATE_REQUEST_START);
}
220
221
222 __attribute_pure__
223 static off_t
connection_write_throttled(const connection * const con,off_t max_bytes)224 connection_write_throttled (const connection * const con, off_t max_bytes)
225 {
226 const request_config * const restrict rconf = &con->request.conf;
227 if (0 == rconf->global_bytes_per_second && 0 == rconf->bytes_per_second)
228 return max_bytes;
229
230 if (rconf->global_bytes_per_second) {
231 off_t limit = (off_t)rconf->global_bytes_per_second
232 - *(rconf->global_bytes_per_second_cnt_ptr);
233 if (max_bytes > limit)
234 max_bytes = limit;
235 }
236
237 if (rconf->bytes_per_second) {
238 off_t limit = (off_t)rconf->bytes_per_second
239 - con->bytes_written_cur_second;
240 if (max_bytes > limit)
241 max_bytes = limit;
242 }
243
244 return max_bytes > 0 ? max_bytes : 0; /*(0 == reached traffic limit)*/
245 }
246
247
248 static off_t
connection_write_throttle(connection * const con,off_t max_bytes)249 connection_write_throttle (connection * const con, off_t max_bytes)
250 {
251 /*assert(max_bytes > 0);*/
252 max_bytes = connection_write_throttled(con, max_bytes);
253 if (0 == max_bytes) con->traffic_limit_reached = 1;
254 return max_bytes;
255 }
256
257
258 static int
connection_write_chunkqueue(connection * const con,chunkqueue * const restrict cq,off_t max_bytes)259 connection_write_chunkqueue (connection * const con, chunkqueue * const restrict cq, off_t max_bytes)
260 {
261 /*assert(!chunkqueue_is_empty(cq));*//* checked by callers */
262
263 con->write_request_ts = log_monotonic_secs;
264
265 max_bytes = connection_write_throttle(con, max_bytes);
266 if (0 == max_bytes) return 1;
267
268 off_t written = cq->bytes_out;
269 int ret;
270
271 #ifdef TCP_CORK
272 int corked = 0;
273 #endif
274
275 /* walk chunkqueue up to first FILE_CHUNK (if present)
276 * This may incur memory load misses for pointer chasing, but effectively
277 * preloads part of the chunkqueue, something which used to be a side effect
278 * of a previous (less efficient) version of chunkqueue_length() which
279 * walked the entire chunkqueue (on each and every call). The loads here
280 * make a measurable difference in performance in underlying call to
281 * con->network_write() */
282 if (cq->first->next && cq->first->type == MEM_CHUNK) {
283 const chunk *c = cq->first;
284 do { c = c->next; } while (c && c->type == MEM_CHUNK);
285 #ifdef TCP_CORK
286 /* Linux: put a cork into socket as we want to combine write() calls
287 * but only if we really have multiple chunks including non-MEM_CHUNK
288 * (or if multiple chunks and TLS), and only if TCP socket */
289 if (NULL != c || (max_bytes > 16384 && con->is_ssl_sock)) {
290 const int sa_family = sock_addr_get_family(&con->srv_socket->addr);
291 if (sa_family == AF_INET || sa_family == AF_INET6) {
292 corked = 1;
293 (void)setsockopt(con->fd, IPPROTO_TCP, TCP_CORK,
294 &corked, sizeof(corked));
295 }
296 }
297 #endif
298 }
299
300 ret = con->network_write(con, cq, max_bytes);
301 if (ret >= 0) {
302 ret = chunkqueue_is_empty(cq) ? 0 : 1;
303 }
304
305 #ifdef TCP_CORK
306 if (corked) {
307 corked = 0;
308 (void)setsockopt(con->fd, IPPROTO_TCP, TCP_CORK,
309 &corked, sizeof(corked));
310 }
311 #endif
312
313 written = cq->bytes_out - written;
314 con->bytes_written += written;
315 con->bytes_written_cur_second += written;
316 request_st * const r = &con->request;
317 if (r->conf.global_bytes_per_second_cnt_ptr)
318 *(r->conf.global_bytes_per_second_cnt_ptr) += written;
319
320 return ret;
321 }
322
323
324 static int
connection_write_1xx_info(request_st * const r,connection * const con)325 connection_write_1xx_info (request_st * const r, connection * const con)
326 {
327 /* (Note: prior 1xx intermediate responses may be present in cq) */
328 /* (Note: also choosing not to update con->write_request_ts
329 * which differs from connection_write_chunkqueue()) */
330 chunkqueue * const cq = con->write_queue;
331 off_t written = cq->bytes_out;
332
333 int rc = con->network_write(con, cq, MAX_WRITE_LIMIT);
334
335 written = cq->bytes_out - written;
336 con->bytes_written += written;
337 con->bytes_written_cur_second += written;
338 if (r->conf.global_bytes_per_second_cnt_ptr)
339 *(r->conf.global_bytes_per_second_cnt_ptr) += written;
340
341 if (rc < 0) {
342 connection_set_state_error(r, CON_STATE_ERROR);
343 return 0; /* error */
344 }
345
346 if (!chunkqueue_is_empty(cq)) { /* partial write (unlikely) */
347 con->is_writable = 0;
348 if (cq == &r->write_queue) {
349 /* save partial write of 1xx in separate chunkqueue
350 * Note: sending of remainder of 1xx might be delayed
351 * until next set of response headers are sent */
352 con->write_queue = chunkqueue_init(NULL);
353 chunkqueue_append_chunkqueue(con->write_queue, cq);
354 }
355 }
356
357 #if 0
358 /* XXX: accounting inconsistency
359 * 1xx is not currently included in r->resp_header_len,
360 * so mod_accesslog reporting of %b or %B (FORMAT_BYTES_OUT_NO_HEADER)
361 * reports all bytes out minus len of final response headers,
362 * but including 1xx intermediate responses. If 1xx intermediate
363 * responses were included in r->resp_header_len, then there are a
364 * few places in the code which must be adjusted to use r->resp_header_done
365 * instead of (0 == r->resp_header_len) as flag that final response was set
366 * (Doing the following would "discard" the 1xx len from bytes_out)
367 */
368 r->write_queue.bytes_in = r->write_queue.bytes_out = 0;
369 #endif
370
371 return 1; /* success */
372 }
373
374
375 int
connection_send_1xx(request_st * const r,connection * const con)376 connection_send_1xx (request_st * const r, connection * const con)
377 {
378 /* Make best effort to send HTTP/1.1 1xx intermediate */
379 /* (Note: if other modules set response headers *before* the
380 * handle_response_start hook, and the backends subsequently sends 1xx,
381 * then the response headers are sent here with 1xx and might be cleared
382 * by caller (http_response_parse_headers() and http_response_check_1xx()),
383 * instead of being sent with the final response.
384 * (e.g. mod_magnet setting response headers, then backend sending 103)) */
385
386 chunkqueue * const cq = con->write_queue; /*(bypass r->write_queue)*/
387
388 buffer * const b = chunkqueue_append_buffer_open(cq);
389 buffer_copy_string_len(b, CONST_STR_LEN("HTTP/1.1 "));
390 http_status_append(b, r->http_status);
391 for (uint32_t i = 0; i < r->resp_headers.used; ++i) {
392 const data_string * const ds = (data_string *)r->resp_headers.data[i];
393 const uint32_t klen = buffer_clen(&ds->key);
394 const uint32_t vlen = buffer_clen(&ds->value);
395 if (0 == klen || 0 == vlen) continue;
396 buffer_append_str2(b, CONST_STR_LEN("\r\n"), ds->key.ptr, klen);
397 buffer_append_str2(b, CONST_STR_LEN(": "), ds->value.ptr, vlen);
398 }
399 buffer_append_string_len(b, CONST_STR_LEN("\r\n\r\n"));
400 chunkqueue_append_buffer_commit(cq);
401
402 if (con->traffic_limit_reached)
403 return 1; /* success; send later if throttled */
404
405 return connection_write_1xx_info(r, con);
406 }
407
408
409 static int
connection_write_100_continue(request_st * const r,connection * const con)410 connection_write_100_continue (request_st * const r, connection * const con)
411 {
412 /* Make best effort to send "HTTP/1.1 100 Continue" */
413 static const char http_100_continue[] = "HTTP/1.1 100 Continue\r\n\r\n";
414
415 if (con->traffic_limit_reached)
416 return 1; /* success; skip sending if throttled */
417
418 chunkqueue * const cq = con->write_queue; /*(bypass r->write_queue)*/
419 chunkqueue_append_mem(cq, http_100_continue, sizeof(http_100_continue)-1);
420 return connection_write_1xx_info(r, con);
421 }
422
423
/* Write pending data in con->write_queue, if the connection is writable.
 *
 * Returns the resulting request state: CON_STATE_RESPONSE_END once all data
 * is written and the response body is finished, CON_STATE_ERROR on write
 * failure or remote close, otherwise CON_STATE_WRITE (state unchanged). */
static int connection_handle_write(request_st * const r, connection * const con) {
    /*assert(!chunkqueue_is_empty(cq));*//* checked by callers */

    if (con->is_writable <= 0) return CON_STATE_WRITE;

    const int rc =
      connection_write_chunkqueue(con, con->write_queue, MAX_WRITE_LIMIT);

    if (-1 == rc || -2 == rc) {
        /* -1: error on our side (logged); -2: remote close (not logged) */
        if (-1 == rc) {
            log_error(r->conf.errh, __FILE__, __LINE__,
              "connection closed: write failed on fd %d", con->fd);
        }
        connection_set_state_error(r, CON_STATE_ERROR);
        return CON_STATE_ERROR;
    }

    if (0 == rc) {
        /* write queue drained */
        if (r->resp_body_finished) {
            connection_set_state(r, CON_STATE_RESPONSE_END);
            return CON_STATE_RESPONSE_END;
        }
    }
    else if (1 == rc) {
        /* not finished yet -> WRITE */
        /* do not spin trying to send HTTP/2 server Connection Preface
         * while waiting for TLS negotiation to complete */
        if (con->write_queue->bytes_out)
            con->is_writable = 0;
    }

    return CON_STATE_WRITE; /*(state did not change)*/
}
456
/* Write-state handler: alternate between flushing r->write_queue to the
 * client and calling the handler module's handle_subrequest hook to
 * produce more response data.
 *
 * Returns CON_STATE_RESPONSE_END when the response has been completely
 * written, CON_STATE_ERROR on write or handler failure, or CON_STATE_WRITE
 * when the request remains in the write state. */
static int connection_handle_write_state(request_st * const r, connection * const con) {
    do {
        /* only try to write if we have something in the queue */
        if (!chunkqueue_is_empty(&r->write_queue)) {
            /* (requests with http_version above HTTP/1.1 are not written
             *  to the socket from here) */
            if (r->http_version <= HTTP_VERSION_1_1) {
                int rc = connection_handle_write(r, con);
                if (rc != CON_STATE_WRITE) return rc;
            }
        } else if (r->resp_body_finished) {
            /* nothing queued and body complete: response is done */
            connection_set_state(r, CON_STATE_RESPONSE_END);
            return CON_STATE_RESPONSE_END;
        }

        /* give the handler module a chance to generate more response body */
        if (r->handler_module && !r->resp_body_finished) {
            const plugin * const p = r->handler_module;
            int rc = p->handle_subrequest(r, p->data);
            switch(rc) {
            case HANDLER_WAIT_FOR_EVENT:
            case HANDLER_FINISHED:
            case HANDLER_GO_ON:
                break;
            case HANDLER_COMEBACK:
            default:
                /* unexpected return values are logged and then treated
                 * as errors by falling through to HANDLER_ERROR */
                log_error(r->conf.errh, __FILE__, __LINE__,
                  "unexpected subrequest handler ret-value: %d %d",
                  con->fd, rc);
                __attribute_fallthrough__
            case HANDLER_ERROR:
                connection_set_state_error(r, CON_STATE_ERROR);
                return CON_STATE_ERROR;
            }
        }
    /* loop again (HTTP/1.x only) while there is queued data that can be
     * written now (socket writable and not throttled), or, if the queue is
     * empty, when the response body has been marked finished */
    } while (r->http_version <= HTTP_VERSION_1_1
             && (!chunkqueue_is_empty(&r->write_queue)
                 ? con->is_writable > 0 && 0 == con->traffic_limit_reached
                 : r->resp_body_finished));

    return CON_STATE_WRITE;
}
496
497
498 __attribute_cold__
connection_init(server * srv)499 static connection *connection_init(server *srv) {
500 connection * const con = calloc(1, sizeof(*con));
501 force_assert(NULL != con);
502
503 con->srv = srv;
504 con->plugin_slots = srv->plugin_slots;
505 con->config_data_base = srv->config_data_base;
506
507 request_st * const r = &con->request;
508 request_init_data(r, con, srv);
509 con->write_queue = &r->write_queue;
510 con->read_queue = &r->read_queue;
511
512 /* init plugin-specific per-connection structures */
513 con->plugin_ctx = calloc(1, (srv->plugins.used + 1) * sizeof(void *));
514 force_assert(NULL != con->plugin_ctx);
515
516 return con;
517 }
518
519
/* Release all memory owned by a connection object, including the object
 * itself.  Read/write queues are freed separately only when they are not
 * the queues embedded in the request (which request_free_data() handles). */
static void connection_free(connection * const con) {
    request_st * const r = &con->request;

    connection_reset(con);

    /* separately allocated queues (if any) */
    if (&r->write_queue != con->write_queue)
        chunkqueue_free(con->write_queue);
    if (&r->read_queue != con->read_queue)
        chunkqueue_free(con->read_queue);

    request_free_data(r);

    free(con->dst_addr_buf.ptr);
    free(con->plugin_ctx);
    free(con);
}
534
/* Free every idle connection object held in srv->conns_pool,
 * leaving the pool empty. */
void connections_pool_clear(server * const srv) {
    for (connection *con = srv->conns_pool; con; con = srv->conns_pool) {
        /* unlink before freeing */
        srv->conns_pool = con->next;
        connection_free(con);
    }
}
542
/* Free all connection objects: first the idle pool, then every connection
 * still on the active list srv->conns. */
void connections_free(server *srv) {
    connections_pool_clear(srv);

    for (connection *con = srv->conns; con; con = srv->conns) {
        /* unlink before freeing */
        srv->conns = con->next;
        connection_free(con);
    }
}
552
553
/* Reset the embedded request and zero the connection's byte counters and
 * checkpoints; mark the connection readable. */
static void connection_reset(connection *con) {
    request_st * const r = &con->request;
    request_reset(r);

    /* byte accounting starts over */
    con->bytes_read = 0;
    con->bytes_written = 0;
    con->bytes_written_cur_second = 0;
    r->bytes_read_ckpt = 0;
    r->bytes_written_ckpt = 0;

    con->is_readable = 1;
}
565
566
567 __attribute_cold__
568 static chunk *
connection_discard_blank_line(chunkqueue * const cq,uint32_t header_len)569 connection_discard_blank_line (chunkqueue * const cq, uint32_t header_len)
570 {
571 /*(separate func only to be able to mark with compiler hint as cold)*/
572 chunkqueue_mark_written(cq, header_len);
573 return cq->first; /* refresh c after chunkqueue_mark_written() */
574 }
575
576
/* Try to obtain more request header data.
 *
 * If c is NULL or is the last chunk, and the connection is readable, read
 * from the network into cq.  If the queue then spans multiple chunks and
 * olen is nonzero, compact leading data into a single memory chunk.
 *
 * olen is the number of bytes of the first chunk already examined by the
 * caller.  Returns the first chunk of cq if it now holds more than olen
 * bytes beyond its offset, else NULL (also NULL if the connection switched
 * to HTTP/2). */
static chunk * connection_read_header_more(connection *con, chunkqueue *cq, chunk *c, const size_t olen) {
    /*(should not be reached by HTTP/2 streams)*/
    /*if (r->http_version == HTTP_VERSION_2) return NULL;*/
    /*(However, new connections over TLS may become HTTP/2 connections via ALPN
     * and return from this routine with r->http_version == HTTP_VERSION_2) */

    if (con->is_readable > 0 && (NULL == c || NULL == c->next)) {
        request_st * const r = &con->request;
        con->read_idle_ts = log_monotonic_secs;
        if (0 != con->network_read(con, cq, MAX_READ_LIMIT))
            connection_set_state_error(r, CON_STATE_ERROR);
        /* check if switched to HTTP/2 (ALPN "h2" during TLS negotiation) */
        if (r->http_version == HTTP_VERSION_2) return NULL;
    }

    if (0 != olen && cq->first != cq->last) {
        const size_t clen = chunkqueue_length(cq);
        /* round olen up to multiple of 16k, and add another 16k
         * unless that leaves more than 1k of headroom */
        size_t block = (olen + (16384-1)) & ~(16384-1);
        if (block - olen <= 1024)
            block += 16384;
        if (block > clen)
            block = clen;
        chunkqueue_compact_mem(cq, block);
    }

    /* detect if data is added to chunk */
    chunk * const head = cq->first;
    if (NULL == head)
        return NULL;
    return ((size_t)head->offset + olen < buffer_clen(head->mem))
      ? head
      : NULL;
}
607
608
609 static void
connection_transition_h2(request_st * const h2r,connection * const con)610 connection_transition_h2 (request_st * const h2r, connection * const con)
611 {
612 buffer_copy_string_len(&h2r->target, CONST_STR_LEN("*"));
613 buffer_copy_string_len(&h2r->target_orig, CONST_STR_LEN("*"));
614 buffer_copy_string_len(&h2r->uri.path, CONST_STR_LEN("*"));
615 h2r->http_method = HTTP_METHOD_PRI;
616 h2r->reqbody_length = -1; /*(unnecessary for h2r?)*/
617 h2r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLIN;
618
619 /* (h2r->state == CON_STATE_READ) for transition by ALPN
620 * or starting cleartext HTTP/2 with Prior Knowledge
621 * (e.g. via HTTP Alternative Services)
622 * (h2r->state == CON_STATE_RESPONSE_END) for Upgrade: h2c */
623
624 if (h2r->state != CON_STATE_ERROR)
625 connection_set_state(h2r, CON_STATE_WRITE);
626
627 #if 0 /* ... if it turns out we need a separate fdevent handler for HTTP/2 */
628 con->fdn->handler = connection_handle_fdevent_h2;
629 #endif
630
631 if (NULL == con->h2) /*(not yet transitioned to HTTP/2; not Upgrade: h2c)*/
632 h2_init_con(h2r, con, NULL);
633 }
634
635
/**
 * handle request header read
 *
 * we get called by the state-engine and by the fdevent-handler
 *
 * Scans con->read_queue for a complete set of HTTP/1.x request headers,
 * reading more data from the network as needed, and processes the headers
 * once complete.
 *
 * Returns 1 when the request state has been set to CON_STATE_REQUEST_END
 * (headers processed, or an error status 400/431 was set), and 0 when the
 * headers are still incomplete, when an HTTP/2 connection preface was
 * detected, or when an Upgrade: h2c was accepted.
 */
__attribute_noinline__
static int connection_handle_read_state(connection * const con) {
    /*(should not be reached by HTTP/2 streams)*/
    chunkqueue * const cq = con->read_queue;
    chunk *c = cq->first;
    uint32_t clen = 0;
    uint32_t header_len = 0;
    request_st * const r = &con->request;
    uint8_t keepalive_request_start = 0;
    uint8_t pipelined_request_start = 0;
    uint8_t discard_blank = 0;
    /* hoff[0]: number of lines; hoff[1]: base offset; hoff[2..]: per-line
     * offsets from base, filled in by http_header_parse_hoff() */
    unsigned short hoff[8192]; /* max num header lines + 3; 16k on stack */

    /* not the first request on this connection (keep-alive) */
    if (con->request_count > 1) {
        discard_blank = 1;
        /* no bytes read since the checkpoint taken at end of prior request */
        if (con->bytes_read == r->bytes_read_ckpt) {
            keepalive_request_start = 1;
            if (NULL != c) { /* !chunkqueue_is_empty(cq)) */
                pipelined_request_start = 1;
                /* partial header of next request has already been read,
                 * so optimistically check for more data received on
                 * socket while processing the previous request */
                con->is_readable = 1;
                /*(if partially read next request and unable to read any bytes,
                 * then will unnecessarily scan again before subsequent read)*/
            }
        }
    }

    /* each 'continue' below jumps to the loop condition, which attempts to
     * read more data; the loop ends when no more data could be obtained
     * (c == NULL) or via 'break' when complete headers are found */
    do {
        if (NULL == c) continue;
        clen = buffer_clen(c->mem) - c->offset;
        if (0 == clen) continue;
        if (__builtin_expect( (c->offset > USHRT_MAX), 0)) /*(highly unlikely)*/
            chunkqueue_compact_mem_offset(cq);

        hoff[0] = 1;                         /* number of lines */
        hoff[1] = (unsigned short)c->offset; /* base offset for all lines */
        /*hoff[2] = ...;*/                   /* offset from base for 2nd line */

        header_len = http_header_parse_hoff(c->mem->ptr + c->offset,clen,hoff);

        /* casting to (unsigned short) might truncate, and the hoff[]
         * addition might overflow, but max_request_field_size is USHRT_MAX,
         * so failure will be detected below */
        const uint32_t max_request_field_size = r->conf.max_request_field_size;
        /* (header_len == 0 means headers incomplete; in that case the
         *  amount of data buffered so far is checked against the limit) */
        if ((header_len ? header_len : clen) > max_request_field_size
            || hoff[0] >= sizeof(hoff)/sizeof(hoff[0])-1) {
            log_error(r->conf.errh, __FILE__, __LINE__, "%s",
              "oversized request-header -> sending Status 431");
            r->http_status = 431; /* Request Header Fields Too Large */
            r->keep_alive = 0;
            connection_set_state(r, CON_STATE_REQUEST_END);
            return 1;
        }

        if (__builtin_expect( (0 != header_len), 1)) {
            if (__builtin_expect( (hoff[0] > 1), 1))
                break; /* common case; request headers complete */

            /* single line parsed (hoff[0] == 1): a lone blank line */
            if (discard_blank) { /* skip one blank line e.g. following POST */
                if (header_len == clen) continue;
                const int ch = c->mem->ptr[c->offset+header_len];
                if (ch != '\r' && ch != '\n') {
                    /* discard prior blank line if next line is not blank */
                    discard_blank = 0;
                    clen = 0;/*(for connection_read_header_more() to return c)*/
                    c = connection_discard_blank_line(cq, header_len);/*cold*/
                    continue;
                } /*(else fall through to error out in next block)*/
            }
        }

        /* first byte is a control character: reject immediately */
        if (((unsigned char *)c->mem->ptr)[c->offset] < 32) {
            /* expecting ASCII method beginning with alpha char
             * or HTTP/2 pseudo-header beginning with ':' */
            /*(TLS handshake begins with SYN 0x16 (decimal 22))*/
            log_error(r->conf.errh, __FILE__, __LINE__, "%s",
              c->mem->ptr[c->offset] == 0x16
                ? "unexpected TLS ClientHello on clear port"
                : "invalid request-line -> sending Status 400");
            r->http_status = 400; /* Bad Request */
            r->keep_alive = 0;
            connection_set_state(r, CON_STATE_REQUEST_END);
            return 1;
        }
    } while ((c = connection_read_header_more(con, cq, c, clen)));

    if (keepalive_request_start) {
        if (con->bytes_read > r->bytes_read_ckpt) {
            /* update r->start_hp.tv_sec timestamp when first byte of
             * next request is received on a keep-alive connection */
            r->start_hp.tv_sec = log_epoch_secs;
            if (r->conf.high_precision_timestamps)
                log_clock_gettime_realtime(&r->start_hp);
        }
        if (pipelined_request_start && c)
            con->read_idle_ts = log_monotonic_secs;
    }

    if (NULL == c) return 0; /* incomplete request headers */

  #ifdef __COVERITY__
    if (buffer_clen(c->mem) < hoff[1]) {
        return 1;
    }
  #endif

    /* start of request headers within the chunk buffer */
    char * const hdrs = c->mem->ptr + hoff[1];

    if (con->request_count > 1) {
        /* clear buffers which may have been kept for reporting on keep-alive,
         * (e.g. mod_status) */
        request_reset_ex(r);
    }
    /* RFC7540 3.5 HTTP/2 Connection Preface
     * "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
     * (Connection Preface MUST be exact match)
     * If ALT-SVC used to advertise HTTP/2, then client might start
     * http connection (not TLS) sending HTTP/2 connection preface.
     * (note: intentionally checking only on initial request) */
    else if (!con->is_ssl_sock && r->conf.h2proto
             && hoff[0] == 2 && hoff[2] == 16
             && hdrs[0]=='P' && hdrs[1]=='R' && hdrs[2]=='I' && hdrs[3]==' ') {
        /* (only the leading "PRI " and the line layout are checked here) */
        r->http_version = HTTP_VERSION_2;
        return 0;
    }

    r->rqst_header_len = header_len;
    if (r->conf.log_request_header)
        log_error_multiline(r->conf.errh, __FILE__, __LINE__,
                            hdrs, header_len, "fd:%d rqst: ", con->fd);
    http_request_headers_process(r, hdrs, hoff, con->proto_default_port);
    /* consume the parsed request headers from the read queue */
    chunkqueue_mark_written(cq, r->rqst_header_len);
    connection_set_state(r, CON_STATE_REQUEST_END);

    if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE)
        && 0 == r->http_status
        && h2_check_con_upgrade_h2c(r)) {
        /*(Upgrade: h2c over cleartext does not have SNI; no COMP_HTTP_HOST)*/
        r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
                                | (1 << COMP_HTTP_REMOTE_IP);
        /*connection_handle_write(r, con);*//* defer write to network */
        return 0;
    }

    return 1;
}
789
790
connection_handle_fdevent(void * const context,const int revents)791 static handler_t connection_handle_fdevent(void * const context, const int revents) {
792 connection * restrict con = context;
793 const int is_ssl_sock = con->is_ssl_sock;
794
795 joblist_append(con);
796
797 if (revents & ~(FDEVENT_IN | FDEVENT_OUT))
798 con->revents_err |= (revents & ~(FDEVENT_IN | FDEVENT_OUT));
799
800 if (revents & (FDEVENT_IN | FDEVENT_OUT)) {
801 if (is_ssl_sock) /*(ssl may read and write for both reads and writes)*/
802 con->is_readable = con->is_writable = 1;
803 else {
804 if (revents & FDEVENT_IN)
805 con->is_readable = 1;
806 if (revents & FDEVENT_OUT)
807 con->is_writable = 1;
808 }
809 }
810
811 return HANDLER_FINISHED;
812 }
813
814
815 __attribute_cold__
connection_read_cq_err(connection * con)816 static int connection_read_cq_err(connection *con) {
817 request_st * const r = &con->request;
818 #if defined(__WIN32)
819 int lastError = WSAGetLastError();
820 switch (lastError) {
821 case EAGAIN:
822 return 0;
823 case EINTR:
824 /* we have been interrupted before we could read */
825 con->is_readable = 1;
826 return 0;
827 case ECONNRESET:
828 /* suppress logging for this error, expected for keep-alive */
829 break;
830 default:
831 log_error(r->conf.errh, __FILE__, __LINE__,
832 "connection closed - recv failed: %d", lastError);
833 break;
834 }
835 #else /* __WIN32 */
836 switch (errno) {
837 case EAGAIN:
838 return 0;
839 case EINTR:
840 /* we have been interrupted before we could read */
841 con->is_readable = 1;
842 return 0;
843 case ECONNRESET:
844 /* suppress logging for this error, expected for keep-alive */
845 break;
846 default:
847 log_perror(r->conf.errh, __FILE__, __LINE__,
848 "connection closed - read failed");
849 break;
850 }
851 #endif /* __WIN32 */
852
853 connection_set_state_error(r, CON_STATE_ERROR);
854 return -1;
855 }
856
857
858 /* 0: everything ok, -1: error, -2: con closed */
connection_read_cq(connection * con,chunkqueue * cq,off_t max_bytes)859 static int connection_read_cq(connection *con, chunkqueue *cq, off_t max_bytes) {
860 ssize_t len;
861 size_t mem_len = 0;
862
863 do {
864 /* obtain chunk memory into which to read
865 * fill previous chunk if it has a reasonable amount of space available
866 * (use mem_len=0 to obtain large buffer at least half of chunk_buf_sz)
867 */
868 chunk *ckpt = cq->last;
869 char * const mem = chunkqueue_get_memory(cq, &mem_len);
870 if (mem_len > (size_t)max_bytes) mem_len = (size_t)max_bytes;
871
872 #if defined(__WIN32)
873 len = recv(con->fd, mem, mem_len, 0);
874 #else
875 len = read(con->fd, mem, mem_len);
876 #endif
877
878 chunkqueue_use_memory(cq, ckpt, len > 0 ? len : 0);
879
880 if (len != (ssize_t)mem_len) {
881 /* we got less then expected, wait for the next fd-event */
882 con->is_readable = 0;
883
884 if (len > 0) {
885 con->bytes_read += len;
886 return 0;
887 }
888 else if (0 == len) /* other end close connection -> KEEP-ALIVE */
889 return -2; /* (pipelining) */
890 else
891 return connection_read_cq_err(con);
892 }
893
894 con->bytes_read += len;
895 max_bytes -= len;
896
897 int frd;
898 mem_len = (0 == fdevent_ioctl_fionread(con->fd, S_IFSOCK, &frd))
899 ? (frd < max_bytes) ? (size_t)frd : (size_t)max_bytes
900 : 0;
901 } while (max_bytes);
902 return 0;
903 }
904
905
/* Default network_write callback for cleartext connections: hand cq to the
 * server's configured network write backend for this socket. */
static int connection_write_cq(connection *con, chunkqueue *cq, off_t max_bytes) {
    server * const srv = con->srv;
    const request_st * const r = &con->request;
    return srv->network_backend_write(con->fd, cq, max_bytes, r->conf.errh);
}
910
911
912 static handler_t connection_handle_read_post_state(request_st * const r);
913
connection_accepted(server * srv,const server_socket * srv_socket,sock_addr * cnt_addr,int cnt)914 connection *connection_accepted(server *srv, const server_socket *srv_socket, sock_addr *cnt_addr, int cnt) {
915 connection *con;
916
917 srv->cur_fds++;
918
919 /* ok, we have the connection, register it */
920 #if 0
921 log_error(srv->errh, __FILE__, __LINE__, "accepted() %d", cnt);
922 #endif
923 srv->con_opened++;
924
925 con = connections_get_new_connection(srv);
926
927 con->fd = cnt;
928 con->fdn = fdevent_register(srv->ev, con->fd, connection_handle_fdevent, con);
929 con->network_read = connection_read_cq;
930 con->network_write = connection_write_cq;
931 con->reqbody_read = connection_handle_read_post_state;
932
933 request_st * const r = &con->request;
934 connection_set_state(r, CON_STATE_REQUEST_START);
935
936 con->connection_start = log_monotonic_secs;
937 con->dst_addr = *cnt_addr;
938 sock_addr_cache_inet_ntop_copy_buffer(&con->dst_addr_buf,
939 &con->dst_addr);
940 con->srv_socket = srv_socket;
941 con->is_ssl_sock = srv_socket->is_ssl;
942 con->proto_default_port = 80; /* "http" */
943
944 config_cond_cache_reset(r);
945 r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
946 | (1 << COMP_HTTP_REMOTE_IP);
947
948 if (HANDLER_GO_ON != plugins_call_handle_connection_accept(con)) {
949 connection_reset(con);
950 connection_close(con);
951 return NULL;
952 }
953 if (r->http_status < 0) connection_set_state(r, CON_STATE_WRITE);
954 return con;
955 }
956
957
958 __attribute_cold__
959 __attribute_noinline__
960 static const char *
connection_get_state(request_state_t state)961 connection_get_state (request_state_t state)
962 {
963 switch (state) {
964 case CON_STATE_CONNECT: return "connect";
965 case CON_STATE_READ: return "read";
966 case CON_STATE_READ_POST: return "readpost";
967 case CON_STATE_WRITE: return "write";
968 case CON_STATE_CLOSE: return "close";
969 case CON_STATE_ERROR: return "error";
970 case CON_STATE_HANDLE_REQUEST: return "handle-req";
971 case CON_STATE_REQUEST_START: return "req-start";
972 case CON_STATE_REQUEST_END: return "req-end";
973 case CON_STATE_RESPONSE_START: return "resp-start";
974 case CON_STATE_RESPONSE_END: return "resp-end";
975 default: return "(unknown)";
976 }
977 }
978
979
980 static void connection_state_machine_h2 (request_st *h2r, connection *con);
981
982
/* Run the state machine for request r on connection con.
 *
 * Each pass of the do/while dispatches on r->state (saved in ostate); the
 * loop repeats for as long as a pass leaves r->state different from ostate.
 * Many cases fall through into the following case, so a single pass may
 * carry a request through several states.
 *
 * Returns without re-checking the loop condition when:
 *  - the read state completed and the request is HTTP/2 (the connection is
 *    transitioned and handed to connection_state_machine_h2()),
 *  - http_response_handler() returned HANDLER_WAIT_FOR_EVENT, or
 *  - r is an HTTP/2 stream (r != &con->request) that has reached
 *    CON_STATE_RESPONSE_END or CON_STATE_ERROR. */
static void
connection_state_machine_loop (request_st * const r, connection * const con)
{
    request_state_t ostate;
    do {
        if (r->conf.log_state_handling) {
            log_error(r->conf.errh, __FILE__, __LINE__,
              "state for fd:%d id:%d %s", con->fd, r->h2id,
              connection_get_state(r->state));
        }

        switch ((ostate = r->state)) {
          case CON_STATE_REQUEST_START: /* transient */
            /*(should not be reached by HTTP/2 streams)*/
            /* record request start time and reset the read idle timer */
            r->start_hp.tv_sec = log_epoch_secs;
            con->read_idle_ts = log_monotonic_secs;
            if (r->conf.high_precision_timestamps)
                log_clock_gettime_realtime(&r->start_hp);

            con->request_count++;
            r->loops_per_request = 0;

            connection_set_state(r, CON_STATE_READ);
            __attribute_fallthrough__
          case CON_STATE_READ:
            /*(should not be reached by HTTP/2 streams)*/
            if (!connection_handle_read_state(con)) {
                /* read state did not complete an HTTP/1.x request:
                 * either switch to HTTP/2 processing or leave the switch */
                if (r->http_version == HTTP_VERSION_2) {
                    connection_transition_h2(r, con);
                    connection_state_machine_h2(r, con);
                    return;
                }
                break;
            }
            /*if (r->state != CON_STATE_REQUEST_END) break;*/
            __attribute_fallthrough__
          case CON_STATE_REQUEST_END: /* transient */
            /* choose next state by whether a request body is expected;
             * ostate is updated too so that this transition by itself
             * does not count as a state change for the loop condition */
            ostate = (0 == r->reqbody_length)
              ? CON_STATE_HANDLE_REQUEST
              : CON_STATE_READ_POST;
            connection_set_state(r, ostate);
            __attribute_fallthrough__
          case CON_STATE_READ_POST:
          case CON_STATE_HANDLE_REQUEST:
            switch (http_response_handler(r)) {
              case HANDLER_GO_ON:/*CON_STATE_RESPONSE_START occurred;transient*/
              case HANDLER_FINISHED:
                break;
              case HANDLER_WAIT_FOR_EVENT:
                return;
              case HANDLER_COMEBACK:
                /* redo loop; will not match r->state */
                /*(continue jumps to the do/while condition, which is
                 * true because ostate now differs from r->state)*/
                ostate = CON_STATE_CONNECT;
                continue;
              /*case HANDLER_ERROR:*/
              default:
                /* r->state now differs from ostate, so the loop runs again
                 * and dispatches on CON_STATE_ERROR */
                connection_set_state_error(r, CON_STATE_ERROR);
                continue;
            }
            /*__attribute_fallthrough__*/
          /*case CON_STATE_RESPONSE_START:*//*occurred;transient*/
            /* emit response headers using the framing for the protocol */
            if (r->http_version > HTTP_VERSION_1_1)
                h2_send_headers(r, con);
            else
                http_response_write_header(r);
            connection_set_state(r, CON_STATE_WRITE);
            __attribute_fallthrough__
          case CON_STATE_WRITE:
            /* remain in the write state unless the write handler reports
             * that the response has ended */
            if (connection_handle_write_state(r, con)
                != CON_STATE_RESPONSE_END)
                break;
            __attribute_fallthrough__
          case CON_STATE_RESPONSE_END: /* transient */
          case CON_STATE_ERROR:        /* transient */
            /* HTTP/2 streams are not finished here; the function returns
             * and leaves them in their current state */
            if (r->http_version > HTTP_VERSION_1_1 && r != &con->request)
                return;
            connection_handle_response_end_state(r, con);
            break;
          case CON_STATE_CLOSE:
            /*(should not be reached by HTTP/2 streams)*/
            connection_handle_close_state(con);
            break;
          case CON_STATE_CONNECT:
            break;
          default:
            log_error(r->conf.errh, __FILE__, __LINE__,
              "unknown state: %d %d", con->fd, r->state);
            break;
        }
    } while (ostate != (request_state_t)r->state);
}
1074
1075
1076 __attribute_cold__
1077 static void
connection_revents_err(request_st * const r,connection * const con)1078 connection_revents_err (request_st * const r, connection * const con)
1079 {
1080 /* defer handling FDEVENT_HUP and FDEVENT_ERR to here in order to
1081 * first attempt (in callers) to read data in kernel socket buffers */
1082 /*assert(con->revents_err & ~(FDEVENT_IN | FDEVENT_OUT));*/
1083 const int revents = (int)con->revents_err;
1084 con->revents_err = 0;
1085
1086 if (r->state == CON_STATE_CLOSE)
1087 con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
1088 else if (revents & FDEVENT_HUP)
1089 connection_set_state_error(r, CON_STATE_ERROR);
1090 else if (revents & FDEVENT_RDHUP) {
1091 int events = fdevent_fdnode_interest(con->fdn);
1092 events &= ~(FDEVENT_IN|FDEVENT_RDHUP);
1093 r->conf.stream_request_body &=
1094 ~(FDEVENT_STREAM_REQUEST_BUFMIN|FDEVENT_STREAM_REQUEST_POLLIN);
1095 r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLRDHUP;
1096 con->is_readable = 1; /*(can read 0 for end-of-stream)*/
1097 if (chunkqueue_is_empty(con->read_queue)) r->keep_alive = 0;
1098 if (r->reqbody_length < -1)/*(transparent proxy mode; no more rd data)*/
1099 r->reqbody_length = r->reqbody_queue.bytes_in;
1100 if (sock_addr_get_family(&con->dst_addr) == AF_UNIX) {
1101 /* future: will getpeername() on AF_UNIX check if still connected?*/
1102 fdevent_fdnode_event_set(con->srv->ev, con->fdn, events);
1103 }
1104 else if (fdevent_is_tcp_half_closed(con->fd)) {
1105 /* Success of fdevent_is_tcp_half_closed() after FDEVENT_RDHUP
1106 * indicates TCP FIN received, but does not distinguish between
1107 * client shutdown(fd, SHUT_WR) and client close(fd). Remove
1108 * FDEVENT_RDHUP so that we do not spin on ready event. However,
1109 * a later TCP RST will not be detected until next write to socket.
1110 * future: might getpeername() to check for TCP RST on half-closed
1111 * sockets (without FDEVENT_RDHUP interest) when checking for write
1112 * timeouts once a second in server.c, though getpeername() on
1113 * Windows might not indicate this */
1114 r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_TCP_FIN;
1115 fdevent_fdnode_event_set(con->srv->ev, con->fdn, events);
1116 }
1117 else {
1118 /* Failure of fdevent_is_tcp_half_closed() indicates TCP RST
1119 * (or unable to tell (unsupported OS), though should not
1120 * be setting FDEVENT_RDHUP in that case) */
1121 connection_set_state_error(r, CON_STATE_ERROR);
1122 }
1123 }
1124 else if (revents & FDEVENT_ERR) /* error, connection reset */
1125 connection_set_state_error(r, CON_STATE_ERROR);
1126 else
1127 log_error(r->conf.errh, __FILE__, __LINE__,
1128 "connection closed: poll() -> ??? %d", revents);
1129 }
1130
1131
1132 static void
connection_set_fdevent_interest(request_st * const r,connection * const con)1133 connection_set_fdevent_interest (request_st * const r, connection * const con)
1134 {
1135 if (con->fd < 0) return;
1136
1137 if (con->revents_err && r->state != CON_STATE_ERROR) {
1138 connection_revents_err(r, con); /* resets con->revents_err = 0 */
1139 connection_state_machine(con);
1140 return;
1141 /* connection_state_machine() will end up calling back into
1142 * connection_set_fdevent_interest(), but with 0 == con->revents_err */
1143 }
1144
1145 int n = 0;
1146 switch(r->state) {
1147 case CON_STATE_READ:
1148 n = FDEVENT_IN;
1149 if (!(r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLRDHUP))
1150 n |= FDEVENT_RDHUP;
1151 break;
1152 case CON_STATE_WRITE:
1153 if (!chunkqueue_is_empty(con->write_queue)
1154 && 0 == con->is_writable && 0 == con->traffic_limit_reached)
1155 n |= FDEVENT_OUT;
1156 __attribute_fallthrough__
1157 case CON_STATE_READ_POST:
1158 if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLIN)
1159 n |= FDEVENT_IN;
1160 if (!(r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLRDHUP))
1161 n |= FDEVENT_RDHUP;
1162 break;
1163 case CON_STATE_CLOSE:
1164 n = FDEVENT_IN;
1165 break;
1166 case CON_STATE_CONNECT:
1167 return;
1168 default:
1169 break;
1170 }
1171
1172 const int events = fdevent_fdnode_interest(con->fdn);
1173 if (con->is_readable < 0) {
1174 con->is_readable = 0;
1175 n |= FDEVENT_IN;
1176 }
1177 if (con->is_writable < 0) {
1178 con->is_writable = 0;
1179 n |= FDEVENT_OUT;
1180 }
1181 if (events & FDEVENT_RDHUP)
1182 n |= FDEVENT_RDHUP;
1183
1184 if (n == events) return;
1185
1186 /* update timestamps when enabling interest in events */
1187 if ((n & FDEVENT_IN) && !(events & FDEVENT_IN))
1188 con->read_idle_ts = log_monotonic_secs;
1189 if ((n & FDEVENT_OUT) && !(events & FDEVENT_OUT))
1190 con->write_request_ts = log_monotonic_secs;
1191 fdevent_fdnode_event_set(con->srv->ev, con->fdn, n);
1192 }
1193
1194
1195 __attribute_cold__
1196 static void
connection_request_end_h2(request_st * const h2r,connection * const con)1197 connection_request_end_h2 (request_st * const h2r, connection * const con)
1198 {
1199 if (h2r->keep_alive >= 0) {
1200 h2r->keep_alive = -1;
1201 h2_send_goaway(con, H2_E_NO_ERROR);
1202 }
1203 else /*(abort connection upon second request to close h2 connection)*/
1204 h2_send_goaway(con, H2_E_ENHANCE_YOUR_CALM);
1205 }
1206
1207
/* Run one processing pass over an HTTP/2 connection.
 *
 * h2r is the connection-level request (callers in this file pass
 * &con->request); the individual streams are the request_st entries in
 * con->h2->r[0 .. rused-1].  The pass, in order:
 *  1. parses frames already queued in con->read_queue and, if the socket is
 *     marked readable, reads more data and parses again
 *     (only while h2c->sent_goaway <= 0),
 *  2. runs the request state machine on every active stream and hands
 *     pending response data of each stream to h2_send_cqdata(), limited by
 *     a coarse byte budget (only while h2c->sent_goaway <= 0),
 *  3. retires all remaining streams once h2c->sent_goaway > 0,
 *  4. writes con->write_queue to the socket while h2r is in the write state,
 *  5. reschedules the connection if more work is pending, updates whether
 *     the connection wants to poll for input, and refreshes the fd event
 *     interest. */
static void
connection_state_machine_h2 (request_st * const h2r, connection * const con)
{
    h2con * const h2c = con->h2;

    /* step 1: parse queued frames; then optimistically read and parse more */
    if (h2c->sent_goaway <= 0
        && (chunkqueue_is_empty(con->read_queue) || h2_parse_frames(con))
        && con->is_readable > 0) {
        chunkqueue * const cq = con->read_queue;
        const off_t mark = cq->bytes_in;
        if (0 == con->network_read(con, cq, MAX_READ_LIMIT)) {
            /* parse only if the read actually added data */
            if (mark < cq->bytes_in)
                h2_parse_frames(con);
        }
        else {
            /* network error; do not send GOAWAY, but pretend that we did */
            h2c->sent_goaway = H2_E_CONNECT_ERROR; /*any error (not NO_ERROR)*/
            connection_set_state_error(h2r, CON_STATE_ERROR);
        }
    }

    /* process requests on HTTP/2 streams */
    int resched = 0;
    if (h2c->sent_goaway <= 0 && h2c->rused) {
        /* coarse check for write throttling
         * (connection.kbytes-per-second, server.kbytes-per-second)
         * obtain an approximate limit, not refreshed per request_st,
         * even though we are not calculating response HEADERS frames
         * or frame overhead here */
        off_t max_bytes = con->is_writable > 0
          ? connection_write_throttle(con, MAX_WRITE_LIMIT)
          : 0;
        /* reduce the budget by what is already staged for sending */
        const off_t cqlen = chunkqueue_length(con->write_queue);
        if (cqlen > 8192 && max_bytes > 65536) max_bytes = 65536;
        max_bytes -= cqlen;
        if (max_bytes < 0) max_bytes = 0;

        /* XXX: to avoid buffer bloat due to staging too much data in
         * con->write_queue, consider setting limit on how much is staged
         * for sending on con->write_queue: adjusting max_bytes down */

        /* XXX: TODO: process requests in stream priority order */
        for (uint32_t i = 0; i < h2c->rused; ++i) {
            request_st * const r = h2c->r[i];
            /* future: might track read/write interest per request
             * to avoid iterating through all active requests */

          #if 0
            const int log_state_handling = r->conf.log_state_handling;
            if (log_state_handling)
                log_error(r->conf.errh, __FILE__, __LINE__,
                  "state at enter %d %d %s", con->fd, r->h2id,
                  connection_get_state(r->state));
          #endif

            connection_state_machine_loop(r, con);

            /* send response body data once headers have been produced and
             * either the body is complete or response streaming is enabled */
            if (r->resp_header_len && !chunkqueue_is_empty(&r->write_queue)
                && max_bytes
                && (r->resp_body_finished
                    || (r->conf.stream_response_body
                        & (FDEVENT_STREAM_RESPONSE
                          |FDEVENT_STREAM_RESPONSE_BUFMIN)))) {

                /* at most 32k per stream per pass */
                uint32_t dlen = max_bytes > 32768 ? 32768 : (uint32_t)max_bytes;
                dlen = h2_send_cqdata(r, con, &r->write_queue, dlen);
                if (dlen) { /*(do not resched (spin) if swin empty window)*/
                    max_bytes -= (off_t)dlen;
                    if (!chunkqueue_is_empty(&r->write_queue))
                        resched |= 1;
                }
            }

            {
                /* stream finished writing: move it to the response-end state
                 * (the state machine is re-run here only when state logging
                 * is enabled) */
                if (chunkqueue_is_empty(&r->write_queue)) {
                    if (r->resp_body_finished && r->state == CON_STATE_WRITE) {
                        connection_set_state(r, CON_STATE_RESPONSE_END);
                        if (__builtin_expect( (r->conf.log_state_handling), 0))
                            connection_state_machine_loop(r, con);
                    }
                }
            }

          #if 0
            if (log_state_handling)
                log_error(r->conf.errh, __FILE__, __LINE__,
                  "state at exit %d %d %s", con->fd, r->h2id,
                  connection_get_state(r->state));
          #endif

            if (r->state==CON_STATE_RESPONSE_END || r->state==CON_STATE_ERROR) {
                /*(trigger reschedule of con if frames pending)*/
                /* (checked before retiring: the stream table was full and
                 * unparsed input remains in con->read_queue) */
                if (h2c->rused == sizeof(h2c->r)/sizeof(*h2c->r)
                    && !chunkqueue_is_empty(con->read_queue))
                    resched |= 2;
                h2_send_end_stream(r, con);
                /* save keep_alive before r is invalidated below */
                const int alive = r->keep_alive;
                h2_retire_stream(r, con);/*r invalidated;removed from h2c->r[]*/
                --i;/* adjust loop i; h2c->rused was modified to retire r */
                /*(special-case: allow *stream* to set r->keep_alive = -1 to
                 * trigger goaway on h2 connection, e.g. after mod_auth failure
                 * in attempt to mitigate brute force attacks by forcing a
                 * reconnect and (somewhat) slowing down retries)*/
                if (alive < 0)
                    connection_request_end_h2(h2r, con);
            }
        }

        /* budget exhausted: ask to be rescheduled to continue writing */
        if (0 == max_bytes) resched |= 1;
    }

    if (h2c->sent_goaway > 0 && h2c->rused) {
        /* retire streams if an error has occurred
         * note: this is not done to other streams in the loop above
         * (besides the current stream in the loop) due to the specific
         * implementation above, where doing so would mess up the iterator */
        for (uint32_t i = 0; i < h2c->rused; ++i) {
            request_st * const r = h2c->r[i];
            /*assert(r->h2state == H2_STATE_CLOSED);*/
            h2_retire_stream(r, con);/*r invalidated;removed from h2c->r[]*/
            --i;/* adjust loop i; h2c->rused was modified to retire r */
        }
        /* XXX: ? should we discard con->write_queue
         *        and change h2r->state to CON_STATE_RESPONSE_END ? */
    }

    if (h2r->state == CON_STATE_WRITE) {
        /* write HTTP/2 frames to socket */
        if (!chunkqueue_is_empty(con->write_queue))
            connection_handle_write(h2r, con);

        /* everything flushed, no streams left, and GOAWAY recorded:
         * the connection-level request is done */
        if (chunkqueue_is_empty(con->write_queue)
            && 0 == h2c->rused && h2c->sent_goaway)
            connection_set_state(h2r, CON_STATE_RESPONSE_END);
    }

    if (h2r->state == CON_STATE_WRITE) {
        /* (resched & 1) more data is available to write, if still able to write
         * (resched & 2) resched to read deferred frames from con->read_queue */
        /*(con->is_writable set to 0 if !chunkqueue_is_empty(con->write_queue)
         * after trying to write in connection_handle_write() above)*/
        if (((resched & 1) && con->is_writable>0 && !con->traffic_limit_reached)
            || (resched & 2))
            joblist_append(con);

        /* poll for input only while the h2 layer wants to read */
        if (h2_want_read(con))
            h2r->conf.stream_request_body |=  FDEVENT_STREAM_REQUEST_POLLIN;
        else
            h2r->conf.stream_request_body &= ~FDEVENT_STREAM_REQUEST_POLLIN;
    }
    else /* e.g. CON_STATE_RESPONSE_END or CON_STATE_ERROR */
        connection_state_machine_loop(h2r, con);

    connection_set_fdevent_interest(h2r, con);
}
1363
1364
1365 static void
connection_state_machine_h1(request_st * const r,connection * const con)1366 connection_state_machine_h1 (request_st * const r, connection * const con)
1367 {
1368 const int log_state_handling = r->conf.log_state_handling;
1369 if (log_state_handling) {
1370 log_error(r->conf.errh, __FILE__, __LINE__,
1371 "state at enter %d %s", con->fd, connection_get_state(r->state));
1372 }
1373
1374 connection_state_machine_loop(r, con);
1375
1376 if (log_state_handling) {
1377 log_error(r->conf.errh, __FILE__, __LINE__,
1378 "state at exit: %d %s", con->fd, connection_get_state(r->state));
1379 }
1380
1381 connection_set_fdevent_interest(r, con);
1382 }
1383
1384
1385 void
connection_state_machine(connection * const con)1386 connection_state_machine (connection * const con)
1387 {
1388 request_st * const r = &con->request;
1389 if (r->http_version == HTTP_VERSION_2)
1390 connection_state_machine_h2(r, con);
1391 else /* if (r->http_version <= HTTP_VERSION_1_1) */
1392 connection_state_machine_h1(r, con);
1393 }
1394
1395
/* Check one connection for idle timeouts and for a traffic limit that can
 * be lifted; run the connection state machine if anything changed.
 *
 * Depending on the state of the connection-level request this checks:
 *  - CON_STATE_CLOSE: the linger timeout (HTTP_LINGER_TIMEOUT),
 *  - HTTP/2 connection in CON_STATE_WRITE: read idle and write idle for each
 *    active stream, or keep-alive idle when there are no active streams,
 *  - otherwise, when waiting for input: read idle or keep-alive idle,
 *  - HTTP/1.x in CON_STATE_WRITE: write idle (r->conf.max_write_idle).
 *
 * cur_ts is compared against con->close_timeout_ts, con->read_idle_ts,
 * con->write_request_ts and con->connection_start, which elsewhere in this
 * file are assigned from log_monotonic_secs.
 *
 * Always resets con->bytes_written_cur_second to 0. */
static void connection_check_timeout (connection * const con, const unix_time64_t cur_ts) {
    const int waitevents = fdevent_fdnode_interest(con->fdn);
    int changed = 0;
    int t_diff;

    request_st * const r = &con->request;
    if (r->state == CON_STATE_CLOSE) {
        if (cur_ts - con->close_timeout_ts > HTTP_LINGER_TIMEOUT) {
            changed = 1;
        }
    }
    else if (con->h2 && r->state == CON_STATE_WRITE) {
        h2con * const h2c = con->h2;
        if (h2c->rused) {
            for (uint32_t i = 0; i < h2c->rused; ++i) {
                request_st * const rr = h2c->r[i];
                if (rr->state == CON_STATE_ERROR) { /*(should not happen)*/
                    changed = 1;
                    continue;
                }
                /* stream still has request body outstanding */
                if (rr->reqbody_length != rr->reqbody_queue.bytes_in) {
                    /* XXX: should timeout apply if not trying to read on h2con?
                     * (still applying timeout to catch stuck connections) */
                    /* XXX: con->read_idle_ts is not per-request, so timeout
                     * will not occur if other read activity occurs on h2con
                     * (future: might keep separate timestamp per-request) */
                    if (cur_ts - con->read_idle_ts > rr->conf.max_read_idle) {
                        /* time - out */
                        if (rr->conf.log_request_handling) {
                            log_error(rr->conf.errh, __FILE__, __LINE__,
                              "request aborted - read timeout: %d", con->fd);
                        }
                        /* note: the connection-level request r (not the
                         * stream rr) is put into the error state */
                        connection_set_state_error(r, CON_STATE_ERROR);
                        changed = 1;
                    }
                }

                if (rr->state != CON_STATE_READ_POST
                    && con->write_request_ts != 0) {
                    /* XXX: con->write_request_ts is not per-request, so timeout
                     * will not occur if other write activity occurs on h2con
                     * (future: might keep separate timestamp per-request) */
                    if (cur_ts - con->write_request_ts
                        > r->conf.max_write_idle) {
                        /*(see comment further down about max_write_idle)*/
                        /* time - out */
                        if (r->conf.log_timeouts) {
                            log_error(r->conf.errh, __FILE__, __LINE__,
                              "NOTE: a request from %s for %.*s timed out "
                              "after writing %lld bytes. We waited %d seconds. "
                              "If this is a problem, increase "
                              "server.max-write-idle",
                              con->dst_addr_buf.ptr,
                              BUFFER_INTLEN_PTR(&r->target),
                              (long long)r->write_queue.bytes_out,
                              (int)r->conf.max_write_idle);
                        }
                        /* as above: error state is set on the
                         * connection-level request r */
                        connection_set_state_error(r, CON_STATE_ERROR);
                        changed = 1;
                    }
                }
            }
        }
        else {
            /* no active streams: apply the keep-alive idle timeout */
            if (cur_ts - con->read_idle_ts > con->keep_alive_idle) {
                /* time - out */
                if (r->conf.log_request_handling) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                              "connection closed - keep-alive timeout: %d",
                              con->fd);
                }
                connection_set_state(r, CON_STATE_RESPONSE_END);
                changed = 1;
            }
        }
        /* process changes before optimistic read of additional HTTP/2 frames */
        if (changed)
            con->is_readable = 0;
    }
    else if (waitevents & FDEVENT_IN) {
        /* first request on the connection, or not in the read state:
         * use the read idle timeout; otherwise (waiting for a subsequent
         * request) use the keep-alive idle timeout */
        if (con->request_count == 1 || r->state != CON_STATE_READ) {
            /* e.g. CON_STATE_READ_POST || CON_STATE_WRITE */
            if (cur_ts - con->read_idle_ts > r->conf.max_read_idle) {
                /* time - out */
                if (r->conf.log_request_handling) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                      "connection closed - read timeout: %d", con->fd);
                }

                connection_set_state_error(r, CON_STATE_ERROR);
                changed = 1;
            }
        } else {
            if (cur_ts - con->read_idle_ts > con->keep_alive_idle) {
                /* time - out */
                if (r->conf.log_request_handling) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                      "connection closed - keep-alive timeout: %d",
                      con->fd);
                }

                connection_set_state_error(r, CON_STATE_ERROR);
                changed = 1;
            }
        }
    }

    /* max_write_idle timeout currently functions as backend timeout,
     * too, after response has been started.
     * Although backend timeouts now exist, there is no default for timeouts
     * to backends, so were this client timeout now to be changed to check
     * for write interest to the client, then timeout would not occur if the
     * backend hung and there was no backend read timeout set.  Therefore,
     * max_write_idle timeout remains timeout for both reading from backend
     * and writing to client, though this check here is only for HTTP/1.1.
     * In the future, if there were a quick way to detect that a backend
     * read timeout was in effect, then this timeout could check for write
     * interest to client.  (not a priority) */
    /*if (waitevents & FDEVENT_OUT)*/
    if (r->http_version <= HTTP_VERSION_1_1
        && r->state == CON_STATE_WRITE && con->write_request_ts != 0) {
      #if 0
        if (cur_ts - con->write_request_ts > 60) {
            log_error(r->conf.errh, __FILE__, __LINE__,
              "connection closed - pre-write-request-timeout: %d %d",
              con->fd, cur_ts - con->write_request_ts);
        }
      #endif

        if (cur_ts - con->write_request_ts > r->conf.max_write_idle) {
            /* time - out */
            if (r->conf.log_timeouts) {
                log_error(r->conf.errh, __FILE__, __LINE__,
                  "NOTE: a request from %s for %.*s timed out after writing "
                  "%lld bytes. We waited %d seconds. If this is a problem, "
                  "increase server.max-write-idle",
                  con->dst_addr_buf.ptr,
                  BUFFER_INTLEN_PTR(&r->target),
                  (long long)con->bytes_written, (int)r->conf.max_write_idle);
            }
            connection_set_state_error(r, CON_STATE_ERROR);
            changed = 1;
        }
    }

    /* lighttpd HTTP/2 limitation: rate limit config r->conf.bytes_per_second
     * (currently) taken only from top-level config (socket), with host if SNI
     * used, but not any other config conditions, e.g. not per-file-type */

    /* connection age, forced to at least 1 when it would be 0 */
    if (0 == (t_diff = cur_ts - con->connection_start)) t_diff = 1;

    /* lift the traffic limit if no per-connection limit is configured or
     * the bytes written so far are below bytes_per_second * age */
    if (con->traffic_limit_reached &&
        (r->conf.bytes_per_second == 0 ||
         con->bytes_written < (off_t)r->conf.bytes_per_second * t_diff)) {
        /* enable connection again */
        con->traffic_limit_reached = 0;

        changed = 1;
    }

    con->bytes_written_cur_second = 0;

    if (changed) {
        connection_state_machine(con);
    }
}
1562
/**
 * Periodic maintenance: check every connection of the server for timeouts.
 *
 * @param srv     server whose connection list (srv->conns) is walked
 * @param cur_ts  current timestamp passed on to the per-connection check
 */
void connection_periodic_maint (server * const srv, const unix_time64_t cur_ts) {
    connection *con = srv->conns;
    while (con) {
        /* fetch the successor before the check, since con->next is not
         * consulted again afterwards (presumably because the check may
         * take con off the active list -- confirm against
         * connection_del()) */
        connection * const next = con->next;
        connection_check_timeout(con, cur_ts);
        con = next;
    }
}
1570
/**
 * Graceful-shutdown maintenance: push every connection toward closing.
 *
 * For each connection: shortens a pending linger period, sends GOAWAY on
 * HTTP/2 connections, closes HTTP/1.x connections idling in keep-alive, and
 * forces the error state once srv->graceful_expire_ts has passed.  Keep-alive
 * and rate limits are disabled on every connection.  The connection state
 * machine is run for each connection on which something changed.
 *
 * @param srv  server whose connection list (srv->conns) is walked
 */
void connection_graceful_shutdown_maint (server *srv) {
    /* has the deadline for graceful shutdown (if any) passed? */
    const int expired =
      (srv->graceful_expire_ts && srv->graceful_expire_ts < log_monotonic_secs);

    connection *con = srv->conns;
    while (con) {
        /* successor is fetched before con is processed */
        connection * const next = con->next;
        request_st * const r = &con->request;
        int kick = 0; /* set when the state machine needs to run */

        if (r->state == CON_STATE_CLOSE) {
            /* reduce remaining linger timeout to be
             * (from zero) *up to* one more second, but no more */
            if (HTTP_LINGER_TIMEOUT > 1)
                con->close_timeout_ts -= (HTTP_LINGER_TIMEOUT - 1);
            kick = (log_monotonic_secs - con->close_timeout_ts
                    > HTTP_LINGER_TIMEOUT);
        }
        else if (con->h2 && r->state == CON_STATE_WRITE) {
            h2_send_goaway(con, H2_E_NO_ERROR);
            /* no streams and nothing left to write: connection is done */
            if (0 == con->h2->rused && chunkqueue_is_empty(con->write_queue)) {
                connection_set_state(r, CON_STATE_RESPONSE_END);
                kick = 1;
            }
        }
        else if (r->state == CON_STATE_READ && con->request_count > 1
                 && chunkqueue_is_empty(con->read_queue)) {
            /* close connections in keep-alive waiting for next request */
            connection_set_state_error(r, CON_STATE_ERROR);
            kick = 1;
        }

        if (expired) {
            connection_set_state_error(r, CON_STATE_ERROR);
            kick = 1;
        }

        r->keep_alive = 0;                   /* disable keep-alive */
        r->conf.bytes_per_second = 0;        /* disable rate limit */
        r->conf.global_bytes_per_second = 0; /* disable rate limit */

        if (con->traffic_limit_reached) {
            con->traffic_limit_reached = 0;
            kick = 1;
        }

        if (kick)
            connection_state_machine(con);

        con = next;
    }
}
1620
1621
1622 static int
connection_handle_read_post_cq_compact(chunkqueue * const cq)1623 connection_handle_read_post_cq_compact (chunkqueue * const cq)
1624 {
1625 /* combine first mem chunk with next non-empty mem chunk
1626 * (loop if next chunk is empty) */
1627 chunk *c = cq->first;
1628 if (NULL == c) return 0;
1629 const uint32_t mlen = buffer_clen(c->mem) - (size_t)c->offset;
1630 while ((c = c->next)) {
1631 const uint32_t blen = buffer_clen(c->mem) - (size_t)c->offset;
1632 if (0 == blen) continue;
1633 chunkqueue_compact_mem(cq, mlen + blen);
1634 return 1;
1635 }
1636 return 0;
1637 }
1638
1639
1640 __attribute_pure__
1641 static int
connection_handle_read_post_chunked_crlf(chunkqueue * const cq)1642 connection_handle_read_post_chunked_crlf (chunkqueue * const cq)
1643 {
1644 /* caller might check chunkqueue_length(cq) >= 2 before calling here
1645 * to limit return value to either 1 for good or -1 for error */
1646 chunk *c;
1647 buffer *b;
1648 char *p;
1649 size_t len;
1650
1651 /* caller must have called chunkqueue_remove_finished_chunks(cq), so if
1652 * chunkqueue is not empty, it contains chunk with at least one char */
1653 if (chunkqueue_is_empty(cq)) return 0;
1654
1655 c = cq->first;
1656 b = c->mem;
1657 p = b->ptr+c->offset;
1658 if (p[0] != '\r') return -1; /* error */
1659 if (p[1] == '\n') return 1;
1660 len = buffer_clen(b) - (size_t)c->offset;
1661 if (1 != len) return -1; /* error */
1662
1663 while (NULL != (c = c->next)) {
1664 b = c->mem;
1665 len = buffer_clen(b) - (size_t)c->offset;
1666 if (0 == len) continue;
1667 p = b->ptr+c->offset;
1668 return (p[0] == '\n') ? 1 : -1; /* error if not '\n' */
1669 }
1670 return 0;
1671 }
1672
1673
1674 static handler_t
connection_handle_read_post_chunked(request_st * const r,chunkqueue * const cq,chunkqueue * const dst_cq)1675 connection_handle_read_post_chunked (request_st * const r, chunkqueue * const cq, chunkqueue * const dst_cq)
1676 {
1677 /* r->conf.max_request_size is in kBytes */
1678 const off_t max_request_size = (off_t)r->conf.max_request_size << 10;
1679 off_t te_chunked = r->te_chunked;
1680 do {
1681 off_t len = chunkqueue_length(cq);
1682
1683 while (0 == te_chunked) {
1684 char *p;
1685 chunk *c = cq->first;
1686 if (NULL == c) break;
1687 force_assert(c->type == MEM_CHUNK);
1688 p = strchr(c->mem->ptr+c->offset, '\n');
1689 if (NULL != p) { /* found HTTP chunked header line */
1690 off_t hsz = p + 1 - (c->mem->ptr+c->offset);
1691 unsigned char *s = (unsigned char *)c->mem->ptr+c->offset;
1692 for (unsigned char u;(u=(unsigned char)hex2int(*s))!=0xFF;++s) {
1693 if (te_chunked > (off_t)(1uLL<<(8*sizeof(off_t)-5))-1-2) {
1694 log_error(r->conf.errh, __FILE__, __LINE__,
1695 "chunked data size too large -> 400");
1696 /* 400 Bad Request */
1697 return http_response_reqbody_read_error(r, 400);
1698 }
1699 te_chunked <<= 4;
1700 te_chunked |= u;
1701 }
1702 if (s == (unsigned char *)c->mem->ptr+c->offset) { /*(no hex)*/
1703 log_error(r->conf.errh, __FILE__, __LINE__,
1704 "chunked header invalid chars -> 400");
1705 /* 400 Bad Request */
1706 return http_response_reqbody_read_error(r, 400);
1707 }
1708 while (*s == ' ' || *s == '\t') ++s;
1709 if (*s != '\r' && *s != ';') {
1710 log_error(r->conf.errh, __FILE__, __LINE__,
1711 "chunked header invalid chars -> 400");
1712 /* 400 Bad Request */
1713 return http_response_reqbody_read_error(r, 400);
1714 }
1715
1716 if (hsz >= 1024) {
1717 /* prevent theoretical integer overflow
1718 * casting to (size_t) and adding 2 (for "\r\n") */
1719 log_error(r->conf.errh, __FILE__, __LINE__,
1720 "chunked header line too long -> 400");
1721 /* 400 Bad Request */
1722 return http_response_reqbody_read_error(r, 400);
1723 }
1724
1725 if (0 == te_chunked) {
1726 /* do not consume final chunked header until
1727 * (optional) trailers received along with
1728 * request-ending blank line "\r\n" */
1729 if (p[0] == '\r' && p[1] == '\n') {
1730 /*(common case with no trailers; final \r\n received)*/
1731 hsz += 2;
1732 }
1733 else {
1734 /* trailers or final CRLF crosses into next cq chunk */
1735 hsz -= 2;
1736 do {
1737 c = cq->first;
1738 p = strstr(c->mem->ptr+c->offset+hsz, "\r\n\r\n");
1739 } while (NULL == p
1740 && connection_handle_read_post_cq_compact(cq));
1741 if (NULL == p) {
1742 /*(effectively doubles max request field size
1743 * potentially received by backend, if in the future
1744 * these trailers are added to request headers)*/
1745 if ((off_t)buffer_clen(c->mem) - c->offset
1746 < (off_t)r->conf.max_request_field_size) {
1747 break;
1748 }
1749 else {
1750 /* ignore excessively long trailers;
1751 * disable keep-alive on connection */
1752 r->keep_alive = 0;
1753 p = c->mem->ptr + buffer_clen(c->mem)
1754 - 4;
1755 }
1756 }
1757 hsz = p + 4 - (c->mem->ptr+c->offset);
1758 /* trailers currently ignored, but could be processed
1759 * here if 0 == (r->conf.stream_request_body &
1760 * & (FDEVENT_STREAM_REQUEST
1761 * |FDEVENT_STREAM_REQUEST_BUFMIN))
1762 * taking care to reject fields forbidden in trailers,
1763 * making trailers available to CGI and other backends*/
1764 }
1765 chunkqueue_mark_written(cq, (size_t)hsz);
1766 r->reqbody_length = dst_cq->bytes_in;
1767 break; /* done reading HTTP chunked request body */
1768 }
1769
1770 /* consume HTTP chunked header */
1771 chunkqueue_mark_written(cq, (size_t)hsz);
1772 len = chunkqueue_length(cq);
1773
1774 if (0 !=max_request_size
1775 && (max_request_size < te_chunked
1776 || max_request_size - te_chunked < dst_cq->bytes_in)) {
1777 log_error(r->conf.errh, __FILE__, __LINE__,
1778 "request-size too long: %lld -> 413",
1779 (long long)(dst_cq->bytes_in + te_chunked));
1780 /* 413 Payload Too Large */
1781 return http_response_reqbody_read_error(r, 413);
1782 }
1783
1784 te_chunked += 2; /*(for trailing "\r\n" after chunked data)*/
1785
1786 break; /* read HTTP chunked header */
1787 }
1788
1789 /*(likely better ways to handle chunked header crossing chunkqueue
1790 * chunks, but this situation is not expected to occur frequently)*/
1791 if ((off_t)buffer_clen(c->mem) - c->offset >= 1024) {
1792 log_error(r->conf.errh, __FILE__, __LINE__,
1793 "chunked header line too long -> 400");
1794 /* 400 Bad Request */
1795 return http_response_reqbody_read_error(r, 400);
1796 }
1797 else if (!connection_handle_read_post_cq_compact(cq)) {
1798 break;
1799 }
1800 }
1801 if (0 == te_chunked) break;
1802
1803 if (te_chunked > 2) {
1804 if (len > te_chunked-2) len = te_chunked-2;
1805 if (dst_cq->bytes_in + te_chunked <= 64*1024) {
1806 /* avoid buffering request bodies <= 64k on disk */
1807 chunkqueue_steal(dst_cq, cq, len);
1808 }
1809 else if (0 != chunkqueue_steal_with_tempfiles(dst_cq, cq, len,
1810 r->conf.errh)) {
1811 /* 500 Internal Server Error */
1812 return http_response_reqbody_read_error(r, 500);
1813 }
1814 te_chunked -= len;
1815 len = chunkqueue_length(cq);
1816 }
1817
1818 if (len < te_chunked) break;
1819
1820 if (2 == te_chunked) {
1821 if (-1 == connection_handle_read_post_chunked_crlf(cq)) {
1822 log_error(r->conf.errh, __FILE__, __LINE__,
1823 "chunked data missing end CRLF -> 400");
1824 /* 400 Bad Request */
1825 return http_response_reqbody_read_error(r, 400);
1826 }
1827 chunkqueue_mark_written(cq, 2);/*consume \r\n at end of chunk data*/
1828 te_chunked -= 2;
1829 }
1830
1831 } while (!chunkqueue_is_empty(cq));
1832
1833 r->te_chunked = te_chunked;
1834 return HANDLER_GO_ON;
1835 }
1836
1837
1838 static handler_t
connection_handle_read_body_unknown(request_st * const r,chunkqueue * const cq,chunkqueue * const dst_cq)1839 connection_handle_read_body_unknown (request_st * const r, chunkqueue * const cq, chunkqueue * const dst_cq)
1840 {
1841 /* r->conf.max_request_size is in kBytes */
1842 const off_t max_request_size = (off_t)r->conf.max_request_size << 10;
1843 chunkqueue_append_chunkqueue(dst_cq, cq);
1844 if (0 != max_request_size && dst_cq->bytes_in > max_request_size) {
1845 log_error(r->conf.errh, __FILE__, __LINE__,
1846 "request-size too long: %lld -> 413", (long long)dst_cq->bytes_in);
1847 /* 413 Payload Too Large */
1848 return http_response_reqbody_read_error(r, 413);
1849 }
1850 return HANDLER_GO_ON;
1851 }
1852
1853
1854 __attribute_cold__
1855 static int
connection_check_expect_100(request_st * const r,connection * const con)1856 connection_check_expect_100 (request_st * const r, connection * const con)
1857 {
1858 if (con->is_writable <= 0)
1859 return 1;
1860
1861 const buffer * const vb =
1862 http_header_request_get(r, HTTP_HEADER_EXPECT,
1863 CONST_STR_LEN("Expect"));
1864 if (NULL == vb)
1865 return 1;
1866
1867 /* (always unset Expect header so that check is not repeated for request */
1868 int rc = buffer_eq_icase_slen(vb, CONST_STR_LEN("100-continue"));
1869 http_header_request_unset(r, HTTP_HEADER_EXPECT,
1870 CONST_STR_LEN("Expect"));
1871 if (!rc
1872 || 0 != r->reqbody_queue.bytes_in
1873 || !chunkqueue_is_empty(&r->read_queue)
1874 || !chunkqueue_is_empty(&r->write_queue))
1875 return 1;
1876
1877 /* send 100 Continue only if no request body data received yet
1878 * and response has not yet started (checked above) */
1879 if (r->http_version > HTTP_VERSION_1_1)
1880 h2_send_100_continue(r, con);
1881 else if (r->http_version == HTTP_VERSION_1_1)
1882 return connection_write_100_continue(r, con);
1883
1884 return 1;
1885 }
1886
1887
1888 static handler_t
connection_handle_read_post_state(request_st * const r)1889 connection_handle_read_post_state (request_st * const r)
1890 {
1891 connection * const con = r->con;
1892 chunkqueue * const cq = &r->read_queue;
1893 chunkqueue * const dst_cq = &r->reqbody_queue;
1894
1895 int is_closed = 0;
1896
1897 if (r->http_version > HTTP_VERSION_1_1) {
1898 /*(H2_STATE_HALF_CLOSED_REMOTE or H2_STATE_CLOSED)*/
1899 if (r->h2state >= H2_STATE_HALF_CLOSED_REMOTE)
1900 is_closed = 1;
1901 }
1902 else if (con->is_readable > 0) {
1903 con->read_idle_ts = log_monotonic_secs;
1904 const off_t max_per_read =
1905 !(r->conf.stream_request_body /*(if not streaming request body)*/
1906 & (FDEVENT_STREAM_REQUEST|FDEVENT_STREAM_REQUEST_BUFMIN))
1907 ? MAX_READ_LIMIT
1908 : (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_BUFMIN)
1909 ? 16384 /* FDEVENT_STREAM_REQUEST_BUFMIN */
1910 : 65536; /* FDEVENT_STREAM_REQUEST */
1911 switch(con->network_read(con, cq, max_per_read)) {
1912 case -1:
1913 connection_set_state_error(r, CON_STATE_ERROR);
1914 return HANDLER_ERROR;
1915 case -2:
1916 is_closed = 1;
1917 break;
1918 default:
1919 break;
1920 }
1921
1922 chunkqueue_remove_finished_chunks(cq);
1923 }
1924
1925 /* Check for Expect: 100-continue in request headers */
1926 if (light_btst(r->rqst_htags, HTTP_HEADER_EXPECT)
1927 && !connection_check_expect_100(r, con))
1928 return HANDLER_ERROR;
1929
1930 if (r->http_version > HTTP_VERSION_1_1) {
1931 /* h2_recv_data() places frame payload directly into r->reqbody_queue */
1932 }
1933 else if (r->reqbody_length < 0) {
1934 /*(-1: Transfer-Encoding: chunked, -2: unspecified length)*/
1935 handler_t rc = (-1 == r->reqbody_length)
1936 ? connection_handle_read_post_chunked(r, cq, dst_cq)
1937 : connection_handle_read_body_unknown(r, cq, dst_cq);
1938 if (HANDLER_GO_ON != rc) return rc;
1939 chunkqueue_remove_finished_chunks(cq);
1940 }
1941 else {
1942 off_t len = (off_t)r->reqbody_length - dst_cq->bytes_in;
1943 if (r->reqbody_length <= 64*1024) {
1944 /* don't buffer request bodies <= 64k on disk */
1945 chunkqueue_steal(dst_cq, cq, len);
1946 }
1947 else if (0 !=
1948 chunkqueue_steal_with_tempfiles(dst_cq,cq,len,r->conf.errh)) {
1949 /* writing to temp file failed */ /* Internal Server Error */
1950 return http_response_reqbody_read_error(r, 500);
1951 }
1952 chunkqueue_remove_finished_chunks(cq);
1953 }
1954
1955 if (dst_cq->bytes_in == (off_t)r->reqbody_length) {
1956 /* Content is ready */
1957 r->conf.stream_request_body &= ~FDEVENT_STREAM_REQUEST_POLLIN;
1958 if (r->state == CON_STATE_READ_POST) {
1959 connection_set_state(r, CON_STATE_HANDLE_REQUEST);
1960 }
1961 return HANDLER_GO_ON;
1962 }
1963 else if (is_closed) {
1964 #if 0
1965 return http_response_reqbody_read_error(r, 400); /* Bad Request */
1966 #endif
1967 return HANDLER_ERROR;
1968 }
1969 else {
1970 r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLIN;
1971 return (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
1972 ? HANDLER_GO_ON
1973 : HANDLER_WAIT_FOR_EVENT;
1974 }
1975 }
1976