1 #include "first.h"
2 
3 #include "base.h"
4 #include "buffer.h"
5 #include "burl.h"       /* HTTP_PARSEOPT_HEADER_STRICT */
6 #include "chunk.h"
7 #include "log.h"
8 #include "connections.h"
9 #include "fdevent.h"
10 #include "h2.h"
11 #include "http_header.h"
12 
13 #include "reqpool.h"
14 #include "request.h"
15 #include "response.h"
16 #include "network.h"
17 #include "stat_cache.h"
18 
19 #include "plugin.h"
20 
21 #include "sock_addr_cache.h"
22 
23 #include <sys/stat.h>
24 
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <errno.h>
28 #include <string.h>
29 
30 #include "sys-socket.h"
31 
/* linger limit: connection_handle_close_state() closes the connection once
 * (log_monotonic_secs - con->close_timeout_ts) exceeds this value */
#define HTTP_LINGER_TIMEOUT 5

/* store request state n into (r)->state */
#define connection_set_state(r, n) ((r)->state = (n))
35 
36 __attribute_cold__
connection_set_state_error(request_st * const r,const request_state_t state)37 static void connection_set_state_error(request_st * const r, const request_state_t state) {
38     connection_set_state(r, state);
39 }
40 
/* forward declaration; defined later in this file */
__attribute_cold__
static connection *connection_init(server *srv);

/* forward declaration; defined later in this file */
static void connection_reset(connection *con);
45 
connections_get_new_connection(server * srv)46 static connection *connections_get_new_connection(server *srv) {
47     connection *con;
48     --srv->lim_conns;
49     if (srv->conns_pool) {
50         con = srv->conns_pool;
51         srv->conns_pool = con->next;
52     }
53     else {
54         con = connection_init(srv);
55         connection_reset(con);
56     }
57     /*con->prev = NULL;*//*(already set)*/
58     if ((con->next = srv->conns))
59         con->next->prev = con;
60     return (srv->conns = con);
61 }
62 
/* Unlink con from the active list srv->conns and push it onto the head of
 * srv->conns_pool.  Increments srv->lim_conns. */
static void connection_del(server *srv, connection *con) {
    connection * const before = con->prev;
    connection * const after  = con->next;

    /* splice con out of the doubly-linked active list */
    if (NULL != after)
        after->prev = before;
    if (NULL != before)
        before->next = after;
    else
        srv->conns = after; /* con was the list head */

    /* push onto pool (pool is linked through ->next only) */
    con->prev = NULL;
    con->next = srv->conns_pool;
    srv->conns_pool = con;

    ++srv->lim_conns;
}
75 
/* Tear down a connection: run the plugin connection-close hooks, reset
 * request and per-connection state, unregister and close the socket, then
 * hand con back to the pool via connection_del().
 * A negative con->fd is negated first to recover the descriptor to close. */
static void connection_close(connection *con) {
	if (con->fd < 0) con->fd = -con->fd;

	plugins_call_handle_connection_close(con);

	server * const srv = con->srv;
	request_st * const r = &con->request;
	request_reset_ex(r); /*(r->conf.* is still valid below)*/
	connection_set_state(r, CON_STATE_CONNECT);

	chunkqueue_reset(con->read_queue);
	con->revents_err = 0;
	con->is_ssl_sock = 0;
	con->request_count = 0;

	/* detach from event loop before closing the descriptor */
	fdevent_fdnode_event_del(srv->ev, con->fdn);
	fdevent_unregister(srv->ev, con->fd);
	con->fdn = NULL;

#ifdef __WIN32
	const int close_rc = closesocket(con->fd);
#else
	const int close_rc = close(con->fd);
#endif
	if (0 != close_rc) {
		log_perror(r->conf.errh, __FILE__, __LINE__,
		  "(warning) close: %d", con->fd);
	}
	else {
		--srv->cur_fds; /* only count descriptors actually released */
	}

	if (r->conf.log_state_handling) {
		log_error(r->conf.errh, __FILE__, __LINE__,
		  "connection closed for fd %d", con->fd);
	}
	con->fd = -1;

	connection_del(srv, con);
}
112 
/* Drain pending input on a socket that is lingering before close.
 *
 * The linger-on-close handling is needed regardless of r->keep_alive:
 * even non-keepalive sockets may still have unread data, and closing before
 * reading it will make the client not see all our output.
 *
 * If the socket would block, return and wait for more data or for the linger
 * timeout.  On EOF or any other error, backdate con->close_timeout_ts so that
 * the linger timeout is already exceeded. */
static void connection_read_for_eos_plain(connection * const con) {
	const int family = sock_addr_get_family(&con->dst_addr);
	char scratch[16384];
	ssize_t rd;

	/* keep reading while data arrives; retry when interrupted
	 * (presumably fdevent_socket_read_discard() discards what it reads;
	 *  scratch is never examined here) */
	for (;;) {
		rd = fdevent_socket_read_discard(con->fd, scratch, sizeof(scratch),
						 family, SOCK_STREAM);
		if (rd > 0) continue;
		if (rd < 0 && errno == EINTR) continue;
		break;
	}

	if (rd < 0) {
		if (errno == EAGAIN) return;
      #if defined(EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
		if (errno == EWOULDBLOCK) return;
      #endif
	}

	/* 0 == rd || (rd < 0 && (errno is a non-recoverable error)) */
	con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
}
135 
/* Linger-on-close read for connections flagged is_ssl_sock: read through
 * con->network_read() and drop whatever was queued.  A negative result
 * backdates con->close_timeout_ts so the linger timeout is already exceeded. */
static void connection_read_for_eos_ssl(connection * const con) {
	chunkqueue * const rq = con->read_queue;
	if (0 > con->network_read(con, rq, MAX_READ_LIMIT)) {
		con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
	}
	chunkqueue_reset(con->read_queue);
}
141 
/* Dispatch the linger-on-close read to the plain or the ssl variant,
 * selected by con->is_ssl_sock. */
static void connection_read_for_eos(connection * const con) {
	if (con->is_ssl_sock)
		connection_read_for_eos_ssl(con);
	else
		connection_read_for_eos_plain(con);
}
147 
/* Handle a connection in the close (lingering) state: drain input, then
 * close the connection once more than HTTP_LINGER_TIMEOUT has elapsed since
 * con->close_timeout_ts. */
static void connection_handle_close_state(connection *con) {
	connection_read_for_eos(con);

	if (log_monotonic_secs - con->close_timeout_ts <= HTTP_LINGER_TIMEOUT)
		return; /* still within linger window */

	connection_close(con);
}
155 
/* Begin closing a connection: run the plugin shut_wr hooks, reset the
 * connection, and count it in srv->con_closed.
 * If the descriptor is valid and either the connection is flagged
 * is_ssl_sock or shutdown(SHUT_WR) succeeds, enter CON_STATE_CLOSE and start
 * the linger timer; otherwise close the connection immediately. */
static void connection_handle_shutdown(connection *con) {
	plugins_call_handle_connection_shut_wr(con);

	connection_reset(con);
	++con->srv->con_closed;

	/* (shutdown() is not attempted when con->is_ssl_sock is set) */
	if (con->fd < 0
	    || (!con->is_ssl_sock && 0 != shutdown(con->fd, SHUT_WR))) {
		connection_close(con);
		return;
	}

	/* linger: wait for peer to close or for linger timeout */
	con->close_timeout_ts = log_monotonic_secs;

	request_st * const r = &con->request;
	connection_set_state(r, CON_STATE_CLOSE);
	if (r->conf.log_state_handling) {
		log_error(r->conf.errh, __FILE__, __LINE__,
		  "shutdown for fd %d", con->fd);
	}
}
177 
178 
/* Finish a response: retire HTTP/2 connection state if applicable, invoke the
 * request_done plugin hooks, then either reset the request for the next
 * keep-alive request or start shutting the connection down. */
static void connection_handle_response_end_state(request_st * const r, connection * const con) {
	if (r->http_version > HTTP_VERSION_1_1) {
		h2_retire_con(r, con);
		r->keep_alive = 0;
		/* set a status so that mod_accesslog, mod_rrdtool hooks are called
		 * in plugins_call_handle_request_done() (XXX: or set to 0 to omit) */
		r->http_status = 100; /* XXX: what if con->state == CON_STATE_ERROR? */
	}

	/* call request_done hook if http_status set (e.g. to log request) */
	/* (even if error, connection dropped, as long as http_status is set) */
	if (0 != r->http_status)
		plugins_call_handle_request_done(r);

	const int errored = (r->state == CON_STATE_ERROR);
	if (!errored)
		++con->srv->con_written;

	if (errored || r->reqbody_length != r->reqbody_queue.bytes_in) {
		/* request body may not have been read completely */
		r->keep_alive = 0;
		/* clean up failed partial write of 1xx intermediate responses*/
		if (con->write_queue != &r->write_queue) { /*(for HTTP/1.1)*/
			chunkqueue_free(con->write_queue);
			con->write_queue = &r->write_queue;
		}
	}

	if (r->keep_alive <= 0) {
		connection_handle_shutdown(con);
		return;
	}

	/* keep-alive: prepare for next request on this connection */
	request_reset(r);
	con->is_readable = 1; /* potentially trigger optimistic read */
	/*(accounting used by mod_accesslog for HTTP/1.0 and HTTP/1.1)*/
	r->bytes_read_ckpt = con->bytes_read;
	r->bytes_written_ckpt = con->bytes_written;
#if 0
	r->start_hp.tv_sec = log_epoch_secs;
	con->read_idle_ts = log_monotonic_secs;
#endif
	connection_set_state(r, CON_STATE_REQUEST_START);
}
220 
221 
222 __attribute_pure__
223 static off_t
connection_write_throttled(const connection * const con,off_t max_bytes)224 connection_write_throttled (const connection * const con, off_t max_bytes)
225 {
226     const request_config * const restrict rconf = &con->request.conf;
227     if (0 == rconf->global_bytes_per_second && 0 == rconf->bytes_per_second)
228         return max_bytes;
229 
230     if (rconf->global_bytes_per_second) {
231         off_t limit = (off_t)rconf->global_bytes_per_second
232                     - *(rconf->global_bytes_per_second_cnt_ptr);
233         if (max_bytes > limit)
234             max_bytes = limit;
235     }
236 
237     if (rconf->bytes_per_second) {
238         off_t limit = (off_t)rconf->bytes_per_second
239                     - con->bytes_written_cur_second;
240         if (max_bytes > limit)
241             max_bytes = limit;
242     }
243 
244     return max_bytes > 0 ? max_bytes : 0; /*(0 == reached traffic limit)*/
245 }
246 
247 
248 static off_t
connection_write_throttle(connection * const con,off_t max_bytes)249 connection_write_throttle (connection * const con, off_t max_bytes)
250 {
251     /*assert(max_bytes > 0);*/
252     max_bytes = connection_write_throttled(con, max_bytes);
253     if (0 == max_bytes) con->traffic_limit_reached = 1;
254     return max_bytes;
255 }
256 
257 
/* Write data queued in cq to the client through con->network_write(),
 * limited by max_bytes and by any configured traffic throttle.
 * Updates con->write_request_ts and the bytes-written counters.
 * Returns 1 if throttled (nothing written) or if cq still holds data after
 * the write, 0 if cq is empty after the write, or the negative value
 * returned by con->network_write() on failure. */
static int
connection_write_chunkqueue (connection * const con, chunkqueue * const restrict cq, off_t max_bytes)
{
    /*assert(!chunkqueue_is_empty(cq));*//* checked by callers */

    con->write_request_ts = log_monotonic_secs;

    max_bytes = connection_write_throttle(con, max_bytes);
    if (0 == max_bytes) return 1;

    /* remember bytes_out before write to compute delta written afterwards */
    off_t written = cq->bytes_out;
    int ret;

  #ifdef TCP_CORK
    int corked = 0;
  #endif

    /* walk chunkqueue up to first FILE_CHUNK (if present)
     * This may incur memory load misses for pointer chasing, but effectively
     * preloads part of the chunkqueue, something which used to be a side effect
     * of a previous (less efficient) version of chunkqueue_length() which
     * walked the entire chunkqueue (on each and every call).  The loads here
     * make a measurable difference in performance in underlying call to
     * con->network_write() */
    if (cq->first->next && cq->first->type == MEM_CHUNK) {
        const chunk *c = cq->first;
        do { c = c->next; } while (c && c->type == MEM_CHUNK);
      #ifdef TCP_CORK
        /* Linux: put a cork into socket as we want to combine write() calls
         * but only if we really have multiple chunks including non-MEM_CHUNK
         * (or if multiple chunks and TLS), and only if TCP socket */
        if (NULL != c || (max_bytes > 16384 && con->is_ssl_sock)) {
            const int sa_family = sock_addr_get_family(&con->srv_socket->addr);
            if (sa_family == AF_INET || sa_family == AF_INET6) {
                corked = 1;
                (void)setsockopt(con->fd, IPPROTO_TCP, TCP_CORK,
                                 &corked, sizeof(corked));
            }
        }
      #endif
    }

    ret = con->network_write(con, cq, max_bytes);
    if (ret >= 0) {
        /* normalize success: 0 == all data written, 1 == data remains */
        ret = chunkqueue_is_empty(cq) ? 0 : 1;
    }

  #ifdef TCP_CORK
    if (corked) {
        /* remove cork (set above) now that the write attempt is done */
        corked = 0;
        (void)setsockopt(con->fd, IPPROTO_TCP, TCP_CORK,
                         &corked, sizeof(corked));
    }
  #endif

    /* account for bytes written (performed even if network_write failed) */
    written = cq->bytes_out - written;
    con->bytes_written += written;
    con->bytes_written_cur_second += written;
    request_st * const r = &con->request;
    if (r->conf.global_bytes_per_second_cnt_ptr)
        *(r->conf.global_bytes_per_second_cnt_ptr) += written;

    return ret;
}
322 
323 
324 static int
connection_write_1xx_info(request_st * const r,connection * const con)325 connection_write_1xx_info (request_st * const r, connection * const con)
326 {
327     /* (Note: prior 1xx intermediate responses may be present in cq) */
328     /* (Note: also choosing not to update con->write_request_ts
329      *  which differs from connection_write_chunkqueue()) */
330     chunkqueue * const cq = con->write_queue;
331     off_t written = cq->bytes_out;
332 
333     int rc = con->network_write(con, cq, MAX_WRITE_LIMIT);
334 
335     written = cq->bytes_out - written;
336     con->bytes_written += written;
337     con->bytes_written_cur_second += written;
338     if (r->conf.global_bytes_per_second_cnt_ptr)
339         *(r->conf.global_bytes_per_second_cnt_ptr) += written;
340 
341     if (rc < 0) {
342         connection_set_state_error(r, CON_STATE_ERROR);
343         return 0; /* error */
344     }
345 
346     if (!chunkqueue_is_empty(cq)) { /* partial write (unlikely) */
347         con->is_writable = 0;
348         if (cq == &r->write_queue) {
349             /* save partial write of 1xx in separate chunkqueue
350              * Note: sending of remainder of 1xx might be delayed
351              * until next set of response headers are sent */
352             con->write_queue = chunkqueue_init(NULL);
353             chunkqueue_append_chunkqueue(con->write_queue, cq);
354         }
355     }
356 
357   #if 0
358     /* XXX: accounting inconsistency
359      * 1xx is not currently included in r->resp_header_len,
360      * so mod_accesslog reporting of %b or %B (FORMAT_BYTES_OUT_NO_HEADER)
361      * reports all bytes out minus len of final response headers,
362      * but including 1xx intermediate responses.  If 1xx intermediate
363      * responses were included in r->resp_header_len, then there are a
364      * few places in the code which must be adjusted to use r->resp_header_done
365      * instead of (0 == r->resp_header_len) as flag that final response was set
366      * (Doing the following would "discard" the 1xx len from bytes_out)
367      */
368     r->write_queue.bytes_in = r->write_queue.bytes_out = 0;
369   #endif
370 
371     return 1; /* success */
372 }
373 
374 
375 int
connection_send_1xx(request_st * const r,connection * const con)376 connection_send_1xx (request_st * const r, connection * const con)
377 {
378     /* Make best effort to send HTTP/1.1 1xx intermediate */
379     /* (Note: if other modules set response headers *before* the
380      *  handle_response_start hook, and the backends subsequently sends 1xx,
381      *  then the response headers are sent here with 1xx and might be cleared
382      *  by caller (http_response_parse_headers() and http_response_check_1xx()),
383      *  instead of being sent with the final response.
384      *  (e.g. mod_magnet setting response headers, then backend sending 103)) */
385 
386     chunkqueue * const cq = con->write_queue; /*(bypass r->write_queue)*/
387 
388     buffer * const b = chunkqueue_append_buffer_open(cq);
389     buffer_copy_string_len(b, CONST_STR_LEN("HTTP/1.1 "));
390     http_status_append(b, r->http_status);
391     for (uint32_t i = 0; i < r->resp_headers.used; ++i) {
392         const data_string * const ds = (data_string *)r->resp_headers.data[i];
393         const uint32_t klen = buffer_clen(&ds->key);
394         const uint32_t vlen = buffer_clen(&ds->value);
395         if (0 == klen || 0 == vlen) continue;
396         buffer_append_str2(b, CONST_STR_LEN("\r\n"), ds->key.ptr, klen);
397         buffer_append_str2(b, CONST_STR_LEN(": "), ds->value.ptr, vlen);
398     }
399     buffer_append_string_len(b, CONST_STR_LEN("\r\n\r\n"));
400     chunkqueue_append_buffer_commit(cq);
401 
402     if (con->traffic_limit_reached)
403         return 1; /* success; send later if throttled */
404 
405     return connection_write_1xx_info(r, con);
406 }
407 
408 
409 static int
connection_write_100_continue(request_st * const r,connection * const con)410 connection_write_100_continue (request_st * const r, connection * const con)
411 {
412     /* Make best effort to send "HTTP/1.1 100 Continue" */
413     static const char http_100_continue[] = "HTTP/1.1 100 Continue\r\n\r\n";
414 
415     if (con->traffic_limit_reached)
416         return 1; /* success; skip sending if throttled */
417 
418     chunkqueue * const cq = con->write_queue; /*(bypass r->write_queue)*/
419     chunkqueue_append_mem(cq, http_100_continue, sizeof(http_100_continue)-1);
420     return connection_write_1xx_info(r, con);
421 }
422 
423 
/* Attempt to write con->write_queue to the client (HTTP/1.x path).
 * Returns the resulting request state:
 *   CON_STATE_RESPONSE_END  queue emptied and response body finished
 *   CON_STATE_ERROR         write failed (-1) or remote closed (-2)
 *   CON_STATE_WRITE         otherwise (state did not change) */
static int connection_handle_write(request_st * const r, connection * const con) {
	/*assert(!chunkqueue_is_empty(cq));*//* checked by callers */

	if (con->is_writable <= 0) return CON_STATE_WRITE;

	const int rc = connection_write_chunkqueue(con, con->write_queue, MAX_WRITE_LIMIT);

	if (0 == rc) {
		/* everything queued was written */
		if (r->resp_body_finished) {
			connection_set_state(r, CON_STATE_RESPONSE_END);
			return CON_STATE_RESPONSE_END;
		}
	}
	else if (-1 == rc || -2 == rc) {
		if (-1 == rc) { /* error on our side; (-2 is remote close) */
			log_error(r->conf.errh, __FILE__, __LINE__,
			  "connection closed: write failed on fd %d", con->fd);
		}
		connection_set_state_error(r, CON_STATE_ERROR);
		return CON_STATE_ERROR;
	}
	else if (1 == rc) {
		/* do not spin trying to send HTTP/2 server Connection Preface
		 * while waiting for TLS negotiation to complete */
		if (con->write_queue->bytes_out)
			con->is_writable = 0;

		/* not finished yet -> WRITE */
	}

	return CON_STATE_WRITE; /*(state did not change)*/
}
456 
/* Drive the response-writing state for request r.
 * For HTTP/1.x, flushes pending output via connection_handle_write() and,
 * while the response body is unfinished, invokes the handler module's
 * handle_subrequest() to produce more output; repeats while progress can
 * still be made.
 * Returns CON_STATE_RESPONSE_END when the response is complete,
 * CON_STATE_ERROR on write or handler failure, else CON_STATE_WRITE. */
static int connection_handle_write_state(request_st * const r, connection * const con) {
    do {
        /* only try to write if we have something in the queue */
        if (!chunkqueue_is_empty(&r->write_queue)) {
            /* (write is attempted here only for HTTP/1.1 and earlier) */
            if (r->http_version <= HTTP_VERSION_1_1) {
                int rc = connection_handle_write(r, con);
                if (rc != CON_STATE_WRITE) return rc;
            }
        } else if (r->resp_body_finished) {
            /* nothing left to write and body complete: response is done */
            connection_set_state(r, CON_STATE_RESPONSE_END);
            return CON_STATE_RESPONSE_END;
        }

        if (r->handler_module && !r->resp_body_finished) {
            const plugin * const p = r->handler_module;
            int rc = p->handle_subrequest(r, p->data);
            switch(rc) {
            case HANDLER_WAIT_FOR_EVENT:
            case HANDLER_FINISHED:
            case HANDLER_GO_ON:
                break;
            case HANDLER_COMEBACK:
            default:
                /* unexpected return value: log, then treat as error */
                log_error(r->conf.errh, __FILE__, __LINE__,
                  "unexpected subrequest handler ret-value: %d %d",
                  con->fd, rc);
                __attribute_fallthrough__
            case HANDLER_ERROR:
                connection_set_state_error(r, CON_STATE_ERROR);
                return CON_STATE_ERROR;
            }
        }
    /* repeat (HTTP/1.x only):
     * - if output is pending: while socket is writable and not throttled
     * - if no output is pending: when the response body has finished
     *   (next iteration then transitions to CON_STATE_RESPONSE_END) */
    } while (r->http_version <= HTTP_VERSION_1_1
             && (!chunkqueue_is_empty(&r->write_queue)
                 ? con->is_writable > 0 && 0 == con->traffic_limit_reached
                 : r->resp_body_finished));

    return CON_STATE_WRITE;
}
496 
497 
498 __attribute_cold__
connection_init(server * srv)499 static connection *connection_init(server *srv) {
500 	connection * const con = calloc(1, sizeof(*con));
501 	force_assert(NULL != con);
502 
503 	con->srv = srv;
504 	con->plugin_slots = srv->plugin_slots;
505 	con->config_data_base = srv->config_data_base;
506 
507 	request_st * const r = &con->request;
508 	request_init_data(r, con, srv);
509 	con->write_queue = &r->write_queue;
510 	con->read_queue = &r->read_queue;
511 
512 	/* init plugin-specific per-connection structures */
513 	con->plugin_ctx = calloc(1, (srv->plugins.used + 1) * sizeof(void *));
514 	force_assert(NULL != con->plugin_ctx);
515 
516 	return con;
517 }
518 
519 
/* Release a connection and everything it owns.
 * Queues are freed separately only when they are not the ones embedded in
 * the request (those are released by request_free_data()). */
static void connection_free(connection * const con) {
    request_st * const req = &con->request;

    connection_reset(con);

    chunkqueue * const wq = con->write_queue;
    if (wq != &req->write_queue)
        chunkqueue_free(wq);

    chunkqueue * const rq = con->read_queue;
    if (rq != &req->read_queue)
        chunkqueue_free(rq);

    request_free_data(req);

    free(con->plugin_ctx);
    free(con->dst_addr_buf.ptr);
    free(con);
}
534 
/* Free every connection currently held in srv->conns_pool,
 * leaving the pool empty. */
void connections_pool_clear(server * const srv) {
    for (connection *con = srv->conns_pool; NULL != con; con = srv->conns_pool) {
        srv->conns_pool = con->next; /* unlink before freeing */
        connection_free(con);
    }
}
542 
/* Free all connections of srv: first the pooled ones, then every connection
 * still on the active list srv->conns. */
void connections_free(server *srv) {
    connections_pool_clear(srv);

    for (connection *con = srv->conns; NULL != con; con = srv->conns) {
        srv->conns = con->next; /* unlink before freeing */
        connection_free(con);
    }
}
552 
553 
/* Reset the request and the per-connection byte counters,
 * and mark the connection readable. */
static void connection_reset(connection *con) {
	request_st * const req = &con->request;
	request_reset(req);

	/* byte accounting starts over */
	con->bytes_read = 0;
	con->bytes_written = 0;
	con->bytes_written_cur_second = 0;
	req->bytes_read_ckpt = 0;
	req->bytes_written_ckpt = 0;

	con->is_readable = 1;
}
565 
566 
567 __attribute_cold__
568 static chunk *
connection_discard_blank_line(chunkqueue * const cq,uint32_t header_len)569 connection_discard_blank_line (chunkqueue * const cq, uint32_t header_len)
570 {
571     /*(separate func only to be able to mark with compiler hint as cold)*/
572     chunkqueue_mark_written(cq, header_len);
573     return cq->first; /* refresh c after chunkqueue_mark_written() */
574 }
575 
576 
/* Try to obtain more request-header data.
 * Reads from the network when c is NULL or is the last chunk and the
 * connection is readable, then compacts multiple chunks into one block.
 * olen is the number of bytes previously seen in the first chunk.
 * Returns the first chunk of cq if it now holds more than olen bytes past
 * its offset, else NULL (also NULL if the connection switched to HTTP/2). */
static chunk * connection_read_header_more(connection *con, chunkqueue *cq, chunk *c, const size_t olen) {
    /*(should not be reached by HTTP/2 streams)*/
    /*if (r->http_version == HTTP_VERSION_2) return NULL;*/
    /*(However, new connections over TLS may become HTTP/2 connections via ALPN
     * and return from this routine with r->http_version == HTTP_VERSION_2) */

    const int at_tail = (NULL == c || NULL == c->next);
    if (at_tail && con->is_readable > 0) {
        request_st * const r = &con->request;
        con->read_idle_ts = log_monotonic_secs;
        if (0 != con->network_read(con, cq, MAX_READ_LIMIT))
            connection_set_state_error(r, CON_STATE_ERROR);
        /* check if switched to HTTP/2 (ALPN "h2" during TLS negotiation) */
        if (r->http_version == HTTP_VERSION_2)
            return NULL;
    }

    if (0 != olen && cq->first != cq->last) {
        /* round olen up to multiple of 16k, adding another 16k
         * if that leaves 1k or less of room beyond olen;
         * never compact more than the queue holds */
        const size_t qlen = chunkqueue_length(cq);
        size_t want = (olen + (16384-1)) & ~(16384-1);
        if (want - olen <= 1024)
            want += 16384;
        if (want > qlen)
            want = qlen;
        chunkqueue_compact_mem(cq, want);
    }

    /* detect if data is added to chunk */
    chunk * const head = cq->first;
    if (NULL == head)
        return NULL;
    return ((size_t)head->offset + olen < buffer_clen(head->mem))
      ? head
      : NULL;
}
607 
608 
609 static void
connection_transition_h2(request_st * const h2r,connection * const con)610 connection_transition_h2 (request_st * const h2r, connection * const con)
611 {
612     buffer_copy_string_len(&h2r->target,      CONST_STR_LEN("*"));
613     buffer_copy_string_len(&h2r->target_orig, CONST_STR_LEN("*"));
614     buffer_copy_string_len(&h2r->uri.path,    CONST_STR_LEN("*"));
615     h2r->http_method = HTTP_METHOD_PRI;
616     h2r->reqbody_length = -1; /*(unnecessary for h2r?)*/
617     h2r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLIN;
618 
619     /* (h2r->state == CON_STATE_READ) for transition by ALPN
620      *   or starting cleartext HTTP/2 with Prior Knowledge
621      *   (e.g. via HTTP Alternative Services)
622      * (h2r->state == CON_STATE_RESPONSE_END) for Upgrade: h2c */
623 
624     if (h2r->state != CON_STATE_ERROR)
625         connection_set_state(h2r, CON_STATE_WRITE);
626 
627   #if 0 /* ... if it turns out we need a separate fdevent handler for HTTP/2 */
628     con->fdn->handler = connection_handle_fdevent_h2;
629   #endif
630 
631     if (NULL == con->h2) /*(not yet transitioned to HTTP/2; not Upgrade: h2c)*/
632         h2_init_con(h2r, con, NULL);
633 }
634 
635 
/**
 * handle request header read
 *
 * we get called by the state-engine and by the fdevent-handler
 *
 * Scans con->read_queue for a complete set of request headers, reading more
 * data via connection_read_header_more() as needed, then processes them.
 *
 * returns 1 after request headers were processed, or after an error status
 *   (400 or 431) was set; r->state is CON_STATE_REQUEST_END in these cases
 * returns 0 if request headers are incomplete, if the HTTP/2 connection
 *   preface was detected on a cleartext connection (r->http_version is set
 *   to HTTP_VERSION_2), or if Upgrade: h2c was accepted
 */
__attribute_noinline__
static int connection_handle_read_state(connection * const con)  {
    /*(should not be reached by HTTP/2 streams)*/
    chunkqueue * const cq = con->read_queue;
    chunk *c = cq->first;
    uint32_t clen = 0;
    uint32_t header_len = 0;
    request_st * const r = &con->request;
    uint8_t keepalive_request_start = 0;
    uint8_t pipelined_request_start = 0;
    uint8_t discard_blank = 0;
    unsigned short hoff[8192]; /* max num header lines + 3; 16k on stack */

    if (con->request_count > 1) {
        /* subsequent request on this connection */
        discard_blank = 1;
        if (con->bytes_read == r->bytes_read_ckpt) {
            /* no bytes read since checkpoint taken at end of prior request */
            keepalive_request_start = 1;
            if (NULL != c) { /* !chunkqueue_is_empty(cq)) */
                pipelined_request_start = 1;
                /* partial header of next request has already been read,
                 * so optimistically check for more data received on
                 * socket while processing the previous request */
                con->is_readable = 1;
                /*(if partially read next request and unable to read any bytes,
                 * then will unnecessarily scan again before subsequent read)*/
            }
        }
    }

    /* loop: parse what is buffered; 'continue' jumps to the loop condition,
     * which attempts to read more data and ends the loop if none was added */
    do {
        if (NULL == c) continue;
        clen = buffer_clen(c->mem) - c->offset;
        if (0 == clen) continue;
        if (__builtin_expect( (c->offset > USHRT_MAX), 0)) /*(highly unlikely)*/
            chunkqueue_compact_mem_offset(cq);

        hoff[0] = 1;                         /* number of lines */
        hoff[1] = (unsigned short)c->offset; /* base offset for all lines */
        /*hoff[2] = ...;*/                   /* offset from base for 2nd line */

        header_len = http_header_parse_hoff(c->mem->ptr + c->offset,clen,hoff);

        /* casting to (unsigned short) might truncate, and the hoff[]
         * addition might overflow, but max_request_field_size is USHRT_MAX,
         * so failure will be detected below */
        const uint32_t max_request_field_size = r->conf.max_request_field_size;
        if ((header_len ? header_len : clen) > max_request_field_size
            || hoff[0] >= sizeof(hoff)/sizeof(hoff[0])-1) {
            log_error(r->conf.errh, __FILE__, __LINE__, "%s",
                      "oversized request-header -> sending Status 431");
            r->http_status = 431; /* Request Header Fields Too Large */
            r->keep_alive = 0;
            connection_set_state(r, CON_STATE_REQUEST_END);
            return 1;
        }

        if (__builtin_expect( (0 != header_len), 1)) {
            if (__builtin_expect( (hoff[0] > 1), 1))
                break; /* common case; request headers complete */

            /* (reached when header_len != 0 but hoff[0] <= 1) */
            if (discard_blank) { /* skip one blank line e.g. following POST */
                if (header_len == clen) continue;
                const int ch = c->mem->ptr[c->offset+header_len];
                if (ch != '\r' && ch != '\n') {
                    /* discard prior blank line if next line is not blank */
                    discard_blank = 0;
                    clen = 0;/*(for connection_read_header_more() to return c)*/
                    c = connection_discard_blank_line(cq, header_len);/*cold*/
                    continue;
                } /*(else fall through to error out in next block)*/
            }
        }

        if (((unsigned char *)c->mem->ptr)[c->offset] < 32) {
            /* expecting ASCII method beginning with alpha char
             * or HTTP/2 pseudo-header beginning with ':' */
            /*(TLS handshake begins with SYN 0x16 (decimal 22))*/
            log_error(r->conf.errh, __FILE__, __LINE__, "%s",
                      c->mem->ptr[c->offset] == 0x16
                      ? "unexpected TLS ClientHello on clear port"
                      : "invalid request-line -> sending Status 400");
            r->http_status = 400; /* Bad Request */
            r->keep_alive = 0;
            connection_set_state(r, CON_STATE_REQUEST_END);
            return 1;
        }
    } while ((c = connection_read_header_more(con, cq, c, clen)));

    if (keepalive_request_start) {
        if (con->bytes_read > r->bytes_read_ckpt) {
            /* update r->start_hp.tv_sec timestamp when first byte of
             * next request is received on a keep-alive connection */
            r->start_hp.tv_sec = log_epoch_secs;
            if (r->conf.high_precision_timestamps)
                log_clock_gettime_realtime(&r->start_hp);
        }
        if (pipelined_request_start && c)
            con->read_idle_ts = log_monotonic_secs;
    }

    if (NULL == c) return 0; /* incomplete request headers */

  #ifdef __COVERITY__
    if (buffer_clen(c->mem) < hoff[1]) {
        return 1;
    }
  #endif

    /* start of request headers within chunk (hoff[1] is base offset) */
    char * const hdrs = c->mem->ptr + hoff[1];

    if (con->request_count > 1) {
        /* clear buffers which may have been kept for reporting on keep-alive,
         * (e.g. mod_status) */
        request_reset_ex(r);
    }
    /* RFC7540 3.5 HTTP/2 Connection Preface
     * "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
     * (Connection Preface MUST be exact match)
     * If ALT-SVC used to advertise HTTP/2, then client might start
     * http connection (not TLS) sending HTTP/2 connection preface.
     * (note: intentionally checking only on initial request) */
    else if (!con->is_ssl_sock && r->conf.h2proto
             && hoff[0] == 2 && hoff[2] == 16
             && hdrs[0]=='P' && hdrs[1]=='R' && hdrs[2]=='I' && hdrs[3]==' ') {
        r->http_version = HTTP_VERSION_2;
        return 0;
    }

    r->rqst_header_len = header_len;
    if (r->conf.log_request_header)
        log_error_multiline(r->conf.errh, __FILE__, __LINE__,
                            hdrs, header_len, "fd:%d rqst: ", con->fd);
    http_request_headers_process(r, hdrs, hoff, con->proto_default_port);
    /* consume the request headers from the read queue */
    chunkqueue_mark_written(cq, r->rqst_header_len);
    connection_set_state(r, CON_STATE_REQUEST_END);

    if (light_btst(r->rqst_htags, HTTP_HEADER_UPGRADE)
        && 0 == r->http_status
        && h2_check_con_upgrade_h2c(r)) {
        /*(Upgrade: h2c over cleartext does not have SNI; no COMP_HTTP_HOST)*/
        r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
                                | (1 << COMP_HTTP_REMOTE_IP);
        /*connection_handle_write(r, con);*//* defer write to network */
        return 0;
    }

    return 1;
}
789 
790 
connection_handle_fdevent(void * const context,const int revents)791 static handler_t connection_handle_fdevent(void * const context, const int revents) {
792     connection * restrict con = context;
793     const int is_ssl_sock = con->is_ssl_sock;
794 
795     joblist_append(con);
796 
797     if (revents & ~(FDEVENT_IN | FDEVENT_OUT))
798         con->revents_err |= (revents & ~(FDEVENT_IN | FDEVENT_OUT));
799 
800     if (revents & (FDEVENT_IN | FDEVENT_OUT)) {
801         if (is_ssl_sock) /*(ssl may read and write for both reads and writes)*/
802             con->is_readable = con->is_writable = 1;
803         else {
804             if (revents & FDEVENT_IN)
805                 con->is_readable = 1;
806             if (revents & FDEVENT_OUT)
807                 con->is_writable = 1;
808         }
809     }
810 
811     return HANDLER_FINISHED;
812 }
813 
814 
815 __attribute_cold__
connection_read_cq_err(connection * con)816 static int connection_read_cq_err(connection *con) {
817     request_st * const r = &con->request;
818   #if defined(__WIN32)
819     int lastError = WSAGetLastError();
820     switch (lastError) {
821     case EAGAIN:
822         return 0;
823     case EINTR:
824         /* we have been interrupted before we could read */
825         con->is_readable = 1;
826         return 0;
827     case ECONNRESET:
828         /* suppress logging for this error, expected for keep-alive */
829         break;
830     default:
831         log_error(r->conf.errh, __FILE__, __LINE__,
832           "connection closed - recv failed: %d", lastError);
833         break;
834     }
835   #else /* __WIN32 */
836     switch (errno) {
837     case EAGAIN:
838         return 0;
839     case EINTR:
840         /* we have been interrupted before we could read */
841         con->is_readable = 1;
842         return 0;
843     case ECONNRESET:
844         /* suppress logging for this error, expected for keep-alive */
845         break;
846     default:
847         log_perror(r->conf.errh, __FILE__, __LINE__,
848           "connection closed - read failed");
849         break;
850     }
851   #endif /* __WIN32 */
852 
853     connection_set_state_error(r, CON_STATE_ERROR);
854     return -1;
855 }
856 
857 
858 /* 0: everything ok, -1: error, -2: con closed */
connection_read_cq(connection * con,chunkqueue * cq,off_t max_bytes)859 static int connection_read_cq(connection *con, chunkqueue *cq, off_t max_bytes) {
860     ssize_t len;
861     size_t mem_len = 0;
862 
863     do {
864         /* obtain chunk memory into which to read
865          * fill previous chunk if it has a reasonable amount of space available
866          * (use mem_len=0 to obtain large buffer at least half of chunk_buf_sz)
867          */
868         chunk *ckpt = cq->last;
869         char * const mem = chunkqueue_get_memory(cq, &mem_len);
870         if (mem_len > (size_t)max_bytes) mem_len = (size_t)max_bytes;
871 
872       #if defined(__WIN32)
873         len = recv(con->fd, mem, mem_len, 0);
874       #else
875         len = read(con->fd, mem, mem_len);
876       #endif
877 
878         chunkqueue_use_memory(cq, ckpt, len > 0 ? len : 0);
879 
880         if (len != (ssize_t)mem_len) {
881             /* we got less then expected, wait for the next fd-event */
882             con->is_readable = 0;
883 
884             if (len > 0) {
885                 con->bytes_read += len;
886                 return 0;
887             }
888             else if (0 == len) /* other end close connection -> KEEP-ALIVE */
889                 return -2;     /* (pipelining) */
890             else
891                 return connection_read_cq_err(con);
892         }
893 
894         con->bytes_read += len;
895         max_bytes -= len;
896 
897         int frd;
898         mem_len = (0 == fdevent_ioctl_fionread(con->fd, S_IFSOCK, &frd))
899           ? (frd < max_bytes) ? (size_t)frd : (size_t)max_bytes
900           : 0;
901     } while (max_bytes);
902     return 0;
903 }
904 
905 
/* Write up to max_bytes from cq to the client socket by delegating to the
 * server's configured network_backend_write function; its return value is
 * passed through unchanged.  Errors are logged to the request's errh. */
static int connection_write_cq(connection *con, chunkqueue *cq, off_t max_bytes) {
    log_error_st * const errh = con->request.conf.errh;
    return con->srv->network_backend_write(con->fd, cq, max_bytes, errh);
}
910 
911 
912 static handler_t connection_handle_read_post_state(request_st * const r);
913 
connection_accepted(server * srv,const server_socket * srv_socket,sock_addr * cnt_addr,int cnt)914 connection *connection_accepted(server *srv, const server_socket *srv_socket, sock_addr *cnt_addr, int cnt) {
915 		connection *con;
916 
917 		srv->cur_fds++;
918 
919 		/* ok, we have the connection, register it */
920 #if 0
921 		log_error(srv->errh, __FILE__, __LINE__, "accepted() %d", cnt);
922 #endif
923 		srv->con_opened++;
924 
925 		con = connections_get_new_connection(srv);
926 
927 		con->fd = cnt;
928 		con->fdn = fdevent_register(srv->ev, con->fd, connection_handle_fdevent, con);
929 		con->network_read = connection_read_cq;
930 		con->network_write = connection_write_cq;
931 		con->reqbody_read = connection_handle_read_post_state;
932 
933 		request_st * const r = &con->request;
934 		connection_set_state(r, CON_STATE_REQUEST_START);
935 
936 		con->connection_start = log_monotonic_secs;
937 		con->dst_addr = *cnt_addr;
938 		sock_addr_cache_inet_ntop_copy_buffer(&con->dst_addr_buf,
939 		                                      &con->dst_addr);
940 		con->srv_socket = srv_socket;
941 		con->is_ssl_sock = srv_socket->is_ssl;
942 		con->proto_default_port = 80; /* "http" */
943 
944 		config_cond_cache_reset(r);
945 		r->conditional_is_valid = (1 << COMP_SERVER_SOCKET)
946 		                        | (1 << COMP_HTTP_REMOTE_IP);
947 
948 		if (HANDLER_GO_ON != plugins_call_handle_connection_accept(con)) {
949 			connection_reset(con);
950 			connection_close(con);
951 			return NULL;
952 		}
953 		if (r->http_status < 0) connection_set_state(r, CON_STATE_WRITE);
954 		return con;
955 }
956 
957 
958 __attribute_cold__
959 __attribute_noinline__
960 static const char *
connection_get_state(request_state_t state)961 connection_get_state (request_state_t state)
962 {
963     switch (state) {
964       case CON_STATE_CONNECT:        return "connect";
965       case CON_STATE_READ:           return "read";
966       case CON_STATE_READ_POST:      return "readpost";
967       case CON_STATE_WRITE:          return "write";
968       case CON_STATE_CLOSE:          return "close";
969       case CON_STATE_ERROR:          return "error";
970       case CON_STATE_HANDLE_REQUEST: return "handle-req";
971       case CON_STATE_REQUEST_START:  return "req-start";
972       case CON_STATE_REQUEST_END:    return "req-end";
973       case CON_STATE_RESPONSE_START: return "resp-start";
974       case CON_STATE_RESPONSE_END:   return "resp-end";
975       default:                       return "(unknown)";
976     }
977 }
978 
979 
980 static void connection_state_machine_h2 (request_st *h2r, connection *con);
981 
982 
/* Advance request r on connection con through the request state machine.
 *
 * Each pass records the state at entry in ostate, runs the handler for that
 * state (transient states fall through to the next case), and the loop
 * repeats as long as the state after the pass differs from ostate.
 *
 * The function returns early (without re-evaluating the loop condition) when
 *  - the read state hands the connection over to HTTP/2,
 *  - http_response_handler() returns HANDLER_WAIT_FOR_EVENT, or
 *  - an HTTP/2 stream (r != &con->request) reaches RESPONSE_END or ERROR;
 *    the caller is left to deal with r in that state. */
static void
connection_state_machine_loop (request_st * const r, connection * const con)
{
	request_state_t ostate;
	do {
		if (r->conf.log_state_handling) {
			log_error(r->conf.errh, __FILE__, __LINE__,
			  "state for fd:%d id:%d %s", con->fd, r->h2id,
			  connection_get_state(r->state));
		}

		switch ((ostate = r->state)) {
		case CON_STATE_REQUEST_START: /* transient */
			/*(should not be reached by HTTP/2 streams)*/
			/* stamp request start time and reset per-request counters */
			r->start_hp.tv_sec = log_epoch_secs;
			con->read_idle_ts = log_monotonic_secs;
			if (r->conf.high_precision_timestamps)
				log_clock_gettime_realtime(&r->start_hp);

			con->request_count++;
			r->loops_per_request = 0;

			connection_set_state(r, CON_STATE_READ);
			__attribute_fallthrough__
		case CON_STATE_READ:
			/*(should not be reached by HTTP/2 streams)*/
			if (!connection_handle_read_state(con)) {
				/* zero result: either the request was detected to be
				 * HTTP/2 (hand off to the h2 state machine and leave),
				 * or stay in this state for now */
				if (r->http_version == HTTP_VERSION_2) {
					connection_transition_h2(r, con);
					connection_state_machine_h2(r, con);
					return;
				}
				break;
			}
			/*if (r->state != CON_STATE_REQUEST_END) break;*/
			__attribute_fallthrough__
		case CON_STATE_REQUEST_END: /* transient */
			/* no request body: handle request directly; otherwise read body.
			 * ostate is overwritten with the new state so that the loop
			 * condition compares against the state chosen here */
			ostate = (0 == r->reqbody_length)
			  ? CON_STATE_HANDLE_REQUEST
			  : CON_STATE_READ_POST;
			connection_set_state(r, ostate);
			__attribute_fallthrough__
		case CON_STATE_READ_POST:
		case CON_STATE_HANDLE_REQUEST:
			switch (http_response_handler(r)) {
			  case HANDLER_GO_ON:/*CON_STATE_RESPONSE_START occurred;transient*/
			  case HANDLER_FINISHED:
				break;
			  case HANDLER_WAIT_FOR_EVENT:
				return;
			  case HANDLER_COMEBACK:
				/* redo loop; will not match r->state */
				/* (continue in a do-while jumps to the loop condition) */
				ostate = CON_STATE_CONNECT;
				continue;
			  /*case HANDLER_ERROR:*/
			  default:
				connection_set_state_error(r, CON_STATE_ERROR);
				continue;
			}
			/*__attribute_fallthrough__*/
		/*case CON_STATE_RESPONSE_START:*//*occurred;transient*/
			/* response headers: HTTP/2 framing for versions above 1.1,
			 * otherwise the HTTP/1.x header writer */
			if (r->http_version > HTTP_VERSION_1_1)
				h2_send_headers(r, con);
			else
				http_response_write_header(r);
			connection_set_state(r, CON_STATE_WRITE);
			__attribute_fallthrough__
		case CON_STATE_WRITE:
			/* stay in the write state unless the write handler reports
			 * that the response has ended */
			if (connection_handle_write_state(r, con)
			    != CON_STATE_RESPONSE_END)
				break;
			__attribute_fallthrough__
		case CON_STATE_RESPONSE_END: /* transient */
		case CON_STATE_ERROR:        /* transient */
			/* HTTP/2 streams are not finalized here; only the
			 * connection-level request (&con->request) proceeds */
			if (r->http_version > HTTP_VERSION_1_1 && r != &con->request)
				return;
			connection_handle_response_end_state(r, con);
			break;
		case CON_STATE_CLOSE:
			/*(should not be reached by HTTP/2 streams)*/
			connection_handle_close_state(con);
			break;
		case CON_STATE_CONNECT:
			break;
		default:
			log_error(r->conf.errh, __FILE__, __LINE__,
			  "unknown state: %d %d", con->fd, r->state);
			break;
		}
	} while (ostate != (request_state_t)r->state);
}
1074 
1075 
1076 __attribute_cold__
1077 static void
connection_revents_err(request_st * const r,connection * const con)1078 connection_revents_err (request_st * const r, connection * const con)
1079 {
1080     /* defer handling FDEVENT_HUP and FDEVENT_ERR to here in order to
1081      * first attempt (in callers) to read data in kernel socket buffers */
1082     /*assert(con->revents_err & ~(FDEVENT_IN | FDEVENT_OUT));*/
1083     const int revents = (int)con->revents_err;
1084     con->revents_err = 0;
1085 
1086     if (r->state == CON_STATE_CLOSE)
1087         con->close_timeout_ts = log_monotonic_secs - (HTTP_LINGER_TIMEOUT+1);
1088     else if (revents & FDEVENT_HUP)
1089         connection_set_state_error(r, CON_STATE_ERROR);
1090     else if (revents & FDEVENT_RDHUP) {
1091         int events = fdevent_fdnode_interest(con->fdn);
1092         events &= ~(FDEVENT_IN|FDEVENT_RDHUP);
1093         r->conf.stream_request_body &=
1094           ~(FDEVENT_STREAM_REQUEST_BUFMIN|FDEVENT_STREAM_REQUEST_POLLIN);
1095         r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLRDHUP;
1096         con->is_readable = 1; /*(can read 0 for end-of-stream)*/
1097         if (chunkqueue_is_empty(con->read_queue)) r->keep_alive = 0;
1098         if (r->reqbody_length < -1)/*(transparent proxy mode; no more rd data)*/
1099             r->reqbody_length = r->reqbody_queue.bytes_in;
1100         if (sock_addr_get_family(&con->dst_addr) == AF_UNIX) {
1101             /* future: will getpeername() on AF_UNIX check if still connected?*/
1102             fdevent_fdnode_event_set(con->srv->ev, con->fdn, events);
1103         }
1104         else if (fdevent_is_tcp_half_closed(con->fd)) {
1105             /* Success of fdevent_is_tcp_half_closed() after FDEVENT_RDHUP
1106              * indicates TCP FIN received, but does not distinguish between
1107              * client shutdown(fd, SHUT_WR) and client close(fd).  Remove
1108              * FDEVENT_RDHUP so that we do not spin on ready event.  However,
1109              * a later TCP RST will not be detected until next write to socket.
1110              * future: might getpeername() to check for TCP RST on half-closed
1111              * sockets (without FDEVENT_RDHUP interest) when checking for write
1112              * timeouts once a second in server.c, though getpeername() on
1113              * Windows might not indicate this */
1114             r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_TCP_FIN;
1115             fdevent_fdnode_event_set(con->srv->ev, con->fdn, events);
1116         }
1117         else {
1118             /* Failure of fdevent_is_tcp_half_closed() indicates TCP RST
1119              * (or unable to tell (unsupported OS), though should not
1120              * be setting FDEVENT_RDHUP in that case) */
1121             connection_set_state_error(r, CON_STATE_ERROR);
1122         }
1123     }
1124     else if (revents & FDEVENT_ERR)  /* error, connection reset */
1125         connection_set_state_error(r, CON_STATE_ERROR);
1126     else
1127         log_error(r->conf.errh, __FILE__, __LINE__,
1128           "connection closed: poll() -> ??? %d", revents);
1129 }
1130 
1131 
1132 static void
connection_set_fdevent_interest(request_st * const r,connection * const con)1133 connection_set_fdevent_interest (request_st * const r, connection * const con)
1134 {
1135     if (con->fd < 0) return;
1136 
1137     if (con->revents_err && r->state != CON_STATE_ERROR) {
1138         connection_revents_err(r, con); /* resets con->revents_err = 0 */
1139         connection_state_machine(con);
1140         return;
1141         /* connection_state_machine() will end up calling back into
1142          * connection_set_fdevent_interest(), but with 0 == con->revents_err */
1143     }
1144 
1145     int n = 0;
1146     switch(r->state) {
1147       case CON_STATE_READ:
1148         n = FDEVENT_IN;
1149         if (!(r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLRDHUP))
1150             n |= FDEVENT_RDHUP;
1151         break;
1152       case CON_STATE_WRITE:
1153         if (!chunkqueue_is_empty(con->write_queue)
1154             && 0 == con->is_writable && 0 == con->traffic_limit_reached)
1155             n |= FDEVENT_OUT;
1156         __attribute_fallthrough__
1157       case CON_STATE_READ_POST:
1158         if (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLIN)
1159             n |= FDEVENT_IN;
1160         if (!(r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_POLLRDHUP))
1161             n |= FDEVENT_RDHUP;
1162         break;
1163       case CON_STATE_CLOSE:
1164         n = FDEVENT_IN;
1165         break;
1166       case CON_STATE_CONNECT:
1167         return;
1168       default:
1169         break;
1170     }
1171 
1172     const int events = fdevent_fdnode_interest(con->fdn);
1173     if (con->is_readable < 0) {
1174         con->is_readable = 0;
1175         n |= FDEVENT_IN;
1176     }
1177     if (con->is_writable < 0) {
1178         con->is_writable = 0;
1179         n |= FDEVENT_OUT;
1180     }
1181     if (events & FDEVENT_RDHUP)
1182         n |= FDEVENT_RDHUP;
1183 
1184     if (n == events) return;
1185 
1186     /* update timestamps when enabling interest in events */
1187     if ((n & FDEVENT_IN) && !(events & FDEVENT_IN))
1188         con->read_idle_ts = log_monotonic_secs;
1189     if ((n & FDEVENT_OUT) && !(events & FDEVENT_OUT))
1190         con->write_request_ts = log_monotonic_secs;
1191     fdevent_fdnode_event_set(con->srv->ev, con->fdn, n);
1192 }
1193 
1194 
1195 __attribute_cold__
1196 static void
connection_request_end_h2(request_st * const h2r,connection * const con)1197 connection_request_end_h2 (request_st * const h2r, connection * const con)
1198 {
1199     if (h2r->keep_alive >= 0) {
1200         h2r->keep_alive = -1;
1201         h2_send_goaway(con, H2_E_NO_ERROR);
1202     }
1203     else /*(abort connection upon second request to close h2 connection)*/
1204         h2_send_goaway(con, H2_E_ENHANCE_YOUR_CALM);
1205 }
1206 
1207 
/* Drive an HTTP/2 connection.
 *
 * h2r is the connection-level request (callers in this file pass
 * &con->request); the active streams are h2c->r[0 .. h2c->rused-1].
 *
 * Steps, in order:
 *  1. optimistically read from the socket and parse frames (only while no
 *     GOAWAY has been sent)
 *  2. run the request state machine for each stream and move response data
 *     from stream write queues into HTTP/2 frames, within a coarse byte
 *     budget; retire streams that reached RESPONSE_END or ERROR
 *  3. if a GOAWAY with an error has been sent, retire all remaining streams
 *  4. write queued frames to the socket
 *  5. reschedule the connection and adjust read-polling, or run the state
 *     machine on h2r when it is no longer in the write state
 *  6. update fd event interest */
static void
connection_state_machine_h2 (request_st * const h2r, connection * const con)
{
    h2con * const h2c = con->h2;

    /* (h2_parse_frames() is only called when read_queue holds data; a zero
     *  result from it skips the read below) */
    if (h2c->sent_goaway <= 0
        && (chunkqueue_is_empty(con->read_queue) || h2_parse_frames(con))
        && con->is_readable > 0) {
        chunkqueue * const cq = con->read_queue;
        const off_t mark = cq->bytes_in;
        if (0 == con->network_read(con, cq, MAX_READ_LIMIT)) {
            /* parse only if the read actually added bytes */
            if (mark < cq->bytes_in)
                h2_parse_frames(con);
        }
        else {
            /* network error; do not send GOAWAY, but pretend that we did */
            h2c->sent_goaway = H2_E_CONNECT_ERROR; /*any error (not NO_ERROR)*/
            connection_set_state_error(h2r, CON_STATE_ERROR);
        }
    }

    /* process requests on HTTP/2 streams */
    int resched = 0;
    if (h2c->sent_goaway <= 0 && h2c->rused) {
        /* coarse check for write throttling
         * (connection.kbytes-per-second, server.kbytes-per-second)
         * obtain an approximate limit, not refreshed per request_st,
         * even though we are not calculating response HEADERS frames
         * or frame overhead here */
        off_t max_bytes = con->is_writable > 0
          ? connection_write_throttle(con, MAX_WRITE_LIMIT)
          : 0;
        /* reduce the budget by what is already staged in con->write_queue */
        const off_t cqlen = chunkqueue_length(con->write_queue);
        if (cqlen > 8192 && max_bytes > 65536) max_bytes = 65536;
        max_bytes -= cqlen;
        if (max_bytes < 0) max_bytes = 0;

        /* XXX: to avoid buffer bloat due to staging too much data in
         * con->write_queue, consider setting limit on how much is staged
         * for sending on con->write_queue: adjusting max_bytes down */

        /* XXX: TODO: process requests in stream priority order */
        for (uint32_t i = 0; i < h2c->rused; ++i) {
            request_st * const r = h2c->r[i];
            /* future: might track read/write interest per request
             * to avoid iterating through all active requests */

          #if 0
            const int log_state_handling = r->conf.log_state_handling;
            if (log_state_handling)
                log_error(r->conf.errh, __FILE__, __LINE__,
                  "state at enter %d %d %s", con->fd, r->h2id,
                  connection_get_state(r->state));
          #endif

            connection_state_machine_loop(r, con);

            /* stage response body data for this stream if headers have been
             * produced, data is queued, budget remains, and the response is
             * either complete or configured for streaming */
            if (r->resp_header_len && !chunkqueue_is_empty(&r->write_queue)
                && max_bytes
                && (r->resp_body_finished
                    || (r->conf.stream_response_body
                        & (FDEVENT_STREAM_RESPONSE
                          |FDEVENT_STREAM_RESPONSE_BUFMIN)))) {

                /* at most 32768 bytes per stream per pass */
                uint32_t dlen = max_bytes > 32768 ? 32768 : (uint32_t)max_bytes;
                dlen = h2_send_cqdata(r, con, &r->write_queue, dlen);
                if (dlen) { /*(do not resched (spin) if swin empty window)*/
                    max_bytes -= (off_t)dlen;
                    if (!chunkqueue_is_empty(&r->write_queue))
                        resched |= 1;
                }
            }

            {
                /* stream response fully staged: mark it ended */
                if (chunkqueue_is_empty(&r->write_queue)) {
                    if (r->resp_body_finished && r->state == CON_STATE_WRITE) {
                        connection_set_state(r, CON_STATE_RESPONSE_END);
                        /* extra pass only when state logging is enabled */
                        if (__builtin_expect( (r->conf.log_state_handling), 0))
                            connection_state_machine_loop(r, con);
                    }
                }
            }

          #if 0
            if (log_state_handling)
                log_error(r->conf.errh, __FILE__, __LINE__,
                  "state at exit %d %d %s", con->fd, r->h2id,
                  connection_get_state(r->state));
          #endif

            if (r->state==CON_STATE_RESPONSE_END || r->state==CON_STATE_ERROR) {
                /*(trigger reschedule of con if frames pending)*/
                /* (stream table was full, so a slot freeing up may allow
                 *  frames left in con->read_queue to be processed) */
                if (h2c->rused == sizeof(h2c->r)/sizeof(*h2c->r)
                    && !chunkqueue_is_empty(con->read_queue))
                    resched |= 2;
                h2_send_end_stream(r, con);
                /* copy before retiring: r must not be used afterwards */
                const int alive = r->keep_alive;
                h2_retire_stream(r, con);/*r invalidated;removed from h2c->r[]*/
                --i;/* adjust loop i; h2c->rused was modified to retire r */
                /*(special-case: allow *stream* to set r->keep_alive = -1 to
                 * trigger goaway on h2 connection, e.g. after mod_auth failure
                 * in attempt to mitigate brute force attacks by forcing a
                 * reconnect and (somewhat) slowing down retries)*/
                if (alive < 0)
                    connection_request_end_h2(h2r, con);
            }
        }

        /* budget exhausted: more data may be waiting on the streams */
        if (0 == max_bytes) resched |= 1;
    }

    if (h2c->sent_goaway > 0 && h2c->rused) {
        /* retire streams if an error has occurred
         * note: this is not done to other streams in the loop above
         * (besides the current stream in the loop) due to the specific
         * implementation above, where doing so would mess up the iterator */
        for (uint32_t i = 0; i < h2c->rused; ++i) {
            request_st * const r = h2c->r[i];
            /*assert(r->h2state == H2_STATE_CLOSED);*/
            h2_retire_stream(r, con);/*r invalidated;removed from h2c->r[]*/
            --i;/* adjust loop i; h2c->rused was modified to retire r */
        }
        /* XXX: ? should we discard con->write_queue
         *        and change h2r->state to CON_STATE_RESPONSE_END ? */
    }

    if (h2r->state == CON_STATE_WRITE) {
        /* write HTTP/2 frames to socket */
        if (!chunkqueue_is_empty(con->write_queue))
            connection_handle_write(h2r, con);

        /* everything flushed, no streams left, and GOAWAY recorded:
         * the connection-level request is done */
        if (chunkqueue_is_empty(con->write_queue)
            && 0 == h2c->rused && h2c->sent_goaway)
            connection_set_state(h2r, CON_STATE_RESPONSE_END);
    }

    if (h2r->state == CON_STATE_WRITE) {
        /* (resched & 1) more data is available to write, if still able to write
         * (resched & 2) resched to read deferred frames from con->read_queue */
        /*(con->is_writable set to 0 if !chunkqueue_is_empty(con->write_queue)
         * after trying to write in connection_handle_write() above)*/
        if (((resched & 1) && con->is_writable>0 && !con->traffic_limit_reached)
            || (resched & 2))
            joblist_append(con);

        /* POLLIN flag is consumed by connection_set_fdevent_interest() */
        if (h2_want_read(con))
            h2r->conf.stream_request_body |=  FDEVENT_STREAM_REQUEST_POLLIN;
        else
            h2r->conf.stream_request_body &= ~FDEVENT_STREAM_REQUEST_POLLIN;
    }
    else /* e.g. CON_STATE_RESPONSE_END or CON_STATE_ERROR */
        connection_state_machine_loop(h2r, con);

    connection_set_fdevent_interest(h2r, con);
}
1363 
1364 
1365 static void
connection_state_machine_h1(request_st * const r,connection * const con)1366 connection_state_machine_h1 (request_st * const r, connection * const con)
1367 {
1368 	const int log_state_handling = r->conf.log_state_handling;
1369 	if (log_state_handling) {
1370 		log_error(r->conf.errh, __FILE__, __LINE__,
1371 		  "state at enter %d %s", con->fd, connection_get_state(r->state));
1372 	}
1373 
1374 	connection_state_machine_loop(r, con);
1375 
1376 	if (log_state_handling) {
1377 		log_error(r->conf.errh, __FILE__, __LINE__,
1378 		  "state at exit: %d %s", con->fd, connection_get_state(r->state));
1379 	}
1380 
1381 	connection_set_fdevent_interest(r, con);
1382 }
1383 
1384 
1385 void
connection_state_machine(connection * const con)1386 connection_state_machine (connection * const con)
1387 {
1388     request_st * const r = &con->request;
1389     if (r->http_version == HTTP_VERSION_2)
1390         connection_state_machine_h2(r, con);
1391     else /* if (r->http_version <= HTTP_VERSION_1_1) */
1392         connection_state_machine_h1(r, con);
1393 }
1394 
1395 
/* Check a single connection for expired timeouts and lifted traffic limits.
 *
 * con     connection to check
 * cur_ts  current timestamp; compared against con->close_timeout_ts,
 *         con->read_idle_ts, con->write_request_ts and con->connection_start
 *
 * Depending on the state of the connection-level request this checks the
 * linger timeout (CON_STATE_CLOSE), per-stream read/write idle timeouts and
 * keep-alive idle (HTTP/2 in CON_STATE_WRITE), or read idle / keep-alive
 * idle (when FDEVENT_IN interest is registered).  A write idle timeout is
 * additionally checked for HTTP/1.x in CON_STATE_WRITE.
 *
 * con->bytes_written_cur_second is reset on every call.  If anything
 * changed, connection_state_machine() is run for the connection. */
static void connection_check_timeout (connection * const con, const unix_time64_t cur_ts) {
    const int waitevents = fdevent_fdnode_interest(con->fdn);
    int changed = 0;
    int t_diff;

    request_st * const r = &con->request;
    if (r->state == CON_STATE_CLOSE) {
        /* lingering close: give up once the linger timeout has passed */
        if (cur_ts - con->close_timeout_ts > HTTP_LINGER_TIMEOUT) {
            changed = 1;
        }
    }
    else if (con->h2 && r->state == CON_STATE_WRITE) {
        h2con * const h2c = con->h2;
        if (h2c->rused) {
            /* active streams: apply read/write idle timeouts per stream */
            for (uint32_t i = 0; i < h2c->rused; ++i) {
                request_st * const rr = h2c->r[i];
                if (rr->state == CON_STATE_ERROR) { /*(should not happen)*/
                    changed = 1;
                    continue;
                }
                /* request body not yet fully received on this stream */
                if (rr->reqbody_length != rr->reqbody_queue.bytes_in) {
                    /* XXX: should timeout apply if not trying to read on h2con?
                     * (still applying timeout to catch stuck connections) */
                    /* XXX: con->read_idle_ts is not per-request, so timeout
                     * will not occur if other read activity occurs on h2con
                     * (future: might keep separate timestamp per-request) */
                    if (cur_ts - con->read_idle_ts > rr->conf.max_read_idle) {
                        /* time - out */
                        if (rr->conf.log_request_handling) {
                            log_error(rr->conf.errh, __FILE__, __LINE__,
                              "request aborted - read timeout: %d", con->fd);
                        }
                        /* NOTE(review): the error state is set on r (the
                         * connection-level request), not on the stream rr;
                         * presumably intentional since the timestamp is
                         * per-connection -- confirm */
                        connection_set_state_error(r, CON_STATE_ERROR);
                        changed = 1;
                    }
                }

                if (rr->state != CON_STATE_READ_POST
                    && con->write_request_ts != 0) {
                    /* XXX: con->write_request_ts is not per-request, so timeout
                     * will not occur if other write activity occurs on h2con
                     * (future: might keep separate timestamp per-request) */
                    if (cur_ts - con->write_request_ts
                        > r->conf.max_write_idle) {
                        /*(see comment further down about max_write_idle)*/
                        /* time - out */
                        /* NOTE(review): limit, target and byte count below
                         * are taken from r, not from the stream rr --
                         * confirm this is intended */
                        if (r->conf.log_timeouts) {
                            log_error(r->conf.errh, __FILE__, __LINE__,
                              "NOTE: a request from %s for %.*s timed out "
                              "after writing %lld bytes. We waited %d seconds. "
                              "If this is a problem, increase "
                              "server.max-write-idle",
                              con->dst_addr_buf.ptr,
                              BUFFER_INTLEN_PTR(&r->target),
                              (long long)r->write_queue.bytes_out,
                              (int)r->conf.max_write_idle);
                        }
                        connection_set_state_error(r, CON_STATE_ERROR);
                        changed = 1;
                    }
                }
            }
        }
        else {
            /* no active streams: connection is idle; apply keep-alive idle */
            if (cur_ts - con->read_idle_ts > con->keep_alive_idle) {
                /* time - out */
                if (r->conf.log_request_handling) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                              "connection closed - keep-alive timeout: %d",
                              con->fd);
                }
                connection_set_state(r, CON_STATE_RESPONSE_END);
                changed = 1;
            }
        }
        /* process changes before optimistic read of additional HTTP/2 frames */
        if (changed)
            con->is_readable = 0;
    }
    else if (waitevents & FDEVENT_IN) {
        /* first request on the connection, or not waiting for a new request:
         * read idle timeout; otherwise (waiting for a subsequent request in
         * CON_STATE_READ): keep-alive idle timeout */
        if (con->request_count == 1 || r->state != CON_STATE_READ) {
            /* e.g. CON_STATE_READ_POST || CON_STATE_WRITE */
            if (cur_ts - con->read_idle_ts > r->conf.max_read_idle) {
                /* time - out */
                if (r->conf.log_request_handling) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                              "connection closed - read timeout: %d", con->fd);
                }

                connection_set_state_error(r, CON_STATE_ERROR);
                changed = 1;
            }
        } else {
            if (cur_ts - con->read_idle_ts > con->keep_alive_idle) {
                /* time - out */
                if (r->conf.log_request_handling) {
                    log_error(r->conf.errh, __FILE__, __LINE__,
                              "connection closed - keep-alive timeout: %d",
                              con->fd);
                }

                connection_set_state_error(r, CON_STATE_ERROR);
                changed = 1;
            }
        }
    }

    /* max_write_idle timeout currently functions as backend timeout,
     * too, after response has been started.
     * Although backend timeouts now exist, there is no default for timeouts
     * to backends, so were this client timeout now to be changed to check
     * for write interest to the client, then timeout would not occur if the
     * backend hung and there was no backend read timeout set.  Therefore,
     * max_write_idle timeout remains timeout for both reading from backend
     * and writing to client, though this check here is only for HTTP/1.1.
     * In the future, if there were a quick way to detect that a backend
     * read timeout was in effect, then this timeout could check for write
     * interest to client.  (not a priority) */
    /*if (waitevents & FDEVENT_OUT)*/
    if (r->http_version <= HTTP_VERSION_1_1
        && r->state == CON_STATE_WRITE && con->write_request_ts != 0) {
      #if 0
        if (cur_ts - con->write_request_ts > 60) {
            log_error(r->conf.errh, __FILE__, __LINE__,
                      "connection closed - pre-write-request-timeout: %d %d",
                      con->fd, cur_ts - con->write_request_ts);
        }
      #endif

        if (cur_ts - con->write_request_ts > r->conf.max_write_idle) {
            /* time - out */
            if (r->conf.log_timeouts) {
                log_error(r->conf.errh, __FILE__, __LINE__,
                  "NOTE: a request from %s for %.*s timed out after writing "
                  "%lld bytes. We waited %d seconds. If this is a problem, "
                  "increase server.max-write-idle",
                  con->dst_addr_buf.ptr,
                  BUFFER_INTLEN_PTR(&r->target),
                  (long long)con->bytes_written, (int)r->conf.max_write_idle);
            }
            connection_set_state_error(r, CON_STATE_ERROR);
            changed = 1;
        }
    }

    /* lighttpd HTTP/2 limitation: rate limit config r->conf.bytes_per_second
     * (currently) taken only from top-level config (socket), with host if SNI
     * used, but not any other config conditions, e.g. not per-file-type */

    /* connection age in seconds; forced to 1 when zero so that the
     * bytes_per_second * t_diff product below is not zero */
    if (0 == (t_diff = cur_ts - con->connection_start)) t_diff = 1;

    /* lift the traffic limit when no limit is configured, or when the total
     * written is below what the configured rate allows for the age */
    if (con->traffic_limit_reached &&
        (r->conf.bytes_per_second == 0 ||
         con->bytes_written < (off_t)r->conf.bytes_per_second * t_diff)) {
        /* enable connection again */
        con->traffic_limit_reached = 0;

        changed = 1;
    }

    con->bytes_written_cur_second = 0;

    if (changed) {
        connection_state_machine(con);
    }
}
1562 
/* Periodic maintenance pass: run the timeout check on every connection
 * currently linked on srv->conns, using cur_ts as the current time. */
void connection_periodic_maint (server * const srv, const unix_time64_t cur_ts) {
    connection *con = srv->conns;
    while (NULL != con) {
        /* fetch successor first; connection_del() in this file rewrites
         * con->next when a connection is moved to the pool
         * (presumably reachable from the timeout check -- verify) */
        connection * const successor = con->next;
        connection_check_timeout(con, cur_ts);
        con = successor;
    }
}
1570 
/* Graceful-shutdown pass over every connection linked on srv->conns.
 *
 * For each connection: nudge it toward closing according to its current
 * request state, force an error state once srv->graceful_expire_ts has
 * passed, disable keep-alive and rate limiting, and re-run the connection
 * state machine if anything was changed. */
void connection_graceful_shutdown_maint (server *srv) {
    /* nonzero once a (non-zero) graceful expiry timestamp lies in the past */
    const int expired = (srv->graceful_expire_ts
                         && srv->graceful_expire_ts < log_monotonic_secs);

    connection *con = srv->conns;
    while (NULL != con) {
        /* fetch successor before the state machine might run on con */
        connection * const successor = con->next;
        request_st * const r = &con->request;
        int kick = 0; /* set when connection_state_machine() must run */

        switch (r->state) {
          case CON_STATE_CLOSE:
            /* shrink the remaining linger timeout so that at most
             * one more second (possibly zero) is left */
            if (HTTP_LINGER_TIMEOUT > 1)
                con->close_timeout_ts -= (HTTP_LINGER_TIMEOUT - 1);
            if (log_monotonic_secs - con->close_timeout_ts
                > HTTP_LINGER_TIMEOUT)
                kick = 1;
            break;
          case CON_STATE_WRITE:
            if (con->h2) {
                h2_send_goaway(con, H2_E_NO_ERROR);
                /* nothing in flight and nothing left to write: finish up */
                if (0 == con->h2->rused
                    && chunkqueue_is_empty(con->write_queue)) {
                    connection_set_state(r, CON_STATE_RESPONSE_END);
                    kick = 1;
                }
            }
            break;
          case CON_STATE_READ:
            /* keep-alive connection idling while waiting for next request */
            if (con->request_count > 1
                && chunkqueue_is_empty(con->read_queue)) {
                connection_set_state_error(r, CON_STATE_ERROR);
                kick = 1;
            }
            break;
          default:
            break;
        }

        if (expired) {
            connection_set_state_error(r, CON_STATE_ERROR);
            kick = 1;
        }

        /* no further keep-alive requests on this connection */
        r->keep_alive = 0;

        /* lift rate limits so that pending data drains as fast as possible */
        r->conf.bytes_per_second = 0;
        r->conf.global_bytes_per_second = 0;
        if (con->traffic_limit_reached) {
            con->traffic_limit_reached = 0;
            kick = 1;
        }

        if (kick)
            connection_state_machine(con);

        con = successor;
    }
}
1620 
1621 
1622 static int
connection_handle_read_post_cq_compact(chunkqueue * const cq)1623 connection_handle_read_post_cq_compact (chunkqueue * const cq)
1624 {
1625     /* combine first mem chunk with next non-empty mem chunk
1626      * (loop if next chunk is empty) */
1627     chunk *c = cq->first;
1628     if (NULL == c) return 0;
1629     const uint32_t mlen = buffer_clen(c->mem) - (size_t)c->offset;
1630     while ((c = c->next)) {
1631         const uint32_t blen = buffer_clen(c->mem) - (size_t)c->offset;
1632         if (0 == blen) continue;
1633         chunkqueue_compact_mem(cq, mlen + blen);
1634         return 1;
1635     }
1636     return 0;
1637 }
1638 
1639 
1640 __attribute_pure__
1641 static int
connection_handle_read_post_chunked_crlf(chunkqueue * const cq)1642 connection_handle_read_post_chunked_crlf (chunkqueue * const cq)
1643 {
1644     /* caller might check chunkqueue_length(cq) >= 2 before calling here
1645      * to limit return value to either 1 for good or -1 for error */
1646     chunk *c;
1647     buffer *b;
1648     char *p;
1649     size_t len;
1650 
1651     /* caller must have called chunkqueue_remove_finished_chunks(cq), so if
1652      * chunkqueue is not empty, it contains chunk with at least one char */
1653     if (chunkqueue_is_empty(cq)) return 0;
1654 
1655     c = cq->first;
1656     b = c->mem;
1657     p = b->ptr+c->offset;
1658     if (p[0] != '\r') return -1; /* error */
1659     if (p[1] == '\n') return 1;
1660     len = buffer_clen(b) - (size_t)c->offset;
1661     if (1 != len) return -1; /* error */
1662 
1663     while (NULL != (c = c->next)) {
1664         b = c->mem;
1665         len = buffer_clen(b) - (size_t)c->offset;
1666         if (0 == len) continue;
1667         p = b->ptr+c->offset;
1668         return (p[0] == '\n') ? 1 : -1; /* error if not '\n' */
1669     }
1670     return 0;
1671 }
1672 
1673 
1674 static handler_t
connection_handle_read_post_chunked(request_st * const r,chunkqueue * const cq,chunkqueue * const dst_cq)1675 connection_handle_read_post_chunked (request_st * const r, chunkqueue * const cq, chunkqueue * const dst_cq)
1676 {
1677     /* r->conf.max_request_size is in kBytes */
1678     const off_t max_request_size = (off_t)r->conf.max_request_size << 10;
1679     off_t te_chunked = r->te_chunked;
1680     do {
1681         off_t len = chunkqueue_length(cq);
1682 
1683         while (0 == te_chunked) {
1684             char *p;
1685             chunk *c = cq->first;
1686             if (NULL == c) break;
1687             force_assert(c->type == MEM_CHUNK);
1688             p = strchr(c->mem->ptr+c->offset, '\n');
1689             if (NULL != p) { /* found HTTP chunked header line */
1690                 off_t hsz = p + 1 - (c->mem->ptr+c->offset);
1691                 unsigned char *s = (unsigned char *)c->mem->ptr+c->offset;
1692                 for (unsigned char u;(u=(unsigned char)hex2int(*s))!=0xFF;++s) {
1693                     if (te_chunked > (off_t)(1uLL<<(8*sizeof(off_t)-5))-1-2) {
1694                         log_error(r->conf.errh, __FILE__, __LINE__,
1695                           "chunked data size too large -> 400");
1696                         /* 400 Bad Request */
1697                         return http_response_reqbody_read_error(r, 400);
1698                     }
1699                     te_chunked <<= 4;
1700                     te_chunked |= u;
1701                 }
1702                 if (s == (unsigned char *)c->mem->ptr+c->offset) { /*(no hex)*/
1703                     log_error(r->conf.errh, __FILE__, __LINE__,
1704                       "chunked header invalid chars -> 400");
1705                     /* 400 Bad Request */
1706                     return http_response_reqbody_read_error(r, 400);
1707                 }
1708                 while (*s == ' ' || *s == '\t') ++s;
1709                 if (*s != '\r' && *s != ';') {
1710                     log_error(r->conf.errh, __FILE__, __LINE__,
1711                       "chunked header invalid chars -> 400");
1712                     /* 400 Bad Request */
1713                     return http_response_reqbody_read_error(r, 400);
1714                 }
1715 
1716                 if (hsz >= 1024) {
1717                     /* prevent theoretical integer overflow
1718                      * casting to (size_t) and adding 2 (for "\r\n") */
1719                     log_error(r->conf.errh, __FILE__, __LINE__,
1720                       "chunked header line too long -> 400");
1721                     /* 400 Bad Request */
1722                     return http_response_reqbody_read_error(r, 400);
1723                 }
1724 
1725                 if (0 == te_chunked) {
1726                     /* do not consume final chunked header until
1727                      * (optional) trailers received along with
1728                      * request-ending blank line "\r\n" */
1729                     if (p[0] == '\r' && p[1] == '\n') {
1730                         /*(common case with no trailers; final \r\n received)*/
1731                         hsz += 2;
1732                     }
1733                     else {
1734                         /* trailers or final CRLF crosses into next cq chunk */
1735                         hsz -= 2;
1736                         do {
1737                             c = cq->first;
1738                             p = strstr(c->mem->ptr+c->offset+hsz, "\r\n\r\n");
1739                         } while (NULL == p
1740                                  && connection_handle_read_post_cq_compact(cq));
1741                         if (NULL == p) {
1742                             /*(effectively doubles max request field size
1743                              * potentially received by backend, if in the future
1744                              * these trailers are added to request headers)*/
1745                             if ((off_t)buffer_clen(c->mem) - c->offset
1746                                 < (off_t)r->conf.max_request_field_size) {
1747                                 break;
1748                             }
1749                             else {
1750                                 /* ignore excessively long trailers;
1751                                  * disable keep-alive on connection */
1752                                 r->keep_alive = 0;
1753                                 p = c->mem->ptr + buffer_clen(c->mem)
1754                                   - 4;
1755                             }
1756                         }
1757                         hsz = p + 4 - (c->mem->ptr+c->offset);
1758                         /* trailers currently ignored, but could be processed
1759                          * here if 0 == (r->conf.stream_request_body &
1760                          *               & (FDEVENT_STREAM_REQUEST
1761                          *                 |FDEVENT_STREAM_REQUEST_BUFMIN))
1762                          * taking care to reject fields forbidden in trailers,
1763                          * making trailers available to CGI and other backends*/
1764                     }
1765                     chunkqueue_mark_written(cq, (size_t)hsz);
1766                     r->reqbody_length = dst_cq->bytes_in;
1767                     break; /* done reading HTTP chunked request body */
1768                 }
1769 
1770                 /* consume HTTP chunked header */
1771                 chunkqueue_mark_written(cq, (size_t)hsz);
1772                 len = chunkqueue_length(cq);
1773 
1774                 if (0 !=max_request_size
1775                     && (max_request_size < te_chunked
1776                      || max_request_size - te_chunked < dst_cq->bytes_in)) {
1777                     log_error(r->conf.errh, __FILE__, __LINE__,
1778                       "request-size too long: %lld -> 413",
1779                       (long long)(dst_cq->bytes_in + te_chunked));
1780                     /* 413 Payload Too Large */
1781                     return http_response_reqbody_read_error(r, 413);
1782                 }
1783 
1784                 te_chunked += 2; /*(for trailing "\r\n" after chunked data)*/
1785 
1786                 break; /* read HTTP chunked header */
1787             }
1788 
1789             /*(likely better ways to handle chunked header crossing chunkqueue
1790              * chunks, but this situation is not expected to occur frequently)*/
1791             if ((off_t)buffer_clen(c->mem) - c->offset >= 1024) {
1792                 log_error(r->conf.errh, __FILE__, __LINE__,
1793                   "chunked header line too long -> 400");
1794                 /* 400 Bad Request */
1795                 return http_response_reqbody_read_error(r, 400);
1796             }
1797             else if (!connection_handle_read_post_cq_compact(cq)) {
1798                 break;
1799             }
1800         }
1801         if (0 == te_chunked) break;
1802 
1803         if (te_chunked > 2) {
1804             if (len > te_chunked-2) len = te_chunked-2;
1805             if (dst_cq->bytes_in + te_chunked <= 64*1024) {
1806                 /* avoid buffering request bodies <= 64k on disk */
1807                 chunkqueue_steal(dst_cq, cq, len);
1808             }
1809             else if (0 != chunkqueue_steal_with_tempfiles(dst_cq, cq, len,
1810                                                           r->conf.errh)) {
1811                 /* 500 Internal Server Error */
1812                 return http_response_reqbody_read_error(r, 500);
1813             }
1814             te_chunked -= len;
1815             len = chunkqueue_length(cq);
1816         }
1817 
1818         if (len < te_chunked) break;
1819 
1820         if (2 == te_chunked) {
1821             if (-1 == connection_handle_read_post_chunked_crlf(cq)) {
1822                 log_error(r->conf.errh, __FILE__, __LINE__,
1823                   "chunked data missing end CRLF -> 400");
1824                 /* 400 Bad Request */
1825                 return http_response_reqbody_read_error(r, 400);
1826             }
1827             chunkqueue_mark_written(cq, 2);/*consume \r\n at end of chunk data*/
1828             te_chunked -= 2;
1829         }
1830 
1831     } while (!chunkqueue_is_empty(cq));
1832 
1833     r->te_chunked = te_chunked;
1834     return HANDLER_GO_ON;
1835 }
1836 
1837 
1838 static handler_t
connection_handle_read_body_unknown(request_st * const r,chunkqueue * const cq,chunkqueue * const dst_cq)1839 connection_handle_read_body_unknown (request_st * const r, chunkqueue * const cq, chunkqueue * const dst_cq)
1840 {
1841     /* r->conf.max_request_size is in kBytes */
1842     const off_t max_request_size = (off_t)r->conf.max_request_size << 10;
1843     chunkqueue_append_chunkqueue(dst_cq, cq);
1844     if (0 != max_request_size && dst_cq->bytes_in > max_request_size) {
1845         log_error(r->conf.errh, __FILE__, __LINE__,
1846           "request-size too long: %lld -> 413", (long long)dst_cq->bytes_in);
1847         /* 413 Payload Too Large */
1848         return http_response_reqbody_read_error(r, 413);
1849     }
1850     return HANDLER_GO_ON;
1851 }
1852 
1853 
1854 __attribute_cold__
1855 static int
connection_check_expect_100(request_st * const r,connection * const con)1856 connection_check_expect_100 (request_st * const r, connection * const con)
1857 {
1858     if (con->is_writable <= 0)
1859         return 1;
1860 
1861     const buffer * const vb =
1862       http_header_request_get(r, HTTP_HEADER_EXPECT,
1863                               CONST_STR_LEN("Expect"));
1864     if (NULL == vb)
1865         return 1;
1866 
1867     /* (always unset Expect header so that check is not repeated for request */
1868     int rc = buffer_eq_icase_slen(vb, CONST_STR_LEN("100-continue"));
1869     http_header_request_unset(r, HTTP_HEADER_EXPECT,
1870                               CONST_STR_LEN("Expect"));
1871     if (!rc
1872         || 0 != r->reqbody_queue.bytes_in
1873         || !chunkqueue_is_empty(&r->read_queue)
1874         || !chunkqueue_is_empty(&r->write_queue))
1875         return 1;
1876 
1877     /* send 100 Continue only if no request body data received yet
1878      * and response has not yet started (checked above) */
1879     if (r->http_version > HTTP_VERSION_1_1)
1880         h2_send_100_continue(r, con);
1881     else if (r->http_version == HTTP_VERSION_1_1)
1882         return connection_write_100_continue(r, con);
1883 
1884     return 1;
1885 }
1886 
1887 
1888 static handler_t
connection_handle_read_post_state(request_st * const r)1889 connection_handle_read_post_state (request_st * const r)
1890 {
1891     connection * const con = r->con;
1892     chunkqueue * const cq = &r->read_queue;
1893     chunkqueue * const dst_cq = &r->reqbody_queue;
1894 
1895     int is_closed = 0;
1896 
1897     if (r->http_version > HTTP_VERSION_1_1) {
1898         /*(H2_STATE_HALF_CLOSED_REMOTE or H2_STATE_CLOSED)*/
1899         if (r->h2state >= H2_STATE_HALF_CLOSED_REMOTE)
1900             is_closed = 1;
1901     }
1902     else if (con->is_readable > 0) {
1903         con->read_idle_ts = log_monotonic_secs;
1904         const off_t max_per_read =
1905           !(r->conf.stream_request_body /*(if not streaming request body)*/
1906             & (FDEVENT_STREAM_REQUEST|FDEVENT_STREAM_REQUEST_BUFMIN))
1907             ? MAX_READ_LIMIT
1908             : (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST_BUFMIN)
1909               ? 16384  /* FDEVENT_STREAM_REQUEST_BUFMIN */
1910               : 65536; /* FDEVENT_STREAM_REQUEST */
1911         switch(con->network_read(con, cq, max_per_read)) {
1912         case -1:
1913             connection_set_state_error(r, CON_STATE_ERROR);
1914             return HANDLER_ERROR;
1915         case -2:
1916             is_closed = 1;
1917             break;
1918         default:
1919             break;
1920         }
1921 
1922         chunkqueue_remove_finished_chunks(cq);
1923     }
1924 
1925     /* Check for Expect: 100-continue in request headers */
1926     if (light_btst(r->rqst_htags, HTTP_HEADER_EXPECT)
1927         && !connection_check_expect_100(r, con))
1928         return HANDLER_ERROR;
1929 
1930     if (r->http_version > HTTP_VERSION_1_1) {
1931         /* h2_recv_data() places frame payload directly into r->reqbody_queue */
1932     }
1933     else if (r->reqbody_length < 0) {
1934         /*(-1: Transfer-Encoding: chunked, -2: unspecified length)*/
1935         handler_t rc = (-1 == r->reqbody_length)
1936                      ? connection_handle_read_post_chunked(r, cq, dst_cq)
1937                      : connection_handle_read_body_unknown(r, cq, dst_cq);
1938         if (HANDLER_GO_ON != rc) return rc;
1939         chunkqueue_remove_finished_chunks(cq);
1940     }
1941     else {
1942         off_t len = (off_t)r->reqbody_length - dst_cq->bytes_in;
1943         if (r->reqbody_length <= 64*1024) {
1944             /* don't buffer request bodies <= 64k on disk */
1945             chunkqueue_steal(dst_cq, cq, len);
1946         }
1947         else if (0 !=
1948                  chunkqueue_steal_with_tempfiles(dst_cq,cq,len,r->conf.errh)) {
1949             /* writing to temp file failed */ /* Internal Server Error */
1950             return http_response_reqbody_read_error(r, 500);
1951         }
1952         chunkqueue_remove_finished_chunks(cq);
1953     }
1954 
1955     if (dst_cq->bytes_in == (off_t)r->reqbody_length) {
1956         /* Content is ready */
1957         r->conf.stream_request_body &= ~FDEVENT_STREAM_REQUEST_POLLIN;
1958         if (r->state == CON_STATE_READ_POST) {
1959             connection_set_state(r, CON_STATE_HANDLE_REQUEST);
1960         }
1961         return HANDLER_GO_ON;
1962     }
1963     else if (is_closed) {
1964       #if 0
1965         return http_response_reqbody_read_error(r, 400); /* Bad Request */
1966       #endif
1967         return HANDLER_ERROR;
1968     }
1969     else {
1970         r->conf.stream_request_body |= FDEVENT_STREAM_REQUEST_POLLIN;
1971         return (r->conf.stream_request_body & FDEVENT_STREAM_REQUEST)
1972           ? HANDLER_GO_ON
1973           : HANDLER_WAIT_FOR_EVENT;
1974     }
1975 }
1976