1 /*
2  * HTTP/1 protocol analyzer
3  *
4  * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #include <ctype.h>
14 #include <common/config.h>
15 #include <common/h1.h>
16 #include <common/http-hdr.h>
17 
18 #include <proto/channel.h>
19 
20 /* Parse the Content-Length header field of an HTTP/1 request. The function
21  * checks all possible occurrences of a comma-delimited value, and verifies
22  * if any of them doesn't match a previous value. It returns <0 if a value
23  * differs, 0 if the whole header can be dropped (i.e. already known), or >0
24  * if the value can be indexed (first one). In the last case, the value might
25  * be adjusted and the caller must only add the updated value.
26  */
h1_parse_cont_len_header(struct h1m * h1m,struct ist * value)27 int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
28 {
29 	char *e, *n;
30 	long long cl;
31 	int not_first = !!(h1m->flags & H1_MF_CLEN);
32 	struct ist word;
33 
34 	word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
35 	e = value->ptr + value->len;
36 
37 	while (++word.ptr < e) {
38 		/* skip leading delimitor and blanks */
39 		if (unlikely(HTTP_IS_LWS(*word.ptr)))
40 			continue;
41 
42 		/* digits only now */
43 		for (cl = 0, n = word.ptr; n < e; n++) {
44 			unsigned int c = *n - '0';
45 			if (unlikely(c > 9)) {
46 				/* non-digit */
47 				if (unlikely(n == word.ptr)) // spaces only
48 					goto fail;
49 				break;
50 			}
51 			if (unlikely(cl > ULLONG_MAX / 10ULL))
52 				goto fail; /* multiply overflow */
53 			cl = cl * 10ULL;
54 			if (unlikely(cl + c < cl))
55 				goto fail; /* addition overflow */
56 			cl = cl + c;
57 		}
58 
59 		/* keep a copy of the exact cleaned value */
60 		word.len = n - word.ptr;
61 
62 		/* skip trailing LWS till next comma or EOL */
63 		for (; n < e; n++) {
64 			if (!HTTP_IS_LWS(*n)) {
65 				if (unlikely(*n != ','))
66 					goto fail;
67 				break;
68 			}
69 		}
70 
71 		/* if duplicate, must be equal */
72 		if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
73 			goto fail;
74 
75 		/* OK, store this result as the one to be indexed */
76 		h1m->flags |= H1_MF_CLEN;
77 		h1m->curr_len = h1m->body_len = cl;
78 		*value = word;
79 		word.ptr = n;
80 	}
81 	/* here we've reached the end with a single value or a series of
82 	 * identical values, all matching previous series if any. The last
83 	 * parsed value was sent back into <value>. We just have to decide
84 	 * if this occurrence has to be indexed (it's the first one) or
85 	 * silently skipped (it's not the first one)
86 	 */
87 	return !not_first;
88  fail:
89 	return -1;
90 }
91 
92 /* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
93  * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
94  * this case. Any other token found or any empty header field found will reset
95  * this flag, so that it accurately represents the token's presence at the last
96  * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
97  * are case-insensitive (cf RFC7230#4).
98  */
h1_parse_xfer_enc_header(struct h1m * h1m,struct ist value)99 void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
100 {
101 	char *e, *n;
102 	struct ist word;
103 
104 	h1m->flags |= H1_MF_XFER_ENC;
105 	h1m->flags &= ~H1_MF_CHNK;
106 
107 	word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
108 	e = value.ptr + value.len;
109 
110 	while (++word.ptr < e) {
111 		/* skip leading delimitor and blanks */
112 		if (HTTP_IS_LWS(*word.ptr))
113 			continue;
114 
115 		n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
116 		word.len = n - word.ptr;
117 
118 		/* trim trailing blanks */
119 		while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
120 			word.len--;
121 
122 		h1m->flags &= ~H1_MF_CHNK;
123 		if (isteqi(word, ist("chunked")))
124 			h1m->flags |= H1_MF_CHNK;
125 
126 		word.ptr = n;
127 	}
128 }
129 
130 /* Parse the Connection: header of an HTTP/1 request, looking for "close",
131  * "keep-alive", and "upgrade" values, and updating h1m->flags according to
132  * what was found there. Note that flags are only added, not removed, so the
133  * function is safe for being called multiple times if multiple occurrences
134  * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
135  * up from "keep-alive" and "close" values. To do so, the header value is
136  * rewritten in place and its length is updated.
137  */
h1_parse_connection_header(struct h1m * h1m,struct ist * value)138 void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
139 {
140 	char *e, *n, *p;
141 	struct ist word;
142 
143 	word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
144 	p = value->ptr;
145 	e = value->ptr + value->len;
146 	if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
147 		value->len = 0;
148 
149 	while (++word.ptr < e) {
150 		/* skip leading delimitor and blanks */
151 		if (HTTP_IS_LWS(*word.ptr))
152 			continue;
153 
154 		n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
155 		word.len = n - word.ptr;
156 
157 		/* trim trailing blanks */
158 		while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
159 			word.len--;
160 
161 		if (isteqi(word, ist("keep-alive"))) {
162 			h1m->flags |= H1_MF_CONN_KAL;
163 			if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
164 				goto skip_val;
165 		}
166 		else if (isteqi(word, ist("close"))) {
167 			h1m->flags |= H1_MF_CONN_CLO;
168 			if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
169 				goto skip_val;
170 		}
171 		else if (isteqi(word, ist("upgrade")))
172 			h1m->flags |= H1_MF_CONN_UPG;
173 
174 		if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
175 			if (value->ptr + value->len == p) {
176 				/* no rewrite done till now */
177 				value->len = n - value->ptr;
178 			}
179 			else {
180 				if (value->len)
181 					value->ptr[value->len++] = ',';
182 				istcat(value, word, e - value->ptr);
183 			}
184 		}
185 
186 	  skip_val:
187 		word.ptr = p = n;
188 	}
189 }
190 
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
 */
194 
/* Checks that the byte at <ptr> is an LF. When it is, execution simply
 * continues after the macro. Otherwise <state> is set to <where> and the
 * macro jumps to label <bad>. <ptr> is evaluated only once.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)                  \
	do {                                                    \
		if (likely(*(ptr) == '\n'))                     \
			break; /* leaves the macro only */      \
		state = (where);                                \
		goto bad;                                       \
	} while (0)
205 
/* Advances pointer <ptr> by one byte. If it has reached pointer <end>, the
 * buffer is exhausted: <state> is set to <where> and the macro jumps to label
 * <stop>. Otherwise it jumps to label <more>. In both cases the macro never
 * falls through to the statement following it.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)        \
	do {                                                              \
		if (unlikely(++(ptr) >= (end))) {                         \
			state = (where);                                  \
			goto stop;                                        \
		}                                                         \
		goto more;                                                \
	} while (0)
219 
220 /* This function parses a contiguous HTTP/1 headers block starting at <start>
 * and ending before <stop>, at once, and converts it to a list of (name,value)
222  * pairs representing header fields into the array <hdr> of size <hdr_num>,
223  * whose last entry will have an empty name and an empty value. If <hdr_num> is
224  * too small to represent the whole message, an error is returned. Some
225  * protocol elements such as content-length and transfer-encoding will be
226  * parsed and stored into h1m as well. <hdr> may be null, in which case only
227  * the parsing state will be updated. This may be used to restart the parsing
228  * where it stopped for example.
229  *
230  * For now it's limited to the response. If the header block is incomplete,
231  * 0 is returned, waiting to be called again with more data to try it again.
232  * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
233  * and h1m->next to zero on the first call, the parser will do the rest. If
234  * an incomplete message is seen, the caller only needs to present h1m->state
235  * and h1m->next again, with an empty header list so that the parser can start
236  * again. In this case, it will detect that it interrupted a previous session
237  * and will first look for the end of the message before reparsing it again and
238  * indexing it at the same time. This ensures that incomplete messages fed 1
239  * character at a time are never processed entirely more than exactly twice,
240  * and that there is no need to store all the internal state and pre-parsed
241  * headers or start line between calls.
242  *
243  * A pointer to a start line descriptor may be passed in <slp>, in which case
244  * the parser will fill it with whatever it found.
245  *
246  * The code derived from the main HTTP/1 parser above but was simplified and
247  * optimized to process responses produced or forwarded by haproxy. The caller
248  * is responsible for ensuring that the message doesn't wrap, and should ensure
249  * it is complete to avoid having to retry the operation after a failed
250  * attempt. The message is not supposed to be invalid, which is why a few
251  * properties such as the character set used in the header field names are not
252  * checked. In case of an unparsable response message, a negative value will be
253  * returned with h1m->err_pos and h1m->err_state matching the location and
 * state where the error was met. Leading blank lines are tolerated but not
255  * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
256  * parsed and the start line is skipped. It is not required to set h1m->state
257  * nor h1m->next in this case.
258  *
259  * This function returns :
260  *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
261  *       set) with the state the error occurred in and h1m->err_pos with the
262  *       the position relative to <start>
263  *    -2 if the output is full (hdr_num reached). err_state and err_pos also
264  *       indicate where it failed.
265  *     0 in case of missing data.
266  *   > 0 on success, it then corresponds to the number of bytes read since
267  *       <start> so that the caller can go on with the payload.
268  */
h1_headers_to_hdr_list(char * start,const char * stop,struct http_hdr * hdr,unsigned int hdr_num,struct h1m * h1m,union h1_sl * slp)269 int h1_headers_to_hdr_list(char *start, const char *stop,
270                            struct http_hdr *hdr, unsigned int hdr_num,
271                            struct h1m *h1m, union h1_sl *slp)
272 {
273 	enum h1m_state state;
274 	register char *ptr;
275 	register const char *end;
276 	unsigned int hdr_count;
277 	unsigned int skip; /* number of bytes skipped at the beginning */
278 	unsigned int sol;  /* start of line */
279 	unsigned int col;  /* position of the colon */
280 	unsigned int eol;  /* end of line */
281 	unsigned int sov;  /* start of value */
282 	union h1_sl sl;
283 	int skip_update;
284 	int restarting;
285 	int host_idx;
286 	struct ist n, v;       /* header name and value during parsing */
287 
288 	skip = 0; // do it only once to keep track of the leading CRLF.
289 
290  try_again:
291 	hdr_count = sol = col = eol = sov = 0;
292 	sl.st.status = 0;
293 	skip_update = restarting = 0;
294 	host_idx = -1;
295 
296 	if (h1m->flags & H1_MF_HDRS_ONLY) {
297 		state = H1_MSG_HDR_FIRST;
298 		h1m->next = 0;
299 	}
300 	else {
301 		state = h1m->state;
302 		if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
303 			restarting = 1;
304 	}
305 
306 	ptr   = start + h1m->next;
307 	end   = stop;
308 
309 	if (unlikely(ptr >= end))
310 		goto http_msg_ood;
311 
312 	/* don't update output if hdr is NULL or if we're restarting */
313 	if (!hdr || restarting)
314 		skip_update = 1;
315 
316 	switch (state)	{
317 	case H1_MSG_RQBEFORE:
318 	http_msg_rqbefore:
319 		if (likely(HTTP_IS_TOKEN(*ptr))) {
320 			/* we have a start of message, we may have skipped some
321 			 * heading CRLF. Skip them now.
322 			 */
323 			skip += ptr - start;
324 			start = ptr;
325 
326 			sol = 0;
327 			sl.rq.m.ptr = ptr;
328 			hdr_count = 0;
329 			state = H1_MSG_RQMETH;
330 			goto http_msg_rqmeth;
331 		}
332 
333 		if (unlikely(!HTTP_IS_CRLF(*ptr))) {
334 			state = H1_MSG_RQBEFORE;
335 			goto http_msg_invalid;
336 		}
337 
338 		if (unlikely(*ptr == '\n'))
339 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
340 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
341 		/* stop here */
342 
343 	case H1_MSG_RQBEFORE_CR:
344 	http_msg_rqbefore_cr:
345 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
346 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
347 		/* stop here */
348 
349 	case H1_MSG_RQMETH:
350 	http_msg_rqmeth:
351 		if (likely(HTTP_IS_TOKEN(*ptr)))
352 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
353 
354 		if (likely(HTTP_IS_SPHT(*ptr))) {
355 			sl.rq.m.len = ptr - sl.rq.m.ptr;
356 			sl.rq.meth = find_http_meth(start, sl.rq.m.len);
357 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
358 		}
359 
360 		if (likely(HTTP_IS_CRLF(*ptr))) {
361 			/* HTTP 0.9 request */
362 			sl.rq.m.len = ptr - sl.rq.m.ptr;
363 			sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
364 		http_msg_req09_uri:
365 			sl.rq.u.ptr = ptr;
366 		http_msg_req09_uri_e:
367 			sl.rq.u.len = ptr - sl.rq.u.ptr;
368 		http_msg_req09_ver:
369 			sl.rq.v.ptr = ptr;
370 			sl.rq.v.len = 0;
371 			goto http_msg_rqline_eol;
372 		}
373 		state = H1_MSG_RQMETH;
374 		goto http_msg_invalid;
375 
376 	case H1_MSG_RQMETH_SP:
377 	http_msg_rqmeth_sp:
378 		if (likely(!HTTP_IS_LWS(*ptr))) {
379 			sl.rq.u.ptr = ptr;
380 			goto http_msg_rquri;
381 		}
382 		if (likely(HTTP_IS_SPHT(*ptr)))
383 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
384 		/* so it's a CR/LF, meaning an HTTP 0.9 request */
385 		goto http_msg_req09_uri;
386 
387 	case H1_MSG_RQURI:
388 	http_msg_rquri:
389 #if defined(__x86_64__) ||						\
390     defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
391     defined(__ARM_ARCH_7A__)
392 		/* speedup: skip bytes not between 0x21 and 0x7e inclusive */
393 		while (ptr <= end - sizeof(int)) {
394 			int x = *(int *)ptr - 0x21212121;
395 			if (x & 0x80808080)
396 				break;
397 
398 			x -= 0x5e5e5e5e;
399 			if (!(x & 0x80808080))
400 				break;
401 
402 			ptr += sizeof(int);
403 		}
404 #endif
405 		if (ptr >= end) {
406 			state = H1_MSG_RQURI;
407 			goto http_msg_ood;
408 		}
409 	http_msg_rquri2:
410 		if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
411 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
412 
413 		if (likely(HTTP_IS_SPHT(*ptr))) {
414 			sl.rq.u.len = ptr - sl.rq.u.ptr;
415 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
416 		}
417 		if (likely((unsigned char)*ptr >= 128)) {
418 			/* non-ASCII chars are forbidden unless option
419 			 * accept-invalid-http-request is enabled in the frontend.
420 			 * In any case, we capture the faulty char.
421 			 */
422 			if (h1m->err_pos < -1)
423 				goto invalid_char;
424 			if (h1m->err_pos == -1)
425 				h1m->err_pos = ptr - start + skip;
426 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
427 		}
428 
429 		if (likely(HTTP_IS_CRLF(*ptr))) {
430 			/* so it's a CR/LF, meaning an HTTP 0.9 request */
431 			goto http_msg_req09_uri_e;
432 		}
433 
434 		/* OK forbidden chars, 0..31 or 127 */
435 	invalid_char:
436 		state = H1_MSG_RQURI;
437 		goto http_msg_invalid;
438 
439 	case H1_MSG_RQURI_SP:
440 	http_msg_rquri_sp:
441 		if (likely(!HTTP_IS_LWS(*ptr))) {
442 			sl.rq.v.ptr = ptr;
443 			goto http_msg_rqver;
444 		}
445 		if (likely(HTTP_IS_SPHT(*ptr)))
446 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
447 		/* so it's a CR/LF, meaning an HTTP 0.9 request */
448 		goto http_msg_req09_ver;
449 
450 
451 	case H1_MSG_RQVER:
452 	http_msg_rqver:
453 		if (likely(HTTP_IS_VER_TOKEN(*ptr)))
454 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
455 
456 		if (likely(HTTP_IS_CRLF(*ptr))) {
457 			sl.rq.v.len = ptr - sl.rq.v.ptr;
458 		http_msg_rqline_eol:
459 			/* We have seen the end of line. Note that we do not
460 			 * necessarily have the \n yet, but at least we know that we
461 			 * have EITHER \r OR \n, otherwise the request would not be
462 			 * complete. We can then record the request length and return
463 			 * to the caller which will be able to register it.
464 			 */
465 
466 			if (likely(!skip_update)) {
467 				if ((sl.rq.v.len == 8) &&
468 				    (*(sl.rq.v.ptr + 5) > '1' ||
469 				     (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
470 					h1m->flags |= H1_MF_VER_11;
471 
472 				if (unlikely(hdr_count >= hdr_num)) {
473 					state = H1_MSG_RQVER;
474 					goto http_output_full;
475 				}
476 				if (!(h1m->flags & H1_MF_NO_PHDR))
477 					http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
478 
479 				if (unlikely(hdr_count >= hdr_num)) {
480 					state = H1_MSG_RQVER;
481 					goto http_output_full;
482 				}
483 				if (!(h1m->flags & H1_MF_NO_PHDR))
484 					http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
485 			}
486 
487 			sol = ptr - start;
488 			if (likely(*ptr == '\r'))
489 				EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
490 			goto http_msg_rqline_end;
491 		}
492 
493 		/* neither an HTTP_VER token nor a CRLF */
494 		state = H1_MSG_RQVER;
495 		goto http_msg_invalid;
496 
497 	case H1_MSG_RQLINE_END:
498 	http_msg_rqline_end:
499 		/* check for HTTP/0.9 request : no version information
500 		 * available. sol must point to the first of CR or LF. However
501 		 * since we don't save these elements between calls, if we come
502 		 * here from a restart, we don't necessarily know. Thus in this
503 		 * case we simply start over.
504 		 */
505 		if (restarting)
506 			goto restart;
507 
508 		if (unlikely(sl.rq.v.len == 0))
509 			goto http_msg_last_lf;
510 
511 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
512 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
513 		/* stop here */
514 
515 	/*
516 	 * Common states below
517 	 */
518 	case H1_MSG_RPBEFORE:
519 	http_msg_rpbefore:
520 		if (likely(HTTP_IS_TOKEN(*ptr))) {
521 			/* we have a start of message, we may have skipped some
522 			 * heading CRLF. Skip them now.
523 			 */
524 			skip += ptr - start;
525 			start = ptr;
526 
527 			sol = 0;
528 			sl.st.v.ptr = ptr;
529 			hdr_count = 0;
530 			state = H1_MSG_RPVER;
531 			goto http_msg_rpver;
532 		}
533 
534 		if (unlikely(!HTTP_IS_CRLF(*ptr))) {
535 			state = H1_MSG_RPBEFORE;
536 			goto http_msg_invalid;
537 		}
538 
539 		if (unlikely(*ptr == '\n'))
540 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
541 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
542 		/* stop here */
543 
544 	case H1_MSG_RPBEFORE_CR:
545 	http_msg_rpbefore_cr:
546 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
547 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
548 		/* stop here */
549 
550 	case H1_MSG_RPVER:
551 	http_msg_rpver:
552 		if (likely(HTTP_IS_VER_TOKEN(*ptr)))
553 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
554 
555 		if (likely(HTTP_IS_SPHT(*ptr))) {
556 			sl.st.v.len = ptr - sl.st.v.ptr;
557 
558 			if ((sl.st.v.len == 8) &&
559 			    (*(sl.st.v.ptr + 5) > '1' ||
560 			     (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
561 				h1m->flags |= H1_MF_VER_11;
562 
563 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
564 		}
565 		state = H1_MSG_RPVER;
566 		goto http_msg_invalid;
567 
568 	case H1_MSG_RPVER_SP:
569 	http_msg_rpver_sp:
570 		if (likely(!HTTP_IS_LWS(*ptr))) {
571 			sl.st.status = 0;
572 			sl.st.c.ptr = ptr;
573 			goto http_msg_rpcode;
574 		}
575 		if (likely(HTTP_IS_SPHT(*ptr)))
576 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
577 		/* so it's a CR/LF, this is invalid */
578 		state = H1_MSG_RPVER_SP;
579 		goto http_msg_invalid;
580 
581 	case H1_MSG_RPCODE:
582 	http_msg_rpcode:
583 		if (likely(HTTP_IS_DIGIT(*ptr))) {
584 			sl.st.status = sl.st.status * 10 + *ptr - '0';
585 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
586 		}
587 
588 		if (unlikely(!HTTP_IS_LWS(*ptr))) {
589 			state = H1_MSG_RPCODE;
590 			goto http_msg_invalid;
591 		}
592 
593 		if (likely(HTTP_IS_SPHT(*ptr))) {
594 			sl.st.c.len = ptr - sl.st.c.ptr;
595 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
596 		}
597 
598 		/* so it's a CR/LF, so there is no reason phrase */
599 		sl.st.c.len = ptr - sl.st.c.ptr;
600 
601 	http_msg_rsp_reason:
602 		sl.st.r.ptr = ptr;
603 		sl.st.r.len = 0;
604 		goto http_msg_rpline_eol;
605 
606 	case H1_MSG_RPCODE_SP:
607 	http_msg_rpcode_sp:
608 		if (likely(!HTTP_IS_LWS(*ptr))) {
609 			sl.st.r.ptr = ptr;
610 			goto http_msg_rpreason;
611 		}
612 		if (likely(HTTP_IS_SPHT(*ptr)))
613 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
614 		/* so it's a CR/LF, so there is no reason phrase */
615 		goto http_msg_rsp_reason;
616 
617 	case H1_MSG_RPREASON:
618 	http_msg_rpreason:
619 		if (likely(!HTTP_IS_CRLF(*ptr)))
620 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
621 		sl.st.r.len = ptr - sl.st.r.ptr;
622 	http_msg_rpline_eol:
623 		/* We have seen the end of line. Note that we do not
624 		 * necessarily have the \n yet, but at least we know that we
625 		 * have EITHER \r OR \n, otherwise the response would not be
626 		 * complete. We can then record the response length and return
627 		 * to the caller which will be able to register it.
628 		 */
629 
630 		if (likely(!skip_update)) {
631 			if (unlikely(hdr_count >= hdr_num)) {
632 				state = H1_MSG_RPREASON;
633 				goto http_output_full;
634 			}
635 			if (!(h1m->flags & H1_MF_NO_PHDR))
636 				http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
637 		}
638 
639 		sol = ptr - start;
640 		if (likely(*ptr == '\r'))
641 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
642 		goto http_msg_rpline_end;
643 
644 	case H1_MSG_RPLINE_END:
645 	http_msg_rpline_end:
646 		/* sol must point to the first of CR or LF. */
647 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
648 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
649 		/* stop here */
650 
651 	case H1_MSG_HDR_FIRST:
652 	http_msg_hdr_first:
653 		sol = ptr - start;
654 		if (likely(!HTTP_IS_CRLF(*ptr))) {
655 			goto http_msg_hdr_name;
656 		}
657 
658 		if (likely(*ptr == '\r'))
659 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
660 		goto http_msg_last_lf;
661 
662 	case H1_MSG_HDR_NAME:
663 	http_msg_hdr_name:
664 		/* assumes sol points to the first char */
665 		if (likely(HTTP_IS_TOKEN(*ptr))) {
666 			if (!skip_update) {
667 				/* turn it to lower case if needed */
668 				if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
669 					*ptr = tolower(*ptr);
670 			}
671 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
672 		}
673 
674 		if (likely(*ptr == ':')) {
675 			col = ptr - start;
676 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
677 		}
678 
679 		if (likely(h1m->err_pos < -1) || *ptr == '\n') {
680 			state = H1_MSG_HDR_NAME;
681 			goto http_msg_invalid;
682 		}
683 
684 		if (h1m->err_pos == -1) /* capture the error pointer */
685 			h1m->err_pos = ptr - start + skip; /* >= 0 now */
686 
687 		/* and we still accept this non-token character */
688 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
689 
690 	case H1_MSG_HDR_L1_SP:
691 	http_msg_hdr_l1_sp:
692 		/* assumes sol points to the first char */
693 		if (likely(HTTP_IS_SPHT(*ptr)))
694 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
695 
696 		/* header value can be basically anything except CR/LF */
697 		sov = ptr - start;
698 
699 		if (likely(!HTTP_IS_CRLF(*ptr))) {
700 			goto http_msg_hdr_val;
701 		}
702 
703 		if (likely(*ptr == '\r'))
704 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
705 		goto http_msg_hdr_l1_lf;
706 
707 	case H1_MSG_HDR_L1_LF:
708 	http_msg_hdr_l1_lf:
709 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
710 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
711 
712 	case H1_MSG_HDR_L1_LWS:
713 	http_msg_hdr_l1_lws:
714 		if (likely(HTTP_IS_SPHT(*ptr))) {
715 			if (!skip_update) {
716 				/* replace HT,CR,LF with spaces */
717 				for (; start + sov < ptr; sov++)
718 					start[sov] = ' ';
719 			}
720 			goto http_msg_hdr_l1_sp;
721 		}
722 		/* we had a header consisting only in spaces ! */
723 		eol = sov;
724 		goto http_msg_complete_header;
725 
726 	case H1_MSG_HDR_VAL:
727 	http_msg_hdr_val:
728 		/* assumes sol points to the first char, and sov
729 		 * points to the first character of the value.
730 		 */
731 
732 		/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
733 		 * and lower. In fact since most of the time is spent in the loop, we
734 		 * also remove the sign bit test so that bytes 0x8e..0x0d break the
735 		 * loop, but we don't care since they're very rare in header values.
736 		 */
737 #if defined(__x86_64__)
738 		while (ptr <= end - sizeof(long)) {
739 			if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
740 				goto http_msg_hdr_val2;
741 			ptr += sizeof(long);
742 		}
743 #endif
744 #if defined(__x86_64__) || \
745     defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
746     defined(__ARM_ARCH_7A__)
747 		while (ptr <= end - sizeof(int)) {
748 			if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
749 				goto http_msg_hdr_val2;
750 			ptr += sizeof(int);
751 		}
752 #endif
753 		if (ptr >= end) {
754 			state = H1_MSG_HDR_VAL;
755 			goto http_msg_ood;
756 		}
757 	http_msg_hdr_val2:
758 		if (likely(!HTTP_IS_CRLF(*ptr)))
759 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
760 
761 		eol = ptr - start;
762 		/* Note: we could also copy eol into ->eoh so that we have the
763 		 * real header end in case it ends with lots of LWS, but is this
764 		 * really needed ?
765 		 */
766 		if (likely(*ptr == '\r'))
767 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
768 		goto http_msg_hdr_l2_lf;
769 
770 	case H1_MSG_HDR_L2_LF:
771 	http_msg_hdr_l2_lf:
772 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
773 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
774 
775 	case H1_MSG_HDR_L2_LWS:
776 	http_msg_hdr_l2_lws:
777 		if (unlikely(HTTP_IS_SPHT(*ptr))) {
778 			if (!skip_update) {
779 				/* LWS: replace HT,CR,LF with spaces */
780 				for (; start + eol < ptr; eol++)
781 					start[eol] = ' ';
782 			}
783 			goto http_msg_hdr_val;
784 		}
785 	http_msg_complete_header:
786 		/*
787 		 * It was a new header, so the last one is finished. Assumes
788 		 * <sol> points to the first char of the name, <col> to the
789 		 * colon, <sov> points to the first character of the value and
790 		 * <eol> to the first CR or LF so we know how the line ends. We
791 		 * will trim spaces around the value. It's possible to do it by
792 		 * adjusting <eol> and <sov> which are no more used after this.
793 		 * We can add the header field to the list.
794 		 */
795 		if (likely(!skip_update)) {
796 			while (sov < eol && HTTP_IS_LWS(start[sov]))
797 				sov++;
798 
799 			while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
800 				eol--;
801 
802 
803 			n = ist2(start + sol, col - sol);
804 			v = ist2(start + sov, eol - sov);
805 
806 			do {
807 				int ret;
808 
809 				if (unlikely(hdr_count >= hdr_num)) {
810 					state = H1_MSG_HDR_L2_LWS;
811 					goto http_output_full;
812 				}
813 
814 				if (isteqi(n, ist("transfer-encoding"))) {
815 					h1_parse_xfer_enc_header(h1m, v);
816 				}
817 				else if (isteqi(n, ist("content-length"))) {
818 					ret = h1_parse_cont_len_header(h1m, &v);
819 
820 					if (ret < 0) {
821 						state = H1_MSG_HDR_L2_LWS;
822 						ptr = v.ptr; /* Set ptr on the error */
823 						goto http_msg_invalid;
824 					}
825 					else if (ret == 0) {
826 						/* skip it */
827 						break;
828 					}
829 				}
830 				else if (isteqi(n, ist("connection"))) {
831 					h1_parse_connection_header(h1m, &v);
832 					if (!v.len) {
833 						/* skip it */
834 						break;
835 					}
836 				}
837 				else if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP)) && isteqi(n, ist("host"))) {
838 					if (host_idx == -1) {
839 						struct ist authority;
840 
841 						authority = http_get_authority(sl.rq.u, 1);
842 						if (authority.len && !isteqi(v, authority)) {
843 							if (h1m->err_pos < -1) {
844 								state = H1_MSG_HDR_L2_LWS;
845 								ptr = v.ptr; /* Set ptr on the error */
846 								goto http_msg_invalid;
847 							}
848 							if (h1m->err_pos == -1) /* capture the error pointer */
849 								h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
850 						}
851 						host_idx = hdr_count;
852 					}
853 					else {
854 						if (!isteqi(v, hdr[host_idx].v)) {
855 							state = H1_MSG_HDR_L2_LWS;
856 							ptr = v.ptr; /* Set ptr on the error */
857 							goto http_msg_invalid;
858 						}
859 						/* if the same host, skip it */
860 						break;
861 					}
862 				}
863 
864 				http_set_hdr(&hdr[hdr_count++], n, v);
865 			} while (0);
866 		}
867 
868 		sol = ptr - start;
869 
870 		if (likely(!HTTP_IS_CRLF(*ptr)))
871 			goto http_msg_hdr_name;
872 
873 		if (likely(*ptr == '\r'))
874 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
875 		goto http_msg_last_lf;
876 
877 	case H1_MSG_LAST_LF:
878 	http_msg_last_lf:
879 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
880 		ptr++;
881 		/* <ptr> now points to the first byte of payload. If needed sol
882 		 * still points to the first of either CR or LF of the empty
883 		 * line ending the headers block.
884 		 */
885 		if (likely(!skip_update)) {
886 			if (unlikely(hdr_count >= hdr_num)) {
887 				state = H1_MSG_LAST_LF;
888 				goto http_output_full;
889 			}
890 			http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
891 		}
892 
893 		/* reaching here we've parsed the whole message. We may detect
894 		 * that we were already continuing an interrupted parsing pass
895 		 * so we were silently looking for the end of message not
896 		 * updating anything before deciding to parse it fully at once.
897 		 * It's guaranteed that we won't match this test twice in a row
898 		 * since restarting will turn zero.
899 		 */
900 		if (restarting)
901 			goto restart;
902 
903 		state = H1_MSG_DATA;
904 		if (h1m->flags & H1_MF_XFER_ENC) {
905 			if (h1m->flags & H1_MF_CLEN) {
906 				h1m->flags &= ~H1_MF_CLEN;
907 				hdr_count = http_del_hdr(hdr, ist("content-length"));
908 			}
909 
910 			if (h1m->flags & H1_MF_CHNK)
911 				state = H1_MSG_CHUNK_SIZE;
912 			else if (!(h1m->flags & H1_MF_RESP)) {
913 				/* cf RFC7230#3.3.3 : transfer-encoding in
914 				 * request without chunked encoding is invalid.
915 				 */
916 				goto http_msg_invalid;
917 			}
918 		}
919 
920 		break;
921 
922 	default:
923 		/* impossible states */
924 		goto http_msg_invalid;
925 	}
926 
927 	/* Now we've left the headers state and are either in H1_MSG_DATA or
928 	 * H1_MSG_CHUNK_SIZE.
929 	 */
930 
931 	if (slp && !skip_update)
932 		*slp = sl;
933 
934 	h1m->state = state;
935 	h1m->next  = ptr - start + skip;
936 	return h1m->next;
937 
938  http_msg_ood:
939 	/* out of data at <ptr> during state <state> */
940 	if (slp && !skip_update)
941 		*slp = sl;
942 
943 	h1m->state = state;
944 	h1m->next  = ptr - start + skip;
945 	return 0;
946 
947  http_msg_invalid:
948 	/* invalid message, error at <ptr> */
949 	if (slp && !skip_update)
950 		*slp = sl;
951 
952 	h1m->err_state = h1m->state = state;
953 	h1m->err_pos   = h1m->next  = ptr - start + skip;
954 	return -1;
955 
956  http_output_full:
957 	/* no more room to store the current header, error at <ptr> */
958 	if (slp && !skip_update)
959 		*slp = sl;
960 
961 	h1m->err_state = h1m->state = state;
962 	h1m->err_pos   = h1m->next  = ptr - start + skip;
963 	return -2;
964 
965  restart:
966 	h1m->flags &= ~(H1_MF_VER_11|H1_MF_CLEN|H1_MF_XFER_ENC|H1_MF_CHNK|H1_MF_CONN_KAL|H1_MF_CONN_CLO|H1_MF_CONN_UPG);
967 	h1m->curr_len = h1m->body_len = h1m->next  = 0;
968 	if (h1m->flags & H1_MF_RESP)
969 		h1m->state = H1_MSG_RPBEFORE;
970 	else
971 		h1m->state = H1_MSG_RQBEFORE;
972 	goto try_again;
973 }
974 
975 /* This function performs a very minimal parsing of the trailers block present
976  * at offset <ofs> in <buf> for up to <max> bytes, and returns the number of
977  * bytes to delete to skip the trailers. It may return 0 if it's missing some
978  * input data, or < 0 in case of parse error (in which case the caller may have
979  * to decide how to proceed, possibly eating everything).
980  */
h1_measure_trailers(const struct buffer * buf,unsigned int ofs,unsigned int max)981 int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
982 {
983 	const char *stop = b_peek(buf, ofs + max);
984 	int count = ofs;
985 
986 	while (1) {
987 		const char *p1 = NULL, *p2 = NULL;
988 		const char *start = b_peek(buf, count);
989 		const char *ptr   = start;
990 
991 		/* scan current line and stop at LF or CRLF */
992 		while (1) {
993 			if (ptr == stop)
994 				return 0;
995 
996 			if (*ptr == '\n') {
997 				if (!p1)
998 					p1 = ptr;
999 				p2 = ptr;
1000 				break;
1001 			}
1002 
1003 			if (*ptr == '\r') {
1004 				if (p1)
1005 					return -1;
1006 				p1 = ptr;
1007 			}
1008 
1009 			ptr = b_next(buf, ptr);
1010 		}
1011 
1012 		/* after LF; point to beginning of next line */
1013 		p2 = b_next(buf, p2);
1014 		count += b_dist(buf, start, p2);
1015 
1016 		/* LF/CRLF at beginning of line => end of trailers at p2.
1017 		 * Everything was scheduled for forwarding, there's nothing left
1018 		 * from this message. */
1019 		if (p1 == start)
1020 			break;
1021 		/* OK, next line then */
1022 	}
1023 	return count - ofs;
1024 }
1025