1 /*
2  * HTTP/1 protocol analyzer
3  *
4  * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #include <ctype.h>
14 #include <haproxy/api.h>
15 #include <haproxy/h1.h>
16 #include <haproxy/http-hdr.h>
17 
18 /* Parse the Content-Length header field of an HTTP/1 request. The function
19  * checks all possible occurrences of a comma-delimited value, and verifies
20  * if any of them doesn't match a previous value. It returns <0 if a value
21  * differs, 0 if the whole header can be dropped (i.e. already known), or >0
22  * if the value can be indexed (first one). In the last case, the value might
23  * be adjusted and the caller must only add the updated value.
24  */
h1_parse_cont_len_header(struct h1m * h1m,struct ist * value)25 int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
26 {
27 	char *e, *n;
28 	long long cl;
29 	int not_first = !!(h1m->flags & H1_MF_CLEN);
30 	struct ist word;
31 
32 	word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
33 	e = value->ptr + value->len;
34 
35 	while (++word.ptr < e) {
36 		/* skip leading delimiter and blanks */
37 		if (unlikely(HTTP_IS_LWS(*word.ptr)))
38 			continue;
39 
40 		/* digits only now */
41 		for (cl = 0, n = word.ptr; n < e; n++) {
42 			unsigned int c = *n - '0';
43 			if (unlikely(c > 9)) {
44 				/* non-digit */
45 				if (unlikely(n == word.ptr)) // spaces only
46 					goto fail;
47 				break;
48 			}
49 			if (unlikely(cl > ULLONG_MAX / 10ULL))
50 				goto fail; /* multiply overflow */
51 			cl = cl * 10ULL;
52 			if (unlikely(cl + c < cl))
53 				goto fail; /* addition overflow */
54 			cl = cl + c;
55 		}
56 
57 		/* keep a copy of the exact cleaned value */
58 		word.len = n - word.ptr;
59 
60 		/* skip trailing LWS till next comma or EOL */
61 		for (; n < e; n++) {
62 			if (!HTTP_IS_LWS(*n)) {
63 				if (unlikely(*n != ','))
64 					goto fail;
65 				break;
66 			}
67 		}
68 
69 		/* if duplicate, must be equal */
70 		if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
71 			goto fail;
72 
73 		/* OK, store this result as the one to be indexed */
74 		h1m->flags |= H1_MF_CLEN;
75 		h1m->curr_len = h1m->body_len = cl;
76 		*value = word;
77 		word.ptr = n;
78 	}
79 	/* here we've reached the end with a single value or a series of
80 	 * identical values, all matching previous series if any. The last
81 	 * parsed value was sent back into <value>. We just have to decide
82 	 * if this occurrence has to be indexed (it's the first one) or
83 	 * silently skipped (it's not the first one)
84 	 */
85 	return !not_first;
86  fail:
87 	return -1;
88 }
89 
90 /* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
91  * "chunked" being the last value, and setting H1_MF_CHNK in h1m->flags only in
92  * this case. Any other token found or any empty header field found will reset
93  * this flag, so that it accurately represents the token's presence at the last
94  * position. The H1_MF_XFER_ENC flag is always set. Note that transfer codings
95  * are case-insensitive (cf RFC7230#4).
96  */
h1_parse_xfer_enc_header(struct h1m * h1m,struct ist value)97 void h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
98 {
99 	char *e, *n;
100 	struct ist word;
101 
102 	h1m->flags |= H1_MF_XFER_ENC;
103 	h1m->flags &= ~H1_MF_CHNK;
104 
105 	word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
106 	e = value.ptr + value.len;
107 
108 	while (++word.ptr < e) {
109 		/* skip leading delimiter and blanks */
110 		if (HTTP_IS_LWS(*word.ptr))
111 			continue;
112 
113 		n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
114 		word.len = n - word.ptr;
115 
116 		/* trim trailing blanks */
117 		while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
118 			word.len--;
119 
120 		h1m->flags &= ~H1_MF_CHNK;
121 		if (isteqi(word, ist("chunked")))
122 			h1m->flags |= H1_MF_CHNK;
123 
124 		word.ptr = n;
125 	}
126 }
127 
128 /* Parse the Connection: header of an HTTP/1 request, looking for "close",
129  * "keep-alive", and "upgrade" values, and updating h1m->flags according to
130  * what was found there. Note that flags are only added, not removed, so the
131  * function is safe for being called multiple times if multiple occurrences
132  * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
133  * up from "keep-alive" and "close" values. To do so, the header value is
134  * rewritten in place and its length is updated.
135  */
h1_parse_connection_header(struct h1m * h1m,struct ist * value)136 void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
137 {
138 	char *e, *n, *p;
139 	struct ist word;
140 
141 	word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
142 	p = value->ptr;
143 	e = value->ptr + value->len;
144 	if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
145 		value->len = 0;
146 
147 	while (++word.ptr < e) {
148 		/* skip leading delimiter and blanks */
149 		if (HTTP_IS_LWS(*word.ptr))
150 			continue;
151 
152 		n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
153 		word.len = n - word.ptr;
154 
155 		/* trim trailing blanks */
156 		while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
157 			word.len--;
158 
159 		if (isteqi(word, ist("keep-alive"))) {
160 			h1m->flags |= H1_MF_CONN_KAL;
161 			if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
162 				goto skip_val;
163 		}
164 		else if (isteqi(word, ist("close"))) {
165 			h1m->flags |= H1_MF_CONN_CLO;
166 			if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
167 				goto skip_val;
168 		}
169 		else if (isteqi(word, ist("upgrade")))
170 			h1m->flags |= H1_MF_CONN_UPG;
171 
172 		if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
173 			if (value->ptr + value->len == p) {
174 				/* no rewrite done till now */
175 				value->len = n - value->ptr;
176 			}
177 			else {
178 				if (value->len)
179 					value->ptr[value->len++] = ',';
180 				istcat(value, word, e - value->ptr);
181 			}
182 		}
183 
184 	  skip_val:
185 		word.ptr = p = n;
186 	}
187 }
188 
/* Macros used in the HTTP/1 parser, to check for the expected presence of
 * certain bytes (e.g. LF) or to skip to next byte and yield in case of failure.
 */

/* Checks that the byte at <ptr> is an LF. When it is, execution simply
 * continues after the macro. Otherwise <state> is set to <where> and the
 * control is transferred to label <bad>.
 */
#define EXPECT_LF_HERE(ptr, bad, state, where)                  \
	do {                                                    \
		if (likely(*(ptr) == '\n'))                     \
			break;                                  \
		state = (where);                                \
		goto bad;                                       \
	} while (0)
203 
/* Advances pointer <ptr> by one byte. If this makes it reach (or pass) pointer
 * <end>, <state> is set to <where> and the control is transferred to label
 * <stop>. Otherwise the control is transferred to label <more>. In both cases
 * execution never continues after the macro.
 */
#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where)        \
	do {                                                              \
		if (unlikely(++(ptr) >= (end))) {                         \
			state = (where);                                  \
			goto stop;                                        \
		}                                                         \
		goto more;                                                \
	} while (0)
217 
218 /* This function parses a contiguous HTTP/1 headers block starting at <start>
219  * and ending before <stop>, at once, and converts it a list of (name,value)
220  * pairs representing header fields into the array <hdr> of size <hdr_num>,
221  * whose last entry will have an empty name and an empty value. If <hdr_num> is
222  * too small to represent the whole message, an error is returned. Some
223  * protocol elements such as content-length and transfer-encoding will be
224  * parsed and stored into h1m as well. <hdr> may be null, in which case only
225  * the parsing state will be updated. This may be used to restart the parsing
226  * where it stopped for example.
227  *
228  * For now it's limited to the response. If the header block is incomplete,
229  * 0 is returned, waiting to be called again with more data to try it again.
230  * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
231  * and h1m->next to zero on the first call, the parser will do the rest. If
232  * an incomplete message is seen, the caller only needs to present h1m->state
233  * and h1m->next again, with an empty header list so that the parser can start
234  * again. In this case, it will detect that it interrupted a previous session
235  * and will first look for the end of the message before reparsing it again and
236  * indexing it at the same time. This ensures that incomplete messages fed 1
237  * character at a time are never processed entirely more than exactly twice,
238  * and that there is no need to store all the internal state and pre-parsed
239  * headers or start line between calls.
240  *
241  * A pointer to a start line descriptor may be passed in <slp>, in which case
242  * the parser will fill it with whatever it found.
243  *
244  * The code derived from the main HTTP/1 parser above but was simplified and
245  * optimized to process responses produced or forwarded by haproxy. The caller
246  * is responsible for ensuring that the message doesn't wrap, and should ensure
247  * it is complete to avoid having to retry the operation after a failed
248  * attempt. The message is not supposed to be invalid, which is why a few
249  * properties such as the character set used in the header field names are not
250  * checked. In case of an unparsable response message, a negative value will be
251  * returned with h1m->err_pos and h1m->err_state matching the location and
252  * state where the error was met. Leading blank likes are tolerated but not
253  * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
254  * parsed and the start line is skipped. It is not required to set h1m->state
255  * nor h1m->next in this case.
256  *
257  * This function returns :
258  *    -1 in case of error. In this case, h1m->err_state is filled (if h1m is
259  *       set) with the state the error occurred in and h1m->err_pos with the
260  *       the position relative to <start>
261  *    -2 if the output is full (hdr_num reached). err_state and err_pos also
262  *       indicate where it failed.
263  *     0 in case of missing data.
264  *   > 0 on success, it then corresponds to the number of bytes read since
265  *       <start> so that the caller can go on with the payload.
266  */
h1_headers_to_hdr_list(char * start,const char * stop,struct http_hdr * hdr,unsigned int hdr_num,struct h1m * h1m,union h1_sl * slp)267 int h1_headers_to_hdr_list(char *start, const char *stop,
268                            struct http_hdr *hdr, unsigned int hdr_num,
269                            struct h1m *h1m, union h1_sl *slp)
270 {
271 	enum h1m_state state;
272 	register char *ptr;
273 	register const char *end;
274 	unsigned int hdr_count;
275 	unsigned int skip; /* number of bytes skipped at the beginning */
276 	unsigned int sol;  /* start of line */
277 	unsigned int col;  /* position of the colon */
278 	unsigned int eol;  /* end of line */
279 	unsigned int sov;  /* start of value */
280 	union h1_sl sl;
281 	int skip_update;
282 	int restarting;
283 	int host_idx;
284 	struct ist n, v;       /* header name and value during parsing */
285 
286 	skip = 0; // do it only once to keep track of the leading CRLF.
287 
288  try_again:
289 	hdr_count = sol = col = eol = sov = 0;
290 	sl.st.status = 0;
291 	skip_update = restarting = 0;
292 	host_idx = -1;
293 
294 	if (h1m->flags & H1_MF_HDRS_ONLY) {
295 		state = H1_MSG_HDR_FIRST;
296 		h1m->next = 0;
297 	}
298 	else {
299 		state = h1m->state;
300 		if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
301 			restarting = 1;
302 	}
303 
304 	ptr   = start + h1m->next;
305 	end   = stop;
306 
307 	if (unlikely(ptr >= end))
308 		goto http_msg_ood;
309 
310 	/* don't update output if hdr is NULL or if we're restarting */
311 	if (!hdr || restarting)
312 		skip_update = 1;
313 
314 	switch (state)	{
315 	case H1_MSG_RQBEFORE:
316 	http_msg_rqbefore:
317 		if (likely(HTTP_IS_TOKEN(*ptr))) {
318 			/* we have a start of message, we may have skipped some
319 			 * heading CRLF. Skip them now.
320 			 */
321 			skip += ptr - start;
322 			start = ptr;
323 
324 			sol = 0;
325 			sl.rq.m.ptr = ptr;
326 			hdr_count = 0;
327 			state = H1_MSG_RQMETH;
328 			goto http_msg_rqmeth;
329 		}
330 
331 		if (unlikely(!HTTP_IS_CRLF(*ptr))) {
332 			state = H1_MSG_RQBEFORE;
333 			goto http_msg_invalid;
334 		}
335 
336 		if (unlikely(*ptr == '\n'))
337 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
338 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
339 		/* stop here */
340 
341 	case H1_MSG_RQBEFORE_CR:
342 	http_msg_rqbefore_cr:
343 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
344 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
345 		/* stop here */
346 
347 	case H1_MSG_RQMETH:
348 	http_msg_rqmeth:
349 		if (likely(HTTP_IS_TOKEN(*ptr)))
350 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
351 
352 		if (likely(HTTP_IS_SPHT(*ptr))) {
353 			sl.rq.m.len = ptr - sl.rq.m.ptr;
354 			sl.rq.meth = find_http_meth(start, sl.rq.m.len);
355 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
356 		}
357 
358 		if (likely(HTTP_IS_CRLF(*ptr))) {
359 			/* HTTP 0.9 request */
360 			sl.rq.m.len = ptr - sl.rq.m.ptr;
361 			sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
362 		http_msg_req09_uri:
363 			sl.rq.u.ptr = ptr;
364 		http_msg_req09_uri_e:
365 			sl.rq.u.len = ptr - sl.rq.u.ptr;
366 		http_msg_req09_ver:
367 			sl.rq.v.ptr = ptr;
368 			sl.rq.v.len = 0;
369 			goto http_msg_rqline_eol;
370 		}
371 		state = H1_MSG_RQMETH;
372 		goto http_msg_invalid;
373 
374 	case H1_MSG_RQMETH_SP:
375 	http_msg_rqmeth_sp:
376 		if (likely(!HTTP_IS_LWS(*ptr))) {
377 			sl.rq.u.ptr = ptr;
378 			goto http_msg_rquri;
379 		}
380 		if (likely(HTTP_IS_SPHT(*ptr)))
381 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
382 		/* so it's a CR/LF, meaning an HTTP 0.9 request */
383 		goto http_msg_req09_uri;
384 
385 	case H1_MSG_RQURI:
386 	http_msg_rquri:
387 #ifdef HA_UNALIGNED_LE
388 		/* speedup: skip bytes not between 0x21 and 0x7e inclusive */
389 		while (ptr <= end - sizeof(int)) {
390 			int x = *(int *)ptr - 0x21212121;
391 			if (x & 0x80808080)
392 				break;
393 
394 			x -= 0x5e5e5e5e;
395 			if (!(x & 0x80808080))
396 				break;
397 
398 			ptr += sizeof(int);
399 		}
400 #endif
401 		if (ptr >= end) {
402 			state = H1_MSG_RQURI;
403 			goto http_msg_ood;
404 		}
405 	http_msg_rquri2:
406 		if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
407 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
408 
409 		if (likely(HTTP_IS_SPHT(*ptr))) {
410 			sl.rq.u.len = ptr - sl.rq.u.ptr;
411 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
412 		}
413 		if (likely((unsigned char)*ptr >= 128)) {
414 			/* non-ASCII chars are forbidden unless option
415 			 * accept-invalid-http-request is enabled in the frontend.
416 			 * In any case, we capture the faulty char.
417 			 */
418 			if (h1m->err_pos < -1)
419 				goto invalid_char;
420 			if (h1m->err_pos == -1)
421 				h1m->err_pos = ptr - start + skip;
422 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
423 		}
424 
425 		if (likely(HTTP_IS_CRLF(*ptr))) {
426 			/* so it's a CR/LF, meaning an HTTP 0.9 request */
427 			goto http_msg_req09_uri_e;
428 		}
429 
430 		/* OK forbidden chars, 0..31 or 127 */
431 	invalid_char:
432 		state = H1_MSG_RQURI;
433 		goto http_msg_invalid;
434 
435 	case H1_MSG_RQURI_SP:
436 	http_msg_rquri_sp:
437 		if (likely(!HTTP_IS_LWS(*ptr))) {
438 			sl.rq.v.ptr = ptr;
439 			goto http_msg_rqver;
440 		}
441 		if (likely(HTTP_IS_SPHT(*ptr)))
442 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
443 		/* so it's a CR/LF, meaning an HTTP 0.9 request */
444 		goto http_msg_req09_ver;
445 
446 
447 	case H1_MSG_RQVER:
448 	http_msg_rqver:
449 		if (likely(HTTP_IS_VER_TOKEN(*ptr)))
450 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
451 
452 		if (likely(HTTP_IS_CRLF(*ptr))) {
453 			sl.rq.v.len = ptr - sl.rq.v.ptr;
454 		http_msg_rqline_eol:
455 			/* We have seen the end of line. Note that we do not
456 			 * necessarily have the \n yet, but at least we know that we
457 			 * have EITHER \r OR \n, otherwise the request would not be
458 			 * complete. We can then record the request length and return
459 			 * to the caller which will be able to register it.
460 			 */
461 
462 			if (likely(!skip_update)) {
463 				if ((sl.rq.v.len == 8) &&
464 				    (*(sl.rq.v.ptr + 5) > '1' ||
465 				     (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
466 					h1m->flags |= H1_MF_VER_11;
467 
468 				if (unlikely(hdr_count >= hdr_num)) {
469 					state = H1_MSG_RQVER;
470 					goto http_output_full;
471 				}
472 				if (!(h1m->flags & H1_MF_NO_PHDR))
473 					http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
474 
475 				if (unlikely(hdr_count >= hdr_num)) {
476 					state = H1_MSG_RQVER;
477 					goto http_output_full;
478 				}
479 				if (!(h1m->flags & H1_MF_NO_PHDR))
480 					http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
481 			}
482 
483 			sol = ptr - start;
484 			if (likely(*ptr == '\r'))
485 				EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
486 			goto http_msg_rqline_end;
487 		}
488 
489 		/* neither an HTTP_VER token nor a CRLF */
490 		state = H1_MSG_RQVER;
491 		goto http_msg_invalid;
492 
493 	case H1_MSG_RQLINE_END:
494 	http_msg_rqline_end:
495 		/* check for HTTP/0.9 request : no version information
496 		 * available. sol must point to the first of CR or LF. However
497 		 * since we don't save these elements between calls, if we come
498 		 * here from a restart, we don't necessarily know. Thus in this
499 		 * case we simply start over.
500 		 */
501 		if (restarting)
502 			goto restart;
503 
504 		if (unlikely(sl.rq.v.len == 0))
505 			goto http_msg_last_lf;
506 
507 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
508 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
509 		/* stop here */
510 
511 	/*
512 	 * Common states below
513 	 */
514 	case H1_MSG_RPBEFORE:
515 	http_msg_rpbefore:
516 		if (likely(HTTP_IS_TOKEN(*ptr))) {
517 			/* we have a start of message, we may have skipped some
518 			 * heading CRLF. Skip them now.
519 			 */
520 			skip += ptr - start;
521 			start = ptr;
522 
523 			sol = 0;
524 			sl.st.v.ptr = ptr;
525 			hdr_count = 0;
526 			state = H1_MSG_RPVER;
527 			goto http_msg_rpver;
528 		}
529 
530 		if (unlikely(!HTTP_IS_CRLF(*ptr))) {
531 			state = H1_MSG_RPBEFORE;
532 			goto http_msg_invalid;
533 		}
534 
535 		if (unlikely(*ptr == '\n'))
536 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
537 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
538 		/* stop here */
539 
540 	case H1_MSG_RPBEFORE_CR:
541 	http_msg_rpbefore_cr:
542 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
543 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
544 		/* stop here */
545 
546 	case H1_MSG_RPVER:
547 	http_msg_rpver:
548 		if (likely(HTTP_IS_VER_TOKEN(*ptr)))
549 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
550 
551 		if (likely(HTTP_IS_SPHT(*ptr))) {
552 			sl.st.v.len = ptr - sl.st.v.ptr;
553 
554 			if ((sl.st.v.len == 8) &&
555 			    (*(sl.st.v.ptr + 5) > '1' ||
556 			     (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
557 				h1m->flags |= H1_MF_VER_11;
558 
559 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
560 		}
561 		state = H1_MSG_RPVER;
562 		goto http_msg_invalid;
563 
564 	case H1_MSG_RPVER_SP:
565 	http_msg_rpver_sp:
566 		if (likely(!HTTP_IS_LWS(*ptr))) {
567 			sl.st.status = 0;
568 			sl.st.c.ptr = ptr;
569 			goto http_msg_rpcode;
570 		}
571 		if (likely(HTTP_IS_SPHT(*ptr)))
572 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
573 		/* so it's a CR/LF, this is invalid */
574 		state = H1_MSG_RPVER_SP;
575 		goto http_msg_invalid;
576 
577 	case H1_MSG_RPCODE:
578 	http_msg_rpcode:
579 		if (likely(HTTP_IS_DIGIT(*ptr))) {
580 			sl.st.status = sl.st.status * 10 + *ptr - '0';
581 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
582 		}
583 
584 		if (unlikely(!HTTP_IS_LWS(*ptr))) {
585 			state = H1_MSG_RPCODE;
586 			goto http_msg_invalid;
587 		}
588 
589 		if (likely(HTTP_IS_SPHT(*ptr))) {
590 			sl.st.c.len = ptr - sl.st.c.ptr;
591 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
592 		}
593 
594 		/* so it's a CR/LF, so there is no reason phrase */
595 		sl.st.c.len = ptr - sl.st.c.ptr;
596 
597 	http_msg_rsp_reason:
598 		sl.st.r.ptr = ptr;
599 		sl.st.r.len = 0;
600 		goto http_msg_rpline_eol;
601 
602 	case H1_MSG_RPCODE_SP:
603 	http_msg_rpcode_sp:
604 		if (likely(!HTTP_IS_LWS(*ptr))) {
605 			sl.st.r.ptr = ptr;
606 			goto http_msg_rpreason;
607 		}
608 		if (likely(HTTP_IS_SPHT(*ptr)))
609 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
610 		/* so it's a CR/LF, so there is no reason phrase */
611 		goto http_msg_rsp_reason;
612 
613 	case H1_MSG_RPREASON:
614 	http_msg_rpreason:
615 		if (likely(!HTTP_IS_CRLF(*ptr)))
616 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
617 		sl.st.r.len = ptr - sl.st.r.ptr;
618 	http_msg_rpline_eol:
619 		/* We have seen the end of line. Note that we do not
620 		 * necessarily have the \n yet, but at least we know that we
621 		 * have EITHER \r OR \n, otherwise the response would not be
622 		 * complete. We can then record the response length and return
623 		 * to the caller which will be able to register it.
624 		 */
625 
626 		if (likely(!skip_update)) {
627 			if (unlikely(hdr_count >= hdr_num)) {
628 				state = H1_MSG_RPREASON;
629 				goto http_output_full;
630 			}
631 			if (!(h1m->flags & H1_MF_NO_PHDR))
632 				http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
633 		}
634 
635 		sol = ptr - start;
636 		if (likely(*ptr == '\r'))
637 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
638 		goto http_msg_rpline_end;
639 
640 	case H1_MSG_RPLINE_END:
641 	http_msg_rpline_end:
642 		/* sol must point to the first of CR or LF. */
643 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
644 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
645 		/* stop here */
646 
647 	case H1_MSG_HDR_FIRST:
648 	http_msg_hdr_first:
649 		sol = ptr - start;
650 		if (likely(!HTTP_IS_CRLF(*ptr))) {
651 			goto http_msg_hdr_name;
652 		}
653 
654 		if (likely(*ptr == '\r'))
655 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
656 		goto http_msg_last_lf;
657 
658 	case H1_MSG_HDR_NAME:
659 	http_msg_hdr_name:
660 		/* assumes sol points to the first char */
661 		if (likely(HTTP_IS_TOKEN(*ptr))) {
662 			if (!skip_update) {
663 				/* turn it to lower case if needed */
664 				if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
665 					*ptr = tolower((unsigned char)*ptr);
666 			}
667 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
668 		}
669 
670 		if (likely(*ptr == ':')) {
671 			col = ptr - start;
672 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
673 		}
674 
675 		if (likely(h1m->err_pos < -1) || *ptr == '\n') {
676 			state = H1_MSG_HDR_NAME;
677 			goto http_msg_invalid;
678 		}
679 
680 		if (h1m->err_pos == -1) /* capture the error pointer */
681 			h1m->err_pos = ptr - start + skip; /* >= 0 now */
682 
683 		/* and we still accept this non-token character */
684 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
685 
686 	case H1_MSG_HDR_L1_SP:
687 	http_msg_hdr_l1_sp:
688 		/* assumes sol points to the first char */
689 		if (likely(HTTP_IS_SPHT(*ptr)))
690 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
691 
692 		/* header value can be basically anything except CR/LF */
693 		sov = ptr - start;
694 
695 		if (likely(!HTTP_IS_CRLF(*ptr))) {
696 			goto http_msg_hdr_val;
697 		}
698 
699 		if (likely(*ptr == '\r'))
700 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
701 		goto http_msg_hdr_l1_lf;
702 
703 	case H1_MSG_HDR_L1_LF:
704 	http_msg_hdr_l1_lf:
705 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
706 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
707 
708 	case H1_MSG_HDR_L1_LWS:
709 	http_msg_hdr_l1_lws:
710 		if (likely(HTTP_IS_SPHT(*ptr))) {
711 			if (!skip_update) {
712 				/* replace HT,CR,LF with spaces */
713 				for (; start + sov < ptr; sov++)
714 					start[sov] = ' ';
715 			}
716 			goto http_msg_hdr_l1_sp;
717 		}
718 		/* we had a header consisting only in spaces ! */
719 		eol = sov;
720 		goto http_msg_complete_header;
721 
722 	case H1_MSG_HDR_VAL:
723 	http_msg_hdr_val:
724 		/* assumes sol points to the first char, and sov
725 		 * points to the first character of the value.
726 		 */
727 
728 		/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
729 		 * and lower. In fact since most of the time is spent in the loop, we
730 		 * also remove the sign bit test so that bytes 0x8e..0x0d break the
731 		 * loop, but we don't care since they're very rare in header values.
732 		 */
733 #ifdef HA_UNALIGNED_LE64
734 		while (ptr <= end - sizeof(long)) {
735 			if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
736 				goto http_msg_hdr_val2;
737 			ptr += sizeof(long);
738 		}
739 #endif
740 #ifdef HA_UNALIGNED_LE
741 		while (ptr <= end - sizeof(int)) {
742 			if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
743 				goto http_msg_hdr_val2;
744 			ptr += sizeof(int);
745 		}
746 #endif
747 		if (ptr >= end) {
748 			state = H1_MSG_HDR_VAL;
749 			goto http_msg_ood;
750 		}
751 	http_msg_hdr_val2:
752 		if (likely(!HTTP_IS_CRLF(*ptr)))
753 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
754 
755 		eol = ptr - start;
756 		/* Note: we could also copy eol into ->eoh so that we have the
757 		 * real header end in case it ends with lots of LWS, but is this
758 		 * really needed ?
759 		 */
760 		if (likely(*ptr == '\r'))
761 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
762 		goto http_msg_hdr_l2_lf;
763 
764 	case H1_MSG_HDR_L2_LF:
765 	http_msg_hdr_l2_lf:
766 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
767 		EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
768 
769 	case H1_MSG_HDR_L2_LWS:
770 	http_msg_hdr_l2_lws:
771 		if (unlikely(HTTP_IS_SPHT(*ptr))) {
772 			if (!skip_update) {
773 				/* LWS: replace HT,CR,LF with spaces */
774 				for (; start + eol < ptr; eol++)
775 					start[eol] = ' ';
776 			}
777 			goto http_msg_hdr_val;
778 		}
779 	http_msg_complete_header:
780 		/*
781 		 * It was a new header, so the last one is finished. Assumes
782 		 * <sol> points to the first char of the name, <col> to the
783 		 * colon, <sov> points to the first character of the value and
784 		 * <eol> to the first CR or LF so we know how the line ends. We
785 		 * will trim spaces around the value. It's possible to do it by
786 		 * adjusting <eol> and <sov> which are no more used after this.
787 		 * We can add the header field to the list.
788 		 */
789 		if (likely(!skip_update)) {
790 			while (sov < eol && HTTP_IS_LWS(start[sov]))
791 				sov++;
792 
793 			while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
794 				eol--;
795 
796 
797 			n = ist2(start + sol, col - sol);
798 			v = ist2(start + sov, eol - sov);
799 
800 			do {
801 				int ret;
802 
803 				if (unlikely(hdr_count >= hdr_num)) {
804 					state = H1_MSG_HDR_L2_LWS;
805 					goto http_output_full;
806 				}
807 
808 				if (isteqi(n, ist("transfer-encoding"))) {
809 					h1_parse_xfer_enc_header(h1m, v);
810 				}
811 				else if (isteqi(n, ist("content-length"))) {
812 					ret = h1_parse_cont_len_header(h1m, &v);
813 
814 					if (ret < 0) {
815 						state = H1_MSG_HDR_L2_LWS;
816 						ptr = v.ptr; /* Set ptr on the error */
817 						goto http_msg_invalid;
818 					}
819 					else if (ret == 0) {
820 						/* skip it */
821 						break;
822 					}
823 				}
824 				else if (isteqi(n, ist("connection"))) {
825 					h1_parse_connection_header(h1m, &v);
826 					if (!v.len) {
827 						/* skip it */
828 						break;
829 					}
830 				}
831 				else if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP)) && isteqi(n, ist("host"))) {
832 					if (host_idx == -1) {
833 						struct ist authority;
834 
835 						authority = http_get_authority(sl.rq.u, 1);
836 						if (authority.len && !isteqi(v, authority)) {
837 							if (h1m->err_pos < -1) {
838 								state = H1_MSG_HDR_L2_LWS;
839 								ptr = v.ptr; /* Set ptr on the error */
840 								goto http_msg_invalid;
841 							}
842 							if (h1m->err_pos == -1) /* capture the error pointer */
843 								h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
844 						}
845 						host_idx = hdr_count;
846 					}
847 					else {
848 						if (!isteqi(v, hdr[host_idx].v)) {
849 							state = H1_MSG_HDR_L2_LWS;
850 							ptr = v.ptr; /* Set ptr on the error */
851 							goto http_msg_invalid;
852 						}
853 						/* if the same host, skip it */
854 						break;
855 					}
856 				}
857 
858 				http_set_hdr(&hdr[hdr_count++], n, v);
859 			} while (0);
860 		}
861 
862 		sol = ptr - start;
863 
864 		if (likely(!HTTP_IS_CRLF(*ptr)))
865 			goto http_msg_hdr_name;
866 
867 		if (likely(*ptr == '\r'))
868 			EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
869 		goto http_msg_last_lf;
870 
871 	case H1_MSG_LAST_LF:
872 	http_msg_last_lf:
873 		EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
874 		ptr++;
875 		/* <ptr> now points to the first byte of payload. If needed sol
876 		 * still points to the first of either CR or LF of the empty
877 		 * line ending the headers block.
878 		 */
879 		if (likely(!skip_update)) {
880 			if (unlikely(hdr_count >= hdr_num)) {
881 				state = H1_MSG_LAST_LF;
882 				goto http_output_full;
883 			}
884 			http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
885 		}
886 
887 		/* reaching here we've parsed the whole message. We may detect
888 		 * that we were already continuing an interrupted parsing pass
889 		 * so we were silently looking for the end of message not
890 		 * updating anything before deciding to parse it fully at once.
891 		 * It's guaranteed that we won't match this test twice in a row
892 		 * since restarting will turn zero.
893 		 */
894 		if (restarting)
895 			goto restart;
896 
897 		state = H1_MSG_DATA;
898 		if (h1m->flags & H1_MF_XFER_ENC) {
899 			if (h1m->flags & H1_MF_CLEN) {
900 				h1m->flags &= ~H1_MF_CLEN;
901 				hdr_count = http_del_hdr(hdr, ist("content-length"));
902 			}
903 
904 			if (h1m->flags & H1_MF_CHNK)
905 				state = H1_MSG_CHUNK_SIZE;
906 			else if (!(h1m->flags & H1_MF_RESP)) {
907 				/* cf RFC7230#3.3.3 : transfer-encoding in
908 				 * request without chunked encoding is invalid.
909 				 */
910 				goto http_msg_invalid;
911 			}
912 		}
913 
914 		break;
915 
916 	default:
917 		/* impossible states */
918 		goto http_msg_invalid;
919 	}
920 
921 	/* Now we've left the headers state and are either in H1_MSG_DATA or
922 	 * H1_MSG_CHUNK_SIZE.
923 	 */
924 
925 	if (slp && !skip_update)
926 		*slp = sl;
927 
928 	h1m->state = state;
929 	h1m->next  = ptr - start + skip;
930 	return h1m->next;
931 
932  http_msg_ood:
933 	/* out of data at <ptr> during state <state> */
934 	if (slp && !skip_update)
935 		*slp = sl;
936 
937 	h1m->state = state;
938 	h1m->next  = ptr - start + skip;
939 	return 0;
940 
941  http_msg_invalid:
942 	/* invalid message, error at <ptr> */
943 	if (slp && !skip_update)
944 		*slp = sl;
945 
946 	h1m->err_state = h1m->state = state;
947 	h1m->err_pos   = h1m->next  = ptr - start + skip;
948 	return -1;
949 
950  http_output_full:
951 	/* no more room to store the current header, error at <ptr> */
952 	if (slp && !skip_update)
953 		*slp = sl;
954 
955 	h1m->err_state = h1m->state = state;
956 	h1m->err_pos   = h1m->next  = ptr - start + skip;
957 	return -2;
958 
959  restart:
960 	h1m->flags &= ~(H1_MF_VER_11|H1_MF_CLEN|H1_MF_XFER_ENC|H1_MF_CHNK|H1_MF_CONN_KAL|H1_MF_CONN_CLO|H1_MF_CONN_UPG);
961 	h1m->curr_len = h1m->body_len = h1m->next  = 0;
962 	if (h1m->flags & H1_MF_RESP)
963 		h1m->state = H1_MSG_RPBEFORE;
964 	else
965 		h1m->state = H1_MSG_RQBEFORE;
966 	goto try_again;
967 }
968 
/* Performs a very minimal parsing of the trailers block found at offset <ofs>
 * in <buf>, looking at no more than <max> bytes. Lines are scanned one at a
 * time until an empty line (a lone LF or CRLF) is found, which marks the end
 * of the trailers.
 *
 * Returns the number of bytes to delete to skip the trailers (empty line
 * included), 0 if some input data are missing, or < 0 in case of parse error
 * (a second CR found on a line before its LF), in which case the caller may
 * have to decide how to proceed, possibly eating everything.
 */
int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
{
	const char *limit = b_peek(buf, ofs + max);
	int consumed = ofs;

	for (;;) {
		const char *line = b_peek(buf, consumed);
		const char *eol  = NULL; /* first CR or LF met on this line */
		const char *cur;

		/* look for the LF ending the current line */
		for (cur = line; ; cur = b_next(buf, cur)) {
			if (cur == limit)
				return 0; /* line not complete yet */

			if (*cur == '\n') {
				if (!eol)
					eol = cur;
				break;
			}

			if (*cur == '\r') {
				if (eol)
					return -1; /* only one CR allowed per line */
				eol = cur;
			}
		}

		/* account for the whole line, LF included */
		cur = b_next(buf, cur);
		consumed += b_dist(buf, line, cur);

		/* LF/CRLF at beginning of line => end of trailers. Everything
		 * was scheduled for forwarding, there's nothing left from this
		 * message.
		 */
		if (eol == line)
			break;
	}
	return consumed - ofs;
}
1019