1 /*-
2  * Copyright (c) 2006 Verdens Gang AS
3  * Copyright (c) 2006-2015 Varnish Software AS
4  * All rights reserved.
5  *
6  * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7  *
8  * SPDX-License-Identifier: BSD-2-Clause
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  * HTTP protocol requests
32  *
33  * The trouble with the "until magic sequence" design of HTTP protocol messages
34  * is that either you have to read a single character at a time, which is
35  * inefficient, or you risk reading too much, and pre-read some of the object,
36  * or even the next pipelined request, which follows the one you want.
37  *
38  * HTC reads a HTTP protocol header into a workspace, subject to limits,
39  * and stops when we see the magic marker (double [CR]NL), and if we overshoot,
40  * it keeps track of the "pipelined" data.
41  *
42  * We use this both for client and backend connections.
43  */
44 
45 #include "config.h"
46 
47 #include "cache/cache_varnishd.h"
48 #include "cache/cache_transport.h"
49 
50 #include "cache_http1.h"
51 
52 #include "vct.h"
53 
54 const int HTTP1_Req[3] = {
55 	HTTP_HDR_METHOD, HTTP_HDR_URL, HTTP_HDR_PROTO
56 };
57 
58 const int HTTP1_Resp[3] = {
59 	HTTP_HDR_PROTO, HTTP_HDR_STATUS, HTTP_HDR_REASON
60 };
61 
62 /*--------------------------------------------------------------------
63  * Check if we have a complete HTTP request or response yet
64  */
65 
v_matchproto_(htc_complete_f)66 enum htc_status_e v_matchproto_(htc_complete_f)
67 HTTP1_Complete(struct http_conn *htc)
68 {
69 	char *p;
70 	enum htc_status_e retval;
71 
72 	CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
73 	AN(WS_Reservation(htc->ws));
74 	assert(pdiff(htc->rxbuf_b, htc->rxbuf_e) <= WS_ReservationSize(htc->ws));
75 
76 	/* Skip any leading white space */
77 	for (p = htc->rxbuf_b ; p < htc->rxbuf_e && vct_islws(*p); p++)
78 		continue;
79 	if (p == htc->rxbuf_e)
80 		return (HTC_S_EMPTY);
81 
82 	/* Do not return a partial H2 connection preface */
83 	retval = H2_prism_complete(htc);
84 	if (retval != HTC_S_JUNK)
85 		return (retval);
86 
87 	/*
88 	 * Here we just look for NL[CR]NL to see that reception
89 	 * is completed.  More stringent validation happens later.
90 	 */
91 	while (1) {
92 		p = memchr(p, '\n', htc->rxbuf_e - p);
93 		if (p == NULL)
94 			return (HTC_S_MORE);
95 		if (++p == htc->rxbuf_e)
96 			return (HTC_S_MORE);
97 		if (*p == '\r' && ++p == htc->rxbuf_e)
98 			return (HTC_S_MORE);
99 		if (*p == '\n')
100 			break;
101 	}
102 	return (HTC_S_COMPLETE);
103 }
104 
105 /*--------------------------------------------------------------------
106  * Dissect the headers of the HTTP protocol message.
107  * Detect conditionals (headers which start with '^[Ii][Ff]-')
108  */
109 
110 static uint16_t
http1_dissect_hdrs(struct http * hp,char * p,struct http_conn * htc,unsigned maxhdr)111 http1_dissect_hdrs(struct http *hp, char *p, struct http_conn *htc,
112     unsigned maxhdr)
113 {
114 	char *q, *r, *s;
115 	int i;
116 
117 	assert(p > htc->rxbuf_b);
118 	assert(p <= htc->rxbuf_e);
119 	hp->nhd = HTTP_HDR_FIRST;
120 	r = NULL;		/* For FlexeLint */
121 	for (; p < htc->rxbuf_e; p = r) {
122 
123 		/* Find end of next header */
124 		q = r = p;
125 		if (vct_iscrlf(p, htc->rxbuf_e))
126 			break;
127 		while (r < htc->rxbuf_e) {
128 			if (vct_ishdrval(*r)) {
129 				r++;
130 				continue;
131 			}
132 			i = vct_iscrlf(r, htc->rxbuf_e);
133 			if (i == 0) {
134 				VSLb(hp->vsl, SLT_BogoHeader,
135 				    "Header has ctrl char 0x%02x", *r);
136 				return (400);
137 			}
138 			q = r;
139 			r += i;
140 			assert(r <= htc->rxbuf_e);
141 			if (r == htc->rxbuf_e)
142 				break;
143 			if (vct_iscrlf(r, htc->rxbuf_e))
144 				break;
145 			/* If line does not continue: got it. */
146 			if (!vct_issp(*r))
147 				break;
148 
149 			/* Clear line continuation LWS to spaces */
150 			while (q < r)
151 				*q++ = ' ';
152 			while (q < htc->rxbuf_e && vct_issp(*q))
153 				*q++ = ' ';
154 		}
155 
156 		/* Empty header = end of headers */
157 		if (p == q)
158 			break;
159 
160 		if (q - p > maxhdr) {
161 			VSLb(hp->vsl, SLT_BogoHeader, "Header too long: %.*s",
162 			    (int)(q - p > 20 ? 20 : q - p), p);
163 			return (400);
164 		}
165 
166 		if (vct_islws(*p)) {
167 			VSLb(hp->vsl, SLT_BogoHeader,
168 			    "1st header has white space: %.*s",
169 			    (int)(q - p > 20 ? 20 : q - p), p);
170 			return (400);
171 		}
172 
173 		if (*p == ':') {
174 			VSLb(hp->vsl, SLT_BogoHeader,
175 			    "Missing header name: %.*s",
176 			    (int)(q - p > 20 ? 20 : q - p), p);
177 			return (400);
178 		}
179 
180 		while (q > p && vct_issp(q[-1]))
181 			q--;
182 		*q = '\0';
183 
184 		for (s = p; *s != ':' && s < q; s++) {
185 			if (!vct_istchar(*s)) {
186 				VSLb(hp->vsl, SLT_BogoHeader,
187 				    "Illegal char 0x%02x in header name", *s);
188 				return (400);
189 			}
190 		}
191 		if (*s != ':') {
192 			VSLb(hp->vsl, SLT_BogoHeader, "Header without ':' %.*s",
193 			    (int)(q - p > 20 ? 20 : q - p), p);
194 			return (400);
195 		}
196 
197 		if (hp->nhd < hp->shd) {
198 			hp->hdf[hp->nhd] = 0;
199 			hp->hd[hp->nhd].b = p;
200 			hp->hd[hp->nhd].e = q;
201 			hp->nhd++;
202 		} else {
203 			VSLb(hp->vsl, SLT_BogoHeader, "Too many headers: %.*s",
204 			    (int)(q - p > 20 ? 20 : q - p), p);
205 			return (400);
206 		}
207 	}
208 	i = vct_iscrlf(p, htc->rxbuf_e);
209 	assert(i > 0);		/* HTTP1_Complete guarantees this */
210 	p += i;
211 	HTC_RxPipeline(htc, p);
212 	htc->rxbuf_e = p;
213 	return (0);
214 }
215 
216 /*--------------------------------------------------------------------
217  * Deal with first line of HTTP protocol message.
218  */
219 
220 static uint16_t
http1_splitline(struct http * hp,struct http_conn * htc,const int * hf,unsigned maxhdr)221 http1_splitline(struct http *hp, struct http_conn *htc, const int *hf,
222     unsigned maxhdr)
223 {
224 	char *p, *q;
225 	int i;
226 
227 	assert(hf == HTTP1_Req || hf == HTTP1_Resp);
228 	CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
229 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
230 	assert(htc->rxbuf_e >= htc->rxbuf_b);
231 
232 	AZ(hp->hd[hf[0]].b);
233 	AZ(hp->hd[hf[1]].b);
234 	AZ(hp->hd[hf[2]].b);
235 
236 	/* Skip leading LWS */
237 	for (p = htc->rxbuf_b ; vct_islws(*p); p++)
238 		continue;
239 	hp->hd[hf[0]].b = p;
240 
241 	/* First field cannot contain SP or CTL */
242 	for (; !vct_issp(*p); p++) {
243 		if (vct_isctl(*p))
244 			return (400);
245 	}
246 	hp->hd[hf[0]].e = p;
247 	assert(Tlen(hp->hd[hf[0]]));
248 	*p++ = '\0';
249 
250 	/* Skip SP */
251 	for (; vct_issp(*p); p++) {
252 		if (vct_isctl(*p))
253 			return (400);
254 	}
255 	hp->hd[hf[1]].b = p;
256 
257 	/* Second field cannot contain LWS or CTL */
258 	for (; !vct_islws(*p); p++) {
259 		if (vct_isctl(*p))
260 			return (400);
261 	}
262 	hp->hd[hf[1]].e = p;
263 	if (!Tlen(hp->hd[hf[1]]))
264 		return (400);
265 
266 	/* Skip SP */
267 	q = p;
268 	for (; vct_issp(*p); p++) {
269 		if (vct_isctl(*p))
270 			return (400);
271 	}
272 	if (q < p)
273 		*q = '\0';	/* Nul guard for the 2nd field. If q == p
274 				 * (the third optional field is not
275 				 * present), the last nul guard will
276 				 * cover this field. */
277 
278 	/* Third field is optional and cannot contain CTL except TAB */
279 	q = p;
280 	for (; p < htc->rxbuf_e && !vct_iscrlf(p, htc->rxbuf_e); p++) {
281 		if (vct_isctl(*p) && !vct_issp(*p))
282 			return (400);
283 	}
284 	if (p > q) {
285 		hp->hd[hf[2]].b = q;
286 		hp->hd[hf[2]].e = p;
287 	}
288 
289 	/* Skip CRLF */
290 	i = vct_iscrlf(p, htc->rxbuf_e);
291 	if (!i)
292 		return (400);
293 	*p = '\0';
294 	p += i;
295 
296 	http_Proto(hp);
297 
298 	return (http1_dissect_hdrs(hp, p, htc, maxhdr));
299 }
300 
301 /*--------------------------------------------------------------------*/
302 
303 static body_status_t
http1_body_status(const struct http * hp,struct http_conn * htc,int request)304 http1_body_status(const struct http *hp, struct http_conn *htc, int request)
305 {
306 	ssize_t cl;
307 	const char *b;
308 
309 	CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
310 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
311 
312 	htc->content_length = -1;
313 
314 	cl = http_GetContentLength(hp);
315 	if (cl == -2)
316 		return (BS_ERROR);
317 	if (http_GetHdr(hp, H_Transfer_Encoding, &b)) {
318 		if (strcasecmp(b, "chunked"))
319 			return (BS_ERROR);
320 		if (cl != -1) {
321 			/*
322 			 * RFC7230 3.3.3 allows more lenient handling
323 			 * but we're going to be strict.
324 			 */
325 			return (BS_ERROR);
326 		}
327 		return (BS_CHUNKED);
328 	}
329 	if (cl >= 0) {
330 		htc->content_length = cl;
331 		return (cl == 0 ? BS_NONE : BS_LENGTH);
332 	}
333 
334 	if (hp->protover == 11 && request)
335 		return (BS_NONE);
336 
337 	if (http_HdrIs(hp, H_Connection, "keep-alive")) {
338 		/*
339 		 * Keep alive with neither TE=Chunked or C-Len is impossible.
340 		 * We assume a zero length body.
341 		 */
342 		return (BS_NONE);
343 	}
344 
345 	/*
346 	 * Fall back to EOF transfer.
347 	 */
348 	return (BS_EOF);
349 }
350 
351 /*--------------------------------------------------------------------*/
352 
353 uint16_t
HTTP1_DissectRequest(struct http_conn * htc,struct http * hp)354 HTTP1_DissectRequest(struct http_conn *htc, struct http *hp)
355 {
356 	uint16_t retval;
357 	const char *p;
358 	const char *b = NULL, *e;
359 
360 	CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
361 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
362 
363 	retval = http1_splitline(hp, htc,
364 	    HTTP1_Req, cache_param->http_req_hdr_len);
365 	if (retval != 0)
366 		return (retval);
367 
368 	if (hp->protover < 10 || hp->protover > 11)
369 		return (400);
370 
371 	if (http_CountHdr(hp, H_Host) > 1)
372 		return (400);
373 
374 	if (http_CountHdr(hp, H_Content_Length) > 1)
375 		return (400);
376 
377 	/* RFC2616, section 5.2, point 1 */
378 	if (!strncasecmp(hp->hd[HTTP_HDR_URL].b, "http://", 7))
379 		b = hp->hd[HTTP_HDR_URL].b + 7;
380 	else if (FEATURE(FEATURE_HTTPS_SCHEME) &&
381 	    !strncasecmp(hp->hd[HTTP_HDR_URL].b, "https://", 8))
382 		b = hp->hd[HTTP_HDR_URL].b + 8;
383 	if (b) {
384 		e = strchr(b, '/');
385 		if (e) {
386 			http_Unset(hp, H_Host);
387 			http_PrintfHeader(hp, "Host: %.*s", (int)(e - b), b);
388 			hp->hd[HTTP_HDR_URL].b = e;
389 		}
390 	}
391 
392 	htc->body_status = http1_body_status(hp, htc, 1);
393 	if (htc->body_status == BS_ERROR)
394 		return (400);
395 
396 	p = http_GetMethod(hp);
397 	AN(p);
398 
399 	if (htc->body_status == BS_EOF) {
400 		assert(hp->protover == 10);
401 		/* RFC1945 8.3 p32 and D.1.1 p58 */
402 		if (!strcasecmp(p, "post") || !strcasecmp(p, "put"))
403 			return (400);
404 		htc->body_status = BS_NONE;
405 	}
406 
407 	/* HEAD with a body is a hard error */
408 	if (htc->body_status != BS_NONE && !strcasecmp(p, "head"))
409 		return (400);
410 
411 	return (retval);
412 }
413 
414 /*--------------------------------------------------------------------*/
415 
416 uint16_t
HTTP1_DissectResponse(struct http_conn * htc,struct http * hp,const struct http * rhttp)417 HTTP1_DissectResponse(struct http_conn *htc, struct http *hp,
418     const struct http *rhttp)
419 {
420 	uint16_t retval = 0;
421 	const char *p;
422 
423 	CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
424 	CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
425 	CHECK_OBJ_NOTNULL(rhttp, HTTP_MAGIC);
426 
427 	if (http1_splitline(hp, htc,
428 	    HTTP1_Resp, cache_param->http_resp_hdr_len))
429 		retval = 503;
430 
431 	if (retval == 0 && hp->protover < 10)
432 		retval = 503;
433 
434 	if (retval == 0 && hp->protover > rhttp->protover)
435 		http_SetH(hp, HTTP_HDR_PROTO, rhttp->hd[HTTP_HDR_PROTO].b);
436 
437 	if (retval == 0 && Tlen(hp->hd[HTTP_HDR_STATUS]) != 3)
438 		retval = 503;
439 
440 	if (retval == 0) {
441 		p = hp->hd[HTTP_HDR_STATUS].b;
442 
443 		if (p[0] >= '1' && p[0] <= '9' &&
444 		    p[1] >= '0' && p[1] <= '9' &&
445 		    p[2] >= '0' && p[2] <= '9')
446 			hp->status =
447 			    100 * (p[0] - '0') + 10 * (p[1] - '0') + p[2] - '0';
448 		else
449 			retval = 503;
450 	}
451 
452 	if (retval != 0) {
453 		VSLb(hp->vsl, SLT_HttpGarbage, "%.*s",
454 		    (int)(htc->rxbuf_e - htc->rxbuf_b), htc->rxbuf_b);
455 		assert(retval >= 100 && retval <= 999);
456 		assert(retval == 503);
457 		http_SetStatus(hp, 503, NULL);
458 	}
459 
460 	if (hp->hd[HTTP_HDR_REASON].b == NULL ||
461 	    !Tlen(hp->hd[HTTP_HDR_REASON])) {
462 		http_SetH(hp, HTTP_HDR_REASON,
463 		    http_Status2Reason(hp->status, NULL));
464 	}
465 
466 	htc->body_status = http1_body_status(hp, htc, 0);
467 
468 	return (retval);
469 }
470 
471 /*--------------------------------------------------------------------*/
472 
473 static unsigned
http1_WrTxt(const struct worker * wrk,const txt * hh,const char * suf)474 http1_WrTxt(const struct worker *wrk, const txt *hh, const char *suf)
475 {
476 	unsigned u;
477 
478 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
479 	AN(wrk);
480 	AN(hh);
481 	AN(hh->b);
482 	AN(hh->e);
483 	u = V1L_Write(wrk, hh->b, hh->e - hh->b);
484 	if (suf != NULL)
485 		u += V1L_Write(wrk, suf, -1);
486 	return (u);
487 }
488 
489 unsigned
HTTP1_Write(const struct worker * w,const struct http * hp,const int * hf)490 HTTP1_Write(const struct worker *w, const struct http *hp, const int *hf)
491 {
492 	unsigned u, l;
493 
494 	assert(hf == HTTP1_Req || hf == HTTP1_Resp);
495 	AN(hp->hd[hf[0]].b);
496 	AN(hp->hd[hf[1]].b);
497 	AN(hp->hd[hf[2]].b);
498 	l = http1_WrTxt(w, &hp->hd[hf[0]], " ");
499 	l += http1_WrTxt(w, &hp->hd[hf[1]], " ");
500 	l += http1_WrTxt(w, &hp->hd[hf[2]], "\r\n");
501 
502 	for (u = HTTP_HDR_FIRST; u < hp->nhd; u++)
503 		l += http1_WrTxt(w, &hp->hd[u], "\r\n");
504 	l += V1L_Write(w, "\r\n", -1);
505 	return (l);
506 }
507