1 /*-
2 * Copyright (c) 2006 Verdens Gang AS
3 * Copyright (c) 2006-2015 Varnish Software AS
4 * All rights reserved.
5 *
6 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7 *
8 * SPDX-License-Identifier: BSD-2-Clause
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * HTTP protocol requests
32 *
33 * The trouble with the "until magic sequence" design of HTTP protocol messages
34 * is that either you have to read a single character at a time, which is
35 * inefficient, or you risk reading too much, and pre-read some of the object,
36 * or even the next pipelined request, which follows the one you want.
37 *
38 * HTC reads a HTTP protocol header into a workspace, subject to limits,
39 * and stops when we see the magic marker (double [CR]NL), and if we overshoot,
40 * it keeps track of the "pipelined" data.
41 *
42 * We use this both for client and backend connections.
43 */
44
45 #include "config.h"
46
47 #include "cache/cache_varnishd.h"
48 #include "cache/cache_transport.h"
49
50 #include "cache_http1.h"
51
52 #include "vct.h"
53
54 const int HTTP1_Req[3] = {
55 HTTP_HDR_METHOD, HTTP_HDR_URL, HTTP_HDR_PROTO
56 };
57
58 const int HTTP1_Resp[3] = {
59 HTTP_HDR_PROTO, HTTP_HDR_STATUS, HTTP_HDR_REASON
60 };
61
62 /*--------------------------------------------------------------------
63 * Check if we have a complete HTTP request or response yet
64 */
65
v_matchproto_(htc_complete_f)66 enum htc_status_e v_matchproto_(htc_complete_f)
67 HTTP1_Complete(struct http_conn *htc)
68 {
69 char *p;
70 enum htc_status_e retval;
71
72 CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
73 AN(WS_Reservation(htc->ws));
74 assert(pdiff(htc->rxbuf_b, htc->rxbuf_e) <= WS_ReservationSize(htc->ws));
75
76 /* Skip any leading white space */
77 for (p = htc->rxbuf_b ; p < htc->rxbuf_e && vct_islws(*p); p++)
78 continue;
79 if (p == htc->rxbuf_e)
80 return (HTC_S_EMPTY);
81
82 /* Do not return a partial H2 connection preface */
83 retval = H2_prism_complete(htc);
84 if (retval != HTC_S_JUNK)
85 return (retval);
86
87 /*
88 * Here we just look for NL[CR]NL to see that reception
89 * is completed. More stringent validation happens later.
90 */
91 while (1) {
92 p = memchr(p, '\n', htc->rxbuf_e - p);
93 if (p == NULL)
94 return (HTC_S_MORE);
95 if (++p == htc->rxbuf_e)
96 return (HTC_S_MORE);
97 if (*p == '\r' && ++p == htc->rxbuf_e)
98 return (HTC_S_MORE);
99 if (*p == '\n')
100 break;
101 }
102 return (HTC_S_COMPLETE);
103 }
104
105 /*--------------------------------------------------------------------
 * Dissect the headers of the HTTP protocol message: split the block
 * into header lines, validate each one and record it in the struct http.
108 */
109
110 static uint16_t
http1_dissect_hdrs(struct http * hp,char * p,struct http_conn * htc,unsigned maxhdr)111 http1_dissect_hdrs(struct http *hp, char *p, struct http_conn *htc,
112 unsigned maxhdr)
113 {
114 char *q, *r, *s;
115 int i;
116
117 assert(p > htc->rxbuf_b);
118 assert(p <= htc->rxbuf_e);
119 hp->nhd = HTTP_HDR_FIRST;
120 r = NULL; /* For FlexeLint */
121 for (; p < htc->rxbuf_e; p = r) {
122
123 /* Find end of next header */
124 q = r = p;
125 if (vct_iscrlf(p, htc->rxbuf_e))
126 break;
127 while (r < htc->rxbuf_e) {
128 if (vct_ishdrval(*r)) {
129 r++;
130 continue;
131 }
132 i = vct_iscrlf(r, htc->rxbuf_e);
133 if (i == 0) {
134 VSLb(hp->vsl, SLT_BogoHeader,
135 "Header has ctrl char 0x%02x", *r);
136 return (400);
137 }
138 q = r;
139 r += i;
140 assert(r <= htc->rxbuf_e);
141 if (r == htc->rxbuf_e)
142 break;
143 if (vct_iscrlf(r, htc->rxbuf_e))
144 break;
145 /* If line does not continue: got it. */
146 if (!vct_issp(*r))
147 break;
148
149 /* Clear line continuation LWS to spaces */
150 while (q < r)
151 *q++ = ' ';
152 while (q < htc->rxbuf_e && vct_issp(*q))
153 *q++ = ' ';
154 }
155
156 /* Empty header = end of headers */
157 if (p == q)
158 break;
159
160 if (q - p > maxhdr) {
161 VSLb(hp->vsl, SLT_BogoHeader, "Header too long: %.*s",
162 (int)(q - p > 20 ? 20 : q - p), p);
163 return (400);
164 }
165
166 if (vct_islws(*p)) {
167 VSLb(hp->vsl, SLT_BogoHeader,
168 "1st header has white space: %.*s",
169 (int)(q - p > 20 ? 20 : q - p), p);
170 return (400);
171 }
172
173 if (*p == ':') {
174 VSLb(hp->vsl, SLT_BogoHeader,
175 "Missing header name: %.*s",
176 (int)(q - p > 20 ? 20 : q - p), p);
177 return (400);
178 }
179
180 while (q > p && vct_issp(q[-1]))
181 q--;
182 *q = '\0';
183
184 for (s = p; *s != ':' && s < q; s++) {
185 if (!vct_istchar(*s)) {
186 VSLb(hp->vsl, SLT_BogoHeader,
187 "Illegal char 0x%02x in header name", *s);
188 return (400);
189 }
190 }
191 if (*s != ':') {
192 VSLb(hp->vsl, SLT_BogoHeader, "Header without ':' %.*s",
193 (int)(q - p > 20 ? 20 : q - p), p);
194 return (400);
195 }
196
197 if (hp->nhd < hp->shd) {
198 hp->hdf[hp->nhd] = 0;
199 hp->hd[hp->nhd].b = p;
200 hp->hd[hp->nhd].e = q;
201 hp->nhd++;
202 } else {
203 VSLb(hp->vsl, SLT_BogoHeader, "Too many headers: %.*s",
204 (int)(q - p > 20 ? 20 : q - p), p);
205 return (400);
206 }
207 }
208 i = vct_iscrlf(p, htc->rxbuf_e);
209 assert(i > 0); /* HTTP1_Complete guarantees this */
210 p += i;
211 HTC_RxPipeline(htc, p);
212 htc->rxbuf_e = p;
213 return (0);
214 }
215
216 /*--------------------------------------------------------------------
217 * Deal with first line of HTTP protocol message.
218 */
219
220 static uint16_t
http1_splitline(struct http * hp,struct http_conn * htc,const int * hf,unsigned maxhdr)221 http1_splitline(struct http *hp, struct http_conn *htc, const int *hf,
222 unsigned maxhdr)
223 {
224 char *p, *q;
225 int i;
226
227 assert(hf == HTTP1_Req || hf == HTTP1_Resp);
228 CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
229 CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
230 assert(htc->rxbuf_e >= htc->rxbuf_b);
231
232 AZ(hp->hd[hf[0]].b);
233 AZ(hp->hd[hf[1]].b);
234 AZ(hp->hd[hf[2]].b);
235
236 /* Skip leading LWS */
237 for (p = htc->rxbuf_b ; vct_islws(*p); p++)
238 continue;
239 hp->hd[hf[0]].b = p;
240
241 /* First field cannot contain SP or CTL */
242 for (; !vct_issp(*p); p++) {
243 if (vct_isctl(*p))
244 return (400);
245 }
246 hp->hd[hf[0]].e = p;
247 assert(Tlen(hp->hd[hf[0]]));
248 *p++ = '\0';
249
250 /* Skip SP */
251 for (; vct_issp(*p); p++) {
252 if (vct_isctl(*p))
253 return (400);
254 }
255 hp->hd[hf[1]].b = p;
256
257 /* Second field cannot contain LWS or CTL */
258 for (; !vct_islws(*p); p++) {
259 if (vct_isctl(*p))
260 return (400);
261 }
262 hp->hd[hf[1]].e = p;
263 if (!Tlen(hp->hd[hf[1]]))
264 return (400);
265
266 /* Skip SP */
267 q = p;
268 for (; vct_issp(*p); p++) {
269 if (vct_isctl(*p))
270 return (400);
271 }
272 if (q < p)
273 *q = '\0'; /* Nul guard for the 2nd field. If q == p
274 * (the third optional field is not
275 * present), the last nul guard will
276 * cover this field. */
277
278 /* Third field is optional and cannot contain CTL except TAB */
279 q = p;
280 for (; p < htc->rxbuf_e && !vct_iscrlf(p, htc->rxbuf_e); p++) {
281 if (vct_isctl(*p) && !vct_issp(*p))
282 return (400);
283 }
284 if (p > q) {
285 hp->hd[hf[2]].b = q;
286 hp->hd[hf[2]].e = p;
287 }
288
289 /* Skip CRLF */
290 i = vct_iscrlf(p, htc->rxbuf_e);
291 if (!i)
292 return (400);
293 *p = '\0';
294 p += i;
295
296 http_Proto(hp);
297
298 return (http1_dissect_hdrs(hp, p, htc, maxhdr));
299 }
300
301 /*--------------------------------------------------------------------*/
302
303 static body_status_t
http1_body_status(const struct http * hp,struct http_conn * htc,int request)304 http1_body_status(const struct http *hp, struct http_conn *htc, int request)
305 {
306 ssize_t cl;
307 const char *b;
308
309 CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
310 CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
311
312 htc->content_length = -1;
313
314 cl = http_GetContentLength(hp);
315 if (cl == -2)
316 return (BS_ERROR);
317 if (http_GetHdr(hp, H_Transfer_Encoding, &b)) {
318 if (strcasecmp(b, "chunked"))
319 return (BS_ERROR);
320 if (cl != -1) {
321 /*
322 * RFC7230 3.3.3 allows more lenient handling
323 * but we're going to be strict.
324 */
325 return (BS_ERROR);
326 }
327 return (BS_CHUNKED);
328 }
329 if (cl >= 0) {
330 htc->content_length = cl;
331 return (cl == 0 ? BS_NONE : BS_LENGTH);
332 }
333
334 if (hp->protover == 11 && request)
335 return (BS_NONE);
336
337 if (http_HdrIs(hp, H_Connection, "keep-alive")) {
338 /*
339 * Keep alive with neither TE=Chunked or C-Len is impossible.
340 * We assume a zero length body.
341 */
342 return (BS_NONE);
343 }
344
345 /*
346 * Fall back to EOF transfer.
347 */
348 return (BS_EOF);
349 }
350
351 /*--------------------------------------------------------------------*/
352
353 uint16_t
HTTP1_DissectRequest(struct http_conn * htc,struct http * hp)354 HTTP1_DissectRequest(struct http_conn *htc, struct http *hp)
355 {
356 uint16_t retval;
357 const char *p;
358 const char *b = NULL, *e;
359
360 CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
361 CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
362
363 retval = http1_splitline(hp, htc,
364 HTTP1_Req, cache_param->http_req_hdr_len);
365 if (retval != 0)
366 return (retval);
367
368 if (hp->protover < 10 || hp->protover > 11)
369 return (400);
370
371 if (http_CountHdr(hp, H_Host) > 1)
372 return (400);
373
374 if (http_CountHdr(hp, H_Content_Length) > 1)
375 return (400);
376
377 /* RFC2616, section 5.2, point 1 */
378 if (!strncasecmp(hp->hd[HTTP_HDR_URL].b, "http://", 7))
379 b = hp->hd[HTTP_HDR_URL].b + 7;
380 else if (FEATURE(FEATURE_HTTPS_SCHEME) &&
381 !strncasecmp(hp->hd[HTTP_HDR_URL].b, "https://", 8))
382 b = hp->hd[HTTP_HDR_URL].b + 8;
383 if (b) {
384 e = strchr(b, '/');
385 if (e) {
386 http_Unset(hp, H_Host);
387 http_PrintfHeader(hp, "Host: %.*s", (int)(e - b), b);
388 hp->hd[HTTP_HDR_URL].b = e;
389 }
390 }
391
392 htc->body_status = http1_body_status(hp, htc, 1);
393 if (htc->body_status == BS_ERROR)
394 return (400);
395
396 p = http_GetMethod(hp);
397 AN(p);
398
399 if (htc->body_status == BS_EOF) {
400 assert(hp->protover == 10);
401 /* RFC1945 8.3 p32 and D.1.1 p58 */
402 if (!strcasecmp(p, "post") || !strcasecmp(p, "put"))
403 return (400);
404 htc->body_status = BS_NONE;
405 }
406
407 /* HEAD with a body is a hard error */
408 if (htc->body_status != BS_NONE && !strcasecmp(p, "head"))
409 return (400);
410
411 return (retval);
412 }
413
414 /*--------------------------------------------------------------------*/
415
416 uint16_t
HTTP1_DissectResponse(struct http_conn * htc,struct http * hp,const struct http * rhttp)417 HTTP1_DissectResponse(struct http_conn *htc, struct http *hp,
418 const struct http *rhttp)
419 {
420 uint16_t retval = 0;
421 const char *p;
422
423 CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
424 CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC);
425 CHECK_OBJ_NOTNULL(rhttp, HTTP_MAGIC);
426
427 if (http1_splitline(hp, htc,
428 HTTP1_Resp, cache_param->http_resp_hdr_len))
429 retval = 503;
430
431 if (retval == 0 && hp->protover < 10)
432 retval = 503;
433
434 if (retval == 0 && hp->protover > rhttp->protover)
435 http_SetH(hp, HTTP_HDR_PROTO, rhttp->hd[HTTP_HDR_PROTO].b);
436
437 if (retval == 0 && Tlen(hp->hd[HTTP_HDR_STATUS]) != 3)
438 retval = 503;
439
440 if (retval == 0) {
441 p = hp->hd[HTTP_HDR_STATUS].b;
442
443 if (p[0] >= '1' && p[0] <= '9' &&
444 p[1] >= '0' && p[1] <= '9' &&
445 p[2] >= '0' && p[2] <= '9')
446 hp->status =
447 100 * (p[0] - '0') + 10 * (p[1] - '0') + p[2] - '0';
448 else
449 retval = 503;
450 }
451
452 if (retval != 0) {
453 VSLb(hp->vsl, SLT_HttpGarbage, "%.*s",
454 (int)(htc->rxbuf_e - htc->rxbuf_b), htc->rxbuf_b);
455 assert(retval >= 100 && retval <= 999);
456 assert(retval == 503);
457 http_SetStatus(hp, 503, NULL);
458 }
459
460 if (hp->hd[HTTP_HDR_REASON].b == NULL ||
461 !Tlen(hp->hd[HTTP_HDR_REASON])) {
462 http_SetH(hp, HTTP_HDR_REASON,
463 http_Status2Reason(hp->status, NULL));
464 }
465
466 htc->body_status = http1_body_status(hp, htc, 0);
467
468 return (retval);
469 }
470
471 /*--------------------------------------------------------------------*/
472
473 static unsigned
http1_WrTxt(const struct worker * wrk,const txt * hh,const char * suf)474 http1_WrTxt(const struct worker *wrk, const txt *hh, const char *suf)
475 {
476 unsigned u;
477
478 CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
479 AN(wrk);
480 AN(hh);
481 AN(hh->b);
482 AN(hh->e);
483 u = V1L_Write(wrk, hh->b, hh->e - hh->b);
484 if (suf != NULL)
485 u += V1L_Write(wrk, suf, -1);
486 return (u);
487 }
488
489 unsigned
HTTP1_Write(const struct worker * w,const struct http * hp,const int * hf)490 HTTP1_Write(const struct worker *w, const struct http *hp, const int *hf)
491 {
492 unsigned u, l;
493
494 assert(hf == HTTP1_Req || hf == HTTP1_Resp);
495 AN(hp->hd[hf[0]].b);
496 AN(hp->hd[hf[1]].b);
497 AN(hp->hd[hf[2]].b);
498 l = http1_WrTxt(w, &hp->hd[hf[0]], " ");
499 l += http1_WrTxt(w, &hp->hd[hf[1]], " ");
500 l += http1_WrTxt(w, &hp->hd[hf[2]], "\r\n");
501
502 for (u = HTTP_HDR_FIRST; u < hp->nhd; u++)
503 l += http1_WrTxt(w, &hp->hd[u], "\r\n");
504 l += V1L_Write(w, "\r\n", -1);
505 return (l);
506 }
507