1 /*
2  * HTTP semantics
3  *
4  * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  */
12 
#include <ctype.h>
#include <string.h>
#include <common/config.h>
#include <common/http.h>
#include <common/standard.h>
17 
18 /* It is about twice as fast on recent architectures to lookup a byte in a
19  * table than to perform a boolean AND or OR between two tests. Refer to
20  * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
21  * neither a separator nor a CTL char. An http ver_token is any ASCII which can
22  * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
23  * digit. Note: please do not overwrite values in assignment since gcc-2.95
24  * will not handle them correctly. It's worth noting that chars 128..255 are
25  * nothing, not even control chars.
26  */
27 const unsigned char http_char_classes[256] = {
28 	[  0] = HTTP_FLG_CTL,
29 	[  1] = HTTP_FLG_CTL,
30 	[  2] = HTTP_FLG_CTL,
31 	[  3] = HTTP_FLG_CTL,
32 	[  4] = HTTP_FLG_CTL,
33 	[  5] = HTTP_FLG_CTL,
34 	[  6] = HTTP_FLG_CTL,
35 	[  7] = HTTP_FLG_CTL,
36 	[  8] = HTTP_FLG_CTL,
37 	[  9] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP | HTTP_FLG_CTL,
38 	[ 10] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
39 	[ 11] = HTTP_FLG_CTL,
40 	[ 12] = HTTP_FLG_CTL,
41 	[ 13] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
42 	[ 14] = HTTP_FLG_CTL,
43 	[ 15] = HTTP_FLG_CTL,
44 	[ 16] = HTTP_FLG_CTL,
45 	[ 17] = HTTP_FLG_CTL,
46 	[ 18] = HTTP_FLG_CTL,
47 	[ 19] = HTTP_FLG_CTL,
48 	[ 20] = HTTP_FLG_CTL,
49 	[ 21] = HTTP_FLG_CTL,
50 	[ 22] = HTTP_FLG_CTL,
51 	[ 23] = HTTP_FLG_CTL,
52 	[ 24] = HTTP_FLG_CTL,
53 	[ 25] = HTTP_FLG_CTL,
54 	[ 26] = HTTP_FLG_CTL,
55 	[ 27] = HTTP_FLG_CTL,
56 	[ 28] = HTTP_FLG_CTL,
57 	[ 29] = HTTP_FLG_CTL,
58 	[ 30] = HTTP_FLG_CTL,
59 	[ 31] = HTTP_FLG_CTL,
60 	[' '] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP,
61 	['!'] = HTTP_FLG_TOK,
62 	['"'] = HTTP_FLG_SEP,
63 	['#'] = HTTP_FLG_TOK,
64 	['$'] = HTTP_FLG_TOK,
65 	['%'] = HTTP_FLG_TOK,
66 	['&'] = HTTP_FLG_TOK,
67 	[ 39] = HTTP_FLG_TOK,
68 	['('] = HTTP_FLG_SEP,
69 	[')'] = HTTP_FLG_SEP,
70 	['*'] = HTTP_FLG_TOK,
71 	['+'] = HTTP_FLG_TOK,
72 	[','] = HTTP_FLG_SEP,
73 	['-'] = HTTP_FLG_TOK,
74 	['.'] = HTTP_FLG_TOK | HTTP_FLG_VER,
75 	['/'] = HTTP_FLG_SEP | HTTP_FLG_VER,
76 	['0'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
77 	['1'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
78 	['2'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
79 	['3'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
80 	['4'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
81 	['5'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
82 	['6'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
83 	['7'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
84 	['8'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
85 	['9'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
86 	[':'] = HTTP_FLG_SEP,
87 	[';'] = HTTP_FLG_SEP,
88 	['<'] = HTTP_FLG_SEP,
89 	['='] = HTTP_FLG_SEP,
90 	['>'] = HTTP_FLG_SEP,
91 	['?'] = HTTP_FLG_SEP,
92 	['@'] = HTTP_FLG_SEP,
93 	['A'] = HTTP_FLG_TOK,
94 	['B'] = HTTP_FLG_TOK,
95 	['C'] = HTTP_FLG_TOK,
96 	['D'] = HTTP_FLG_TOK,
97 	['E'] = HTTP_FLG_TOK,
98 	['F'] = HTTP_FLG_TOK,
99 	['G'] = HTTP_FLG_TOK,
100 	['H'] = HTTP_FLG_TOK | HTTP_FLG_VER,
101 	['I'] = HTTP_FLG_TOK,
102 	['J'] = HTTP_FLG_TOK,
103 	['K'] = HTTP_FLG_TOK,
104 	['L'] = HTTP_FLG_TOK,
105 	['M'] = HTTP_FLG_TOK,
106 	['N'] = HTTP_FLG_TOK,
107 	['O'] = HTTP_FLG_TOK,
108 	['P'] = HTTP_FLG_TOK | HTTP_FLG_VER,
109 	['Q'] = HTTP_FLG_TOK,
110 	['R'] = HTTP_FLG_TOK | HTTP_FLG_VER,
111 	['S'] = HTTP_FLG_TOK | HTTP_FLG_VER,
112 	['T'] = HTTP_FLG_TOK | HTTP_FLG_VER,
113 	['U'] = HTTP_FLG_TOK,
114 	['V'] = HTTP_FLG_TOK,
115 	['W'] = HTTP_FLG_TOK,
116 	['X'] = HTTP_FLG_TOK,
117 	['Y'] = HTTP_FLG_TOK,
118 	['Z'] = HTTP_FLG_TOK,
119 	['['] = HTTP_FLG_SEP,
120 	[ 92] = HTTP_FLG_SEP,
121 	[']'] = HTTP_FLG_SEP,
122 	['^'] = HTTP_FLG_TOK,
123 	['_'] = HTTP_FLG_TOK,
124 	['`'] = HTTP_FLG_TOK,
125 	['a'] = HTTP_FLG_TOK,
126 	['b'] = HTTP_FLG_TOK,
127 	['c'] = HTTP_FLG_TOK,
128 	['d'] = HTTP_FLG_TOK,
129 	['e'] = HTTP_FLG_TOK,
130 	['f'] = HTTP_FLG_TOK,
131 	['g'] = HTTP_FLG_TOK,
132 	['h'] = HTTP_FLG_TOK,
133 	['i'] = HTTP_FLG_TOK,
134 	['j'] = HTTP_FLG_TOK,
135 	['k'] = HTTP_FLG_TOK,
136 	['l'] = HTTP_FLG_TOK,
137 	['m'] = HTTP_FLG_TOK,
138 	['n'] = HTTP_FLG_TOK,
139 	['o'] = HTTP_FLG_TOK,
140 	['p'] = HTTP_FLG_TOK,
141 	['q'] = HTTP_FLG_TOK,
142 	['r'] = HTTP_FLG_TOK,
143 	['s'] = HTTP_FLG_TOK,
144 	['t'] = HTTP_FLG_TOK,
145 	['u'] = HTTP_FLG_TOK,
146 	['v'] = HTTP_FLG_TOK,
147 	['w'] = HTTP_FLG_TOK,
148 	['x'] = HTTP_FLG_TOK,
149 	['y'] = HTTP_FLG_TOK,
150 	['z'] = HTTP_FLG_TOK,
151 	['{'] = HTTP_FLG_SEP,
152 	['|'] = HTTP_FLG_TOK,
153 	['}'] = HTTP_FLG_SEP,
154 	['~'] = HTTP_FLG_TOK,
155 	[127] = HTTP_FLG_CTL,
156 };
157 
/* Complete "100 Continue" interim response: the status line followed by the
 * empty line which ends the header block.
 */
158 const struct ist HTTP_100 = IST("HTTP/1.1 100 Continue\r\n\r\n");
159 
/* Status line only of a "103 Early Hints" interim response. It is not followed
 * by the final empty line, presumably so that headers can be appended by the
 * caller -- TODO confirm against the users of this constant.
 */
160 const struct ist HTTP_103 = IST("HTTP/1.1 103 Early Hints\r\n");
161 
/* Redirect response prefixes. Every message below deliberately stops right
 * after "Location: " without any CRLF, since it will be concatenated with the
 * URL. Warning: no "connection" header is provided with these 3xx messages.
 */
#define HTTP_REDIR_NOCACHE "Cache-Control: no-cache\r\n"
#define HTTP_REDIR_TAIL    "Content-length: 0\r\n" "Location: "

/* permanent redirect, hence no "no-cache" directive */
const char *HTTP_301 = "HTTP/1.1 301 Moved Permanently\r\n" HTTP_REDIR_TAIL;

/* temporary redirect */
const char *HTTP_302 = "HTTP/1.1 302 Found\r\n" HTTP_REDIR_NOCACHE HTTP_REDIR_TAIL;

/* same as 302 except that the browser MUST retry with the GET method */
const char *HTTP_303 = "HTTP/1.1 303 See Other\r\n" HTTP_REDIR_NOCACHE HTTP_REDIR_TAIL;

/* same as 302 except that the browser MUST retry with the same method */
const char *HTTP_307 = "HTTP/1.1 307 Temporary Redirect\r\n" HTTP_REDIR_NOCACHE HTTP_REDIR_TAIL;

/* same as 301 except that the browser MUST retry with the same method */
const char *HTTP_308 = "HTTP/1.1 308 Permanent Redirect\r\n" HTTP_REDIR_TAIL;

#undef HTTP_REDIR_TAIL
#undef HTTP_REDIR_NOCACHE
193 
/* Warning: this one is an sprintf() fmt string, with <realm> as its only
 * argument (the "%s" of the WWW-Authenticate header). The advertised
 * Content-length is the size of the HTML body found after the empty line.
 */
const char *HTTP_401_fmt =
	/* headers */
	"HTTP/1.1 401 Unauthorized\r\n"
	"Content-length: 112\r\n"
	"Cache-Control: no-cache\r\n"
	"Connection: close\r\n"
	"Content-Type: text/html\r\n"
	"WWW-Authenticate: Basic realm=\"%s\"\r\n"
	"\r\n"
	/* body */
	"<html><body><h1>401 Unauthorized</h1>\n"
	"You need a valid user and password to access this content.\n"
	"</body></html>\n";
204 
/* Warning: this one is an sprintf() fmt string, with <realm> as its only
 * argument (the "%s" of the Proxy-Authenticate header). The status line uses
 * the reason phrase registered for 407, "Proxy Authentication Required", which
 * is also what http_get_reason() reports for this code. The HTML body is left
 * untouched so that the advertised Content-length (112) remains exact.
 */
const char *HTTP_407_fmt =
	"HTTP/1.1 407 Proxy Authentication Required\r\n"
	"Content-length: 112\r\n"
	"Cache-Control: no-cache\r\n"
	"Connection: close\r\n"
	"Content-Type: text/html\r\n"
	"Proxy-Authenticate: Basic realm=\"%s\"\r\n"
	"\r\n"
	"<html><body><h1>407 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n";
214 
/* Numeric HTTP status code associated with each HTTP_ERR_* index. The array
 * holds HTTP_ERR_SIZE entries; http_get_status_idx() performs the reverse
 * mapping, from a status code to its HTTP_ERR_* index.
 */
215 const int http_err_codes[HTTP_ERR_SIZE] = {
216 	[HTTP_ERR_200] = 200,  /* used by "monitor-uri" */
217 	[HTTP_ERR_400] = 400,
218 	[HTTP_ERR_403] = 403,
219 	[HTTP_ERR_404] = 404,
220 	[HTTP_ERR_405] = 405,
221 	[HTTP_ERR_408] = 408,
222 	[HTTP_ERR_410] = 410,
223 	[HTTP_ERR_413] = 413,
224 	[HTTP_ERR_421] = 421,
225 	[HTTP_ERR_425] = 425,
226 	[HTTP_ERR_429] = 429,
227 	[HTTP_ERR_500] = 500,
228 	[HTTP_ERR_502] = 502,
229 	[HTTP_ERR_503] = 503,
230 	[HTTP_ERR_504] = 504,
231 };
232 
233 const char *http_err_msgs[HTTP_ERR_SIZE] = {
234 	[HTTP_ERR_200] =
235 	"HTTP/1.1 200 OK\r\n"
236 	"Content-length: 58\r\n"
237 	"Cache-Control: no-cache\r\n"
238 	"Connection: close\r\n"
239 	"Content-Type: text/html\r\n"
240 	"\r\n"
241 	"<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
242 
243 	[HTTP_ERR_400] =
244 	"HTTP/1.1 400 Bad request\r\n"
245 	"Content-length: 90\r\n"
246 	"Cache-Control: no-cache\r\n"
247 	"Connection: close\r\n"
248 	"Content-Type: text/html\r\n"
249 	"\r\n"
250 	"<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
251 
252 	[HTTP_ERR_403] =
253 	"HTTP/1.1 403 Forbidden\r\n"
254 	"Content-length: 93\r\n"
255 	"Cache-Control: no-cache\r\n"
256 	"Connection: close\r\n"
257 	"Content-Type: text/html\r\n"
258 	"\r\n"
259 	"<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
260 
261 	[HTTP_ERR_404] =
262 	"HTTP/1.1 404 Not Found\r\n"
263 	"Content-length: 83\r\n"
264 	"Cache-Control: no-cache\r\n"
265 	"Connection: close\r\n"
266 	"Content-Type: text/html\r\n"
267 	"\r\n"
268 	"<html><body><h1>404 Not Found</h1>\nThe resource could not be found.\n</body></html>\n",
269 
270 	[HTTP_ERR_405] =
271 	"HTTP/1.1 405 Method Not Allowed\r\n"
272 	"Content-length: 146\r\n"
273 	"Cache-Control: no-cache\r\n"
274 	"Connection: close\r\n"
275 	"Content-Type: text/html\r\n"
276 	"\r\n"
277 	"<html><body><h1>405 Method Not Allowed</h1>\nA request was made of a resource using a request method not supported by that resource\n</body></html>\n",
278 
279 	[HTTP_ERR_408] =
280 	"HTTP/1.1 408 Request Time-out\r\n"
281 	"Content-length: 110\r\n"
282 	"Cache-Control: no-cache\r\n"
283 	"Connection: close\r\n"
284 	"Content-Type: text/html\r\n"
285 	"\r\n"
286 	"<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
287 
288 	[HTTP_ERR_410] =
289 	"HTTP/1.1 410 Gone\r\n"
290 	"Content-length: 114\r\n"
291 	"Cache-Control: no-cache\r\n"
292 	"Connection: close\r\n"
293 	"Content-Type: text/html\r\n"
294 	"\r\n"
295 	"<html><body><h1>410 Gone</h1>\nThe resource is no longer available and will not be available again.\n</body></html>\n",
296 
297 	[HTTP_ERR_413] =
298 	"HTTP/1.1 413 Payload Too Large\r\n"
299 	"Content-length: 106\r\n"
300 	"Cache-Control: no-cache\r\n"
301 	"Connection: close\r\n"
302 	"Content-Type: text/html\r\n"
303 	"\r\n"
304 	"<html><body><h1>413 Payload Too Large</h1>\nThe request entity exceeds the maximum allowed.\n</body></html>\n",
305 
306 	[HTTP_ERR_421] =
307 	"HTTP/1.1 421 Misdirected Request\r\n"
308 	"Content-length: 104\r\n"
309 	"Cache-Control: no-cache\r\n"
310 	"Connection: close\r\n"
311 	"Content-Type: text/html\r\n"
312 	"\r\n"
313 	"<html><body><h1>421 Misdirected Request</h1>\nRequest sent to a non-authoritative server.\n</body></html>\n",
314 
315 	[HTTP_ERR_425] =
316 	"HTTP/1.1 425 Too Early\r\n"
317 	"Content-length: 80\r\n"
318 	"Cache-Control: no-cache\r\n"
319 	"Connection: close\r\n"
320 	"Content-Type: text/html\r\n"
321 	"\r\n"
322 	"<html><body><h1>425 Too Early</h1>\nYour browser sent early data.\n</body></html>\n",
323 
324 	[HTTP_ERR_429] =
325 	"HTTP/1.1 429 Too Many Requests\r\n"
326 	"Content-length: 117\r\n"
327 	"Cache-Control: no-cache\r\n"
328 	"Connection: close\r\n"
329 	"Content-Type: text/html\r\n"
330 	"\r\n"
331 	"<html><body><h1>429 Too Many Requests</h1>\nYou have sent too many requests in a given amount of time.\n</body></html>\n",
332 
333 	[HTTP_ERR_500] =
334 	"HTTP/1.1 500 Internal Server Error\r\n"
335 	"Content-length: 96\r\n"
336 	"Cache-Control: no-cache\r\n"
337 	"Connection: close\r\n"
338 	"Content-Type: text/html\r\n"
339 	"\r\n"
340 	"<html><body><h1>500 Internal Server Error</h1>\nAn internal server error occured.\n</body></html>\n",
341 
342 	[HTTP_ERR_502] =
343 	"HTTP/1.1 502 Bad Gateway\r\n"
344 	"Content-length: 107\r\n"
345 	"Cache-Control: no-cache\r\n"
346 	"Connection: close\r\n"
347 	"Content-Type: text/html\r\n"
348 	"\r\n"
349 	"<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
350 
351 	[HTTP_ERR_503] =
352 	"HTTP/1.1 503 Service Unavailable\r\n"
353 	"Content-length: 107\r\n"
354 	"Cache-Control: no-cache\r\n"
355 	"Connection: close\r\n"
356 	"Content-Type: text/html\r\n"
357 	"\r\n"
358 	"<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
359 
360 	[HTTP_ERR_504] =
361 	"HTTP/1.1 504 Gateway Time-out\r\n"
362 	"Content-length: 92\r\n"
363 	"Cache-Control: no-cache\r\n"
364 	"Connection: close\r\n"
365 	"Content-Type: text/html\r\n"
366 	"\r\n"
367 	"<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
368 
369 };
370 
/* Name of each known HTTP method, indexed by its HTTP_METH_* value. The array
 * holds HTTP_METH_OTHER entries, so HTTP_METH_OTHER itself has no name here.
 */
371 const struct ist http_known_methods[HTTP_METH_OTHER] = {
372 	[HTTP_METH_OPTIONS] = IST("OPTIONS"),
373 	[HTTP_METH_GET]     = IST("GET"),
374 	[HTTP_METH_HEAD]    = IST("HEAD"),
375 	[HTTP_METH_POST]    = IST("POST"),
376 	[HTTP_METH_PUT]     = IST("PUT"),
377 	[HTTP_METH_DELETE]  = IST("DELETE"),
378 	[HTTP_METH_TRACE]   = IST("TRACE"),
379 	[HTTP_METH_CONNECT] = IST("CONNECT"),
380 };
381 
382 /*
383  * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown
384  * ones.
385  */
find_http_meth(const char * str,const int len)386 enum http_meth_t find_http_meth(const char *str, const int len)
387 {
388 	const struct ist m = ist2(str, len);
389 
390 	if      (isteq(m, ist("GET")))     return HTTP_METH_GET;
391 	else if (isteq(m, ist("HEAD")))    return HTTP_METH_HEAD;
392 	else if (isteq(m, ist("POST")))    return HTTP_METH_POST;
393 	else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT;
394 	else if (isteq(m, ist("PUT")))     return HTTP_METH_PUT;
395 	else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS;
396 	else if (isteq(m, ist("DELETE")))  return HTTP_METH_DELETE;
397 	else if (isteq(m, ist("TRACE")))   return HTTP_METH_TRACE;
398 	else                               return HTTP_METH_OTHER;
399 }
400 
401 /* This function returns HTTP_ERR_<num> (enum) matching http status code.
402  * Returned value should match codes from http_err_codes.
403  */
http_get_status_idx(unsigned int status)404 int http_get_status_idx(unsigned int status)
405 {
406 	switch (status) {
407 	case 200: return HTTP_ERR_200;
408 	case 400: return HTTP_ERR_400;
409 	case 403: return HTTP_ERR_403;
410 	case 404: return HTTP_ERR_404;
411 	case 405: return HTTP_ERR_405;
412 	case 408: return HTTP_ERR_408;
413 	case 410: return HTTP_ERR_410;
414 	case 413: return HTTP_ERR_413;
415 	case 421: return HTTP_ERR_421;
416 	case 425: return HTTP_ERR_425;
417 	case 429: return HTTP_ERR_429;
418 	case 500: return HTTP_ERR_500;
419 	case 502: return HTTP_ERR_502;
420 	case 503: return HTTP_ERR_503;
421 	case 504: return HTTP_ERR_504;
422 	default: return HTTP_ERR_500;
423 	}
424 }
425 
/* Returns the reason phrase associated with the HTTP status <status>. Known
 * codes get their dedicated phrase; any other code gets the generic name of
 * its class (1xx to 5xx), or "Other" when it does not belong to any class.
 * This function never fails, a message is always returned.
 */
const char *http_get_reason(unsigned int status)
{
	static const struct {
		unsigned int code;
		const char *reason;
	} reasons[] = {
		{ 100, "Continue" },
		{ 101, "Switching Protocols" },
		{ 102, "Processing" },
		{ 200, "OK" },
		{ 201, "Created" },
		{ 202, "Accepted" },
		{ 203, "Non-Authoritative Information" },
		{ 204, "No Content" },
		{ 205, "Reset Content" },
		{ 206, "Partial Content" },
		{ 207, "Multi-Status" },
		{ 210, "Content Different" },
		{ 226, "IM Used" },
		{ 300, "Multiple Choices" },
		{ 301, "Moved Permanently" },
		{ 302, "Moved Temporarily" },
		{ 303, "See Other" },
		{ 304, "Not Modified" },
		{ 305, "Use Proxy" },
		{ 307, "Temporary Redirect" },
		{ 308, "Permanent Redirect" },
		{ 310, "Too many Redirects" },
		{ 400, "Bad Request" },
		{ 401, "Unauthorized" },
		{ 402, "Payment Required" },
		{ 403, "Forbidden" },
		{ 404, "Not Found" },
		{ 405, "Method Not Allowed" },
		{ 406, "Not Acceptable" },
		{ 407, "Proxy Authentication Required" },
		{ 408, "Request Time-out" },
		{ 409, "Conflict" },
		{ 410, "Gone" },
		{ 411, "Length Required" },
		{ 412, "Precondition Failed" },
		{ 413, "Request Entity Too Large" },
		{ 414, "Request-URI Too Long" },
		{ 415, "Unsupported Media Type" },
		{ 416, "Requested range unsatisfiable" },
		{ 417, "Expectation failed" },
		{ 418, "I'm a teapot" },
		{ 421, "Misdirected Request" },
		{ 422, "Unprocessable entity" },
		{ 423, "Locked" },
		{ 424, "Method failure" },
		{ 425, "Too Early" },
		{ 426, "Upgrade Required" },
		{ 428, "Precondition Required" },
		{ 429, "Too Many Requests" },
		{ 431, "Request Header Fields Too Large" },
		{ 449, "Retry With" },
		{ 450, "Blocked by Windows Parental Controls" },
		{ 451, "Unavailable For Legal Reasons" },
		{ 456, "Unrecoverable Error" },
		{ 499, "client has closed connection" },
		{ 500, "Internal Server Error" },
		{ 501, "Not Implemented" },
		{ 502, "Bad Gateway or Proxy Error" },
		{ 503, "Service Unavailable" },
		{ 504, "Gateway Time-out" },
		{ 505, "HTTP Version not supported" },
		{ 506, "Variant also negociate" },
		{ 507, "Insufficient storage" },
		{ 508, "Loop detected" },
		{ 509, "Bandwidth Limit Exceeded" },
		{ 510, "Not extended" },
		{ 511, "Network authentication required" },
		{ 520, "Web server is returning an unknown error" },
	};
	size_t i;

	/* first look for a phrase dedicated to this exact code */
	for (i = 0; i < sizeof(reasons) / sizeof(reasons[0]); i++) {
		if (reasons[i].code == status)
			return reasons[i].reason;
	}

	/* otherwise only the class of the status (its hundreds) is reported */
	switch (status / 100) {
	case 1:  return "Informational";
	case 2:  return "Success";
	case 3:  return "Redirection";
	case 4:  return "Client Error";
	case 5:  return "Server Error";
	default: return "Other";
	}
}
511 
512 /* Parse the uri and looks for the authority, between the scheme and the
513  * path. if no_userinfo is not zero, the part before the '@' (including it) is
514  * skipped. If not found, an empty ist is returned. Otherwise, the ist pointing
515  * on the authority is returned.
516  */
http_get_authority(const struct ist uri,int no_userinfo)517 struct ist http_get_authority(const struct ist uri, int no_userinfo)
518 {
519 	const char *ptr, *start, *end;
520 
521 	if (!uri.len)
522 		goto not_found;
523 
524 	ptr = uri.ptr;
525 	start = ptr;
526 	end = ptr + uri.len;
527 
528 	/* RFC7230, par. 2.7 :
529 	 * Request-URI = "*" | absuri | abspath | authority
530 	 */
531 
532 	if (*ptr == '*' || *ptr == '/')
533 		goto not_found;
534 
535 	if (isalpha((unsigned char)*ptr)) {
536 		/* this is a scheme as described by RFC3986, par. 3.1, or only
537 		 * an authority (in case of a CONNECT method).
538 		 */
539 		ptr++;
540 		while (ptr < end &&
541 		       (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
542 			ptr++;
543 		/* skip '://' or take the whole as authority if not found */
544 		if (ptr == end || *ptr++ != ':')
545 			goto authority;
546 		if (ptr == end || *ptr++ != '/')
547 			goto authority;
548 		if (ptr == end || *ptr++ != '/')
549 			goto authority;
550 	}
551 
552 	start = ptr;
553 	while (ptr < end && *ptr != '/') {
554 		if (*ptr++ == '@' && no_userinfo)
555 			start = ptr;
556 	}
557 
558 	/* OK, ptr point on the '/' or the end */
559 	end = ptr;
560 
561   authority:
562 	return ist2(start, end - start);
563 
564   not_found:
565 	return ist2(NULL, 0);
566 }
567 
568 /* Parse the URI from the given transaction (which is assumed to be in request
569  * phase) and look for the "/" beginning the PATH. If not found, ist2(0,0) is
570  * returned. Otherwise the pointer and length are returned.
571  */
http_get_path(const struct ist uri)572 struct ist http_get_path(const struct ist uri)
573 {
574 	const char *ptr, *end;
575 
576 	if (!uri.len)
577 		goto not_found;
578 
579 	ptr = uri.ptr;
580 	end = ptr + uri.len;
581 
582 	/* RFC7230, par. 2.7 :
583 	 * Request-URI = "*" | absuri | abspath | authority
584 	 */
585 
586 	if (*ptr == '*')
587 		goto not_found;
588 
589 	if (isalpha((unsigned char)*ptr)) {
590 		/* this is a scheme as described by RFC3986, par. 3.1 */
591 		ptr++;
592 		while (ptr < end &&
593 		       (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
594 			ptr++;
595 		/* skip '://' */
596 		if (ptr == end || *ptr++ != ':')
597 			goto not_found;
598 		if (ptr == end || *ptr++ != '/')
599 			goto not_found;
600 		if (ptr == end || *ptr++ != '/')
601 			goto not_found;
602 	}
603 	/* skip [user[:passwd]@]host[:[port]] */
604 
605 	while (ptr < end && *ptr != '/')
606 		ptr++;
607 
608 	if (ptr == end)
609 		goto not_found;
610 
611 	/* OK, we got the '/' ! */
612 	return ist2(ptr, end - ptr);
613 
614  not_found:
615 	return ist2(NULL, 0);
616 }
617 
/*
 * Checks that the header line starting at <hdr> and ending before <end> bears
 * the name <name> of <len> chars, immediately followed by a colon. The name
 * comparison is case-insensitive. Returns 0 if the name does not match.
 * Otherwise returns the offset, relative to <hdr>, of the first character
 * following the colon which is neither a space nor a tab. If no such character
 * exists before <end>, <len>+2 is returned when it still fits in the line (the
 * place after the first space), else <end>-<hdr>.
 */
int http_header_match2(const char *hdr, const char *end,
		       const char *name, int len)
{
	const char *colon = hdr + len;
	const char *val;

	if (colon >= end || *colon != ':' || strncasecmp(hdr, name, len) != 0)
		return 0;

	/* skip the spaces and tabs between the colon and the value */
	for (val = colon + 1; val < end; val++) {
		if (!HTTP_IS_SPHT(*val))
			break;
	}

	if (val < end || len + 2 > end - hdr)
		return val - hdr;

	/* empty value: we may replace starting from second space */
	return len + 2;
}
643 
/* Find the end of the header value contained between <s> and <e>. See RFC7230,
 * par 3.2 for more information. Note that it requires a valid header to return
 * a valid result. This works for headers defined as comma-separated lists.
 *
 * The returned pointer designates the first comma found outside of a quoted
 * string, or <e> if there is none. Inside double quotes, a backslash escapes
 * the next character (quoted-pair) so that an escaped quote does not close the
 * string.
 */
char *http_find_hdr_value_end(char *s, const char *e)
{
	int quoted, qdpair;

	quoted = qdpair = 0;

#if defined(__x86_64__) ||						\
    defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
    defined(__ARM_ARCH_7A__)
	/* speedup: skip everything not a comma nor a double quote, one word
	 * at a time. The word is loaded with memcpy() so that neither the
	 * alignment of <s> nor the aliasing rules are violated, and the
	 * remaining length is compared instead of computing <e> minus a word,
	 * which could point before the buffer for short values.
	 */
	for (; e - s >= (int)sizeof(unsigned int); s += sizeof(unsigned int)) {
		unsigned int c; // comma
		unsigned int q; // quote

		memcpy(&c, s, sizeof(c));
		q = c;

		c ^= 0x2c2c2c2c; // contains one zero on a comma
		q ^= 0x22222222; // contains one zero on a quote

		c = (c - 0x01010101) & ~c; // contains 0x80 below a comma
		q = (q - 0x01010101) & ~q; // contains 0x80 below a quote

		if ((c | q) & 0x80808080)
			break; // found a comma or a quote
	}
#endif
	/* byte-wise scan of the remainder, tracking quotes and quoted-pairs */
	for (; s < e; s++) {
		if (qdpair)                    qdpair = 0;
		else if (quoted) {
			if (*s == '\\')        qdpair = 1;
			else if (*s == '"')    quoted = 0;
		}
		else if (*s == '"')            quoted = 1;
		else if (*s == ',')            return s;
	}
	return s;
}
683 
/* Find the end of a cookie value contained between <s> and <e>. The scan works
 * like the one used for header values, except that a semi-colon ends the value
 * as well as a comma. Delimiters found between double quotes are ignored, and
 * within quotes a backslash escapes the following character. The returned
 * pointer is on the delimiter, or equals <e> if none was found. See RFC2965
 * for more information. Note that it requires a valid header to return a valid
 * result.
 */
char *http_find_cookie_value_end(char *s, const char *e)
{
	int in_quotes = 0;
	int escaped = 0;

	while (s < e) {
		const char c = *s;

		if (escaped) {
			/* this char was protected by the previous backslash */
			escaped = 0;
		} else if (in_quotes) {
			if (c == '\\')
				escaped = 1;
			else if (c == '"')
				in_quotes = 0;
		} else if (c == '"') {
			in_quotes = 1;
		} else if (c == ',' || c == ';') {
			break;
		}
		s++;
	}
	return s;
}
705 
/* Try to find the next occurrence of a cookie name in a cookie header value.
 * To match on any cookie name, <cookie_name_l> must be set to 0. The lookup
 * begins at <hdr>, which should first point to the header's value, and stops
 * at <hdr_end>, which must point to the first character not part of the value.
 * When a cookie is found, the pointer and size of its value are stored into
 * *value and *value_l, and the function returns the pointer to search from for
 * the next occurrence. Otherwise NULL is returned and neither *value nor
 * *value_l are touched. <list> must be non-zero if the value may represent a
 * list of values (cookie headers): a comma then also ends a name, and the
 * search goes on after a non-matching cookie. With <list> set to zero only the
 * first attr=value part is examined, which makes it faster to abort parsing
 * when no list is expected.
 */
char *http_extract_cookie_value(char *hdr, const char *hdr_end,
                                char *cookie_name, size_t cookie_name_l,
                                int list, char **value, size_t *value_l)
{
	char *name_beg, *name_end, *sep;
	char *val_beg, *val_end;
	char *next;

	/* we search at least a cookie name followed by an equal, and more
	 * generally something like this :
	 * Cookie:    NAME1  =  VALUE 1  ; NAME2 = VALUE2 ; NAME3 = VALUE3\r\n
	 */
	for (name_beg = hdr; name_beg + cookie_name_l + 1 < hdr_end; name_beg = next + 1) {
		/* one iteration per cookie found on this line */

		/* skip the spaces preceding the name */
		while (name_beg < hdr_end && HTTP_IS_SPHT(*name_beg))
			name_beg++;

		/* <sep> stops on '=', on a delimiter or on the end, while
		 * <name_end> follows the last non-space character met, so that
		 * it ends up just after the name. Both may be identical and
		 * may be equal to hdr_end.
		 */
		name_end = name_beg;
		for (sep = name_beg; sep < hdr_end; sep++) {
			if (*sep == '=' || *sep == ';' || (list && *sep == ','))
				break;
			if (!HTTP_IS_SPHT(*sep))
				name_end = sep + 1;
		}

		if (sep < hdr_end && *sep == '=') {
			/* the value begins on the first non-space after '=' */
			for (val_beg = sep + 1; val_beg < hdr_end; val_beg++) {
				if (!HTTP_IS_SPHT(*val_beg))
					break;
			}

			/* find the end of the value, respecting quotes */
			next = http_find_cookie_value_end(val_beg, hdr_end);

			/* trim the spaces found before the delimiter */
			for (val_end = next; val_end > val_beg; val_end--) {
				if (!HTTP_IS_SPHT(val_end[-1]))
					break;
			}
		} else {
			/* no equal sign: no value at all */
			val_beg = val_end = next = sep;
		}

		/* We have nothing to do with attributes beginning with '$'.
		 * However, they will automatically be removed if a header
		 * before them is removed, since they're supposed to be linked
		 * together. Cookies with no equal sign are ignored as well.
		 */
		if (*name_beg == '$' || sep == next)
			continue;

		/* Now the cookie name lies between name_beg and name_end, and
		 * its value between val_beg and val_end.
		 */
		if (cookie_name_l == 0 || (name_end - name_beg == cookie_name_l &&
		    memcmp(name_beg, cookie_name, cookie_name_l) == 0)) {
			/* let's return this value and indicate where to go on from */
			*value = val_beg;
			*value_l = val_end - val_beg;
			return next + 1;
		}

		/* Set-Cookie headers only have the name in the first attr=value part */
		if (!list)
			break;
	}

	return NULL;
}
800 
/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
 * value is larger than 1000, it is bound to 1000. The parser consumes up to
 * 1 digit, one dot and 3 digits and stops on the first invalid character.
 * Unparsable qvalues return 1000 as "q=1.000".
 *
 * If <end> is not NULL, *end is set to the first character which was not
 * consumed. The character following the first digit is only consumed when it
 * is a dot, so *end never goes past a delimiter (e.g. a comma) nor past the
 * trailing zero of the string.
 */
int http_parse_qvalue(const char *qvalue, const char **end)
{
	int q = 1000;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q = (*qvalue++ - '0') * 1000;

	/* do not step over this character unless it really is the dot */
	if (*qvalue != '.')
		goto out;
	qvalue++;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 100;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 10;

	if (!isdigit((unsigned char)*qvalue))
		goto out;
	q += (*qvalue++ - '0') * 1;
 out:
	if (q > 1000)
		q = 1000;
	if (end)
		*end = qvalue;
	return q;
}
835 
/*
 * Given a url parameter name, find the starting position of its first
 * occurrence in the query string, or NULL if the parameter is not found.
 *
 * The query string is described by <chunks>: it lies between chunks[0] and
 * chunks[1] and, when chunks[2] is not NULL, goes on between chunks[2] and
 * chunks[3]. A name is a candidate only when directly followed by '='.
 * Parameters are separated by delimiters as recognized by
 * http_is_param_delimiter() with <delim>.
 *
 * Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
 * the function will return query_string+8.
 *
 * Warning: this function returns a pointer that can point to the first chunk
 * or the second chunk. The caller must check the position before using the
 * result.
 */
const char *http_find_url_param_pos(const char **chunks,
                                    const char* url_param_name, size_t url_param_name_l,
                                    char delim)
{
	const char **cur = chunks; /* pair of pointers of the chunk being scanned */
	const char *p = cur[0];
	const char *stop = cur[1];
	const char *eq, *limit;
	int head_l, tail_l;

	while (p < stop) {
		/* Locate the place where the '=' is expected. It may lie in
		 * the second chunk, in which case it is bound by chunks[3].
		 */
		eq = p + url_param_name_l;
		if (fix_pointer_if_wrap(chunks, &eq))
			limit = chunks[3];
		else
			limit = chunks[1];
		if (eq >= limit)
			return NULL;

		if (*eq == '=') {
			if (p + url_param_name_l > stop) {
				/* The name wraps: it is compared in two parts. */

				/* This is the end, we don't have any other chunk. */
				if (cur != chunks || !cur[2])
					return NULL;

				/* Length of each part of the comparison. */
				head_l = stop - p;
				tail_l = url_param_name_l - head_l;

				/* The second buffer is too short to contain the compared string. */
				if (cur[2] + tail_l > cur[3])
					return NULL;

				if (memcmp(p,      url_param_name,          head_l) == 0 &&
				    memcmp(cur[2], url_param_name + head_l, tail_l) == 0)
					return p;

				/* Switch to the second chunk, past the string which failed the comparison. */
				cur += 2;
				p = cur[0] + tail_l;
				stop = cur[1];
			} else {
				/* The name is contiguous: simple comparison. */
				if (memcmp(p, url_param_name, url_param_name_l) == 0)
					return p;
				p += url_param_name_l + 1;
				/* NOTE(review): cur[2] is the start of the second chunk
				 * here, not its end; kept as-is, to be confirmed.
				 */
				if (fix_pointer_if_wrap(chunks, &p))
					stop = cur[2];
			}
		}

		for (;;) {
			/* Look for the next delimiter. */
			while (p < stop && !http_is_param_delimiter(*p, delim))
				p++;
			if (p < stop)
				break;
			/* End of chunk reached: move to the second one if any. */
			if (cur != chunks || !cur[2])
				return NULL;
			cur += 2;
			p = cur[0];
			stop = cur[1];
		}
		/* skip the delimiter */
		p++;
	}
	return NULL;
}
922 
/*
 * Given a url parameter name and a query string, find the next value.
 * The query string is described by <chunks>: chunks[0]/chunks[1] are the start
 * and end of the first chunk, chunks[2]/chunks[3] the start and end of an
 * optional second chunk (chunks[2] is NULL when there is none).
 * An empty url_param_name (url_param_name_l == 0) matches the first available
 * parameter.
 * If the parameter is found, 1 is returned and *vstart / *vend are updated to
 * respectively provide a pointer to the value and its end.
 * Otherwise, 0 is returned and vstart/vend are not modified.
 */
int http_find_next_url_param(const char **chunks,
                             const char* url_param_name, size_t url_param_name_l,
                             const char **vstart, const char **vend, char delim)
{
	const char *arg_start, *qs_end;
	const char *value_start, *value_end;

	arg_start = chunks[0];
	qs_end = chunks[1];

	if (url_param_name_l) {
		/* Looks for an argument name. */
		arg_start = http_find_url_param_pos(chunks,
		                                    url_param_name, url_param_name_l,
		                                    delim);

		/* The lookup result must be tested for NULL before it takes
		 * part in any relational comparison: ordering a null pointer
		 * against an object pointer is undefined behaviour.
		 */
		if (!arg_start)
			return 0;

		/* Check for wrapping. */
		if (arg_start >= qs_end)
			qs_end = chunks[3];

		/* Jump the argument length and the '='. */
		value_start = arg_start + url_param_name_l + 1;

		/* Check for pointer wrapping. */
		if (fix_pointer_if_wrap(chunks, &value_start)) {
			/* Update the end pointer. */
			qs_end = chunks[3];

			/* Check for overflow. */
			if (value_start >= qs_end)
				return 0;
		}
	}
	else {
		if (!arg_start)
			return 0;

		while (1) {
			/* looks for the first argument. */
			value_start = memchr(arg_start, '=', qs_end - arg_start);
			if (value_start)
				break;

			/* No '=' in this chunk: retry once in the second
			 * chunk if we were still scanning the first one.
			 */
			if (arg_start >= chunks[0] &&
			    arg_start < chunks[1] &&
			    chunks[2]) {
				arg_start = chunks[2];
				qs_end = chunks[3];
				continue;
			}
			return 0;
		}

		/* The value starts right after the '='. */
		value_start++;
	}

	value_end = value_start;

	while (1) {
		/* The value runs up to the next delimiter or the end of the chunk. */
		while ((value_end < qs_end) && !http_is_param_delimiter(*value_end, delim))
			value_end++;
		if (value_end < qs_end)
			break;

		/* process buffer wrapping.
		 * NOTE(review): when the scan stops exactly at chunks[1], the
		 * "value_end < chunks[1]" test below is false, so the value
		 * does not appear to be continued into the second chunk -
		 * confirm whether "<=" was intended.
		 */
		if (value_end >= chunks[0] &&
		    value_end < chunks[1] &&
		    chunks[2]) {
			value_end = chunks[2];
			qs_end = chunks[3];
			continue;
		}
		break;
	}

	*vstart = value_start;
	*vend = value_end;
	return 1;
}
1007 
1008 /* Parses a single header line (without the CRLF) and splits it into its name
1009  * and its value. The parsing is pretty naive and just skip spaces.
1010  */
http_parse_header(const struct ist hdr,struct ist * name,struct ist * value)1011 int http_parse_header(const struct ist hdr, struct ist *name, struct ist *value)
1012 {
1013         char *p   = hdr.ptr;
1014         char *end = p + hdr.len;
1015 
1016         name->len = value->len = 0;
1017 
1018         /* Skip leading spaces */
1019         for (; p < end && HTTP_IS_SPHT(*p); p++);
1020 
1021         /* Set the header name */
1022         name->ptr = p;
1023         for (; p < end && HTTP_IS_TOKEN(*p); p++);
1024         name->len = p - name->ptr;
1025 
1026         /* Skip the ':' and spaces before and after it */
1027         for (; p < end && HTTP_IS_SPHT(*p); p++);
1028         if (p < end && *p == ':') p++;
1029         for (; p < end && HTTP_IS_SPHT(*p); p++);
1030 
1031         /* Set the header value */
1032         value->ptr = p;
1033         value->len = end - p;
1034 
1035         return 1;
1036 }
1037 
1038 /* Parses a single start line (without the CRLF) and splits it into 3 parts. The
1039  * parsing is pretty naive and just skip spaces.
1040  */
http_parse_stline(const struct ist line,struct ist * p1,struct ist * p2,struct ist * p3)1041 int http_parse_stline(const struct ist line, struct ist *p1, struct ist *p2, struct ist *p3)
1042 {
1043         char *p   = line.ptr;
1044         char *end = p + line.len;
1045 
1046         p1->len = p2->len = p3->len = 0;
1047 
1048         /* Skip leading spaces */
1049         for (; p < end && HTTP_IS_SPHT(*p); p++);
1050 
1051         /* Set the first part */
1052         p1->ptr = p;
1053         for (; p < end && HTTP_IS_TOKEN(*p); p++);
1054         p1->len = p - p1->ptr;
1055 
1056         /* Skip spaces between p1 and p2 */
1057         for (; p < end && HTTP_IS_SPHT(*p); p++);
1058 
1059         /* Set the second part */
1060         p2->ptr = p;
1061         for (; p < end && !HTTP_IS_SPHT(*p); p++);
1062         p2->len = p - p2->ptr;
1063 
1064         /* Skip spaces between p2 and p3 */
1065         for (; p < end && HTTP_IS_SPHT(*p); p++);
1066 
1067         /* The remaing is the third value */
1068         p3->ptr = p;
1069         p3->len = end - p;
1070 
1071         return 1;
1072 }
1073 
1074 /* Parses value of a Status header with the following format: "Status: Code[
1075  * Reason]".  The parsing is pretty naive and just skip spaces. It return the
1076  * numeric value of the status code.
1077  */
http_parse_status_val(const struct ist value,struct ist * status,struct ist * reason)1078 int http_parse_status_val(const struct ist value, struct ist *status, struct ist *reason)
1079 {
1080 	char *p   = value.ptr;
1081         char *end = p + value.len;
1082 	uint16_t code;
1083 
1084 	status->len = reason->len = 0;
1085 
1086 	/* Skip leading spaces */
1087         for (; p < end && HTTP_IS_SPHT(*p); p++);
1088 
1089         /* Set the status part */
1090         status->ptr = p;
1091         for (; p < end && HTTP_IS_TOKEN(*p); p++);
1092         status->len = p - status->ptr;
1093 
1094 	/* Skip spaces between status and reason */
1095         for (; p < end && HTTP_IS_SPHT(*p); p++);
1096 
1097 	/* the remaining is the reason */
1098         reason->ptr = p;
1099         reason->len = end - p;
1100 
1101 	code = strl2ui(status->ptr, status->len);
1102 	return code;
1103 }
1104