1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2 *
3 * Additional changes are licensed under the same terms as NGINX and
4 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "config.h"
25 #include "http_parser.h"
26 #include <assert.h>
27
/* Fallback for toolchains whose headers did not supply ULLONG_MAX:
 * converting -1 to uint64_t yields the all-ones value, i.e. 2^64-1. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1)
#endif

/* Smaller of two values.  Being a macro, an argument may be evaluated twice. */
#ifndef MIN
# define MIN(a,b) ((b) > (a) ? (a) : (b))
#endif

/* Element count of a true array (not of a pointer). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(a) (sizeof(a) / sizeof(*(a)))
#endif

/* Test bit number `i` of the byte-array bitmap `a`: byte i/8, bit i%8.
 * Evaluates to the int 1 when the bit is set and 0 otherwise. */
#ifndef BIT_AT
# define BIT_AT(a, i)                                                \
  ((int) ((((unsigned int) (a)[(unsigned int) (i) / 8u]) >>          \
           ((unsigned int) (i) % 8u)) & 1u))
#endif

/* Bounds-checked array read: a[i] when i is inside the array `a`,
 * otherwise the fallback value `v`. */
#ifndef ELEM_AT
# define ELEM_AT(a, i, v) (ARRAY_SIZE(a) > (unsigned int) (i) ? (a)[(i)] : (v))
#endif
49
/* Store error code `e` in the http_errno field of the `parser` that is in
 * scope at the point of use. */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  (parser)->http_errno = (e);                                        \
} while (0)
54
55
/* Invoke the notify callback settings->on_<FOR>, if one is installed.
 *
 * A non-zero return from the callback records HPE_CB_<FOR> as the parser
 * error.  If the parser error is anything other than HPE_OK once the callback
 * has run (it failed, or it paused the parser), the ENCLOSING function
 * returns ER.  Expects `parser` and `settings` to be in scope.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  /* No callback installed: nothing to do */                         \
  if (!settings->on_##FOR) {                                         \
    break;                                                           \
  }                                                                  \
                                                                     \
  if (settings->on_##FOR(parser) != 0) {                             \
    SET_ERRNO(HPE_CB_##FOR);                                         \
  }                                                                  \
                                                                     \
  /* Callback failed or paused the parser: leave the caller */       \
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                         \
    return (ER);                                                     \
  }                                                                  \
} while (0)

/* Notify variant whose early return counts the current byte as consumed */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Notify variant whose early return leaves the current byte unconsumed */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
78
/* Deliver the span that starts at <FOR>_mark (LEN bytes) to the data callback
 * settings->on_<FOR>.
 *
 * Does nothing when the mark is unset.  When a callback is installed, a
 * non-zero return records HPE_CB_<FOR>; if the parser error is then not
 * HPE_OK (failure or pause) the ENCLOSING function returns ER and the mark is
 * left as it was.  Otherwise the mark is reset to NULL.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  /* Nothing has been marked: nothing to deliver */                  \
  if (!FOR##_mark) {                                                 \
    break;                                                           \
  }                                                                  \
                                                                     \
  if (settings->on_##FOR) {                                          \
    if (settings->on_##FOR(parser, FOR##_mark, (LEN)) != 0) {        \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
                                                                     \
    /* Callback failed or paused the parser: leave the caller */     \
    if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                       \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
                                                                     \
  FOR##_mark = NULL;                                                 \
} while (0)

/* Data variant: span ends just before `p`; current byte counts as consumed */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Data variant: span ends just before `p`; current byte is not consumed */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
106
/* Point <FOR>_mark at the current position `p`, unless a mark is already
 * set, in which case the existing mark is kept. */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (FOR##_mark == 0) {                                             \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
114
115
/* Lower-case spellings of header names the parser matches byte by byte
 * (header bytes are lower-cased through TOKEN() before the comparison). */
#define PROXY_CONNECTION    "proxy-connection"
#define CONNECTION          "connection"
#define CONTENT_LENGTH      "content-length"
#define TRANSFER_ENCODING   "transfer-encoding"
#define UPGRADE             "upgrade"

/* Lower-case header values; presumably matched the same way further down
 * the file (not visible here). */
#define CHUNKED             "chunked"
#define KEEP_ALIVE          "keep-alive"
#define CLOSE               "close"
124
/* Kind of message a parser handles: requests only, responses only, or
 * either one. */
enum rspamd_http_message_type {
  HTTP_REQUEST = 0,
  HTTP_RESPONSE = 1,
  HTTP_BOTH = 2
};
126
127 static const char *method_strings[] =
128 {
129 #define XX(num, name, string) #string,
130 HTTP_METHOD_MAP(XX)
131 #undef XX
132 };
133
134
/* Token lookup table, per RFC 2616:
 *
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Indexed by byte value.  A token character maps to itself, with 'A'..'Z'
 * folded to lower case; every other byte maps to 0.  Only the first 128
 * entries are spelled out; entries 128..255 are zero-initialised, so bytes
 * with the high bit set are not tokens.
 */
static const char tokens[256] = {
/* 0x00-0x0f: control characters */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x10-0x1f: control characters */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x20-0x2f: sp !   "   #    $    %    &    '    (   )   *    +    ,   -    .    / */
  0,  '!',  0,  '#', '$', '%', '&', '\'', 0,   0,  '*', '+',  0,  '-', '.',  0,
/* 0x30-0x3f: 0 1    2    3    4    5    6    7    8    9    :   ;   <   =   >   ? */
 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',  0,   0,   0,   0,   0,   0,
/* 0x40-0x4f: @ A    B    C    D    E    F    G    H    I    J    K    L    M    N    O */
  0,  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x50-0x5f: P Q    R    S    T    U    V    W    X    Y    Z    [   \   ]   ^    _ */
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,   0,   0,  '^', '_',
/* 0x60-0x6f: ` a    b    c    d    e    f    g    h    i    j    k    l    m    n    o */
 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70-0x7f: p q    r    s    t    u    v    w    x    y    z    {   |    }   ~    del */
 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,  '|',  0,  '~',  0 };
175
176
/* Hex digit values indexed by byte: '0'-'9' -> 0..9, 'A'-'F' and 'a'-'f' ->
 * 10..15, every other byte -> -1.
 *
 * All 256 entries are written out.  With only the first 128 initialisers
 * present, entries 0x80..0xFF would be zero-initialised and bytes with the
 * high bit set would be indistinguishable from the digit '0'.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xa0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xb0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xc0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xd0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xe0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xf0 */
  };
187
188
/* T(v) keeps a bit only in non-strict builds; in strict builds it is 0. */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of the bytes 0..255 accepted as ordinary URL characters, read with
 * BIT_AT(): byte value c lives in element c/8 at bit c%8, so each element
 * below covers eight consecutive byte values (low bit = lowest value).
 * Only elements 0..15 (bytes 0..127) are listed; elements 16..31 are
 * zero-initialised.
 */
static const uint8_t normal_url_char[32] = {
/*   0..7    nul soh stx etx eot enq ack bel : none */
        0x00,
/*   8..15   bs ht nl vt np cr so si : ht (9) and np (12), non-strict only */
        T(0x02) | T(0x10),
/*  16..23   dle dc1 dc2 dc3 dc4 nak syn etb : none */
        0x00,
/*  24..31   can em sub esc fs gs rs us : none */
        0x00,
/*  32..39   sp ! " # $ % & ' : all except sp (32) and # (35) */
        0xF6,
/*  40..47   ( ) * + , - . / : all */
        0xFF,
/*  48..55   0 1 2 3 4 5 6 7 : all */
        0xFF,
/*  56..63   8 9 : ; < = > ? : all except ? (63) */
        0x7F,
/*  64..71   @ A B C D E F G : all */
        0xFF,
/*  72..79   H I J K L M N O : all */
        0xFF,
/*  80..87   P Q R S T U V W : all */
        0xFF,
/*  88..95   X Y Z [ \ ] ^ _ : all */
        0xFF,
/*  96..103  ` a b c d e f g : all */
        0xFF,
/* 104..111  h i j k l m n o : all */
        0xFF,
/* 112..119  p q r s t u v w : all */
        0xFF,
/* 120..127  x y z { | } ~ del : all except del (127) */
        0x7F, };

#undef T
231
/* States of the main parser state machine.  Values are consecutive, starting
 * at 1; the relative order matters (see PARSING_HEADER below). */
enum state
  { s_dead = 1 /* important that this is > 0 */

  /* start line of a response, or of a message of not-yet-known type */
  , s_start_req_or_res
  , s_res_or_resp_H
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_first_http_major
  , s_res_http_major
  , s_res_first_http_minor
  , s_res_http_minor
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status_start
  , s_res_status
  , s_res_line_almost_done

  /* request line */
  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_first_http_major
  , s_req_http_major
  , s_req_first_http_minor
  , s_req_http_minor
  , s_req_spamc_start
  , s_req_spamc
  , s_req_line_almost_done

  /* header lines */
  , s_header_field_start
  , s_header_field
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };


/* True while `state` is at or before s_headers_done, i.e. one of the states
 * whose bytes are counted against HTTP_MAX_HEADER_SIZE.  The argument is
 * parenthesised so that an expression argument (for example one containing
 * `|` or `?:`) is compared as a whole. */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
313
314
/* Sub-states kept in parser->header_state while a header line is scanned.
 * Values are consecutive from 0. */
enum header_states {
  /* a header of no particular interest */
  h_general = 0,

  /* shared prefix "c", "co", "con" of connection / content-length */
  h_C,
  h_CO,
  h_CON,

  /* header name still being compared against a known name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* header value still being compared against a known value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* header value fully matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
340
/* States for scanning the userinfo / host / port part of a URL.  Values are
 * consecutive, starting at 1. */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_port_start,
  s_http_host_port
};
354
/* Macros for character classes; depends on strict-mode.
 *
 * Every use of the argument is parenthesised, so an expression argument
 * (e.g. `x ? a : b`) is classified as a whole.  Most of these macros evaluate
 * their argument more than once: do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Set bit 0x20: folds 'A'..'Z' onto 'a'..'z' (other bytes change as well) */
#define LOWER(c)            ((unsigned char) ((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char) (c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char) (c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Non-strict: a space counts as a token character, bytes with the high bit
 * set count as URL characters, and '_' is allowed in host names. */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char) (c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char) (c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
381
382
/* State in which a new message starts: s_start_req when the in-scope parser
 * is of type HTTP_REQUEST, s_start_res for any other type. */
#define start_state \
  ((parser->type != HTTP_REQUEST) ? s_start_res : s_start_req)


#if HTTP_PARSER_STRICT
/* Strict build: when `cond` holds, record HPE_STRICT and jump to the
 * enclosing function's `error` label. */
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (!(cond)) {                                                     \
    break;                                                           \
  }                                                                  \
  SET_ERRNO(HPE_STRICT);                                             \
  goto error;                                                        \
} while (0)
/* Strict build: a further message is only accepted when
 * http_should_keep_alive() says so; otherwise the parser goes to s_dead. */
# define NEW_MESSAGE() (!http_should_keep_alive(parser) ? s_dead : start_state)
#else
/* Non-strict build: the check expands to nothing and a new message may
 * always start. */
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
399
400
401 /* Map errno values to strings for human-readable output */
402 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
403 static struct {
404 const char *name;
405 const char *description;
406 } http_strerror_tab[] = {
407 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
408 };
409 #undef HTTP_STRERROR_GEN
410
/* Forward declaration; the definition is not in this part of the file.
 * Takes a read-only parser and returns an int. */
int http_message_needs_eof(const http_parser *parser);
412
413 /* Our URL parser.
414 *
415 * This is designed to be shared by http_parser_execute() for URL validation,
416 * hence it has a state transition + byte-for-byte interface. In addition, it
417 * is meant to be embedded in http_parser_parse_url(), which does the dirty
418 * work of turning state transitions URL components for its API.
419 *
420 * This function should only be invoked with non-space characters. It is
421 * assumed that the caller cares about (and can detect) the transition between
422 * URL and non-URL states by looking for these.
423 */
424 static enum state
parse_url_char(enum state s,const char ch)425 parse_url_char(enum state s, const char ch)
426 {
427 if (ch == ' ' || ch == '\r' || ch == '\n') {
428 return s_dead;
429 }
430
431 #if HTTP_PARSER_STRICT
432 if (ch == '\t' || ch == '\f') {
433 return s_dead;
434 }
435 #endif
436
437 switch (s) {
438 case s_req_spaces_before_url:
439 /* Proxied requests are followed by scheme of an absolute URI (alpha).
440 * All methods except CONNECT are followed by '/' or '*'.
441 */
442
443 if (ch == '/' || ch == '*') {
444 return s_req_path;
445 }
446
447 if (IS_ALPHA(ch)) {
448 return s_req_schema;
449 }
450
451 break;
452
453 case s_req_schema:
454 if (IS_ALPHA(ch)) {
455 return s;
456 }
457
458 if (ch == ':') {
459 return s_req_schema_slash;
460 }
461
462 break;
463
464 case s_req_schema_slash:
465 if (ch == '/') {
466 return s_req_schema_slash_slash;
467 }
468
469 break;
470
471 case s_req_schema_slash_slash:
472 if (ch == '/') {
473 return s_req_server_start;
474 }
475
476 break;
477
478 case s_req_server_with_at:
479 if (ch == '@') {
480 return s_dead;
481 }
482
483 /* FALLTHROUGH */
484 case s_req_server_start:
485 case s_req_server:
486 if (ch == '/') {
487 return s_req_path;
488 }
489
490 if (ch == '?') {
491 return s_req_query_string_start;
492 }
493
494 if (ch == '@') {
495 return s_req_server_with_at;
496 }
497
498 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
499 return s_req_server;
500 }
501
502 break;
503
504 case s_req_path:
505 if (IS_URL_CHAR(ch)) {
506 return s;
507 }
508
509 switch (ch) {
510 case '?':
511 return s_req_query_string_start;
512
513 case '#':
514 return s_req_fragment_start;
515 }
516
517 break;
518
519 case s_req_query_string_start:
520 case s_req_query_string:
521 if (IS_URL_CHAR(ch)) {
522 return s_req_query_string;
523 }
524
525 switch (ch) {
526 case '?':
527 /* allow extra '?' in query string */
528 return s_req_query_string;
529
530 case '#':
531 return s_req_fragment_start;
532 }
533
534 break;
535
536 case s_req_fragment_start:
537 if (IS_URL_CHAR(ch)) {
538 return s_req_fragment;
539 }
540
541 switch (ch) {
542 case '?':
543 return s_req_fragment;
544
545 case '#':
546 return s;
547 }
548
549 break;
550
551 case s_req_fragment:
552 if (IS_URL_CHAR(ch)) {
553 return s;
554 }
555
556 switch (ch) {
557 case '?':
558 case '#':
559 return s;
560 }
561
562 break;
563
564 default:
565 break;
566 }
567
568 /* We should never fall out of the switch above unless there's an error */
569 return s_dead;
570 }
571
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)572 size_t http_parser_execute (http_parser *parser,
573 const http_parser_settings *settings,
574 const char *data,
575 size_t len)
576 {
577 char c, ch;
578 int8_t unhex_val;
579 const char *p = data;
580 const char *header_field_mark = 0;
581 const char *header_value_mark = 0;
582 const char *url_mark = 0;
583 const char *body_mark = 0;
584 const char *status_mark = 0;
585
586 /* We're in an error state. Don't bother doing anything. */
587 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
588 return 0;
589 }
590
591 if (len == 0) {
592 switch (parser->state) {
593 case s_body_identity_eof:
594 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
595 * we got paused.
596 */
597 CALLBACK_NOTIFY_NOADVANCE(message_complete);
598 return 0;
599
600 case s_dead:
601 case s_start_req_or_res:
602 case s_start_res:
603 case s_start_req:
604 return 0;
605
606 default:
607 SET_ERRNO(HPE_INVALID_EOF_STATE);
608 return 1;
609 }
610 }
611
612
613 if (parser->state == s_header_field)
614 header_field_mark = data;
615 if (parser->state == s_header_value)
616 header_value_mark = data;
617 switch (parser->state) {
618 case s_req_path:
619 case s_req_schema:
620 case s_req_schema_slash:
621 case s_req_schema_slash_slash:
622 case s_req_server_start:
623 case s_req_server:
624 case s_req_server_with_at:
625 case s_req_query_string_start:
626 case s_req_query_string:
627 case s_req_fragment_start:
628 case s_req_fragment:
629 url_mark = data;
630 break;
631 case s_res_status:
632 status_mark = data;
633 break;
634 }
635
636 for (p=data; p != data + len; p++) {
637 ch = *p;
638
639 if (PARSING_HEADER(parser->state)) {
640 ++parser->nread;
641 /* Don't allow the total size of the HTTP headers (including the status
642 * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
643 * embedders against denial-of-service attacks where the attacker feeds
644 * us a never-ending header that the embedder keeps buffering.
645 *
646 * This check is arguably the responsibility of embedders but we're doing
647 * it on the embedder's behalf because most won't bother and this way we
648 * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
649 * than any reasonable request or response so this should never affect
650 * day-to-day operation.
651 */
652 if (parser->nread > HTTP_MAX_HEADER_SIZE) {
653 SET_ERRNO(HPE_HEADER_OVERFLOW);
654 goto error;
655 }
656 }
657
658 reexecute_byte:
659 switch (parser->state) {
660
661 case s_dead:
662 /* this state is used after a 'Connection: close' message
663 * the parser will error out if it reads another message
664 */
665 if (ch == CR || ch == LF)
666 break;
667
668 SET_ERRNO(HPE_CLOSED_CONNECTION);
669 goto error;
670
671 case s_start_req_or_res:
672 {
673 if (ch == CR || ch == LF)
674 break;
675 parser->flags = 0;
676 parser->content_length = ULLONG_MAX;
677
678 if (ch == 'H') {
679 parser->state = s_res_or_resp_H;
680
681 CALLBACK_NOTIFY(message_begin);
682 } else {
683 parser->type = HTTP_REQUEST;
684 parser->state = s_start_req;
685 goto reexecute_byte;
686 }
687
688 break;
689 }
690
691 case s_res_or_resp_H:
692 if (ch == 'T') {
693 parser->type = HTTP_RESPONSE;
694 parser->state = s_res_HT;
695 } else {
696 if (ch != 'E') {
697 SET_ERRNO(HPE_INVALID_CONSTANT);
698 goto error;
699 }
700
701 parser->type = HTTP_REQUEST;
702 parser->method = HTTP_HEAD;
703 parser->index = 2;
704 parser->state = s_req_method;
705 }
706 break;
707
708 case s_start_res:
709 {
710 parser->flags = 0;
711 parser->content_length = ULLONG_MAX;
712
713 switch (ch) {
714 case 'H':
715 parser->state = s_res_H;
716 break;
717
718 case CR:
719 case LF:
720 break;
721
722 default:
723 SET_ERRNO(HPE_INVALID_CONSTANT);
724 goto error;
725 }
726
727 CALLBACK_NOTIFY(message_begin);
728 break;
729 }
730
731 case s_res_H:
732 STRICT_CHECK(ch != 'T');
733 parser->state = s_res_HT;
734 break;
735
736 case s_res_HT:
737 STRICT_CHECK(ch != 'T');
738 parser->state = s_res_HTT;
739 break;
740
741 case s_res_HTT:
742 STRICT_CHECK(ch != 'P');
743 parser->state = s_res_HTTP;
744 break;
745
746 case s_res_HTTP:
747 STRICT_CHECK(ch != '/');
748 parser->state = s_res_first_http_major;
749 break;
750
751 case s_res_first_http_major:
752 if (ch < '0' || ch > '9') {
753 SET_ERRNO(HPE_INVALID_VERSION);
754 goto error;
755 }
756
757 parser->http_major = ch - '0';
758 parser->state = s_res_http_major;
759 break;
760
761 /* major HTTP version or dot */
762 case s_res_http_major:
763 {
764 if (ch == '.') {
765 parser->state = s_res_first_http_minor;
766 break;
767 }
768
769 if (!IS_NUM(ch)) {
770 SET_ERRNO(HPE_INVALID_VERSION);
771 goto error;
772 }
773
774 parser->http_major *= 10;
775 parser->http_major += ch - '0';
776
777 if (parser->http_major > 999) {
778 SET_ERRNO(HPE_INVALID_VERSION);
779 goto error;
780 }
781
782 break;
783 }
784
785 /* first digit of minor HTTP version */
786 case s_res_first_http_minor:
787 if (!IS_NUM(ch)) {
788 SET_ERRNO(HPE_INVALID_VERSION);
789 goto error;
790 }
791
792 parser->http_minor = ch - '0';
793 parser->state = s_res_http_minor;
794 break;
795
796 /* minor HTTP version or end of request line */
797 case s_res_http_minor:
798 {
799 if (ch == ' ') {
800 parser->state = s_res_first_status_code;
801 break;
802 }
803
804 if (!IS_NUM(ch)) {
805 SET_ERRNO(HPE_INVALID_VERSION);
806 goto error;
807 }
808
809 parser->http_minor *= 10;
810 parser->http_minor += ch - '0';
811
812 if (parser->http_minor > 999) {
813 SET_ERRNO(HPE_INVALID_VERSION);
814 goto error;
815 }
816
817 break;
818 }
819
820 case s_res_first_status_code:
821 {
822 if (!IS_NUM(ch)) {
823 if (ch == ' ') {
824 break;
825 }
826
827 SET_ERRNO(HPE_INVALID_STATUS);
828 goto error;
829 }
830 parser->status_code = ch - '0';
831 parser->state = s_res_status_code;
832 break;
833 }
834
835 case s_res_status_code:
836 {
837 if (!IS_NUM(ch)) {
838 switch (ch) {
839 case ' ':
840 parser->state = s_res_status_start;
841 break;
842 case CR:
843 parser->state = s_res_line_almost_done;
844 break;
845 case LF:
846 parser->state = s_header_field_start;
847 break;
848 default:
849 SET_ERRNO(HPE_INVALID_STATUS);
850 goto error;
851 }
852 break;
853 }
854
855 parser->status_code *= 10;
856 parser->status_code += ch - '0';
857
858 if (parser->status_code > 999) {
859 SET_ERRNO(HPE_INVALID_STATUS);
860 goto error;
861 }
862
863 break;
864 }
865
866 case s_res_status_start:
867 {
868 if (ch == CR) {
869 parser->state = s_res_line_almost_done;
870 break;
871 }
872
873 if (ch == LF) {
874 parser->state = s_header_field_start;
875 break;
876 }
877
878 MARK(status);
879 parser->state = s_res_status;
880 parser->index = 0;
881 break;
882 }
883
884 case s_res_status:
885 if (ch == CR) {
886 parser->state = s_res_line_almost_done;
887 CALLBACK_DATA(status);
888 break;
889 }
890
891 if (ch == LF) {
892 parser->state = s_header_field_start;
893 CALLBACK_DATA(status);
894 break;
895 }
896
897 break;
898
899 case s_res_line_almost_done:
900 STRICT_CHECK(ch != LF);
901 parser->state = s_header_field_start;
902 break;
903
904 case s_start_req:
905 {
906 if (ch == CR || ch == LF)
907 break;
908 parser->flags = 0;
909 parser->content_length = ULLONG_MAX;
910
911 if (!IS_ALPHA(ch)) {
912 SET_ERRNO(HPE_INVALID_METHOD);
913 goto error;
914 }
915
916 parser->method = (enum http_method) 0;
917 parser->index = 1;
918 switch (ch) {
919 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
920 case 'D': parser->method = HTTP_DELETE; break;
921 case 'G': parser->method = HTTP_GET; break;
922 case 'H': parser->method = HTTP_HEAD; break;
923 case 'L': parser->method = HTTP_LOCK; break;
924 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
925 case 'N': parser->method = HTTP_NOTIFY; break;
926 case 'O': parser->method = HTTP_OPTIONS; break;
927 case 'P': parser->method = HTTP_POST;
928 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
929 break;
930 case 'R': parser->method = HTTP_REPORT; break;
931 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH or SYMBOLS */ break;
932 case 'T': parser->method = HTTP_TRACE; break;
933 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
934 default:
935 SET_ERRNO(HPE_INVALID_METHOD);
936 goto error;
937 }
938 parser->state = s_req_method;
939
940 CALLBACK_NOTIFY(message_begin);
941
942 break;
943 }
944
945 case s_req_method:
946 {
947 const char *matcher;
948 if (ch == '\0') {
949 SET_ERRNO(HPE_INVALID_METHOD);
950 goto error;
951 }
952
953 matcher = method_strings[parser->method];
954 if (ch == ' ' && matcher[parser->index] == '\0') {
955 if (parser->method != HTTP_SYMBOLS && parser->method != HTTP_CHECK) {
956 parser->state = s_req_spaces_before_url;
957 }
958 else {
959 parser->state = s_req_spamc_start;
960 }
961 } else if (ch == matcher[parser->index]) {
962 ; /* nada */
963 } else if (parser->method == HTTP_CONNECT) {
964 if (parser->index == 1 && ch == 'H') {
965 /* XXX: CHECKOUT has been removed */
966 parser->method = HTTP_CHECK;
967 } else if (parser->index == 2 && ch == 'P') {
968 parser->method = HTTP_COPY;
969 } else {
970 SET_ERRNO(HPE_INVALID_METHOD);
971 goto error;
972 }
973 } else if (parser->method == HTTP_MKCOL) {
974 if (parser->index == 1 && ch == 'O') {
975 parser->method = HTTP_MOVE;
976 } else if (parser->index == 1 && ch == 'E') {
977 parser->method = HTTP_MERGE;
978 } else if (parser->index == 1 && ch == '-') {
979 parser->method = HTTP_MSEARCH;
980 } else if (parser->index == 2 && ch == 'A') {
981 parser->method = HTTP_MKACTIVITY;
982 } else {
983 SET_ERRNO(HPE_INVALID_METHOD);
984 goto error;
985 }
986 } else if (parser->method == HTTP_SUBSCRIBE) {
987 if (parser->index == 1 && ch == 'E') {
988 parser->method = HTTP_SEARCH;
989 } else if (ch == 'Y') {
990 parser->method = HTTP_SYMBOLS;
991 }
992 else {
993 SET_ERRNO(HPE_INVALID_METHOD);
994 goto error;
995 }
996 } else if (parser->index == 1 && parser->method == HTTP_POST) {
997 if (ch == 'R') {
998 parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
999 } else if (ch == 'U') {
1000 parser->method = HTTP_PUT; /* or HTTP_PURGE */
1001 } else if (ch == 'A') {
1002 parser->method = HTTP_PATCH;
1003 } else {
1004 SET_ERRNO(HPE_INVALID_METHOD);
1005 goto error;
1006 }
1007 } else if (parser->index == 2) {
1008 if (parser->method == HTTP_PUT) {
1009 if (ch == 'R') {
1010 parser->method = HTTP_PURGE;
1011 } else {
1012 SET_ERRNO(HPE_INVALID_METHOD);
1013 goto error;
1014 }
1015 } else if (parser->method == HTTP_UNLOCK) {
1016 if (ch == 'S') {
1017 parser->method = HTTP_UNSUBSCRIBE;
1018 } else {
1019 SET_ERRNO(HPE_INVALID_METHOD);
1020 goto error;
1021 }
1022 } else {
1023 SET_ERRNO(HPE_INVALID_METHOD);
1024 goto error;
1025 }
1026 } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
1027 parser->method = HTTP_PROPPATCH;
1028 } else {
1029 SET_ERRNO(HPE_INVALID_METHOD);
1030 goto error;
1031 }
1032
1033 ++parser->index;
1034 break;
1035 }
1036
1037 case s_req_spaces_before_url:
1038 {
1039 if (ch == ' ') break;
1040
1041 MARK(url);
1042 if (parser->method == HTTP_CONNECT) {
1043 parser->state = s_req_server_start;
1044 }
1045
1046 parser->state = parse_url_char((enum state)parser->state, ch);
1047 if (parser->state == s_dead) {
1048 SET_ERRNO(HPE_INVALID_URL);
1049 goto error;
1050 }
1051
1052 break;
1053 }
1054
1055 case s_req_schema:
1056 case s_req_schema_slash:
1057 case s_req_schema_slash_slash:
1058 case s_req_server_start:
1059 {
1060 switch (ch) {
1061 /* No whitespace allowed here */
1062 case ' ':
1063 case CR:
1064 case LF:
1065 SET_ERRNO(HPE_INVALID_URL);
1066 goto error;
1067 default:
1068 parser->state = parse_url_char((enum state)parser->state, ch);
1069 if (parser->state == s_dead) {
1070 SET_ERRNO(HPE_INVALID_URL);
1071 goto error;
1072 }
1073 }
1074
1075 break;
1076 }
1077
1078 case s_req_server:
1079 case s_req_server_with_at:
1080 case s_req_path:
1081 case s_req_query_string_start:
1082 case s_req_query_string:
1083 case s_req_fragment_start:
1084 case s_req_fragment:
1085 {
1086 switch (ch) {
1087 case ' ':
1088 parser->state = s_req_http_start;
1089 CALLBACK_DATA(url);
1090 break;
1091 case CR:
1092 case LF:
1093 parser->http_major = 0;
1094 parser->http_minor = 9;
1095 parser->state = (ch == CR) ?
1096 s_req_line_almost_done :
1097 s_header_field_start;
1098 CALLBACK_DATA(url);
1099 break;
1100 default:
1101 parser->state = parse_url_char((enum state)parser->state, ch);
1102 if (parser->state == s_dead) {
1103 SET_ERRNO(HPE_INVALID_URL);
1104 goto error;
1105 }
1106 }
1107 break;
1108 }
1109
1110 case s_req_http_start:
1111 switch (ch) {
1112 case 'H':
1113 parser->state = s_req_http_H;
1114 break;
1115 case ' ':
1116 break;
1117 default:
1118 SET_ERRNO(HPE_INVALID_CONSTANT);
1119 goto error;
1120 }
1121 break;
1122
1123 case s_req_http_H:
1124 STRICT_CHECK(ch != 'T');
1125 parser->state = s_req_http_HT;
1126 break;
1127
1128 case s_req_http_HT:
1129 STRICT_CHECK(ch != 'T');
1130 parser->state = s_req_http_HTT;
1131 break;
1132
1133 case s_req_http_HTT:
1134 STRICT_CHECK(ch != 'P');
1135 parser->state = s_req_http_HTTP;
1136 break;
1137
1138 case s_req_http_HTTP:
1139 STRICT_CHECK(ch != '/');
1140 parser->state = s_req_first_http_major;
1141 break;
1142
1143 /* first digit of major HTTP version */
1144 case s_req_first_http_major:
1145 if (ch < '1' || ch > '9') {
1146 SET_ERRNO(HPE_INVALID_VERSION);
1147 goto error;
1148 }
1149
1150 parser->http_major = ch - '0';
1151 parser->state = s_req_http_major;
1152 break;
1153
1154 /* major HTTP version or dot */
1155 case s_req_http_major:
1156 {
1157 if (ch == '.') {
1158 parser->state = s_req_first_http_minor;
1159 break;
1160 }
1161
1162 if (!IS_NUM(ch)) {
1163 SET_ERRNO(HPE_INVALID_VERSION);
1164 goto error;
1165 }
1166
1167 parser->http_major *= 10;
1168 parser->http_major += ch - '0';
1169
1170 if (parser->http_major > 999) {
1171 SET_ERRNO(HPE_INVALID_VERSION);
1172 goto error;
1173 }
1174
1175 break;
1176 }
1177
1178 /* first digit of minor HTTP version */
1179 case s_req_first_http_minor:
1180 if (!IS_NUM(ch)) {
1181 SET_ERRNO(HPE_INVALID_VERSION);
1182 goto error;
1183 }
1184
1185 parser->http_minor = ch - '0';
1186 parser->state = s_req_http_minor;
1187 break;
1188
1189 /* minor HTTP version or end of request line */
1190 case s_req_http_minor:
1191 {
1192 if (ch == CR) {
1193 parser->state = s_req_line_almost_done;
1194 break;
1195 }
1196
1197 if (ch == LF) {
1198 parser->state = s_header_field_start;
1199 break;
1200 }
1201
1202 /* XXX allow spaces after digit? */
1203
1204 if (!IS_NUM(ch)) {
1205 SET_ERRNO(HPE_INVALID_VERSION);
1206 goto error;
1207 }
1208
1209 parser->http_minor *= 10;
1210 parser->http_minor += ch - '0';
1211
1212 if (parser->http_minor > 999) {
1213 SET_ERRNO(HPE_INVALID_VERSION);
1214 goto error;
1215 }
1216
1217 break;
1218 }
1219 case s_req_spamc_start: {
1220 if (ch == 'S') {
1221 parser->flags |= F_SPAMC;
1222 parser->state = s_req_spamc;
1223 }
1224 else if (ch == 'R') {
1225 parser->state = s_req_spamc;
1226 }
1227 else if (ch != ' ') {
1228 SET_ERRNO(HPE_INVALID_CONSTANT);
1229 goto error;
1230 }
1231
1232 break;
1233 }
1234
1235 case s_req_spamc:
1236 {
1237 if (ch == CR) {
1238 parser->state = s_req_line_almost_done;
1239 }
1240 else if (ch == LF) {
1241 parser->state = s_header_field_start;
1242 }
1243 break;
1244 }
1245
1246 /* end of request line */
1247 case s_req_line_almost_done:
1248 {
1249 if (ch != LF) {
1250 SET_ERRNO(HPE_LF_EXPECTED);
1251 goto error;
1252 }
1253
1254 parser->state = s_header_field_start;
1255 break;
1256 }
1257
1258 case s_header_field_start:
1259 {
1260 if (ch == CR) {
1261 parser->state = s_headers_almost_done;
1262 break;
1263 }
1264
1265 if (ch == LF) {
1266 /* they might be just sending \n instead of \r\n so this would be
1267 * the second \n to denote the end of headers*/
1268 parser->state = s_headers_almost_done;
1269 goto reexecute_byte;
1270 }
1271
1272 c = TOKEN(ch);
1273
1274 if (!c) {
1275 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1276 goto error;
1277 }
1278
1279 MARK(header_field);
1280
1281 parser->index = 0;
1282 parser->state = s_header_field;
1283
1284 switch (c) {
1285 case 'c':
1286 parser->header_state = h_C;
1287 break;
1288
1289 case 'p':
1290 parser->header_state = h_matching_proxy_connection;
1291 break;
1292
1293 case 't':
1294 parser->header_state = h_matching_transfer_encoding;
1295 break;
1296
1297 case 'u':
1298 parser->header_state = h_matching_upgrade;
1299 break;
1300
1301 default:
1302 parser->header_state = h_general;
1303 break;
1304 }
1305 break;
1306 }
1307
1308 case s_header_field:
1309 {
1310 c = TOKEN(ch);
1311
1312 if (c) {
1313 switch (parser->header_state) {
1314 case h_general:
1315 break;
1316
1317 case h_C:
1318 parser->index++;
1319 parser->header_state = (c == 'o' ? h_CO : h_general);
1320 break;
1321
1322 case h_CO:
1323 parser->index++;
1324 parser->header_state = (c == 'n' ? h_CON : h_general);
1325 break;
1326
1327 case h_CON:
1328 parser->index++;
1329 switch (c) {
1330 case 'n':
1331 parser->header_state = h_matching_connection;
1332 break;
1333 case 't':
1334 parser->header_state = h_matching_content_length;
1335 break;
1336 default:
1337 parser->header_state = h_general;
1338 break;
1339 }
1340 break;
1341
1342 /* connection */
1343
1344 case h_matching_connection:
1345 parser->index++;
1346 if (parser->index > sizeof(CONNECTION)-1
1347 || c != CONNECTION[parser->index]) {
1348 parser->header_state = h_general;
1349 } else if (parser->index == sizeof(CONNECTION)-2) {
1350 parser->header_state = h_connection;
1351 }
1352 break;
1353
1354 /* proxy-connection */
1355
1356 case h_matching_proxy_connection:
1357 parser->index++;
1358 if (parser->index > sizeof(PROXY_CONNECTION)-1
1359 || c != PROXY_CONNECTION[parser->index]) {
1360 parser->header_state = h_general;
1361 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1362 parser->header_state = h_connection;
1363 }
1364 break;
1365
1366 /* content-length */
1367
1368 case h_matching_content_length:
1369 parser->index++;
1370 if (parser->index > sizeof(CONTENT_LENGTH)-1
1371 || c != CONTENT_LENGTH[parser->index]) {
1372 parser->header_state = h_general;
1373 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1374 parser->header_state = h_content_length;
1375 }
1376 break;
1377
1378 /* transfer-encoding */
1379
1380 case h_matching_transfer_encoding:
1381 parser->index++;
1382 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1383 || c != TRANSFER_ENCODING[parser->index]) {
1384 parser->header_state = h_general;
1385 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1386 parser->header_state = h_transfer_encoding;
1387 }
1388 break;
1389
1390 /* upgrade */
1391
1392 case h_matching_upgrade:
1393 parser->index++;
1394 if (parser->index > sizeof(UPGRADE)-1
1395 || c != UPGRADE[parser->index]) {
1396 parser->header_state = h_general;
1397 } else if (parser->index == sizeof(UPGRADE)-2) {
1398 parser->header_state = h_upgrade;
1399 }
1400 break;
1401
1402 case h_connection:
1403 case h_content_length:
1404 case h_transfer_encoding:
1405 case h_upgrade:
1406 if (ch != ' ') parser->header_state = h_general;
1407 break;
1408
1409 default:
1410 assert(0 && "Unknown header_state");
1411 break;
1412 }
1413 break;
1414 }
1415
1416 if (ch == ':') {
1417 parser->state = s_header_value_start;
1418 CALLBACK_DATA(header_field);
1419 break;
1420 }
1421
1422 if (ch == CR) {
1423 parser->state = s_header_almost_done;
1424 CALLBACK_DATA(header_field);
1425 break;
1426 }
1427
1428 if (ch == LF) {
1429 parser->state = s_header_field_start;
1430 CALLBACK_DATA(header_field);
1431 break;
1432 }
1433
1434 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1435 goto error;
1436 }
1437
1438 case s_header_value_start:
1439 {
1440 if (ch == ' ' || ch == '\t') break;
1441
1442 MARK(header_value);
1443
1444 parser->state = s_header_value;
1445 parser->index = 0;
1446
1447 if (ch == CR) {
1448 parser->header_state = h_general;
1449 parser->state = s_header_almost_done;
1450 CALLBACK_DATA(header_value);
1451 break;
1452 }
1453
1454 if (ch == LF) {
1455 parser->state = s_header_field_start;
1456 CALLBACK_DATA(header_value);
1457 break;
1458 }
1459
1460 c = LOWER(ch);
1461
1462 switch (parser->header_state) {
1463 case h_upgrade:
1464 parser->flags |= F_UPGRADE;
1465 parser->header_state = h_general;
1466 break;
1467
1468 case h_transfer_encoding:
1469 /* looking for 'Transfer-Encoding: chunked' */
1470 if ('c' == c) {
1471 parser->header_state = h_matching_transfer_encoding_chunked;
1472 } else {
1473 parser->header_state = h_general;
1474 }
1475 break;
1476
1477 case h_content_length:
1478 if (!IS_NUM(ch)) {
1479 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1480 goto error;
1481 }
1482
1483 parser->content_length = ch - '0';
1484 break;
1485
1486 case h_connection:
1487 /* looking for 'Connection: keep-alive' */
1488 if (c == 'k') {
1489 parser->header_state = h_matching_connection_keep_alive;
1490 /* looking for 'Connection: close' */
1491 } else if (c == 'c') {
1492 parser->header_state = h_matching_connection_close;
1493 } else {
1494 parser->header_state = h_general;
1495 }
1496 break;
1497
1498 default:
1499 parser->header_state = h_general;
1500 break;
1501 }
1502 break;
1503 }
1504
1505 case s_header_value:
1506 {
1507
1508 if (ch == CR) {
1509 parser->state = s_header_almost_done;
1510 CALLBACK_DATA(header_value);
1511 break;
1512 }
1513
1514 if (ch == LF) {
1515 parser->state = s_header_almost_done;
1516 CALLBACK_DATA_NOADVANCE(header_value);
1517 goto reexecute_byte;
1518 }
1519
1520 c = LOWER(ch);
1521
1522 switch (parser->header_state) {
1523 case h_general:
1524 break;
1525
1526 case h_connection:
1527 case h_transfer_encoding:
1528 assert(0 && "Shouldn't get here.");
1529 break;
1530
1531 case h_content_length:
1532 {
1533 uint64_t t;
1534
1535 if (ch == ' ') break;
1536
1537 if (!IS_NUM(ch)) {
1538 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1539 goto error;
1540 }
1541
1542 t = parser->content_length;
1543 t *= 10;
1544 t += ch - '0';
1545
1546 /* Overflow? */
1547 if (t < parser->content_length || t == ULLONG_MAX) {
1548 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1549 goto error;
1550 }
1551
1552 parser->content_length = t;
1553 break;
1554 }
1555
1556 /* Transfer-Encoding: chunked */
1557 case h_matching_transfer_encoding_chunked:
1558 parser->index++;
1559 if (parser->index > sizeof(CHUNKED)-1
1560 || c != CHUNKED[parser->index]) {
1561 parser->header_state = h_general;
1562 } else if (parser->index == sizeof(CHUNKED)-2) {
1563 parser->header_state = h_transfer_encoding_chunked;
1564 }
1565 break;
1566
1567 /* looking for 'Connection: keep-alive' */
1568 case h_matching_connection_keep_alive:
1569 parser->index++;
1570 if (parser->index > sizeof(KEEP_ALIVE)-1
1571 || c != KEEP_ALIVE[parser->index]) {
1572 parser->header_state = h_general;
1573 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1574 parser->header_state = h_connection_keep_alive;
1575 }
1576 break;
1577
1578 /* looking for 'Connection: close' */
1579 case h_matching_connection_close:
1580 parser->index++;
1581 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1582 parser->header_state = h_general;
1583 } else if (parser->index == sizeof(CLOSE)-2) {
1584 parser->header_state = h_connection_close;
1585 }
1586 break;
1587
1588 case h_transfer_encoding_chunked:
1589 case h_connection_keep_alive:
1590 case h_connection_close:
1591 if (ch != ' ') parser->header_state = h_general;
1592 break;
1593
1594 default:
1595 parser->state = s_header_value;
1596 parser->header_state = h_general;
1597 break;
1598 }
1599 break;
1600 }
1601
1602 case s_header_almost_done:
1603 {
1604 STRICT_CHECK(ch != LF);
1605
1606 parser->state = s_header_value_lws;
1607
1608 switch (parser->header_state) {
1609 case h_connection_keep_alive:
1610 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1611 break;
1612 case h_connection_close:
1613 /* XXX: not needed for rspamd parser->flags |= F_CONNECTION_CLOSE; */
1614 break;
1615 case h_transfer_encoding_chunked:
1616 parser->flags |= F_CHUNKED;
1617 break;
1618 default:
1619 break;
1620 }
1621
1622 break;
1623 }
1624
1625 case s_header_value_lws:
1626 {
1627 if (ch == ' ' || ch == '\t')
1628 parser->state = s_header_value_start;
1629 else
1630 {
1631 parser->state = s_header_field_start;
1632 goto reexecute_byte;
1633 }
1634 break;
1635 }
1636
1637 case s_headers_almost_done:
1638 {
1639 STRICT_CHECK(ch != LF);
1640
1641 if (parser->flags & F_TRAILING) {
1642 /* End of a chunked request */
1643 parser->state = NEW_MESSAGE();
1644 CALLBACK_NOTIFY(message_complete);
1645 break;
1646 }
1647
1648 parser->state = s_headers_done;
1649
1650 /* Set this here so that on_headers_complete() callbacks can see it */
1651 parser->upgrade =
1652 (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1653
1654 /* Here we call the headers_complete callback. This is somewhat
1655 * different than other callbacks because if the user returns 1, we
1656 * will interpret that as saying that this message has no body. This
1657 * is needed for the annoying case of receiving a response to a HEAD
1658 * request.
1659 *
1660 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1661 * we have to simulate it by handling a change in errno below.
1662 */
1663 if (settings->on_headers_complete) {
1664 switch (settings->on_headers_complete(parser)) {
1665 case 0:
1666 break;
1667
1668 case 1:
1669 parser->flags |= F_SKIPBODY;
1670 break;
1671
1672 default:
1673 SET_ERRNO(HPE_CB_headers_complete);
1674 return p - data; /* Error */
1675 }
1676 }
1677
1678 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1679 return p - data;
1680 }
1681
1682 goto reexecute_byte;
1683 }
1684
1685 case s_headers_done:
1686 {
1687 STRICT_CHECK(ch != LF);
1688
1689 parser->nread = 0;
1690
1691 /* Exit, the rest of the connect is in a different protocol. */
1692 if (parser->upgrade) {
1693 parser->state = NEW_MESSAGE();
1694 CALLBACK_NOTIFY(message_complete);
1695 return (p - data) + 1;
1696 }
1697
1698 if (parser->flags & F_SKIPBODY) {
1699 parser->state = NEW_MESSAGE();
1700 CALLBACK_NOTIFY(message_complete);
1701 } else if (parser->flags & F_CHUNKED) {
1702 /* chunked encoding - ignore Content-Length header */
1703 parser->state = s_chunk_size_start;
1704 } else {
1705 if (parser->content_length == 0) {
1706 /* Content-Length header given but zero: Content-Length: 0\r\n */
1707 parser->state = NEW_MESSAGE();
1708 CALLBACK_NOTIFY(message_complete);
1709 } else if (parser->content_length != ULLONG_MAX) {
1710 /* Content-Length header given and non-zero */
1711 parser->state = s_body_identity;
1712 } else {
1713 if (parser->type == HTTP_REQUEST ||
1714 !http_message_needs_eof(parser)) {
1715 /* Assume content-length 0 - read the next */
1716 parser->state = NEW_MESSAGE();
1717 CALLBACK_NOTIFY(message_complete);
1718 } else {
1719 /* Read body until EOF */
1720 parser->state = s_body_identity_eof;
1721 }
1722 }
1723 }
1724
1725 break;
1726 }
1727
1728 case s_body_identity:
1729 {
1730 uint64_t to_read = MIN(parser->content_length,
1731 (uint64_t) ((data + len) - p));
1732
1733 assert(parser->content_length != 0
1734 && parser->content_length != ULLONG_MAX);
1735
1736 /* The difference between advancing content_length and p is because
1737 * the latter will automatically advance on the next loop iteration.
1738 * Further, if content_length ends up at 0, we want to see the last
1739 * byte again for our message complete callback.
1740 */
1741 MARK(body);
1742 parser->content_length -= to_read;
1743 p += to_read - 1;
1744
1745 if (parser->content_length == 0) {
1746 parser->state = s_message_done;
1747
1748 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1749 *
1750 * The alternative to doing this is to wait for the next byte to
1751 * trigger the data callback, just as in every other case. The
1752 * problem with this is that this makes it difficult for the test
1753 * harness to distinguish between complete-on-EOF and
1754 * complete-on-length. It's not clear that this distinction is
1755 * important for applications, but let's keep it for now.
1756 */
1757 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1758 goto reexecute_byte;
1759 }
1760
1761 break;
1762 }
1763
1764 /* read until EOF */
1765 case s_body_identity_eof:
1766 MARK(body);
1767 p = data + len - 1;
1768
1769 break;
1770
1771 case s_message_done:
1772 parser->state = NEW_MESSAGE();
1773 CALLBACK_NOTIFY(message_complete);
1774 break;
1775
1776 case s_chunk_size_start:
1777 {
1778 assert(parser->nread == 1);
1779 assert(parser->flags & F_CHUNKED);
1780
1781 unhex_val = unhex[(unsigned char)ch];
1782 if (unhex_val == -1) {
1783 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1784 goto error;
1785 }
1786
1787 parser->content_length = unhex_val;
1788 parser->state = s_chunk_size;
1789 break;
1790 }
1791
1792 case s_chunk_size:
1793 {
1794 uint64_t t;
1795
1796 assert(parser->flags & F_CHUNKED);
1797
1798 if (ch == CR) {
1799 parser->state = s_chunk_size_almost_done;
1800 break;
1801 }
1802
1803 unhex_val = unhex[(unsigned char)ch];
1804
1805 if (unhex_val == -1) {
1806 if (ch == ';' || ch == ' ') {
1807 parser->state = s_chunk_parameters;
1808 break;
1809 }
1810
1811 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1812 goto error;
1813 }
1814
1815 t = parser->content_length;
1816 t *= 16;
1817 t += unhex_val;
1818
1819 /* Overflow? */
1820 if (t < parser->content_length || t == ULLONG_MAX) {
1821 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1822 goto error;
1823 }
1824
1825 parser->content_length = t;
1826 break;
1827 }
1828
1829 case s_chunk_parameters:
1830 {
1831 assert(parser->flags & F_CHUNKED);
1832 /* just ignore this shit. TODO check for overflow */
1833 if (ch == CR) {
1834 parser->state = s_chunk_size_almost_done;
1835 break;
1836 }
1837 break;
1838 }
1839
1840 case s_chunk_size_almost_done:
1841 {
1842 assert(parser->flags & F_CHUNKED);
1843 STRICT_CHECK(ch != LF);
1844
1845 parser->nread = 0;
1846
1847 if (parser->content_length == 0) {
1848 parser->flags |= F_TRAILING;
1849 parser->state = s_header_field_start;
1850 } else {
1851 parser->state = s_chunk_data;
1852 }
1853 break;
1854 }
1855
1856 case s_chunk_data:
1857 {
1858 uint64_t to_read = MIN(parser->content_length,
1859 (uint64_t) ((data + len) - p));
1860
1861 assert(parser->flags & F_CHUNKED);
1862 assert(parser->content_length != 0
1863 && parser->content_length != ULLONG_MAX);
1864
1865 /* See the explanation in s_body_identity for why the content
1866 * length and data pointers are managed this way.
1867 */
1868 MARK(body);
1869 parser->content_length -= to_read;
1870 p += to_read - 1;
1871
1872 if (parser->content_length == 0) {
1873 parser->state = s_chunk_data_almost_done;
1874 }
1875
1876 break;
1877 }
1878
1879 case s_chunk_data_almost_done:
1880 assert(parser->flags & F_CHUNKED);
1881 assert(parser->content_length == 0);
1882 STRICT_CHECK(ch != CR);
1883 parser->state = s_chunk_data_done;
1884 CALLBACK_DATA(body);
1885 break;
1886
1887 case s_chunk_data_done:
1888 assert(parser->flags & F_CHUNKED);
1889 STRICT_CHECK(ch != LF);
1890 parser->nread = 0;
1891 parser->state = s_chunk_size_start;
1892 break;
1893
1894 default:
1895 assert(0 && "unhandled state");
1896 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1897 goto error;
1898 }
1899 }
1900
1901 /* Run callbacks for any marks that we have leftover after we ran out of
1902 * bytes. There should be at most one of these set, so it's OK to invoke
1903 * them in series (unset marks will not result in callbacks).
1904 *
1905 * We use the NOADVANCE() variety of callbacks here because 'p' has already
1906 * overflowed 'data' and this allows us to correct for the off-by-one that
1907 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1908 * value that's in-bounds).
1909 */
1910
1911 assert(((header_field_mark ? 1 : 0) +
1912 (header_value_mark ? 1 : 0) +
1913 (url_mark ? 1 : 0) +
1914 (body_mark ? 1 : 0) +
1915 (status_mark ? 1 : 0)) <= 1);
1916
1917 CALLBACK_DATA_NOADVANCE(header_field);
1918 CALLBACK_DATA_NOADVANCE(header_value);
1919 CALLBACK_DATA_NOADVANCE(url);
1920 CALLBACK_DATA_NOADVANCE(body);
1921 CALLBACK_DATA_NOADVANCE(status);
1922
1923 return len;
1924
1925 error:
1926 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1927 SET_ERRNO(HPE_UNKNOWN);
1928 }
1929
1930 return (p - data);
1931 }
1932
1933
1934 /* Does the parser need to see an EOF to find the end of the message? */
1935 int
http_message_needs_eof(const http_parser * parser)1936 http_message_needs_eof (const http_parser *parser)
1937 {
1938 if (parser->type == HTTP_REQUEST) {
1939 return 0;
1940 }
1941
1942 /* See RFC 2616 section 4.4 */
1943 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1944 parser->status_code == 204 || /* No Content */
1945 parser->status_code == 304 || /* Not Modified */
1946 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1947 return 0;
1948 }
1949
1950 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1951 return 0;
1952 }
1953
1954 return 1;
1955 }
1956
1957
1958 int
http_should_keep_alive(const http_parser * parser)1959 http_should_keep_alive (const http_parser *parser)
1960 {
1961 if (parser->http_major > 0 && parser->http_minor > 0) {
1962 /* HTTP/1.1 */
1963 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1964 return 0;
1965 }
1966 } else {
1967 /* HTTP/1.0 or earlier */
1968 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1969 return 0;
1970 }
1971 }
1972
1973 return !http_message_needs_eof(parser);
1974 }
1975
1976
1977 const char *
http_method_str(enum http_method m)1978 http_method_str (enum http_method m)
1979 {
1980 return ELEM_AT(method_strings, m, "<unknown>");
1981 }
1982
1983
1984 void
http_parser_init(http_parser * parser,int t)1985 http_parser_init (http_parser *parser, int t)
1986 {
1987 void *data = parser->data; /* preserve application data */
1988 memset(parser, 0, sizeof(*parser));
1989 parser->data = data;
1990 parser->type = t;
1991 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1992 parser->http_errno = HPE_OK;
1993 }
1994
1995 const char *
http_errno_name(enum http_errno err)1996 http_errno_name(enum http_errno err) {
1997 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1998 return http_strerror_tab[err].name;
1999 }
2000
2001 const char *
http_errno_description(enum http_errno err)2002 http_errno_description(enum http_errno err) {
2003 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2004 return http_strerror_tab[err].description;
2005 }
2006
2007 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)2008 http_parse_host_char(enum http_host_state s, const char ch) {
2009 switch(s) {
2010 case s_http_userinfo:
2011 case s_http_userinfo_start:
2012 if (ch == '@') {
2013 return s_http_host_start;
2014 }
2015
2016 if (IS_USERINFO_CHAR(ch)) {
2017 return s_http_userinfo;
2018 }
2019 break;
2020
2021 case s_http_host_start:
2022 if (ch == '[') {
2023 return s_http_host_v6_start;
2024 }
2025
2026 if (IS_HOST_CHAR(ch)) {
2027 return s_http_host;
2028 }
2029
2030 break;
2031
2032 case s_http_host:
2033 if (IS_HOST_CHAR(ch)) {
2034 return s_http_host;
2035 }
2036
2037 /* FALLTHROUGH */
2038 case s_http_host_v6_end:
2039 if (ch == ':') {
2040 return s_http_host_port_start;
2041 }
2042
2043 break;
2044
2045 case s_http_host_v6:
2046 if (ch == ']') {
2047 return s_http_host_v6_end;
2048 }
2049
2050 /* FALLTHROUGH */
2051 case s_http_host_v6_start:
2052 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2053 return s_http_host_v6;
2054 }
2055
2056 break;
2057
2058 case s_http_host_port:
2059 case s_http_host_port_start:
2060 if (IS_NUM(ch)) {
2061 return s_http_host_port;
2062 }
2063
2064 break;
2065
2066 default:
2067 break;
2068 }
2069 return s_http_host_dead;
2070 }
2071
2072 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)2073 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2074 enum http_host_state s;
2075
2076 const char *p;
2077 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2078
2079 u->field_data[UF_HOST].len = 0;
2080
2081 s = found_at ? s_http_userinfo_start : s_http_host_start;
2082
2083 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2084 enum http_host_state new_s = http_parse_host_char(s, *p);
2085
2086 if (new_s == s_http_host_dead) {
2087 return 1;
2088 }
2089
2090 switch(new_s) {
2091 case s_http_host:
2092 if (s != s_http_host) {
2093 u->field_data[UF_HOST].off = p - buf;
2094 }
2095 u->field_data[UF_HOST].len++;
2096 break;
2097
2098 case s_http_host_v6:
2099 if (s != s_http_host_v6) {
2100 u->field_data[UF_HOST].off = p - buf;
2101 }
2102 u->field_data[UF_HOST].len++;
2103 break;
2104
2105 case s_http_host_port:
2106 if (s != s_http_host_port) {
2107 u->field_data[UF_PORT].off = p - buf;
2108 u->field_data[UF_PORT].len = 0;
2109 u->field_set |= (1 << UF_PORT);
2110 }
2111 u->field_data[UF_PORT].len++;
2112 break;
2113
2114 case s_http_userinfo:
2115 if (s != s_http_userinfo) {
2116 u->field_data[UF_USERINFO].off = p - buf ;
2117 u->field_data[UF_USERINFO].len = 0;
2118 u->field_set |= (1 << UF_USERINFO);
2119 }
2120 u->field_data[UF_USERINFO].len++;
2121 break;
2122
2123 default:
2124 break;
2125 }
2126 s = new_s;
2127 }
2128
2129 /* Make sure we don't end somewhere unexpected */
2130 switch (s) {
2131 case s_http_host_start:
2132 case s_http_host_v6_start:
2133 case s_http_host_v6:
2134 case s_http_host_port_start:
2135 case s_http_userinfo:
2136 case s_http_userinfo_start:
2137 return 1;
2138 default:
2139 break;
2140 }
2141
2142 return 0;
2143 }
2144
2145 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2146 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2147 struct http_parser_url *u)
2148 {
2149 enum state s;
2150 const char *p;
2151 enum http_parser_url_fields uf, old_uf;
2152 int found_at = 0;
2153
2154 u->port = u->field_set = 0;
2155 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2156 uf = old_uf = UF_MAX;
2157
2158 for (p = buf; p < buf + buflen; p++) {
2159 s = parse_url_char(s, *p);
2160
2161 /* Figure out the next field that we're operating on */
2162 switch (s) {
2163 case s_dead:
2164 return 1;
2165
2166 /* Skip delimiters */
2167 case s_req_schema_slash:
2168 case s_req_schema_slash_slash:
2169 case s_req_server_start:
2170 case s_req_query_string_start:
2171 case s_req_fragment_start:
2172 continue;
2173
2174 case s_req_schema:
2175 uf = UF_SCHEMA;
2176 break;
2177
2178 case s_req_server_with_at:
2179 found_at = 1;
2180
2181 /* FALLTROUGH */
2182 case s_req_server:
2183 uf = UF_HOST;
2184 break;
2185
2186 case s_req_path:
2187 uf = UF_PATH;
2188 break;
2189
2190 case s_req_query_string:
2191 uf = UF_QUERY;
2192 break;
2193
2194 case s_req_fragment:
2195 uf = UF_FRAGMENT;
2196 break;
2197
2198 default:
2199 assert(!"Unexpected state");
2200 return 1;
2201 }
2202
2203 /* Nothing's changed; soldier on */
2204 if (uf == old_uf) {
2205 u->field_data[uf].len++;
2206 continue;
2207 }
2208
2209 u->field_data[uf].off = p - buf;
2210 u->field_data[uf].len = 1;
2211
2212 u->field_set |= (1 << uf);
2213 old_uf = uf;
2214 }
2215
2216 /* host must be present if there is a schema */
2217 /* parsing http:///toto will fail */
2218 if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2219 if (http_parse_host(buf, u, found_at) != 0) {
2220 return 1;
2221 }
2222 }
2223
2224 /* CONNECT requests can only contain "hostname:port" */
2225 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2226 return 1;
2227 }
2228
2229 if (u->field_set & (1 << UF_PORT)) {
2230 /* Don't bother with endp; we've already validated the string */
2231 unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2232
2233 /* Ports have a max value of 2^16 */
2234 if (v > 0xffff) {
2235 return 1;
2236 }
2237
2238 u->port = (uint16_t) v;
2239 }
2240
2241 return 0;
2242 }
2243
2244 void
http_parser_pause(http_parser * parser,int paused)2245 http_parser_pause(http_parser *parser, int paused) {
2246 /* Users should only be pausing/unpausing a parser that is not in an error
2247 * state. In non-debug builds, there's not much that we can do about this
2248 * other than ignore it.
2249 */
2250 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2251 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2252 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2253 } else {
2254 assert(0 && "Attempting to pause parser in error state");
2255 }
2256 }
2257
2258 int
http_body_is_final(const struct http_parser * parser)2259 http_body_is_final(const struct http_parser *parser) {
2260 return parser->state == s_message_done;
2261 }
2262
2263 unsigned long
http_parser_version(void)2264 http_parser_version(void) {
2265 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2266 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2267 HTTP_PARSER_VERSION_PATCH * 0x00001;
2268 }
2269