1 /* Copyright Joyent, Inc. and other Node contributors.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE.
20 */
21 #include "http_parser.h"
22 #include <assert.h>
23 #include <stddef.h>
24 #include <ctype.h>
25 #include <string.h>
26 #include <limits.h>
27
/* Limit that COUNT_HEADER_SIZE() compares the running header byte count
 * against; starts out as HTTP_MAX_HEADER_SIZE. */
static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
29
/* Fallback for toolchains whose <limits.h> does not provide ULLONG_MAX. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of two values. Each argument may be evaluated twice. */
#ifndef MIN
# define MIN(x,y) ((x) < (y) ? (x) : (y))
#endif

/* Number of elements in a true array (not a pointer). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

/* 1 when bit number `pos` is set in the byte-array bitmap `map`, else 0.
 * Byte pos/8 holds the bit, at position pos%8 counted from the low end.
 */
#ifndef BIT_AT
# define BIT_AT(map, pos)                                            \
  (!!((unsigned int) (map)[(unsigned int) (pos) >> 3] &              \
      (1 << ((unsigned int) (pos) & 7))))
#endif

/* arr[idx] when idx is inside the array, otherwise `fallback`. */
#ifndef ELEM_AT
# define ELEM_AT(arr, idx, fallback)                                 \
  ((unsigned int) (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : (fallback))
#endif
51
/* Store error code `e` on the parser and write the local byte counter back.
 * Expects `parser` and `nread` to be in scope where it is expanded.
 */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->nread = nread;                                             \
  parser->http_errno = (e);                                          \
} while(0)

/* The current state is kept in the local `p_state`. */
#define CURRENT_STATE() p_state
/* NOTE: the expansion already ends in ';'. */
#define UPDATE_STATE(V) p_state = (enum state) (V);
/* Write the local nread and state back to the parser, then return V.
 * NOTE: the expansion ends in ';' after `while (0)`, so `RETURN(x);` yields
 * an extra empty statement; avoid it as the unbraced body of an `if` that
 * has an `else`.
 */
#define RETURN(V)                                                    \
do {                                                                 \
  parser->nread = nread;                                             \
  parser->state = CURRENT_STATE();                                   \
  return (V);                                                        \
} while (0);
/* Jump to the `reexecute` label so the current byte is dispatched again
 * under the current state. */
#define REEXECUTE()                                                  \
  goto reexecute;                                                    \


/* Branch hints: __builtin_expect under GNU-compatible compilers, plain
 * pass-through otherwise. */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif
77
78
/* Run the notify callback FOR, returning ER if it fails.
 *
 * The local state is stored into parser->state before the callback runs and
 * read back afterwards, so a state change made by the callback is picked up.
 * A non-zero callback result sets HPE_CB_<FOR>. If the parser's errno is
 * anything but HPE_OK after the callback, the enclosing function returns ER.
 * Nothing happens when settings->on_<FOR> is NULL.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (LIKELY(settings->on_##FOR)) {                                  \
    parser->state = CURRENT_STATE();                                 \
    if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
    UPDATE_STATE(parser->state);                                     \
                                                                     \
    /* We either errored above or got paused; get out */             \
    if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Run the notify callback FOR and consume the current byte */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Run the notify callback FOR and don't consume the current byte */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)

/* Run data callback FOR with LEN bytes, returning ER if it fails.
 *
 * Does nothing unless <FOR>_mark is set. The callback receives the mark as
 * the start of the data. The mark is reset to NULL afterwards, also when no
 * callback is installed; it is left untouched on the early-return path.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark) {                                                  \
    if (LIKELY(settings->on_##FOR)) {                                \
      parser->state = CURRENT_STATE();                               \
      if (UNLIKELY(0 !=                                              \
                   settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
      UPDATE_STATE(parser->state);                                   \
                                                                     \
      /* We either errored above or got paused; get out */           \
      if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Run the data callback FOR and consume the current byte.
 * The data passed runs from the mark up to, but not including, `p`. */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Run the data callback FOR and don't consume the current byte */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Set the mark FOR; non-destructive if mark is already set */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (!FOR##_mark) {                                                 \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
142
/* Don't allow the total size of the HTTP headers (including the status
 * line) to exceed max_header_size.  This check is here to protect
 * embedders against denial-of-service attacks where the attacker feeds
 * us a never-ending header that the embedder keeps buffering.
 *
 * This check is arguably the responsibility of embedders but we're doing
 * it on the embedder's behalf because most won't bother and this way we
 * make the web a little safer.  max_header_size is still far bigger
 * than any reasonable request or response so this should never affect
 * day-to-day operation.
 *
 * Mechanics: V is converted to uint32_t and added to the local `nread`
 * counter. Once the total is greater than max_header_size the macro sets
 * HPE_HEADER_OVERFLOW and jumps to the `error` label of the enclosing
 * function.
 */
#define COUNT_HEADER_SIZE(V)                                         \
do {                                                                 \
  nread += (uint32_t)(V);                                            \
  if (UNLIKELY(nread > max_header_size)) {                           \
    SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
    goto error;                                                      \
  }                                                                  \
} while (0)
162
163
/* Lower-case literals used while matching headers. The header-field matcher
 * compares TOKEN()-lower-cased input bytes against these character by
 * character (indexed by parser->index), so they must stay lower-case.
 */
#define PROXY_CONNECTION "proxy-connection"
#define CONNECTION "connection"
#define CONTENT_LENGTH "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE "upgrade"
#define CHUNKED "chunked"
#define KEEP_ALIVE "keep-alive"
#define CLOSE "close"


/* Method names, one per HTTP_METHOD_MAP entry in map order; each entry is
 * the stringified third argument of the map. Indexed by parser->method. */
static const char *method_strings[] =
  {
#define XX(num, name, string) #string,
  HTTP_METHOD_MAP(XX)
#undef XX
  };
180
181
/* Token lookup table, after rfc 2616:
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * tokens[b] is 0 when byte b is not a token character, otherwise the
 * character itself with 'A'-'Z' folded to lower case. SP (0x20) is listed
 * as a token in this table; STRICT_TOKEN() filters it out. Entries
 * 0x80-0xff have no initializer and are therefore 0.
 */
static const char tokens[256] = {
/* 0x00-0x0f: nul soh stx etx eot enq ack bel bs ht nl vt np cr so si */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x10-0x1f: dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x20-0x2f: sp !  "  #  $  %  &  '  (  )  *  +  ,  -  .  / */
  ' ', '!', 0,   '#', '$', '%', '&', '\'', 0,  0,   '*', '+', 0,   '-', '.', 0,
/* 0x30-0x3f: 0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ? */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0,   0,   0,   0,   0,   0,
/* 0x40-0x4f: @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O */
  0,   'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x50-0x5f: P  Q  R  S  T  U  V  W  X  Y  Z  [  backslash  ]  ^  _ */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0,   0,   0,   '^', '_',
/* 0x60-0x6f: `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o */
  '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70-0x7f: p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  del */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0,   '|', 0,   '~', 0
};
222
223
/* Hex digit lookup: unhex[b] is the value 0-15 of hex digit b ('0'-'9',
 * 'A'-'F', 'a'-'f') and -1 for every other byte.
 *
 * All 256 entries are spelled out. Entries without an initializer would be
 * 0, which is indistinguishable from the digit '0', so bytes 0x80-0xff must
 * be set to -1 explicitly for the "-1 means not hex" check to hold for
 * them.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80-0xff: never hex digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
234
235
/* T(v) keeps a bit only in lenient builds; strict builds turn it into 0. */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of "normal" URL characters for bytes 0x00-0x7f, read with BIT_AT():
 * byte k covers characters 8*k .. 8*k+7, lowest bit first. Only the first
 * 16 bytes are initialised; the remaining 16 are 0.
 */
static const uint8_t normal_url_char[32] = {
  /* 0x00-0x07  nul soh stx etx eot enq ack bel: none */
  0x00,
  /* 0x08-0x0f  bs ht nl vt np cr so si: ht and np, lenient builds only */
  T(0x02) | T(0x10),
  /* 0x10-0x17  dle dc1 dc2 dc3 dc4 nak syn etb: none */
  0x00,
  /* 0x18-0x1f  can em sub esc fs gs rs us: none */
  0x00,
  /* 0x20-0x27  sp ! " # $ % & ': all except sp and # */
  0xf6,
  /* 0x28-0x2f  ( ) * + , - . /: all */
  0xff,
  /* 0x30-0x37  0 1 2 3 4 5 6 7: all */
  0xff,
  /* 0x38-0x3f  8 9 : ; < = > ?: all except ? */
  0x7f,
  /* 0x40-0x47  @ A B C D E F G: all */
  0xff,
  /* 0x48-0x4f  H I J K L M N O: all */
  0xff,
  /* 0x50-0x57  P Q R S T U V W: all */
  0xff,
  /* 0x58-0x5f  X Y Z [ backslash ] ^ _: all */
  0xff,
  /* 0x60-0x67  ` a b c d e f g: all */
  0xff,
  /* 0x68-0x6f  h i j k l m n o: all */
  0xff,
  /* 0x70-0x77  p q r s t u v w: all */
  0xff,
  /* 0x78-0x7f  x y z { | } ~ del: all except del */
  0x7f
};

#undef T
278
/* Parser states. The order is significant: enumerator values are implicit,
 * and PARSING_HEADER() relies on every state up to and including
 * s_headers_done being a header state.
 */
enum state {
  s_dead = 1, /* important that this is > 0 */

  s_start_req_or_res,
  s_res_or_resp_H,

  /* response status line */
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_http_major,
  s_res_http_dot,
  s_res_http_minor,
  s_res_http_end,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status_start,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_server_start,
  s_req_server,
  s_req_server_with_at,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_http_I,
  s_req_http_IC,
  s_req_http_major,
  s_req_http_dot,
  s_req_http_minor,
  s_req_http_end,
  s_req_line_almost_done,

  /* header lines */
  s_header_field_start,
  s_header_field,
  s_header_value_discard_ws,
  s_header_value_discard_ws_almost_done,
  s_header_value_discard_lws,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};


/* True while `state` is one of the header states, i.e. not past
 * s_headers_done. The argument is parenthesised so that an expression
 * argument is compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
363
364
/* Values for parser->header_state. Enumerator values are implicit, starting
 * at 0, so the order must not change.
 */
enum header_states {
  h_general = 0,

  /* shared prefix of header names that start with "con" */
  h_C,
  h_CO,
  h_CON,

  /* header field name being compared against a known name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header field name matched a known name */
  h_connection,
  h_content_length,
  h_content_length_num,
  h_content_length_ws,
  h_transfer_encoding,
  h_upgrade,

  h_matching_transfer_encoding_token_start,
  h_matching_transfer_encoding_chunked,
  h_matching_transfer_encoding_token,

  h_matching_connection_token_start,
  h_matching_connection_keep_alive,
  h_matching_connection_close,
  h_matching_connection_upgrade,
  h_matching_connection_token,

  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close,
  h_connection_upgrade
};
399
/* States for the host part of a URL. Enumerator values are implicit,
 * counting up from s_http_host_dead = 1, so the order must not change.
 */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_v6_zone_start,
  s_http_host_v6_zone,
  s_http_host_port_start,
  s_http_host_port
};
415
/* Macros for character classes; TOKEN, IS_URL_CHAR and IS_HOST_CHAR depend
 * on strict-mode.
 *
 * Every macro argument is parenthesised at each use, so an expression
 * argument is evaluated as a whole. Arguments may still be evaluated more
 * than once; pass side-effect-free expressions only.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20, which maps 'A'-'Z' onto 'a'-'z'. Other bytes change too,
 * so the result is only meaningful in range checks such as IS_ALPHA. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* tokens[] lookup that additionally rejects SP. */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            (tokens[(unsigned char)(c)])
/* Lenient builds also accept any byte with the high bit set. */
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is acceptable inside a header: CR, LF, HT (9), or any
 * byte above 31 other than DEL (127). That covers the visible (printable)
 * US-ASCII characters, SP, and %x80-FF.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))

/* Initial state for the parser's configured type. */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
453
454
/* STRICT_CHECK(cond): in strict builds, sets HPE_STRICT and jumps to the
 * enclosing function's `error` label when `cond` is true. In lenient builds
 * it expands to nothing, so `cond` is not evaluated at all.
 *
 * NEW_MESSAGE(): evaluates to start_state, except that strict builds yield
 * s_dead when http_should_keep_alive(parser) is false.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
468
469
/* Map errno values to strings for human-readable output.
 *
 * Each HTTP_ERRNO_MAP entry (n, s) becomes the row { "HPE_" #n, s }: the
 * stringified name with an "HPE_" prefix, followed by s as the description.
 */
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
static struct {
  const char *name;
  const char *description;
} http_strerror_tab[] = {
  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
};
#undef HTTP_STRERROR_GEN
479
480 int http_message_needs_eof(const http_parser *parser);
481
482 /* Our URL parser.
483 *
484 * This is designed to be shared by http_parser_execute() for URL validation,
485 * hence it has a state transition + byte-for-byte interface. In addition, it
486 * is meant to be embedded in http_parser_parse_url(), which does the dirty
487 * work of turning state transitions URL components for its API.
488 *
489 * This function should only be invoked with non-space characters. It is
490 * assumed that the caller cares about (and can detect) the transition between
491 * URL and non-URL states by looking for these.
492 */
493 static enum state
parse_url_char(enum state s,const char ch)494 parse_url_char(enum state s, const char ch)
495 {
496 if (ch == ' ' || ch == '\r' || ch == '\n') {
497 return s_dead;
498 }
499
500 #if HTTP_PARSER_STRICT
501 if (ch == '\t' || ch == '\f') {
502 return s_dead;
503 }
504 #endif
505
506 switch (s) {
507 case s_req_spaces_before_url:
508 /* Proxied requests are followed by scheme of an absolute URI (alpha).
509 * All methods except CONNECT are followed by '/' or '*'.
510 */
511
512 if (ch == '/' || ch == '*') {
513 return s_req_path;
514 }
515
516 if (IS_ALPHA(ch)) {
517 return s_req_schema;
518 }
519
520 break;
521
522 case s_req_schema:
523 if (IS_ALPHA(ch)) {
524 return s;
525 }
526
527 if (ch == ':') {
528 return s_req_schema_slash;
529 }
530
531 break;
532
533 case s_req_schema_slash:
534 if (ch == '/') {
535 return s_req_schema_slash_slash;
536 }
537
538 break;
539
540 case s_req_schema_slash_slash:
541 if (ch == '/') {
542 return s_req_server_start;
543 }
544
545 break;
546
547 case s_req_server_with_at:
548 if (ch == '@') {
549 return s_dead;
550 }
551
552 /* fall through */
553 case s_req_server_start:
554 case s_req_server:
555 if (ch == '/') {
556 return s_req_path;
557 }
558
559 if (ch == '?') {
560 return s_req_query_string_start;
561 }
562
563 if (ch == '@') {
564 return s_req_server_with_at;
565 }
566
567 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
568 return s_req_server;
569 }
570
571 break;
572
573 case s_req_path:
574 if (IS_URL_CHAR(ch)) {
575 return s;
576 }
577
578 switch (ch) {
579 case '?':
580 return s_req_query_string_start;
581
582 case '#':
583 return s_req_fragment_start;
584 }
585
586 break;
587
588 case s_req_query_string_start:
589 case s_req_query_string:
590 if (IS_URL_CHAR(ch)) {
591 return s_req_query_string;
592 }
593
594 switch (ch) {
595 case '?':
596 /* allow extra '?' in query string */
597 return s_req_query_string;
598
599 case '#':
600 return s_req_fragment_start;
601 }
602
603 break;
604
605 case s_req_fragment_start:
606 if (IS_URL_CHAR(ch)) {
607 return s_req_fragment;
608 }
609
610 switch (ch) {
611 case '?':
612 return s_req_fragment;
613
614 case '#':
615 return s;
616 }
617
618 break;
619
620 case s_req_fragment:
621 if (IS_URL_CHAR(ch)) {
622 return s;
623 }
624
625 switch (ch) {
626 case '?':
627 case '#':
628 return s;
629 }
630
631 break;
632
633 default:
634 break;
635 }
636
637 /* We should never fall out of the switch above unless there's an error */
638 return s_dead;
639 }
640
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)641 size_t http_parser_execute (http_parser *parser,
642 const http_parser_settings *settings,
643 const char *data,
644 size_t len)
645 {
646 char c, ch;
647 int8_t unhex_val;
648 const char *p = data;
649 const char *header_field_mark = 0;
650 const char *header_value_mark = 0;
651 const char *url_mark = 0;
652 const char *body_mark = 0;
653 const char *status_mark = 0;
654 enum state p_state = (enum state) parser->state;
655 const unsigned int lenient = parser->lenient_http_headers;
656 uint32_t nread = parser->nread;
657
658 /* We're in an error state. Don't bother doing anything. */
659 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
660 return 0;
661 }
662
663 if (len == 0) {
664 switch (CURRENT_STATE()) {
665 case s_body_identity_eof:
666 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
667 * we got paused.
668 */
669 CALLBACK_NOTIFY_NOADVANCE(message_complete);
670 return 0;
671
672 case s_dead:
673 case s_start_req_or_res:
674 case s_start_res:
675 case s_start_req:
676 return 0;
677
678 default:
679 SET_ERRNO(HPE_INVALID_EOF_STATE);
680 return 1;
681 }
682 }
683
684
685 if (CURRENT_STATE() == s_header_field)
686 header_field_mark = data;
687 if (CURRENT_STATE() == s_header_value)
688 header_value_mark = data;
689 switch (CURRENT_STATE()) {
690 case s_req_path:
691 case s_req_schema:
692 case s_req_schema_slash:
693 case s_req_schema_slash_slash:
694 case s_req_server_start:
695 case s_req_server:
696 case s_req_server_with_at:
697 case s_req_query_string_start:
698 case s_req_query_string:
699 case s_req_fragment_start:
700 case s_req_fragment:
701 url_mark = data;
702 break;
703 case s_res_status:
704 status_mark = data;
705 break;
706 default:
707 break;
708 }
709
710 for (p=data; p != data + len; p++) {
711 ch = *p;
712
713 if (PARSING_HEADER(CURRENT_STATE()))
714 COUNT_HEADER_SIZE(1);
715
716 reexecute:
717 switch (CURRENT_STATE()) {
718
719 case s_dead:
720 /* this state is used after a 'Connection: close' message
721 * the parser will error out if it reads another message
722 */
723 if (LIKELY(ch == CR || ch == LF))
724 break;
725
726 SET_ERRNO(HPE_CLOSED_CONNECTION);
727 goto error;
728
729 case s_start_req_or_res:
730 {
731 if (ch == CR || ch == LF)
732 break;
733 parser->flags = 0;
734 parser->extra_flags = 0;
735 parser->content_length = ULLONG_MAX;
736
737 if (ch == 'H') {
738 UPDATE_STATE(s_res_or_resp_H);
739
740 CALLBACK_NOTIFY(message_begin);
741 } else {
742 parser->type = HTTP_REQUEST;
743 UPDATE_STATE(s_start_req);
744 REEXECUTE();
745 }
746
747 break;
748 }
749
750 case s_res_or_resp_H:
751 if (ch == 'T') {
752 parser->type = HTTP_RESPONSE;
753 UPDATE_STATE(s_res_HT);
754 } else {
755 if (UNLIKELY(ch != 'E')) {
756 SET_ERRNO(HPE_INVALID_CONSTANT);
757 goto error;
758 }
759
760 parser->type = HTTP_REQUEST;
761 parser->method = HTTP_HEAD;
762 parser->index = 2;
763 UPDATE_STATE(s_req_method);
764 }
765 break;
766
767 case s_start_res:
768 {
769 if (ch == CR || ch == LF)
770 break;
771 parser->flags = 0;
772 parser->extra_flags = 0;
773 parser->content_length = ULLONG_MAX;
774
775 if (ch == 'H') {
776 UPDATE_STATE(s_res_H);
777 } else {
778 SET_ERRNO(HPE_INVALID_CONSTANT);
779 goto error;
780 }
781
782 CALLBACK_NOTIFY(message_begin);
783 break;
784 }
785
786 case s_res_H:
787 STRICT_CHECK(ch != 'T');
788 UPDATE_STATE(s_res_HT);
789 break;
790
791 case s_res_HT:
792 STRICT_CHECK(ch != 'T');
793 UPDATE_STATE(s_res_HTT);
794 break;
795
796 case s_res_HTT:
797 STRICT_CHECK(ch != 'P');
798 UPDATE_STATE(s_res_HTTP);
799 break;
800
801 case s_res_HTTP:
802 STRICT_CHECK(ch != '/');
803 UPDATE_STATE(s_res_http_major);
804 break;
805
806 case s_res_http_major:
807 if (UNLIKELY(!IS_NUM(ch))) {
808 SET_ERRNO(HPE_INVALID_VERSION);
809 goto error;
810 }
811
812 parser->http_major = ch - '0';
813 UPDATE_STATE(s_res_http_dot);
814 break;
815
816 case s_res_http_dot:
817 {
818 if (UNLIKELY(ch != '.')) {
819 SET_ERRNO(HPE_INVALID_VERSION);
820 goto error;
821 }
822
823 UPDATE_STATE(s_res_http_minor);
824 break;
825 }
826
827 case s_res_http_minor:
828 if (UNLIKELY(!IS_NUM(ch))) {
829 SET_ERRNO(HPE_INVALID_VERSION);
830 goto error;
831 }
832
833 parser->http_minor = ch - '0';
834 UPDATE_STATE(s_res_http_end);
835 break;
836
837 case s_res_http_end:
838 {
839 if (UNLIKELY(ch != ' ')) {
840 SET_ERRNO(HPE_INVALID_VERSION);
841 goto error;
842 }
843
844 UPDATE_STATE(s_res_first_status_code);
845 break;
846 }
847
848 case s_res_first_status_code:
849 {
850 if (!IS_NUM(ch)) {
851 if (ch == ' ') {
852 break;
853 }
854
855 SET_ERRNO(HPE_INVALID_STATUS);
856 goto error;
857 }
858 parser->status_code = ch - '0';
859 UPDATE_STATE(s_res_status_code);
860 break;
861 }
862
863 case s_res_status_code:
864 {
865 if (!IS_NUM(ch)) {
866 switch (ch) {
867 case ' ':
868 UPDATE_STATE(s_res_status_start);
869 break;
870 case CR:
871 case LF:
872 UPDATE_STATE(s_res_status_start);
873 REEXECUTE();
874 break;
875 default:
876 SET_ERRNO(HPE_INVALID_STATUS);
877 goto error;
878 }
879 break;
880 }
881
882 parser->status_code *= 10;
883 parser->status_code += ch - '0';
884
885 if (UNLIKELY(parser->status_code > 999)) {
886 SET_ERRNO(HPE_INVALID_STATUS);
887 goto error;
888 }
889
890 break;
891 }
892
893 case s_res_status_start:
894 {
895 MARK(status);
896 UPDATE_STATE(s_res_status);
897 parser->index = 0;
898
899 if (ch == CR || ch == LF)
900 REEXECUTE();
901
902 break;
903 }
904
905 case s_res_status:
906 if (ch == CR) {
907 UPDATE_STATE(s_res_line_almost_done);
908 CALLBACK_DATA(status);
909 break;
910 }
911
912 if (ch == LF) {
913 UPDATE_STATE(s_header_field_start);
914 CALLBACK_DATA(status);
915 break;
916 }
917
918 break;
919
920 case s_res_line_almost_done:
921 STRICT_CHECK(ch != LF);
922 UPDATE_STATE(s_header_field_start);
923 break;
924
925 case s_start_req:
926 {
927 if (ch == CR || ch == LF)
928 break;
929 parser->flags = 0;
930 parser->extra_flags = 0;
931 parser->content_length = ULLONG_MAX;
932
933 if (UNLIKELY(!IS_ALPHA(ch))) {
934 SET_ERRNO(HPE_INVALID_METHOD);
935 goto error;
936 }
937
938 parser->method = (enum http_method) 0;
939 parser->index = 1;
940 switch (ch) {
941 case 'A': parser->method = HTTP_ACL; break;
942 case 'B': parser->method = HTTP_BIND; break;
943 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
944 case 'D': parser->method = HTTP_DELETE; break;
945 case 'G': parser->method = HTTP_GET; break;
946 case 'H': parser->method = HTTP_HEAD; break;
947 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
948 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
949 case 'N': parser->method = HTTP_NOTIFY; break;
950 case 'O': parser->method = HTTP_OPTIONS; break;
951 case 'P': parser->method = HTTP_POST;
952 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
953 break;
954 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
955 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
956 case 'T': parser->method = HTTP_TRACE; break;
957 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
958 default:
959 SET_ERRNO(HPE_INVALID_METHOD);
960 goto error;
961 }
962 UPDATE_STATE(s_req_method);
963
964 CALLBACK_NOTIFY(message_begin);
965
966 break;
967 }
968
969 case s_req_method:
970 {
971 const char *matcher;
972 if (UNLIKELY(ch == '\0')) {
973 SET_ERRNO(HPE_INVALID_METHOD);
974 goto error;
975 }
976
977 matcher = method_strings[parser->method];
978 if (ch == ' ' && matcher[parser->index] == '\0') {
979 UPDATE_STATE(s_req_spaces_before_url);
980 } else if (ch == matcher[parser->index]) {
981 ; /* nada */
982 } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
983
984 switch (parser->method << 16 | parser->index << 8 | ch) {
985 #define XX(meth, pos, ch, new_meth) \
986 case (HTTP_##meth << 16 | pos << 8 | ch): \
987 parser->method = HTTP_##new_meth; break;
988
989 XX(POST, 1, 'U', PUT)
990 XX(POST, 1, 'A', PATCH)
991 XX(POST, 1, 'R', PROPFIND)
992 XX(PUT, 2, 'R', PURGE)
993 XX(CONNECT, 1, 'H', CHECKOUT)
994 XX(CONNECT, 2, 'P', COPY)
995 XX(MKCOL, 1, 'O', MOVE)
996 XX(MKCOL, 1, 'E', MERGE)
997 XX(MKCOL, 1, '-', MSEARCH)
998 XX(MKCOL, 2, 'A', MKACTIVITY)
999 XX(MKCOL, 3, 'A', MKCALENDAR)
1000 XX(SUBSCRIBE, 1, 'E', SEARCH)
1001 XX(SUBSCRIBE, 1, 'O', SOURCE)
1002 XX(REPORT, 2, 'B', REBIND)
1003 XX(PROPFIND, 4, 'P', PROPPATCH)
1004 XX(LOCK, 1, 'I', LINK)
1005 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
1006 XX(UNLOCK, 2, 'B', UNBIND)
1007 XX(UNLOCK, 3, 'I', UNLINK)
1008 #undef XX
1009 default:
1010 SET_ERRNO(HPE_INVALID_METHOD);
1011 goto error;
1012 }
1013 } else {
1014 SET_ERRNO(HPE_INVALID_METHOD);
1015 goto error;
1016 }
1017
1018 ++parser->index;
1019 break;
1020 }
1021
1022 case s_req_spaces_before_url:
1023 {
1024 if (ch == ' ') break;
1025
1026 MARK(url);
1027 if (parser->method == HTTP_CONNECT) {
1028 UPDATE_STATE(s_req_server_start);
1029 }
1030
1031 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1032 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1033 SET_ERRNO(HPE_INVALID_URL);
1034 goto error;
1035 }
1036
1037 break;
1038 }
1039
1040 case s_req_schema:
1041 case s_req_schema_slash:
1042 case s_req_schema_slash_slash:
1043 case s_req_server_start:
1044 {
1045 switch (ch) {
1046 /* No whitespace allowed here */
1047 case ' ':
1048 case CR:
1049 case LF:
1050 SET_ERRNO(HPE_INVALID_URL);
1051 goto error;
1052 default:
1053 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1054 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1055 SET_ERRNO(HPE_INVALID_URL);
1056 goto error;
1057 }
1058 }
1059
1060 break;
1061 }
1062
1063 case s_req_server:
1064 case s_req_server_with_at:
1065 case s_req_path:
1066 case s_req_query_string_start:
1067 case s_req_query_string:
1068 case s_req_fragment_start:
1069 case s_req_fragment:
1070 {
1071 switch (ch) {
1072 case ' ':
1073 UPDATE_STATE(s_req_http_start);
1074 CALLBACK_DATA(url);
1075 break;
1076 case CR:
1077 case LF:
1078 parser->http_major = 0;
1079 parser->http_minor = 9;
1080 UPDATE_STATE((ch == CR) ?
1081 s_req_line_almost_done :
1082 s_header_field_start);
1083 CALLBACK_DATA(url);
1084 break;
1085 default:
1086 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1087 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1088 SET_ERRNO(HPE_INVALID_URL);
1089 goto error;
1090 }
1091 }
1092 break;
1093 }
1094
1095 case s_req_http_start:
1096 switch (ch) {
1097 case ' ':
1098 break;
1099 case 'H':
1100 UPDATE_STATE(s_req_http_H);
1101 break;
1102 case 'I':
1103 if (parser->method == HTTP_SOURCE) {
1104 UPDATE_STATE(s_req_http_I);
1105 break;
1106 }
1107 /* fall through */
1108 default:
1109 SET_ERRNO(HPE_INVALID_CONSTANT);
1110 goto error;
1111 }
1112 break;
1113
1114 case s_req_http_H:
1115 STRICT_CHECK(ch != 'T');
1116 UPDATE_STATE(s_req_http_HT);
1117 break;
1118
1119 case s_req_http_HT:
1120 STRICT_CHECK(ch != 'T');
1121 UPDATE_STATE(s_req_http_HTT);
1122 break;
1123
1124 case s_req_http_HTT:
1125 STRICT_CHECK(ch != 'P');
1126 UPDATE_STATE(s_req_http_HTTP);
1127 break;
1128
1129 case s_req_http_I:
1130 STRICT_CHECK(ch != 'C');
1131 UPDATE_STATE(s_req_http_IC);
1132 break;
1133
1134 case s_req_http_IC:
1135 STRICT_CHECK(ch != 'E');
1136 UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". */
1137 break;
1138
1139 case s_req_http_HTTP:
1140 STRICT_CHECK(ch != '/');
1141 UPDATE_STATE(s_req_http_major);
1142 break;
1143
1144 case s_req_http_major:
1145 if (UNLIKELY(!IS_NUM(ch))) {
1146 SET_ERRNO(HPE_INVALID_VERSION);
1147 goto error;
1148 }
1149
1150 parser->http_major = ch - '0';
1151 UPDATE_STATE(s_req_http_dot);
1152 break;
1153
1154 case s_req_http_dot:
1155 {
1156 if (UNLIKELY(ch != '.')) {
1157 SET_ERRNO(HPE_INVALID_VERSION);
1158 goto error;
1159 }
1160
1161 UPDATE_STATE(s_req_http_minor);
1162 break;
1163 }
1164
1165 case s_req_http_minor:
1166 if (UNLIKELY(!IS_NUM(ch))) {
1167 SET_ERRNO(HPE_INVALID_VERSION);
1168 goto error;
1169 }
1170
1171 parser->http_minor = ch - '0';
1172 UPDATE_STATE(s_req_http_end);
1173 break;
1174
1175 case s_req_http_end:
1176 {
1177 if (ch == CR) {
1178 UPDATE_STATE(s_req_line_almost_done);
1179 break;
1180 }
1181
1182 if (ch == LF) {
1183 UPDATE_STATE(s_header_field_start);
1184 break;
1185 }
1186
1187 SET_ERRNO(HPE_INVALID_VERSION);
1188 goto error;
1189 break;
1190 }
1191
1192 /* end of request line */
1193 case s_req_line_almost_done:
1194 {
1195 if (UNLIKELY(ch != LF)) {
1196 SET_ERRNO(HPE_LF_EXPECTED);
1197 goto error;
1198 }
1199
1200 UPDATE_STATE(s_header_field_start);
1201 break;
1202 }
1203
1204 case s_header_field_start:
1205 {
1206 if (ch == CR) {
1207 UPDATE_STATE(s_headers_almost_done);
1208 break;
1209 }
1210
1211 if (ch == LF) {
1212 /* they might be just sending \n instead of \r\n so this would be
1213 * the second \n to denote the end of headers*/
1214 UPDATE_STATE(s_headers_almost_done);
1215 REEXECUTE();
1216 }
1217
1218 c = TOKEN(ch);
1219
1220 if (UNLIKELY(!c)) {
1221 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1222 goto error;
1223 }
1224
1225 MARK(header_field);
1226
1227 parser->index = 0;
1228 UPDATE_STATE(s_header_field);
1229
1230 switch (c) {
1231 case 'c':
1232 parser->header_state = h_C;
1233 break;
1234
1235 case 'p':
1236 parser->header_state = h_matching_proxy_connection;
1237 break;
1238
1239 case 't':
1240 parser->header_state = h_matching_transfer_encoding;
1241 break;
1242
1243 case 'u':
1244 parser->header_state = h_matching_upgrade;
1245 break;
1246
1247 default:
1248 parser->header_state = h_general;
1249 break;
1250 }
1251 break;
1252 }
1253
1254 case s_header_field:
1255 {
1256 const char* start = p;
1257 for (; p != data + len; p++) {
1258 ch = *p;
1259 c = TOKEN(ch);
1260
1261 if (!c)
1262 break;
1263
1264 switch (parser->header_state) {
1265 case h_general: {
1266 size_t left = data + len - p;
1267 const char* pe = p + MIN(left, max_header_size);
1268 while (p+1 < pe && TOKEN(p[1])) {
1269 p++;
1270 }
1271 break;
1272 }
1273
1274 case h_C:
1275 parser->index++;
1276 parser->header_state = (c == 'o' ? h_CO : h_general);
1277 break;
1278
1279 case h_CO:
1280 parser->index++;
1281 parser->header_state = (c == 'n' ? h_CON : h_general);
1282 break;
1283
1284 case h_CON:
1285 parser->index++;
1286 switch (c) {
1287 case 'n':
1288 parser->header_state = h_matching_connection;
1289 break;
1290 case 't':
1291 parser->header_state = h_matching_content_length;
1292 break;
1293 default:
1294 parser->header_state = h_general;
1295 break;
1296 }
1297 break;
1298
1299 /* connection */
1300
1301 case h_matching_connection:
1302 parser->index++;
1303 if (parser->index > sizeof(CONNECTION)-1
1304 || c != CONNECTION[parser->index]) {
1305 parser->header_state = h_general;
1306 } else if (parser->index == sizeof(CONNECTION)-2) {
1307 parser->header_state = h_connection;
1308 }
1309 break;
1310
1311 /* proxy-connection */
1312
1313 case h_matching_proxy_connection:
1314 parser->index++;
1315 if (parser->index > sizeof(PROXY_CONNECTION)-1
1316 || c != PROXY_CONNECTION[parser->index]) {
1317 parser->header_state = h_general;
1318 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1319 parser->header_state = h_connection;
1320 }
1321 break;
1322
1323 /* content-length */
1324
1325 case h_matching_content_length:
1326 parser->index++;
1327 if (parser->index > sizeof(CONTENT_LENGTH)-1
1328 || c != CONTENT_LENGTH[parser->index]) {
1329 parser->header_state = h_general;
1330 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1331 parser->header_state = h_content_length;
1332 }
1333 break;
1334
1335 /* transfer-encoding */
1336
1337 case h_matching_transfer_encoding:
1338 parser->index++;
1339 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1340 || c != TRANSFER_ENCODING[parser->index]) {
1341 parser->header_state = h_general;
1342 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1343 parser->header_state = h_transfer_encoding;
1344 parser->extra_flags |= F_TRANSFER_ENCODING >> 8;
1345 }
1346 break;
1347
1348 /* upgrade */
1349
1350 case h_matching_upgrade:
1351 parser->index++;
1352 if (parser->index > sizeof(UPGRADE)-1
1353 || c != UPGRADE[parser->index]) {
1354 parser->header_state = h_general;
1355 } else if (parser->index == sizeof(UPGRADE)-2) {
1356 parser->header_state = h_upgrade;
1357 }
1358 break;
1359
1360 case h_connection:
1361 case h_content_length:
1362 case h_transfer_encoding:
1363 case h_upgrade:
1364 if (ch != ' ') parser->header_state = h_general;
1365 break;
1366
1367 default:
1368 assert(0 && "Unknown header_state");
1369 break;
1370 }
1371 }
1372
1373 if (p == data + len) {
1374 --p;
1375 COUNT_HEADER_SIZE(p - start);
1376 break;
1377 }
1378
1379 COUNT_HEADER_SIZE(p - start);
1380
1381 if (ch == ':') {
1382 UPDATE_STATE(s_header_value_discard_ws);
1383 CALLBACK_DATA(header_field);
1384 break;
1385 }
1386
1387 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1388 goto error;
1389 }
1390
1391 case s_header_value_discard_ws:
1392 if (ch == ' ' || ch == '\t') break;
1393
1394 if (ch == CR) {
1395 UPDATE_STATE(s_header_value_discard_ws_almost_done);
1396 break;
1397 }
1398
1399 if (ch == LF) {
1400 UPDATE_STATE(s_header_value_discard_lws);
1401 break;
1402 }
1403
1404 /* fall through */
1405
1406 case s_header_value_start:
1407 {
1408 MARK(header_value);
1409
1410 UPDATE_STATE(s_header_value);
1411 parser->index = 0;
1412
1413 c = LOWER(ch);
1414
1415 switch (parser->header_state) {
1416 case h_upgrade:
1417 parser->flags |= F_UPGRADE;
1418 parser->header_state = h_general;
1419 break;
1420
1421 case h_transfer_encoding:
1422 /* looking for 'Transfer-Encoding: chunked' */
1423 if ('c' == c) {
1424 parser->header_state = h_matching_transfer_encoding_chunked;
1425 } else {
1426 parser->header_state = h_matching_transfer_encoding_token;
1427 }
1428 break;
1429
1430 /* Multi-value `Transfer-Encoding` header */
1431 case h_matching_transfer_encoding_token_start:
1432 break;
1433
1434 case h_content_length:
1435 if (UNLIKELY(!IS_NUM(ch))) {
1436 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1437 goto error;
1438 }
1439
1440 if (parser->flags & F_CONTENTLENGTH) {
1441 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1442 goto error;
1443 }
1444
1445 parser->flags |= F_CONTENTLENGTH;
1446 parser->content_length = ch - '0';
1447 parser->header_state = h_content_length_num;
1448 break;
1449
1450 /* when obsolete line folding is encountered for content length
1451 * continue to the s_header_value state */
1452 case h_content_length_ws:
1453 break;
1454
1455 case h_connection:
1456 /* looking for 'Connection: keep-alive' */
1457 if (c == 'k') {
1458 parser->header_state = h_matching_connection_keep_alive;
1459 /* looking for 'Connection: close' */
1460 } else if (c == 'c') {
1461 parser->header_state = h_matching_connection_close;
1462 } else if (c == 'u') {
1463 parser->header_state = h_matching_connection_upgrade;
1464 } else {
1465 parser->header_state = h_matching_connection_token;
1466 }
1467 break;
1468
1469 /* Multi-value `Connection` header */
1470 case h_matching_connection_token_start:
1471 break;
1472
1473 default:
1474 parser->header_state = h_general;
1475 break;
1476 }
1477 break;
1478 }
1479
1480 case s_header_value:
1481 {
1482 const char* start = p;
1483 enum header_states h_state = (enum header_states) parser->header_state;
1484 for (; p != data + len; p++) {
1485 ch = *p;
1486 if (ch == CR) {
1487 UPDATE_STATE(s_header_almost_done);
1488 parser->header_state = h_state;
1489 CALLBACK_DATA(header_value);
1490 break;
1491 }
1492
1493 if (ch == LF) {
1494 UPDATE_STATE(s_header_almost_done);
1495 COUNT_HEADER_SIZE(p - start);
1496 parser->header_state = h_state;
1497 CALLBACK_DATA_NOADVANCE(header_value);
1498 REEXECUTE();
1499 }
1500
1501 if (!lenient && !IS_HEADER_CHAR(ch)) {
1502 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1503 goto error;
1504 }
1505
1506 c = LOWER(ch);
1507
1508 switch (h_state) {
1509 case h_general:
1510 {
1511 size_t left = data + len - p;
1512 const char* pe = p + MIN(left, max_header_size);
1513
1514 for (; p != pe; p++) {
1515 ch = *p;
1516 if (ch == CR || ch == LF) {
1517 --p;
1518 break;
1519 }
1520 if (!lenient && !IS_HEADER_CHAR(ch)) {
1521 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1522 goto error;
1523 }
1524 }
1525 if (p == data + len)
1526 --p;
1527 break;
1528 }
1529
1530 case h_connection:
1531 case h_transfer_encoding:
1532 assert(0 && "Shouldn't get here.");
1533 break;
1534
1535 case h_content_length:
1536 if (ch == ' ') break;
1537 h_state = h_content_length_num;
1538 /* fall through */
1539
1540 case h_content_length_num:
1541 {
1542 uint64_t t;
1543
1544 if (ch == ' ') {
1545 h_state = h_content_length_ws;
1546 break;
1547 }
1548
1549 if (UNLIKELY(!IS_NUM(ch))) {
1550 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1551 parser->header_state = h_state;
1552 goto error;
1553 }
1554
1555 t = parser->content_length;
1556 t *= 10;
1557 t += ch - '0';
1558
1559 /* Overflow? Test against a conservative limit for simplicity. */
1560 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1561 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1562 parser->header_state = h_state;
1563 goto error;
1564 }
1565
1566 parser->content_length = t;
1567 break;
1568 }
1569
1570 case h_content_length_ws:
1571 if (ch == ' ') break;
1572 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1573 parser->header_state = h_state;
1574 goto error;
1575
1576 /* Transfer-Encoding: chunked */
1577 case h_matching_transfer_encoding_token_start:
1578 /* looking for 'Transfer-Encoding: chunked' */
1579 if ('c' == c) {
1580 h_state = h_matching_transfer_encoding_chunked;
1581 } else if (STRICT_TOKEN(c)) {
1582 /* TODO(indutny): similar code below does this, but why?
1583 * At the very least it seems to be inconsistent given that
1584 * h_matching_transfer_encoding_token does not check for
1585 * `STRICT_TOKEN`
1586 */
1587 h_state = h_matching_transfer_encoding_token;
1588 } else if (c == ' ' || c == '\t') {
1589 /* Skip lws */
1590 } else {
1591 h_state = h_general;
1592 }
1593 break;
1594
1595 case h_matching_transfer_encoding_chunked:
1596 parser->index++;
1597 if (parser->index > sizeof(CHUNKED)-1
1598 || c != CHUNKED[parser->index]) {
1599 h_state = h_matching_transfer_encoding_token;
1600 } else if (parser->index == sizeof(CHUNKED)-2) {
1601 h_state = h_transfer_encoding_chunked;
1602 }
1603 break;
1604
1605 case h_matching_transfer_encoding_token:
1606 if (ch == ',') {
1607 h_state = h_matching_transfer_encoding_token_start;
1608 parser->index = 0;
1609 }
1610 break;
1611
1612 case h_matching_connection_token_start:
1613 /* looking for 'Connection: keep-alive' */
1614 if (c == 'k') {
1615 h_state = h_matching_connection_keep_alive;
1616 /* looking for 'Connection: close' */
1617 } else if (c == 'c') {
1618 h_state = h_matching_connection_close;
1619 } else if (c == 'u') {
1620 h_state = h_matching_connection_upgrade;
1621 } else if (STRICT_TOKEN(c)) {
1622 h_state = h_matching_connection_token;
1623 } else if (c == ' ' || c == '\t') {
1624 /* Skip lws */
1625 } else {
1626 h_state = h_general;
1627 }
1628 break;
1629
1630 /* looking for 'Connection: keep-alive' */
1631 case h_matching_connection_keep_alive:
1632 parser->index++;
1633 if (parser->index > sizeof(KEEP_ALIVE)-1
1634 || c != KEEP_ALIVE[parser->index]) {
1635 h_state = h_matching_connection_token;
1636 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1637 h_state = h_connection_keep_alive;
1638 }
1639 break;
1640
1641 /* looking for 'Connection: close' */
1642 case h_matching_connection_close:
1643 parser->index++;
1644 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1645 h_state = h_matching_connection_token;
1646 } else if (parser->index == sizeof(CLOSE)-2) {
1647 h_state = h_connection_close;
1648 }
1649 break;
1650
1651 /* looking for 'Connection: upgrade' */
1652 case h_matching_connection_upgrade:
1653 parser->index++;
1654 if (parser->index > sizeof(UPGRADE) - 1 ||
1655 c != UPGRADE[parser->index]) {
1656 h_state = h_matching_connection_token;
1657 } else if (parser->index == sizeof(UPGRADE)-2) {
1658 h_state = h_connection_upgrade;
1659 }
1660 break;
1661
1662 case h_matching_connection_token:
1663 if (ch == ',') {
1664 h_state = h_matching_connection_token_start;
1665 parser->index = 0;
1666 }
1667 break;
1668
1669 case h_transfer_encoding_chunked:
1670 if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1671 break;
1672
1673 case h_connection_keep_alive:
1674 case h_connection_close:
1675 case h_connection_upgrade:
1676 if (ch == ',') {
1677 if (h_state == h_connection_keep_alive) {
1678 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1679 } else if (h_state == h_connection_close) {
1680 parser->flags |= F_CONNECTION_CLOSE;
1681 } else if (h_state == h_connection_upgrade) {
1682 parser->flags |= F_CONNECTION_UPGRADE;
1683 }
1684 h_state = h_matching_connection_token_start;
1685 parser->index = 0;
1686 } else if (ch != ' ') {
1687 h_state = h_matching_connection_token;
1688 }
1689 break;
1690
1691 default:
1692 UPDATE_STATE(s_header_value);
1693 h_state = h_general;
1694 break;
1695 }
1696 }
1697 parser->header_state = h_state;
1698
1699 if (p == data + len)
1700 --p;
1701
1702 COUNT_HEADER_SIZE(p - start);
1703 break;
1704 }
1705
1706 case s_header_almost_done:
1707 {
1708 if (UNLIKELY(ch != LF)) {
1709 SET_ERRNO(HPE_LF_EXPECTED);
1710 goto error;
1711 }
1712
1713 UPDATE_STATE(s_header_value_lws);
1714 break;
1715 }
1716
1717 case s_header_value_lws:
1718 {
1719 if (ch == ' ' || ch == '\t') {
1720 if (parser->header_state == h_content_length_num) {
1721 /* treat obsolete line folding as space */
1722 parser->header_state = h_content_length_ws;
1723 }
1724 UPDATE_STATE(s_header_value_start);
1725 REEXECUTE();
1726 }
1727
1728 /* finished the header */
1729 switch (parser->header_state) {
1730 case h_connection_keep_alive:
1731 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1732 break;
1733 case h_connection_close:
1734 parser->flags |= F_CONNECTION_CLOSE;
1735 break;
1736 case h_transfer_encoding_chunked:
1737 parser->flags |= F_CHUNKED;
1738 break;
1739 case h_connection_upgrade:
1740 parser->flags |= F_CONNECTION_UPGRADE;
1741 break;
1742 default:
1743 break;
1744 }
1745
1746 UPDATE_STATE(s_header_field_start);
1747 REEXECUTE();
1748 }
1749
1750 case s_header_value_discard_ws_almost_done:
1751 {
1752 STRICT_CHECK(ch != LF);
1753 UPDATE_STATE(s_header_value_discard_lws);
1754 break;
1755 }
1756
1757 case s_header_value_discard_lws:
1758 {
1759 if (ch == ' ' || ch == '\t') {
1760 UPDATE_STATE(s_header_value_discard_ws);
1761 break;
1762 } else {
1763 switch (parser->header_state) {
1764 case h_connection_keep_alive:
1765 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1766 break;
1767 case h_connection_close:
1768 parser->flags |= F_CONNECTION_CLOSE;
1769 break;
1770 case h_connection_upgrade:
1771 parser->flags |= F_CONNECTION_UPGRADE;
1772 break;
1773 case h_transfer_encoding_chunked:
1774 parser->flags |= F_CHUNKED;
1775 break;
1776 case h_content_length:
1777 /* do not allow empty content length */
1778 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1779 goto error;
1780 break;
1781 default:
1782 break;
1783 }
1784
1785 /* header value was empty */
1786 MARK(header_value);
1787 UPDATE_STATE(s_header_field_start);
1788 CALLBACK_DATA_NOADVANCE(header_value);
1789 REEXECUTE();
1790 }
1791 }
1792
1793 case s_headers_almost_done:
1794 {
1795 STRICT_CHECK(ch != LF);
1796
1797 if (parser->flags & F_TRAILING) {
1798 /* End of a chunked request */
1799 UPDATE_STATE(s_message_done);
1800 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1801 REEXECUTE();
1802 }
1803
1804 /* Cannot use transfer-encoding and a content-length header together
1805 per the HTTP specification. (RFC 7230 Section 3.3.3) */
1806 if ((parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) &&
1807 (parser->flags & F_CONTENTLENGTH)) {
1808 /* Allow it for lenient parsing as long as `Transfer-Encoding` is
1809 * not `chunked`
1810 */
1811 if (!lenient || (parser->flags & F_CHUNKED)) {
1812 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1813 goto error;
1814 }
1815 }
1816
1817 UPDATE_STATE(s_headers_done);
1818
1819 /* Set this here so that on_headers_complete() callbacks can see it */
1820 if ((parser->flags & F_UPGRADE) &&
1821 (parser->flags & F_CONNECTION_UPGRADE)) {
1822 /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1823 * mandatory only when it is a 101 Switching Protocols response,
1824 * otherwise it is purely informational, to announce support.
1825 */
1826 parser->upgrade =
1827 (parser->type == HTTP_REQUEST || parser->status_code == 101);
1828 } else {
1829 parser->upgrade = (parser->method == HTTP_CONNECT);
1830 }
1831
1832 /* Here we call the headers_complete callback. This is somewhat
1833 * different than other callbacks because if the user returns 1, we
1834 * will interpret that as saying that this message has no body. This
1835 * is needed for the annoying case of receiving a response to a HEAD
1836 * request.
1837 *
1838 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1839 * we have to simulate it by handling a change in errno below.
1840 */
1841 if (settings->on_headers_complete) {
1842 switch (settings->on_headers_complete(parser)) {
1843 case 0:
1844 break;
1845
1846 case 2:
1847 parser->upgrade = 1;
1848
1849 /* fall through */
1850 case 1:
1851 parser->flags |= F_SKIPBODY;
1852 break;
1853
1854 default:
1855 SET_ERRNO(HPE_CB_headers_complete);
1856 RETURN(p - data); /* Error */
1857 }
1858 }
1859
1860 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1861 RETURN(p - data);
1862 }
1863
1864 REEXECUTE();
1865 }
1866
1867 case s_headers_done:
1868 {
1869 int hasBody;
1870 STRICT_CHECK(ch != LF);
1871
1872 parser->nread = 0;
1873 nread = 0;
1874
1875 hasBody = parser->flags & F_CHUNKED ||
1876 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1877 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1878 (parser->flags & F_SKIPBODY) || !hasBody)) {
1879 /* Exit, the rest of the message is in a different protocol. */
1880 UPDATE_STATE(NEW_MESSAGE());
1881 CALLBACK_NOTIFY(message_complete);
1882 RETURN((p - data) + 1);
1883 }
1884
1885 if (parser->flags & F_SKIPBODY) {
1886 UPDATE_STATE(NEW_MESSAGE());
1887 CALLBACK_NOTIFY(message_complete);
1888 } else if (parser->flags & F_CHUNKED) {
1889 /* chunked encoding - ignore Content-Length header,
1890 * prepare for a chunk */
1891 UPDATE_STATE(s_chunk_size_start);
1892 } else if (parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) {
1893 if (parser->type == HTTP_REQUEST && !lenient) {
1894 /* RFC 7230 3.3.3 */
1895
1896 /* If a Transfer-Encoding header field
1897 * is present in a request and the chunked transfer coding is not
1898 * the final encoding, the message body length cannot be determined
1899 * reliably; the server MUST respond with the 400 (Bad Request)
1900 * status code and then close the connection.
1901 */
1902 SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1903 RETURN(p - data); /* Error */
1904 } else {
1905 /* RFC 7230 3.3.3 */
1906
1907 /* If a Transfer-Encoding header field is present in a response and
1908 * the chunked transfer coding is not the final encoding, the
1909 * message body length is determined by reading the connection until
1910 * it is closed by the server.
1911 */
1912 UPDATE_STATE(s_body_identity_eof);
1913 }
1914 } else {
1915 if (parser->content_length == 0) {
1916 /* Content-Length header given but zero: Content-Length: 0\r\n */
1917 UPDATE_STATE(NEW_MESSAGE());
1918 CALLBACK_NOTIFY(message_complete);
1919 } else if (parser->content_length != ULLONG_MAX) {
1920 /* Content-Length header given and non-zero */
1921 UPDATE_STATE(s_body_identity);
1922 } else {
1923 if (!http_message_needs_eof(parser)) {
1924 /* Assume content-length 0 - read the next */
1925 UPDATE_STATE(NEW_MESSAGE());
1926 CALLBACK_NOTIFY(message_complete);
1927 } else {
1928 /* Read body until EOF */
1929 UPDATE_STATE(s_body_identity_eof);
1930 }
1931 }
1932 }
1933
1934 break;
1935 }
1936
1937 case s_body_identity:
1938 {
1939 uint64_t to_read = MIN(parser->content_length,
1940 (uint64_t) ((data + len) - p));
1941
1942 assert(parser->content_length != 0
1943 && parser->content_length != ULLONG_MAX);
1944
1945 /* The difference between advancing content_length and p is because
1946 * the latter will automatically advance on the next loop iteration.
1947 * Further, if content_length ends up at 0, we want to see the last
1948 * byte again for our message complete callback.
1949 */
1950 MARK(body);
1951 parser->content_length -= to_read;
1952 p += to_read - 1;
1953
1954 if (parser->content_length == 0) {
1955 UPDATE_STATE(s_message_done);
1956
1957 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1958 *
1959 * The alternative to doing this is to wait for the next byte to
1960 * trigger the data callback, just as in every other case. The
1961 * problem with this is that this makes it difficult for the test
1962 * harness to distinguish between complete-on-EOF and
1963 * complete-on-length. It's not clear that this distinction is
1964 * important for applications, but let's keep it for now.
1965 */
1966 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1967 REEXECUTE();
1968 }
1969
1970 break;
1971 }
1972
1973 /* read until EOF */
1974 case s_body_identity_eof:
1975 MARK(body);
1976 p = data + len - 1;
1977
1978 break;
1979
1980 case s_message_done:
1981 UPDATE_STATE(NEW_MESSAGE());
1982 CALLBACK_NOTIFY(message_complete);
1983 if (parser->upgrade) {
1984 /* Exit, the rest of the message is in a different protocol. */
1985 RETURN((p - data) + 1);
1986 }
1987 break;
1988
1989 case s_chunk_size_start:
1990 {
1991 assert(nread == 1);
1992 assert(parser->flags & F_CHUNKED);
1993
1994 unhex_val = unhex[(unsigned char)ch];
1995 if (UNLIKELY(unhex_val == -1)) {
1996 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1997 goto error;
1998 }
1999
2000 parser->content_length = unhex_val;
2001 UPDATE_STATE(s_chunk_size);
2002 break;
2003 }
2004
2005 case s_chunk_size:
2006 {
2007 uint64_t t;
2008
2009 assert(parser->flags & F_CHUNKED);
2010
2011 if (ch == CR) {
2012 UPDATE_STATE(s_chunk_size_almost_done);
2013 break;
2014 }
2015
2016 unhex_val = unhex[(unsigned char)ch];
2017
2018 if (unhex_val == -1) {
2019 if (ch == ';' || ch == ' ') {
2020 UPDATE_STATE(s_chunk_parameters);
2021 break;
2022 }
2023
2024 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2025 goto error;
2026 }
2027
2028 t = parser->content_length;
2029 t *= 16;
2030 t += unhex_val;
2031
2032 /* Overflow? Test against a conservative limit for simplicity. */
2033 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2034 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2035 goto error;
2036 }
2037
2038 parser->content_length = t;
2039 break;
2040 }
2041
2042 case s_chunk_parameters:
2043 {
2044 assert(parser->flags & F_CHUNKED);
2045 /* just ignore this shit. TODO check for overflow */
2046 if (ch == CR) {
2047 UPDATE_STATE(s_chunk_size_almost_done);
2048 break;
2049 }
2050 break;
2051 }
2052
2053 case s_chunk_size_almost_done:
2054 {
2055 assert(parser->flags & F_CHUNKED);
2056 STRICT_CHECK(ch != LF);
2057
2058 parser->nread = 0;
2059 nread = 0;
2060
2061 if (parser->content_length == 0) {
2062 parser->flags |= F_TRAILING;
2063 UPDATE_STATE(s_header_field_start);
2064 } else {
2065 UPDATE_STATE(s_chunk_data);
2066 }
2067 CALLBACK_NOTIFY(chunk_header);
2068 break;
2069 }
2070
2071 case s_chunk_data:
2072 {
2073 uint64_t to_read = MIN(parser->content_length,
2074 (uint64_t) ((data + len) - p));
2075
2076 assert(parser->flags & F_CHUNKED);
2077 assert(parser->content_length != 0
2078 && parser->content_length != ULLONG_MAX);
2079
2080 /* See the explanation in s_body_identity for why the content
2081 * length and data pointers are managed this way.
2082 */
2083 MARK(body);
2084 parser->content_length -= to_read;
2085 p += to_read - 1;
2086
2087 if (parser->content_length == 0) {
2088 UPDATE_STATE(s_chunk_data_almost_done);
2089 }
2090
2091 break;
2092 }
2093
2094 case s_chunk_data_almost_done:
2095 assert(parser->flags & F_CHUNKED);
2096 assert(parser->content_length == 0);
2097 STRICT_CHECK(ch != CR);
2098 UPDATE_STATE(s_chunk_data_done);
2099 CALLBACK_DATA(body);
2100 break;
2101
2102 case s_chunk_data_done:
2103 assert(parser->flags & F_CHUNKED);
2104 STRICT_CHECK(ch != LF);
2105 parser->nread = 0;
2106 nread = 0;
2107 UPDATE_STATE(s_chunk_size_start);
2108 CALLBACK_NOTIFY(chunk_complete);
2109 break;
2110
2111 default:
2112 assert(0 && "unhandled state");
2113 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2114 goto error;
2115 }
2116 }
2117
2118 /* Run callbacks for any marks that we have leftover after we ran out of
2119 * bytes. There should be at most one of these set, so it's OK to invoke
2120 * them in series (unset marks will not result in callbacks).
2121 *
2122 * We use the NOADVANCE() variety of callbacks here because 'p' has already
2123 * overflowed 'data' and this allows us to correct for the off-by-one that
2124 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2125 * value that's in-bounds).
2126 */
2127
2128 assert(((header_field_mark ? 1 : 0) +
2129 (header_value_mark ? 1 : 0) +
2130 (url_mark ? 1 : 0) +
2131 (body_mark ? 1 : 0) +
2132 (status_mark ? 1 : 0)) <= 1);
2133
2134 CALLBACK_DATA_NOADVANCE(header_field);
2135 CALLBACK_DATA_NOADVANCE(header_value);
2136 CALLBACK_DATA_NOADVANCE(url);
2137 CALLBACK_DATA_NOADVANCE(body);
2138 CALLBACK_DATA_NOADVANCE(status);
2139
2140 RETURN(len);
2141
2142 error:
2143 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2144 SET_ERRNO(HPE_UNKNOWN);
2145 }
2146
2147 RETURN(p - data);
2148 }
2149
2150
2151 /* Does the parser need to see an EOF to find the end of the message? */
2152 int
http_message_needs_eof(const http_parser * parser)2153 http_message_needs_eof (const http_parser *parser)
2154 {
2155 if (parser->type == HTTP_REQUEST) {
2156 return 0;
2157 }
2158
2159 /* See RFC 2616 section 4.4 */
2160 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2161 parser->status_code == 204 || /* No Content */
2162 parser->status_code == 304 || /* Not Modified */
2163 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2164 return 0;
2165 }
2166
2167 /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2168 if ((parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) &&
2169 (parser->flags & F_CHUNKED) == 0) {
2170 return 1;
2171 }
2172
2173 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2174 return 0;
2175 }
2176
2177 return 1;
2178 }
2179
2180
2181 int
http_should_keep_alive(const http_parser * parser)2182 http_should_keep_alive (const http_parser *parser)
2183 {
2184 if (parser->http_major > 0 && parser->http_minor > 0) {
2185 /* HTTP/1.1 */
2186 if (parser->flags & F_CONNECTION_CLOSE) {
2187 return 0;
2188 }
2189 } else {
2190 /* HTTP/1.0 or earlier */
2191 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2192 return 0;
2193 }
2194 }
2195
2196 return !http_message_needs_eof(parser);
2197 }
2198
2199
2200 const char *
http_method_str(enum http_method m)2201 http_method_str (enum http_method m)
2202 {
2203 return ELEM_AT(method_strings, m, "<unknown>");
2204 }
2205
2206 const char *
http_status_str(enum http_status s)2207 http_status_str (enum http_status s)
2208 {
2209 switch (s) {
2210 #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2211 HTTP_STATUS_MAP(XX)
2212 #undef XX
2213 default: return "<unknown>";
2214 }
2215 }
2216
2217 void
http_parser_init(http_parser * parser,enum http_parser_type t)2218 http_parser_init (http_parser *parser, enum http_parser_type t)
2219 {
2220 void *data = parser->data; /* preserve application data */
2221 memset(parser, 0, sizeof(*parser));
2222 parser->data = data;
2223 parser->type = t;
2224 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2225 parser->http_errno = HPE_OK;
2226 }
2227
2228 void
http_parser_settings_init(http_parser_settings * settings)2229 http_parser_settings_init(http_parser_settings *settings)
2230 {
2231 memset(settings, 0, sizeof(*settings));
2232 }
2233
2234 const char *
http_errno_name(enum http_errno err)2235 http_errno_name(enum http_errno err) {
2236 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2237 return http_strerror_tab[err].name;
2238 }
2239
2240 const char *
http_errno_description(enum http_errno err)2241 http_errno_description(enum http_errno err) {
2242 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2243 return http_strerror_tab[err].description;
2244 }
2245
2246 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)2247 http_parse_host_char(enum http_host_state s, const char ch) {
2248 switch(s) {
2249 case s_http_userinfo:
2250 case s_http_userinfo_start:
2251 if (ch == '@') {
2252 return s_http_host_start;
2253 }
2254
2255 if (IS_USERINFO_CHAR(ch)) {
2256 return s_http_userinfo;
2257 }
2258 break;
2259
2260 case s_http_host_start:
2261 if (ch == '[') {
2262 return s_http_host_v6_start;
2263 }
2264
2265 if (IS_HOST_CHAR(ch)) {
2266 return s_http_host;
2267 }
2268
2269 break;
2270
2271 case s_http_host:
2272 if (IS_HOST_CHAR(ch)) {
2273 return s_http_host;
2274 }
2275
2276 /* fall through */
2277 case s_http_host_v6_end:
2278 if (ch == ':') {
2279 return s_http_host_port_start;
2280 }
2281
2282 break;
2283
2284 case s_http_host_v6:
2285 if (ch == ']') {
2286 return s_http_host_v6_end;
2287 }
2288
2289 /* fall through */
2290 case s_http_host_v6_start:
2291 if (IS_HEX(ch) || ch == ':' || ch == '.') {
2292 return s_http_host_v6;
2293 }
2294
2295 if (s == s_http_host_v6 && ch == '%') {
2296 return s_http_host_v6_zone_start;
2297 }
2298 break;
2299
2300 case s_http_host_v6_zone:
2301 if (ch == ']') {
2302 return s_http_host_v6_end;
2303 }
2304
2305 /* fall through */
2306 case s_http_host_v6_zone_start:
2307 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2308 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2309 ch == '~') {
2310 return s_http_host_v6_zone;
2311 }
2312 break;
2313
2314 case s_http_host_port:
2315 case s_http_host_port_start:
2316 if (IS_NUM(ch)) {
2317 return s_http_host_port;
2318 }
2319
2320 break;
2321
2322 default:
2323 break;
2324 }
2325 return s_http_host_dead;
2326 }
2327
2328 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)2329 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2330 enum http_host_state s;
2331
2332 const char *p;
2333 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2334
2335 assert(u->field_set & (1 << UF_HOST));
2336
2337 u->field_data[UF_HOST].len = 0;
2338
2339 s = found_at ? s_http_userinfo_start : s_http_host_start;
2340
2341 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2342 enum http_host_state new_s = http_parse_host_char(s, *p);
2343
2344 if (new_s == s_http_host_dead) {
2345 return 1;
2346 }
2347
2348 switch(new_s) {
2349 case s_http_host:
2350 if (s != s_http_host) {
2351 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2352 }
2353 u->field_data[UF_HOST].len++;
2354 break;
2355
2356 case s_http_host_v6:
2357 if (s != s_http_host_v6) {
2358 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2359 }
2360 u->field_data[UF_HOST].len++;
2361 break;
2362
2363 case s_http_host_v6_zone_start:
2364 case s_http_host_v6_zone:
2365 u->field_data[UF_HOST].len++;
2366 break;
2367
2368 case s_http_host_port:
2369 if (s != s_http_host_port) {
2370 u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2371 u->field_data[UF_PORT].len = 0;
2372 u->field_set |= (1 << UF_PORT);
2373 }
2374 u->field_data[UF_PORT].len++;
2375 break;
2376
2377 case s_http_userinfo:
2378 if (s != s_http_userinfo) {
2379 u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2380 u->field_data[UF_USERINFO].len = 0;
2381 u->field_set |= (1 << UF_USERINFO);
2382 }
2383 u->field_data[UF_USERINFO].len++;
2384 break;
2385
2386 default:
2387 break;
2388 }
2389 s = new_s;
2390 }
2391
2392 /* Make sure we don't end somewhere unexpected */
2393 switch (s) {
2394 case s_http_host_start:
2395 case s_http_host_v6_start:
2396 case s_http_host_v6:
2397 case s_http_host_v6_zone_start:
2398 case s_http_host_v6_zone:
2399 case s_http_host_port_start:
2400 case s_http_userinfo:
2401 case s_http_userinfo_start:
2402 return 1;
2403 default:
2404 break;
2405 }
2406
2407 return 0;
2408 }
2409
2410 void
http_parser_url_init(struct http_parser_url * u)2411 http_parser_url_init(struct http_parser_url *u) {
2412 memset(u, 0, sizeof(*u));
2413 }
2414
2415 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2416 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2417 struct http_parser_url *u)
2418 {
2419 enum state s;
2420 const char *p;
2421 enum http_parser_url_fields uf, old_uf;
2422 int found_at = 0;
2423
2424 if (buflen == 0) {
2425 return 1;
2426 }
2427
2428 u->port = u->field_set = 0;
2429 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2430 old_uf = UF_MAX;
2431
2432 for (p = buf; p < buf + buflen; p++) {
2433 s = parse_url_char(s, *p);
2434
2435 /* Figure out the next field that we're operating on */
2436 switch (s) {
2437 case s_dead:
2438 return 1;
2439
2440 /* Skip delimeters */
2441 case s_req_schema_slash:
2442 case s_req_schema_slash_slash:
2443 case s_req_server_start:
2444 case s_req_query_string_start:
2445 case s_req_fragment_start:
2446 continue;
2447
2448 case s_req_schema:
2449 uf = UF_SCHEMA;
2450 break;
2451
2452 case s_req_server_with_at:
2453 found_at = 1;
2454
2455 /* fall through */
2456 case s_req_server:
2457 uf = UF_HOST;
2458 break;
2459
2460 case s_req_path:
2461 uf = UF_PATH;
2462 break;
2463
2464 case s_req_query_string:
2465 uf = UF_QUERY;
2466 break;
2467
2468 case s_req_fragment:
2469 uf = UF_FRAGMENT;
2470 break;
2471
2472 default:
2473 assert(!"Unexpected state");
2474 return 1;
2475 }
2476
2477 /* Nothing's changed; soldier on */
2478 if (uf == old_uf) {
2479 u->field_data[uf].len++;
2480 continue;
2481 }
2482
2483 u->field_data[uf].off = (uint16_t)(p - buf);
2484 u->field_data[uf].len = 1;
2485
2486 u->field_set |= (1 << uf);
2487 old_uf = uf;
2488 }
2489
2490 /* host must be present if there is a schema */
2491 /* parsing http:///toto will fail */
2492 if ((u->field_set & (1 << UF_SCHEMA)) &&
2493 (u->field_set & (1 << UF_HOST)) == 0) {
2494 return 1;
2495 }
2496
2497 if (u->field_set & (1 << UF_HOST)) {
2498 if (http_parse_host(buf, u, found_at) != 0) {
2499 return 1;
2500 }
2501 }
2502
2503 /* CONNECT requests can only contain "hostname:port" */
2504 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2505 return 1;
2506 }
2507
2508 if (u->field_set & (1 << UF_PORT)) {
2509 uint16_t off;
2510 uint16_t len;
2511 const char* p;
2512 const char* end;
2513 unsigned long v;
2514
2515 off = u->field_data[UF_PORT].off;
2516 len = u->field_data[UF_PORT].len;
2517 end = buf + off + len;
2518
2519 /* NOTE: The characters are already validated and are in the [0-9] range */
2520 assert(off + len <= buflen && "Port number overflow");
2521 v = 0;
2522 for (p = buf + off; p < end; p++) {
2523 v *= 10;
2524 v += *p - '0';
2525
2526 /* Ports have a max value of 2^16 */
2527 if (v > 0xffff) {
2528 return 1;
2529 }
2530 }
2531
2532 u->port = (uint16_t) v;
2533 }
2534
2535 return 0;
2536 }
2537
2538 void
http_parser_pause(http_parser * parser,int paused)2539 http_parser_pause(http_parser *parser, int paused) {
2540 /* Users should only be pausing/unpausing a parser that is not in an error
2541 * state. In non-debug builds, there's not much that we can do about this
2542 * other than ignore it.
2543 */
2544 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2545 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2546 uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2547 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2548 } else {
2549 assert(0 && "Attempting to pause parser in error state");
2550 }
2551 }
2552
2553 int
http_body_is_final(const struct http_parser * parser)2554 http_body_is_final(const struct http_parser *parser) {
2555 return parser->state == s_message_done;
2556 }
2557
2558 unsigned long
http_parser_version(void)2559 http_parser_version(void) {
2560 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2561 HTTP_PARSER_VERSION_MINOR * 0x00100 |
2562 HTTP_PARSER_VERSION_PATCH * 0x00001;
2563 }
2564
2565 void
http_parser_set_max_header_size(uint32_t size)2566 http_parser_set_max_header_size(uint32_t size) {
2567 max_header_size = size;
2568 }
2569