1 /* Copyright Joyent, Inc. and other Node contributors.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to
5  * deal in the Software without restriction, including without limitation the
6  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7  * sell copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19  * IN THE SOFTWARE.
20  */
21 #include "http_parser.h"
22 #include <assert.h>
23 #include <stddef.h>
24 #include <ctype.h>
25 #include <string.h>
26 #include <limits.h>
27 
/* Upper bound on the header byte counter; COUNT_HEADER_SIZE() compares
 * nread against this value. Initialised from HTTP_MAX_HEADER_SIZE.
 */
28 static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
29 
/* Fallback definition, used only when ULLONG_MAX is not already defined. */
30 #ifndef ULLONG_MAX
31 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
32 #endif
33 
/* Smaller of a and b. The selected argument is evaluated twice. */
34 #ifndef MIN
35 # define MIN(a,b) ((a) < (b) ? (a) : (b))
36 #endif
37 
/* Number of elements in array a (sizeof-based, so a must be a real array,
 * not a pointer).
 */
38 #ifndef ARRAY_SIZE
39 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
40 #endif
41 
/* Test bit i of the byte-array bitmap a: byte index is i >> 3, bit index is
 * i & 7. Evaluates to 0 or 1.
 */
42 #ifndef BIT_AT
43 # define BIT_AT(a, i)                                                \
44   (!!((unsigned int) (a)[(unsigned int) (i) >> 3] &                  \
45    (1 << ((unsigned int) (i) & 7))))
46 #endif
47 
/* Bounds-checked read: a[i] when i (as unsigned int) is below
 * ARRAY_SIZE(a), otherwise the fallback value v.
 */
48 #ifndef ELEM_AT
49 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
50 #endif
51 
/* Store error code e in parser->http_errno and write the local byte counter
 * nread back to parser->nread. Requires `parser` and `nread` to be in scope
 * at the expansion site.
 */
52 #define SET_ERRNO(e)                                                 \
53 do {                                                                 \
54   parser->nread = nread;                                             \
55   parser->http_errno = (e);                                          \
56 } while(0)
57 
/* The state lives in the local variable p_state while executing; these two
 * macros read and assign it.
 */
58 #define CURRENT_STATE() p_state
/* NOTE(review): the expansion already ends in ';', so `UPDATE_STATE(x);`
 * produces an additional empty statement.
 */
59 #define UPDATE_STATE(V) p_state = (enum state) (V);
/* Write nread and the current state back to the parser, then return V from
 * the enclosing function.
 * NOTE(review): the ';' after `while (0)` is part of the expansion, so this
 * macro cannot be used as the body of an if that has an else branch.
 */
60 #define RETURN(V)                                                    \
61 do {                                                                 \
62   parser->nread = nread;                                             \
63   parser->state = CURRENT_STATE();                                   \
64   return (V);                                                        \
65 } while (0);
/* Jump to the `reexecute` label. The expansion includes its own ';'. */
66 #define REEXECUTE()                                                  \
67   goto reexecute;                                                    \
68 
69 
/* Branch-prediction hints: __builtin_expect under __GNUC__, otherwise the
 * plain expression.
 */
70 #ifdef __GNUC__
71 # define LIKELY(X) __builtin_expect(!!(X), 1)
72 # define UNLIKELY(X) __builtin_expect(!!(X), 0)
73 #else
74 # define LIKELY(X) (X)
75 # define UNLIKELY(X) (X)
76 #endif
77 
78 
/* The callback macros below expand inside a function that has `parser`,
 * `settings`, `p`, `data`, `nread`, `p_state` and the FOR##_mark pointers in
 * scope. They may return from that function.
 */
79 /* Run the notify callback FOR, returning ER if it fails */
/* Nothing happens when settings->on_FOR is NULL. Otherwise the local state
 * is stored in parser->state before the call and read back afterwards. A
 * non-zero callback result sets HPE_CB_FOR. If the parser's errno is not
 * HPE_OK after the call, the enclosing function returns ER.
 */
80 #define CALLBACK_NOTIFY_(FOR, ER)                                    \
81 do {                                                                 \
82   assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
83                                                                      \
84   if (LIKELY(settings->on_##FOR)) {                                  \
85     parser->state = CURRENT_STATE();                                 \
86     if (UNLIKELY(0 != settings->on_##FOR(parser))) {                 \
87       SET_ERRNO(HPE_CB_##FOR);                                       \
88     }                                                                \
89     UPDATE_STATE(parser->state);                                     \
90                                                                      \
91     /* We either errored above or got paused; get out */             \
92     if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {             \
93       return (ER);                                                   \
94     }                                                                \
95   }                                                                  \
96 } while (0)
97 
98 /* Run the notify callback FOR and consume the current byte */
99 #define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)
100 
101 /* Run the notify callback FOR and don't consume the current byte */
102 #define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
103 
104 /* Run data callback FOR with LEN bytes, returning ER if it fails */
/* Does nothing unless FOR_mark is set. The callback, when registered,
 * receives (parser, FOR_mark, LEN). The state hand-off and the error/pause
 * check are the same as in CALLBACK_NOTIFY_. FOR_mark is reset to NULL
 * afterwards, unless the macro returned ER first.
 */
105 #define CALLBACK_DATA_(FOR, LEN, ER)                                 \
106 do {                                                                 \
107   assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
108                                                                      \
109   if (FOR##_mark) {                                                  \
110     if (LIKELY(settings->on_##FOR)) {                                \
111       parser->state = CURRENT_STATE();                               \
112       if (UNLIKELY(0 !=                                              \
113                    settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
114         SET_ERRNO(HPE_CB_##FOR);                                     \
115       }                                                              \
116       UPDATE_STATE(parser->state);                                   \
117                                                                      \
118       /* We either errored above or got paused; get out */           \
119       if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) {           \
120         return (ER);                                                 \
121       }                                                              \
122     }                                                                \
123     FOR##_mark = NULL;                                               \
124   }                                                                  \
125 } while (0)
126 
127 /* Run the data callback FOR and consume the current byte */
/* The reported length is p - FOR_mark, i.e. the bytes from the mark up to
 * but not including the current byte.
 */
128 #define CALLBACK_DATA(FOR)                                           \
129     CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
130 
131 /* Run the data callback FOR and don't consume the current byte */
132 #define CALLBACK_DATA_NOADVANCE(FOR)                                 \
133     CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
134 
135 /* Set the mark FOR; non-destructive if mark is already set */
136 #define MARK(FOR)                                                    \
137 do {                                                                 \
138   if (!FOR##_mark) {                                                 \
139     FOR##_mark = p;                                                  \
140   }                                                                  \
141 } while (0)
142 
143 /* Don't allow the total size of the HTTP headers (including the status
144  * line) to exceed max_header_size.  This check is here to protect
145  * embedders against denial-of-service attacks where the attacker feeds
146  * us a never-ending header that the embedder keeps buffering.
147  *
148  * This check is arguably the responsibility of embedders but we're doing
149  * it on the embedder's behalf because most won't bother and this way we
150  * make the web a little safer.  max_header_size is still far bigger
151  * than any reasonable request or response so this should never affect
152  * day-to-day operation.
153  */
/* Adds V (converted to uint32_t) to the local counter nread. When the sum
 * exceeds max_header_size it sets HPE_HEADER_OVERFLOW and jumps to the
 * `error` label, so that label must exist in the enclosing function.
 */
154 #define COUNT_HEADER_SIZE(V)                                         \
155 do {                                                                 \
156   nread += (uint32_t)(V);                                            \
157   if (UNLIKELY(nread > max_header_size)) {                           \
158     SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
159     goto error;                                                      \
160   }                                                                  \
161 } while (0)
162 
163 
/* Lower-case spellings of header names and header values as string literals.
 * NOTE(review): the code that uses them is outside the visible part of the
 * file; presumably they are compared against lower-cased input while
 * matching headers -- confirm at the use sites.
 */
164 #define PROXY_CONNECTION "proxy-connection"
165 #define CONNECTION "connection"
166 #define CONTENT_LENGTH "content-length"
167 #define TRANSFER_ENCODING "transfer-encoding"
168 #define UPGRADE "upgrade"
169 #define CHUNKED "chunked"
170 #define KEEP_ALIVE "keep-alive"
171 #define CLOSE "close"
172 
173 
/* Table of method name strings generated from HTTP_METHOD_MAP: for every
 * (num, name, string) entry the XX helper emits the stringified third
 * argument, so the table order follows the order of the map.
 */
174 static const char *method_strings[] =
175   {
176 #define XX(num, name, string) #string,
177   HTTP_METHOD_MAP(XX)
178 #undef XX
179   };
180 
181 
182 /* Tokens as defined by rfc 2616. Also lowercases them.
183  *        token       = 1*<any CHAR except CTLs or separators>
184  *     separators     = "(" | ")" | "<" | ">" | "@"
185  *                    | "," | ";" | ":" | "\" | <">
186  *                    | "/" | "[" | "]" | "?" | "="
187  *                    | "{" | "}" | SP | HT
188  */
/* Lookup table indexed by byte value. An entry of 0 marks a byte that is
 * not a token character; any other entry is the byte itself, with 'A'-'Z'
 * mapped to 'a'-'z'. Only indices 0-127 have initialisers, so entries
 * 128-255 are implicitly 0.
 *
 * Index 32 (space) holds ' ' rather than 0 even though SP is listed as a
 * separator above; STRICT_TOKEN() special-cases ' ' to return 0, while the
 * non-strict TOKEN() reads this table directly.
 */
189 static const char tokens[256] = {
190 /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
191         0,       0,       0,       0,       0,       0,       0,       0,
192 /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
193         0,       0,       0,       0,       0,       0,       0,       0,
194 /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
195         0,       0,       0,       0,       0,       0,       0,       0,
196 /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
197         0,       0,       0,       0,       0,       0,       0,       0,
198 /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
199        ' ',     '!',      0,      '#',     '$',     '%',     '&',    '\'',
200 /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
201         0,       0,      '*',     '+',      0,      '-',     '.',      0,
202 /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
203        '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
204 /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
205        '8',     '9',      0,       0,       0,       0,       0,       0,
206 /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
207         0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
208 /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
209        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
210 /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
211        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
212 /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
213        'x',     'y',     'z',      0,       0,       0,      '^',     '_',
214 /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
215        '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
216 /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
217        'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
218 /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
219        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
220 /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
221        'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
222 
223 
/* Hex digit decoder, indexed by byte value (cast to unsigned char first):
 * yields 0-15 for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for every other byte.
 *
 * All 256 entries are written out on purpose. If only the first 128 were
 * listed, the entries for bytes 0x80-0xFF would be zero-initialised and those
 * bytes would decode as the digit 0 instead of being reported as invalid.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xA0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xB0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xC0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xD0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xE0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xF0 */
  };
234 
235 
/* T(v) keeps a bit only in non-strict builds: it expands to 0 when
 * HTTP_PARSER_STRICT is set and to v otherwise. It is applied below to
 * tab (9) and form feed (12).
 */
236 #if HTTP_PARSER_STRICT
237 # define T(v) 0
238 #else
239 # define T(v) v
240 #endif
241 
242 
/* Bitmap of characters accepted by IS_URL_CHAR(), read with BIT_AT(): each
 * initialiser is one byte covering the 8 characters named in the comment
 * above it, lowest bit first, and a set bit means "accepted". Only the 16
 * bytes for characters 0-127 are listed; the remaining 16 bytes of the
 * 32-byte array are implicitly 0.
 *
 * Cleared bits: all control characters (except tab and form feed in
 * non-strict builds), space, '#', '?' and DEL.
 */
243 static const uint8_t normal_url_char[32] = {
244 /*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
245         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
246 /*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
247         0    | T(2)   |   0    |   0    | T(16)  |   0    |   0    |   0,
248 /*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
249         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
250 /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
251         0    |   0    |   0    |   0    |   0    |   0    |   0    |   0,
252 /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
253         0    |   2    |   4    |   0    |   16   |   32   |   64   |  128,
254 /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
255         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
256 /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
257         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
258 /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
259         1    |   2    |   4    |   8    |   16   |   32   |   64   |   0,
260 /*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
261         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
262 /*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
263         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
264 /*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
265         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
266 /*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
267         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
268 /*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
269         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
270 /* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
271         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
272 /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
273         1    |   2    |   4    |   8    |   16   |   32   |   64   |  128,
274 /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
275         1    |   2    |   4    |   8    |   16   |   32   |   64   |   0, };
276 
277 #undef T
278 
/* States of the main parser state machine. http_parser_execute() loads the
 * value from parser->state into a local and switches on it for every byte.
 *
 * The numeric order matters: PARSING_HEADER() relies on every state up to
 * and including s_headers_done sorting before the body states. Do not
 * reorder enumerators without checking that macro.
 *
 * The s_req_* URL states (s_req_spaces_before_url through s_req_fragment)
 * are also the states that parse_url_char() consumes and returns.
 */
279 enum state
280   { s_dead = 1 /* important that this is > 0 */
281 
282   , s_start_req_or_res
283   , s_res_or_resp_H
284   , s_start_res
285   , s_res_H
286   , s_res_HT
287   , s_res_HTT
288   , s_res_HTTP
289   , s_res_http_major
290   , s_res_http_dot
291   , s_res_http_minor
292   , s_res_http_end
293   , s_res_first_status_code
294   , s_res_status_code
295   , s_res_status_start
296   , s_res_status
297   , s_res_line_almost_done
298 
299   , s_start_req
300 
301   , s_req_method
302   , s_req_spaces_before_url
303   , s_req_schema
304   , s_req_schema_slash
305   , s_req_schema_slash_slash
306   , s_req_server_start
307   , s_req_server
308   , s_req_server_with_at
309   , s_req_path
310   , s_req_query_string_start
311   , s_req_query_string
312   , s_req_fragment_start
313   , s_req_fragment
314   , s_req_http_start
315   , s_req_http_H
316   , s_req_http_HT
317   , s_req_http_HTT
318   , s_req_http_HTTP
319   , s_req_http_I
320   , s_req_http_IC
321   , s_req_http_major
322   , s_req_http_dot
323   , s_req_http_minor
324   , s_req_http_end
325   , s_req_line_almost_done
326 
327   , s_header_field_start
328   , s_header_field
329   , s_header_value_discard_ws
330   , s_header_value_discard_ws_almost_done
331   , s_header_value_discard_lws
332   , s_header_value_start
333   , s_header_value
334   , s_header_value_lws
335 
336   , s_header_almost_done
337 
  /* The chunk-size states below sort before s_headers_done, so
   * PARSING_HEADER() is true for them as well.
   */
338   , s_chunk_size_start
339   , s_chunk_size
340   , s_chunk_parameters
341   , s_chunk_size_almost_done
342 
343   , s_headers_almost_done
344   , s_headers_done
345 
346   /* Important: 's_headers_done' must be the last 'header' state. All
347    * states beyond this must be 'body' states. It is used for overflow
348    * checking. See the PARSING_HEADER() macro.
349    */
350 
351   , s_chunk_data
352   , s_chunk_data_almost_done
353   , s_chunk_data_done
354 
355   , s_body_identity
356   , s_body_identity_eof
357 
358   , s_message_done
359   };
360 
361 
/* True while `state` is at or before s_headers_done. http_parser_execute()
 * uses it to decide whether the current byte counts toward the header size
 * limit.
 */
362 #define PARSING_HEADER(state) (state <= s_headers_done)
363 
364 
/* Sub-states used while examining a header. h_general is 0; the remaining
 * names are grouped as: prefix matching (h_C, h_CO, h_CON), header-name
 * matching (h_matching_*), recognised header names (h_connection ...
 * h_upgrade), value-token matching (h_matching_*_token*), and recognised
 * values (h_transfer_encoding_chunked ... h_connection_upgrade).
 * NOTE(review): the code that drives these transitions is outside the
 * visible part of the file; the grouping above is read off the names only.
 */
365 enum header_states
366   { h_general = 0
367   , h_C
368   , h_CO
369   , h_CON
370 
371   , h_matching_connection
372   , h_matching_proxy_connection
373   , h_matching_content_length
374   , h_matching_transfer_encoding
375   , h_matching_upgrade
376 
377   , h_connection
378   , h_content_length
379   , h_content_length_num
380   , h_content_length_ws
381   , h_transfer_encoding
382   , h_upgrade
383 
384   , h_matching_transfer_encoding_token_start
385   , h_matching_transfer_encoding_chunked
386   , h_matching_transfer_encoding_token
387 
388   , h_matching_connection_token_start
389   , h_matching_connection_keep_alive
390   , h_matching_connection_close
391   , h_matching_connection_upgrade
392   , h_matching_connection_token
393 
394   , h_transfer_encoding_chunked
395   , h_connection_keep_alive
396   , h_connection_close
397   , h_connection_upgrade
398   };
399 
/* States for scanning the authority part of a URL: optional userinfo, then
 * a host (plain or bracketed IPv6 with an optional zone), then an optional
 * port. s_http_host_dead is 1, so no state has the value 0.
 * NOTE(review): the function that walks these states is outside the visible
 * part of the file; the description is derived from the enumerator names.
 */
400 enum http_host_state
401   {
402     s_http_host_dead = 1
403   , s_http_userinfo_start
404   , s_http_userinfo
405   , s_http_host_start
406   , s_http_host_v6_start
407   , s_http_host
408   , s_http_host_v6
409   , s_http_host_v6_end
410   , s_http_host_v6_zone_start
411   , s_http_host_v6_zone
412   , s_http_host_port_start
413   , s_http_host_port
414 };
415 
/* Macros for character classes; depends on strict-mode.
 *
 * Every macro parenthesises its argument, so a compound expression such as
 * a conditional is classified as a whole. Most of them still evaluate the
 * argument more than once: do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

/* Token lookup that never accepts a space, whatever the tokens[] entry for
 * ' ' holds.
 */
#define STRICT_TOKEN(c)     (((c) == ' ') ? 0 : tokens[(unsigned char)(c)])

#if HTTP_PARSER_STRICT
#define TOKEN(c)            STRICT_TOKEN(c)
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
#define TOKEN(c)            tokens[(unsigned char)(c)]
/* Non-strict builds additionally accept any byte with the high bit set. */
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
/* Non-strict builds additionally accept '_' in host names. */
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif

/**
 * Verify that a char is a valid visible (printable) US-ASCII
 * character or %x80-FF. CR, LF and horizontal tab (9) are accepted as well.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))

/* First state of a new message, chosen by the parser type: s_start_req for
 * HTTP_REQUEST, s_start_res otherwise. Requires `parser` in scope.
 */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
453 
454 
/* STRICT_CHECK(cond): in strict builds, when cond is TRUE, set HPE_STRICT
 * and jump to the `error` label; in non-strict builds it expands to nothing,
 * so cond is not evaluated at all.
 *
 * NEW_MESSAGE(): state to use for the next message. Strict builds yield
 * s_dead unless http_should_keep_alive(parser) is true; non-strict builds
 * always yield start_state.
 */
455 #if HTTP_PARSER_STRICT
456 # define STRICT_CHECK(cond)                                          \
457 do {                                                                 \
458   if (cond) {                                                        \
459     SET_ERRNO(HPE_STRICT);                                           \
460     goto error;                                                      \
461   }                                                                  \
462 } while (0)
463 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
464 #else
465 # define STRICT_CHECK(cond)
466 # define NEW_MESSAGE() start_state
467 #endif
468 
469 
470 /* Map errno values to strings for human-readable output */
/* HTTP_ERRNO_MAP expands HTTP_STRERROR_GEN once per error code, producing a
 * { "HPE_<name>", description } pair, so table order follows the map order.
 */
471 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
472 static struct {
473   const char *name;
474   const char *description;
475 } http_strerror_tab[] = {
476   HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
477 };
478 #undef HTTP_STRERROR_GEN
479 
/* Forward declaration; the definition is not in this part of the file. */
480 int http_message_needs_eof(const http_parser *parser);
481 
482 /* Our URL parser.
483  *
484  * This is designed to be shared by http_parser_execute() for URL validation,
485  * hence it has a state transition + byte-for-byte interface. In addition, it
486  * is meant to be embedded in http_parser_parse_url(), which does the dirty
487  * work of turning state transitions URL components for its API.
488  *
489  * This function should only be invoked with non-space characters. It is
490  * assumed that the caller cares about (and can detect) the transition between
491  * URL and non-URL states by looking for these.
492  */
493 static enum state
parse_url_char(enum state s,const char ch)494 parse_url_char(enum state s, const char ch)
495 {
496   if (ch == ' ' || ch == '\r' || ch == '\n') {
497     return s_dead;
498   }
499 
500 #if HTTP_PARSER_STRICT
501   if (ch == '\t' || ch == '\f') {
502     return s_dead;
503   }
504 #endif
505 
506   switch (s) {
507     case s_req_spaces_before_url:
508       /* Proxied requests are followed by scheme of an absolute URI (alpha).
509        * All methods except CONNECT are followed by '/' or '*'.
510        */
511 
512       if (ch == '/' || ch == '*') {
513         return s_req_path;
514       }
515 
516       if (IS_ALPHA(ch)) {
517         return s_req_schema;
518       }
519 
520       break;
521 
522     case s_req_schema:
523       if (IS_ALPHA(ch)) {
524         return s;
525       }
526 
527       if (ch == ':') {
528         return s_req_schema_slash;
529       }
530 
531       break;
532 
533     case s_req_schema_slash:
534       if (ch == '/') {
535         return s_req_schema_slash_slash;
536       }
537 
538       break;
539 
540     case s_req_schema_slash_slash:
541       if (ch == '/') {
542         return s_req_server_start;
543       }
544 
545       break;
546 
547     case s_req_server_with_at:
548       if (ch == '@') {
549         return s_dead;
550       }
551 
552     /* fall through */
553     case s_req_server_start:
554     case s_req_server:
555       if (ch == '/') {
556         return s_req_path;
557       }
558 
559       if (ch == '?') {
560         return s_req_query_string_start;
561       }
562 
563       if (ch == '@') {
564         return s_req_server_with_at;
565       }
566 
567       if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
568         return s_req_server;
569       }
570 
571       break;
572 
573     case s_req_path:
574       if (IS_URL_CHAR(ch)) {
575         return s;
576       }
577 
578       switch (ch) {
579         case '?':
580           return s_req_query_string_start;
581 
582         case '#':
583           return s_req_fragment_start;
584       }
585 
586       break;
587 
588     case s_req_query_string_start:
589     case s_req_query_string:
590       if (IS_URL_CHAR(ch)) {
591         return s_req_query_string;
592       }
593 
594       switch (ch) {
595         case '?':
596           /* allow extra '?' in query string */
597           return s_req_query_string;
598 
599         case '#':
600           return s_req_fragment_start;
601       }
602 
603       break;
604 
605     case s_req_fragment_start:
606       if (IS_URL_CHAR(ch)) {
607         return s_req_fragment;
608       }
609 
610       switch (ch) {
611         case '?':
612           return s_req_fragment;
613 
614         case '#':
615           return s;
616       }
617 
618       break;
619 
620     case s_req_fragment:
621       if (IS_URL_CHAR(ch)) {
622         return s;
623       }
624 
625       switch (ch) {
626         case '?':
627         case '#':
628           return s;
629       }
630 
631       break;
632 
633     default:
634       break;
635   }
636 
637   /* We should never fall out of the switch above unless there's an error */
638   return s_dead;
639 }
640 
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)641 size_t http_parser_execute (http_parser *parser,
642                             const http_parser_settings *settings,
643                             const char *data,
644                             size_t len)
645 {
646   char c, ch;
647   int8_t unhex_val;
648   const char *p = data;
649   const char *header_field_mark = 0;
650   const char *header_value_mark = 0;
651   const char *url_mark = 0;
652   const char *body_mark = 0;
653   const char *status_mark = 0;
654   enum state p_state = (enum state) parser->state;
655   const unsigned int lenient = parser->lenient_http_headers;
656   uint32_t nread = parser->nread;
657 
658   /* We're in an error state. Don't bother doing anything. */
659   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
660     return 0;
661   }
662 
663   if (len == 0) {
664     switch (CURRENT_STATE()) {
665       case s_body_identity_eof:
666         /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
667          * we got paused.
668          */
669         CALLBACK_NOTIFY_NOADVANCE(message_complete);
670         return 0;
671 
672       case s_dead:
673       case s_start_req_or_res:
674       case s_start_res:
675       case s_start_req:
676         return 0;
677 
678       default:
679         SET_ERRNO(HPE_INVALID_EOF_STATE);
680         return 1;
681     }
682   }
683 
684 
685   if (CURRENT_STATE() == s_header_field)
686     header_field_mark = data;
687   if (CURRENT_STATE() == s_header_value)
688     header_value_mark = data;
689   switch (CURRENT_STATE()) {
690   case s_req_path:
691   case s_req_schema:
692   case s_req_schema_slash:
693   case s_req_schema_slash_slash:
694   case s_req_server_start:
695   case s_req_server:
696   case s_req_server_with_at:
697   case s_req_query_string_start:
698   case s_req_query_string:
699   case s_req_fragment_start:
700   case s_req_fragment:
701     url_mark = data;
702     break;
703   case s_res_status:
704     status_mark = data;
705     break;
706   default:
707     break;
708   }
709 
710   for (p=data; p != data + len; p++) {
711     ch = *p;
712 
713     if (PARSING_HEADER(CURRENT_STATE()))
714       COUNT_HEADER_SIZE(1);
715 
716 reexecute:
717     switch (CURRENT_STATE()) {
718 
719       case s_dead:
720         /* this state is used after a 'Connection: close' message
721          * the parser will error out if it reads another message
722          */
723         if (LIKELY(ch == CR || ch == LF))
724           break;
725 
726         SET_ERRNO(HPE_CLOSED_CONNECTION);
727         goto error;
728 
729       case s_start_req_or_res:
730       {
731         if (ch == CR || ch == LF)
732           break;
733         parser->flags = 0;
734         parser->extra_flags = 0;
735         parser->content_length = ULLONG_MAX;
736 
737         if (ch == 'H') {
738           UPDATE_STATE(s_res_or_resp_H);
739 
740           CALLBACK_NOTIFY(message_begin);
741         } else {
742           parser->type = HTTP_REQUEST;
743           UPDATE_STATE(s_start_req);
744           REEXECUTE();
745         }
746 
747         break;
748       }
749 
750       case s_res_or_resp_H:
751         if (ch == 'T') {
752           parser->type = HTTP_RESPONSE;
753           UPDATE_STATE(s_res_HT);
754         } else {
755           if (UNLIKELY(ch != 'E')) {
756             SET_ERRNO(HPE_INVALID_CONSTANT);
757             goto error;
758           }
759 
760           parser->type = HTTP_REQUEST;
761           parser->method = HTTP_HEAD;
762           parser->index = 2;
763           UPDATE_STATE(s_req_method);
764         }
765         break;
766 
767       case s_start_res:
768       {
769         if (ch == CR || ch == LF)
770           break;
771         parser->flags = 0;
772         parser->extra_flags = 0;
773         parser->content_length = ULLONG_MAX;
774 
775         if (ch == 'H') {
776           UPDATE_STATE(s_res_H);
777         } else {
778           SET_ERRNO(HPE_INVALID_CONSTANT);
779           goto error;
780         }
781 
782         CALLBACK_NOTIFY(message_begin);
783         break;
784       }
785 
786       case s_res_H:
787         STRICT_CHECK(ch != 'T');
788         UPDATE_STATE(s_res_HT);
789         break;
790 
791       case s_res_HT:
792         STRICT_CHECK(ch != 'T');
793         UPDATE_STATE(s_res_HTT);
794         break;
795 
796       case s_res_HTT:
797         STRICT_CHECK(ch != 'P');
798         UPDATE_STATE(s_res_HTTP);
799         break;
800 
801       case s_res_HTTP:
802         STRICT_CHECK(ch != '/');
803         UPDATE_STATE(s_res_http_major);
804         break;
805 
806       case s_res_http_major:
807         if (UNLIKELY(!IS_NUM(ch))) {
808           SET_ERRNO(HPE_INVALID_VERSION);
809           goto error;
810         }
811 
812         parser->http_major = ch - '0';
813         UPDATE_STATE(s_res_http_dot);
814         break;
815 
816       case s_res_http_dot:
817       {
818         if (UNLIKELY(ch != '.')) {
819           SET_ERRNO(HPE_INVALID_VERSION);
820           goto error;
821         }
822 
823         UPDATE_STATE(s_res_http_minor);
824         break;
825       }
826 
827       case s_res_http_minor:
828         if (UNLIKELY(!IS_NUM(ch))) {
829           SET_ERRNO(HPE_INVALID_VERSION);
830           goto error;
831         }
832 
833         parser->http_minor = ch - '0';
834         UPDATE_STATE(s_res_http_end);
835         break;
836 
837       case s_res_http_end:
838       {
839         if (UNLIKELY(ch != ' ')) {
840           SET_ERRNO(HPE_INVALID_VERSION);
841           goto error;
842         }
843 
844         UPDATE_STATE(s_res_first_status_code);
845         break;
846       }
847 
848       case s_res_first_status_code:
849       {
850         if (!IS_NUM(ch)) {
851           if (ch == ' ') {
852             break;
853           }
854 
855           SET_ERRNO(HPE_INVALID_STATUS);
856           goto error;
857         }
858         parser->status_code = ch - '0';
859         UPDATE_STATE(s_res_status_code);
860         break;
861       }
862 
863       case s_res_status_code:
864       {
865         if (!IS_NUM(ch)) {
866           switch (ch) {
867             case ' ':
868               UPDATE_STATE(s_res_status_start);
869               break;
870             case CR:
871             case LF:
872               UPDATE_STATE(s_res_status_start);
873               REEXECUTE();
874               break;
875             default:
876               SET_ERRNO(HPE_INVALID_STATUS);
877               goto error;
878           }
879           break;
880         }
881 
882         parser->status_code *= 10;
883         parser->status_code += ch - '0';
884 
885         if (UNLIKELY(parser->status_code > 999)) {
886           SET_ERRNO(HPE_INVALID_STATUS);
887           goto error;
888         }
889 
890         break;
891       }
892 
893       case s_res_status_start:
894       {
895         MARK(status);
896         UPDATE_STATE(s_res_status);
897         parser->index = 0;
898 
899         if (ch == CR || ch == LF)
900           REEXECUTE();
901 
902         break;
903       }
904 
905       case s_res_status:
906         if (ch == CR) {
907           UPDATE_STATE(s_res_line_almost_done);
908           CALLBACK_DATA(status);
909           break;
910         }
911 
912         if (ch == LF) {
913           UPDATE_STATE(s_header_field_start);
914           CALLBACK_DATA(status);
915           break;
916         }
917 
918         break;
919 
920       case s_res_line_almost_done:
921         STRICT_CHECK(ch != LF);
922         UPDATE_STATE(s_header_field_start);
923         break;
924 
925       case s_start_req:
926       {
927         if (ch == CR || ch == LF)
928           break;
929         parser->flags = 0;
930         parser->extra_flags = 0;
931         parser->content_length = ULLONG_MAX;
932 
933         if (UNLIKELY(!IS_ALPHA(ch))) {
934           SET_ERRNO(HPE_INVALID_METHOD);
935           goto error;
936         }
937 
938         parser->method = (enum http_method) 0;
939         parser->index = 1;
940         switch (ch) {
941           case 'A': parser->method = HTTP_ACL; break;
942           case 'B': parser->method = HTTP_BIND; break;
943           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
944           case 'D': parser->method = HTTP_DELETE; break;
945           case 'G': parser->method = HTTP_GET; break;
946           case 'H': parser->method = HTTP_HEAD; break;
947           case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
948           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
949           case 'N': parser->method = HTTP_NOTIFY; break;
950           case 'O': parser->method = HTTP_OPTIONS; break;
951           case 'P': parser->method = HTTP_POST;
952             /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
953             break;
954           case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
955           case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
956           case 'T': parser->method = HTTP_TRACE; break;
957           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
958           default:
959             SET_ERRNO(HPE_INVALID_METHOD);
960             goto error;
961         }
962         UPDATE_STATE(s_req_method);
963 
964         CALLBACK_NOTIFY(message_begin);
965 
966         break;
967       }
968 
969       case s_req_method:
970       {
971         const char *matcher;
972         if (UNLIKELY(ch == '\0')) {
973           SET_ERRNO(HPE_INVALID_METHOD);
974           goto error;
975         }
976 
977         matcher = method_strings[parser->method];
978         if (ch == ' ' && matcher[parser->index] == '\0') {
979           UPDATE_STATE(s_req_spaces_before_url);
980         } else if (ch == matcher[parser->index]) {
981           ; /* nada */
982         } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
983 
984           switch (parser->method << 16 | parser->index << 8 | ch) {
985 #define XX(meth, pos, ch, new_meth) \
986             case (HTTP_##meth << 16 | pos << 8 | ch): \
987               parser->method = HTTP_##new_meth; break;
988 
989             XX(POST,      1, 'U', PUT)
990             XX(POST,      1, 'A', PATCH)
991             XX(POST,      1, 'R', PROPFIND)
992             XX(PUT,       2, 'R', PURGE)
993             XX(CONNECT,   1, 'H', CHECKOUT)
994             XX(CONNECT,   2, 'P', COPY)
995             XX(MKCOL,     1, 'O', MOVE)
996             XX(MKCOL,     1, 'E', MERGE)
997             XX(MKCOL,     1, '-', MSEARCH)
998             XX(MKCOL,     2, 'A', MKACTIVITY)
999             XX(MKCOL,     3, 'A', MKCALENDAR)
1000             XX(SUBSCRIBE, 1, 'E', SEARCH)
1001             XX(SUBSCRIBE, 1, 'O', SOURCE)
1002             XX(REPORT,    2, 'B', REBIND)
1003             XX(PROPFIND,  4, 'P', PROPPATCH)
1004             XX(LOCK,      1, 'I', LINK)
1005             XX(UNLOCK,    2, 'S', UNSUBSCRIBE)
1006             XX(UNLOCK,    2, 'B', UNBIND)
1007             XX(UNLOCK,    3, 'I', UNLINK)
1008 #undef XX
1009             default:
1010               SET_ERRNO(HPE_INVALID_METHOD);
1011               goto error;
1012           }
1013         } else {
1014           SET_ERRNO(HPE_INVALID_METHOD);
1015           goto error;
1016         }
1017 
1018         ++parser->index;
1019         break;
1020       }
1021 
1022       case s_req_spaces_before_url:
1023       {
1024         if (ch == ' ') break;
1025 
1026         MARK(url);
1027         if (parser->method == HTTP_CONNECT) {
1028           UPDATE_STATE(s_req_server_start);
1029         }
1030 
1031         UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1032         if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1033           SET_ERRNO(HPE_INVALID_URL);
1034           goto error;
1035         }
1036 
1037         break;
1038       }
1039 
1040       case s_req_schema:
1041       case s_req_schema_slash:
1042       case s_req_schema_slash_slash:
1043       case s_req_server_start:
1044       {
1045         switch (ch) {
1046           /* No whitespace allowed here */
1047           case ' ':
1048           case CR:
1049           case LF:
1050             SET_ERRNO(HPE_INVALID_URL);
1051             goto error;
1052           default:
1053             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1054             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1055               SET_ERRNO(HPE_INVALID_URL);
1056               goto error;
1057             }
1058         }
1059 
1060         break;
1061       }
1062 
1063       case s_req_server:
1064       case s_req_server_with_at:
1065       case s_req_path:
1066       case s_req_query_string_start:
1067       case s_req_query_string:
1068       case s_req_fragment_start:
1069       case s_req_fragment:
1070       {
1071         switch (ch) {
1072           case ' ':
1073             UPDATE_STATE(s_req_http_start);
1074             CALLBACK_DATA(url);
1075             break;
1076           case CR:
1077           case LF:
1078             parser->http_major = 0;
1079             parser->http_minor = 9;
1080             UPDATE_STATE((ch == CR) ?
1081               s_req_line_almost_done :
1082               s_header_field_start);
1083             CALLBACK_DATA(url);
1084             break;
1085           default:
1086             UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1087             if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1088               SET_ERRNO(HPE_INVALID_URL);
1089               goto error;
1090             }
1091         }
1092         break;
1093       }
1094 
1095       case s_req_http_start:
1096         switch (ch) {
1097           case ' ':
1098             break;
1099           case 'H':
1100             UPDATE_STATE(s_req_http_H);
1101             break;
1102           case 'I':
1103             if (parser->method == HTTP_SOURCE) {
1104               UPDATE_STATE(s_req_http_I);
1105               break;
1106             }
1107             /* fall through */
1108           default:
1109             SET_ERRNO(HPE_INVALID_CONSTANT);
1110             goto error;
1111         }
1112         break;
1113 
1114       case s_req_http_H:
1115         STRICT_CHECK(ch != 'T');
1116         UPDATE_STATE(s_req_http_HT);
1117         break;
1118 
1119       case s_req_http_HT:
1120         STRICT_CHECK(ch != 'T');
1121         UPDATE_STATE(s_req_http_HTT);
1122         break;
1123 
1124       case s_req_http_HTT:
1125         STRICT_CHECK(ch != 'P');
1126         UPDATE_STATE(s_req_http_HTTP);
1127         break;
1128 
1129       case s_req_http_I:
1130         STRICT_CHECK(ch != 'C');
1131         UPDATE_STATE(s_req_http_IC);
1132         break;
1133 
1134       case s_req_http_IC:
1135         STRICT_CHECK(ch != 'E');
1136         UPDATE_STATE(s_req_http_HTTP);  /* Treat "ICE" as "HTTP". */
1137         break;
1138 
1139       case s_req_http_HTTP:
1140         STRICT_CHECK(ch != '/');
1141         UPDATE_STATE(s_req_http_major);
1142         break;
1143 
1144       case s_req_http_major:
1145         if (UNLIKELY(!IS_NUM(ch))) {
1146           SET_ERRNO(HPE_INVALID_VERSION);
1147           goto error;
1148         }
1149 
1150         parser->http_major = ch - '0';
1151         UPDATE_STATE(s_req_http_dot);
1152         break;
1153 
1154       case s_req_http_dot:
1155       {
1156         if (UNLIKELY(ch != '.')) {
1157           SET_ERRNO(HPE_INVALID_VERSION);
1158           goto error;
1159         }
1160 
1161         UPDATE_STATE(s_req_http_minor);
1162         break;
1163       }
1164 
1165       case s_req_http_minor:
1166         if (UNLIKELY(!IS_NUM(ch))) {
1167           SET_ERRNO(HPE_INVALID_VERSION);
1168           goto error;
1169         }
1170 
1171         parser->http_minor = ch - '0';
1172         UPDATE_STATE(s_req_http_end);
1173         break;
1174 
1175       case s_req_http_end:
1176       {
1177         if (ch == CR) {
1178           UPDATE_STATE(s_req_line_almost_done);
1179           break;
1180         }
1181 
1182         if (ch == LF) {
1183           UPDATE_STATE(s_header_field_start);
1184           break;
1185         }
1186 
1187         SET_ERRNO(HPE_INVALID_VERSION);
1188         goto error;
1189         break;
1190       }
1191 
1192       /* end of request line */
1193       case s_req_line_almost_done:
1194       {
1195         if (UNLIKELY(ch != LF)) {
1196           SET_ERRNO(HPE_LF_EXPECTED);
1197           goto error;
1198         }
1199 
1200         UPDATE_STATE(s_header_field_start);
1201         break;
1202       }
1203 
1204       case s_header_field_start:
1205       {
1206         if (ch == CR) {
1207           UPDATE_STATE(s_headers_almost_done);
1208           break;
1209         }
1210 
1211         if (ch == LF) {
1212           /* they might be just sending \n instead of \r\n so this would be
1213            * the second \n to denote the end of headers*/
1214           UPDATE_STATE(s_headers_almost_done);
1215           REEXECUTE();
1216         }
1217 
1218         c = TOKEN(ch);
1219 
1220         if (UNLIKELY(!c)) {
1221           SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1222           goto error;
1223         }
1224 
1225         MARK(header_field);
1226 
1227         parser->index = 0;
1228         UPDATE_STATE(s_header_field);
1229 
1230         switch (c) {
1231           case 'c':
1232             parser->header_state = h_C;
1233             break;
1234 
1235           case 'p':
1236             parser->header_state = h_matching_proxy_connection;
1237             break;
1238 
1239           case 't':
1240             parser->header_state = h_matching_transfer_encoding;
1241             break;
1242 
1243           case 'u':
1244             parser->header_state = h_matching_upgrade;
1245             break;
1246 
1247           default:
1248             parser->header_state = h_general;
1249             break;
1250         }
1251         break;
1252       }
1253 
1254       case s_header_field:
1255       {
1256         const char* start = p;
1257         for (; p != data + len; p++) {
1258           ch = *p;
1259           c = TOKEN(ch);
1260 
1261           if (!c)
1262             break;
1263 
1264           switch (parser->header_state) {
1265             case h_general: {
1266               size_t left = data + len - p;
1267               const char* pe = p + MIN(left, max_header_size);
1268               while (p+1 < pe && TOKEN(p[1])) {
1269                 p++;
1270               }
1271               break;
1272             }
1273 
1274             case h_C:
1275               parser->index++;
1276               parser->header_state = (c == 'o' ? h_CO : h_general);
1277               break;
1278 
1279             case h_CO:
1280               parser->index++;
1281               parser->header_state = (c == 'n' ? h_CON : h_general);
1282               break;
1283 
1284             case h_CON:
1285               parser->index++;
1286               switch (c) {
1287                 case 'n':
1288                   parser->header_state = h_matching_connection;
1289                   break;
1290                 case 't':
1291                   parser->header_state = h_matching_content_length;
1292                   break;
1293                 default:
1294                   parser->header_state = h_general;
1295                   break;
1296               }
1297               break;
1298 
1299             /* connection */
1300 
1301             case h_matching_connection:
1302               parser->index++;
1303               if (parser->index > sizeof(CONNECTION)-1
1304                   || c != CONNECTION[parser->index]) {
1305                 parser->header_state = h_general;
1306               } else if (parser->index == sizeof(CONNECTION)-2) {
1307                 parser->header_state = h_connection;
1308               }
1309               break;
1310 
1311             /* proxy-connection */
1312 
1313             case h_matching_proxy_connection:
1314               parser->index++;
1315               if (parser->index > sizeof(PROXY_CONNECTION)-1
1316                   || c != PROXY_CONNECTION[parser->index]) {
1317                 parser->header_state = h_general;
1318               } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1319                 parser->header_state = h_connection;
1320               }
1321               break;
1322 
1323             /* content-length */
1324 
1325             case h_matching_content_length:
1326               parser->index++;
1327               if (parser->index > sizeof(CONTENT_LENGTH)-1
1328                   || c != CONTENT_LENGTH[parser->index]) {
1329                 parser->header_state = h_general;
1330               } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1331                 parser->header_state = h_content_length;
1332               }
1333               break;
1334 
1335             /* transfer-encoding */
1336 
1337             case h_matching_transfer_encoding:
1338               parser->index++;
1339               if (parser->index > sizeof(TRANSFER_ENCODING)-1
1340                   || c != TRANSFER_ENCODING[parser->index]) {
1341                 parser->header_state = h_general;
1342               } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1343                 parser->header_state = h_transfer_encoding;
1344                 parser->extra_flags |= F_TRANSFER_ENCODING >> 8;
1345               }
1346               break;
1347 
1348             /* upgrade */
1349 
1350             case h_matching_upgrade:
1351               parser->index++;
1352               if (parser->index > sizeof(UPGRADE)-1
1353                   || c != UPGRADE[parser->index]) {
1354                 parser->header_state = h_general;
1355               } else if (parser->index == sizeof(UPGRADE)-2) {
1356                 parser->header_state = h_upgrade;
1357               }
1358               break;
1359 
1360             case h_connection:
1361             case h_content_length:
1362             case h_transfer_encoding:
1363             case h_upgrade:
1364               if (ch != ' ') parser->header_state = h_general;
1365               break;
1366 
1367             default:
1368               assert(0 && "Unknown header_state");
1369               break;
1370           }
1371         }
1372 
1373         if (p == data + len) {
1374           --p;
1375           COUNT_HEADER_SIZE(p - start);
1376           break;
1377         }
1378 
1379         COUNT_HEADER_SIZE(p - start);
1380 
1381         if (ch == ':') {
1382           UPDATE_STATE(s_header_value_discard_ws);
1383           CALLBACK_DATA(header_field);
1384           break;
1385         }
1386 
1387         SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1388         goto error;
1389       }
1390 
1391       case s_header_value_discard_ws:
1392         if (ch == ' ' || ch == '\t') break;
1393 
1394         if (ch == CR) {
1395           UPDATE_STATE(s_header_value_discard_ws_almost_done);
1396           break;
1397         }
1398 
1399         if (ch == LF) {
1400           UPDATE_STATE(s_header_value_discard_lws);
1401           break;
1402         }
1403 
1404         /* fall through */
1405 
1406       case s_header_value_start:
1407       {
1408         MARK(header_value);
1409 
1410         UPDATE_STATE(s_header_value);
1411         parser->index = 0;
1412 
1413         c = LOWER(ch);
1414 
1415         switch (parser->header_state) {
1416           case h_upgrade:
1417             parser->flags |= F_UPGRADE;
1418             parser->header_state = h_general;
1419             break;
1420 
1421           case h_transfer_encoding:
1422             /* looking for 'Transfer-Encoding: chunked' */
1423             if ('c' == c) {
1424               parser->header_state = h_matching_transfer_encoding_chunked;
1425             } else {
1426               parser->header_state = h_matching_transfer_encoding_token;
1427             }
1428             break;
1429 
1430           /* Multi-value `Transfer-Encoding` header */
1431           case h_matching_transfer_encoding_token_start:
1432             break;
1433 
1434           case h_content_length:
1435             if (UNLIKELY(!IS_NUM(ch))) {
1436               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1437               goto error;
1438             }
1439 
1440             if (parser->flags & F_CONTENTLENGTH) {
1441               SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1442               goto error;
1443             }
1444 
1445             parser->flags |= F_CONTENTLENGTH;
1446             parser->content_length = ch - '0';
1447             parser->header_state = h_content_length_num;
1448             break;
1449 
1450           /* when obsolete line folding is encountered for content length
1451            * continue to the s_header_value state */
1452           case h_content_length_ws:
1453             break;
1454 
1455           case h_connection:
1456             /* looking for 'Connection: keep-alive' */
1457             if (c == 'k') {
1458               parser->header_state = h_matching_connection_keep_alive;
1459             /* looking for 'Connection: close' */
1460             } else if (c == 'c') {
1461               parser->header_state = h_matching_connection_close;
1462             } else if (c == 'u') {
1463               parser->header_state = h_matching_connection_upgrade;
1464             } else {
1465               parser->header_state = h_matching_connection_token;
1466             }
1467             break;
1468 
1469           /* Multi-value `Connection` header */
1470           case h_matching_connection_token_start:
1471             break;
1472 
1473           default:
1474             parser->header_state = h_general;
1475             break;
1476         }
1477         break;
1478       }
1479 
1480       case s_header_value:
1481       {
1482         const char* start = p;
1483         enum header_states h_state = (enum header_states) parser->header_state;
1484         for (; p != data + len; p++) {
1485           ch = *p;
1486           if (ch == CR) {
1487             UPDATE_STATE(s_header_almost_done);
1488             parser->header_state = h_state;
1489             CALLBACK_DATA(header_value);
1490             break;
1491           }
1492 
1493           if (ch == LF) {
1494             UPDATE_STATE(s_header_almost_done);
1495             COUNT_HEADER_SIZE(p - start);
1496             parser->header_state = h_state;
1497             CALLBACK_DATA_NOADVANCE(header_value);
1498             REEXECUTE();
1499           }
1500 
1501           if (!lenient && !IS_HEADER_CHAR(ch)) {
1502             SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1503             goto error;
1504           }
1505 
1506           c = LOWER(ch);
1507 
1508           switch (h_state) {
1509             case h_general:
1510               {
1511                 size_t left = data + len - p;
1512                 const char* pe = p + MIN(left, max_header_size);
1513 
1514                 for (; p != pe; p++) {
1515                   ch = *p;
1516                   if (ch == CR || ch == LF) {
1517                     --p;
1518                     break;
1519                   }
1520                   if (!lenient && !IS_HEADER_CHAR(ch)) {
1521                     SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1522                     goto error;
1523                   }
1524                 }
1525                 if (p == data + len)
1526                   --p;
1527                 break;
1528               }
1529 
1530             case h_connection:
1531             case h_transfer_encoding:
1532               assert(0 && "Shouldn't get here.");
1533               break;
1534 
1535             case h_content_length:
1536               if (ch == ' ') break;
1537               h_state = h_content_length_num;
1538               /* fall through */
1539 
1540             case h_content_length_num:
1541             {
1542               uint64_t t;
1543 
1544               if (ch == ' ') {
1545                 h_state = h_content_length_ws;
1546                 break;
1547               }
1548 
1549               if (UNLIKELY(!IS_NUM(ch))) {
1550                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1551                 parser->header_state = h_state;
1552                 goto error;
1553               }
1554 
1555               t = parser->content_length;
1556               t *= 10;
1557               t += ch - '0';
1558 
1559               /* Overflow? Test against a conservative limit for simplicity. */
1560               if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1561                 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1562                 parser->header_state = h_state;
1563                 goto error;
1564               }
1565 
1566               parser->content_length = t;
1567               break;
1568             }
1569 
1570             case h_content_length_ws:
1571               if (ch == ' ') break;
1572               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1573               parser->header_state = h_state;
1574               goto error;
1575 
1576             /* Transfer-Encoding: chunked */
1577             case h_matching_transfer_encoding_token_start:
1578               /* looking for 'Transfer-Encoding: chunked' */
1579               if ('c' == c) {
1580                 h_state = h_matching_transfer_encoding_chunked;
1581               } else if (STRICT_TOKEN(c)) {
1582                 /* TODO(indutny): similar code below does this, but why?
1583                  * At the very least it seems to be inconsistent given that
1584                  * h_matching_transfer_encoding_token does not check for
1585                  * `STRICT_TOKEN`
1586                  */
1587                 h_state = h_matching_transfer_encoding_token;
1588               } else if (c == ' ' || c == '\t') {
1589                 /* Skip lws */
1590               } else {
1591                 h_state = h_general;
1592               }
1593               break;
1594 
1595             case h_matching_transfer_encoding_chunked:
1596               parser->index++;
1597               if (parser->index > sizeof(CHUNKED)-1
1598                   || c != CHUNKED[parser->index]) {
1599                 h_state = h_matching_transfer_encoding_token;
1600               } else if (parser->index == sizeof(CHUNKED)-2) {
1601                 h_state = h_transfer_encoding_chunked;
1602               }
1603               break;
1604 
1605             case h_matching_transfer_encoding_token:
1606               if (ch == ',') {
1607                 h_state = h_matching_transfer_encoding_token_start;
1608                 parser->index = 0;
1609               }
1610               break;
1611 
1612             case h_matching_connection_token_start:
1613               /* looking for 'Connection: keep-alive' */
1614               if (c == 'k') {
1615                 h_state = h_matching_connection_keep_alive;
1616               /* looking for 'Connection: close' */
1617               } else if (c == 'c') {
1618                 h_state = h_matching_connection_close;
1619               } else if (c == 'u') {
1620                 h_state = h_matching_connection_upgrade;
1621               } else if (STRICT_TOKEN(c)) {
1622                 h_state = h_matching_connection_token;
1623               } else if (c == ' ' || c == '\t') {
1624                 /* Skip lws */
1625               } else {
1626                 h_state = h_general;
1627               }
1628               break;
1629 
1630             /* looking for 'Connection: keep-alive' */
1631             case h_matching_connection_keep_alive:
1632               parser->index++;
1633               if (parser->index > sizeof(KEEP_ALIVE)-1
1634                   || c != KEEP_ALIVE[parser->index]) {
1635                 h_state = h_matching_connection_token;
1636               } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1637                 h_state = h_connection_keep_alive;
1638               }
1639               break;
1640 
1641             /* looking for 'Connection: close' */
1642             case h_matching_connection_close:
1643               parser->index++;
1644               if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1645                 h_state = h_matching_connection_token;
1646               } else if (parser->index == sizeof(CLOSE)-2) {
1647                 h_state = h_connection_close;
1648               }
1649               break;
1650 
1651             /* looking for 'Connection: upgrade' */
1652             case h_matching_connection_upgrade:
1653               parser->index++;
1654               if (parser->index > sizeof(UPGRADE) - 1 ||
1655                   c != UPGRADE[parser->index]) {
1656                 h_state = h_matching_connection_token;
1657               } else if (parser->index == sizeof(UPGRADE)-2) {
1658                 h_state = h_connection_upgrade;
1659               }
1660               break;
1661 
1662             case h_matching_connection_token:
1663               if (ch == ',') {
1664                 h_state = h_matching_connection_token_start;
1665                 parser->index = 0;
1666               }
1667               break;
1668 
1669             case h_transfer_encoding_chunked:
1670               if (ch != ' ') h_state = h_matching_transfer_encoding_token;
1671               break;
1672 
1673             case h_connection_keep_alive:
1674             case h_connection_close:
1675             case h_connection_upgrade:
1676               if (ch == ',') {
1677                 if (h_state == h_connection_keep_alive) {
1678                   parser->flags |= F_CONNECTION_KEEP_ALIVE;
1679                 } else if (h_state == h_connection_close) {
1680                   parser->flags |= F_CONNECTION_CLOSE;
1681                 } else if (h_state == h_connection_upgrade) {
1682                   parser->flags |= F_CONNECTION_UPGRADE;
1683                 }
1684                 h_state = h_matching_connection_token_start;
1685                 parser->index = 0;
1686               } else if (ch != ' ') {
1687                 h_state = h_matching_connection_token;
1688               }
1689               break;
1690 
1691             default:
1692               UPDATE_STATE(s_header_value);
1693               h_state = h_general;
1694               break;
1695           }
1696         }
1697         parser->header_state = h_state;
1698 
1699         if (p == data + len)
1700           --p;
1701 
1702         COUNT_HEADER_SIZE(p - start);
1703         break;
1704       }
1705 
1706       case s_header_almost_done:
1707       {
1708         if (UNLIKELY(ch != LF)) {
1709           SET_ERRNO(HPE_LF_EXPECTED);
1710           goto error;
1711         }
1712 
1713         UPDATE_STATE(s_header_value_lws);
1714         break;
1715       }
1716 
1717       case s_header_value_lws:
1718       {
1719         if (ch == ' ' || ch == '\t') {
1720           if (parser->header_state == h_content_length_num) {
1721               /* treat obsolete line folding as space */
1722               parser->header_state = h_content_length_ws;
1723           }
1724           UPDATE_STATE(s_header_value_start);
1725           REEXECUTE();
1726         }
1727 
1728         /* finished the header */
1729         switch (parser->header_state) {
1730           case h_connection_keep_alive:
1731             parser->flags |= F_CONNECTION_KEEP_ALIVE;
1732             break;
1733           case h_connection_close:
1734             parser->flags |= F_CONNECTION_CLOSE;
1735             break;
1736           case h_transfer_encoding_chunked:
1737             parser->flags |= F_CHUNKED;
1738             break;
1739           case h_connection_upgrade:
1740             parser->flags |= F_CONNECTION_UPGRADE;
1741             break;
1742           default:
1743             break;
1744         }
1745 
1746         UPDATE_STATE(s_header_field_start);
1747         REEXECUTE();
1748       }
1749 
1750       case s_header_value_discard_ws_almost_done:
1751       {
1752         STRICT_CHECK(ch != LF);
1753         UPDATE_STATE(s_header_value_discard_lws);
1754         break;
1755       }
1756 
1757       case s_header_value_discard_lws:
1758       {
1759         if (ch == ' ' || ch == '\t') {
1760           UPDATE_STATE(s_header_value_discard_ws);
1761           break;
1762         } else {
1763           switch (parser->header_state) {
1764             case h_connection_keep_alive:
1765               parser->flags |= F_CONNECTION_KEEP_ALIVE;
1766               break;
1767             case h_connection_close:
1768               parser->flags |= F_CONNECTION_CLOSE;
1769               break;
1770             case h_connection_upgrade:
1771               parser->flags |= F_CONNECTION_UPGRADE;
1772               break;
1773             case h_transfer_encoding_chunked:
1774               parser->flags |= F_CHUNKED;
1775               break;
1776             case h_content_length:
1777               /* do not allow empty content length */
1778               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1779               goto error;
1780               break;
1781             default:
1782               break;
1783           }
1784 
1785           /* header value was empty */
1786           MARK(header_value);
1787           UPDATE_STATE(s_header_field_start);
1788           CALLBACK_DATA_NOADVANCE(header_value);
1789           REEXECUTE();
1790         }
1791       }
1792 
1793       case s_headers_almost_done:
1794       {
1795         STRICT_CHECK(ch != LF);
1796 
1797         if (parser->flags & F_TRAILING) {
1798           /* End of a chunked request */
1799           UPDATE_STATE(s_message_done);
1800           CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1801           REEXECUTE();
1802         }
1803 
1804         /* Cannot use transfer-encoding and a content-length header together
1805            per the HTTP specification. (RFC 7230 Section 3.3.3) */
1806         if ((parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) &&
1807             (parser->flags & F_CONTENTLENGTH)) {
1808           /* Allow it for lenient parsing as long as `Transfer-Encoding` is
1809            * not `chunked`
1810            */
1811           if (!lenient || (parser->flags & F_CHUNKED)) {
1812             SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1813             goto error;
1814           }
1815         }
1816 
1817         UPDATE_STATE(s_headers_done);
1818 
1819         /* Set this here so that on_headers_complete() callbacks can see it */
1820         if ((parser->flags & F_UPGRADE) &&
1821             (parser->flags & F_CONNECTION_UPGRADE)) {
1822           /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1823            * mandatory only when it is a 101 Switching Protocols response,
1824            * otherwise it is purely informational, to announce support.
1825            */
1826           parser->upgrade =
1827               (parser->type == HTTP_REQUEST || parser->status_code == 101);
1828         } else {
1829           parser->upgrade = (parser->method == HTTP_CONNECT);
1830         }
1831 
        /* Here we call the headers_complete callback. This is somewhat
         * different from other callbacks because if the user returns 1, we
         * will interpret that as saying that this message has no body. This
         * is needed for the annoying case of receiving a response to a HEAD
         * request.
         *
         * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
         * we have to simulate it by handling a change in errno below.
         */
1841         if (settings->on_headers_complete) {
1842           switch (settings->on_headers_complete(parser)) {
1843             case 0:
1844               break;
1845 
1846             case 2:
1847               parser->upgrade = 1;
1848 
1849               /* fall through */
1850             case 1:
1851               parser->flags |= F_SKIPBODY;
1852               break;
1853 
1854             default:
1855               SET_ERRNO(HPE_CB_headers_complete);
1856               RETURN(p - data); /* Error */
1857           }
1858         }
1859 
1860         if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1861           RETURN(p - data);
1862         }
1863 
1864         REEXECUTE();
1865       }
1866 
1867       case s_headers_done:
1868       {
1869         int hasBody;
1870         STRICT_CHECK(ch != LF);
1871 
1872         parser->nread = 0;
1873         nread = 0;
1874 
1875         hasBody = parser->flags & F_CHUNKED ||
1876           (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1877         if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1878                                 (parser->flags & F_SKIPBODY) || !hasBody)) {
1879           /* Exit, the rest of the message is in a different protocol. */
1880           UPDATE_STATE(NEW_MESSAGE());
1881           CALLBACK_NOTIFY(message_complete);
1882           RETURN((p - data) + 1);
1883         }
1884 
1885         if (parser->flags & F_SKIPBODY) {
1886           UPDATE_STATE(NEW_MESSAGE());
1887           CALLBACK_NOTIFY(message_complete);
1888         } else if (parser->flags & F_CHUNKED) {
1889           /* chunked encoding - ignore Content-Length header,
1890            * prepare for a chunk */
1891           UPDATE_STATE(s_chunk_size_start);
1892         } else if (parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) {
1893           if (parser->type == HTTP_REQUEST && !lenient) {
1894             /* RFC 7230 3.3.3 */
1895 
1896             /* If a Transfer-Encoding header field
1897              * is present in a request and the chunked transfer coding is not
1898              * the final encoding, the message body length cannot be determined
1899              * reliably; the server MUST respond with the 400 (Bad Request)
1900              * status code and then close the connection.
1901              */
1902             SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING);
1903             RETURN(p - data); /* Error */
1904           } else {
1905             /* RFC 7230 3.3.3 */
1906 
1907             /* If a Transfer-Encoding header field is present in a response and
1908              * the chunked transfer coding is not the final encoding, the
1909              * message body length is determined by reading the connection until
1910              * it is closed by the server.
1911              */
1912             UPDATE_STATE(s_body_identity_eof);
1913           }
1914         } else {
1915           if (parser->content_length == 0) {
1916             /* Content-Length header given but zero: Content-Length: 0\r\n */
1917             UPDATE_STATE(NEW_MESSAGE());
1918             CALLBACK_NOTIFY(message_complete);
1919           } else if (parser->content_length != ULLONG_MAX) {
1920             /* Content-Length header given and non-zero */
1921             UPDATE_STATE(s_body_identity);
1922           } else {
1923             if (!http_message_needs_eof(parser)) {
1924               /* Assume content-length 0 - read the next */
1925               UPDATE_STATE(NEW_MESSAGE());
1926               CALLBACK_NOTIFY(message_complete);
1927             } else {
1928               /* Read body until EOF */
1929               UPDATE_STATE(s_body_identity_eof);
1930             }
1931           }
1932         }
1933 
1934         break;
1935       }
1936 
1937       case s_body_identity:
1938       {
1939         uint64_t to_read = MIN(parser->content_length,
1940                                (uint64_t) ((data + len) - p));
1941 
1942         assert(parser->content_length != 0
1943             && parser->content_length != ULLONG_MAX);
1944 
        /* The difference between advancing content_length and p is because
         * the latter will automatically advance on the next loop iteration.
         * Further, if content_length ends up at 0, we want to see the last
         * byte again for our message complete callback.
         */
1950         MARK(body);
1951         parser->content_length -= to_read;
1952         p += to_read - 1;
1953 
1954         if (parser->content_length == 0) {
1955           UPDATE_STATE(s_message_done);
1956 
1957           /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1958            *
1959            * The alternative to doing this is to wait for the next byte to
1960            * trigger the data callback, just as in every other case. The
1961            * problem with this is that this makes it difficult for the test
1962            * harness to distinguish between complete-on-EOF and
1963            * complete-on-length. It's not clear that this distinction is
1964            * important for applications, but let's keep it for now.
1965            */
1966           CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1967           REEXECUTE();
1968         }
1969 
1970         break;
1971       }
1972 
1973       /* read until EOF */
1974       case s_body_identity_eof:
1975         MARK(body);
1976         p = data + len - 1;
1977 
1978         break;
1979 
1980       case s_message_done:
1981         UPDATE_STATE(NEW_MESSAGE());
1982         CALLBACK_NOTIFY(message_complete);
1983         if (parser->upgrade) {
1984           /* Exit, the rest of the message is in a different protocol. */
1985           RETURN((p - data) + 1);
1986         }
1987         break;
1988 
1989       case s_chunk_size_start:
1990       {
1991         assert(nread == 1);
1992         assert(parser->flags & F_CHUNKED);
1993 
1994         unhex_val = unhex[(unsigned char)ch];
1995         if (UNLIKELY(unhex_val == -1)) {
1996           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1997           goto error;
1998         }
1999 
2000         parser->content_length = unhex_val;
2001         UPDATE_STATE(s_chunk_size);
2002         break;
2003       }
2004 
2005       case s_chunk_size:
2006       {
2007         uint64_t t;
2008 
2009         assert(parser->flags & F_CHUNKED);
2010 
2011         if (ch == CR) {
2012           UPDATE_STATE(s_chunk_size_almost_done);
2013           break;
2014         }
2015 
2016         unhex_val = unhex[(unsigned char)ch];
2017 
2018         if (unhex_val == -1) {
2019           if (ch == ';' || ch == ' ') {
2020             UPDATE_STATE(s_chunk_parameters);
2021             break;
2022           }
2023 
2024           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
2025           goto error;
2026         }
2027 
2028         t = parser->content_length;
2029         t *= 16;
2030         t += unhex_val;
2031 
2032         /* Overflow? Test against a conservative limit for simplicity. */
2033         if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
2034           SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
2035           goto error;
2036         }
2037 
2038         parser->content_length = t;
2039         break;
2040       }
2041 
2042       case s_chunk_parameters:
2043       {
2044         assert(parser->flags & F_CHUNKED);
2045         /* just ignore this shit. TODO check for overflow */
2046         if (ch == CR) {
2047           UPDATE_STATE(s_chunk_size_almost_done);
2048           break;
2049         }
2050         break;
2051       }
2052 
2053       case s_chunk_size_almost_done:
2054       {
2055         assert(parser->flags & F_CHUNKED);
2056         STRICT_CHECK(ch != LF);
2057 
2058         parser->nread = 0;
2059         nread = 0;
2060 
2061         if (parser->content_length == 0) {
2062           parser->flags |= F_TRAILING;
2063           UPDATE_STATE(s_header_field_start);
2064         } else {
2065           UPDATE_STATE(s_chunk_data);
2066         }
2067         CALLBACK_NOTIFY(chunk_header);
2068         break;
2069       }
2070 
2071       case s_chunk_data:
2072       {
2073         uint64_t to_read = MIN(parser->content_length,
2074                                (uint64_t) ((data + len) - p));
2075 
2076         assert(parser->flags & F_CHUNKED);
2077         assert(parser->content_length != 0
2078             && parser->content_length != ULLONG_MAX);
2079 
2080         /* See the explanation in s_body_identity for why the content
2081          * length and data pointers are managed this way.
2082          */
2083         MARK(body);
2084         parser->content_length -= to_read;
2085         p += to_read - 1;
2086 
2087         if (parser->content_length == 0) {
2088           UPDATE_STATE(s_chunk_data_almost_done);
2089         }
2090 
2091         break;
2092       }
2093 
2094       case s_chunk_data_almost_done:
2095         assert(parser->flags & F_CHUNKED);
2096         assert(parser->content_length == 0);
2097         STRICT_CHECK(ch != CR);
2098         UPDATE_STATE(s_chunk_data_done);
2099         CALLBACK_DATA(body);
2100         break;
2101 
2102       case s_chunk_data_done:
2103         assert(parser->flags & F_CHUNKED);
2104         STRICT_CHECK(ch != LF);
2105         parser->nread = 0;
2106         nread = 0;
2107         UPDATE_STATE(s_chunk_size_start);
2108         CALLBACK_NOTIFY(chunk_complete);
2109         break;
2110 
2111       default:
2112         assert(0 && "unhandled state");
2113         SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
2114         goto error;
2115     }
2116   }
2117 
2118   /* Run callbacks for any marks that we have leftover after we ran out of
2119    * bytes. There should be at most one of these set, so it's OK to invoke
2120    * them in series (unset marks will not result in callbacks).
2121    *
2122    * We use the NOADVANCE() variety of callbacks here because 'p' has already
2123    * overflowed 'data' and this allows us to correct for the off-by-one that
2124    * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2125    * value that's in-bounds).
2126    */
2127 
2128   assert(((header_field_mark ? 1 : 0) +
2129           (header_value_mark ? 1 : 0) +
2130           (url_mark ? 1 : 0)  +
2131           (body_mark ? 1 : 0) +
2132           (status_mark ? 1 : 0)) <= 1);
2133 
2134   CALLBACK_DATA_NOADVANCE(header_field);
2135   CALLBACK_DATA_NOADVANCE(header_value);
2136   CALLBACK_DATA_NOADVANCE(url);
2137   CALLBACK_DATA_NOADVANCE(body);
2138   CALLBACK_DATA_NOADVANCE(status);
2139 
2140   RETURN(len);
2141 
2142 error:
2143   if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2144     SET_ERRNO(HPE_UNKNOWN);
2145   }
2146 
2147   RETURN(p - data);
2148 }
2149 
2150 
2151 /* Does the parser need to see an EOF to find the end of the message? */
2152 int
http_message_needs_eof(const http_parser * parser)2153 http_message_needs_eof (const http_parser *parser)
2154 {
2155   if (parser->type == HTTP_REQUEST) {
2156     return 0;
2157   }
2158 
2159   /* See RFC 2616 section 4.4 */
2160   if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2161       parser->status_code == 204 ||     /* No Content */
2162       parser->status_code == 304 ||     /* Not Modified */
2163       parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
2164     return 0;
2165   }
2166 
2167   /* RFC 7230 3.3.3, see `s_headers_almost_done` */
2168   if ((parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) &&
2169       (parser->flags & F_CHUNKED) == 0) {
2170     return 1;
2171   }
2172 
2173   if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2174     return 0;
2175   }
2176 
2177   return 1;
2178 }
2179 
2180 
2181 int
http_should_keep_alive(const http_parser * parser)2182 http_should_keep_alive (const http_parser *parser)
2183 {
2184   if (parser->http_major > 0 && parser->http_minor > 0) {
2185     /* HTTP/1.1 */
2186     if (parser->flags & F_CONNECTION_CLOSE) {
2187       return 0;
2188     }
2189   } else {
2190     /* HTTP/1.0 or earlier */
2191     if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
2192       return 0;
2193     }
2194   }
2195 
2196   return !http_message_needs_eof(parser);
2197 }
2198 
2199 
2200 const char *
http_method_str(enum http_method m)2201 http_method_str (enum http_method m)
2202 {
2203   return ELEM_AT(method_strings, m, "<unknown>");
2204 }
2205 
2206 const char *
http_status_str(enum http_status s)2207 http_status_str (enum http_status s)
2208 {
2209   switch (s) {
2210 #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
2211     HTTP_STATUS_MAP(XX)
2212 #undef XX
2213     default: return "<unknown>";
2214   }
2215 }
2216 
2217 void
http_parser_init(http_parser * parser,enum http_parser_type t)2218 http_parser_init (http_parser *parser, enum http_parser_type t)
2219 {
2220   void *data = parser->data; /* preserve application data */
2221   memset(parser, 0, sizeof(*parser));
2222   parser->data = data;
2223   parser->type = t;
2224   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
2225   parser->http_errno = HPE_OK;
2226 }
2227 
2228 void
http_parser_settings_init(http_parser_settings * settings)2229 http_parser_settings_init(http_parser_settings *settings)
2230 {
2231   memset(settings, 0, sizeof(*settings));
2232 }
2233 
2234 const char *
http_errno_name(enum http_errno err)2235 http_errno_name(enum http_errno err) {
2236   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2237   return http_strerror_tab[err].name;
2238 }
2239 
2240 const char *
http_errno_description(enum http_errno err)2241 http_errno_description(enum http_errno err) {
2242   assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2243   return http_strerror_tab[err].description;
2244 }
2245 
2246 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)2247 http_parse_host_char(enum http_host_state s, const char ch) {
2248   switch(s) {
2249     case s_http_userinfo:
2250     case s_http_userinfo_start:
2251       if (ch == '@') {
2252         return s_http_host_start;
2253       }
2254 
2255       if (IS_USERINFO_CHAR(ch)) {
2256         return s_http_userinfo;
2257       }
2258       break;
2259 
2260     case s_http_host_start:
2261       if (ch == '[') {
2262         return s_http_host_v6_start;
2263       }
2264 
2265       if (IS_HOST_CHAR(ch)) {
2266         return s_http_host;
2267       }
2268 
2269       break;
2270 
2271     case s_http_host:
2272       if (IS_HOST_CHAR(ch)) {
2273         return s_http_host;
2274       }
2275 
2276     /* fall through */
2277     case s_http_host_v6_end:
2278       if (ch == ':') {
2279         return s_http_host_port_start;
2280       }
2281 
2282       break;
2283 
2284     case s_http_host_v6:
2285       if (ch == ']') {
2286         return s_http_host_v6_end;
2287       }
2288 
2289     /* fall through */
2290     case s_http_host_v6_start:
2291       if (IS_HEX(ch) || ch == ':' || ch == '.') {
2292         return s_http_host_v6;
2293       }
2294 
2295       if (s == s_http_host_v6 && ch == '%') {
2296         return s_http_host_v6_zone_start;
2297       }
2298       break;
2299 
2300     case s_http_host_v6_zone:
2301       if (ch == ']') {
2302         return s_http_host_v6_end;
2303       }
2304 
2305     /* fall through */
2306     case s_http_host_v6_zone_start:
2307       /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2308       if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2309           ch == '~') {
2310         return s_http_host_v6_zone;
2311       }
2312       break;
2313 
2314     case s_http_host_port:
2315     case s_http_host_port_start:
2316       if (IS_NUM(ch)) {
2317         return s_http_host_port;
2318       }
2319 
2320       break;
2321 
2322     default:
2323       break;
2324   }
2325   return s_http_host_dead;
2326 }
2327 
2328 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)2329 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2330   enum http_host_state s;
2331 
2332   const char *p;
2333   size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2334 
2335   assert(u->field_set & (1 << UF_HOST));
2336 
2337   u->field_data[UF_HOST].len = 0;
2338 
2339   s = found_at ? s_http_userinfo_start : s_http_host_start;
2340 
2341   for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2342     enum http_host_state new_s = http_parse_host_char(s, *p);
2343 
2344     if (new_s == s_http_host_dead) {
2345       return 1;
2346     }
2347 
2348     switch(new_s) {
2349       case s_http_host:
2350         if (s != s_http_host) {
2351           u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2352         }
2353         u->field_data[UF_HOST].len++;
2354         break;
2355 
2356       case s_http_host_v6:
2357         if (s != s_http_host_v6) {
2358           u->field_data[UF_HOST].off = (uint16_t)(p - buf);
2359         }
2360         u->field_data[UF_HOST].len++;
2361         break;
2362 
2363       case s_http_host_v6_zone_start:
2364       case s_http_host_v6_zone:
2365         u->field_data[UF_HOST].len++;
2366         break;
2367 
2368       case s_http_host_port:
2369         if (s != s_http_host_port) {
2370           u->field_data[UF_PORT].off = (uint16_t)(p - buf);
2371           u->field_data[UF_PORT].len = 0;
2372           u->field_set |= (1 << UF_PORT);
2373         }
2374         u->field_data[UF_PORT].len++;
2375         break;
2376 
2377       case s_http_userinfo:
2378         if (s != s_http_userinfo) {
2379           u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
2380           u->field_data[UF_USERINFO].len = 0;
2381           u->field_set |= (1 << UF_USERINFO);
2382         }
2383         u->field_data[UF_USERINFO].len++;
2384         break;
2385 
2386       default:
2387         break;
2388     }
2389     s = new_s;
2390   }
2391 
2392   /* Make sure we don't end somewhere unexpected */
2393   switch (s) {
2394     case s_http_host_start:
2395     case s_http_host_v6_start:
2396     case s_http_host_v6:
2397     case s_http_host_v6_zone_start:
2398     case s_http_host_v6_zone:
2399     case s_http_host_port_start:
2400     case s_http_userinfo:
2401     case s_http_userinfo_start:
2402       return 1;
2403     default:
2404       break;
2405   }
2406 
2407   return 0;
2408 }
2409 
2410 void
http_parser_url_init(struct http_parser_url * u)2411 http_parser_url_init(struct http_parser_url *u) {
2412   memset(u, 0, sizeof(*u));
2413 }
2414 
2415 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2416 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2417                       struct http_parser_url *u)
2418 {
2419   enum state s;
2420   const char *p;
2421   enum http_parser_url_fields uf, old_uf;
2422   int found_at = 0;
2423 
2424   if (buflen == 0) {
2425     return 1;
2426   }
2427 
2428   u->port = u->field_set = 0;
2429   s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2430   old_uf = UF_MAX;
2431 
2432   for (p = buf; p < buf + buflen; p++) {
2433     s = parse_url_char(s, *p);
2434 
2435     /* Figure out the next field that we're operating on */
2436     switch (s) {
2437       case s_dead:
2438         return 1;
2439 
2440       /* Skip delimeters */
2441       case s_req_schema_slash:
2442       case s_req_schema_slash_slash:
2443       case s_req_server_start:
2444       case s_req_query_string_start:
2445       case s_req_fragment_start:
2446         continue;
2447 
2448       case s_req_schema:
2449         uf = UF_SCHEMA;
2450         break;
2451 
2452       case s_req_server_with_at:
2453         found_at = 1;
2454 
2455       /* fall through */
2456       case s_req_server:
2457         uf = UF_HOST;
2458         break;
2459 
2460       case s_req_path:
2461         uf = UF_PATH;
2462         break;
2463 
2464       case s_req_query_string:
2465         uf = UF_QUERY;
2466         break;
2467 
2468       case s_req_fragment:
2469         uf = UF_FRAGMENT;
2470         break;
2471 
2472       default:
2473         assert(!"Unexpected state");
2474         return 1;
2475     }
2476 
2477     /* Nothing's changed; soldier on */
2478     if (uf == old_uf) {
2479       u->field_data[uf].len++;
2480       continue;
2481     }
2482 
2483     u->field_data[uf].off = (uint16_t)(p - buf);
2484     u->field_data[uf].len = 1;
2485 
2486     u->field_set |= (1 << uf);
2487     old_uf = uf;
2488   }
2489 
2490   /* host must be present if there is a schema */
2491   /* parsing http:///toto will fail */
2492   if ((u->field_set & (1 << UF_SCHEMA)) &&
2493       (u->field_set & (1 << UF_HOST)) == 0) {
2494     return 1;
2495   }
2496 
2497   if (u->field_set & (1 << UF_HOST)) {
2498     if (http_parse_host(buf, u, found_at) != 0) {
2499       return 1;
2500     }
2501   }
2502 
2503   /* CONNECT requests can only contain "hostname:port" */
2504   if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2505     return 1;
2506   }
2507 
2508   if (u->field_set & (1 << UF_PORT)) {
2509     uint16_t off;
2510     uint16_t len;
2511     const char* p;
2512     const char* end;
2513     unsigned long v;
2514 
2515     off = u->field_data[UF_PORT].off;
2516     len = u->field_data[UF_PORT].len;
2517     end = buf + off + len;
2518 
2519     /* NOTE: The characters are already validated and are in the [0-9] range */
2520     assert(off + len <= buflen && "Port number overflow");
2521     v = 0;
2522     for (p = buf + off; p < end; p++) {
2523       v *= 10;
2524       v += *p - '0';
2525 
2526       /* Ports have a max value of 2^16 */
2527       if (v > 0xffff) {
2528         return 1;
2529       }
2530     }
2531 
2532     u->port = (uint16_t) v;
2533   }
2534 
2535   return 0;
2536 }
2537 
2538 void
http_parser_pause(http_parser * parser,int paused)2539 http_parser_pause(http_parser *parser, int paused) {
2540   /* Users should only be pausing/unpausing a parser that is not in an error
2541    * state. In non-debug builds, there's not much that we can do about this
2542    * other than ignore it.
2543    */
2544   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2545       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2546     uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
2547     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2548   } else {
2549     assert(0 && "Attempting to pause parser in error state");
2550   }
2551 }
2552 
2553 int
http_body_is_final(const struct http_parser * parser)2554 http_body_is_final(const struct http_parser *parser) {
2555     return parser->state == s_message_done;
2556 }
2557 
2558 unsigned long
http_parser_version(void)2559 http_parser_version(void) {
2560   return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2561          HTTP_PARSER_VERSION_MINOR * 0x00100 |
2562          HTTP_PARSER_VERSION_PATCH * 0x00001;
2563 }
2564 
2565 void
http_parser_set_max_header_size(uint32_t size)2566 http_parser_set_max_header_size(uint32_t size) {
2567   max_header_size = size;
2568 }
2569