1 /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2  *
3  * Additional changes are licensed under the same terms as NGINX and
4  * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 #include "http_parser.h"
25 #include <assert.h>
26 #include <stddef.h>
27 #include <ctype.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <limits.h>
31 
/* Fallback for toolchains whose <limits.h> lacks ULLONG_MAX: all 64 bits set. */
#ifndef ULLONG_MAX
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
#endif

/* Smaller of two values. Both arguments may be evaluated twice. */
#ifndef MIN
# define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif

/* Element count of a true array (not a pointer). */
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif

/* Test bit number `pos` of a byte-array bitmap: byte pos/8, bit pos%8.
 * Yields 1 when the bit is set and 0 otherwise. `pos` is evaluated twice.
 */
#ifndef BIT_AT
# define BIT_AT(bitmap, pos)                                         \
  (!!((unsigned int) (bitmap)[(unsigned int) (pos) >> 3] &           \
      (1 << ((unsigned int) (pos) & 7))))
#endif

/* Bounds-checked array read: arr[idx] when idx is inside the array,
 * otherwise `fallback`. Negative indexes wrap to large unsigned values and
 * therefore also select `fallback`.
 */
#ifndef ELEM_AT
# define ELEM_AT(arr, idx, fallback)                                 \
  ((unsigned int) (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : (fallback))
#endif
53 
/* Store error code `e` in parser->http_errno. Expects a variable named
 * `parser` to be in scope at the expansion site.
 */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while(0)


/* Run the notify callback FOR, returning ER if it fails.
 *
 * Expects `parser` and `settings` in scope and asserts that the parser errno
 * is HPE_OK on entry. When settings->on_FOR is set it is called with the
 * parser; a non-zero result records HPE_CB_FOR. If the errno is not HPE_OK
 * after the call (callback failure, or the parser was paused), the
 * *enclosing function* returns ER. Nothing happens when no callback is set.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (settings->on_##FOR) {                                          \
    if (0 != settings->on_##FOR(parser)) {                           \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
                                                                     \
    /* We either errored above or got paused; get out */             \
    if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                       \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Run the notify callback FOR and consume the current byte
 * (on failure the enclosing function returns p - data + 1).
 */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Run the notify callback FOR and don't consume the current byte
 * (on failure the enclosing function returns p - data).
 */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)

/* Run data callback FOR with LEN bytes, returning ER if it fails.
 *
 * Only acts when the local pointer FOR_mark is non-NULL. If settings->on_FOR
 * is set it is called as on_FOR(parser, FOR_mark, LEN); a non-zero result
 * records HPE_CB_FOR, and any non-OK errno afterwards makes the enclosing
 * function return ER (the mark is left untouched in that case). Otherwise
 * FOR_mark is reset to NULL, whether or not a callback was registered.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark) {                                                  \
    if (settings->on_##FOR) {                                        \
      if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) {      \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
                                                                     \
      /* We either errored above or got paused; get out */           \
      if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                     \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Run the data callback FOR and consume the current byte.
 * The span handed to the callback is [FOR_mark, p), i.e. p - FOR_mark bytes.
 */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Run the data callback FOR and don't consume the current byte.
 * The span handed to the callback is [FOR_mark, p), i.e. p - FOR_mark bytes.
 */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Set the mark FOR; non-destructive if mark is already set.
 * The mark is set to the current read position `p`.
 */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (!FOR##_mark) {                                                 \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
118 
119 
/* Lower-case header field names the parser matches against. */
#define CONNECTION        "connection"
#define PROXY_CONNECTION  "proxy-connection"
#define CONTENT_LENGTH    "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE           "upgrade"

/* Lower-case header values the parser matches against. */
#define CHUNKED           "chunked"
#define KEEP_ALIVE        "keep-alive"
#define CLOSE             "close"
128 
129 
130 static const char *method_strings[] =
131   {
132 #define XX(num, name, string) #string,
133   HTTP_METHOD_MAP(XX)
134 #undef XX
135   };
136 
137 
/* RFC 2616 token lookup, indexed by byte value.
 *
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * A zero entry means the byte is not a token character. A non-zero entry is
 * the character itself, with 'A'-'Z' folded to lower case. Only the first
 * 128 entries are listed; 0x80-0xff are implicitly zero.
 */
static const char tokens[256] = {
  /* 0x00-0x0f: control characters */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10-0x1f: control characters */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20-0x2f:  SP ! " # $ % & ' ( ) * + , - . /  */
  0, '!', 0, '#', '$', '%', '&', '\'', 0, 0, '*', '+', 0, '-', '.', 0,
  /* 0x30-0x3f:  0-9 : ; < = > ?  */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
  /* 0x40-0x4f:  @ A-O (lower-cased)  */
  0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50-0x5f:  P-Z (lower-cased) [ \ ] ^ _  */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, '^', '_',
  /* 0x60-0x6f:  ` a-o  */
  '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70-0x7f:  p-z { | } ~ DEL  */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, '|', 0, '~', 0
};
178 
179 
/* Hex-digit decoding table, indexed by byte value.
 *
 * '0'-'9' map to 0-9, 'a'-'f' and 'A'-'F' map to 10-15, and every other
 * byte maps to -1 (not a hex digit).
 *
 * All 256 entries are spelled out on purpose: with only the ASCII half
 * listed, bytes 0x80-0xff would be zero-initialized and therefore be
 * indistinguishable from the digit '0' instead of being reported as invalid.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00-0x0f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10-0x1f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20-0x2f */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30-0x3f: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40-0x4f: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50-0x5f */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60-0x6f: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70-0x7f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80-0x8f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90-0x9f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xa0-0xaf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xb0-0xbf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xc0-0xcf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xd0-0xdf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xe0-0xef */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xf0-0xff */
  };
190 
191 
/* T(v) keeps bit value v only in lenient builds; strict builds drop it. */
#if HTTP_PARSER_STRICT
# define T(v) 0
#else
# define T(v) v
#endif


/* Bitmap of bytes accepted as ordinary URL characters.
 *
 * Entry k covers byte values 8*k .. 8*k+7, least significant bit first, so
 * the bit for byte b lives at normal_url_char[b >> 3], bit (b & 7). Only the
 * ASCII half is listed; the remaining entries are implicitly zero.
 */
static const uint8_t normal_url_char[32] = {
  0x00,               /* 0x00-0x07: control characters                     */
  T(0x02) | T(0x10),  /* 0x08-0x0f: HT and FF, lenient builds only         */
  0x00,               /* 0x10-0x17: control characters                     */
  0x00,               /* 0x18-0x1f: control characters                     */
  0xf6,               /* 0x20-0x27: everything except SP and '#'           */
  0xff,               /* 0x28-0x2f: ( ) * + , - . /                        */
  0xff,               /* 0x30-0x37: 0-7                                    */
  0x7f,               /* 0x38-0x3f: everything except '?'                  */
  0xff,               /* 0x40-0x47: @ A-G                                  */
  0xff,               /* 0x48-0x4f: H-O                                    */
  0xff,               /* 0x50-0x57: P-W                                    */
  0xff,               /* 0x58-0x5f: X-Z [ \ ] ^ _                          */
  0xff,               /* 0x60-0x67: ` a-g                                  */
  0xff,               /* 0x68-0x6f: h-o                                    */
  0xff,               /* 0x70-0x77: p-w                                    */
  0x7f                /* 0x78-0x7f: everything except DEL                  */
};

#undef T
234 
/* States of the main parser state machine.
 *
 * The numeric order matters: every state up to and including s_headers_done
 * counts as a "header" state for PARSING_HEADER(), everything after it as a
 * "body" state.
 */
enum state
  { s_dead = 1 /* important that this is > 0 */

  , s_start_req_or_res
  , s_res_or_resp_H

  /* response status line: "HTTP/", version digits, status code, reason text */
  , s_start_res
  , s_res_H
  , s_res_HT
  , s_res_HTT
  , s_res_HTTP
  , s_res_first_http_major
  , s_res_http_major
  , s_res_first_http_minor
  , s_res_http_minor
  , s_res_first_status_code
  , s_res_status_code
  , s_res_status
  , s_res_line_almost_done

  , s_start_req

  , s_req_method
  , s_req_spaces_before_url
  , s_req_schema
  , s_req_schema_slash
  , s_req_schema_slash_slash
  , s_req_server_start
  , s_req_server
  , s_req_server_with_at
  , s_req_path
  , s_req_query_string_start
  , s_req_query_string
  , s_req_fragment_start
  , s_req_fragment
  , s_req_http_start
  , s_req_http_H
  , s_req_http_HT
  , s_req_http_HTT
  , s_req_http_HTTP
  , s_req_first_http_major
  , s_req_http_major
  , s_req_first_http_minor
  , s_req_http_minor
  , s_req_line_almost_done

  , s_header_field_start
  , s_header_field
  , s_header_value_start
  , s_header_value
  , s_header_value_lws

  , s_header_almost_done

  , s_chunk_size_start
  , s_chunk_size
  , s_chunk_parameters
  , s_chunk_size_almost_done

  , s_headers_almost_done
  , s_headers_done

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  , s_chunk_data
  , s_chunk_data_almost_done
  , s_chunk_data_done

  , s_body_identity
  , s_body_identity_eof

  , s_message_done
  };


/* True while `state` is a header state, i.e. not past s_headers_done.
 * The argument is parenthesized so that compound expressions (for example a
 * conditional expression) are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
313 
314 
/* Sub-states used while recognising specific header names and values.
 * Numbering starts at h_general == 0 and follows declaration order.
 */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  /* a header name is being compared against a known name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* a known header name was recognised */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* a header value is being compared against a known value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* a known header value was recognised */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
340 
/* States for walking the host part of a URL (userinfo, host, IPv6 literal,
 * port). Numbering starts at 1 with s_http_host_dead.
 */
enum http_host_state {
  s_http_host_dead = 1,
  s_http_userinfo_start,
  s_http_userinfo,
  s_http_host_start,
  s_http_host_v6_start,
  s_http_host,
  s_http_host_v6,
  s_http_host_v6_end,
  s_http_host_port_start,
  s_http_host_port
};
354 
/* Macros for character classes; some depend on strict-mode.
 *
 * Every macro parenthesizes its argument and its whole expansion, so it can
 * be handed any expression. Most of them evaluate the argument more than
 * once, so do not pass expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
/* Strict mode: token and URL tables are used as-is; no '_' in host names. */
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Lenient mode: a space counts as a token character, bytes with the high
 * bit set are accepted in URLs, and '_' is accepted in host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
381 
382 
/* Initial state for a new message, picked from parser->type: s_start_req for
 * HTTP_REQUEST, s_start_res for anything else. Expects `parser` in scope.
 */
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)


#if HTTP_PARSER_STRICT
/* Strict builds: when `cond` is true, record HPE_STRICT and jump to the
 * `error` label of the enclosing function.
 */
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
/* Strict builds: state after a finished message is start_state when
 * http_should_keep_alive(parser) is true, otherwise s_dead.
 */
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
/* Lenient builds: STRICT_CHECK expands to nothing (`cond` is not evaluated)
 * and a new message always begins in start_state.
 */
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
399 
400 
401 /* Map errno values to strings for human-readable output */
402 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
403 static struct {
404   const char *name;
405   const char *description;
406 } http_strerror_tab[] = {
407   HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
408 };
409 #undef HTTP_STRERROR_GEN
410 
411 int http_message_needs_eof(const http_parser *parser);
412 
413 /* Our URL parser.
414  *
415  * This is designed to be shared by http_parser_execute() for URL validation,
416  * hence it has a state transition + byte-for-byte interface. In addition, it
417  * is meant to be embedded in http_parser_parse_url(), which does the dirty
418  * work of turning state transitions URL components for its API.
419  *
420  * This function should only be invoked with non-space characters. It is
421  * assumed that the caller cares about (and can detect) the transition between
422  * URL and non-URL states by looking for these.
423  */
424 static enum state
parse_url_char(enum state s,const char ch)425 parse_url_char(enum state s, const char ch)
426 {
427   if (ch == ' ' || ch == '\r' || ch == '\n') {
428     return s_dead;
429   }
430 
431 #if HTTP_PARSER_STRICT
432   if (ch == '\t' || ch == '\f') {
433     return s_dead;
434   }
435 #endif
436 
437   switch (s) {
438     case s_req_spaces_before_url:
439       /* Proxied requests are followed by scheme of an absolute URI (alpha).
440        * All methods except CONNECT are followed by '/' or '*'.
441        */
442 
443       if (ch == '/' || ch == '*') {
444         return s_req_path;
445       }
446 
447       /* The schema must start with an alpha character. After that, it may
448        * consist of digits, '+', '-' or '.', followed by a ':'.
449        */
450       if (IS_ALPHA(ch)) {
451         return s_req_schema;
452       }
453 
454       break;
455 
456     case s_req_schema:
457       if (IS_ALPHANUM(ch) || ch == '+' || ch == '-' || ch == '.') {
458         return s;
459       }
460 
461       if (ch == ':') {
462         return s_req_schema_slash;
463       }
464 
465       break;
466 
467     case s_req_schema_slash:
468       if (ch == '/') {
469         return s_req_schema_slash_slash;
470       }
471 
472       break;
473 
474     case s_req_schema_slash_slash:
475       if (ch == '/') {
476         return s_req_server_start;
477       }
478 
479       break;
480 
481     case s_req_server_with_at:
482       if (ch == '@') {
483         return s_dead;
484       }
485 
486     /* FALLTHROUGH */
487     case s_req_server_start:
488     case s_req_server:
489       if (ch == '/') {
490         return s_req_path;
491       }
492 
493       if (ch == '?') {
494         return s_req_query_string_start;
495       }
496 
497       if (ch == '@') {
498         return s_req_server_with_at;
499       }
500 
501       if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
502         return s_req_server;
503       }
504 
505       break;
506 
507     case s_req_path:
508       if (IS_URL_CHAR(ch)) {
509         return s;
510       }
511 
512       switch (ch) {
513         case '?':
514           return s_req_query_string_start;
515 
516         case '#':
517           return s_req_fragment_start;
518       }
519 
520       break;
521 
522     case s_req_query_string_start:
523     case s_req_query_string:
524       if (IS_URL_CHAR(ch)) {
525         return s_req_query_string;
526       }
527 
528       switch (ch) {
529         case '?':
530           /* allow extra '?' in query string */
531           return s_req_query_string;
532 
533         case '#':
534           return s_req_fragment_start;
535       }
536 
537       break;
538 
539     case s_req_fragment_start:
540       if (IS_URL_CHAR(ch)) {
541         return s_req_fragment;
542       }
543 
544       switch (ch) {
545         case '?':
546           return s_req_fragment;
547 
548         case '#':
549           return s;
550       }
551 
552       break;
553 
554     case s_req_fragment:
555       if (IS_URL_CHAR(ch)) {
556         return s;
557       }
558 
559       switch (ch) {
560         case '?':
561         case '#':
562           return s;
563       }
564 
565       break;
566 
567     default:
568       break;
569   }
570 
571   /* We should never fall out of the switch above unless there's an error */
572   return s_dead;
573 }
574 
http_parser_execute(http_parser * parser,const http_parser_settings * settings,const char * data,size_t len)575 size_t http_parser_execute (http_parser *parser,
576                             const http_parser_settings *settings,
577                             const char *data,
578                             size_t len)
579 {
580   char c, ch;
581   int8_t unhex_val;
582   const char *p = data;
583   const char *header_field_mark = 0;
584   const char *header_value_mark = 0;
585   const char *url_mark = 0;
586   const char *body_mark = 0;
587 
588   /* We're in an error state. Don't bother doing anything. */
589   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
590     return 0;
591   }
592 
593   if (len == 0) {
594     switch (parser->state) {
595       case s_body_identity_eof:
596         /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
597          * we got paused.
598          */
599         CALLBACK_NOTIFY_NOADVANCE(message_complete);
600         return 0;
601 
602       case s_dead:
603       case s_start_req_or_res:
604       case s_start_res:
605       case s_start_req:
606         return 0;
607 
608       default:
609         SET_ERRNO(HPE_INVALID_EOF_STATE);
610         return 1;
611     }
612   }
613 
614 
615   if (parser->state == s_header_field)
616     header_field_mark = data;
617   if (parser->state == s_header_value)
618     header_value_mark = data;
619   switch (parser->state) {
620   case s_req_path:
621   case s_req_schema:
622   case s_req_schema_slash:
623   case s_req_schema_slash_slash:
624   case s_req_server_start:
625   case s_req_server:
626   case s_req_server_with_at:
627   case s_req_query_string_start:
628   case s_req_query_string:
629   case s_req_fragment_start:
630   case s_req_fragment:
631     url_mark = data;
632     break;
633   }
634 
635   for (p=data; p != data + len; p++) {
636     ch = *p;
637 
638     if (PARSING_HEADER(parser->state)) {
639       ++parser->nread;
640       /* Buffer overflow attack */
641       if (parser->nread > HTTP_MAX_HEADER_SIZE) {
642         SET_ERRNO(HPE_HEADER_OVERFLOW);
643         goto error;
644       }
645     }
646 
647     reexecute_byte:
648     switch (parser->state) {
649 
650       case s_dead:
651         /* this state is used after a 'Connection: close' message
652          * the parser will error out if it reads another message
653          */
654         if (ch == CR || ch == LF)
655           break;
656 
657         SET_ERRNO(HPE_CLOSED_CONNECTION);
658         goto error;
659 
660       case s_start_req_or_res:
661       {
662         if (ch == CR || ch == LF)
663           break;
664         parser->flags = 0;
665         parser->content_length = ULLONG_MAX;
666 
667         if (ch == 'H') {
668           parser->state = s_res_or_resp_H;
669 
670           CALLBACK_NOTIFY(message_begin);
671         } else {
672           parser->type = HTTP_REQUEST;
673           parser->state = s_start_req;
674           goto reexecute_byte;
675         }
676 
677         break;
678       }
679 
680       case s_res_or_resp_H:
681         if (ch == 'T') {
682           parser->type = HTTP_RESPONSE;
683           parser->state = s_res_HT;
684         } else {
685           if (ch != 'E') {
686             SET_ERRNO(HPE_INVALID_CONSTANT);
687             goto error;
688           }
689 
690           parser->type = HTTP_REQUEST;
691           parser->method = HTTP_HEAD;
692           parser->index = 2;
693           parser->state = s_req_method;
694         }
695         break;
696 
697       case s_start_res:
698       {
699         parser->flags = 0;
700         parser->content_length = ULLONG_MAX;
701 
702         switch (ch) {
703           case 'H':
704             parser->state = s_res_H;
705             break;
706 
707           case CR:
708           case LF:
709             break;
710 
711           default:
712             SET_ERRNO(HPE_INVALID_CONSTANT);
713             goto error;
714         }
715 
716         CALLBACK_NOTIFY(message_begin);
717         break;
718       }
719 
720       case s_res_H:
721         STRICT_CHECK(ch != 'T');
722         parser->state = s_res_HT;
723         break;
724 
725       case s_res_HT:
726         STRICT_CHECK(ch != 'T');
727         parser->state = s_res_HTT;
728         break;
729 
730       case s_res_HTT:
731         STRICT_CHECK(ch != 'P');
732         parser->state = s_res_HTTP;
733         break;
734 
735       case s_res_HTTP:
736         STRICT_CHECK(ch != '/');
737         parser->state = s_res_first_http_major;
738         break;
739 
740       case s_res_first_http_major:
741         if (ch < '0' || ch > '9') {
742           SET_ERRNO(HPE_INVALID_VERSION);
743           goto error;
744         }
745 
746         parser->http_major = ch - '0';
747         parser->state = s_res_http_major;
748         break;
749 
750       /* major HTTP version or dot */
751       case s_res_http_major:
752       {
753         if (ch == '.') {
754           parser->state = s_res_first_http_minor;
755           break;
756         }
757 
758         if (!IS_NUM(ch)) {
759           SET_ERRNO(HPE_INVALID_VERSION);
760           goto error;
761         }
762 
763         parser->http_major *= 10;
764         parser->http_major += ch - '0';
765 
766         if (parser->http_major > 999) {
767           SET_ERRNO(HPE_INVALID_VERSION);
768           goto error;
769         }
770 
771         break;
772       }
773 
774       /* first digit of minor HTTP version */
775       case s_res_first_http_minor:
776         if (!IS_NUM(ch)) {
777           SET_ERRNO(HPE_INVALID_VERSION);
778           goto error;
779         }
780 
781         parser->http_minor = ch - '0';
782         parser->state = s_res_http_minor;
783         break;
784 
785       /* minor HTTP version or end of request line */
786       case s_res_http_minor:
787       {
788         if (ch == ' ') {
789           parser->state = s_res_first_status_code;
790           break;
791         }
792 
793         if (!IS_NUM(ch)) {
794           SET_ERRNO(HPE_INVALID_VERSION);
795           goto error;
796         }
797 
798         parser->http_minor *= 10;
799         parser->http_minor += ch - '0';
800 
801         if (parser->http_minor > 999) {
802           SET_ERRNO(HPE_INVALID_VERSION);
803           goto error;
804         }
805 
806         break;
807       }
808 
809       case s_res_first_status_code:
810       {
811         if (!IS_NUM(ch)) {
812           if (ch == ' ') {
813             break;
814           }
815 
816           SET_ERRNO(HPE_INVALID_STATUS);
817           goto error;
818         }
819         parser->status_code = ch - '0';
820         parser->state = s_res_status_code;
821         break;
822       }
823 
824       case s_res_status_code:
825       {
826         if (!IS_NUM(ch)) {
827           switch (ch) {
828             case ' ':
829               parser->state = s_res_status;
830               break;
831             case CR:
832               parser->state = s_res_line_almost_done;
833               break;
834             case LF:
835               parser->state = s_header_field_start;
836               break;
837             default:
838               SET_ERRNO(HPE_INVALID_STATUS);
839               goto error;
840           }
841           break;
842         }
843 
844         parser->status_code *= 10;
845         parser->status_code += ch - '0';
846 
847         if (parser->status_code > 999) {
848           SET_ERRNO(HPE_INVALID_STATUS);
849           goto error;
850         }
851 
852         break;
853       }
854 
855       case s_res_status:
856         /* the human readable status. e.g. "NOT FOUND"
857          * we are not humans so just ignore this */
858         if (ch == CR) {
859           parser->state = s_res_line_almost_done;
860           break;
861         }
862 
863         if (ch == LF) {
864           parser->state = s_header_field_start;
865           break;
866         }
867         break;
868 
869       case s_res_line_almost_done:
870         STRICT_CHECK(ch != LF);
871         parser->state = s_header_field_start;
872         break;
873 
874       case s_start_req:
875       {
876         if (ch == CR || ch == LF)
877           break;
878         parser->flags = 0;
879         parser->content_length = ULLONG_MAX;
880 
881         if (!IS_ALPHA(ch)) {
882           SET_ERRNO(HPE_INVALID_METHOD);
883           goto error;
884         }
885 
886         parser->method = (enum http_method) 0;
887         parser->index = 1;
888         switch (ch) {
889           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
890           case 'D': parser->method = HTTP_DELETE; break;
891           case 'G': parser->method = HTTP_GET; break;
892           case 'H': parser->method = HTTP_HEAD; break;
893           case 'L': parser->method = HTTP_LOCK; break;
894           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
895           case 'N': parser->method = HTTP_NOTIFY; break;
896           case 'O': parser->method = HTTP_OPTIONS; break;
897           case 'P': parser->method = HTTP_POST;
898             /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
899             break;
900           case 'R': parser->method = HTTP_REPORT; break;
901           case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
902           case 'T': parser->method = HTTP_TRACE; break;
903           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
904           default:
905             SET_ERRNO(HPE_INVALID_METHOD);
906             goto error;
907         }
908         parser->state = s_req_method;
909 
910         CALLBACK_NOTIFY(message_begin);
911 
912         break;
913       }
914 
915       case s_req_method:
916       {
917         const char *matcher;
918         if (ch == '\0') {
919           SET_ERRNO(HPE_INVALID_METHOD);
920           goto error;
921         }
922 
923         matcher = method_strings[parser->method];
924         if (ch == ' ' && matcher[parser->index] == '\0') {
925           parser->state = s_req_spaces_before_url;
926         } else if (ch == matcher[parser->index]) {
927           ; /* nada */
928         } else if (parser->method == HTTP_CONNECT) {
929           if (parser->index == 1 && ch == 'H') {
930             parser->method = HTTP_CHECKOUT;
931           } else if (parser->index == 2  && ch == 'P') {
932             parser->method = HTTP_COPY;
933           } else {
934             goto error;
935           }
936         } else if (parser->method == HTTP_MKCOL) {
937           if (parser->index == 1 && ch == 'O') {
938             parser->method = HTTP_MOVE;
939           } else if (parser->index == 1 && ch == 'E') {
940             parser->method = HTTP_MERGE;
941           } else if (parser->index == 1 && ch == '-') {
942             parser->method = HTTP_MSEARCH;
943           } else if (parser->index == 2 && ch == 'A') {
944             parser->method = HTTP_MKACTIVITY;
945           } else {
946             goto error;
947           }
948         } else if (parser->method == HTTP_SUBSCRIBE) {
949           if (parser->index == 1 && ch == 'E') {
950             parser->method = HTTP_SEARCH;
951           } else {
952             goto error;
953           }
954         } else if (parser->index == 1 && parser->method == HTTP_POST) {
955           if (ch == 'R') {
956             parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
957           } else if (ch == 'U') {
958             parser->method = HTTP_PUT; /* or HTTP_PURGE */
959           } else if (ch == 'A') {
960             parser->method = HTTP_PATCH;
961           } else {
962             goto error;
963           }
964         } else if (parser->index == 2) {
965           if (parser->method == HTTP_PUT) {
966             if (ch == 'R') parser->method = HTTP_PURGE;
967           } else if (parser->method == HTTP_UNLOCK) {
968             if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
969           }
970         } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
971           parser->method = HTTP_PROPPATCH;
972         } else {
973           SET_ERRNO(HPE_INVALID_METHOD);
974           goto error;
975         }
976 
977         ++parser->index;
978         break;
979       }
980 
981       case s_req_spaces_before_url:
982       {
983         if (ch == ' ') break;
984 
985         MARK(url);
986         if (parser->method == HTTP_CONNECT) {
987           parser->state = s_req_server_start;
988         }
989 
990         parser->state = parse_url_char((enum state)parser->state, ch);
991         if (parser->state == s_dead) {
992           SET_ERRNO(HPE_INVALID_URL);
993           goto error;
994         }
995 
996         break;
997       }
998 
999       case s_req_schema:
1000       case s_req_schema_slash:
1001       case s_req_schema_slash_slash:
1002       case s_req_server_start:
1003       {
1004         switch (ch) {
1005           /* No whitespace allowed here */
1006           case ' ':
1007           case CR:
1008           case LF:
1009             SET_ERRNO(HPE_INVALID_URL);
1010             goto error;
1011           default:
1012             parser->state = parse_url_char((enum state)parser->state, ch);
1013             if (parser->state == s_dead) {
1014               SET_ERRNO(HPE_INVALID_URL);
1015               goto error;
1016             }
1017         }
1018 
1019         break;
1020       }
1021 
1022       case s_req_server:
1023       case s_req_server_with_at:
1024       case s_req_path:
1025       case s_req_query_string_start:
1026       case s_req_query_string:
1027       case s_req_fragment_start:
1028       case s_req_fragment:
1029       {
1030         switch (ch) {
1031           case ' ':
1032             parser->state = s_req_http_start;
1033             CALLBACK_DATA(url);
1034             break;
1035           case CR:
1036           case LF:
1037             parser->http_major = 0;
1038             parser->http_minor = 9;
1039             parser->state = (ch == CR) ?
1040               s_req_line_almost_done :
1041               s_header_field_start;
1042             CALLBACK_DATA(url);
1043             break;
1044           default:
1045             parser->state = parse_url_char((enum state)parser->state, ch);
1046             if (parser->state == s_dead) {
1047               SET_ERRNO(HPE_INVALID_URL);
1048               goto error;
1049             }
1050         }
1051         break;
1052       }
1053 
1054       case s_req_http_start:
1055         switch (ch) {
1056           case 'H':
1057             parser->state = s_req_http_H;
1058             break;
1059           case ' ':
1060             break;
1061           default:
1062             SET_ERRNO(HPE_INVALID_CONSTANT);
1063             goto error;
1064         }
1065         break;
1066 
1067       case s_req_http_H:
1068         STRICT_CHECK(ch != 'T');
1069         parser->state = s_req_http_HT;
1070         break;
1071 
1072       case s_req_http_HT:
1073         STRICT_CHECK(ch != 'T');
1074         parser->state = s_req_http_HTT;
1075         break;
1076 
1077       case s_req_http_HTT:
1078         STRICT_CHECK(ch != 'P');
1079         parser->state = s_req_http_HTTP;
1080         break;
1081 
1082       case s_req_http_HTTP:
1083         STRICT_CHECK(ch != '/');
1084         parser->state = s_req_first_http_major;
1085         break;
1086 
1087       /* first digit of major HTTP version */
1088       case s_req_first_http_major:
1089         if (ch < '1' || ch > '9') {
1090           SET_ERRNO(HPE_INVALID_VERSION);
1091           goto error;
1092         }
1093 
1094         parser->http_major = ch - '0';
1095         parser->state = s_req_http_major;
1096         break;
1097 
1098       /* major HTTP version or dot */
1099       case s_req_http_major:
1100       {
1101         if (ch == '.') {
1102           parser->state = s_req_first_http_minor;
1103           break;
1104         }
1105 
1106         if (!IS_NUM(ch)) {
1107           SET_ERRNO(HPE_INVALID_VERSION);
1108           goto error;
1109         }
1110 
1111         parser->http_major *= 10;
1112         parser->http_major += ch - '0';
1113 
1114         if (parser->http_major > 999) {
1115           SET_ERRNO(HPE_INVALID_VERSION);
1116           goto error;
1117         }
1118 
1119         break;
1120       }
1121 
1122       /* first digit of minor HTTP version */
1123       case s_req_first_http_minor:
1124         if (!IS_NUM(ch)) {
1125           SET_ERRNO(HPE_INVALID_VERSION);
1126           goto error;
1127         }
1128 
1129         parser->http_minor = ch - '0';
1130         parser->state = s_req_http_minor;
1131         break;
1132 
1133       /* minor HTTP version or end of request line */
1134       case s_req_http_minor:
1135       {
1136         if (ch == CR) {
1137           parser->state = s_req_line_almost_done;
1138           break;
1139         }
1140 
1141         if (ch == LF) {
1142           parser->state = s_header_field_start;
1143           break;
1144         }
1145 
1146         /* XXX allow spaces after digit? */
1147 
1148         if (!IS_NUM(ch)) {
1149           SET_ERRNO(HPE_INVALID_VERSION);
1150           goto error;
1151         }
1152 
1153         parser->http_minor *= 10;
1154         parser->http_minor += ch - '0';
1155 
1156         if (parser->http_minor > 999) {
1157           SET_ERRNO(HPE_INVALID_VERSION);
1158           goto error;
1159         }
1160 
1161         break;
1162       }
1163 
1164       /* end of request line */
1165       case s_req_line_almost_done:
1166       {
1167         if (ch != LF) {
1168           SET_ERRNO(HPE_LF_EXPECTED);
1169           goto error;
1170         }
1171 
1172         parser->state = s_header_field_start;
1173         break;
1174       }
1175 
1176       case s_header_field_start:
1177       {
1178         if (ch == CR) {
1179           parser->state = s_headers_almost_done;
1180           break;
1181         }
1182 
1183         if (ch == LF) {
1184           /* they might be just sending \n instead of \r\n so this would be
1185            * the second \n to denote the end of headers*/
1186           parser->state = s_headers_almost_done;
1187           goto reexecute_byte;
1188         }
1189 
1190         c = TOKEN(ch);
1191 
1192         if (!c) {
1193           SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1194           goto error;
1195         }
1196 
1197         MARK(header_field);
1198 
1199         parser->index = 0;
1200         parser->state = s_header_field;
1201 
1202         switch (c) {
1203           case 'c':
1204             parser->header_state = h_C;
1205             break;
1206 
1207           case 'p':
1208             parser->header_state = h_matching_proxy_connection;
1209             break;
1210 
1211           case 't':
1212             parser->header_state = h_matching_transfer_encoding;
1213             break;
1214 
1215           case 'u':
1216             parser->header_state = h_matching_upgrade;
1217             break;
1218 
1219           default:
1220             parser->header_state = h_general;
1221             break;
1222         }
1223         break;
1224       }
1225 
1226       case s_header_field:
1227       {
1228         c = TOKEN(ch);
1229 
1230         if (c) {
1231           switch (parser->header_state) {
1232             case h_general:
1233               break;
1234 
1235             case h_C:
1236               parser->index++;
1237               parser->header_state = (c == 'o' ? h_CO : h_general);
1238               break;
1239 
1240             case h_CO:
1241               parser->index++;
1242               parser->header_state = (c == 'n' ? h_CON : h_general);
1243               break;
1244 
1245             case h_CON:
1246               parser->index++;
1247               switch (c) {
1248                 case 'n':
1249                   parser->header_state = h_matching_connection;
1250                   break;
1251                 case 't':
1252                   parser->header_state = h_matching_content_length;
1253                   break;
1254                 default:
1255                   parser->header_state = h_general;
1256                   break;
1257               }
1258               break;
1259 
1260             /* connection */
1261 
1262             case h_matching_connection:
1263               parser->index++;
1264               if (parser->index > sizeof(CONNECTION)-1
1265                   || c != CONNECTION[parser->index]) {
1266                 parser->header_state = h_general;
1267               } else if (parser->index == sizeof(CONNECTION)-2) {
1268                 parser->header_state = h_connection;
1269               }
1270               break;
1271 
1272             /* proxy-connection */
1273 
1274             case h_matching_proxy_connection:
1275               parser->index++;
1276               if (parser->index > sizeof(PROXY_CONNECTION)-1
1277                   || c != PROXY_CONNECTION[parser->index]) {
1278                 parser->header_state = h_general;
1279               } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1280                 parser->header_state = h_connection;
1281               }
1282               break;
1283 
1284             /* content-length */
1285 
1286             case h_matching_content_length:
1287               parser->index++;
1288               if (parser->index > sizeof(CONTENT_LENGTH)-1
1289                   || c != CONTENT_LENGTH[parser->index]) {
1290                 parser->header_state = h_general;
1291               } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1292                 parser->header_state = h_content_length;
1293               }
1294               break;
1295 
1296             /* transfer-encoding */
1297 
1298             case h_matching_transfer_encoding:
1299               parser->index++;
1300               if (parser->index > sizeof(TRANSFER_ENCODING)-1
1301                   || c != TRANSFER_ENCODING[parser->index]) {
1302                 parser->header_state = h_general;
1303               } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1304                 parser->header_state = h_transfer_encoding;
1305               }
1306               break;
1307 
1308             /* upgrade */
1309 
1310             case h_matching_upgrade:
1311               parser->index++;
1312               if (parser->index > sizeof(UPGRADE)-1
1313                   || c != UPGRADE[parser->index]) {
1314                 parser->header_state = h_general;
1315               } else if (parser->index == sizeof(UPGRADE)-2) {
1316                 parser->header_state = h_upgrade;
1317               }
1318               break;
1319 
1320             case h_connection:
1321             case h_content_length:
1322             case h_transfer_encoding:
1323             case h_upgrade:
1324               if (ch != ' ') parser->header_state = h_general;
1325               break;
1326 
1327             default:
1328               assert(0 && "Unknown header_state");
1329               break;
1330           }
1331           break;
1332         }
1333 
1334         if (ch == ':') {
1335           parser->state = s_header_value_start;
1336           CALLBACK_DATA(header_field);
1337           break;
1338         }
1339 
1340         if (ch == CR) {
1341           parser->state = s_header_almost_done;
1342           CALLBACK_DATA(header_field);
1343           break;
1344         }
1345 
1346         if (ch == LF) {
1347           parser->state = s_header_field_start;
1348           CALLBACK_DATA(header_field);
1349           break;
1350         }
1351 
1352         SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1353         goto error;
1354       }
1355 
1356       case s_header_value_start:
1357       {
1358         if (ch == ' ' || ch == '\t') break;
1359 
1360         MARK(header_value);
1361 
1362         parser->state = s_header_value;
1363         parser->index = 0;
1364 
1365         if (ch == CR) {
1366           parser->header_state = h_general;
1367           parser->state = s_header_almost_done;
1368           CALLBACK_DATA(header_value);
1369           break;
1370         }
1371 
1372         if (ch == LF) {
1373           parser->state = s_header_field_start;
1374           CALLBACK_DATA(header_value);
1375           break;
1376         }
1377 
1378         c = LOWER(ch);
1379 
1380         switch (parser->header_state) {
1381           case h_upgrade:
1382             parser->flags |= F_UPGRADE;
1383             parser->header_state = h_general;
1384             break;
1385 
1386           case h_transfer_encoding:
1387             /* looking for 'Transfer-Encoding: chunked' */
1388             if ('c' == c) {
1389               parser->header_state = h_matching_transfer_encoding_chunked;
1390             } else {
1391               parser->header_state = h_general;
1392             }
1393             break;
1394 
1395           case h_content_length:
1396             if (!IS_NUM(ch)) {
1397               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1398               goto error;
1399             }
1400 
1401             parser->content_length = ch - '0';
1402             break;
1403 
1404           case h_connection:
1405             /* looking for 'Connection: keep-alive' */
1406             if (c == 'k') {
1407               parser->header_state = h_matching_connection_keep_alive;
1408             /* looking for 'Connection: close' */
1409             } else if (c == 'c') {
1410               parser->header_state = h_matching_connection_close;
1411             } else {
1412               parser->header_state = h_general;
1413             }
1414             break;
1415 
1416           default:
1417             parser->header_state = h_general;
1418             break;
1419         }
1420         break;
1421       }
1422 
1423       case s_header_value:
1424       {
1425 
1426         if (ch == CR) {
1427           parser->state = s_header_almost_done;
1428           CALLBACK_DATA(header_value);
1429           break;
1430         }
1431 
1432         if (ch == LF) {
1433           parser->state = s_header_almost_done;
1434           CALLBACK_DATA_NOADVANCE(header_value);
1435           goto reexecute_byte;
1436         }
1437 
1438         c = LOWER(ch);
1439 
1440         switch (parser->header_state) {
1441           case h_general:
1442             break;
1443 
1444           case h_connection:
1445           case h_transfer_encoding:
1446             assert(0 && "Shouldn't get here.");
1447             break;
1448 
1449           case h_content_length:
1450           {
1451             uint64_t t;
1452 
1453             if (ch == ' ') break;
1454 
1455             if (!IS_NUM(ch)) {
1456               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1457               goto error;
1458             }
1459 
1460             t = parser->content_length;
1461             t *= 10;
1462             t += ch - '0';
1463 
1464             /* Overflow? */
1465             if (t < parser->content_length || t == ULLONG_MAX) {
1466               SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1467               goto error;
1468             }
1469 
1470             parser->content_length = t;
1471             break;
1472           }
1473 
1474           /* Transfer-Encoding: chunked */
1475           case h_matching_transfer_encoding_chunked:
1476             parser->index++;
1477             if (parser->index > sizeof(CHUNKED)-1
1478                 || c != CHUNKED[parser->index]) {
1479               parser->header_state = h_general;
1480             } else if (parser->index == sizeof(CHUNKED)-2) {
1481               parser->header_state = h_transfer_encoding_chunked;
1482             }
1483             break;
1484 
1485           /* looking for 'Connection: keep-alive' */
1486           case h_matching_connection_keep_alive:
1487             parser->index++;
1488             if (parser->index > sizeof(KEEP_ALIVE)-1
1489                 || c != KEEP_ALIVE[parser->index]) {
1490               parser->header_state = h_general;
1491             } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1492               parser->header_state = h_connection_keep_alive;
1493             }
1494             break;
1495 
1496           /* looking for 'Connection: close' */
1497           case h_matching_connection_close:
1498             parser->index++;
1499             if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1500               parser->header_state = h_general;
1501             } else if (parser->index == sizeof(CLOSE)-2) {
1502               parser->header_state = h_connection_close;
1503             }
1504             break;
1505 
1506           case h_transfer_encoding_chunked:
1507           case h_connection_keep_alive:
1508           case h_connection_close:
1509             if (ch != ' ') parser->header_state = h_general;
1510             break;
1511 
1512           default:
1513             parser->state = s_header_value;
1514             parser->header_state = h_general;
1515             break;
1516         }
1517         break;
1518       }
1519 
1520       case s_header_almost_done:
1521       {
1522         STRICT_CHECK(ch != LF);
1523 
1524         parser->state = s_header_value_lws;
1525 
1526         switch (parser->header_state) {
1527           case h_connection_keep_alive:
1528             parser->flags |= F_CONNECTION_KEEP_ALIVE;
1529             break;
1530           case h_connection_close:
1531             parser->flags |= F_CONNECTION_CLOSE;
1532             break;
1533           case h_transfer_encoding_chunked:
1534             parser->flags |= F_CHUNKED;
1535             break;
1536           default:
1537             break;
1538         }
1539 
1540         break;
1541       }
1542 
1543       case s_header_value_lws:
1544       {
1545         if (ch == ' ' || ch == '\t')
1546           parser->state = s_header_value_start;
1547         else
1548         {
1549           parser->state = s_header_field_start;
1550           goto reexecute_byte;
1551         }
1552         break;
1553       }
1554 
1555       case s_headers_almost_done:
1556       {
1557         STRICT_CHECK(ch != LF);
1558 
1559         if (parser->flags & F_TRAILING) {
1560           /* End of a chunked request */
1561           parser->state = NEW_MESSAGE();
1562           CALLBACK_NOTIFY(message_complete);
1563           break;
1564         }
1565 
1566         parser->state = s_headers_done;
1567 
1568         /* Set this here so that on_headers_complete() callbacks can see it */
1569         parser->upgrade =
1570           (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1571 
1572         /* Here we call the headers_complete callback. This is somewhat
1573          * different than other callbacks because if the user returns 1, we
1574          * will interpret that as saying that this message has no body. This
         * is needed for the annoying case of receiving a response to a HEAD
1576          * request.
1577          *
1578          * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1579          * we have to simulate it by handling a change in errno below.
1580          */
1581         if (settings->on_headers_complete) {
1582           switch (settings->on_headers_complete(parser)) {
1583             case 0:
1584               break;
1585 
1586             case 1:
1587               parser->flags |= F_SKIPBODY;
1588               break;
1589 
1590             default:
1591               SET_ERRNO(HPE_CB_headers_complete);
1592               return p - data; /* Error */
1593           }
1594         }
1595 
1596         if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1597           return p - data;
1598         }
1599 
1600         goto reexecute_byte;
1601       }
1602 
1603       case s_headers_done:
1604       {
1605         STRICT_CHECK(ch != LF);
1606 
1607         parser->nread = 0;
1608 
1609         /* Exit, the rest of the connect is in a different protocol. */
1610         if (parser->upgrade) {
1611           parser->state = NEW_MESSAGE();
1612           CALLBACK_NOTIFY(message_complete);
1613           return (p - data) + 1;
1614         }
1615 
1616         if (parser->flags & F_SKIPBODY) {
1617           parser->state = NEW_MESSAGE();
1618           CALLBACK_NOTIFY(message_complete);
1619         } else if (parser->flags & F_CHUNKED) {
1620           /* chunked encoding - ignore Content-Length header */
1621           parser->state = s_chunk_size_start;
1622         } else {
1623           if (parser->content_length == 0) {
1624             /* Content-Length header given but zero: Content-Length: 0\r\n */
1625             parser->state = NEW_MESSAGE();
1626             CALLBACK_NOTIFY(message_complete);
1627           } else if (parser->content_length != ULLONG_MAX) {
1628             /* Content-Length header given and non-zero */
1629             parser->state = s_body_identity;
1630           } else {
1631             if (parser->type == HTTP_REQUEST ||
1632                 !http_message_needs_eof(parser)) {
1633               /* Assume content-length 0 - read the next */
1634               parser->state = NEW_MESSAGE();
1635               CALLBACK_NOTIFY(message_complete);
1636             } else {
1637               /* Read body until EOF */
1638               parser->state = s_body_identity_eof;
1639             }
1640           }
1641         }
1642 
1643         break;
1644       }
1645 
1646       case s_body_identity:
1647       {
1648         uint64_t to_read = MIN(parser->content_length,
1649                                (uint64_t) ((data + len) - p));
1650 
1651         assert(parser->content_length != 0
1652             && parser->content_length != ULLONG_MAX);
1653 
1654         /* The difference between advancing content_length and p is because
         * the latter will automatically advance on the next loop iteration.
1656          * Further, if content_length ends up at 0, we want to see the last
1657          * byte again for our message complete callback.
1658          */
1659         MARK(body);
1660         parser->content_length -= to_read;
1661         p += to_read - 1;
1662 
1663         if (parser->content_length == 0) {
1664           parser->state = s_message_done;
1665 
1666           /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1667            *
1668            * The alternative to doing this is to wait for the next byte to
1669            * trigger the data callback, just as in every other case. The
1670            * problem with this is that this makes it difficult for the test
1671            * harness to distinguish between complete-on-EOF and
1672            * complete-on-length. It's not clear that this distinction is
1673            * important for applications, but let's keep it for now.
1674            */
1675           CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1676           goto reexecute_byte;
1677         }
1678 
1679         break;
1680       }
1681 
1682       /* read until EOF */
1683       case s_body_identity_eof:
1684         MARK(body);
1685         p = data + len - 1;
1686 
1687         break;
1688 
1689       case s_message_done:
1690         parser->state = NEW_MESSAGE();
1691         CALLBACK_NOTIFY(message_complete);
1692         break;
1693 
1694       case s_chunk_size_start:
1695       {
1696         assert(parser->nread == 1);
1697         assert(parser->flags & F_CHUNKED);
1698 
1699         unhex_val = unhex[(unsigned char)ch];
1700         if (unhex_val == -1) {
1701           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1702           goto error;
1703         }
1704 
1705         parser->content_length = unhex_val;
1706         parser->state = s_chunk_size;
1707         break;
1708       }
1709 
1710       case s_chunk_size:
1711       {
1712         uint64_t t;
1713 
1714         assert(parser->flags & F_CHUNKED);
1715 
1716         if (ch == CR) {
1717           parser->state = s_chunk_size_almost_done;
1718           break;
1719         }
1720 
1721         unhex_val = unhex[(unsigned char)ch];
1722 
1723         if (unhex_val == -1) {
1724           if (ch == ';' || ch == ' ') {
1725             parser->state = s_chunk_parameters;
1726             break;
1727           }
1728 
1729           SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1730           goto error;
1731         }
1732 
1733         t = parser->content_length;
1734         t *= 16;
1735         t += unhex_val;
1736 
1737         /* Overflow? */
1738         if (t < parser->content_length || t == ULLONG_MAX) {
1739           SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1740           goto error;
1741         }
1742 
1743         parser->content_length = t;
1744         break;
1745       }
1746 
1747       case s_chunk_parameters:
1748       {
1749         assert(parser->flags & F_CHUNKED);
1750         /* just ignore this. TODO check for overflow */
1751         if (ch == CR) {
1752           parser->state = s_chunk_size_almost_done;
1753           break;
1754         }
1755         break;
1756       }
1757 
1758       case s_chunk_size_almost_done:
1759       {
1760         assert(parser->flags & F_CHUNKED);
1761         STRICT_CHECK(ch != LF);
1762 
1763         parser->nread = 0;
1764 
1765         if (parser->content_length == 0) {
1766           parser->flags |= F_TRAILING;
1767           parser->state = s_header_field_start;
1768         } else {
1769           parser->state = s_chunk_data;
1770         }
1771         break;
1772       }
1773 
1774       case s_chunk_data:
1775       {
1776         uint64_t to_read = MIN(parser->content_length,
1777                                (uint64_t) ((data + len) - p));
1778 
1779         assert(parser->flags & F_CHUNKED);
1780         assert(parser->content_length != 0
1781             && parser->content_length != ULLONG_MAX);
1782 
1783         /* See the explanation in s_body_identity for why the content
1784          * length and data pointers are managed this way.
1785          */
1786         MARK(body);
1787         parser->content_length -= to_read;
1788         p += to_read - 1;
1789 
1790         if (parser->content_length == 0) {
1791           parser->state = s_chunk_data_almost_done;
1792         }
1793 
1794         break;
1795       }
1796 
1797       case s_chunk_data_almost_done:
1798         assert(parser->flags & F_CHUNKED);
1799         assert(parser->content_length == 0);
1800         STRICT_CHECK(ch != CR);
1801         parser->state = s_chunk_data_done;
1802         CALLBACK_DATA(body);
1803         break;
1804 
1805       case s_chunk_data_done:
1806         assert(parser->flags & F_CHUNKED);
1807         STRICT_CHECK(ch != LF);
1808         parser->nread = 0;
1809         parser->state = s_chunk_size_start;
1810         break;
1811 
1812       default:
1813         assert(0 && "unhandled state");
1814         SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1815         goto error;
1816     }
1817   }
1818 
  /* Run callbacks for any marks that we have leftover after we ran out of
1820    * bytes. There should be at most one of these set, so it's OK to invoke
1821    * them in series (unset marks will not result in callbacks).
1822    *
1823    * We use the NOADVANCE() variety of callbacks here because 'p' has already
1824    * overflowed 'data' and this allows us to correct for the off-by-one that
1825    * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1826    * value that's in-bounds).
1827    */
1828 
1829   assert(((header_field_mark ? 1 : 0) +
1830           (header_value_mark ? 1 : 0) +
1831           (url_mark ? 1 : 0)  +
1832           (body_mark ? 1 : 0)) <= 1);
1833 
1834   CALLBACK_DATA_NOADVANCE(header_field);
1835   CALLBACK_DATA_NOADVANCE(header_value);
1836   CALLBACK_DATA_NOADVANCE(url);
1837   CALLBACK_DATA_NOADVANCE(body);
1838 
1839   return len;
1840 
1841 error:
1842   if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1843     SET_ERRNO(HPE_UNKNOWN);
1844   }
1845 
1846   return (p - data);
1847 }
1848 
1849 
1850 /* Does the parser need to see an EOF to find the end of the message? */
1851 int
http_message_needs_eof(const http_parser * parser)1852 http_message_needs_eof (const http_parser *parser)
1853 {
1854   if (parser->type == HTTP_REQUEST) {
1855     return 0;
1856   }
1857 
1858   /* See RFC 2616 section 4.4 */
1859   if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1860       parser->status_code == 204 ||     /* No Content */
1861       parser->status_code == 304 ||     /* Not Modified */
1862       parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
1863     return 0;
1864   }
1865 
1866   if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1867     return 0;
1868   }
1869 
1870   return 1;
1871 }
1872 
1873 
1874 int
http_should_keep_alive(const http_parser * parser)1875 http_should_keep_alive (const http_parser *parser)
1876 {
1877   if (parser->http_major > 0 && parser->http_minor > 0) {
1878     /* HTTP/1.1 */
1879     if (parser->flags & F_CONNECTION_CLOSE) {
1880       return 0;
1881     }
1882   } else {
1883     /* HTTP/1.0 or earlier */
1884     if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1885       return 0;
1886     }
1887   }
1888 
1889   return !http_message_needs_eof(parser);
1890 }
1891 
1892 
1893 const char *
http_method_str(enum http_method m)1894 http_method_str (enum http_method m)
1895 {
1896   return ELEM_AT(method_strings, m, "<unknown>");
1897 }
1898 
1899 
1900 void
http_parser_init(http_parser * parser,enum http_parser_type t)1901 http_parser_init (http_parser *parser, enum http_parser_type t)
1902 {
1903   void *data = parser->data; /* preserve application data */
1904   memset(parser, 0, sizeof(*parser));
1905   parser->data = data;
1906   parser->type = t;
1907   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1908   parser->http_errno = HPE_OK;
1909 }
1910 
1911 const char *
http_errno_name(enum http_errno err)1912 http_errno_name(enum http_errno err) {
1913   assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1914   return http_strerror_tab[err].name;
1915 }
1916 
1917 const char *
http_errno_description(enum http_errno err)1918 http_errno_description(enum http_errno err) {
1919   assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1920   return http_strerror_tab[err].description;
1921 }
1922 
1923 static enum http_host_state
http_parse_host_char(enum http_host_state s,const char ch)1924 http_parse_host_char(enum http_host_state s, const char ch) {
1925   switch(s) {
1926     case s_http_userinfo:
1927     case s_http_userinfo_start:
1928       if (ch == '@') {
1929         return s_http_host_start;
1930       }
1931 
1932       if (IS_USERINFO_CHAR(ch)) {
1933         return s_http_userinfo;
1934       }
1935       break;
1936 
1937     case s_http_host_start:
1938       if (ch == '[') {
1939         return s_http_host_v6_start;
1940       }
1941 
1942       if (IS_HOST_CHAR(ch)) {
1943         return s_http_host;
1944       }
1945 
1946       break;
1947 
1948     case s_http_host:
1949       if (IS_HOST_CHAR(ch)) {
1950         return s_http_host;
1951       }
1952 
1953     /* FALLTHROUGH */
1954     case s_http_host_v6_end:
1955       if (ch == ':') {
1956         return s_http_host_port_start;
1957       }
1958 
1959       break;
1960 
1961     case s_http_host_v6:
1962       if (ch == ']') {
1963         return s_http_host_v6_end;
1964       }
1965 
1966     /* FALLTHROUGH */
1967     case s_http_host_v6_start:
1968       if (IS_HEX(ch) || ch == ':') {
1969         return s_http_host_v6;
1970       }
1971 
1972       break;
1973 
1974     case s_http_host_port:
1975     case s_http_host_port_start:
1976       if (IS_NUM(ch)) {
1977         return s_http_host_port;
1978       }
1979 
1980       break;
1981 
1982     default:
1983       break;
1984   }
1985   return s_http_host_dead;
1986 }
1987 
1988 static int
http_parse_host(const char * buf,struct http_parser_url * u,int found_at)1989 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1990   enum http_host_state s;
1991 
1992   const char *p;
1993   size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1994 
1995   u->field_data[UF_HOST].len = 0;
1996 
1997   s = found_at ? s_http_userinfo_start : s_http_host_start;
1998 
1999   for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2000     enum http_host_state new_s = http_parse_host_char(s, *p);
2001 
2002     if (new_s == s_http_host_dead) {
2003       return 1;
2004     }
2005 
2006     switch(new_s) {
2007       case s_http_host:
2008         if (s != s_http_host) {
2009           u->field_data[UF_HOST].off = p - buf;
2010         }
2011         u->field_data[UF_HOST].len++;
2012         break;
2013 
2014       case s_http_host_v6:
2015         if (s != s_http_host_v6) {
2016           u->field_data[UF_HOST].off = p - buf;
2017         }
2018         u->field_data[UF_HOST].len++;
2019         break;
2020 
2021       case s_http_host_port:
2022         if (s != s_http_host_port) {
2023           u->field_data[UF_PORT].off = p - buf;
2024           u->field_data[UF_PORT].len = 0;
2025           u->field_set |= (1 << UF_PORT);
2026         }
2027         u->field_data[UF_PORT].len++;
2028         break;
2029 
2030       case s_http_userinfo:
2031         if (s != s_http_userinfo) {
2032           u->field_data[UF_USERINFO].off = p - buf ;
2033           u->field_data[UF_USERINFO].len = 0;
2034           u->field_set |= (1 << UF_USERINFO);
2035         }
2036         u->field_data[UF_USERINFO].len++;
2037         break;
2038 
2039       default:
2040         break;
2041     }
2042     s = new_s;
2043   }
2044 
2045   /* Make sure we don't end somewhere unexpected */
2046   switch (s) {
2047     case s_http_host_start:
2048     case s_http_host_v6_start:
2049     case s_http_host_v6:
2050     case s_http_host_port_start:
2051     case s_http_userinfo:
2052     case s_http_userinfo_start:
2053       return 1;
2054     default:
2055       break;
2056   }
2057 
2058   return 0;
2059 }
2060 
2061 int
http_parser_parse_url(const char * buf,size_t buflen,int is_connect,struct http_parser_url * u)2062 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2063                       struct http_parser_url *u)
2064 {
2065   enum state s;
2066   const char *p;
2067   enum http_parser_url_fields uf, old_uf;
2068   int found_at = 0;
2069 
2070   u->port = u->field_set = 0;
2071   s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2072   uf = old_uf = UF_MAX;
2073 
2074   for (p = buf; p < buf + buflen; p++) {
2075     s = parse_url_char(s, *p);
2076 
2077     /* Figure out the next field that we're operating on */
2078     switch (s) {
2079       case s_dead:
2080         return 1;
2081 
2082       /* Skip delimeters */
2083       case s_req_schema_slash:
2084       case s_req_schema_slash_slash:
2085       case s_req_server_start:
2086       case s_req_query_string_start:
2087       case s_req_fragment_start:
2088         continue;
2089 
2090       case s_req_schema:
2091         uf = UF_SCHEMA;
2092         break;
2093 
2094       case s_req_server_with_at:
2095         found_at = 1;
2096 
2097       /* FALLTROUGH */
2098       case s_req_server:
2099         uf = UF_HOST;
2100         break;
2101 
2102       case s_req_path:
2103         uf = UF_PATH;
2104         break;
2105 
2106       case s_req_query_string:
2107         uf = UF_QUERY;
2108         break;
2109 
2110       case s_req_fragment:
2111         uf = UF_FRAGMENT;
2112         break;
2113 
2114       default:
2115         assert(!"Unexpected state");
2116         return 1;
2117     }
2118 
2119     /* Nothing's changed; soldier on */
2120     if (uf == old_uf) {
2121       u->field_data[uf].len++;
2122       continue;
2123     }
2124 
2125     u->field_data[uf].off = p - buf;
2126     u->field_data[uf].len = 1;
2127 
2128     u->field_set |= (1 << uf);
2129     old_uf = uf;
2130   }
2131 
2132   /* host must be present if there is a schema */
2133   /* parsing http:///toto will fail */
2134   if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2135     if (http_parse_host(buf, u, found_at) != 0) {
2136       return 1;
2137     }
2138   }
2139 
2140   /* CONNECT requests can only contain "hostname:port" */
2141   if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2142     return 1;
2143   }
2144 
2145   if (u->field_set & (1 << UF_PORT)) {
2146     /* Don't bother with endp; we've already validated the string */
2147     unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2148 
2149     /* Ports have a max value of 2^16 */
2150     if (v > 0xffff) {
2151       return 1;
2152     }
2153 
2154     u->port = (uint16_t) v;
2155   }
2156 
2157   return 0;
2158 }
2159 
2160 void
http_parser_pause(http_parser * parser,int paused)2161 http_parser_pause(http_parser *parser, int paused) {
2162   /* Users should only be pausing/unpausing a parser that is not in an error
2163    * state. In non-debug builds, there's not much that we can do about this
2164    * other than ignore it.
2165    */
2166   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2167       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2168     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2169   } else {
2170     assert(0 && "Attempting to pause parser in error state");
2171   }
2172 }
2173 
2174 int
http_body_is_final(const struct http_parser * parser)2175 http_body_is_final(const struct http_parser *parser) {
2176     return parser->state == s_message_done;
2177 }
2178