1 /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to
5  * deal in the Software without restriction, including without limitation the
6  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7  * sell copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19  * IN THE SOFTWARE.
20  */
21 #ifndef http_parser_h
22 #define http_parser_h
23 
24 #define HTTP_PARSER_VERSION_MAJOR 1
25 #define HTTP_PARSER_VERSION_MINOR 0
26 
27 #include <sys/types.h>
28 #if defined(_WIN32) && !defined(__MINGW32__) && \
29   (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__)
30 #include <BaseTsd.h>  // @manual
31 #include <stddef.h>
32 typedef __int8 int8_t;
33 typedef unsigned __int8 uint8_t;
34 typedef __int16 int16_t;
35 typedef unsigned __int16 uint16_t;
36 typedef __int32 int32_t;
37 typedef unsigned __int32 uint32_t;
38 typedef __int64 int64_t;
39 typedef unsigned __int64 uint64_t;
40 #else
41 #include <stdint.h>
42 #endif
43 
44 #if __cplusplus
45 namespace proxygen {
46 #endif /* __cplusplus */
47 
/* Strict URL parsing switch. Build with -DHTTP_PARSER_STRICT_URL=1 to have
 * URLs parsed strictly according to the RFCs; it defaults to 0 when the
 * build does not define it.
 */
#if !defined(HTTP_PARSER_STRICT_URL)
#define HTTP_PARSER_STRICT_URL 0
#endif
54 
/* Strict hostname parsing switch. Build with -DHTTP_PARSER_STRICT_HOSTNAME=1
 * to have hostnames parsed strictly according to the RFCs; it defaults to 0
 * when the build does not define it.
 */
#if !defined(HTTP_PARSER_STRICT_HOSTNAME)
#define HTTP_PARSER_STRICT_HOSTNAME 0
#endif
61 
/* Debug switch. Build with -DHTTP_PARSER_DEBUG=1 to add extra debugging
 * information to the error reporting facility; it defaults to 0 when the
 * build does not define it.
 */
#if !defined(HTTP_PARSER_DEBUG)
#define HTTP_PARSER_DEBUG 0
#endif
68 
69 
/* Maximum header size allowed, in bytes (80 KiB) */
#define HTTP_MAX_HEADER_SIZE (80 * 1024)
72 
73 
/* Forward declarations that let the struct tags be used without the
 * `struct` keyword. Note that no `struct http_parser_result` is defined
 * in this header; the alias is kept for source compatibility.
 */
typedef struct http_parser_settings http_parser_settings;
typedef struct http_parser_result http_parser_result;
typedef struct http_parser http_parser;
77 
78 
/* Callbacks should return non-zero to indicate an error. The parser will
 * then halt execution.
 *
 * The one exception is on_headers_complete. In an HTTP_RESPONSE parser,
 * returning '1' from on_headers_complete will tell the parser that it
 * should not expect a body. This is used when receiving a response to a
 * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
 * chunked' headers that indicate the presence of a body.
 *
 * http_data_cb does not return data chunks. It will be called arbitrarily
 * many times for each string. E.g. you might get 10 callbacks for "on_url",
 * each providing just a few characters more data.
 */
92 typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
93 typedef int (*http_cb) (http_parser*);
94 
95 
/* Request methods.
 *
 * Enumerators are numbered consecutively starting at HTTP_DELETE = 0, in
 * the order listed; do not reorder them.
 */
enum http_method {
  HTTP_DELETE = 0,
  HTTP_GET,
  HTTP_HEAD,
  HTTP_POST,
  HTTP_PUT,

  /* pathological */
  HTTP_CONNECT,
  HTTP_OPTIONS,
  HTTP_TRACE,

  /* webdav */
  HTTP_COPY,
  HTTP_LOCK,
  HTTP_MKCOL,
  HTTP_MOVE,
  HTTP_PROPFIND,
  HTTP_PROPPATCH,
  HTTP_UNLOCK,

  /* subversion */
  HTTP_REPORT,
  HTTP_MKACTIVITY,
  HTTP_CHECKOUT,
  HTTP_MERGE,

  /* upnp */
  HTTP_MSEARCH,
  HTTP_NOTIFY,
  HTTP_SUBSCRIBE,
  HTTP_UNSUBSCRIBE,

  /* RFC-5789 */
  HTTP_PATCH
};
128 
129 
/* Kind of message a parser handles; this is the value kept in the
 * 2-bit http_parser.type field.
 */
enum http_parser_type {
  HTTP_REQUEST,
  HTTP_RESPONSE,
  HTTP_BOTH
};
131 
132 
/* Bit flags stored in the http_parser.flags field.
 *
 * Bits 1 and 2 are not assigned by this enum; the highest bit used is
 * bit 5, which fits the 6-bit flags field.
 */
enum flags {
  F_CHUNKED  = 1 << 0,
  F_TRAILING = 1 << 3,
  F_UPGRADE  = 1 << 4,
  F_SKIPBODY = 1 << 5
};
140 
141 
/* X-macro table of every parser error.
 *
 * XX must be a macro taking two arguments: the short error name and a
 * string literal describing it. XX is expanded once per entry, in the
 * order listed below, so the order determines any values generated from
 * this table.
 */
#define HTTP_ERRNO_MAP(XX) \
  /* No error */ \
  XX(OK, "success") \
  \
  /* Callback-related errors */ \
  XX(CB_message_begin, "the on_message_begin callback failed") \
  XX(CB_path, "the on_path callback failed") \
  XX(CB_query_string, "the on_query_string callback failed") \
  XX(CB_url, "the on_url callback failed") \
  XX(CB_fragment, "the on_fragment callback failed") \
  XX(CB_header_field, "the on_header_field callback failed") \
  XX(CB_header_value, "the on_header_value callback failed") \
  XX(CB_headers_complete, "the on_headers_complete callback failed") \
  XX(CB_body, "the on_body callback failed") \
  XX(CB_message_complete, "the on_message_complete callback failed") \
  XX(CB_reason, "the on_reason callback failed") \
  XX(CB_chunk_header, "the on_chunk_header callback failed") \
  XX(CB_chunk_complete, "the on_chunk_complete callback failed") \
  \
  /* Parsing-related errors */ \
  XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \
  XX(HEADER_OVERFLOW, "too many header bytes seen; overflow detected") \
  XX(CLOSED_CONNECTION, "data received after completed connection: close message") \
  XX(INVALID_VERSION, "invalid HTTP version") \
  XX(INVALID_STATUS, "invalid HTTP status code") \
  XX(INVALID_METHOD, "invalid HTTP method") \
  XX(INVALID_URL, "invalid URL") \
  XX(INVALID_HOST, "invalid host") \
  XX(INVALID_PORT, "invalid port") \
  XX(INVALID_PATH, "invalid path") \
  XX(INVALID_QUERY_STRING, "invalid query string") \
  XX(INVALID_FRAGMENT, "invalid fragment") \
  XX(LF_EXPECTED, "LF character expected") \
  XX(INVALID_HEADER_TOKEN, "invalid character in header") \
  XX(INVALID_CONTENT_LENGTH, "invalid character in content-length header") \
  XX(HUGE_CONTENT_LENGTH, "content-length header too large") \
  XX(INVALID_CHUNK_SIZE, "invalid character in chunk size header") \
  XX(HUGE_CHUNK_SIZE, "chunk header size too large") \
  XX(INVALID_TRANSFER_ENCODING, "invalid character in transfer-encoding header") \
  XX(INVALID_UPGRADE, "invalid character in upgrade header") \
  XX(INVALID_CONSTANT, "invalid constant string") \
  XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state") \
  XX(STRICT, "strict mode assertion failed") \
  XX(PAUSED, "parser is paused") \
  XX(UNKNOWN, "an unknown error occurred")
199 
200 
201 /* Define HPE_* values for each errno value above */
202 #define HTTP_ERRNO_GEN(n, s) HPE_##n,
203 enum http_errno {
204   HTTP_ERRNO_MAP(HTTP_ERRNO_GEN)
205 };
206 #undef HTTP_ERRNO_GEN
207 
208 
/* Read the error code stored in parser `p`, typed as enum http_errno */
#define HTTP_PARSER_ERRNO(p) ((enum http_errno)((p)->http_errno))
211 
/* Line number that generated the current error of parser `p`.
 * Without HTTP_PARSER_DEBUG the parser has no error_lineno field, so the
 * macro expands to the constant 0 and does not evaluate `p`.
 */
#if !HTTP_PARSER_DEBUG
#define HTTP_PARSER_ERRNO_LINE(p) 0
#else
#define HTTP_PARSER_ERRNO_LINE(p) ((p)->error_lineno)
#endif
218 
219 
/* Parser state.
 *
 * Fields are grouped by who may touch them: PRIVATE fields belong to the
 * parser implementation, READ-ONLY fields may be inspected by callers, and
 * PUBLIC fields are free for callers to use.
 */
struct http_parser {
  /** PRIVATE **/
  unsigned char type : 2;     /* enum http_parser_type */
  unsigned char flags : 6;    /* F_* values from 'flags' enum; semi-public */
  unsigned char state;        /* enum state from http_parser.c */
  unsigned char header_state; /* enum header_state from http_parser.c */
  unsigned char index;        /* index into current matcher */

  uint32_t nread;          /* # bytes read in various scenarios */
  int64_t content_length;  /* # bytes in body (0 if no Content-Length header) */

  /** READ-ONLY **/
  unsigned short http_major;
  unsigned short http_minor;
  unsigned short status_code; /* responses only */
  unsigned char method;       /* requests only */
  unsigned char http_errno : 7; /* enum http_errno; see HTTP_PARSER_ERRNO() */

  /* 1 = Upgrade header was present and the parser has exited because of that.
   * 0 = No upgrade header present.
   * Should be checked when http_parser_execute() returns in addition to
   * error checking.
   *
   * Declared unsigned on purpose: the signedness of a plain `char`
   * bit-field is implementation-defined, and where it is signed a 1-bit
   * field can only hold 0 and -1, so a stored 1 would read back as -1 and
   * `upgrade == 1` would never be true.
   */
  unsigned char upgrade : 1;

#if HTTP_PARSER_DEBUG
  uint32_t error_lineno;      /* read via HTTP_PARSER_ERRNO_LINE() */
#endif

  /** PUBLIC **/
  void *data; /* A pointer to get hook to the "connection" or "socket" object */
};
252 
253 
254 struct http_parser_settings {
255   http_cb      on_message_begin;
256   http_data_cb on_url;
257   http_data_cb on_header_field;
258   http_data_cb on_header_value;
259   http_data_cb on_headers_complete;
260   http_data_cb on_body;
261   http_cb      on_message_complete;
262   http_data_cb on_reason;
263   /* When on_chunk_header is called, the current chunk length is stored
264    * in parser->content_length.
265    */
266   http_cb      on_chunk_header;
267   http_cb      on_chunk_complete;
268 };
269 
270 
/* Identifiers for the components of a parsed URL. Each value is both an
 * index into http_parser_url.field_data[] and a bit position in
 * http_parser_url.field_set; UF_MAX is the number of components.
 */
enum http_parser_url_fields {
  UF_SCHEMA   = 0,
  UF_HOST     = 1,
  UF_PORT     = 2,
  UF_PATH     = 3,
  UF_QUERY    = 4,
  UF_FRAGMENT = 5,
  UF_USERINFO = 6,
  UF_MAX      = 7
};
281 
282 
283 /* Result structure for http_parser_parse_url().
284  *
285  * Callers should index into field_data[] with UF_* values iff field_set
286  * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
287  * because we probably have padding left over), we convert any port to
288  * a uint16_t.
289  */
290 struct http_parser_url {
291   uint16_t field_set;           /* Bitmask of (1 << UF_*) values */
292   uint16_t port;                /* Converted UF_PORT string */
293 
294   struct {
295     uint16_t off;               /* Offset into buffer in which field starts */
296     uint16_t len;               /* Length of run in buffer */
297   } field_data[UF_MAX];
298 };
299 
300 
301 void http_parser_init(http_parser *parser, enum http_parser_type type);
302 
303 
304 size_t http_parser_execute(http_parser *parser,
305                            const http_parser_settings *settings,
306                            const char *data,
307                            size_t len);
308 
309   /* Begin Facebook */
/* Option bits; presumably OR-ed into the `options` argument of
 * http_parser_execute_options() (confirm against the implementation).
 */
enum http_parser_options {
  F_HTTP_PARSER_OPTIONS_URL_STRICT = 1 << 0
};
314 
315 size_t http_parser_execute_options(http_parser *parser,
316                                    const http_parser_settings *settings,
317                                    uint8_t options,
318                                    const char *data,
319                                    size_t len);
320 /* End Facebook */
321 
/* Maps an HTTP method enumerator `m` to its string form. */
const char *http_method_str(enum http_method m);
324 
/* Maps error code `err` to its name as a string. */
const char *http_errno_name(enum http_errno err);
327 
/* Maps error code `err` to a string describing the error. */
const char *http_errno_description(enum http_errno err);
330 
/* Parses the URL held in the `buflen` bytes at `buf` and records the
 * result in `u`. Returns nonzero on failure.
 * NOTE(review): `is_connect` presumably marks the URL as the target of a
 * CONNECT request; confirm against the implementation.
 */
int http_parser_parse_url(const char *buf,
                          size_t buflen,
                          int is_connect,
                          struct http_parser_url *u);
335 
336 /* Begin Facebook */
/* Option bits; presumably OR-ed into the `options` argument of
 * http_parser_parse_url_options() (confirm against the implementation).
 */
enum http_parser_parse_url_options {
  F_PARSE_URL_OPTIONS_URL_STRICT = 1 << 0
};
341 
/* Variant of http_parser_parse_url() that additionally takes an `options`
 * byte. NOTE(review): presumably a mask of F_PARSE_URL_OPTIONS_* bits and
 * the same nonzero-on-failure result; confirm against the implementation.
 */
int http_parser_parse_url_options(const char *buf,
                                  size_t buflen,
                                  int is_connect,
                                  struct http_parser_url *u,
                                  uint8_t options);
347 /* End Facebook */
348 
349 /* Pause or un-pause the parser; a nonzero value pauses */
350 void http_parser_pause(http_parser *parser, int paused);
351 
352 #if __cplusplus
353 }
354 #endif /* __cplusplus */
355 
356 #endif
357