1 /**
2  * @file http.c
3  *
4  * @brief Look for HTTP requests in buffers.
5  * @author David Suárez
6  * @author Chris Lightfoot
7  * @date Sun, 28 Oct 2018 16:14:56 +0100
8  *
 * We look for GET and POST requests that either use an absolute http:// URL
 * or carry a Host: header; responses are not inspected here.
11  *
12  * Copyright (c) 2002 Chris Lightfoot.
13  * Email: chris@ex-parrot.com; WWW: http://www.ex-parrot.com/~chris/
14  *
15  * Copyright (c) 2018 David Suárez.
16  * Email: david.sephirot@gmail.com
17  *
18  */
19 
20 #include "compat/compat.h"
21 
22 #include <stdio.h>
23 #include <stdlib.h> /* On many systems (Darwin...), stdio.h is a prerequisite. */
24 #include <string.h>
25 
26 #include "common/util.h"
27 
28 
/**
 * @brief Look for an HTTP GET or POST request in a buffer.
 *
 * A match consists of the request line and its headers, up to (but not
 * including) the blank line that terminates them. To be reported, a request
 * must either use an absolute "http://" URL or carry a "Host: " header, so
 * that the match contains enough information to obtain the URL.
 *
 * HTTP requests look like:
 *
 *      GET {path} HTTP/1.(0|1)\r\n
 *      header: value\r\n
 *          ...
 *      \r\n
 *
 * @param data    buffer to scan
 * @param len     number of bytes in @p data
 * @param http    set to NULL when no request is found, otherwise to the
 *                start of the request inside @p data
 * @param httplen written only on a match: length in bytes of the match
 * @return pointer into @p data suitable for a subsequent call
 */
unsigned char *find_http_req(const unsigned char *data, const size_t len, unsigned char **http, size_t *httplen) {
    const size_t min_len = 40;        /* shorter buffers are not examined */
    const size_t max_req = 16384;     /* give up on a candidate longer than this */
    const size_t version_len = 9;     /* strlen(" HTTP/1.x") */
    const unsigned char *end = data + len;
    unsigned char *get, *post, *req, *path, *le, *blankline;
    size_t method_len;
    int absolute_url;

    /* Honour the documented contract: NULL unless a request is found. */
    *http = NULL;

    if (len < min_len)
        return (unsigned char *)data;

    /* Take whichever method token comes first, so that a POST preceding a GET
     * in the same buffer is not skipped (assumes memstr reports the first
     * occurrence of the needle). */
    get = memstr(data, len, (unsigned char *)"GET ", 4);
    post = memstr(data, len, (unsigned char *)"POST ", 5);
    if (!get && !post)
        /* Keep the tail: it may hold the beginning of a method token. */
        return (unsigned char *)(end - 4);

    if (get && (!post || get < post)) {
        req = get;
        method_len = 4;
    } else {
        req = post;
        method_len = 5;
    }
    path = req + method_len;

    /* Find the end of the request line. */
    le = memstr(path, (size_t)(end - path), (unsigned char *)"\r\n", 2);
    if (!le) {
        if ((size_t)(end - path) > max_req)
            return path;    /* too long to be a request line; move on */
        return req;         /* incomplete so far; re-examine from here next time */
    }

    /* The line must hold at least one path byte followed by " HTTP/1.x";
     * this also keeps the le - 9 access below inside the request line. */
    if ((size_t)(le - path) < 1 + version_len)
        return le + 2;

    /* Not an HTTP request, just a line starting GET/POST. The minor version
     * is compared explicitly because strchr() would also match a NUL byte. */
    if (memcmp(le - version_len, " HTTP/1.", 8) != 0 || (le[-1] != '0' && le[-1] != '1'))
        return le + 2;

    /* Find the end of the request headers. The search starts at the line
     * terminator itself so that a request without headers is recognised. */
    blankline = memstr(le, (size_t)(end - le), (unsigned char *)"\r\n\r\n", 4);
    if (!blankline) {
        if ((size_t)(end - (le + 2)) > max_req)
            return (unsigned char *)(end - 4);
        return req;
    }

    /* An absolute URL is probably a cache (proxy) request; in any case it
     * needs no Host: header. The length check above guarantees that these
     * seven bytes lie within the request line. */
    absolute_url = (memcmp(path, "http://", 7) == 0);

    /* Otherwise a Host: header is required. */
    if (!absolute_url &&
        !memstr(le, (size_t)(blankline - le) + 2, (unsigned char *)"\r\nHost: ", 8))
        return blankline + 4;

    *http = req;
    *httplen = (size_t)(blankline - req);

    return blankline + 4;
}
99 
100