1 /**
2 * @file http.c
3 *
4 * @brief Look for HTTP requests in buffers.
5 * @author David Suárez
6 * @author Chris Lightfoot
7 * @date Sun, 28 Oct 2018 16:14:56 +0100
8 *
 * We look for GET and POST requests. Only the request itself is examined
 * here; the type of the response (e.g. text/html) is not checked in this file.
11 *
12 * Copyright (c) 2002 Chris Lightfoot.
13 * Email: chris@ex-parrot.com; WWW: http://www.ex-parrot.com/~chris/
14 *
15 * Copyright (c) 2018 David Suárez.
16 * Email: david.sephirot@gmail.com
17 *
18 */
19
20 #include "compat/compat.h"
21
22 #include <stdio.h>
23 #include <stdlib.h> /* On many systems (Darwin...), stdio.h is a prerequisite. */
24 #include <string.h>
25
26 #include "common/util.h"
27
28
/* find_http_req DATA LEN FOUND FOUNDLEN
 * Look for an HTTP GET or POST request in buffer DATA of length LEN.
 *
 * The return value is a pointer into DATA suitable for a subsequent call to
 * this function: either the position just past what has been consumed, or the
 * start of a candidate request that is still incomplete.
 *
 * *FOUND is set to NULL when no complete request was located, otherwise to the
 * start of the request (its method token). In the latter case *FOUNDLEN is the
 * number of bytes from the start of the request up to, but not including, the
 * "\r\n\r\n" that ends the headers, which is enough information to obtain the
 * URL. *FOUNDLEN is left untouched when nothing is found.
 *
 * A request is only reported if its request line ends in " HTTP/1.0" or
 * " HTTP/1.1" and it either uses an absolute "http://" URL or carries a
 * "Host: " header. */
unsigned char *find_http_req(const unsigned char *data, const size_t len, unsigned char **http, size_t *httplen) {
    unsigned char *req, *post, *le, *blankline;
    size_t method_len;
    /* Length of the " HTTP/1.x" suffix that must end the request line. */
    enum { HTTP_VERSION_LEN = 9 };

#define remaining(x)    (len - ((x) - data))
#define MAX_REQ         16384
#define HTTPGET_LEN     4
#define HTTPPOST_LEN    5

    /* HTTP requests look like:
     *
     *      GET {path} HTTP/1.(0|1)\r\n
     *      header: value\r\n
     *          ...
     *      \r\n
     *
     * We may care about the Host: header in the request. */

    /* Honour the documented contract: nothing found until proven otherwise. */
    *http = NULL;

    if (len < 40)
        return (unsigned char*)data;

    /* Locate both method tokens and start from whichever occurs first, so
     * that a POST preceding a GET in the same buffer is not skipped. */
    req = memstr(data, len, (unsigned char*)"GET ", HTTPGET_LEN);
    post = memstr(data, len, (unsigned char*)"POST ", HTTPPOST_LEN);

    if (!req && !post) {
        /* Keep the last few bytes, presumably so that a method token split
         * across two buffers can still be matched on the next call. */
        return (unsigned char*)(data + len - HTTPGET_LEN);
    }

    if (!req || (post && post < req)) {
        req = post;
        method_len = HTTPPOST_LEN;
    } else {
        method_len = HTTPGET_LEN;
    }

    /* Find the end of the request line. */
    if (!(le = memstr(req + method_len, remaining(req + method_len), (unsigned char*)"\r\n", 2))) {
        if (remaining(req + method_len) > MAX_REQ)
            return req + method_len;    /* Too long to be a request line; skip this token. */
        else
            return req;                 /* Line may still be incomplete; retry from here. */
    }

    /* Not enough space for "<method> <path> HTTP/1.x". This also guarantees
     * that the le - HTTP_VERSION_LEN access below stays inside the request
     * line instead of reading before the method token (or before DATA). */
    if ((size_t)(le - req) < method_len + 1 + HTTP_VERSION_LEN)
        return le + 2;

    /* Not an HTTP request, just a line starting GET.... The version digit is
     * compared explicitly: strchr("01", c) would also accept c == '\0'. */
    if (memcmp(le - HTTP_VERSION_LEN, " HTTP/1.", 8) != 0 || (le[-1] != '0' && le[-1] != '1'))
        return le + 2;

    /* Find the end of the request headers.
     * NOTE(review): the search starts after the request line's CRLF, so a
     * request with no header lines at all is not matched at its own blank
     * line; confirm whether that case matters to callers. */
    if (!(blankline = memstr(le + 2, remaining(le + 2), (unsigned char*)"\r\n\r\n", 4))) {
        if (remaining(le + 2) > MAX_REQ)
            return (unsigned char*)(data + len - 4);
        else
            return req;
    }

    /* An absolute URL is probably a cache (proxy) request and needs no Host:
     * header. Otherwise a Host: header is required. The URL is checked at
     * the offset belonging to the method that was actually matched. */
    if (memcmp(req + method_len, "http://", 7) != 0) {
        /* Is there a Host: header? The search window starts at the CRLF that
         * ends the request line and includes the CRLF of the last header. */
        if (!memstr(le, (size_t)(blankline - le) + 2, (unsigned char*)"\r\nHost: ", 8))
            return blankline + 4;
    }

    *http = req;
    *httplen = (size_t)(blankline - req);

    /* Resume after the blank line that terminated the headers. */
    return blankline + 4;
}
99
100