1 // src/parse_http_request.cc
2 // This file is part of libpbe; see http://anyterm.org/
3 // (C) 2005-2008 Philip Endecott
4 
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 
19 #include "parse_http_request.hh"
20 
21 #include <boost/spirit/include/classic.hpp>
22 #include <boost/spirit/home/classic/actor/insert_at_actor.hpp>
23 #include <boost/spirit/home/classic/dynamic/if.hpp>
24 #include <boost/spirit/home/classic/utility/loops.hpp>
25 #include <boost/spirit/home/classic/iterator/multi_pass.hpp>
26 
27 #include <boost/lexical_cast.hpp>
28 #include <boost/scoped_array.hpp>
29 
30 #include <map>
31 
32 
33 using namespace std;
34 using namespace boost::spirit::classic;
35 
36 
37 namespace pbe {
38 
39 
parse_http_request(istream & strm)40 HttpRequest parse_http_request(istream& strm)
41 {
42   HttpRequest request;
43 
44   typedef multi_pass<istreambuf_iterator<char> > iterator_t;
45   typedef scanner<iterator_t> scanner_t;
46   typedef rule<scanner_t> rule_t;
47 
48   string tmp_hn;
49   string tmp_hv;
50 
51   // HTTP/1.1 request parsing, based on:
52   //   RFC2616
53   //   RFC2396
54   //   HTTP/1.1 Errata (http://skrb.org/ietf/http_errata.html)
55 
56   rule_t CRLF = str_p("\r\n");
57 
58   rule_t extension_method = nothing_p;
59 
60   rule_t method = str_p("OPTIONS") | "GET" | "HEAD" | "POST" | "PUT"
61     | "DELETE" | "TRACE" | "CONNECT" | extension_method;
62 
63   rule_t mark = ch_p('-') | '_' | '.' | '!' | '~' | '*' | '\'' | '(' | ')';
64 
65   rule_t unreserved = alnum_p | mark;
66 
67   rule_t escaped = ch_p('%') >> xdigit_p >> xdigit_p;
68 
69   rule_t reserved = ch_p(';') | '/' | '?' | ':' | '@' | '&' | '=' | '+' | '$' | ',';
70 
71   rule_t pchar = unreserved | escaped | ':' | '@' | '&' | '=' | '+' | '$' | ',';
72 
73   rule_t param = *pchar;
74 
75   rule_t segment = *pchar >> *(';' >> param);
76 
77   rule_t path_segments = segment >> *('/' >> segment);
78 
79   rule_t abs_path = ( ch_p('/') >> path_segments )[assign_a(request.abs_path)];
80 
81   rule_t scheme = alpha_p >> *(alpha_p | digit_p | '+' | '-' | '.' );
82 
83   rule_t userinfo = *(unreserved | escaped | ';' | ':' | '&' | '=' | '+' | '$' | ',' );
84 
85   rule_t domainlabel = alnum_p | alnum_p >> *(alnum_p | '-') >> alnum_p;
86 
87   rule_t toplabel = alpha_p | alpha_p >> *(alnum_p | '-') >> alnum_p;
88 
89   rule_t hostname = *(domainlabel >> '.') >> toplabel >> !ch_p('.');
90 
91   uint_parser<unsigned,10,1,3> decimal_byte;
92 
93   rule_t ipv4address = decimal_byte >> '.' >> decimal_byte >> '.' >>
94     decimal_byte >> '.' >> decimal_byte;
95 
96   rule_t host = hostname | ipv4address;
97 
98   rule_t port = uint_p;
99 
100   rule_t hostport = host >> !(':' >> port);
101 
102   rule_t server = !( !(userinfo >> '@') >> hostport );
103 
104   rule_t reg_name = +(unreserved | escaped | '$' | ',' | ';' | ':' | '@' |
105 		      '&' | '=' | '+');
106 
107   rule_t authority = server | reg_name;
108 
109   rule_t net_path = str_p("//") >> authority >> !abs_path;
110 
111   rule_t uric = reserved | unreserved | escaped;
112 
113   rule_t query = (*uric) [assign_a(request.query)];
114 
115   rule_t hier_part = (net_path | abs_path) >> !('?' >> query);
116 
117   rule_t uric_no_slash = unreserved | escaped | ';' | '?' | ':' | '@' |
118     '&' | '=' | '+' | '$' | ',';
119 
120   rule_t opaque_part = uric_no_slash >> *uric;
121 
122   rule_t absolute_uri = scheme >> ':' >> (hier_part | opaque_part);
123 
124   rule_t request_uri = ch_p('*')
125     | absolute_uri
126     | (abs_path >> !(ch_p('?') >> query))
127     | authority;
128 
129   rule_t http_version = str_p("HTTP/") >> uint_p >> '.' >> uint_p;
130 
131   rule_t request_line =
132     method  [assign_a(request.method)]
133     >> ' ' >> request_uri
134     >> ' ' >> http_version [assign_a(request.http_version)]
135     >> CRLF ;
136 
137   rule_t header_name = +(alnum_p|'-');
138 
139   rule_t header_value = *(print_p|' '|'\t');
140 
141   rule_t header = ( header_name [assign_a(tmp_hn)] >>
142 		    ':' >> *(ch_p(' ')) >> header_value [assign_a(tmp_hv)] )
143     [insert_at_a(request.headers,tmp_hn,tmp_hv)];
144 
145   rule_t request_r =
146     request_line
147     >> *(header >> CRLF)
148     >> CRLF;
149 
150   iterator_t first(make_multi_pass(std::istreambuf_iterator<char>(strm)));
151   iterator_t last(make_multi_pass(std::istreambuf_iterator<char>()));
152 
153   scanner_t scanner(first,last);
154 
155   if (!request_r.parse(scanner)) {
156     throw HttpRequestSyntaxError();
157   }
158 
159   HttpRequest::headers_t::const_iterator i = request.headers.find("Content-Length");
160   if (i==request.headers.end()) {
161     return request;
162   }
163   size_t content_length = boost::lexical_cast<size_t>(i->second);
164   boost::scoped_array<char> buf(new char[content_length]);
165   strm.read(buf.get(),content_length);
166   request.body = string(buf.get(),content_length);
167 
168   return request;
169 }
170 
171 
172 };
173 
174