1 // src/URI.cc
2 // This file is part of libpbe; see http://anyterm.org/
3 // (C) 2005-2008 Philip Endecott
4 
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2 of the License, or
8 // any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 
19 #include "URI.hh"
20 
21 #include <boost/spirit/include/classic.hpp>
22 #include <boost/spirit/home/classic/actor/insert_at_actor.hpp>
23 #include <boost/spirit/home/classic/dynamic/if.hpp>
24 #include <boost/spirit/home/classic/utility/loops.hpp>
25 #include <boost/spirit/home/classic/iterator/multi_pass.hpp>
26 
27 #include <boost/lexical_cast.hpp>
28 #include <boost/scoped_array.hpp>
29 
30 #include <map>
31 
32 using namespace std;
33 using namespace boost::spirit::classic;
34 
35 
36 namespace pbe {
37 
38 
URI(string absolute_uri)39 URI::URI(string absolute_uri):
40   port(0)
41 {
42   typedef rule<> rule_t;
43 
44   // URI parsing EBNF based on
45   //   RFC2616
46   //   RFC2396
47   //   HTTP/1.1 Errata (http://skrb.org/ietf/http_errata.html)
48 
49   rule_t mark = ch_p('-') | '_' | '.' | '!' | '~' | '*' | '\'' | '(' | ')';
50 
51   rule_t unreserved = alnum_p | mark;
52 
53   rule_t escaped = ch_p('%') >> xdigit_p >> xdigit_p;
54 
55   rule_t reserved = ch_p(';') | '/' | '?' | ':' | '@' | '&' | '=' | '+' | '$' | ',';
56 
57   rule_t pchar = unreserved | escaped | ':' | '@' | '&' | '=' | '+' | '$' | ',';
58 
59   rule_t param = *pchar;
60 
61   rule_t segment = *pchar >> *(';' >> param);
62 
63   rule_t path_segments = segment >> *('/' >> segment);
64 
65   rule_t abs_path = ( ch_p('/') >> path_segments )[assign_a(URI::abs_path)];
66 
67   rule_t scheme = alpha_p >> *(alpha_p | digit_p | '+' | '-' | '.' );
68 
69   rule_t userinfo = *(unreserved | escaped | ';' | ':' | '&' | '=' | '+' | '$' | ',' );
70 
71   //rule_t domainlabel = alnum_p | alnum_p >> *(alnum_p | '-') >> alnum_p;
72   rule_t domainlabel = *(alnum_p | '-');
73 
74   //rule_t toplabel = alpha_p | alpha_p >> *(alnum_p | '-') >> alnum_p;
75 
76   //rule_t hostname = *(domainlabel >> '.') >> toplabel >> !ch_p('.');
77   rule_t hostname = domainlabel % ch_p('.');
78 
79   uint_parser<unsigned,10,1,3> decimal_byte;
80 
81   rule_t ipv4address = decimal_byte >> '.' >> decimal_byte >> '.' >>
82     decimal_byte >> '.' >> decimal_byte;
83 
84   rule_t host = hostname | ipv4address;
85 
86   rule_t port = uint_p[assign_a(URI::port)];
87 
88   rule_t hostport = host[assign_a(URI::host)]
89                     >> !(':' >> port);
90 
91   rule_t server = !( !(userinfo[assign_a(URI::userinfo)] >> '@') >> hostport );
92 
93   rule_t reg_name = +(unreserved | escaped | '$' | ',' | ';' | ':' | '@' |
94 		      '&' | '=' | '+');
95 
96   rule_t authority = server | reg_name;
97 
98   rule_t net_path = str_p("//") >> authority >> !abs_path;
99 
100   rule_t uric = reserved | unreserved | escaped;
101 
102   rule_t query = (*uric) [assign_a(URI::query)];
103 
104   rule_t hier_part = (net_path | abs_path) >> !('?' >> query);
105 
106   rule_t uric_no_slash = unreserved | escaped | ';' | '?' | ':' | '@' |
107     '&' | '=' | '+' | '$' | ',';
108 
109   rule_t opaque_part = uric_no_slash >> *uric;
110 
111   rule_t absoluteURI = scheme[assign_a(URI::scheme)] >> ':' >> (hier_part | opaque_part);
112 
113   if (!parse(absolute_uri.c_str(), absoluteURI).full) {
114     throw SyntaxError();
115   }
116 }
117 
118 
119 };
120 
121