1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <folly/Uri.h>
18 
19 #include <algorithm>
20 #include <cctype>
21 
22 #include <boost/regex.hpp>
23 
24 namespace folly {
25 
26 namespace {
27 
submatch(const boost::cmatch & m,int idx)28 std::string submatch(const boost::cmatch& m, int idx) {
29   const auto& sub = m[idx];
30   return std::string(sub.first, sub.second);
31 }
32 
33 } // namespace
34 
Uri(StringPiece str)35 Uri::Uri(StringPiece str) : hasAuthority_(false), port_(0) {
36   static const boost::regex uriRegex(
37       "([a-zA-Z][a-zA-Z0-9+.-]*):" // scheme:
38       "([^?#]*)" // authority and path
39       "(?:\\?([^#]*))?" // ?query
40       "(?:#(.*))?"); // #fragment
41   static const boost::regex authorityAndPathRegex("//([^/]*)(/.*)?");
42 
43   boost::cmatch match;
44   if (UNLIKELY(!boost::regex_match(str.begin(), str.end(), match, uriRegex))) {
45     throw std::invalid_argument(to<std::string>("invalid URI ", str));
46   }
47 
48   scheme_ = submatch(match, 1);
49   std::transform(scheme_.begin(), scheme_.end(), scheme_.begin(), ::tolower);
50 
51   StringPiece authorityAndPath(match[2].first, match[2].second);
52   boost::cmatch authorityAndPathMatch;
53   if (!boost::regex_match(
54           authorityAndPath.begin(),
55           authorityAndPath.end(),
56           authorityAndPathMatch,
57           authorityAndPathRegex)) {
58     // Does not start with //, doesn't have authority
59     hasAuthority_ = false;
60     path_ = authorityAndPath.str();
61   } else {
62     static const boost::regex authorityRegex(
63         "(?:([^@:]*)(?::([^@]*))?@)?" // username, password
64         "(\\[[^\\]]*\\]|[^\\[:]*)" // host (IP-literal (e.g. '['+IPv6+']',
65                                    // dotted-IPv4, or named host)
66         "(?::(\\d*))?"); // port
67 
68     const auto authority = authorityAndPathMatch[1];
69     boost::cmatch authorityMatch;
70     if (!boost::regex_match(
71             authority.first,
72             authority.second,
73             authorityMatch,
74             authorityRegex)) {
75       throw std::invalid_argument(to<std::string>(
76           "invalid URI authority ",
77           StringPiece(authority.first, authority.second)));
78     }
79 
80     StringPiece port(authorityMatch[4].first, authorityMatch[4].second);
81     if (!port.empty()) {
82       try {
83         port_ = to<uint16_t>(port);
84       } catch (ConversionError const& e) {
85         throw std::invalid_argument(
86             to<std::string>("invalid URI port: ", e.what()));
87       }
88     }
89 
90     hasAuthority_ = true;
91     username_ = submatch(authorityMatch, 1);
92     password_ = submatch(authorityMatch, 2);
93     host_ = submatch(authorityMatch, 3);
94     path_ = submatch(authorityAndPathMatch, 2);
95   }
96 
97   query_ = submatch(match, 3);
98   fragment_ = submatch(match, 4);
99 }
100 
authority() const101 std::string Uri::authority() const {
102   std::string result;
103 
104   // Port is 5 characters max and we have up to 3 delimiters.
105   result.reserve(host().size() + username().size() + password().size() + 8);
106 
107   if (!username().empty() || !password().empty()) {
108     result.append(username());
109 
110     if (!password().empty()) {
111       result.push_back(':');
112       result.append(password());
113     }
114 
115     result.push_back('@');
116   }
117 
118   result.append(host());
119 
120   if (port() != 0) {
121     result.push_back(':');
122     toAppend(port(), &result);
123   }
124 
125   return result;
126 }
127 
hostname() const128 std::string Uri::hostname() const {
129   if (!host_.empty() && host_[0] == '[') {
130     // If it starts with '[', then it should end with ']', this is ensured by
131     // regex
132     return host_.substr(1, host_.size() - 2);
133   }
134   return host_;
135 }
136 
getQueryParams()137 const std::vector<std::pair<std::string, std::string>>& Uri::getQueryParams() {
138   if (!query_.empty() && queryParams_.empty()) {
139     // Parse query string
140     static const boost::regex queryParamRegex(
141         "(^|&)" /*start of query or start of parameter "&"*/
142         "([^=&]*)=?" /*parameter name and "=" if value is expected*/
143         "([^=&]*)" /*parameter value*/
144         "(?=(&|$))" /*forward reference, next should be end of query or
145                       start of next parameter*/);
146     const boost::cregex_iterator paramBeginItr(
147         query_.data(), query_.data() + query_.size(), queryParamRegex);
148     boost::cregex_iterator paramEndItr;
149     for (auto itr = paramBeginItr; itr != paramEndItr; ++itr) {
150       if (itr->length(2) == 0) {
151         // key is empty, ignore it
152         continue;
153       }
154       queryParams_.emplace_back(
155           std::string((*itr)[2].first, (*itr)[2].second), // parameter name
156           std::string((*itr)[3].first, (*itr)[3].second) // parameter value
157       );
158     }
159   }
160   return queryParams_;
161 }
162 
163 } // namespace folly
164