1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #pragma once
10 
#include <cstdint>
#include <string>
#include <utility>

#include <glog/logging.h>

#include <folly/Conv.h>
#include <folly/Expected.h>
#include <folly/Optional.h>
#include <folly/Portability.h>
#include <folly/Range.h>
#include <folly/String.h>
#include <proxygen/lib/utils/Export.h>
20 
21 namespace proxygen {
22 
// ParseURL can handle non-fully-formed URLs. A ParseURL stores views into the
// input StringPiece, so it must not outlive the buffer underlying that input.
25 
26 class ParseURL {
27  public:
28   /* Parse a URL.  If parsing succeeds, return a fully formed ParseURL with
29    * valid() == true.  If parsing fails, returns nothing. If you need the
30    * partial parse results, use parseURLMaybeInvalid below.
31    */
32   static folly::Expected<ParseURL, folly::Unit> parseURL(
33       folly::StringPiece urlVal, bool strict = false) noexcept {
34     ParseURL parseUrl(urlVal, strict);
35     if (parseUrl.valid()) {
36       return parseUrl;
37     } else {
38       return folly::makeUnexpected(folly::Unit());
39     }
40   }
41 
42   /* Parse a URL.  Returns a ParseURL object that may or may not be valid.
43    * Caller should check valid()
44    */
45   static ParseURL parseURLMaybeInvalid(folly::StringPiece urlVal,
46                                        bool strict = false) noexcept {
47     return ParseURL(urlVal, strict);
48   }
49 
50   // Deprecated.  Will be removed soon
51   explicit ParseURL(folly::StringPiece urlVal, bool strict = true) noexcept {
52     init(urlVal, strict);
53   }
54 
ParseURL(ParseURL && goner)55   ParseURL(ParseURL&& goner)
56       : url_(goner.url_),
57         scheme_(goner.scheme_),
58         path_(goner.path_),
59         query_(goner.query_),
60         fragment_(goner.fragment_),
61         port_(goner.port_),
62         valid_(goner.valid_),
63         initialized_(goner.initialized_) {
64     moveHostAndAuthority(std::move(goner));
65   }
66 
67   ParseURL& operator=(ParseURL&& goner) {
68     url_ = goner.url_;
69     scheme_ = goner.scheme_;
70     path_ = goner.path_;
71     query_ = goner.query_;
72     fragment_ = goner.fragment_;
73     port_ = goner.port_;
74     valid_ = goner.valid_;
75     initialized_ = goner.initialized_;
76     moveHostAndAuthority(std::move(goner));
77     return *this;
78   }
79 
80   ParseURL& operator=(const ParseURL&) = delete;
81   ParseURL(const ParseURL&) = delete;
82 
83   ParseURL() = default;
84 
85   void init(folly::StringPiece urlVal, bool strict = false) {
86     CHECK(!initialized_);
87     url_ = urlVal;
88     parse(strict);
89     initialized_ = true;
90   }
91 
92   operator bool() const {
93     return valid();
94   }
95 
url()96   folly::StringPiece url() const {
97     return url_;
98   }
99 
scheme()100   folly::StringPiece scheme() const {
101     return scheme_;
102   }
103 
authority()104   std::string authority() const {
105     return authority_;
106   }
107 
hasHost()108   bool hasHost() const {
109     return valid() && !host_.empty();
110   }
111 
host()112   folly::StringPiece host() const {
113     return host_;
114   }
115 
port()116   uint16_t port() const {
117     return port_;
118   }
119 
hostAndPort()120   std::string hostAndPort() const {
121     std::string rc = host_.str();
122     if (port_ != 0) {
123       folly::toAppend(":", port_, &rc);
124     }
125     return rc;
126   }
127 
path()128   folly::StringPiece path() const {
129     return path_;
130   }
131 
query()132   folly::StringPiece query() const {
133     return query_;
134   }
135 
fragment()136   folly::StringPiece fragment() const {
137     return fragment_;
138   }
139 
valid()140   bool valid() const {
141     return valid_;
142   }
143 
hostNoBrackets()144   folly::StringPiece hostNoBrackets() {
145     stripBrackets();
146     return hostNoBrackets_;
147   }
148 
149   bool hostIsIPAddress();
150 
151   FB_EXPORT void stripBrackets() noexcept;
152 
153   FOLLY_NODISCARD folly::Optional<folly::StringPiece> getQueryParam(
154       folly::StringPiece name) const noexcept;
155 
156  private:
moveHostAndAuthority(ParseURL && goner)157   void moveHostAndAuthority(ParseURL&& goner) {
158     if (!valid_) {
159       return;
160     }
161     int64_t hostOff = -1;
162     int64_t hostNoBracketsOff = -1;
163     if (goner.host_.empty() || (goner.host_.data() >= goner.url_.data() &&
164                                 goner.host_.data() < goner.url_.end())) {
165       // relative url_
166       host_ = goner.host_;
167     } else {
168       // relative authority_
169       hostOff = goner.host_.data() - goner.authority_.data();
170     }
171     if (goner.hostNoBrackets_.empty() ||
172         (goner.hostNoBrackets_.data() >= goner.url_.data() &&
173          goner.hostNoBrackets_.data() < goner.url_.end())) {
174       // relative url_
175       hostNoBrackets_ = goner.hostNoBrackets_;
176     } else {
177       // relative authority_
178       hostNoBracketsOff =
179           goner.hostNoBrackets_.data() - goner.authority_.data();
180     }
181     authority_ = std::move(goner.authority_);
182     if (hostOff >= 0) {
183       host_.reset(authority_.data() + hostOff, goner.host_.size());
184     }
185     if (hostNoBracketsOff >= 0) {
186       hostNoBrackets_.reset(authority_.data() + hostNoBracketsOff,
187                             goner.hostNoBrackets_.size());
188     }
189   }
190 
191   FB_EXPORT void parse(bool strict) noexcept;
192 
193   void parseNonFully(bool strict) noexcept;
194 
195   bool parseAuthority() noexcept;
196 
197   folly::StringPiece url_;
198   folly::StringPiece scheme_;
199   std::string authority_;
200   folly::StringPiece host_;
201   folly::StringPiece hostNoBrackets_;
202   folly::StringPiece path_;
203   folly::StringPiece query_;
204   folly::StringPiece fragment_;
205   uint16_t port_{0};
206   bool valid_{false};
207   bool initialized_{false};
208 };
209 
210 } // namespace proxygen
211