1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kNetwork
32 
33 #include "mongo/platform/basic.h"
34 
35 #include "mongo/client/mongo_uri.h"
36 
37 #include <utility>
38 
39 #include <boost/algorithm/string/case_conv.hpp>
40 #include <boost/algorithm/string/classification.hpp>
41 #include <boost/algorithm/string/find_iterator.hpp>
42 #include <boost/algorithm/string/predicate.hpp>
43 #include <boost/range/algorithm/count.hpp>
44 
45 #include "mongo/base/status_with.h"
46 #include "mongo/bson/bsonobjbuilder.h"
47 #include "mongo/client/dbclientinterface.h"
48 #include "mongo/client/sasl_client_authenticate.h"
49 #include "mongo/db/namespace_string.h"
50 #include "mongo/stdx/utility.h"
51 #include "mongo/util/dns_name.h"
52 #include "mongo/util/dns_query.h"
53 #include "mongo/util/hex.h"
54 #include "mongo/util/mongoutils/str.h"
55 
56 using namespace std::literals::string_literals;
57 
namespace {
// Uppercase hexadecimal digits indexed by nibble value (0-15).  `mongo::uriEncode` uses this table
// to emit the two digits of a percent-escape.
constexpr std::array<char, 16> hexits{
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

// The option keys a DNS TXT record is allowed to supply; `addTXTOptions` rejects a record whose
// parsed options contain any other key.  Only the keys are compared, so the mapped values are
// empty placeholders.
// This vector must remain sorted.  It is over pairs to facilitate a call to `std::includes` using
// a `std::map<std::string, std::string>` as the other parameter.
const std::vector<std::pair<std::string, std::string>> permittedTXTOptions = {{"authSource"s, ""s},
                                                                              {"replicaSet"s, ""s}};
}  // namespace
67 
68 /**
69  * RFC 3986 Section 2.1 - Percent Encoding
70  *
71  * Encode data elements in a way which will allow them to be embedded
72  * into a mongodb:// URI safely.
73  */
uriEncode(std::ostream & ss,StringData toEncode,StringData passthrough)74 void mongo::uriEncode(std::ostream& ss, StringData toEncode, StringData passthrough) {
75     for (const auto& c : toEncode) {
76         if ((c == '-') || (c == '_') || (c == '.') || (c == '~') || isalnum(c) ||
77             (passthrough.find(c) != std::string::npos)) {
78             ss << c;
79         } else {
80             // Encoding anything not included in section 2.3 "Unreserved characters"
81             ss << '%' << hexits[(c >> 4) & 0xF] << hexits[c & 0xF];
82         }
83     }
84 }
85 
uriDecode(StringData toDecode)86 mongo::StatusWith<std::string> mongo::uriDecode(StringData toDecode) {
87     StringBuilder out;
88     for (size_t i = 0; i < toDecode.size(); ++i) {
89         const auto c = toDecode[i];
90         if (c == '%') {
91             if (i + 2 > toDecode.size()) {
92                 return Status(ErrorCodes::FailedToParse,
93                               "Encountered partial escape sequence at end of string");
94             }
95             auto swHex = fromHex(toDecode.substr(i + 1, 2));
96             if (!swHex.isOK()) {
97                 return swHex.getStatus();
98             }
99             out << swHex.getValue();
100             i += 2;
101         } else {
102             out << c;
103         }
104     }
105     return out.str();
106 }
107 
108 namespace mongo {
109 
110 namespace {
111 
// Scheme prefix of a standard MongoDB connection URI.
constexpr StringData kURIPrefix = "mongodb://"_sd;
// Scheme prefix that puts MongoURI::parseImpl() into seedlist mode, where the server list and
// additional options are looked up through DNS SRV and TXT records.
constexpr StringData kURISRVPrefix = "mongodb+srv://"_sd;
// Host written by MongoURI::canonicalizeURIAsString() when the connection string has no servers.
constexpr StringData kDefaultMongoHost = "127.0.0.1:27017"_sd;
115 
116 /**
117  * Helper Method for MongoURI::parse() to split a string into exactly 2 pieces by a char
118  * delimeter.
119  */
partitionForward(StringData str,const char c)120 std::pair<StringData, StringData> partitionForward(StringData str, const char c) {
121     const auto delim = str.find(c);
122     if (delim == std::string::npos) {
123         return {str, StringData()};
124     }
125     return {str.substr(0, delim), str.substr(delim + 1)};
126 }
127 
128 /**
129  * Helper method for MongoURI::parse() to split a string into exactly 2 pieces by a char
130  * delimiter searching backward from the end of the string.
131  */
partitionBackward(StringData str,const char c)132 std::pair<StringData, StringData> partitionBackward(StringData str, const char c) {
133     const auto delim = str.rfind(c);
134     if (delim == std::string::npos) {
135         return {StringData(), str};
136     }
137     return {str.substr(0, delim), str.substr(delim + 1)};
138 }
139 
140 /**
141  * Breakout method for parsing application/x-www-form-urlencoded option pairs
142  *
143  * foo=bar&baz=qux&...
144  *
145  * A `std::map<std::string, std::string>` is returned, to facilitate setwise operations from the STL
146  * on multiple parsed option sources.  STL setwise operations require sorted lists.  A map is used
147  * instead of a vector of pairs to permit insertion-is-not-overwrite behavior.
148  */
parseOptions(StringData options,StringData url)149 std::map<std::string, std::string> parseOptions(StringData options, StringData url) {
150     std::map<std::string, std::string> ret;
151     if (options.empty()) {
152         return ret;
153     }
154 
155     if (options.find('?') != std::string::npos) {
156         uasserted(
157             ErrorCodes::FailedToParse,
158             str::stream() << "URI Cannot Contain multiple questions marks for mongodb:// URL: "
159                           << url);
160     }
161 
162     const auto optionsStr = options.toString();
163     for (auto i =
164              boost::make_split_iterator(optionsStr, boost::first_finder("&", boost::is_iequal()));
165          i != std::remove_reference<decltype((i))>::type{};
166          ++i) {
167         const auto opt = boost::copy_range<std::string>(*i);
168         if (opt.empty()) {
169             uasserted(ErrorCodes::FailedToParse,
170                       str::stream()
171                           << "Missing a key/value pair in the options for mongodb:// URL: "
172                           << url);
173         }
174 
175         const auto kvPair = partitionForward(opt, '=');
176         const auto keyRaw = kvPair.first;
177         if (keyRaw.empty()) {
178             uasserted(ErrorCodes::FailedToParse,
179                       str::stream()
180                           << "Missing a key for key/value pair in the options for mongodb:// URL: "
181                           << url);
182         }
183         const auto key = uriDecode(keyRaw);
184         if (!key.isOK()) {
185             uasserted(
186                 ErrorCodes::FailedToParse,
187                 str::stream() << "Key '" << keyRaw
188                               << "' in options cannot properly be URL decoded for mongodb:// URL: "
189                               << url);
190         }
191         const auto valRaw = kvPair.second;
192         if (valRaw.empty()) {
193             uasserted(ErrorCodes::FailedToParse,
194                       str::stream() << "Missing value for key '" << keyRaw
195                                     << "' in the options for mongodb:// URL: "
196                                     << url);
197         }
198         const auto val = uriDecode(valRaw);
199         if (!val.isOK()) {
200             uasserted(
201                 ErrorCodes::FailedToParse,
202                 str::stream() << "Value '" << valRaw << "' for key '" << keyRaw
203                               << "' in options cannot properly be URL decoded for mongodb:// URL: "
204                               << url);
205         }
206 
207         ret[key.getValue()] = val.getValue();
208     }
209 
210     return ret;
211 }
212 
addTXTOptions(std::map<std::string,std::string> options,const std::string & host,const StringData url,const bool isSeedlist)213 MongoURI::OptionsMap addTXTOptions(std::map<std::string, std::string> options,
214                                    const std::string& host,
215                                    const StringData url,
216                                    const bool isSeedlist) {
217     // If there is no seedlist mode, then don't add any TXT options.
218     if (!isSeedlist)
219         return options;
220     options.insert({"ssl", "true"});
221 
222     // Get all TXT records and parse them as options, adding them to the options set.
223     auto txtRecords = dns::getTXTRecords(host);
224     if (txtRecords.empty()) {
225         return {std::make_move_iterator(begin(options)), std::make_move_iterator(end(options))};
226     }
227 
228     if (txtRecords.size() > 1) {
229         uasserted(ErrorCodes::FailedToParse, "Encountered multiple TXT records for: "s + url);
230     }
231 
232     auto txtOptions = parseOptions(txtRecords.front(), url);
233     if (!std::includes(
234             begin(permittedTXTOptions),
235             end(permittedTXTOptions),
236             begin(stdx::as_const(txtOptions)),
237             end(stdx::as_const(txtOptions)),
238             [](const auto& lhs, const auto& rhs) { return std::get<0>(lhs) < std::get<0>(rhs); })) {
239         uasserted(ErrorCodes::FailedToParse, "Encountered invalid options in TXT record.");
240     }
241 
242     options.insert(std::make_move_iterator(begin(txtOptions)),
243                    std::make_move_iterator(end(txtOptions)));
244 
245     return {std::make_move_iterator(begin(options)), std::make_move_iterator(end(options))};
246 }
247 
// Contains the parts of a MongoURI as unowned StringData's. Any code that needs to break up
// URIs into their basic components without fully parsing them can use this struct.
// Internally, MongoURI uses this to do basic parsing of the input URI string.
// All fields hold the raw text of the URI; nothing is URL decoded here.
struct URIParts {
    // Splits `uri` into the fields below.  uasserts with FailedToParse when `uri` has no "://"
    // or when a '?' appears without a preceding '/'.
    explicit URIParts(StringData uri);
    // Text before "://".
    StringData scheme;
    // Text before the first ':' of the user information (the part before the last '@').
    StringData username;
    // Text after the first ':' of the user information.
    StringData password;
    // Text between the last '@' (or the scheme separator when there is none) and the first '/'.
    StringData hostIdentifiers;
    // Text between the first '/' and the first '?' that follows it.
    StringData database;
    // Text after that '?'.
    StringData options;
};
260 
URIParts(StringData uri)261 URIParts::URIParts(StringData uri) {
262     // 1. Strip off the scheme ("mongo://")
263     auto schemeEnd = uri.find("://");
264     if (schemeEnd == std::string::npos) {
265         uasserted(ErrorCodes::FailedToParse,
266                   str::stream() << "URI must begin with " << kURIPrefix << " or " << kURISRVPrefix
267                                 << ": "
268                                 << uri);
269     }
270     const auto uriWithoutPrefix = uri.substr(schemeEnd + 3);
271     scheme = uri.substr(0, schemeEnd);
272 
273     // 2. Split the string by the first, unescaped / (if any), yielding:
274     // split[0]: User information and host identifers
275     // split[1]: Auth database and connection options
276     const auto userAndDb = partitionForward(uriWithoutPrefix, '/');
277     const auto userAndHostInfo = userAndDb.first;
278 
279     // 2.b Make sure that there are no question marks in the left side of the /
280     //     as any options after the ? must still have the / delimeter
281     if (userAndDb.second.empty() && userAndHostInfo.find('?') != std::string::npos) {
282         uasserted(
283             ErrorCodes::FailedToParse,
284             str::stream()
285                 << "URI must contain slash delimeter between hosts and options for mongodb:// URL: "
286                 << uri);
287     }
288 
289     // 3. Split the user information and host identifiers string by the last, unescaped @,
290     const auto userAndHost = partitionBackward(userAndHostInfo, '@');
291     const auto userInfo = userAndHost.first;
292     hostIdentifiers = userAndHost.second;
293 
294     // 4. Split up the username and password
295     const auto userAndPass = partitionForward(userInfo, ':');
296     username = userAndPass.first;
297     password = userAndPass.second;
298 
299     // 5. Split the database name from the list of options
300     const auto databaseAndOptions = partitionForward(userAndDb.second, '?');
301     database = databaseAndOptions.first;
302     options = databaseAndOptions.second;
303 }
304 }  // namespace
305 
isMongoURI(StringData uri)306 bool MongoURI::isMongoURI(StringData uri) {
307     return (uri.startsWith(kURIPrefix) || uri.startsWith(kURISRVPrefix));
308 }
309 
redact(StringData url)310 std::string MongoURI::redact(StringData url) {
311     uassert(50892, "String passed to MongoURI::redact wasn't a MongoURI", isMongoURI(url));
312     URIParts parts(url);
313     std::ostringstream out;
314 
315     out << parts.scheme << "://";
316     if (!parts.username.empty()) {
317         out << parts.username << "@";
318     }
319     out << parts.hostIdentifiers;
320     if (!parts.database.empty()) {
321         out << "/" << parts.database;
322     }
323 
324     return out.str();
325 }
326 
parseImpl(const std::string & url)327 MongoURI MongoURI::parseImpl(const std::string& url) {
328     const StringData urlSD(url);
329 
330     // 1. Validate and remove the scheme prefix `mongodb://` or `mongodb+srv://`
331     const bool isSeedlist = urlSD.startsWith(kURISRVPrefix);
332     if (!(urlSD.startsWith(kURIPrefix) || isSeedlist)) {
333         return MongoURI(uassertStatusOK(ConnectionString::parse(url)));
334     }
335 
336     // 2. Split up the URI into its components for further parsing and validation
337     URIParts parts(url);
338     const auto hostIdentifiers = parts.hostIdentifiers;
339     const auto usernameSD = parts.username;
340     const auto passwordSD = parts.password;
341     const auto databaseSD = parts.database;
342     const auto connectionOptions = parts.options;
343 
344     // 3. URI decode and validate the username/password
345     const auto containsColonOrAt = [](StringData str) {
346         return (str.find(':') != std::string::npos) || (str.find('@') != std::string::npos);
347     };
348 
349     if (containsColonOrAt(usernameSD)) {
350         uasserted(ErrorCodes::FailedToParse,
351                   str::stream() << "Username must be URL Encoded for mongodb:// URL: " << url);
352     }
353 
354     if (containsColonOrAt(passwordSD)) {
355         uasserted(ErrorCodes::FailedToParse,
356                   str::stream() << "Password must be URL Encoded for mongodb:// URL: " << url);
357     }
358 
359     // Get the username and make sure it did not fail to decode
360     const auto usernameWithStatus = uriDecode(usernameSD);
361     if (!usernameWithStatus.isOK()) {
362         uasserted(ErrorCodes::FailedToParse,
363                   str::stream() << "Username cannot properly be URL decoded for mongodb:// URL: "
364                                 << url);
365     }
366     const auto username = usernameWithStatus.getValue();
367 
368     // Get the password and make sure it did not fail to decode
369     const auto passwordWithStatus = uriDecode(passwordSD);
370     if (!passwordWithStatus.isOK())
371         uasserted(ErrorCodes::FailedToParse,
372                   str::stream() << "Password cannot properly be URL decoded for mongodb:// URL: "
373                                 << url);
374     const auto password = passwordWithStatus.getValue();
375 
376     // 4. Validate, split, and URL decode the host identifiers.
377     const auto hostIdentifiersStr = hostIdentifiers.toString();
378     std::vector<HostAndPort> servers;
379     for (auto i = boost::make_split_iterator(hostIdentifiersStr,
380                                              boost::first_finder(",", boost::is_iequal()));
381          i != std::remove_reference<decltype((i))>::type{};
382          ++i) {
383         const auto hostWithStatus = uriDecode(boost::copy_range<std::string>(*i));
384         if (!hostWithStatus.isOK()) {
385             uasserted(ErrorCodes::FailedToParse,
386                       str::stream() << "Host cannot properly be URL decoded for mongodb:// URL: "
387                                     << url);
388         }
389 
390         const auto host = hostWithStatus.getValue();
391         if (host.empty()) {
392             continue;
393         }
394 
395         if ((host.find('/') != std::string::npos) && !StringData(host).endsWith(".sock")) {
396             uasserted(
397                 ErrorCodes::FailedToParse,
398                 str::stream() << "'" << host << "' in '" << url
399                               << "' appears to be a unix socket, but does not end in '.sock'");
400         }
401 
402         servers.push_back(uassertStatusOK(HostAndPort::parse(host)));
403     }
404     if (servers.empty()) {
405         uasserted(ErrorCodes::FailedToParse, "No server(s) specified");
406     }
407 
408     const std::string canonicalHost = servers.front().host();
409     // If we're in seedlist mode, lookup the SRV record for `_mongodb._tcp` on the specified
410     // domain name.  Take that list of servers as the new list of servers.
411     if (isSeedlist) {
412         if (servers.size() > 1) {
413             uasserted(ErrorCodes::FailedToParse,
414                       "Only a single server may be specified with a mongo+srv:// url.");
415         }
416 
417         const mongo::dns::HostName host(canonicalHost);
418 
419         if (host.nameComponents().size() < 3) {
420             uasserted(ErrorCodes::FailedToParse,
421                       "A server specified with a mongo+srv:// url must have at least 3 hostname "
422                       "components separated by dots ('.')");
423         }
424 
425         const mongo::dns::HostName srvSubdomain("_mongodb._tcp");
426 
427         const auto srvEntries =
428             dns::lookupSRVRecords(srvSubdomain.resolvedIn(host).canonicalName());
429 
430         auto makeFQDN = [](dns::HostName hostName) {
431             hostName.forceQualification();
432             return hostName;
433         };
434 
435         const mongo::dns::HostName domain = makeFQDN(host.parentDomain());
436         servers.clear();
437         using std::begin;
438         using std::end;
439         std::transform(
440             begin(srvEntries), end(srvEntries), back_inserter(servers), [&domain](auto&& srv) {
441                 const dns::HostName target(srv.host);  // FQDN
442 
443                 if (!domain.contains(target)) {
444                     uasserted(ErrorCodes::FailedToParse,
445                               str::stream() << "Hostname " << target << " is not within the domain "
446                                             << domain);
447                 }
448                 return HostAndPort(srv.host, srv.port);
449             });
450     }
451 
452     // 5. Decode the database name
453     const auto databaseWithStatus = uriDecode(databaseSD);
454     if (!databaseWithStatus.isOK()) {
455         uasserted(ErrorCodes::FailedToParse,
456                   str::stream() << "Database name cannot properly be URL "
457                                    "decoded for mongodb:// URL: "
458                                 << url);
459     }
460     const auto database = databaseWithStatus.getValue();
461 
462     // 6. Validate the database contains no prohibited characters
463     // Prohibited characters:
464     // slash ("/"), backslash ("\"), space (" "), double-quote ("""), or dollar sign ("$")
465     // period (".") is also prohibited, but drivers MAY allow periods
466     if (!database.empty() &&
467         !NamespaceString::validDBName(database,
468                                       NamespaceString::DollarInDbNameBehavior::Disallow)) {
469         uasserted(ErrorCodes::FailedToParse,
470                   str::stream() << "Database name cannot have reserved "
471                                    "characters for mongodb:// URL: "
472                                 << url);
473     }
474 
475     // 7. Validate, split, and URL decode the connection options
476     auto options =
477         addTXTOptions(parseOptions(connectionOptions, url), canonicalHost, url, isSeedlist);
478 
479     // If a replica set option was specified, store it in the 'setName' field.
480     auto optIter = options.find("replicaSet");
481     std::string setName;
482     if (optIter != end(options)) {
483         setName = optIter->second;
484         invariant(!setName.empty());
485     }
486 
487     boost::optional<bool> retryWrites = boost::none;
488     optIter = options.find("retryWrites");
489     if (optIter != end(options)) {
490         if (optIter->second == "true") {
491             retryWrites.reset(true);
492         } else if (optIter->second == "false") {
493             retryWrites.reset(false);
494         } else {
495             uasserted(ErrorCodes::FailedToParse,
496                       str::stream() << "retryWrites must be either \"true\" or \"false\"");
497         }
498     }
499 
500     ConnectionString cs(
501         setName.empty() ? ConnectionString::MASTER : ConnectionString::SET, servers, setName);
502     return MongoURI(
503         std::move(cs), username, password, database, std::move(retryWrites), std::move(options));
504 }
505 
parse(const std::string & url)506 StatusWith<MongoURI> MongoURI::parse(const std::string& url) try {
507     return parseImpl(url);
508 } catch (const std::exception&) {
509     return exceptionToStatus();
510 }
511 
getAppName() const512 const boost::optional<std::string> MongoURI::getAppName() const {
513     const auto optIter = _options.find("appName");
514     if (optIter != end(_options)) {
515         return optIter->second;
516     }
517     return boost::none;
518 }
519 
canonicalizeURIAsString() const520 std::string MongoURI::canonicalizeURIAsString() const {
521     StringBuilder uri;
522     uri << kURIPrefix;
523     if (!_user.empty()) {
524         uri << uriEncode(_user);
525         if (!_password.empty()) {
526             uri << ":" << uriEncode(_password);
527         }
528         uri << "@";
529     }
530 
531     const auto& servers = _connectString.getServers();
532     if (!servers.empty()) {
533         auto delimeter = "";
534         for (auto& hostAndPort : servers) {
535             if (boost::count(hostAndPort.host(), ':') > 1) {
536                 uri << delimeter << "[" << uriEncode(hostAndPort.host()) << "]"
537                     << ":" << uriEncode(std::to_string(hostAndPort.port()));
538             } else if (StringData(hostAndPort.host()).endsWith(".sock")) {
539                 uri << delimeter << uriEncode(hostAndPort.host());
540             } else {
541                 uri << delimeter << uriEncode(hostAndPort.host()) << ":"
542                     << uriEncode(std::to_string(hostAndPort.port()));
543             }
544             delimeter = ",";
545         }
546     } else {
547         uri << kDefaultMongoHost;
548     }
549 
550     uri << "/";
551     if (!_database.empty()) {
552         uri << uriEncode(_database);
553     }
554 
555     if (!_options.empty()) {
556         auto delimeter = "";
557         uri << "?";
558         for (const auto& pair : _options) {
559             uri << delimeter << uriEncode(pair.first) << "=" << uriEncode(pair.second);
560             delimeter = "&";
561         }
562     }
563     return uri.str();
564 }
565 }  // namespace mongo
566