1
2 /**
3 * Copyright (C) 2018-present MongoDB, Inc.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the Server Side Public License, version 1,
7 * as published by MongoDB, Inc.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * Server Side Public License for more details.
13 *
14 * You should have received a copy of the Server Side Public License
15 * along with this program. If not, see
16 * <http://www.mongodb.com/licensing/server-side-public-license>.
17 *
18 * As a special exception, the copyright holders give permission to link the
19 * code of portions of this program with the OpenSSL library under certain
20 * conditions as described in each individual source file and distribute
21 * linked combinations including the program with the OpenSSL library. You
22 * must comply with the Server Side Public License in all respects for
23 * all of the code used other than as permitted herein. If you modify file(s)
24 * with this exception, you may extend this exception to your version of the
25 * file(s), but you are not obligated to do so. If you do not wish to do so,
26 * delete this exception statement from your version. If you delete this
27 * exception statement from all source files in the program, then also delete
28 * it in the license file.
29 */
30
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kNetwork
32
33 #include "mongo/platform/basic.h"
34
35 #include "mongo/client/mongo_uri.h"
36
37 #include <utility>
38
39 #include <boost/algorithm/string/case_conv.hpp>
40 #include <boost/algorithm/string/classification.hpp>
41 #include <boost/algorithm/string/find_iterator.hpp>
42 #include <boost/algorithm/string/predicate.hpp>
43 #include <boost/range/algorithm/count.hpp>
44
45 #include "mongo/base/status_with.h"
46 #include "mongo/bson/bsonobjbuilder.h"
47 #include "mongo/client/dbclientinterface.h"
48 #include "mongo/client/sasl_client_authenticate.h"
49 #include "mongo/db/namespace_string.h"
50 #include "mongo/stdx/utility.h"
51 #include "mongo/util/dns_name.h"
52 #include "mongo/util/dns_query.h"
53 #include "mongo/util/hex.h"
54 #include "mongo/util/mongoutils/str.h"
55
56 using namespace std::literals::string_literals;
57
namespace {
// Upper-case hexadecimal digits; the element at index N is the digit for nibble value N.
constexpr std::array<char, 16> hexits{
    {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}};

// Option names that a TXT record is allowed to supply. The entries must stay sorted by key: the
// list is handed to `std::includes` together with a `std::map<std::string, std::string>`, which
// is also why every entry is a pair. Only the keys take part in that comparison.
const std::vector<std::pair<std::string, std::string>> permittedTXTOptions = {
    {"authSource"s, ""s},
    {"replicaSet"s, ""s},
};
}  // namespace
67
68 /**
69 * RFC 3986 Section 2.1 - Percent Encoding
70 *
71 * Encode data elements in a way which will allow them to be embedded
72 * into a mongodb:// URI safely.
73 */
uriEncode(std::ostream & ss,StringData toEncode,StringData passthrough)74 void mongo::uriEncode(std::ostream& ss, StringData toEncode, StringData passthrough) {
75 for (const auto& c : toEncode) {
76 if ((c == '-') || (c == '_') || (c == '.') || (c == '~') || isalnum(c) ||
77 (passthrough.find(c) != std::string::npos)) {
78 ss << c;
79 } else {
80 // Encoding anything not included in section 2.3 "Unreserved characters"
81 ss << '%' << hexits[(c >> 4) & 0xF] << hexits[c & 0xF];
82 }
83 }
84 }
85
uriDecode(StringData toDecode)86 mongo::StatusWith<std::string> mongo::uriDecode(StringData toDecode) {
87 StringBuilder out;
88 for (size_t i = 0; i < toDecode.size(); ++i) {
89 const auto c = toDecode[i];
90 if (c == '%') {
91 if (i + 2 > toDecode.size()) {
92 return Status(ErrorCodes::FailedToParse,
93 "Encountered partial escape sequence at end of string");
94 }
95 auto swHex = fromHex(toDecode.substr(i + 1, 2));
96 if (!swHex.isOK()) {
97 return swHex.getStatus();
98 }
99 out << swHex.getValue();
100 i += 2;
101 } else {
102 out << c;
103 }
104 }
105 return out.str();
106 }
107
108 namespace mongo {
109
110 namespace {
111
112 constexpr StringData kURIPrefix = "mongodb://"_sd;
113 constexpr StringData kURISRVPrefix = "mongodb+srv://"_sd;
114 constexpr StringData kDefaultMongoHost = "127.0.0.1:27017"_sd;
115
116 /**
117 * Helper Method for MongoURI::parse() to split a string into exactly 2 pieces by a char
118 * delimeter.
119 */
partitionForward(StringData str,const char c)120 std::pair<StringData, StringData> partitionForward(StringData str, const char c) {
121 const auto delim = str.find(c);
122 if (delim == std::string::npos) {
123 return {str, StringData()};
124 }
125 return {str.substr(0, delim), str.substr(delim + 1)};
126 }
127
128 /**
129 * Helper method for MongoURI::parse() to split a string into exactly 2 pieces by a char
130 * delimiter searching backward from the end of the string.
131 */
partitionBackward(StringData str,const char c)132 std::pair<StringData, StringData> partitionBackward(StringData str, const char c) {
133 const auto delim = str.rfind(c);
134 if (delim == std::string::npos) {
135 return {StringData(), str};
136 }
137 return {str.substr(0, delim), str.substr(delim + 1)};
138 }
139
140 /**
141 * Breakout method for parsing application/x-www-form-urlencoded option pairs
142 *
143 * foo=bar&baz=qux&...
144 *
145 * A `std::map<std::string, std::string>` is returned, to facilitate setwise operations from the STL
146 * on multiple parsed option sources. STL setwise operations require sorted lists. A map is used
147 * instead of a vector of pairs to permit insertion-is-not-overwrite behavior.
148 */
parseOptions(StringData options,StringData url)149 std::map<std::string, std::string> parseOptions(StringData options, StringData url) {
150 std::map<std::string, std::string> ret;
151 if (options.empty()) {
152 return ret;
153 }
154
155 if (options.find('?') != std::string::npos) {
156 uasserted(
157 ErrorCodes::FailedToParse,
158 str::stream() << "URI Cannot Contain multiple questions marks for mongodb:// URL: "
159 << url);
160 }
161
162 const auto optionsStr = options.toString();
163 for (auto i =
164 boost::make_split_iterator(optionsStr, boost::first_finder("&", boost::is_iequal()));
165 i != std::remove_reference<decltype((i))>::type{};
166 ++i) {
167 const auto opt = boost::copy_range<std::string>(*i);
168 if (opt.empty()) {
169 uasserted(ErrorCodes::FailedToParse,
170 str::stream()
171 << "Missing a key/value pair in the options for mongodb:// URL: "
172 << url);
173 }
174
175 const auto kvPair = partitionForward(opt, '=');
176 const auto keyRaw = kvPair.first;
177 if (keyRaw.empty()) {
178 uasserted(ErrorCodes::FailedToParse,
179 str::stream()
180 << "Missing a key for key/value pair in the options for mongodb:// URL: "
181 << url);
182 }
183 const auto key = uriDecode(keyRaw);
184 if (!key.isOK()) {
185 uasserted(
186 ErrorCodes::FailedToParse,
187 str::stream() << "Key '" << keyRaw
188 << "' in options cannot properly be URL decoded for mongodb:// URL: "
189 << url);
190 }
191 const auto valRaw = kvPair.second;
192 if (valRaw.empty()) {
193 uasserted(ErrorCodes::FailedToParse,
194 str::stream() << "Missing value for key '" << keyRaw
195 << "' in the options for mongodb:// URL: "
196 << url);
197 }
198 const auto val = uriDecode(valRaw);
199 if (!val.isOK()) {
200 uasserted(
201 ErrorCodes::FailedToParse,
202 str::stream() << "Value '" << valRaw << "' for key '" << keyRaw
203 << "' in options cannot properly be URL decoded for mongodb:// URL: "
204 << url);
205 }
206
207 ret[key.getValue()] = val.getValue();
208 }
209
210 return ret;
211 }
212
addTXTOptions(std::map<std::string,std::string> options,const std::string & host,const StringData url,const bool isSeedlist)213 MongoURI::OptionsMap addTXTOptions(std::map<std::string, std::string> options,
214 const std::string& host,
215 const StringData url,
216 const bool isSeedlist) {
217 // If there is no seedlist mode, then don't add any TXT options.
218 if (!isSeedlist)
219 return options;
220 options.insert({"ssl", "true"});
221
222 // Get all TXT records and parse them as options, adding them to the options set.
223 auto txtRecords = dns::getTXTRecords(host);
224 if (txtRecords.empty()) {
225 return {std::make_move_iterator(begin(options)), std::make_move_iterator(end(options))};
226 }
227
228 if (txtRecords.size() > 1) {
229 uasserted(ErrorCodes::FailedToParse, "Encountered multiple TXT records for: "s + url);
230 }
231
232 auto txtOptions = parseOptions(txtRecords.front(), url);
233 if (!std::includes(
234 begin(permittedTXTOptions),
235 end(permittedTXTOptions),
236 begin(stdx::as_const(txtOptions)),
237 end(stdx::as_const(txtOptions)),
238 [](const auto& lhs, const auto& rhs) { return std::get<0>(lhs) < std::get<0>(rhs); })) {
239 uasserted(ErrorCodes::FailedToParse, "Encountered invalid options in TXT record.");
240 }
241
242 options.insert(std::make_move_iterator(begin(txtOptions)),
243 std::make_move_iterator(end(txtOptions)));
244
245 return {std::make_move_iterator(begin(options)), std::make_move_iterator(end(options))};
246 }
247
248 // Contains the parts of a MongoURI as unowned StringData's. Any code that needs to break up
249 // URIs into their basic components without fully parsing them can use this struct.
250 // Internally, MongoURI uses this to do basic parsing of the input URI string.
251 struct URIParts {
252 explicit URIParts(StringData uri);
253 StringData scheme;
254 StringData username;
255 StringData password;
256 StringData hostIdentifiers;
257 StringData database;
258 StringData options;
259 };
260
URIParts(StringData uri)261 URIParts::URIParts(StringData uri) {
262 // 1. Strip off the scheme ("mongo://")
263 auto schemeEnd = uri.find("://");
264 if (schemeEnd == std::string::npos) {
265 uasserted(ErrorCodes::FailedToParse,
266 str::stream() << "URI must begin with " << kURIPrefix << " or " << kURISRVPrefix
267 << ": "
268 << uri);
269 }
270 const auto uriWithoutPrefix = uri.substr(schemeEnd + 3);
271 scheme = uri.substr(0, schemeEnd);
272
273 // 2. Split the string by the first, unescaped / (if any), yielding:
274 // split[0]: User information and host identifers
275 // split[1]: Auth database and connection options
276 const auto userAndDb = partitionForward(uriWithoutPrefix, '/');
277 const auto userAndHostInfo = userAndDb.first;
278
279 // 2.b Make sure that there are no question marks in the left side of the /
280 // as any options after the ? must still have the / delimeter
281 if (userAndDb.second.empty() && userAndHostInfo.find('?') != std::string::npos) {
282 uasserted(
283 ErrorCodes::FailedToParse,
284 str::stream()
285 << "URI must contain slash delimeter between hosts and options for mongodb:// URL: "
286 << uri);
287 }
288
289 // 3. Split the user information and host identifiers string by the last, unescaped @,
290 const auto userAndHost = partitionBackward(userAndHostInfo, '@');
291 const auto userInfo = userAndHost.first;
292 hostIdentifiers = userAndHost.second;
293
294 // 4. Split up the username and password
295 const auto userAndPass = partitionForward(userInfo, ':');
296 username = userAndPass.first;
297 password = userAndPass.second;
298
299 // 5. Split the database name from the list of options
300 const auto databaseAndOptions = partitionForward(userAndDb.second, '?');
301 database = databaseAndOptions.first;
302 options = databaseAndOptions.second;
303 }
304 } // namespace
305
isMongoURI(StringData uri)306 bool MongoURI::isMongoURI(StringData uri) {
307 return (uri.startsWith(kURIPrefix) || uri.startsWith(kURISRVPrefix));
308 }
309
redact(StringData url)310 std::string MongoURI::redact(StringData url) {
311 uassert(50892, "String passed to MongoURI::redact wasn't a MongoURI", isMongoURI(url));
312 URIParts parts(url);
313 std::ostringstream out;
314
315 out << parts.scheme << "://";
316 if (!parts.username.empty()) {
317 out << parts.username << "@";
318 }
319 out << parts.hostIdentifiers;
320 if (!parts.database.empty()) {
321 out << "/" << parts.database;
322 }
323
324 return out.str();
325 }
326
parseImpl(const std::string & url)327 MongoURI MongoURI::parseImpl(const std::string& url) {
328 const StringData urlSD(url);
329
330 // 1. Validate and remove the scheme prefix `mongodb://` or `mongodb+srv://`
331 const bool isSeedlist = urlSD.startsWith(kURISRVPrefix);
332 if (!(urlSD.startsWith(kURIPrefix) || isSeedlist)) {
333 return MongoURI(uassertStatusOK(ConnectionString::parse(url)));
334 }
335
336 // 2. Split up the URI into its components for further parsing and validation
337 URIParts parts(url);
338 const auto hostIdentifiers = parts.hostIdentifiers;
339 const auto usernameSD = parts.username;
340 const auto passwordSD = parts.password;
341 const auto databaseSD = parts.database;
342 const auto connectionOptions = parts.options;
343
344 // 3. URI decode and validate the username/password
345 const auto containsColonOrAt = [](StringData str) {
346 return (str.find(':') != std::string::npos) || (str.find('@') != std::string::npos);
347 };
348
349 if (containsColonOrAt(usernameSD)) {
350 uasserted(ErrorCodes::FailedToParse,
351 str::stream() << "Username must be URL Encoded for mongodb:// URL: " << url);
352 }
353
354 if (containsColonOrAt(passwordSD)) {
355 uasserted(ErrorCodes::FailedToParse,
356 str::stream() << "Password must be URL Encoded for mongodb:// URL: " << url);
357 }
358
359 // Get the username and make sure it did not fail to decode
360 const auto usernameWithStatus = uriDecode(usernameSD);
361 if (!usernameWithStatus.isOK()) {
362 uasserted(ErrorCodes::FailedToParse,
363 str::stream() << "Username cannot properly be URL decoded for mongodb:// URL: "
364 << url);
365 }
366 const auto username = usernameWithStatus.getValue();
367
368 // Get the password and make sure it did not fail to decode
369 const auto passwordWithStatus = uriDecode(passwordSD);
370 if (!passwordWithStatus.isOK())
371 uasserted(ErrorCodes::FailedToParse,
372 str::stream() << "Password cannot properly be URL decoded for mongodb:// URL: "
373 << url);
374 const auto password = passwordWithStatus.getValue();
375
376 // 4. Validate, split, and URL decode the host identifiers.
377 const auto hostIdentifiersStr = hostIdentifiers.toString();
378 std::vector<HostAndPort> servers;
379 for (auto i = boost::make_split_iterator(hostIdentifiersStr,
380 boost::first_finder(",", boost::is_iequal()));
381 i != std::remove_reference<decltype((i))>::type{};
382 ++i) {
383 const auto hostWithStatus = uriDecode(boost::copy_range<std::string>(*i));
384 if (!hostWithStatus.isOK()) {
385 uasserted(ErrorCodes::FailedToParse,
386 str::stream() << "Host cannot properly be URL decoded for mongodb:// URL: "
387 << url);
388 }
389
390 const auto host = hostWithStatus.getValue();
391 if (host.empty()) {
392 continue;
393 }
394
395 if ((host.find('/') != std::string::npos) && !StringData(host).endsWith(".sock")) {
396 uasserted(
397 ErrorCodes::FailedToParse,
398 str::stream() << "'" << host << "' in '" << url
399 << "' appears to be a unix socket, but does not end in '.sock'");
400 }
401
402 servers.push_back(uassertStatusOK(HostAndPort::parse(host)));
403 }
404 if (servers.empty()) {
405 uasserted(ErrorCodes::FailedToParse, "No server(s) specified");
406 }
407
408 const std::string canonicalHost = servers.front().host();
409 // If we're in seedlist mode, lookup the SRV record for `_mongodb._tcp` on the specified
410 // domain name. Take that list of servers as the new list of servers.
411 if (isSeedlist) {
412 if (servers.size() > 1) {
413 uasserted(ErrorCodes::FailedToParse,
414 "Only a single server may be specified with a mongo+srv:// url.");
415 }
416
417 const mongo::dns::HostName host(canonicalHost);
418
419 if (host.nameComponents().size() < 3) {
420 uasserted(ErrorCodes::FailedToParse,
421 "A server specified with a mongo+srv:// url must have at least 3 hostname "
422 "components separated by dots ('.')");
423 }
424
425 const mongo::dns::HostName srvSubdomain("_mongodb._tcp");
426
427 const auto srvEntries =
428 dns::lookupSRVRecords(srvSubdomain.resolvedIn(host).canonicalName());
429
430 auto makeFQDN = [](dns::HostName hostName) {
431 hostName.forceQualification();
432 return hostName;
433 };
434
435 const mongo::dns::HostName domain = makeFQDN(host.parentDomain());
436 servers.clear();
437 using std::begin;
438 using std::end;
439 std::transform(
440 begin(srvEntries), end(srvEntries), back_inserter(servers), [&domain](auto&& srv) {
441 const dns::HostName target(srv.host); // FQDN
442
443 if (!domain.contains(target)) {
444 uasserted(ErrorCodes::FailedToParse,
445 str::stream() << "Hostname " << target << " is not within the domain "
446 << domain);
447 }
448 return HostAndPort(srv.host, srv.port);
449 });
450 }
451
452 // 5. Decode the database name
453 const auto databaseWithStatus = uriDecode(databaseSD);
454 if (!databaseWithStatus.isOK()) {
455 uasserted(ErrorCodes::FailedToParse,
456 str::stream() << "Database name cannot properly be URL "
457 "decoded for mongodb:// URL: "
458 << url);
459 }
460 const auto database = databaseWithStatus.getValue();
461
462 // 6. Validate the database contains no prohibited characters
463 // Prohibited characters:
464 // slash ("/"), backslash ("\"), space (" "), double-quote ("""), or dollar sign ("$")
465 // period (".") is also prohibited, but drivers MAY allow periods
466 if (!database.empty() &&
467 !NamespaceString::validDBName(database,
468 NamespaceString::DollarInDbNameBehavior::Disallow)) {
469 uasserted(ErrorCodes::FailedToParse,
470 str::stream() << "Database name cannot have reserved "
471 "characters for mongodb:// URL: "
472 << url);
473 }
474
475 // 7. Validate, split, and URL decode the connection options
476 auto options =
477 addTXTOptions(parseOptions(connectionOptions, url), canonicalHost, url, isSeedlist);
478
479 // If a replica set option was specified, store it in the 'setName' field.
480 auto optIter = options.find("replicaSet");
481 std::string setName;
482 if (optIter != end(options)) {
483 setName = optIter->second;
484 invariant(!setName.empty());
485 }
486
487 boost::optional<bool> retryWrites = boost::none;
488 optIter = options.find("retryWrites");
489 if (optIter != end(options)) {
490 if (optIter->second == "true") {
491 retryWrites.reset(true);
492 } else if (optIter->second == "false") {
493 retryWrites.reset(false);
494 } else {
495 uasserted(ErrorCodes::FailedToParse,
496 str::stream() << "retryWrites must be either \"true\" or \"false\"");
497 }
498 }
499
500 ConnectionString cs(
501 setName.empty() ? ConnectionString::MASTER : ConnectionString::SET, servers, setName);
502 return MongoURI(
503 std::move(cs), username, password, database, std::move(retryWrites), std::move(options));
504 }
505
parse(const std::string & url)506 StatusWith<MongoURI> MongoURI::parse(const std::string& url) try {
507 return parseImpl(url);
508 } catch (const std::exception&) {
509 return exceptionToStatus();
510 }
511
getAppName() const512 const boost::optional<std::string> MongoURI::getAppName() const {
513 const auto optIter = _options.find("appName");
514 if (optIter != end(_options)) {
515 return optIter->second;
516 }
517 return boost::none;
518 }
519
canonicalizeURIAsString() const520 std::string MongoURI::canonicalizeURIAsString() const {
521 StringBuilder uri;
522 uri << kURIPrefix;
523 if (!_user.empty()) {
524 uri << uriEncode(_user);
525 if (!_password.empty()) {
526 uri << ":" << uriEncode(_password);
527 }
528 uri << "@";
529 }
530
531 const auto& servers = _connectString.getServers();
532 if (!servers.empty()) {
533 auto delimeter = "";
534 for (auto& hostAndPort : servers) {
535 if (boost::count(hostAndPort.host(), ':') > 1) {
536 uri << delimeter << "[" << uriEncode(hostAndPort.host()) << "]"
537 << ":" << uriEncode(std::to_string(hostAndPort.port()));
538 } else if (StringData(hostAndPort.host()).endsWith(".sock")) {
539 uri << delimeter << uriEncode(hostAndPort.host());
540 } else {
541 uri << delimeter << uriEncode(hostAndPort.host()) << ":"
542 << uriEncode(std::to_string(hostAndPort.port()));
543 }
544 delimeter = ",";
545 }
546 } else {
547 uri << kDefaultMongoHost;
548 }
549
550 uri << "/";
551 if (!_database.empty()) {
552 uri << uriEncode(_database);
553 }
554
555 if (!_options.empty()) {
556 auto delimeter = "";
557 uri << "?";
558 for (const auto& pair : _options) {
559 uri << delimeter << uriEncode(pair.first) << "=" << uriEncode(pair.second);
560 delimeter = "&";
561 }
562 }
563 return uri.str();
564 }
565 } // namespace mongo
566