# Copyright 2011-present MongoDB, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.


"""Tools to parse and validate a MongoDB URI."""
import re
import warnings
import sys

from bson.py3compat import string_type, PY3

if PY3:
    from urllib.parse import unquote_plus
else:
    from urllib import unquote_plus

from pymongo.common import (
    get_validated_options, INTERNAL_URI_OPTION_NAME_MAP,
    URI_OPTIONS_DEPRECATION_MAP, _CaseInsensitiveDictionary)
from pymongo.errors import ConfigurationError, InvalidURI
from pymongo.srv_resolver import _HAVE_DNSPYTHON, _SrvResolver


SCHEME = 'mongodb://'
SCHEME_LEN = len(SCHEME)
SRV_SCHEME = 'mongodb+srv://'
SRV_SCHEME_LEN = len(SRV_SCHEME)
DEFAULT_PORT = 27017


def parse_userinfo(userinfo):
    """Validates the format of user information in a MongoDB URI.
    Reserved characters like ':', '/', '+' and '@' must be escaped
    following RFC 3986.

    Returns a 2-tuple containing the unescaped username followed
    by the unescaped password.

    Raises :class:`InvalidURI` if `userinfo` contains an unescaped '@',
    more than one ':', or an empty username.

    :Parameters:
        - `userinfo`: A string of the form <username>:<password>

    .. versionchanged:: 2.2
       Now uses `urllib.unquote_plus` so `+` characters must be escaped.
    """
    if '@' in userinfo or userinfo.count(':') > 1:
        if PY3:
            quote_fn = "urllib.parse.quote_plus"
        else:
            quote_fn = "urllib.quote_plus"
        raise InvalidURI("Username and password must be escaped according to "
                         "RFC 3986, use %s()." % quote_fn)
    user, _, passwd = userinfo.partition(":")
    # No password is expected with GSSAPI authentication.
    if not user:
        raise InvalidURI("The empty string is not valid username.")
    return unquote_plus(user), unquote_plus(passwd)


def parse_ipv6_literal_host(entity, default_port):
    """Validates an IPv6 literal host:port string.

    Returns a 2-tuple of IPv6 literal followed by port where
    port is default_port if it wasn't specified in entity. A port taken
    from `entity` is returned as a string, unconverted.

    Raises :class:`ValueError` if `entity` contains no closing ']'.

    :Parameters:
        - `entity`: A string that represents an IPv6 literal enclosed
                    in square brackets (e.g. '[::1]' or '[::1]:27017').
        - `default_port`: The port number to use when one wasn't
                          specified in entity.
    """
    if entity.find(']') == -1:
        raise ValueError("an IPv6 address literal must be "
                         "enclosed in '[' and ']' according "
                         "to RFC 2732.")
    i = entity.find(']:')
    if i == -1:
        # No port suffix: strip the first and last characters.
        return entity[1:-1], default_port
    return entity[1: i], entity[i + 2:]


def parse_host(entity, default_port=DEFAULT_PORT):
    """Validates a host string

    Returns a 2-tuple of host followed by port where port is default_port
    if it wasn't specified in the string. The host is lowercased, except
    for entities ending in ".sock", which are returned unchanged.

    Raises :class:`ValueError` if `entity` is empty, contains more than
    one unbracketed ':', or carries a port outside 1-65535.

    :Parameters:
        - `entity`: A host or host:port string where host could be a
                    hostname or IP address.
        - `default_port`: The port number to use when one wasn't
                          specified in entity.
    """
    if not entity:
        # Report an empty host the same way as every other invalid host,
        # rather than failing with IndexError on entity[0] below.
        raise ValueError("Host must not be an empty string.")
    host = entity
    port = default_port
    if entity[0] == '[':
        host, port = parse_ipv6_literal_host(entity, default_port)
    elif entity.endswith(".sock"):
        return entity, default_port
    elif entity.find(':') != -1:
        if entity.count(':') > 1:
            raise ValueError("Reserved characters such as ':' must be "
                             "escaped according RFC 2396. An IPv6 "
                             "address literal must be enclosed in '[' "
                             "and ']' according to RFC 2732.")
        host, port = host.split(':', 1)
    # A port parsed out of the entity is still a string here; default_port
    # is passed through untouched when it is not a string.
    if isinstance(port, string_type):
        if not port.isdigit() or not 0 < int(port) <= 65535:
            raise ValueError("Port must be an integer between 1 and 65535: %s"
                             % (port,))
        port = int(port)

    # Normalize hostname to lowercase, since DNS is case-insensitive:
    # http://tools.ietf.org/html/rfc4343
    # This prevents useless rediscovery if "foo.com" is in the seed list but
    # "FOO.com" is in the hello response.
    return host.lower(), port


# Options whose values are implicitly determined by tlsInsecure.
_IMPLICIT_TLSINSECURE_OPTS = {
    "tlsallowinvalidcertificates",
    "tlsallowinvalidhostnames",
    "tlsdisableocspendpointcheck",}

# Options that cannot be specified when tlsInsecure is also specified.
# Covers both the public names and their internal equivalents.
_TLSINSECURE_EXCLUDE_OPTS = (
    set(_IMPLICIT_TLSINSECURE_OPTS) |
    {INTERNAL_URI_OPTION_NAME_MAP[k] for k in _IMPLICIT_TLSINSECURE_OPTS})


def _parse_options(opts, delim):
    """Helper method for split_options which creates the options dict.
    Also handles the creation of a list for the URI tag_sets/
    readpreferencetags portion, and the use of a unicode options string.

    Raises :class:`ValueError` (from tuple unpacking) when an item does
    not contain exactly one '='; split_options converts that to InvalidURI.
    """
    options = _CaseInsensitiveDictionary()
    for uriopt in opts.split(delim):
        key, value = uriopt.split("=")
        if key.lower() == 'readpreferencetags':
            # Repeated readPreferenceTags accumulate into a list.
            options.setdefault(key, []).append(value)
        else:
            if key in options:
                warnings.warn("Duplicate URI option '%s'." % (key,))
            if key.lower() == 'authmechanismproperties':
                # Stored without percent-decoding.
                val = value
            else:
                val = unquote_plus(value)
            options[key] = val

    return options


def _handle_security_options(options):
    """Raise appropriate errors when conflicting TLS options are present in
    the options dictionary.

    Returns the same `options` object, which may have gained a
    'tlsdisableocspendpointcheck' entry.

    :Parameters:
        - `options`: Instance of _CaseInsensitiveDictionary containing
          MongoDB URI options.
    """
    tlsinsecure = options.get('tlsinsecure')
    if tlsinsecure is not None:
        for opt in _TLSINSECURE_EXCLUDE_OPTS:
            if opt in options:
                err_msg = ("URI options %s and %s cannot be specified "
                           "simultaneously.")
                raise InvalidURI(err_msg % (
                    options.cased_key('tlsinsecure'), options.cased_key(opt)))

    # Convenience function to retrieve option values based on public or private names.
    def _getopt(opt):
        return (options.get(opt) or
                options.get(INTERNAL_URI_OPTION_NAME_MAP[opt]))

    # Handle co-occurence of OCSP & tlsAllowInvalidCertificates options.
    tlsallowinvalidcerts = _getopt('tlsallowinvalidcertificates')
    if tlsallowinvalidcerts is not None:
        if 'tlsdisableocspendpointcheck' in options:
            err_msg = ("URI options %s and %s cannot be specified "
                       "simultaneously.")
            raise InvalidURI(err_msg % (
                'tlsallowinvalidcertificates', options.cased_key(
                    'tlsdisableocspendpointcheck')))
        # Identity check: only the boolean True triggers this.
        if tlsallowinvalidcerts is True:
            options['tlsdisableocspendpointcheck'] = True

    # Handle co-occurence of CRL and OCSP-related options.
    tlscrlfile = _getopt('tlscrlfile')
    if tlscrlfile is not None:
        for opt in ('tlsinsecure', 'tlsallowinvalidcertificates',
                    'tlsdisableocspendpointcheck'):
            if options.get(opt) is True:
                err_msg = ("URI option %s=True cannot be specified when "
                           "CRL checking is enabled.")
                raise InvalidURI(err_msg % (opt,))

    if 'ssl' in options and 'tls' in options:
        def truth_value(val):
            # Map the strings 'true'/'false' to booleans so that they
            # compare equal to real booleans; anything else is compared
            # as-is.
            if val in ('true', 'false'):
                return val == 'true'
            return val
        if truth_value(options.get('ssl')) != truth_value(options.get('tls')):
            err_msg = ("Can not specify conflicting values for URI options %s "
                       "and %s.")
            raise InvalidURI(err_msg % (
                options.cased_key('ssl'), options.cased_key('tls')))

    return options


def _handle_option_deprecations(options):
    """Issue appropriate warnings when deprecated options are present in the
    options dictionary. Removes deprecated option key, value pairs if the
    options dictionary is found to also have the renamed option.

    Returns the same `options` object.

    :Parameters:
        - `options`: Instance of _CaseInsensitiveDictionary containing
          MongoDB URI options.
    """
    # Iterate over a snapshot of the keys because entries may be popped.
    for optname in list(options):
        if optname in URI_OPTIONS_DEPRECATION_MAP:
            mode, message = URI_OPTIONS_DEPRECATION_MAP[optname]
            if mode == 'renamed':
                # For renamed options the message is the new option name.
                newoptname = message
                if newoptname in options:
                    warn_msg = ("Deprecated option '%s' ignored in favor of "
                                "'%s'.")
                    warnings.warn(
                        warn_msg % (options.cased_key(optname),
                                    options.cased_key(newoptname)),
                        DeprecationWarning, stacklevel=2)
                    options.pop(optname)
                    continue
                warn_msg = "Option '%s' is deprecated, use '%s' instead."
                warnings.warn(
                    warn_msg % (options.cased_key(optname), newoptname),
                    DeprecationWarning, stacklevel=2)
            elif mode == 'removed':
                warn_msg = "Option '%s' is deprecated. %s."
                warnings.warn(
                    warn_msg % (options.cased_key(optname), message),
                    DeprecationWarning, stacklevel=2)

    return options


def _normalize_options(options):
    """Normalizes option names in the options dictionary by converting them to
    their internally-used names. Also handles use of the tlsInsecure option.

    Returns the same `options` object.

    :Parameters:
        - `options`: Instance of _CaseInsensitiveDictionary containing
          MongoDB URI options.
    """
    tlsinsecure = options.get('tlsinsecure')
    if tlsinsecure is not None:
        for opt in _IMPLICIT_TLSINSECURE_OPTS:
            intname = INTERNAL_URI_OPTION_NAME_MAP[opt]
            # Internal options are logical inverse of public options.
            options[intname] = not tlsinsecure

    # Iterate over a snapshot of the keys because entries are re-keyed.
    for optname in list(options):
        intname = INTERNAL_URI_OPTION_NAME_MAP.get(optname, None)
        if intname is not None:
            options[intname] = options.pop(optname)

    return options


def validate_options(opts, warn=False):
    """Validates and normalizes options passed in a MongoDB URI.

    Returns a new dictionary of validated and normalized options. If warn is
    False then errors will be thrown for invalid options, otherwise they will
    be ignored and a warning will be issued.

    :Parameters:
        - `opts`: A dict of MongoDB URI options.
        - `warn` (optional): If ``True`` then warnings will be logged and
          invalid options will be ignored. Otherwise invalid options will
          cause errors.
    """
    return get_validated_options(opts, warn)


def split_options(opts, validate=True, warn=False, normalize=True):
    """Takes the options portion of a MongoDB URI, validates each option
    and returns the options in a dictionary.

    Raises :class:`InvalidURI` if '&' and ';' separators are mixed, if the
    string is not made of key=value pairs, or (when validating) if
    authSource is the empty string.

    :Parameters:
        - `opts`: A string representing MongoDB URI options.
        - `validate`: If ``True`` (the default), validate and normalize all
          options.
        - `warn`: Passed through to :func:`validate_options`. If ``False``
          (the default), invalid options cause errors; if ``True`` they are
          ignored and a warning is issued.
        - `normalize`: If ``True`` (the default), renames all options to their
          internally-used names.
    """
    and_idx = opts.find("&")
    semi_idx = opts.find(";")
    try:
        if and_idx >= 0 and semi_idx >= 0:
            raise InvalidURI("Can not mix '&' and ';' for option separators.")
        elif and_idx >= 0:
            options = _parse_options(opts, "&")
        elif semi_idx >= 0:
            options = _parse_options(opts, ";")
        elif opts.find("=") != -1:
            # Single key=value pair, no separator present.
            options = _parse_options(opts, None)
        else:
            raise ValueError
    except ValueError:
        raise InvalidURI("MongoDB URI options are key=value pairs.")

    options = _handle_security_options(options)

    options = _handle_option_deprecations(options)

    if validate:
        options = validate_options(options, warn)
        if options.get('authsource') == '':
            raise InvalidURI(
                "the authSource database cannot be an empty string")

    if normalize:
        options = _normalize_options(options)

    return options


def split_hosts(hosts, default_port=DEFAULT_PORT):
    """Takes a string of the form host1[:port],host2[:port]... and
    splits it into (host, port) tuples. If [:port] isn't present the
    default_port is used.

    Returns a list of 2-tuples containing the host name (or IP) followed by
    port number.

    Raises :class:`ConfigurationError` if any comma-separated item is
    empty.

    :Parameters:
        - `hosts`: A string of the form host1[:port],host2[:port],...
        - `default_port`: The port number to use when one wasn't specified
          for a host.
    """
    nodes = []
    for entity in hosts.split(','):
        if not entity:
            raise ConfigurationError("Empty host "
                                     "(or extra comma in host list).")
        port = default_port
        # Unix socket entities don't have ports
        if entity.endswith('.sock'):
            port = None
        nodes.append(parse_host(entity, port))
    return nodes


# Prohibited characters in database name. DB names also can't have ".", but for
# backward-compat we allow "db.collection" in URI.
_BAD_DB_CHARS = re.compile('[' + re.escape(r'/ "$') + ']')

# Option names (lowercase and camelCase spellings) that parse_uri accepts
# from the DNS-provided options of a mongodb+srv:// URI.
_ALLOWED_TXT_OPTS = frozenset(
    ['authsource', 'authSource', 'replicaset', 'replicaSet', 'loadbalanced',
     'loadBalanced'])


def _check_options(nodes, options):
    """Raise ConfigurationError for mutually exclusive host/option combos.

    :Parameters:
        - `nodes`: The list of (host, port) seeds.
        - `options`: The parsed URI options.
    """
    # Ensure directConnection was not True if there are multiple seeds.
    if len(nodes) > 1 and options.get('directconnection'):
        raise ConfigurationError(
            'Cannot specify multiple hosts with directConnection=true')

    if options.get('loadbalanced'):
        if len(nodes) > 1:
            raise ConfigurationError(
                'Cannot specify multiple hosts with loadBalanced=true')
        if options.get('directconnection'):
            raise ConfigurationError(
                'Cannot specify directConnection=true with loadBalanced=true')
        if options.get('replicaset'):
            raise ConfigurationError(
                'Cannot specify replicaSet with loadBalanced=true')


def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False,
              normalize=True, connect_timeout=None):
    """Parse and validate a MongoDB URI.

    Returns a dict of the form::

        {
            'nodelist': <list of (host, port) tuples>,
            'username': <username> or None,
            'password': <password> or None,
            'database': <database name> or None,
            'collection': <collection name> or None,
            'options': <dict of MongoDB URI options>,
            'fqdn': <fqdn of the MongoDB+SRV URI> or None
        }

    If the URI scheme is "mongodb+srv://" DNS SRV and TXT lookups will be done
    to build nodelist and options.

    Raises :class:`InvalidURI` for a malformed URI and
    :class:`ConfigurationError` for unsupported option/host combinations
    or when mongodb+srv:// is used without dnspython installed.

    :Parameters:
        - `uri`: The MongoDB URI to parse.
        - `default_port`: The port number to use when one wasn't specified
          for a host in the URI.
        - `validate` (optional): If ``True`` (the default), validate and
          normalize all options. Default: ``True``.
        - `warn` (optional): When validating, if ``True`` then will warn
          the user then ignore any invalid options or values. If ``False``,
          validation will error when options are unsupported or values are
          invalid. Default: ``False``.
        - `normalize` (optional): If ``True``, convert names of URI options
          to their internally-used names. Default: ``True``.
        - `connect_timeout` (optional): The maximum time in milliseconds to
          wait for a response from the DNS server.

    .. versionchanged:: 3.9
        Added the ``normalize`` parameter.

    .. versionchanged:: 3.6
        Added support for mongodb+srv:// URIs.

    .. versionchanged:: 3.5
        Return the original value of the ``readPreference`` MongoDB URI option
        instead of the validated read preference mode.

    .. versionchanged:: 3.1
        ``warn`` added so invalid options can be ignored.
    """
    if uri.startswith(SCHEME):
        is_srv = False
        scheme_free = uri[SCHEME_LEN:]
    elif uri.startswith(SRV_SCHEME):
        if not _HAVE_DNSPYTHON:
            python_path = sys.executable or "python"
            raise ConfigurationError(
                'The "dnspython" module must be '
                'installed to use mongodb+srv:// URIs. '
                'To fix this error install pymongo with the srv extra:\n '
                '%s -m pip install "pymongo[srv]"' % (python_path,))
        is_srv = True
        scheme_free = uri[SRV_SCHEME_LEN:]
    else:
        raise InvalidURI("Invalid URI scheme: URI must "
                         "begin with '%s' or '%s'" % (SCHEME, SRV_SCHEME))

    if not scheme_free:
        raise InvalidURI("Must provide at least one hostname or IP.")

    user = None
    passwd = None
    dbase = None
    collection = None
    options = _CaseInsensitiveDictionary()

    host_part, _, path_part = scheme_free.partition('/')
    if not host_part:
        # Nothing precedes the first '/': treat the remainder as the host
        # list; the '/' check on `hosts` below then applies to it.
        host_part = path_part
        path_part = ""

    if not path_part and '?' in host_part:
        raise InvalidURI("A '/' is required between "
                         "the host list and any options.")

    if path_part:
        dbase, _, opts = path_part.partition('?')
        if dbase:
            dbase = unquote_plus(dbase)
            if '.' in dbase:
                dbase, collection = dbase.split('.', 1)
            if _BAD_DB_CHARS.search(dbase):
                raise InvalidURI('Bad database name "%s"' % dbase)
        else:
            dbase = None

        if opts:
            options.update(split_options(opts, validate, warn, normalize))

    if '@' in host_part:
        # Split on the last '@'; parse_userinfo rejects any '@' that
        # remains in the userinfo portion.
        userinfo, _, hosts = host_part.rpartition('@')
        user, passwd = parse_userinfo(userinfo)
    else:
        hosts = host_part

    if '/' in hosts:
        raise InvalidURI("Any '/' in a unix domain socket must be"
                         " percent-encoded: %s" % host_part)

    hosts = unquote_plus(hosts)
    fqdn = None

    if is_srv:
        if options.get('directConnection'):
            raise ConfigurationError(
                "Cannot specify directConnection=true with "
                "%s URIs" % (SRV_SCHEME,))
        # default_port=None lets an explicit port be detected below.
        nodes = split_hosts(hosts, default_port=None)
        if len(nodes) != 1:
            raise InvalidURI(
                "%s URIs must include one, "
                "and only one, hostname" % (SRV_SCHEME,))
        fqdn, port = nodes[0]
        if port is not None:
            raise InvalidURI(
                "%s URIs must not include a port number" % (SRV_SCHEME,))

        # Use the connection timeout. connectTimeoutMS passed as a keyword
        # argument overrides the same option passed in the connection string.
        connect_timeout = connect_timeout or options.get("connectTimeoutMS")
        dns_resolver = _SrvResolver(fqdn, connect_timeout=connect_timeout)
        nodes = dns_resolver.get_hosts()
        dns_options = dns_resolver.get_options()
        if dns_options:
            parsed_dns_options = split_options(
                dns_options, validate, warn, normalize)
            if set(parsed_dns_options) - _ALLOWED_TXT_OPTS:
                raise ConfigurationError(
                    "Only authSource, replicaSet, and loadBalanced are "
                    "supported from DNS")
            # Options given in the URI take precedence over DNS options.
            for opt, val in parsed_dns_options.items():
                if opt not in options:
                    options[opt] = val
        # Default to TLS for SRV URIs, but only when the user expressed no
        # preference under either name. With normalize=False an explicit
        # "tls" option is not renamed to "ssl", and adding ssl=True next to
        # it could produce a conflicting ssl/tls pair.
        if "ssl" not in options and "tls" not in options:
            options["ssl"] = True if validate else 'true'
    else:
        nodes = split_hosts(hosts, default_port=default_port)

    _check_options(nodes, options)

    return {
        'nodelist': nodes,
        'username': user,
        'password': passwd,
        'database': dbase,
        'collection': collection,
        'options': options,
        'fqdn': fqdn
    }


if __name__ == '__main__':
    import pprint
    import sys
    try:
        pprint.pprint(parse_uri(sys.argv[1]))
    except InvalidURI as exc:
        print(exc)
    sys.exit(0)