1# Copyright 2011-present MongoDB, Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you
4# may not use this file except in compliance with the License.  You
5# may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12# implied.  See the License for the specific language governing
13# permissions and limitations under the License.
14
15
16"""Tools to parse and validate a MongoDB URI."""
17import re
18import warnings
19import sys
20
21from bson.py3compat import string_type, PY3
22
23if PY3:
24    from urllib.parse import unquote_plus
25else:
26    from urllib import unquote_plus
27
28from pymongo.common import (
29    get_validated_options, INTERNAL_URI_OPTION_NAME_MAP,
30    URI_OPTIONS_DEPRECATION_MAP, _CaseInsensitiveDictionary)
31from pymongo.errors import ConfigurationError, InvalidURI
32from pymongo.srv_resolver import _HAVE_DNSPYTHON, _SrvResolver
33
34
# URI scheme prefixes recognised by parse_uri. The lengths are computed once
# so the prefix can be sliced off the front of a URI.
SCHEME = 'mongodb://'
SCHEME_LEN = len(SCHEME)
SRV_SCHEME = 'mongodb+srv://'
SRV_SCHEME_LEN = len(SRV_SCHEME)
# Port used when a host in the URI does not specify one.
DEFAULT_PORT = 27017
40
41
def parse_userinfo(userinfo):
    """Split and unescape the user information part of a MongoDB URI.

    Reserved characters such as ':', '/', '+' and '@' must be
    percent-escaped following RFC 3986. A raw '@', or more than one raw
    ':', is therefore rejected with InvalidURI, as is an empty username.

    Returns a 2-tuple containing the unescaped username followed
    by the unescaped password. The password is the empty string when
    `userinfo` contains no ':'.

    :Parameters:
        - `userinfo`: A string of the form <username>:<password>

    .. versionchanged:: 2.2
       Now uses `urllib.unquote_plus` so `+` characters must be escaped.
    """
    needs_escaping = userinfo.count(':') > 1 or '@' in userinfo
    if needs_escaping:
        # Point the user at the quoting helper for the running interpreter.
        quote_fn = "urllib.parse.quote_plus" if PY3 else "urllib.quote_plus"
        raise InvalidURI("Username and password must be escaped according to "
                         "RFC 3986, use %s()." % quote_fn)

    # No password is expected with GSSAPI authentication, so a missing
    # ':' is fine and simply yields an empty password.
    username, _, password = userinfo.partition(":")
    if not username:
        raise InvalidURI("The empty string is not valid username.")

    return unquote_plus(username), unquote_plus(password)
68
69
def parse_ipv6_literal_host(entity, default_port):
    """Split an IPv6 literal host[:port] string into its parts.

    Returns a 2-tuple of IPv6 literal followed by port. When `entity`
    carries a port it is returned as the (unvalidated) string that follows
    ']:'; otherwise `default_port` is returned unchanged.

    Raises ValueError if `entity` contains no closing ']'.

    :Parameters:
        - `entity`: A string that represents an IPv6 literal enclosed
                    in square brackets (e.g. '[::1]' or '[::1]:27017').
        - `default_port`: The port number to use when one wasn't
                          specified in entity.
    """
    if ']' not in entity:
        raise ValueError("an IPv6 address literal must be "
                         "enclosed in '[' and ']' according "
                         "to RFC 2732.")

    # Split on the first ']:'; what precedes it is '[' plus the address.
    bracketed, separator, port = entity.partition(']:')
    if separator:
        return bracketed[1:], port

    # No port given: strip the first and last characters of the entity.
    return entity[1:-1], default_port
90
91
def parse_host(entity, default_port=DEFAULT_PORT):
    """Validate a host string and split it into host and port.

    Returns a 2-tuple of host followed by port where port is default_port
    if it wasn't specified in the string. An entity ending in ".sock" is
    returned as-is together with default_port.

    Raises ValueError for an unbracketed entity with more than one ':'
    or for a port string that fails validation.

    :Parameters:
        - `entity`: A host or host:port string where host could be a
                    hostname or IP address.
        - `default_port`: The port number to use when one wasn't
                          specified in entity.
    """
    if entity[0] == '[':
        host, port = parse_ipv6_literal_host(entity, default_port)
    elif entity.endswith(".sock"):
        # Returned untouched: no port parsing and no lowercasing.
        return entity, default_port
    elif ':' in entity:
        if entity.count(':') > 1:
            raise ValueError("Reserved characters such as ':' must be "
                             "escaped according RFC 2396. An IPv6 "
                             "address literal must be enclosed in '[' "
                             "and ']' according to RFC 2732.")
        host, port = entity.split(':', 1)
    else:
        host, port = entity, default_port

    # Only ports that came out of the string need checking and converting;
    # a non-string default_port is passed through unchanged.
    if isinstance(port, string_type):
        port_ok = port.isdigit() and 0 < int(port) <= 65535
        if not port_ok:
            raise ValueError("Port must be an integer between 0 and 65535: %s"
                             % (port,))
        port = int(port)

    # Normalize hostname to lowercase, since DNS is case-insensitive:
    # http://tools.ietf.org/html/rfc4343
    # This prevents useless rediscovery if "foo.com" is in the seed list but
    # "FOO.com" is in the hello response.
    return host.lower(), port
128
129
# Public TLS options whose values are implicitly determined by tlsInsecure.
_IMPLICIT_TLSINSECURE_OPTS = {
    "tlsallowinvalidcertificates",
    "tlsallowinvalidhostnames",
    "tlsdisableocspendpointcheck",
}

# Options that cannot be specified when tlsInsecure is also specified: the
# implicit options above, under both their public and their internal names.
_TLSINSECURE_EXCLUDE_OPTS = set(_IMPLICIT_TLSINSECURE_OPTS).union(
    INTERNAL_URI_OPTION_NAME_MAP[name]
    for name in _IMPLICIT_TLSINSECURE_OPTS)
140
141
def _parse_options(opts, delim):
    """Helper for split_options that builds the options dictionary.

    Splits `opts` on `delim` and each piece on "=". Values of
    readPreferenceTags are collected into a list, one entry per
    occurrence. Every other option keeps only its last value and a warning
    is issued when an option is repeated. All values are unquoted except
    those of readPreferenceTags and authMechanismProperties, which are
    stored exactly as written.

    A piece that does not split into exactly one key and one value raises
    ValueError from the tuple unpacking.
    """
    parsed = _CaseInsensitiveDictionary()
    for pair in opts.split(delim):
        name, raw_value = pair.split("=")
        lowered = name.lower()

        if lowered == 'readpreferencetags':
            parsed.setdefault(name, []).append(raw_value)
            continue

        if name in parsed:
            warnings.warn("Duplicate URI option '%s'." % (name,))

        if lowered == 'authmechanismproperties':
            parsed[name] = raw_value
        else:
            parsed[name] = unquote_plus(raw_value)

    return parsed
161
162
def _handle_security_options(options):
    """Raise InvalidURI when conflicting TLS options are present in the
    options dictionary.

    Besides validating, this sets ``tlsdisableocspendpointcheck`` to True
    when tlsAllowInvalidCertificates is ``True``. The same dictionary is
    returned.

    :Parameters:
        - `options`: Instance of _CaseInsensitiveDictionary containing
          MongoDB URI options.
    """
    both_given_msg = ("URI options %s and %s cannot be specified "
                      "simultaneously.")

    # tlsInsecure may not be combined with any option it implies.
    if options.get('tlsinsecure') is not None:
        for excluded in _TLSINSECURE_EXCLUDE_OPTS:
            if excluded in options:
                raise InvalidURI(both_given_msg % (
                    options.cased_key('tlsinsecure'),
                    options.cased_key(excluded)))

    # Look an option up by its public name, falling back to its internal
    # name when the public one is absent or falsy.
    def _lookup(public_name):
        return (options.get(public_name) or
                options.get(INTERNAL_URI_OPTION_NAME_MAP[public_name]))

    # Handle co-occurrence of OCSP & tlsAllowInvalidCertificates options.
    allow_invalid_certs = _lookup('tlsallowinvalidcertificates')
    if allow_invalid_certs is not None:
        if 'tlsdisableocspendpointcheck' in options:
            raise InvalidURI(both_given_msg % (
                'tlsallowinvalidcertificates',
                options.cased_key('tlsdisableocspendpointcheck')))
        if allow_invalid_certs is True:
            options['tlsdisableocspendpointcheck'] = True

    # Handle co-occurrence of CRL and OCSP-related options.
    if _lookup('tlscrlfile') is not None:
        crl_msg = ("URI option %s=True cannot be specified when "
                   "CRL checking is enabled.")
        for name in ('tlsinsecure', 'tlsallowinvalidcertificates',
                     'tlsdisableocspendpointcheck'):
            if options.get(name) is True:
                raise InvalidURI(crl_msg % (name,))

    # ssl and tls must agree when both are given.
    if 'ssl' in options and 'tls' in options:
        def _as_bool(value):
            # Map the strings 'true'/'false' to booleans; anything else
            # (including real booleans) is compared as-is.
            if value in ('true', 'false'):
                return value == 'true'
            return value

        if _as_bool(options.get('ssl')) != _as_bool(options.get('tls')):
            mismatch_msg = ("Can not specify conflicting values for URI "
                            "options %s and %s.")
            raise InvalidURI(mismatch_msg % (
                options.cased_key('ssl'), options.cased_key('tls')))

    return options
221
222
def _handle_option_deprecations(options):
    """Warn about deprecated options found in the options dictionary.

    Each option listed in URI_OPTIONS_DEPRECATION_MAP triggers a
    DeprecationWarning. A renamed option is also removed from the
    dictionary when its replacement is present as well. The same
    dictionary is returned.

    :Parameters:
        - `options`: Instance of _CaseInsensitiveDictionary containing
          MongoDB URI options.
    """
    # Iterate over a snapshot because entries may be popped along the way.
    for name in list(options):
        if name not in URI_OPTIONS_DEPRECATION_MAP:
            continue

        mode, message = URI_OPTIONS_DEPRECATION_MAP[name]
        if mode == 'renamed':
            # For renamed options the message is the replacement's name.
            replacement = message
            if replacement in options:
                warnings.warn(
                    "Deprecated option '%s' ignored in favor of '%s'." % (
                        options.cased_key(name),
                        options.cased_key(replacement)),
                    DeprecationWarning, stacklevel=2)
                options.pop(name)
            else:
                warnings.warn(
                    "Option '%s' is deprecated, use '%s' instead." % (
                        options.cased_key(name), replacement),
                    DeprecationWarning, stacklevel=2)
        elif mode == 'removed':
            warnings.warn(
                "Option '%s' is deprecated. %s." % (
                    options.cased_key(name), message),
                DeprecationWarning, stacklevel=2)

    return options
257
258
def _normalize_options(options):
    """Rename options in the options dictionary to their internally-used
    names and expand the tlsInsecure option.

    The dictionary is modified in place and returned.

    :Parameters:
        - `options`: Instance of _CaseInsensitiveDictionary containing
          MongoDB URI options.
    """
    insecure = options.get('tlsinsecure')
    if insecure is not None:
        for public_name in _IMPLICIT_TLSINSECURE_OPTS:
            # Internal options are the logical inverse of the public ones.
            options[INTERNAL_URI_OPTION_NAME_MAP[public_name]] = not insecure

    # Iterate over a snapshot because keys are replaced during the loop.
    for name in list(options):
        internal_name = INTERNAL_URI_OPTION_NAME_MAP.get(name, None)
        if internal_name is None:
            continue
        options[internal_name] = options.pop(name)

    return options
280
281
def validate_options(opts, warn=False):
    """Validates and normalizes options passed in a MongoDB URI.

    This is a thin wrapper that delegates to
    ``pymongo.common.get_validated_options``.

    Returns a new dictionary of validated and normalized options. If warn is
    False then errors will be thrown for invalid options, otherwise they will
    be ignored and a warning will be issued.

    :Parameters:
        - `opts`: A dict of MongoDB URI options.
        - `warn` (optional): If ``True`` then warnings will be logged and
          invalid options will be ignored. Otherwise invalid options will
          cause errors.
    """
    return get_validated_options(opts, warn)
296
297
def split_options(opts, validate=True, warn=False, normalize=True):
    """Takes the options portion of a MongoDB URI, validates each option
    and returns the options in a dictionary.

    Options may be separated by '&' or by ';', but not by a mix of both.
    Raises InvalidURI when the separators are mixed, when the string is
    not made of key=value pairs, or (with `validate`) when authSource is
    the empty string.

    :Parameters:
        - `opts`: A string representing MongoDB URI options.
        - `validate`: If ``True`` (the default), validate and normalize all
          options.
        - `warn`: Passed on to validate_options. If ``True``, invalid
          options are ignored with a warning; if ``False`` (the default)
          they cause errors. Only used when `validate` is true.
        - `normalize`: If ``True`` (the default), renames all options to their
          internally-used names.
    """
    uses_amp = "&" in opts
    uses_semi = ";" in opts
    try:
        if uses_amp and uses_semi:
            raise InvalidURI("Can not mix '&' and ';' for option separators.")

        if uses_amp:
            delim = "&"
        elif uses_semi:
            delim = ";"
        elif "=" in opts:
            # A single key=value pair: no explicit delimiter.
            delim = None
        else:
            raise ValueError
        options = _parse_options(opts, delim)
    except ValueError:
        raise InvalidURI("MongoDB URI options are key=value pairs.")

    options = _handle_option_deprecations(_handle_security_options(options))

    if validate:
        options = validate_options(options, warn)
        if options.get('authsource') == '':
            raise InvalidURI(
                "the authSource database cannot be an empty string")

    if not normalize:
        return options
    return _normalize_options(options)
341
342
def split_hosts(hosts, default_port=DEFAULT_PORT):
    """Takes a string of the form host1[:port],host2[:port]... and
    splits it into (host, port) tuples. If [:port] isn't present the
    default_port is used.

    Returns a list of 2-tuples containing the host name (or IP) followed by
    port number, in the order the hosts appear in the string. Entries
    ending in '.sock' are paired with a port of ``None``.

    Raises ConfigurationError if any comma-separated entry is empty.

    :Parameters:
        - `hosts`: A string of the form host1[:port],host2[:port],...
        - `default_port`: The port number to use when one wasn't specified
          for a host.
    """
    nodes = []
    for entity in hosts.split(','):
        if not entity:
            raise ConfigurationError("Empty host "
                                     "(or extra comma in host list).")
        # Unix socket entities don't have ports
        port = None if entity.endswith('.sock') else default_port
        nodes.append(parse_host(entity, port))
    return nodes
367
368
# Prohibited characters in database name. DB names also can't have ".", but for
# backward-compat we allow "db.collection" in URI.
_BAD_DB_CHARS = re.compile('[' + re.escape(r'/ "$') + ']')

# Option names accepted from the DNS-provided options of a mongodb+srv://
# URI; parse_uri raises ConfigurationError if any other option is present.
# NOTE(review): both lowercase and camelCase spellings are listed, presumably
# because the parsed keys may appear in either form - confirm.
_ALLOWED_TXT_OPTS = frozenset(
    ['authsource', 'authSource', 'replicaset', 'replicaSet', 'loadbalanced',
     'loadBalanced'])
376
377
378def _check_options(nodes, options):
379    # Ensure directConnection was not True if there are multiple seeds.
380    if len(nodes) > 1 and options.get('directconnection'):
381        raise ConfigurationError(
382            'Cannot specify multiple hosts with directConnection=true')
383
384    if options.get('loadbalanced'):
385        if len(nodes) > 1:
386            raise ConfigurationError(
387                'Cannot specify multiple hosts with loadBalanced=true')
388        if options.get('directconnection'):
389            raise ConfigurationError(
390                'Cannot specify directConnection=true with loadBalanced=true')
391        if options.get('replicaset'):
392            raise ConfigurationError(
393                'Cannot specify replicaSet with loadBalanced=true')
394
395
def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False,
              normalize=True, connect_timeout=None):
    """Parse and validate a MongoDB URI.

    Returns a dict of the form::

        {
            'nodelist': <list of (host, port) tuples>,
            'username': <username> or None,
            'password': <password> or None,
            'database': <database name> or None,
            'collection': <collection name> or None,
            'options': <dict of MongoDB URI options>,
            'fqdn': <fqdn of the MongoDB+SRV URI> or None
        }

    If the URI scheme is "mongodb+srv://" DNS SRV and TXT lookups will be done
    to build nodelist and options. For such URIs the ``ssl`` option defaults
    to true unless the URI already sets ``ssl`` or its alias ``tls``.

    Raises InvalidURI for a malformed URI and ConfigurationError for
    unsupported combinations (for example multiple hosts together with
    directConnection=true, or a mongodb+srv:// URI when dnspython is not
    installed).

    :Parameters:
        - `uri`: The MongoDB URI to parse.
        - `default_port`: The port number to use when one wasn't specified
          for a host in the URI.
        - `validate` (optional): If ``True`` (the default), validate and
          normalize all options. Default: ``True``.
        - `warn` (optional): When validating, if ``True`` then will warn
          the user then ignore any invalid options or values. If ``False``,
          validation will error when options are unsupported or values are
          invalid. Default: ``False``.
        - `normalize` (optional): If ``True``, convert names of URI options
          to their internally-used names. Default: ``True``.
        - `connect_timeout` (optional): The maximum time in milliseconds to
          wait for a response from the DNS server.

    .. versionchanged:: 3.9
        Added the ``normalize`` parameter.

    .. versionchanged:: 3.6
        Added support for mongodb+srv:// URIs.

    .. versionchanged:: 3.5
        Return the original value of the ``readPreference`` MongoDB URI option
        instead of the validated read preference mode.

    .. versionchanged:: 3.1
        ``warn`` added so invalid options can be ignored.
    """
    # Strip the scheme and remember which of the two it was.
    if uri.startswith(SCHEME):
        is_srv = False
        scheme_free = uri[SCHEME_LEN:]
    elif uri.startswith(SRV_SCHEME):
        if not _HAVE_DNSPYTHON:
            python_path = sys.executable or "python"
            raise ConfigurationError(
                'The "dnspython" module must be '
                'installed to use mongodb+srv:// URIs. '
                'To fix this error install pymongo with the srv extra:\n '
                '%s -m pip install "pymongo[srv]"' % (python_path,))
        is_srv = True
        scheme_free = uri[SRV_SCHEME_LEN:]
    else:
        raise InvalidURI("Invalid URI scheme: URI must "
                         "begin with '%s' or '%s'" % (SCHEME, SRV_SCHEME))

    if not scheme_free:
        raise InvalidURI("Must provide at least one hostname or IP.")

    user = None
    passwd = None
    dbase = None
    collection = None
    options = _CaseInsensitiveDictionary()

    # Everything before the first '/' is [userinfo@]hosts; the rest is
    # [database][?options].
    host_part, _, path_part = scheme_free.partition('/')
    if not host_part:
        host_part = path_part
        path_part = ""

    if not path_part and '?' in host_part:
        raise InvalidURI("A '/' is required between "
                         "the host list and any options.")

    if path_part:
        dbase, _, opts = path_part.partition('?')
        if dbase:
            dbase = unquote_plus(dbase)
            # "db.collection" is accepted; split off the collection name.
            if '.' in dbase:
                dbase, collection = dbase.split('.', 1)
            if _BAD_DB_CHARS.search(dbase):
                raise InvalidURI('Bad database name "%s"' % dbase)
        else:
            dbase = None

        if opts:
            options.update(split_options(opts, validate, warn, normalize))

    if '@' in host_part:
        # Split on the last '@'; parse_userinfo rejects any '@' left over
        # in the userinfo.
        userinfo, _, hosts = host_part.rpartition('@')
        user, passwd = parse_userinfo(userinfo)
    else:
        hosts = host_part

    if '/' in hosts:
        raise InvalidURI("Any '/' in a unix domain socket must be"
                         " percent-encoded: %s" % host_part)

    hosts = unquote_plus(hosts)
    fqdn = None

    if is_srv:
        if options.get('directConnection'):
            raise ConfigurationError(
                "Cannot specify directConnection=true with "
                "%s URIs" % (SRV_SCHEME,))
        # With default_port=None a host without an explicit port comes back
        # with port None, which is what an SRV URI requires.
        nodes = split_hosts(hosts, default_port=None)
        if len(nodes) != 1:
            raise InvalidURI(
                "%s URIs must include one, "
                "and only one, hostname" % (SRV_SCHEME,))
        fqdn, port = nodes[0]
        if port is not None:
            raise InvalidURI(
                "%s URIs must not include a port number" % (SRV_SCHEME,))

        # Use the connection timeout. connectTimeoutMS passed as a keyword
        # argument overrides the same option passed in the connection string.
        connect_timeout = connect_timeout or options.get("connectTimeoutMS")
        dns_resolver = _SrvResolver(fqdn, connect_timeout=connect_timeout)
        nodes = dns_resolver.get_hosts()
        dns_options = dns_resolver.get_options()
        if dns_options:
            parsed_dns_options = split_options(
                dns_options, validate, warn, normalize)
            if set(parsed_dns_options) - _ALLOWED_TXT_OPTS:
                raise ConfigurationError(
                    "Only authSource, replicaSet, and loadBalanced are "
                    "supported from DNS")
            # Options given in the URI take precedence over DNS options.
            for opt, val in parsed_dns_options.items():
                if opt not in options:
                    options[opt] = val
        # Default to an encrypted connection, but only when the URI set
        # neither "ssl" nor its alias "tls". Checking "ssl" alone would add
        # ssl=True next to an explicit tls=false when option names are not
        # normalized, producing a conflicting pair.
        if "ssl" not in options and "tls" not in options:
            options["ssl"] = True if validate else 'true'
    else:
        nodes = split_hosts(hosts, default_port=default_port)

    _check_options(nodes, options)

    return {
        'nodelist': nodes,
        'username': user,
        'password': passwd,
        'database': dbase,
        'collection': collection,
        'options': options,
        'fqdn': fqdn
    }
552
553
if __name__ == '__main__':
    # Ad-hoc command-line helper: parse the URI given as the first argument
    # and pretty-print the result.
    import pprint
    import sys
    if len(sys.argv) < 2:
        # Report a missing argument with a usage message rather than an
        # IndexError traceback.
        sys.stderr.write("usage: %s <mongodb-uri>\n" % (sys.argv[0],))
        sys.exit(2)
    try:
        pprint.pprint(parse_uri(sys.argv[1]))
    except InvalidURI as exc:
        # An invalid URI is reported on stdout; the exit status stays 0.
        print(exc)
    sys.exit(0)
562