1# -*- coding: utf-8 -*-
2
3#
4# furl - URL manipulation made simple.
5#
6# Ansgar Grunseid
7# grunseid.com
8# grunseid@gmail.com
9#
10# License: Build Amazing Things (Unlicense)
11#
12
13import re
14import abc
15import warnings
16from posixpath import normpath
17
18import six
19from six.moves import urllib
20from six.moves.urllib.parse import quote, unquote
21try:
22    from icecream import ic
23except ImportError:  # Graceful fallback if IceCream isn't installed.
24    ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a)  # noqa
25
26from .omdict1D import omdict1D
27from .compat import string_types, UnicodeMixin
28from .common import (
29    callable_attr, is_iterable_but_not_string, absent as _absent)
30
31
# Map of common protocols, as suggested by the common protocols included in
# urllib/parse.py, to their default ports. Protocol scheme strings are
# lowercase, so lookups in this map are case sensitive. Entries are ordered
# by scheme length, shortest first.
DEFAULT_PORTS = {
    'ws': 80,
    'ftp': 21,
    'git': 9418,
    'hdl': 2641,
    'nfs': 111,
    'sip': 5060,
    'ssh': 22,
    'svn': 3690,
    'wss': 443,
    'http': 80,
    'imap': 143,
    'nntp': 119,
    'sftp': 22,
    'sips': 5061,
    'tftp': 69,
    'rtsp': 554,
    'wais': 210,
    'https': 443,
    'rsync': 873,
    'rtspu': 5004,
    'snews': 563,
    'gopher': 70,
    'telnet': 23,
    'prospero': 191,
}
61
62
def lget(l, index, default=None):
    """
    Safe positional lookup.

    Returns: l[index] if <index> is a valid position in <l>, <default>
    otherwise.
    """
    try:
        item = l[index]
    except IndexError:
        item = default
    return item
68
69
def attemptstr(o):
    """
    Returns: str(o) if the conversion succeeds, <o> itself, unchanged, if
    str(o) raises an exception.
    """
    try:
        text = str(o)
    except Exception:
        return o
    return text
75
76
def utf8(o, default=_absent):
    """
    Returns: o.encode('utf8') if that call succeeds. If it raises, <default>
    is returned when one was provided, otherwise <o> itself is returned
    unchanged.
    """
    try:
        encoded = o.encode('utf8')
    except Exception:
        encoded = o if default is _absent else default
    return encoded
82
83
def non_string_iterable(o):
    """
    Returns: A truthy value if <o> has a callable __iter__ attribute and is
    not a string, a falsy value otherwise.
    """
    has_iter = callable_attr(o, '__iter__')
    return has_iter and not isinstance(o, string_types)
86
87
88# TODO(grun): Support IDNA2008 via the third party idna module. See
89# https://github.com/gruns/furl/issues/73.
def idna_encode(o):
    """
    Returns: <o> encoded with the 'idna' codec, as a str, if <o> has a
    callable encode() method. Otherwise <o> is returned unchanged.
    """
    if not callable_attr(o, 'encode'):
        return o
    encoded = o.encode('idna')
    return str(encoded.decode('utf8'))
94
95
def idna_decode(o):
    """
    Returns: <o> decoded with the 'idna' codec if the UTF-8 encoded form of
    <o> has a callable decode() method. Otherwise <o> is returned unchanged.
    """
    encoded = utf8(o)
    if not callable_attr(encoded, 'decode'):
        return o
    return encoded.decode('idna')
100
101
def is_valid_port(port):
    """
    Parameters:
      port: Candidate port, typically an integer or a string of digits.

    Returns: True if str(port) is a non-empty run of ASCII digits whose
    integer value is between 1 and 65535, inclusive. False otherwise.
    """
    port = str(port)

    # str.isdigit() isn't used here because it also accepts non-ASCII digit
    # characters. Some of those, like the superscript two, can't be parsed
    # by int() and would raise a ValueError. Others would be accepted as a
    # port even though they aren't valid in a URL.
    if not port or not all(char in '0123456789' for char in port):
        return False

    # More than five significant digits always exceeds 65535. Rejecting such
    # strings up front avoids handing arbitrarily long strings to int().
    if len(port.lstrip('0')) > 5:
        return False

    return 0 < int(port) <= 65535
107
108
def static_vars(**kwargs):
    """
    Decorator factory that attaches every keyword argument to the decorated
    function as an attribute of the same name.

    Returns: A decorator that sets the attributes on the function it is
    given and returns that same function.
    """
    def decorator(func):
        for name, attribute in six.iteritems(kwargs):
            setattr(func, name, attribute)
        return func

    return decorator
115
116
def create_quote_fn(safe_charset, quote_plus):
    """
    Build a quoting function bound to a set of safe characters.

    Parameters:
      safe_charset: String of characters that are allowed to remain
        unquoted.
      quote_plus: If truthy, every '%20' in the quoted result is replaced
        with '+'.

    Returns: A function quote_fn(s, dont_quote) that percent-encodes <s>.
    <dont_quote> selects which characters of <safe_charset> are left
    unquoted: True for all of them, False for none of them, or a string of
    characters to exempt. Characters of <dont_quote> that aren't in
    <safe_charset> are ignored.
    """
    def quote_fn(s, dont_quote):
        if dont_quote is True:
            requested = safe_charset
        elif dont_quote is False:
            requested = ''
        else:  # <dont_quote> is expected to be a string.
            requested = dont_quote

        # Drop duplicates and anything outside <safe_charset>, e.g.
        # '?^#?' -> '?'.
        permitted = set(requested) & set(safe_charset)
        result = quote(s, ''.join(permitted))

        return result.replace('%20', '+') if quote_plus else result

    return quote_fn
136
137
138#
139# TODO(grun): Update some of the regex functions below to reflect the fact that
140# the valid encoding of Path segments differs slightly from the valid encoding
141# of Fragment Path segments. Similarly, the valid encodings of Query keys and
142# values differ slightly from the valid encodings of Fragment Query keys and
143# values.
144#
145# For example, '?' and '#' don't need to be encoded in Fragment Path segments
146# but they must be encoded in Path segments. Similarly, '#' doesn't need to be
147# encoded in Fragment Query keys and values, but must be encoded in Query keys
148# and values.
149#
150# Perhaps merge them with URLPath, FragmentPath, URLQuery, and
151# FragmentQuery when those new classes are created (see the TODO
152# currently at the top of the source, 02/03/2012).
153#
154
155# RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt)
156#
157#   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
158#
159#   pct-encoded = "%" HEXDIG HEXDIG
160#
161#   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
162#                 / "*" / "+" / "," / ";" / "="
163#
164#   pchar       = unreserved / pct-encoded / sub-delims / ":" / "@"
165#
166#   === Path ===
167#   segment     = *pchar
168#
169#   === Query ===
170#   query       = *( pchar / "/" / "?" )
171#
172#   === Scheme ===
173#   scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
174#
# Regex fragment for one percent-encoded octet: a '%' followed by two
# characters from the class [a-fA-F\d], e.g. '%2F'.
PERCENT_REGEX = r'\%[a-fA-F\d][a-fA-F\d]'
# Characters that may not appear in any period-separated token of a hostname.
INVALID_HOST_CHARS = '!@#$%^&\'\"*()+=:;/'
177
178
@static_vars(regex=re.compile(
    r'^([\w%s]|(%s))*$' % (re.escape('-.~:@!$&\'()*+,;='), PERCENT_REGEX)))
def is_valid_encoded_path_segment(segment):
    """
    Returns: True if <segment> is composed solely of word characters, the
    characters -.~:@!$&'()*+,;= and percent-encoded octets. False otherwise.
    """
    matched = is_valid_encoded_path_segment.regex.match(segment)
    return matched is not None
183
184
@static_vars(regex=re.compile(
    r'^([\w%s]|(%s))*$' % (re.escape('-.~:@!$&\'()*+,;/?'), PERCENT_REGEX)))
def is_valid_encoded_query_key(key):
    """
    Returns: True if <key> is composed solely of word characters, the
    characters -.~:@!$&'()*+,;/? and percent-encoded octets. False
    otherwise. Note that '=' is not accepted in keys.
    """
    matched = is_valid_encoded_query_key.regex.match(key)
    return matched is not None
189
190
@static_vars(regex=re.compile(
    r'^([\w%s]|(%s))*$' % (re.escape('-.~:@!$&\'()*+,;/?='), PERCENT_REGEX)))
def is_valid_encoded_query_value(value):
    """
    Returns: True if <value> is composed solely of word characters, the
    characters -.~:@!$&'()*+,;/?= and percent-encoded octets. False
    otherwise.
    """
    matched = is_valid_encoded_query_value.regex.match(value)
    return matched is not None
195
196
# RFC 3986, section 3.1:
#
#   scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
#
# The pattern is anchored at its end with \Z so that the entire string has to
# be a scheme. Without the anchor, match() accepts any string that merely
# starts with a letter, like 'a b' or 'user@host'.
@static_vars(regex=re.compile(r'[a-zA-Z][a-zA-Z0-9\-\.\+]*\Z'))
def is_valid_scheme(scheme):
    """
    Returns: True if the whole of <scheme> is a letter followed by zero or
    more letters, digits, '+', '-', or '.' characters. False otherwise,
    including for the empty string.
    """
    return is_valid_scheme.regex.match(scheme) is not None
200
201
@static_vars(regex=re.compile('[%s]' % re.escape(INVALID_HOST_CHARS)))
def is_valid_host(hostname):
    """
    Returns: True if none of the period-separated tokens of <hostname>
    contains a character from INVALID_HOST_CHARS and no token is empty. A
    single trailing period, as found in a fully qualified domain name, is
    permitted. False otherwise.
    """
    labels = hostname.split('.')
    if labels[-1] == '':  # Trailing '.' in a fully qualified domain name.
        del labels[-1]

    has_invalid_char = any(
        is_valid_host.regex.search(label) is not None for label in labels)
    if has_invalid_char:
        return False

    # An empty label means adjacent periods, which aren't allowed.
    return '' not in labels
213
214
def get_scheme(url):
    """
    Returns: The scheme of the URL string <url>. '' is returned if <url>
    starts with ':'. None is returned if <url> has no scheme, or if the
    text before the first ':' isn't accepted by is_valid_scheme().
    """
    if url.startswith(':'):
        return ''

    # Only look for the scheme's ':' before the first '/', '?', or '#'.
    # Other URL components, like the path, query, and fragment, may contain
    # colons of their own. For example, the URL 'a?query:' has a ':' in its
    # query but has no scheme.
    head = url.split('#', 1)[0].split('?', 1)[0].split('/', 1)[0]
    colon = head.find(':')
    if colon <= 0:
        return None

    scheme = url[:colon]
    return scheme if is_valid_scheme(scheme) else None
231
232
def strip_scheme(url):
    """
    Returns: <url> without its scheme, if it has one, and without the ':'
    that follows the scheme.
    """
    scheme = get_scheme(url) or ''
    remainder = url[len(scheme):]
    return remainder[1:] if remainder.startswith(':') else remainder
239
240
def set_scheme(url, scheme):
    """
    Returns: <url> with its scheme replaced by <scheme>. If <scheme> is
    None, <url> is returned with its scheme, and the ':' after it, removed.
    """
    after_scheme = strip_scheme(url)
    if scheme is not None:
        return '%s:%s' % (scheme, after_scheme)
    return after_scheme
247
248
# 'netloc' in Python parlance, 'authority' in RFC 3986 parlance.
def has_netloc(url):
    """
    Returns: True if <url> starts with '//', or with '<scheme>://' when
    <url> has a scheme. False otherwise.
    """
    scheme = get_scheme(url)
    if scheme is None:
        prefix = '//'
    else:
        prefix = scheme + '://'
    return url.startswith(prefix)
253
254
def urlsplit(url):
    """
    Parameters:
      url: URL string to split.
    Returns: urlparse.SplitResult tuple subclass, just like
      urlparse.urlsplit() returns, with fields (scheme, netloc, path,
      query, fragment, username, password, hostname, port). See
        http://docs.python.org/library/urlparse.html#urlparse.urlsplit
      for more details on urlsplit(). Unlike urlparse.urlsplit(), the
      scheme is None if <url> has no scheme and the netloc is None if no
      '//' follows the scheme.
    """
    original_scheme = get_scheme(url)

    # urlsplit() parses URLs differently depending on whether or not the URL's
    # scheme is in any of
    #
    #   urllib.parse.uses_fragment
    #   urllib.parse.uses_netloc
    #   urllib.parse.uses_params
    #   urllib.parse.uses_query
    #   urllib.parse.uses_relative
    #
    # For consistent URL parsing, switch the URL's scheme to 'http', a scheme
    # in all of the aforementioned uses_* lists, and afterwards revert to the
    # original scheme (which may or may not be in some, or all, of the
    # uses_* lists).
    if original_scheme is not None:
        url = set_scheme(url, 'http')

    _, netloc, path, query, fragment = urllib.parse.urlsplit(url)

    # Detect and preserve the '//' before the netloc, if present. E.g. preserve
    # URLs like 'http:', 'http://', and '///sup' correctly.
    if strip_scheme(url).startswith('//'):
        netloc = netloc or ''
    else:
        netloc = None

    return urllib.parse.SplitResult(
        original_scheme, netloc, path, query, fragment)
296
297
def urljoin(base, url):
    """
    Parameters:
      base: Base URL to join with <url>.
      url: Relative or absolute URL to join with <base>.

    Returns: The resultant URL from joining <base> and <url>.
    """
    # A scheme only counts here if it is followed by a netloc.
    base_scheme = get_scheme(base) if has_netloc(base) else None
    url_scheme = get_scheme(url) if has_netloc(url) else None

    # For consistent URL joining, switch the base URL's scheme to
    # 'http'. urllib.parse.urljoin() behaves differently depending on the
    # scheme. E.g.
    #
    #   >>> urllib.parse.urljoin('http://google.com/', 'hi')
    #   'http://google.com/hi'
    #
    # vs
    #
    #   >>> urllib.parse.urljoin('asdf://google.com/', 'hi')
    #   'hi'
    root = base if base_scheme is None else set_scheme(base, 'http')

    joined = urllib.parse.urljoin(root, url)

    # Restore the real scheme, preferring the scheme of <url> over the scheme
    # of <base>.
    new_scheme = base_scheme if url_scheme is None else url_scheme
    if new_scheme is None or not has_netloc(joined):
        return joined

    return set_scheme(joined, new_scheme)
332
333
def join_path_segments(*args):
    """
    Concatenate any number of path segment lists into one list, resolving
    the empty segments at the borders between lists so that the slashes
    intended by the inputs are preserved in the final path.

    Segments are treated as opaque strings. Their encoding is neither
    checked nor changed.

    Examples:
      join_path_segments(['a'], ['b']) == ['a','b']
      join_path_segments(['a',''], ['b']) == ['a','b']
      join_path_segments(['a'], ['','b']) == ['a','b']
      join_path_segments(['a',''], ['','b']) == ['a','','b']
      join_path_segments(['a','b'], ['c','d']) == ['a','b','c','d']

    Returns: A list containing the joined path segments.
    """
    joined = []

    for segments in args:
        # Empty lists and a lone '' contribute nothing.
        if not segments or segments == ['']:
            continue

        if joined:
            if joined[-1] == '':
                # The trailing '' is dropped before the next list is added.
                #   ['a',''] + ['b'] == ['a','b']
                #   ['a',''] + ['','b'] == ['a','','b']
                if segments[0] != '' or len(segments) > 1:
                    joined.pop()
            elif segments[0] == '' and len(segments) > 1:
                # The leading '' is dropped: ['a'] + ['','b'] == ['a','b']
                segments = segments[1:]

        joined.extend(segments)

    return joined
370
371
def remove_path_segments(segments, remove):
    """
    Removes the path segments of <remove> from the end of the path
    segments <segments>. Neither <segments> nor <remove> is modified.

    Examples:
      # ('/a/b/c', 'b/c') -> '/a/'
      remove_path_segments(['','a','b','c'], ['b','c']) == ['','a','']
      # ('/a/b/c', '/b/c') -> '/a'
      remove_path_segments(['','a','b','c'], ['','b','c']) == ['','a']

    Returns: The list of all remaining path segments after the segments
    in <remove> have been removed from the end of <segments>. If no
    segments from <remove> were removed from <segments>, <segments> is
    returned unmodified.
    """
    # [''] means a '/', which is properly represented by ['', '']. Rebind the
    # local names instead of appending so the caller's lists are left
    # untouched.
    if segments == ['']:
        segments = ['', '']
    if remove == ['']:
        remove = ['', '']

    if remove == segments:
        return []
    if len(remove) > len(segments):
        return segments

    # A leading '' in <remove> stands for a leading '/', which isn't a
    # segment to match against the tail of <segments>.
    toremove = list(remove)
    if len(remove) > 1 and remove[0] == '':
        toremove.pop(0)

    if not toremove or toremove != segments[-len(toremove):]:
        return segments

    ret = segments[:len(segments) - len(toremove)]
    # Without a leading '/' in <remove>, the '/' before the removed segments
    # is kept: ('/a/b/c', 'b/c') -> '/a/'.
    if remove[0] != '' and ret:
        ret.append('')

    return ret
413
414
def quacks_like_a_path_with_segments(obj):
    """
    Returns: A truthy value if <obj> has a 'segments' attribute that is
    iterable but not a string, a falsy value otherwise.
    """
    if not hasattr(obj, 'segments'):
        return False
    return is_iterable_but_not_string(obj.segments)
419
420
class Path(object):

    """
    Represents a path comprised of zero or more path segments.

      http://tools.ietf.org/html/rfc3986#section-3.3

    Path parameters aren't supported.

    Attributes:
      _force_absolute: Function whose boolean return value specifies
        whether self.isabsolute should be forced to True or not. If
        _force_absolute(self) returns True, isabsolute is read only and
        raises an AttributeError if assigned to. If
        _force_absolute(self) returns False, isabsolute is mutable and
        can be set to True or False. URL paths use _force_absolute and
        return True if the netloc is non-empty (not equal to
        ''). Fragment paths are never read-only and their
        _force_absolute(self) always returns False.
      segments: List of zero or more path segments comprising this
        path. If the path string has a trailing '/', the last segment
        will be '' and self.isdir will be True and self.isfile will be
        False. An empty segment list represents an empty path, not '/'
        (though they have the same meaning).
      isabsolute: Boolean whether or not this is an absolute path or
        not. An absolute path starts with a '/'. self.isabsolute is
        False if the path is empty (self.segments == [] and str(path) ==
        '').
      strict: Boolean whether or not UserWarnings should be raised if
        improperly encoded path strings are provided to methods that
        take such strings, like load(), add(), set(), remove(), etc.
    """

    # From RFC 3986:
    #   segment       = *pchar
    #   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
    #   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
    #   sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
    #                       / "*" / "+" / "," / ";" / "="
    SAFE_SEGMENT_CHARS = ":@-._~!$&'()*+,;="

    def __init__(self, path='', force_absolute=lambda _: False, strict=False):
        """
        Parameters:
          path: Initial path. See load() for the accepted types.
          force_absolute: See the _force_absolute attribute.
          strict: See the strict attribute.
        """
        self.segments = []

        self.strict = strict
        self._isabsolute = False
        self._force_absolute = force_absolute

        self.load(path)

    def load(self, path):
        """
        Load <path>, replacing any existing path. <path> can either be
        a Path instance, a list of segments, or a path string to adopt.

        Segments provided by a Path instance or by a list are copied, so
        the object passed in is never modified and never shares its
        segment list with this path.

        Returns: <self>.
        """
        if not path:
            segments = []
        elif quacks_like_a_path_with_segments(path):  # Path interface.
            segments = list(path.segments)
        elif is_iterable_but_not_string(path):  # List interface.
            segments = list(path)
        else:  # String interface.
            segments = self._segments_from_path(path)

        # Always store a real boolean in _isabsolute.
        if self._force_absolute(self):
            self._isabsolute = bool(segments)
        else:
            self._isabsolute = bool(segments) and segments[0] == ''

        # The leading '' of an absolute path is represented by isabsolute,
        # not by a segment. A lone [''], which means '/', is kept as is.
        if self.isabsolute and len(segments) > 1 and segments[0] == '':
            segments.pop(0)

        self.segments = segments

        return self

    def add(self, path):
        """
        Add <path> to the existing path. <path> can either be a Path instance,
        a list of segments, or a path string to append to the existing path.

        <path> itself is never modified.

        Returns: <self>.
        """
        if quacks_like_a_path_with_segments(path):  # Path interface.
            newsegments = list(path.segments)
        elif is_iterable_but_not_string(path):  # List interface.
            newsegments = list(path)
        else:  # String interface.
            newsegments = self._segments_from_path(path)

        # Preserve the opening '/' if one exists already (self.segments
        # == ['']).
        if self.segments == [''] and newsegments and newsegments[0] != '':
            newsegments.insert(0, '')

        # Work on a copy. self.segments is only replaced by load() below.
        segments = list(self.segments)
        if self.isabsolute and segments and segments[0] != '':
            segments.insert(0, '')

        self.load(join_path_segments(segments, newsegments))

        return self

    def set(self, path):
        """
        Replace the existing path with <path>. Equivalent to load().

        Returns: <self>.
        """
        self.load(path)
        return self

    def remove(self, path):
        """
        Remove <path> from the end of the existing path. <path> can either
        be True, to remove the entire path, a list of segments, or a path
        string. <path> itself is never modified.

        Returns: <self>.
        """
        if path is True:
            self.load('')
        else:
            if is_iterable_but_not_string(path):  # List interface.
                segments = list(path)
            else:  # String interface.
                segments = self._segments_from_path(path)
            base = ([''] if self.isabsolute else []) + self.segments
            self.load(remove_path_segments(base, segments))

        return self

    def normalize(self):
        """
        Normalize the path. Turn '//a/./b/../c//' into '/a/c/'.

        Returns: <self>.
        """
        if str(self):
            # normpath() strips the trailing '/', so add it back for
            # directories.
            normalized = normpath(str(self)) + ('/' * self.isdir)
            if normalized.startswith('//'):  # http://bugs.python.org/636648
                normalized = '/' + normalized.lstrip('/')
            self.load(normalized)

        return self

    def asdict(self):
        """
        Returns: A dictionary with the encoded path string and the isdir,
        isfile, segments, and isabsolute attributes of this path.
        """
        return {
            'encoded': str(self),
            'isdir': self.isdir,
            'isfile': self.isfile,
            'segments': self.segments,
            'isabsolute': self.isabsolute,
            }

    @property
    def isabsolute(self):
        """
        Returns: True if _force_absolute(self) is true, otherwise the
        stored absoluteness of this path.
        """
        if self._force_absolute(self):
            return True
        return self._isabsolute

    @isabsolute.setter
    def isabsolute(self, isabsolute):
        """
        Raises: AttributeError if _force_absolute(self) returns True.
        """
        if self._force_absolute(self):
            s = ('Path.isabsolute is True and read-only for URLs with a netloc'
                 ' (a username, password, host, and/or port). A URL path must '
                 "start with a '/' to separate itself from a netloc.")
            raise AttributeError(s)
        self._isabsolute = isabsolute

    @property
    def isdir(self):
        """
        Returns: True if the path ends on a directory, False
        otherwise. If True, the last segment is '', representing the
        trailing '/' of the path. An empty path is a directory, too.
        """
        return (self.segments == [] or
                (self.segments and self.segments[-1] == ''))

    @property
    def isfile(self):
        """
        Returns: True if the path ends on a file, False otherwise. If
        True, the last segment is not '', representing some file as the
        last segment of the path.
        """
        return not self.isdir

    def __truediv__(self, path):
        """
        Returns: A new path, of the same class as this path, with <path>
        added to it. This path is left unmodified.
        """
        copy = self.__class__(
            path=self.segments,
            force_absolute=self._force_absolute,
            strict=self.strict)
        # self.segments doesn't include the leading '' of an absolute path,
        # so load() can't infer absoluteness from it. Carry it over
        # explicitly.
        if self._isabsolute:
            copy._isabsolute = True
        return copy.add(path)

    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        return not self == other

    def __bool__(self):
        return len(self.segments) > 0
    __nonzero__ = __bool__

    def __str__(self):
        segments = list(self.segments)
        if self.isabsolute:
            if not segments:
                segments = ['', '']  # An empty absolute path is '/'.
            else:
                segments.insert(0, '')
        return self._path_from_segments(segments)

    def __repr__(self):
        return "%s('%s')" % (self.__class__.__name__, str(self))

    def _segments_from_path(self, path):
        """
        Returns: The list of unquoted path segments from the path string
        <path>.

        Raises: UserWarning if <path> is an improperly encoded path
        string and self.strict is True.

        TODO(grun): Accept both list values and string values and
        refactor the list vs string interface testing to this common
        method.
        """
        segments = []
        for segment in path.split('/'):
            if not is_valid_encoded_path_segment(segment):
                segment = quote(utf8(segment))
                if self.strict:
                    # NOTE(review): the suggested path is built from the
                    # segments collected so far, which doesn't include the
                    # offending segment. Confirm whether the whole, properly
                    # encoded path was intended.
                    s = ("Improperly encoded path string received: '%s'. "
                         "Proceeding, but did you mean '%s'?" %
                         (path, self._path_from_segments(segments)))
                    warnings.warn(s, UserWarning)
            segments.append(utf8(segment))
        del segment

        # In Python 3, utf8() returns Bytes objects that must be decoded into
        # strings before they can be passed to unquote(). In Python 2, utf8()
        # returns strings that can be passed directly to urllib.unquote().
        segments = [
            segment.decode('utf8')
            if isinstance(segment, bytes) and not isinstance(segment, str)
            else segment for segment in segments]

        return [unquote(segment) for segment in segments]

    def _path_from_segments(self, segments):
        """
        Combine the provided path segments <segments> into a path string. Path
        segments in <segments> will be quoted.

        Returns: A path string with quoted path segments.
        """
        segments = [
            quote(utf8(attemptstr(segment)), self.SAFE_SEGMENT_CHARS)
            for segment in segments]
        return '/'.join(segments)
676
677
@six.add_metaclass(abc.ABCMeta)
class PathCompositionInterface(object):

    """
    Abstract base for classes that own a Path, exposed through the 'path'
    attribute.
    """

    def __init__(self, strict=False):
        """
        Params:
          strict: Passed to the contained Path as its <strict> flag.

        The contained Path is given self._force_absolute as its
        force_absolute function.

        Assignments to <self> in __init__() must be added to
        __setattr__() below.
        """
        contained = Path(force_absolute=self._force_absolute, strict=strict)
        self._path = contained

    @property
    def path(self):
        return self._path

    @property
    def pathstr(self):
        """This method is deprecated. Use str(furl.path) instead."""
        message = (
            'furl.pathstr is deprecated. Use str(furl.path) instead. There '
            'should be one, and preferably only one, obvious way to serialize'
            ' a Path object to a string.')
        warnings.warn(message, DeprecationWarning)
        return str(self._path)

    @abc.abstractmethod
    def _force_absolute(self, path):
        """
        Subclass me.
        """

    def __setattr__(self, attr, value):
        """
        Assigning to '_path' stores the Path object itself. Assigning to
        'path' loads the assigned value into the existing Path object.

        Returns: True if this attribute is handled and set here, False
        otherwise.
        """
        if attr == 'path':
            self._path.load(value)
        elif attr == '_path':
            self.__dict__[attr] = value
        else:
            return False
        return True
727
728
@six.add_metaclass(abc.ABCMeta)
class URLPathCompositionInterface(PathCompositionInterface):

    """
    Abstract class interface for a parent class that contains a URL
    Path.

    A URL path's isabsolute attribute is absolute and read-only if a
    netloc is defined. A path cannot start without '/' if there's a
    netloc. For example, the URL 'http://google.coma/path' makes no
    sense. It should be 'http://google.com/a/path'.

    A URL path's isabsolute attribute is mutable if there's no
    netloc. The scheme doesn't matter. For example, the isabsolute
    attribute of the URL path in 'mailto:user@host.com', with scheme
    'mailto' and path 'user@host.com', is mutable because there is no
    netloc. See

      http://en.wikipedia.org/wiki/URI_scheme#Examples
    """

    def __init__(self, strict=False):
        PathCompositionInterface.__init__(self, strict=strict)

    def _force_absolute(self, path):
        """
        Returns: False if <path> is empty, otherwise self.netloc, which is
        truthy only if a netloc is defined.
        """
        if not path:
            return False
        return self.netloc
755
756
@six.add_metaclass(abc.ABCMeta)
class FragmentPathCompositionInterface(PathCompositionInterface):

    """
    Abstract class interface for a parent class that contains a Fragment
    Path.

    Fragment Paths can be set to absolute (self.isabsolute = True) or
    not absolute (self.isabsolute = False).
    """

    def __init__(self, strict=False):
        PathCompositionInterface.__init__(self, strict=strict)

    def _force_absolute(self, path):
        # Absoluteness is never forced on a fragment path, so its isabsolute
        # attribute always remains assignable.
        return False
773
774
775class Query(object):
776
777    """
778    Represents a URL query comprised of zero or more unique parameters
779    and their respective values.
780
781      http://tools.ietf.org/html/rfc3986#section-3.4
782
783
784    All interaction with Query.params is done with unquoted strings. So
785
786      f.query.params['a'] = 'a%5E'
787
788    means the intended value for 'a' is 'a%5E', not 'a^'.
789
790
791    Query.params is implemented as an omdict1D object - a one
792    dimensional ordered multivalue dictionary. This provides support for
793    repeated URL parameters, like 'a=1&a=2'. omdict1D is a subclass of
794    omdict, an ordered multivalue dictionary. Documentation for omdict
795    can be found here
796
797      https://github.com/gruns/orderedmultidict
798
799    The one dimensional aspect of omdict1D means that a list of values
800    is interpreted as multiple values, not a single value which is
801    itself a list of values. This is a reasonable distinction to make
802    because URL query parameters are one dimensional: query parameter
803    values cannot themselves be composed of sub-values.
804
805    So what does this mean? This means we can safely interpret
806
807      f = furl('http://www.google.com')
808      f.query.params['arg'] = ['one', 'two', 'three']
809
810    as three different values for 'arg': 'one', 'two', and 'three',
811    instead of a single value which is itself some serialization of the
812    python list ['one', 'two', 'three']. Thus, the result of the above
813    will be
814
815      f.query.allitems() == [
816        ('arg','one'), ('arg','two'), ('arg','three')]
817
818    and not
819
820      f.query.allitems() == [('arg', ['one', 'two', 'three'])]
821
822    The latter doesn't make sense because query parameter values cannot
823    be composed of sub-values. So finally
824
825      str(f.query) == 'arg=one&arg=two&arg=three'
826
827
828    Additionally, while the set of allowed characters in URL queries is
829    defined in RFC 3986 section 3.4, the format for encoding key=value
830    pairs within the query is not. In turn, the parsing of encoded
831    key=value query pairs differs between implementations.
832
833    As a compromise to support equal signs in both key=value pair
834    encoded queries, like
835
836      https://www.google.com?a=1&b=2
837
838    and non-key=value pair encoded queries, like
839
840      https://www.google.com?===3===
841
842    equal signs are percent encoded in key=value pairs where the key is
843    non-empty, e.g.
844
845      https://www.google.com?equal-sign=%3D
846
847    but not encoded in key=value pairs where the key is empty, e.g.
848
849      https://www.google.com?===equal=sign===
850
851    This presents a reasonable compromise to accurately reproduce
852    non-key=value queries with equal signs while also still percent
853    encoding equal signs in key=value pair encoded queries, as
854    expected. See
855
856      https://github.com/gruns/furl/issues/99
857
858    for more details.
859
860    Attributes:
861      params: Ordered multivalue dictionary of query parameter key:value
862        pairs. Parameters in self.params are maintained URL decoded,
863        e.g. 'a b' not 'a+b'.
864      strict: Boolean whether or not UserWarnings should be raised if
865        improperly encoded query strings are provided to methods that
866        take such strings, like load(), add(), set(), remove(), etc.
867    """
868
869    # From RFC 3986:
870    #   query       = *( pchar / "/" / "?" )
871    #   pchar       = unreserved / pct-encoded / sub-delims / ":" / "@"
872    #   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
873    #   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
874    #                     / "*" / "+" / "," / ";" / "="
875    SAFE_KEY_CHARS = "/?:@-._~!$'()*+,;"
876    SAFE_VALUE_CHARS = SAFE_KEY_CHARS + '='
877
878    def __init__(self, query='', strict=False):
879        self.strict = strict
880
881        self._params = omdict1D()
882
883        self.load(query)
884
885    def load(self, query):
886        items = self._items(query)
887        self.params.load(items)
888        return self
889
890    def add(self, args):
891        for param, value in self._items(args):
892            self.params.add(param, value)
893        return self
894
895    def set(self, mapping):
896        """
897        Adopt all mappings in <mapping>, replacing any existing mappings
898        with the same key. If a key has multiple values in <mapping>,
899        they are all adopted.
900
901        Examples:
902          Query({1:1}).set([(1,None),(2,2)]).params.allitems()
903            == [(1,None),(2,2)]
904          Query({1:None,2:None}).set([(1,1),(2,2),(1,11)]).params.allitems()
905            == [(1,1),(2,2),(1,11)]
906          Query({1:None}).set([(1,[1,11,111])]).params.allitems()
907            == [(1,1),(1,11),(1,111)]
908
909        Returns: <self>.
910        """
911        self.params.updateall(mapping)
912        return self
913
914    def remove(self, query):
915        if query is True:
916            self.load('')
917            return self
918
919        # Single key to remove.
920        items = [query]
921        # Dictionary or multivalue dictionary of items to remove.
922        if callable_attr(query, 'items'):
923            items = self._items(query)
924        # List of keys or items to remove.
925        elif non_string_iterable(query):
926            items = query
927
928        for item in items:
929            if non_string_iterable(item) and len(item) == 2:
930                key, value = item
931                self.params.popvalue(key, value, None)
932            else:
933                key = item
934                self.params.pop(key, None)
935
936        return self
937
938    @property
939    def params(self):
940        return self._params
941
942    @params.setter
943    def params(self, params):
944        items = self._items(params)
945
946        self._params.clear()
947        for key, value in items:
948            self._params.add(key, value)
949
950    def encode(self, delimiter='&', quote_plus=True, dont_quote='',
951               delimeter=_absent):
952        """
953        Examples:
954
955          Query('a=a&b=#').encode() == 'a=a&b=%23'
956          Query('a=a&b=#').encode(';') == 'a=a;b=%23'
957          Query('a+b=c@d').encode(dont_quote='@') == 'a+b=c@d'
958          Query('a+b=c@d').encode(quote_plus=False) == 'a%20b=c%40d'
959
960        Until furl v0.4.6, the 'delimiter' argument was incorrectly
961        spelled 'delimeter'. For backwards compatibility, accept both
962        the correct 'delimiter' and the old, mispelled 'delimeter'.
963
964        Keys and values are encoded application/x-www-form-urlencoded if
965        <quote_plus> is True, percent-encoded otherwise.
966
967        <dont_quote> exempts valid query characters from being
968        percent-encoded, either in their entirety with dont_quote=True,
969        or selectively with dont_quote=<string>, like
970        dont_quote='/?@_'. Invalid query characters -- those not in
971        self.SAFE_KEY_CHARS, like '#' and '^' -- are always encoded,
972        even if included in <dont_quote>. For example:
973
974          Query('#=^').encode(dont_quote='#^') == '%23=%5E'.
975
976
977        Returns: A URL encoded query string using <delimiter> as the
978        delimiter separating key:value pairs. The most common and
979        default delimiter is '&', but ';' can also be specified. ';' is
980        W3C recommended.
981        """
982        if delimeter is not _absent:
983            delimiter = delimeter
984
985        quote_key = create_quote_fn(self.SAFE_KEY_CHARS, quote_plus)
986        quote_value = create_quote_fn(self.SAFE_VALUE_CHARS, quote_plus)
987
988        pairs = []
989        for key, value in self.params.iterallitems():
990            utf8key = utf8(key, utf8(attemptstr(key)))
991            quoted_key = quote_key(utf8key, dont_quote)
992
993            if value is None:  # Example: http://sprop.su/?key.
994                pair = quoted_key
995            else:  # Example: http://sprop.su/?key=value.
996                utf8value = utf8(value, utf8(attemptstr(value)))
997                quoted_value = quote_value(utf8value, dont_quote)
998
999                if not quoted_key:  # Unquote '=' to allow queries like '?==='.
1000                    quoted_value = quoted_value.replace('%3D', '=')
1001
1002                pair = '%s=%s' % (quoted_key, quoted_value)
1003
1004            pairs.append(pair)
1005
1006        query = delimiter.join(pairs)
1007
1008        return query
1009
1010    def asdict(self):
1011        return {
1012            'encoded': str(self),
1013            'params': self.params.allitems(),
1014            }
1015
1016    def __eq__(self, other):
1017        return str(self) == str(other)
1018
1019    def __ne__(self, other):
1020        return not self == other
1021
1022    def __bool__(self):
1023        return len(self.params) > 0
1024    __nonzero__ = __bool__
1025
1026    def __str__(self):
1027        return self.encode()
1028
1029    def __repr__(self):
1030        return "%s('%s')" % (self.__class__.__name__, str(self))
1031
1032    def _items(self, items):
1033        """
1034        Extract and return the key:value items from various
1035        containers. Some containers that could hold key:value items are
1036
1037          - List of (key,value) tuples.
1038          - Dictionaries of key:value items.
1039          - Multivalue dictionary of key:value items, with potentially
1040            repeated keys.
1041          - Query string with encoded params and values.
1042
1043        Keys and values are passed through unmodified unless they were
1044        passed in within an encoded query string, like
1045        'a=a%20a&b=b'. Keys and values passed in within an encoded query
1046        string are unquoted by urlparse.parse_qsl(), which uses
1047        urllib.unquote_plus() internally.
1048
1049        Returns: List of items as (key, value) tuples. Keys and values
1050        are passed through unmodified unless they were passed in as part
1051        of an encoded query string, in which case the final keys and
1052        values that are returned will be unquoted.
1053
1054        Raises: UserWarning if <path> is an improperly encoded path
1055        string and self.strict is True.
1056        """
1057        if not items:
1058            items = []
1059        # Multivalue Dictionary-like interface. e.g. {'a':1, 'a':2,
1060        # 'b':2}
1061        elif callable_attr(items, 'allitems'):
1062            items = list(items.allitems())
1063        elif callable_attr(items, 'iterallitems'):
1064            items = list(items.iterallitems())
1065        # Dictionary-like interface. e.g. {'a':1, 'b':2, 'c':3}
1066        elif callable_attr(items, 'items'):
1067            items = list(items.items())
1068        elif callable_attr(items, 'iteritems'):
1069            items = list(items.iteritems())
1070        # Encoded query string. e.g. 'a=1&b=2&c=3'
1071        elif isinstance(items, six.string_types):
1072            items = self._extract_items_from_querystr(items)
1073        # Default to list of key:value items interface. e.g. [('a','1'),
1074        # ('b','2')]
1075        else:
1076            items = list(items)
1077
1078        return items
1079
1080    def _extract_items_from_querystr(self, querystr):
1081        items = []
1082
1083        pairstrs = [s2 for s1 in querystr.split('&') for s2 in s1.split(';')]
1084        pairs = [item.split('=', 1) for item in pairstrs]
1085        pairs = [(p[0], lget(p, 1, '')) for p in pairs]  # Pad with value ''.
1086
1087        for pairstr, (key, value) in six.moves.zip(pairstrs, pairs):
1088            valid_key = is_valid_encoded_query_key(key)
1089            valid_value = is_valid_encoded_query_value(value)
1090            if self.strict and (not valid_key or not valid_value):
1091                msg = (
1092                    "Incorrectly percent encoded query string received: '%s'. "
1093                    "Proceeding, but did you mean '%s'?" %
1094                    (querystr, urllib.parse.urlencode(pairs)))
1095                warnings.warn(msg, UserWarning)
1096
1097            key_decoded = unquote(key.replace('+', ' '))
1098            # Empty value without a '=', e.g. '?sup'.
1099            if key == pairstr:
1100                value_decoded = None
1101            else:
1102                value_decoded = unquote(value.replace('+', ' '))
1103
1104            items.append((key_decoded, value_decoded))
1105
1106        return items
1107
1108
@six.add_metaclass(abc.ABCMeta)
class QueryCompositionInterface(object):

    """
    Abstract interface for a parent class that owns a Query, stored in
    self._query.
    """

    def __init__(self, strict=False):
        self._query = Query(strict=strict)

    @property
    def query(self):
        """The Query object owned by this instance."""
        return self._query

    @property
    def querystr(self):
        """This method is deprecated. Use str(furl.query) instead."""
        message = (
            'furl.querystr is deprecated. Use str(furl.query) instead. '
            'There should be one, and preferably only one, obvious way to '
            'serialize a Query object to a string.')
        warnings.warn(message, DeprecationWarning)
        return str(self._query)

    @property
    def args(self):
        """
        Shortcut to the query parameters, self._query.params.
        """
        return self._query.params

    def __setattr__(self, attr, value):
        """
        Load <value> into the owned Query when <attr> is 'args' or
        'query'. No other attribute is handled here.

        Returns: True if this attribute is handled and set here, False
        otherwise.
        """
        if attr not in ('args', 'query'):
            return False
        self._query.load(value)
        return True
1148
1149
class Fragment(FragmentPathCompositionInterface, QueryCompositionInterface):

    """
    Represents a URL fragment, comprised internally of a Path and Query
    optionally separated by a '?' character.

      http://tools.ietf.org/html/rfc3986#section-3.5

    Attributes:
      path: Path object from FragmentPathCompositionInterface.
      query: Query object from QueryCompositionInterface.
      separator: Boolean whether or not a '?' separator should be
        included in the string representation of this fragment. When
        False, a '?' character will not separate the fragment path from
        the fragment query in the fragment string. This is useful to
        build fragments like '#!arg1=val1&arg2=val2', where no
        separating '?' is desired.
    """

    def __init__(self, fragment='', strict=False):
        FragmentPathCompositionInterface.__init__(self, strict=strict)
        QueryCompositionInterface.__init__(self, strict=strict)
        self.strict = strict
        self.separator = True

        self.load(fragment)

    def load(self, fragment):
        """
        Reset the path and query, then parse the fragment string
        <fragment> into them. None is treated as ''.

        The text after the first '?' is only adopted as a query if it
        contains a '='. A fragment without any '?' is adopted as a
        query if it contains a '=', and as a path otherwise.
        """
        self.path.load('')
        self.query.load('')

        if fragment is None:
            fragment = ''

        # str.split() with an explicit separator always returns at
        # least one token, so only the one and two token cases exist.
        toks = fragment.split('?', 1)
        if len(toks) == 1:
            # Does this fragment look like a path or a query? Default to
            # path.
            if '=' in fragment:  # Query example: '#woofs=dogs'.
                self._query.load(fragment)
            else:  # Path example: '#supinthisthread'.
                self._path.load(fragment)
        else:
            # Does toks[1] actually look like a query? Like 'a=a' or
            # 'a=' or '=a'?
            if '=' in toks[1]:
                self._path.load(toks[0])
                self._query.load(toks[1])
            # If toks[1] doesn't look like a query, the user probably
            # provided a fragment string like 'a?b?' that was intended
            # to be adopted as-is, not a two part fragment with path 'a'
            # and query 'b?'.
            else:
                self._path.load(fragment)

    def add(self, path=_absent, args=_absent):
        """
        Add <path> to the fragment's path and/or <args> to the
        fragment's query. Omitted parameters are left untouched.

        Returns: <self>.
        """
        if path is not _absent:
            self.path.add(path)
        if args is not _absent:
            self.query.add(args)

        return self

    def set(self, path=_absent, args=_absent, separator=_absent):
        """
        Load <path> as the fragment's path and/or <args> as the
        fragment's query, and adopt <separator> if it is True or False.
        Any other <separator> value is ignored.

        Returns: <self>.
        """
        if path is not _absent:
            self.path.load(path)
        if args is not _absent:
            self.query.load(args)
        if separator is True or separator is False:
            self.separator = separator

        return self

    def remove(self, fragment=_absent, path=_absent, args=_absent):
        """
        Remove the entire fragment if <fragment> is True, and/or remove
        <path> from the fragment's path and <args> from the fragment's
        query.

        Returns: <self>.
        """
        if fragment is True:
            self.load('')
        if path is not _absent:
            self.path.remove(path)
        if args is not _absent:
            self.query.remove(args)

        return self

    def asdict(self):
        """
        Returns: A dictionary with the encoded fragment string, the
        separator flag, and the asdict() output of the path and query.
        """
        return {
            'encoded': str(self),
            'separator': self.separator,
            'path': self.path.asdict(),
            'query': self.query.asdict(),
            }

    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        return not self == other

    def __setattr__(self, attr, value):
        # Give the path and query interfaces the first chance to handle
        # the attribute. Fall back to a plain attribute assignment.
        if (not PathCompositionInterface.__setattr__(self, attr, value) and
                not QueryCompositionInterface.__setattr__(self, attr, value)):
            object.__setattr__(self, attr, value)

    def __bool__(self):
        return bool(self.path) or bool(self.query)
    __nonzero__ = __bool__

    def __str__(self):
        path, query = str(self._path), str(self._query)

        # If there is no query or self.separator is False, decode all
        # '?' characters in the path from their percent encoded form
        # '%3F' to '?'. This allows for fragment strings containing
        # '?'s, like '#dog?machine?yes'.
        if path and (not query or not self.separator):
            path = path.replace('%3F', '?')

        separator = '?' if path and query and self.separator else ''

        return path + separator + query

    def __repr__(self):
        return "%s('%s')" % (self.__class__.__name__, str(self))
1275
1276
@six.add_metaclass(abc.ABCMeta)
class FragmentCompositionInterface(object):

    """
    Abstract interface for a parent class that owns a Fragment, stored
    in self._fragment.
    """

    def __init__(self, strict=False):
        self._fragment = Fragment(strict=strict)

    @property
    def fragment(self):
        """The Fragment object owned by this instance."""
        return self._fragment

    @property
    def fragmentstr(self):
        """This method is deprecated. Use str(furl.fragment) instead."""
        message = (
            'furl.fragmentstr is deprecated. Use str(furl.fragment) '
            'instead. There should be one, and preferably only one, obvious '
            'way to serialize a Fragment object to a string.')
        warnings.warn(message, DeprecationWarning)
        return str(self._fragment)

    def __setattr__(self, attr, value):
        """
        Load <value> into the owned Fragment when <attr> is 'fragment'.
        No other attribute is handled here.

        Returns: True if this attribute is handled and set here, False
        otherwise.
        """
        if attr != 'fragment':
            return False
        self.fragment.load(value)
        return True
1310
1311
1312class furl(URLPathCompositionInterface, QueryCompositionInterface,
1313           FragmentCompositionInterface, UnicodeMixin):
1314
1315    """
1316    Object for simple parsing and manipulation of a URL and its
1317    components.
1318
1319      scheme://username:password@host:port/path?query#fragment
1320
1321    Attributes:
1322      strict: Boolean whether or not UserWarnings should be raised if
1323        improperly encoded path, query, or fragment strings are provided
1324        to methods that take such strings, like load(), add(), set(),
1325        remove(), etc.
1326      username: Username string for authentication. Initially None.
1327      password: Password string for authentication with
1328        <username>. Initially None.
1329      scheme: URL scheme. A string ('http', 'https', '', etc) or None.
1330        All lowercase. Initially None.
1331      host: URL host (hostname, IPv4 address, or IPv6 address), not
1332        including port. All lowercase. Initially None.
1333      port: Port. Valid port values are 1-65535, or None meaning no port
1334        specified.
1335      netloc: Network location. Combined host and port string. Initially
1336      None.
1337      path: Path object from URLPathCompositionInterface.
1338      query: Query object from QueryCompositionInterface.
1339      fragment: Fragment object from FragmentCompositionInterface.
1340    """
1341
1342    def __init__(self, url='', args=_absent, path=_absent, fragment=_absent,
1343                 scheme=_absent, netloc=_absent, origin=_absent,
1344                 fragment_path=_absent, fragment_args=_absent,
1345                 fragment_separator=_absent, host=_absent, port=_absent,
1346                 query=_absent, query_params=_absent, username=_absent,
1347                 password=_absent, strict=False):
1348        """
1349        Raises: ValueError on invalid URL or invalid URL component(s) provided.
1350        """
1351        URLPathCompositionInterface.__init__(self, strict=strict)
1352        QueryCompositionInterface.__init__(self, strict=strict)
1353        FragmentCompositionInterface.__init__(self, strict=strict)
1354        self.strict = strict
1355
1356        self.load(url)  # Raises ValueError on invalid URL.
1357        self.set(  # Raises ValueError on invalid URL component(s).
1358            args=args, path=path, fragment=fragment, scheme=scheme,
1359            netloc=netloc, origin=origin, fragment_path=fragment_path,
1360            fragment_args=fragment_args, fragment_separator=fragment_separator,
1361            host=host, port=port, query=query, query_params=query_params,
1362            username=username, password=password)
1363
1364    def load(self, url):
1365        """
1366        Parse and load a URL.
1367
1368        Raises: ValueError on invalid URL, like a malformed IPv6 address
1369        or invalid port.
1370        """
1371        self.username = self.password = None
1372        self._host = self._port = self._scheme = None
1373
1374        if url is None:
1375            url = ''
1376        if not isinstance(url, six.string_types):
1377            url = str(url)
1378
1379        # urlsplit() raises a ValueError on malformed IPv6 addresses in
1380        # Python 2.7+.
1381        tokens = urlsplit(url)
1382
1383        self.netloc = tokens.netloc  # Raises ValueError in Python 2.7+.
1384        self.scheme = tokens.scheme
1385        if not self.port:
1386            self._port = DEFAULT_PORTS.get(self.scheme)
1387        self.path.load(tokens.path)
1388        self.query.load(tokens.query)
1389        self.fragment.load(tokens.fragment)
1390
1391        return self
1392
1393    @property
1394    def scheme(self):
1395        return self._scheme
1396
1397    @scheme.setter
1398    def scheme(self, scheme):
1399        if callable_attr(scheme, 'lower'):
1400            scheme = scheme.lower()
1401        self._scheme = scheme
1402
1403    @property
1404    def host(self):
1405        return self._host
1406
1407    @host.setter
1408    def host(self, host):
1409        """
1410        Raises: ValueError on invalid host or malformed IPv6 address.
1411        """
1412        # Invalid IPv6 literal.
1413        urllib.parse.urlsplit('http://%s/' % host)  # Raises ValueError.
1414
1415        # Invalid host string.
1416        resembles_ipv6_literal = (
1417            host is not None and lget(host, 0) == '[' and ':' in host and
1418            lget(host, -1) == ']')
1419        if (host is not None and not resembles_ipv6_literal and
1420           not is_valid_host(host)):
1421            errmsg = (
1422                "Invalid host '%s'. Host strings must have at least one "
1423                "non-period character, can't contain any of '%s', and can't "
1424                "have adjacent periods.")
1425            raise ValueError(errmsg % (host, INVALID_HOST_CHARS))
1426
1427        if callable_attr(host, 'lower'):
1428            host = host.lower()
1429        if callable_attr(host, 'startswith') and host.startswith('xn--'):
1430            host = idna_decode(host)
1431        self._host = host
1432
1433    @property
1434    def port(self):
1435        return self._port or DEFAULT_PORTS.get(self.scheme)
1436
1437    @port.setter
1438    def port(self, port):
1439        """
1440        The port value can be 1-65535 or None, meaning no port specified. If
1441        <port> is None and self.scheme is a known scheme in DEFAULT_PORTS,
1442        the default port value from DEFAULT_PORTS will be used.
1443
1444        Raises: ValueError on invalid port.
1445        """
1446        if port is None:
1447            self._port = DEFAULT_PORTS.get(self.scheme)
1448        elif is_valid_port(port):
1449            self._port = int(str(port))
1450        else:
1451            raise ValueError("Invalid port '%s'." % port)
1452
1453    @property
1454    def netloc(self):
1455        userpass = quote(utf8(self.username) or '', safe='')
1456        if self.password is not None:
1457            userpass += ':' + quote(utf8(self.password), safe='')
1458        if userpass or self.username is not None:
1459            userpass += '@'
1460
1461        netloc = idna_encode(self.host)
1462        if self.port and self.port != DEFAULT_PORTS.get(self.scheme):
1463            netloc = (netloc or '') + (':' + str(self.port))
1464
1465        if userpass or netloc:
1466            netloc = (userpass or '') + (netloc or '')
1467
1468        return netloc
1469
1470    @netloc.setter
1471    def netloc(self, netloc):
1472        """
1473        Params:
1474          netloc: Network location string, like 'google.com' or
1475            'user:pass@google.com:99'.
1476        Raises: ValueError on invalid port or malformed IPv6 address.
1477        """
1478        # Raises ValueError on malformed IPv6 addresses.
1479        urllib.parse.urlsplit('http://%s/' % netloc)
1480
1481        username = password = host = port = None
1482
1483        if netloc and '@' in netloc:
1484            userpass, netloc = netloc.split('@', 1)
1485            if ':' in userpass:
1486                username, password = userpass.split(':', 1)
1487            else:
1488                username = userpass
1489
1490        if netloc and ':' in netloc:
1491            # IPv6 address literal.
1492            if ']' in netloc:
1493                colonpos, bracketpos = netloc.rfind(':'), netloc.rfind(']')
1494                if colonpos > bracketpos and colonpos != bracketpos + 1:
1495                    raise ValueError("Invalid netloc '%s'." % netloc)
1496                elif colonpos > bracketpos and colonpos == bracketpos + 1:
1497                    host, port = netloc.rsplit(':', 1)
1498                else:
1499                    host = netloc
1500            else:
1501                host, port = netloc.rsplit(':', 1)
1502                host = host
1503        else:
1504            host = netloc
1505
1506        # Avoid side effects by assigning self.port before self.host so
1507        # that if an exception is raised when assigning self.port,
1508        # self.host isn't updated.
1509        self.port = port  # Raises ValueError on invalid port.
1510        self.host = host
1511        self.username = None if username is None else unquote(username)
1512        self.password = None if password is None else unquote(password)
1513
1514    @property
1515    def origin(self):
1516        port = ''
1517        scheme = self.scheme or ''
1518        host = idna_encode(self.host) or ''
1519        if self.port and self.port != DEFAULT_PORTS.get(self.scheme):
1520            port = ':%s' % self.port
1521        origin = '%s://%s%s' % (scheme, host, port)
1522
1523        return origin
1524
1525    @origin.setter
1526    def origin(self, origin):
1527        toks = origin.split('://', 1)
1528        if len(toks) == 1:
1529            host_port = origin
1530        else:
1531            self.scheme, host_port = toks
1532
1533        if ':' in host_port:
1534            self.host, self.port = host_port.split(':', 1)
1535        else:
1536            self.host = host_port
1537
1538    @property
1539    def url(self):
1540        return self.tostr()
1541
1542    @url.setter
1543    def url(self, url):
1544        return self.load(url)
1545
1546    def add(self, args=_absent, path=_absent, fragment_path=_absent,
1547            fragment_args=_absent, query_params=_absent):
1548        """
1549        Add components to a URL and return this furl instance, <self>.
1550
1551        If both <args> and <query_params> are provided, a UserWarning is
1552        raised because <args> is provided as a shortcut for
1553        <query_params>, not to be used simultaneously with
1554        <query_params>. Nonetheless, providing both <args> and
1555        <query_params> behaves as expected, with query keys and values
1556        from both <args> and <query_params> added to the query - <args>
1557        first, then <query_params>.
1558
1559        Parameters:
1560          args: Shortcut for <query_params>.
1561          path: A list of path segments to add to the existing path
1562            segments, or a path string to join with the existing path
1563            string.
1564          query_params: A dictionary of query keys and values or list of
1565            key:value items to add to the query.
1566          fragment_path: A list of path segments to add to the existing
1567            fragment path segments, or a path string to join with the
1568            existing fragment path string.
1569          fragment_args: A dictionary of query keys and values or list
1570            of key:value items to add to the fragment's query.
1571
1572        Returns: <self>.
1573
1574        Raises: UserWarning if redundant and possibly conflicting <args> and
1575        <query_params> were provided.
1576        """
1577        if args is not _absent and query_params is not _absent:
1578            s = ('Both <args> and <query_params> provided to furl.add(). '
1579                 '<args> is a shortcut for <query_params>, not to be used '
1580                 'with <query_params>. See furl.add() documentation for more '
1581                 'details.')
1582            warnings.warn(s, UserWarning)
1583
1584        if path is not _absent:
1585            self.path.add(path)
1586        if args is not _absent:
1587            self.query.add(args)
1588        if query_params is not _absent:
1589            self.query.add(query_params)
1590        if fragment_path is not _absent or fragment_args is not _absent:
1591            self.fragment.add(path=fragment_path, args=fragment_args)
1592
1593        return self
1594
1595    def set(self, args=_absent, path=_absent, fragment=_absent, scheme=_absent,
1596            netloc=_absent, origin=_absent, fragment_path=_absent,
1597            fragment_args=_absent, fragment_separator=_absent, host=_absent,
1598            port=_absent, query=_absent, query_params=_absent,
1599            username=_absent, password=_absent):
1600        """
1601        Set components of a url and return this furl instance, <self>.
1602
1603        If any overlapping, and hence possibly conflicting, parameters
1604        are provided, appropriate UserWarning's will be raised. The
1605        groups of parameters that could potentially overlap are
1606
1607          <scheme> and <origin>
1608          <origin>, <netloc>, and/or (<host> or <port>)
1609          <fragment> and (<fragment_path> and/or <fragment_args>)
1610          any two or all of <query>, <args>, and/or <query_params>
1611
1612        In all of the above groups, the latter parameter(s) take
1613        precedence over the earlier parameter(s). So, for example
1614
1615          furl('http://google.com/').set(
1616            netloc='yahoo.com:99', host='bing.com', port=40)
1617
1618        will result in a UserWarning being raised and the url becoming
1619
1620          'http://bing.com:40/'
1621
1622        not
1623
1624          'http://yahoo.com:99/
1625
1626        Parameters:
1627          args: Shortcut for <query_params>.
1628          path: A list of path segments or a path string to adopt.
1629          fragment: Fragment string to adopt.
1630          scheme: Scheme string to adopt.
1631          netloc: Network location string to adopt.
1632          origin: Scheme and netloc.
1633          query: Query string to adopt.
1634          query_params: A dictionary of query keys and values or list of
1635            key:value items to adopt.
1636          fragment_path: A list of path segments to adopt for the
1637            fragment's path or a path string to adopt as the fragment's
1638            path.
1639          fragment_args: A dictionary of query keys and values or list
1640            of key:value items for the fragment's query to adopt.
1641          fragment_separator: Boolean whether or not there should be a
1642            '?' separator between the fragment path and fragment query.
1643          host: Host string to adopt.
1644          port: Port number to adopt.
1645          username: Username string to adopt.
1646          password: Password string to adopt.
1647        Raises:
1648          ValueError on invalid port.
1649          UserWarning if <scheme> and <origin> are provided.
1650          UserWarning if <origin>, <netloc> and/or (<host> and/or <port>) are
1651            provided.
1652          UserWarning if <query>, <args>, and/or <query_params> are provided.
1653          UserWarning if <fragment> and (<fragment_path>,
1654            <fragment_args>, and/or <fragment_separator>) are provided.
1655        Returns: <self>.
1656        """
1657        def present(v):
1658            return v is not _absent
1659
1660        if present(scheme) and present(origin):
1661            s = ('Possible parameter overlap: <scheme> and <origin>. See '
1662                 'furl.set() documentation for more details.')
1663            warnings.warn(s, UserWarning)
1664        provided = [
1665            present(netloc), present(origin), present(host) or present(port)]
1666        if sum(provided) >= 2:
1667            s = ('Possible parameter overlap: <origin>, <netloc> and/or '
1668                 '(<host> and/or <port>) provided. See furl.set() '
1669                 'documentation for more details.')
1670            warnings.warn(s, UserWarning)
1671        if sum(present(p) for p in [args, query, query_params]) >= 2:
1672            s = ('Possible parameter overlap: <query>, <args>, and/or '
1673                 '<query_params> provided. See furl.set() documentation for '
1674                 'more details.')
1675            warnings.warn(s, UserWarning)
1676        provided = [fragment_path, fragment_args, fragment_separator]
1677        if present(fragment) and any(present(p) for p in provided):
1678            s = ('Possible parameter overlap: <fragment> and '
1679                 '(<fragment_path>and/or <fragment_args>) or <fragment> '
1680                 'and <fragment_separator> provided. See furl.set() '
1681                 'documentation for more details.')
1682            warnings.warn(s, UserWarning)
1683
1684        # Guard against side effects on exception.
1685        original_url = self.url
1686        try:
1687            if username is not _absent:
1688                self.username = username
1689            if password is not _absent:
1690                self.password = password
1691            if netloc is not _absent:
1692                # Raises ValueError on invalid port or malformed IP.
1693                self.netloc = netloc
1694            if origin is not _absent:
1695                # Raises ValueError on invalid port or malformed IP.
1696                self.origin = origin
1697            if scheme is not _absent:
1698                self.scheme = scheme
1699            if host is not _absent:
1700                # Raises ValueError on invalid host or malformed IP.
1701                self.host = host
1702            if port is not _absent:
1703                self.port = port  # Raises ValueError on invalid port.
1704
1705            if path is not _absent:
1706                self.path.load(path)
1707            if query is not _absent:
1708                self.query.load(query)
1709            if args is not _absent:
1710                self.query.load(args)
1711            if query_params is not _absent:
1712                self.query.load(query_params)
1713            if fragment is not _absent:
1714                self.fragment.load(fragment)
1715            if fragment_path is not _absent:
1716                self.fragment.path.load(fragment_path)
1717            if fragment_args is not _absent:
1718                self.fragment.query.load(fragment_args)
1719            if fragment_separator is not _absent:
1720                self.fragment.separator = fragment_separator
1721        except Exception:
1722            self.load(original_url)
1723            raise
1724
1725        return self
1726
1727    def remove(self, args=_absent, path=_absent, fragment=_absent,
1728               query=_absent, query_params=_absent, port=False,
1729               fragment_path=_absent, fragment_args=_absent, username=False,
1730               password=False):
1731        """
1732        Remove components of this furl's URL and return this furl
1733        instance, <self>.
1734
1735        Parameters:
1736          args: Shortcut for query_params.
1737          path: A list of path segments to remove from the end of the
1738            existing path segments list, or a path string to remove from
1739            the end of the existing path string, or True to remove the
1740            path portion of the URL entirely.
1741          query: A list of query keys to remove from the query, if they
1742            exist, or True to remove the query portion of the URL
1743            entirely.
1744          query_params: A list of query keys to remove from the query,
1745            if they exist.
1746          port: If True, remove the port from the network location
1747            string, if it exists.
1748          fragment: If True, remove the fragment portion of the URL
1749            entirely.
1750          fragment_path: A list of path segments to remove from the end
1751            of the fragment's path segments or a path string to remove
1752            from the end of the fragment's path string.
1753          fragment_args: A list of query keys to remove from the
1754            fragment's query, if they exist.
1755          username: If True, remove the username, if it exists.
1756          password: If True, remove the password, if it exists.
1757        Returns: <self>.
1758        """
1759        if username is True:
1760            self.username = None
1761        if password is True:
1762            self.password = None
1763        if port is True:
1764            self.port = None
1765        if path is not _absent:
1766            self.path.remove(path)
1767
1768        if args is not _absent:
1769            self.query.remove(args)
1770        if query is not _absent:
1771            self.query.remove(query)
1772        if query_params is not _absent:
1773            self.query.remove(query_params)
1774
1775        if fragment is not _absent:
1776            self.fragment.remove(fragment)
1777        if fragment_path is not _absent:
1778            self.fragment.path.remove(fragment_path)
1779        if fragment_args is not _absent:
1780            self.fragment.query.remove(fragment_args)
1781
1782        return self
1783
1784    def tostr(self, query_delimiter='&', query_quote_plus=True,
1785              query_dont_quote=''):
1786        encoded_query = self.query.encode(
1787            query_delimiter, query_quote_plus, query_dont_quote)
1788        url = urllib.parse.urlunsplit((
1789            self.scheme or '',  # Must be text type in Python 3.
1790            self.netloc,
1791            str(self.path),
1792            encoded_query,
1793            str(self.fragment),
1794        ))
1795
1796        # Differentiate between '' and None values for scheme and netloc.
1797        if self.scheme == '':
1798            url = ':' + url
1799
1800        if self.netloc == '':
1801            if self.scheme is None:
1802                url = '//' + url
1803            elif strip_scheme(url) == '':
1804                url = url + '//'
1805
1806        return str(url)
1807
1808    def join(self, *urls):
1809        for url in urls:
1810            if not isinstance(url, six.string_types):
1811                url = str(url)
1812            newurl = urljoin(self.url, url)
1813            self.load(newurl)
1814        return self
1815
1816    def copy(self):
1817        return self.__class__(self)
1818
1819    def asdict(self):
1820        return {
1821            'url': self.url,
1822            'scheme': self.scheme,
1823            'username': self.username,
1824            'password': self.password,
1825            'host': self.host,
1826            'host_encoded': idna_encode(self.host),
1827            'port': self.port,
1828            'netloc': self.netloc,
1829            'origin': self.origin,
1830            'path': self.path.asdict(),
1831            'query': self.query.asdict(),
1832            'fragment': self.fragment.asdict(),
1833            }
1834
1835    def __truediv__(self, path):
1836        return self.copy().add(path=path)
1837
1838    def __eq__(self, other):
1839        try:
1840            return self.url == other.url
1841        except AttributeError:
1842            return None
1843
1844    def __ne__(self, other):
1845        return not self == other
1846
1847    def __setattr__(self, attr, value):
1848        if (not PathCompositionInterface.__setattr__(self, attr, value) and
1849           not QueryCompositionInterface.__setattr__(self, attr, value) and
1850           not FragmentCompositionInterface.__setattr__(self, attr, value)):
1851            object.__setattr__(self, attr, value)
1852
1853    def __unicode__(self):
1854        return self.tostr()
1855
1856    def __repr__(self):
1857        return "%s('%s')" % (self.__class__.__name__, str(self))
1858