1# -*- coding: utf-8 -*- 2 3# 4# furl - URL manipulation made simple. 5# 6# Ansgar Grunseid 7# grunseid.com 8# grunseid@gmail.com 9# 10# License: Build Amazing Things (Unlicense) 11# 12 13import re 14import abc 15import warnings 16from posixpath import normpath 17 18import six 19from six.moves import urllib 20from six.moves.urllib.parse import quote, unquote 21try: 22 from icecream import ic 23except ImportError: # Graceful fallback if IceCream isn't installed. 24 ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa 25 26from .omdict1D import omdict1D 27from .compat import string_types, UnicodeMixin 28from .common import ( 29 callable_attr, is_iterable_but_not_string, absent as _absent) 30 31 32# Map of common protocols, as suggested by the common protocols included in 33# urllib/parse.py, to their default ports. Protocol scheme strings are 34# lowercase. 35DEFAULT_PORTS = { 36 'ws': 80, 37 'ftp': 21, 38 'git': 9418, 39 'hdl': 2641, 40 'nfs': 111, 41 'sip': 5060, 42 'ssh': 22, 43 'svn': 3690, 44 'wss': 443, 45 'http': 80, 46 'imap': 143, 47 'nntp': 119, 48 'sftp': 22, 49 'sips': 5061, 50 'tftp': 69, 51 'rtsp': 554, 52 'wais': 210, 53 'https': 443, 54 'rsync': 873, 55 'rtspu': 5004, 56 'snews': 563, 57 'gopher': 70, 58 'telnet': 23, 59 'prospero': 191, 60} 61 62 63def lget(l, index, default=None): 64 try: 65 return l[index] 66 except IndexError: 67 return default 68 69 70def attemptstr(o): 71 try: 72 return str(o) 73 except Exception: 74 return o 75 76 77def utf8(o, default=_absent): 78 try: 79 return o.encode('utf8') 80 except Exception: 81 return o if default is _absent else default 82 83 84def non_string_iterable(o): 85 return callable_attr(o, '__iter__') and not isinstance(o, string_types) 86 87 88# TODO(grun): Support IDNA2008 via the third party idna module. See 89# https://github.com/gruns/furl/issues/73. 90def idna_encode(o): 91 if callable_attr(o, 'encode'): 92 return str(o.encode('idna').decode('utf8')) 93 return o 94 95 96def idna_decode(o): 97 if callable_attr(utf8(o), 'decode'): 98 return utf8(o).decode('idna') 99 return o 100 101 102def is_valid_port(port): 103 port = str(port) 104 if not port.isdigit() or not 0 < int(port) <= 65535: 105 return False 106 return True 107 108 109def static_vars(**kwargs): 110 def decorator(func): 111 for key, value in six.iteritems(kwargs): 112 setattr(func, key, value) 113 return func 114 return decorator 115 116 117def create_quote_fn(safe_charset, quote_plus): 118 def quote_fn(s, dont_quote): 119 if dont_quote is True: 120 safe = safe_charset 121 elif dont_quote is False: 122 safe = '' 123 else: # <dont_quote> is expected to be a string. 124 safe = dont_quote 125 126 # Prune duplicates and characters not in <safe_charset>. 127 safe = ''.join(set(safe) & set(safe_charset)) # E.g. '?^#?' -> '?'. 128 129 quoted = quote(s, safe) 130 if quote_plus: 131 quoted = quoted.replace('%20', '+') 132 133 return quoted 134 135 return quote_fn 136 137 138# 139# TODO(grun): Update some of the regex functions below to reflect the fact that 140# the valid encoding of Path segments differs slightly from the valid encoding 141# of Fragment Path segments. Similarly, the valid encodings of Query keys and 142# values differ slightly from the valid encodings of Fragment Query keys and 143# values. 144# 145# For example, '?' and '#' don't need to be encoded in Fragment Path segments 146# but they must be encoded in Path segments. Similarly, '#' doesn't need to be 147# encoded in Fragment Query keys and values, but must be encoded in Query keys 148# and values. 149# 150# Perhaps merge them with URLPath, FragmentPath, URLQuery, and 151# FragmentQuery when those new classes are created (see the TODO 152# currently at the top of the source, 02/03/2012). 153# 154 155# RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt) 156# 157# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 158# 159# pct-encoded = "%" HEXDIG HEXDIG 160# 161# sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 162# / "*" / "+" / "," / ";" / "=" 163# 164# pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 165# 166# === Path === 167# segment = *pchar 168# 169# === Query === 170# query = *( pchar / "/" / "?" ) 171# 172# === Scheme === 173# scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 174# 175PERCENT_REGEX = r'\%[a-fA-F\d][a-fA-F\d]' 176INVALID_HOST_CHARS = '!@#$%^&\'\"*()+=:;/' 177 178 179@static_vars(regex=re.compile( 180 r'^([\w%s]|(%s))*$' % (re.escape('-.~:@!$&\'()*+,;='), PERCENT_REGEX))) 181def is_valid_encoded_path_segment(segment): 182 return is_valid_encoded_path_segment.regex.match(segment) is not None 183 184 185@static_vars(regex=re.compile( 186 r'^([\w%s]|(%s))*$' % (re.escape('-.~:@!$&\'()*+,;/?'), PERCENT_REGEX))) 187def is_valid_encoded_query_key(key): 188 return is_valid_encoded_query_key.regex.match(key) is not None 189 190 191@static_vars(regex=re.compile( 192 r'^([\w%s]|(%s))*$' % (re.escape('-.~:@!$&\'()*+,;/?='), PERCENT_REGEX))) 193def is_valid_encoded_query_value(value): 194 return is_valid_encoded_query_value.regex.match(value) is not None 195 196 197@static_vars(regex=re.compile(r'[a-zA-Z][a-zA-Z\-\.\+]*')) 198def is_valid_scheme(scheme): 199 return is_valid_scheme.regex.match(scheme) is not None 200 201 202@static_vars(regex=re.compile('[%s]' % re.escape(INVALID_HOST_CHARS))) 203def is_valid_host(hostname): 204 toks = hostname.split('.') 205 if toks[-1] == '': # Trailing '.' in a fully qualified domain name. 206 toks.pop() 207 208 for tok in toks: 209 if is_valid_host.regex.search(tok) is not None: 210 return False 211 212 return '' not in toks # Adjacent periods aren't allowed. 213 214 215def get_scheme(url): 216 if url.startswith(':'): 217 return '' 218 219 # Avoid incorrect scheme extraction with url.find(':') when other URL 220 # components, like the path, query, fragment, etc, may have a colon in 221 # them. For example, the URL 'a?query:', whose query has a ':' in it. 222 no_fragment = url.split('#', 1)[0] 223 no_query = no_fragment.split('?', 1)[0] 224 no_path_or_netloc = no_query.split('/', 1)[0] 225 scheme = url[:max(0, no_path_or_netloc.find(':'))] or None 226 227 if scheme is not None and not is_valid_scheme(scheme): 228 return None 229 230 return scheme 231 232 233def strip_scheme(url): 234 scheme = get_scheme(url) or '' 235 url = url[len(scheme):] 236 if url.startswith(':'): 237 url = url[1:] 238 return url 239 240 241def set_scheme(url, scheme): 242 after_scheme = strip_scheme(url) 243 if scheme is None: 244 return after_scheme 245 else: 246 return '%s:%s' % (scheme, after_scheme) 247 248 249# 'netloc' in Python parlance, 'authority' in RFC 3986 parlance. 250def has_netloc(url): 251 scheme = get_scheme(url) 252 return url.startswith('//' if scheme is None else scheme + '://') 253 254 255def urlsplit(url): 256 """ 257 Parameters: 258 url: URL string to split. 259 Returns: urlparse.SplitResult tuple subclass, just like 260 urlparse.urlsplit() returns, with fields (scheme, netloc, path, 261 query, fragment, username, password, hostname, port). See 262 http://docs.python.org/library/urlparse.html#urlparse.urlsplit 263 for more details on urlsplit(). 264 """ 265 original_scheme = get_scheme(url) 266 267 # urlsplit() parses URLs differently depending on whether or not the URL's 268 # scheme is in any of 269 # 270 # urllib.parse.uses_fragment 271 # urllib.parse.uses_netloc 272 # urllib.parse.uses_params 273 # urllib.parse.uses_query 274 # urllib.parse.uses_relative 275 # 276 # For consistent URL parsing, switch the URL's scheme to 'http', a scheme 277 # in all of the aforementioned uses_* lists, and afterwards revert to the 278 # original scheme (which may or may not be in some, or all, of the the 279 # uses_* lists). 280 if original_scheme is not None: 281 url = set_scheme(url, 'http') 282 283 scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url) 284 285 # Detect and preserve the '//' before the netloc, if present. E.g. preserve 286 # URLs like 'http:', 'http://', and '///sup' correctly. 287 after_scheme = strip_scheme(url) 288 if after_scheme.startswith('//'): 289 netloc = netloc or '' 290 else: 291 netloc = None 292 293 scheme = original_scheme 294 295 return urllib.parse.SplitResult(scheme, netloc, path, query, fragment) 296 297 298def urljoin(base, url): 299 """ 300 Parameters: 301 base: Base URL to join with <url>. 302 url: Relative or absolute URL to join with <base>. 303 304 Returns: The resultant URL from joining <base> and <url>. 305 """ 306 base_scheme = get_scheme(base) if has_netloc(base) else None 307 url_scheme = get_scheme(url) if has_netloc(url) else None 308 309 if base_scheme is not None: 310 # For consistent URL joining, switch the base URL's scheme to 311 # 'http'. urllib.parse.urljoin() behaves differently depending on the 312 # scheme. E.g. 313 # 314 # >>> urllib.parse.urljoin('http://google.com/', 'hi') 315 # 'http://google.com/hi' 316 # 317 # vs 318 # 319 # >>> urllib.parse.urljoin('asdf://google.com/', 'hi') 320 # 'hi' 321 root = set_scheme(base, 'http') 322 else: 323 root = base 324 325 joined = urllib.parse.urljoin(root, url) 326 327 new_scheme = url_scheme if url_scheme is not None else base_scheme 328 if new_scheme is not None and has_netloc(joined): 329 joined = set_scheme(joined, new_scheme) 330 331 return joined 332 333 334def join_path_segments(*args): 335 """ 336 Join multiple lists of path segments together, intelligently 337 handling path segments borders to preserve intended slashes of the 338 final constructed path. 339 340 This function is not encoding aware. It doesn't test for, or change, 341 the encoding of path segments it is passed. 342 343 Examples: 344 join_path_segments(['a'], ['b']) == ['a','b'] 345 join_path_segments(['a',''], ['b']) == ['a','b'] 346 join_path_segments(['a'], ['','b']) == ['a','b'] 347 join_path_segments(['a',''], ['','b']) == ['a','','b'] 348 join_path_segments(['a','b'], ['c','d']) == ['a','b','c','d'] 349 350 Returns: A list containing the joined path segments. 351 """ 352 finals = [] 353 354 for segments in args: 355 if not segments or segments == ['']: 356 continue 357 elif not finals: 358 finals.extend(segments) 359 else: 360 # Example #1: ['a',''] + ['b'] == ['a','b'] 361 # Example #2: ['a',''] + ['','b'] == ['a','','b'] 362 if finals[-1] == '' and (segments[0] != '' or len(segments) > 1): 363 finals.pop(-1) 364 # Example: ['a'] + ['','b'] == ['a','b'] 365 elif finals[-1] != '' and segments[0] == '' and len(segments) > 1: 366 segments = segments[1:] 367 finals.extend(segments) 368 369 return finals 370 371 372def remove_path_segments(segments, remove): 373 """ 374 Removes the path segments of <remove> from the end of the path 375 segments <segments>. 376 377 Examples: 378 # ('/a/b/c', 'b/c') -> '/a/' 379 remove_path_segments(['','a','b','c'], ['b','c']) == ['','a',''] 380 # ('/a/b/c', '/b/c') -> '/a' 381 remove_path_segments(['','a','b','c'], ['','b','c']) == ['','a'] 382 383 Returns: The list of all remaining path segments after the segments 384 in <remove> have been removed from the end of <segments>. If no 385 segments from <remove> were removed from <segments>, <segments> is 386 returned unmodified. 387 """ 388 # [''] means a '/', which is properly represented by ['', '']. 389 if segments == ['']: 390 segments.append('') 391 if remove == ['']: 392 remove.append('') 393 394 ret = None 395 if remove == segments: 396 ret = [] 397 elif len(remove) > len(segments): 398 ret = segments 399 else: 400 toremove = list(remove) 401 402 if len(remove) > 1 and remove[0] == '': 403 toremove.pop(0) 404 405 if toremove and toremove == segments[-1 * len(toremove):]: 406 ret = segments[:len(segments) - len(toremove)] 407 if remove[0] != '' and ret: 408 ret.append('') 409 else: 410 ret = segments 411 412 return ret 413 414 415def quacks_like_a_path_with_segments(obj): 416 return ( 417 hasattr(obj, 'segments') and 418 is_iterable_but_not_string(obj.segments)) 419 420 421class Path(object): 422 423 """ 424 Represents a path comprised of zero or more path segments. 425 426 http://tools.ietf.org/html/rfc3986#section-3.3 427 428 Path parameters aren't supported. 429 430 Attributes: 431 _force_absolute: Function whos boolean return value specifies 432 whether self.isabsolute should be forced to True or not. If 433 _force_absolute(self) returns True, isabsolute is read only and 434 raises an AttributeError if assigned to. If 435 _force_absolute(self) returns False, isabsolute is mutable and 436 can be set to True or False. URL paths use _force_absolute and 437 return True if the netloc is non-empty (not equal to 438 ''). Fragment paths are never read-only and their 439 _force_absolute(self) always returns False. 440 segments: List of zero or more path segments comprising this 441 path. If the path string has a trailing '/', the last segment 442 will be '' and self.isdir will be True and self.isfile will be 443 False. An empty segment list represents an empty path, not '/' 444 (though they have the same meaning). 445 isabsolute: Boolean whether or not this is an absolute path or 446 not. An absolute path starts with a '/'. self.isabsolute is 447 False if the path is empty (self.segments == [] and str(path) == 448 ''). 449 strict: Boolean whether or not UserWarnings should be raised if 450 improperly encoded path strings are provided to methods that 451 take such strings, like load(), add(), set(), remove(), etc. 452 """ 453 454 # From RFC 3986: 455 # segment = *pchar 456 # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 457 # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 458 # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 459 # / "*" / "+" / "," / ";" / "=" 460 SAFE_SEGMENT_CHARS = ":@-._~!$&'()*+,;=" 461 462 def __init__(self, path='', force_absolute=lambda _: False, strict=False): 463 self.segments = [] 464 465 self.strict = strict 466 self._isabsolute = False 467 self._force_absolute = force_absolute 468 469 self.load(path) 470 471 def load(self, path): 472 """ 473 Load <path>, replacing any existing path. <path> can either be 474 a Path instance, a list of segments, a path string to adopt. 475 476 Returns: <self>. 477 """ 478 if not path: 479 segments = [] 480 elif quacks_like_a_path_with_segments(path): # Path interface. 481 segments = path.segments 482 elif is_iterable_but_not_string(path): # List interface. 483 segments = path 484 else: # String interface. 485 segments = self._segments_from_path(path) 486 487 if self._force_absolute(self): 488 self._isabsolute = True if segments else False 489 else: 490 self._isabsolute = (segments and segments[0] == '') 491 492 if self.isabsolute and len(segments) > 1 and segments[0] == '': 493 segments.pop(0) 494 495 self.segments = segments 496 497 return self 498 499 def add(self, path): 500 """ 501 Add <path> to the existing path. <path> can either be a Path instance, 502 a list of segments, or a path string to append to the existing path. 503 504 Returns: <self>. 505 """ 506 if quacks_like_a_path_with_segments(path): # Path interface. 507 newsegments = path.segments 508 elif is_iterable_but_not_string(path): # List interface. 509 newsegments = path 510 else: # String interface. 511 newsegments = self._segments_from_path(path) 512 513 # Preserve the opening '/' if one exists already (self.segments 514 # == ['']). 515 if self.segments == [''] and newsegments and newsegments[0] != '': 516 newsegments.insert(0, '') 517 518 segments = self.segments 519 if self.isabsolute and self.segments and self.segments[0] != '': 520 segments.insert(0, '') 521 522 self.load(join_path_segments(segments, newsegments)) 523 524 return self 525 526 def set(self, path): 527 self.load(path) 528 return self 529 530 def remove(self, path): 531 if path is True: 532 self.load('') 533 else: 534 if is_iterable_but_not_string(path): # List interface. 535 segments = path 536 else: # String interface. 537 segments = self._segments_from_path(path) 538 base = ([''] if self.isabsolute else []) + self.segments 539 self.load(remove_path_segments(base, segments)) 540 541 return self 542 543 def normalize(self): 544 """ 545 Normalize the path. Turn '//a/./b/../c//' into '/a/c/'. 546 547 Returns: <self>. 548 """ 549 if str(self): 550 normalized = normpath(str(self)) + ('/' * self.isdir) 551 if normalized.startswith('//'): # http://bugs.python.org/636648 552 normalized = '/' + normalized.lstrip('/') 553 self.load(normalized) 554 555 return self 556 557 def asdict(self): 558 return { 559 'encoded': str(self), 560 'isdir': self.isdir, 561 'isfile': self.isfile, 562 'segments': self.segments, 563 'isabsolute': self.isabsolute, 564 } 565 566 @property 567 def isabsolute(self): 568 if self._force_absolute(self): 569 return True 570 return self._isabsolute 571 572 @isabsolute.setter 573 def isabsolute(self, isabsolute): 574 """ 575 Raises: AttributeError if _force_absolute(self) returns True. 576 """ 577 if self._force_absolute(self): 578 s = ('Path.isabsolute is True and read-only for URLs with a netloc' 579 ' (a username, password, host, and/or port). A URL path must ' 580 "start with a '/' to separate itself from a netloc.") 581 raise AttributeError(s) 582 self._isabsolute = isabsolute 583 584 @property 585 def isdir(self): 586 """ 587 Returns: True if the path ends on a directory, False 588 otherwise. If True, the last segment is '', representing the 589 trailing '/' of the path. 590 """ 591 return (self.segments == [] or 592 (self.segments and self.segments[-1] == '')) 593 594 @property 595 def isfile(self): 596 """ 597 Returns: True if the path ends on a file, False otherwise. If 598 True, the last segment is not '', representing some file as the 599 last segment of the path. 600 """ 601 return not self.isdir 602 603 def __truediv__(self, path): 604 copy = self.__class__( 605 path=self.segments, 606 force_absolute=self._force_absolute, 607 strict=self.strict) 608 return copy.add(path) 609 610 def __eq__(self, other): 611 return str(self) == str(other) 612 613 def __ne__(self, other): 614 return not self == other 615 616 def __bool__(self): 617 return len(self.segments) > 0 618 __nonzero__ = __bool__ 619 620 def __str__(self): 621 segments = list(self.segments) 622 if self.isabsolute: 623 if not segments: 624 segments = ['', ''] 625 else: 626 segments.insert(0, '') 627 return self._path_from_segments(segments) 628 629 def __repr__(self): 630 return "%s('%s')" % (self.__class__.__name__, str(self)) 631 632 def _segments_from_path(self, path): 633 """ 634 Returns: The list of path segments from the path string <path>. 635 636 Raises: UserWarning if <path> is an improperly encoded path 637 string and self.strict is True. 638 639 TODO(grun): Accept both list values and string values and 640 refactor the list vs string interface testing to this common 641 method. 642 """ 643 segments = [] 644 for segment in path.split('/'): 645 if not is_valid_encoded_path_segment(segment): 646 segment = quote(utf8(segment)) 647 if self.strict: 648 s = ("Improperly encoded path string received: '%s'. " 649 "Proceeding, but did you mean '%s'?" % 650 (path, self._path_from_segments(segments))) 651 warnings.warn(s, UserWarning) 652 segments.append(utf8(segment)) 653 del segment 654 655 # In Python 3, utf8() returns Bytes objects that must be decoded into 656 # strings before they can be passed to unquote(). In Python 2, utf8() 657 # returns strings that can be passed directly to urllib.unquote(). 658 segments = [ 659 segment.decode('utf8') 660 if isinstance(segment, bytes) and not isinstance(segment, str) 661 else segment for segment in segments] 662 663 return [unquote(segment) for segment in segments] 664 665 def _path_from_segments(self, segments): 666 """ 667 Combine the provided path segments <segments> into a path string. Path 668 segments in <segments> will be quoted. 669 670 Returns: A path string with quoted path segments. 671 """ 672 segments = [ 673 quote(utf8(attemptstr(segment)), self.SAFE_SEGMENT_CHARS) 674 for segment in segments] 675 return '/'.join(segments) 676 677 678@six.add_metaclass(abc.ABCMeta) 679class PathCompositionInterface(object): 680 681 """ 682 Abstract class interface for a parent class that contains a Path. 683 """ 684 685 def __init__(self, strict=False): 686 """ 687 Params: 688 force_absolute: See Path._force_absolute. 689 690 Assignments to <self> in __init__() must be added to 691 __setattr__() below. 692 """ 693 self._path = Path(force_absolute=self._force_absolute, strict=strict) 694 695 @property 696 def path(self): 697 return self._path 698 699 @property 700 def pathstr(self): 701 """This method is deprecated. Use str(furl.path) instead.""" 702 s = ('furl.pathstr is deprecated. Use str(furl.path) instead. There ' 703 'should be one, and preferably only one, obvious way to serialize' 704 ' a Path object to a string.') 705 warnings.warn(s, DeprecationWarning) 706 return str(self._path) 707 708 @abc.abstractmethod 709 def _force_absolute(self, path): 710 """ 711 Subclass me. 712 """ 713 pass 714 715 def __setattr__(self, attr, value): 716 """ 717 Returns: True if this attribute is handled and set here, False 718 otherwise. 719 """ 720 if attr == '_path': 721 self.__dict__[attr] = value 722 return True 723 elif attr == 'path': 724 self._path.load(value) 725 return True 726 return False 727 728 729@six.add_metaclass(abc.ABCMeta) 730class URLPathCompositionInterface(PathCompositionInterface): 731 732 """ 733 Abstract class interface for a parent class that contains a URL 734 Path. 735 736 A URL path's isabsolute attribute is absolute and read-only if a 737 netloc is defined. A path cannot start without '/' if there's a 738 netloc. For example, the URL 'http://google.coma/path' makes no 739 sense. It should be 'http://google.com/a/path'. 740 741 A URL path's isabsolute attribute is mutable if there's no 742 netloc. The scheme doesn't matter. For example, the isabsolute 743 attribute of the URL path in 'mailto:user@host.com', with scheme 744 'mailto' and path 'user@host.com', is mutable because there is no 745 netloc. See 746 747 http://en.wikipedia.org/wiki/URI_scheme#Examples 748 """ 749 750 def __init__(self, strict=False): 751 PathCompositionInterface.__init__(self, strict=strict) 752 753 def _force_absolute(self, path): 754 return bool(path) and self.netloc 755 756 757@six.add_metaclass(abc.ABCMeta) 758class FragmentPathCompositionInterface(PathCompositionInterface): 759 760 """ 761 Abstract class interface for a parent class that contains a Fragment 762 Path. 763 764 Fragment Paths they be set to absolute (self.isabsolute = True) or 765 not absolute (self.isabsolute = False). 766 """ 767 768 def __init__(self, strict=False): 769 PathCompositionInterface.__init__(self, strict=strict) 770 771 def _force_absolute(self, path): 772 return False 773 774 775class Query(object): 776 777 """ 778 Represents a URL query comprised of zero or more unique parameters 779 and their respective values. 780 781 http://tools.ietf.org/html/rfc3986#section-3.4 782 783 784 All interaction with Query.params is done with unquoted strings. So 785 786 f.query.params['a'] = 'a%5E' 787 788 means the intended value for 'a' is 'a%5E', not 'a^'. 789 790 791 Query.params is implemented as an omdict1D object - a one 792 dimensional ordered multivalue dictionary. This provides support for 793 repeated URL parameters, like 'a=1&a=2'. omdict1D is a subclass of 794 omdict, an ordered multivalue dictionary. Documentation for omdict 795 can be found here 796 797 https://github.com/gruns/orderedmultidict 798 799 The one dimensional aspect of omdict1D means that a list of values 800 is interpreted as multiple values, not a single value which is 801 itself a list of values. This is a reasonable distinction to make 802 because URL query parameters are one dimensional: query parameter 803 values cannot themselves be composed of sub-values. 804 805 So what does this mean? This means we can safely interpret 806 807 f = furl('http://www.google.com') 808 f.query.params['arg'] = ['one', 'two', 'three'] 809 810 as three different values for 'arg': 'one', 'two', and 'three', 811 instead of a single value which is itself some serialization of the 812 python list ['one', 'two', 'three']. Thus, the result of the above 813 will be 814 815 f.query.allitems() == [ 816 ('arg','one'), ('arg','two'), ('arg','three')] 817 818 and not 819 820 f.query.allitems() == [('arg', ['one', 'two', 'three'])] 821 822 The latter doesn't make sense because query parameter values cannot 823 be composed of sub-values. So finally 824 825 str(f.query) == 'arg=one&arg=two&arg=three' 826 827 828 Additionally, while the set of allowed characters in URL queries is 829 defined in RFC 3986 section 3.4, the format for encoding key=value 830 pairs within the query is not. In turn, the parsing of encoded 831 key=value query pairs differs between implementations. 832 833 As a compromise to support equal signs in both key=value pair 834 encoded queries, like 835 836 https://www.google.com?a=1&b=2 837 838 and non-key=value pair encoded queries, like 839 840 https://www.google.com?===3=== 841 842 equal signs are percent encoded in key=value pairs where the key is 843 non-empty, e.g. 844 845 https://www.google.com?equal-sign=%3D 846 847 but not encoded in key=value pairs where the key is empty, e.g. 848 849 https://www.google.com?===equal=sign=== 850 851 This presents a reasonable compromise to accurately reproduce 852 non-key=value queries with equal signs while also still percent 853 encoding equal signs in key=value pair encoded queries, as 854 expected. See 855 856 https://github.com/gruns/furl/issues/99 857 858 for more details. 859 860 Attributes: 861 params: Ordered multivalue dictionary of query parameter key:value 862 pairs. Parameters in self.params are maintained URL decoded, 863 e.g. 'a b' not 'a+b'. 864 strict: Boolean whether or not UserWarnings should be raised if 865 improperly encoded query strings are provided to methods that 866 take such strings, like load(), add(), set(), remove(), etc. 867 """ 868 869 # From RFC 3986: 870 # query = *( pchar / "/" / "?" ) 871 # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 872 # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 873 # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 874 # / "*" / "+" / "," / ";" / "=" 875 SAFE_KEY_CHARS = "/?:@-._~!$'()*+,;" 876 SAFE_VALUE_CHARS = SAFE_KEY_CHARS + '=' 877 878 def __init__(self, query='', strict=False): 879 self.strict = strict 880 881 self._params = omdict1D() 882 883 self.load(query) 884 885 def load(self, query): 886 items = self._items(query) 887 self.params.load(items) 888 return self 889 890 def add(self, args): 891 for param, value in self._items(args): 892 self.params.add(param, value) 893 return self 894 895 def set(self, mapping): 896 """ 897 Adopt all mappings in <mapping>, replacing any existing mappings 898 with the same key. If a key has multiple values in <mapping>, 899 they are all adopted. 900 901 Examples: 902 Query({1:1}).set([(1,None),(2,2)]).params.allitems() 903 == [(1,None),(2,2)] 904 Query({1:None,2:None}).set([(1,1),(2,2),(1,11)]).params.allitems() 905 == [(1,1),(2,2),(1,11)] 906 Query({1:None}).set([(1,[1,11,111])]).params.allitems() 907 == [(1,1),(1,11),(1,111)] 908 909 Returns: <self>. 910 """ 911 self.params.updateall(mapping) 912 return self 913 914 def remove(self, query): 915 if query is True: 916 self.load('') 917 return self 918 919 # Single key to remove. 920 items = [query] 921 # Dictionary or multivalue dictionary of items to remove. 922 if callable_attr(query, 'items'): 923 items = self._items(query) 924 # List of keys or items to remove. 925 elif non_string_iterable(query): 926 items = query 927 928 for item in items: 929 if non_string_iterable(item) and len(item) == 2: 930 key, value = item 931 self.params.popvalue(key, value, None) 932 else: 933 key = item 934 self.params.pop(key, None) 935 936 return self 937 938 @property 939 def params(self): 940 return self._params 941 942 @params.setter 943 def params(self, params): 944 items = self._items(params) 945 946 self._params.clear() 947 for key, value in items: 948 self._params.add(key, value) 949 950 def encode(self, delimiter='&', quote_plus=True, dont_quote='', 951 delimeter=_absent): 952 """ 953 Examples: 954 955 Query('a=a&b=#').encode() == 'a=a&b=%23' 956 Query('a=a&b=#').encode(';') == 'a=a;b=%23' 957 Query('a+b=c@d').encode(dont_quote='@') == 'a+b=c@d' 958 Query('a+b=c@d').encode(quote_plus=False) == 'a%20b=c%40d' 959 960 Until furl v0.4.6, the 'delimiter' argument was incorrectly 961 spelled 'delimeter'. For backwards compatibility, accept both 962 the correct 'delimiter' and the old, mispelled 'delimeter'. 963 964 Keys and values are encoded application/x-www-form-urlencoded if 965 <quote_plus> is True, percent-encoded otherwise. 966 967 <dont_quote> exempts valid query characters from being 968 percent-encoded, either in their entirety with dont_quote=True, 969 or selectively with dont_quote=<string>, like 970 dont_quote='/?@_'. Invalid query characters -- those not in 971 self.SAFE_KEY_CHARS, like '#' and '^' -- are always encoded, 972 even if included in <dont_quote>. For example: 973 974 Query('#=^').encode(dont_quote='#^') == '%23=%5E'. 975 976 977 Returns: A URL encoded query string using <delimiter> as the 978 delimiter separating key:value pairs. The most common and 979 default delimiter is '&', but ';' can also be specified. ';' is 980 W3C recommended. 981 """ 982 if delimeter is not _absent: 983 delimiter = delimeter 984 985 quote_key = create_quote_fn(self.SAFE_KEY_CHARS, quote_plus) 986 quote_value = create_quote_fn(self.SAFE_VALUE_CHARS, quote_plus) 987 988 pairs = [] 989 for key, value in self.params.iterallitems(): 990 utf8key = utf8(key, utf8(attemptstr(key))) 991 quoted_key = quote_key(utf8key, dont_quote) 992 993 if value is None: # Example: http://sprop.su/?key. 994 pair = quoted_key 995 else: # Example: http://sprop.su/?key=value. 996 utf8value = utf8(value, utf8(attemptstr(value))) 997 quoted_value = quote_value(utf8value, dont_quote) 998 999 if not quoted_key: # Unquote '=' to allow queries like '?==='. 1000 quoted_value = quoted_value.replace('%3D', '=') 1001 1002 pair = '%s=%s' % (quoted_key, quoted_value) 1003 1004 pairs.append(pair) 1005 1006 query = delimiter.join(pairs) 1007 1008 return query 1009 1010 def asdict(self): 1011 return { 1012 'encoded': str(self), 1013 'params': self.params.allitems(), 1014 } 1015 1016 def __eq__(self, other): 1017 return str(self) == str(other) 1018 1019 def __ne__(self, other): 1020 return not self == other 1021 1022 def __bool__(self): 1023 return len(self.params) > 0 1024 __nonzero__ = __bool__ 1025 1026 def __str__(self): 1027 return self.encode() 1028 1029 def __repr__(self): 1030 return "%s('%s')" % (self.__class__.__name__, str(self)) 1031 1032 def _items(self, items): 1033 """ 1034 Extract and return the key:value items from various 1035 containers. Some containers that could hold key:value items are 1036 1037 - List of (key,value) tuples. 1038 - Dictionaries of key:value items. 1039 - Multivalue dictionary of key:value items, with potentially 1040 repeated keys. 1041 - Query string with encoded params and values. 1042 1043 Keys and values are passed through unmodified unless they were 1044 passed in within an encoded query string, like 1045 'a=a%20a&b=b'. Keys and values passed in within an encoded query 1046 string are unquoted by urlparse.parse_qsl(), which uses 1047 urllib.unquote_plus() internally. 1048 1049 Returns: List of items as (key, value) tuples. Keys and values 1050 are passed through unmodified unless they were passed in as part 1051 of an encoded query string, in which case the final keys and 1052 values that are returned will be unquoted. 1053 1054 Raises: UserWarning if <path> is an improperly encoded path 1055 string and self.strict is True. 1056 """ 1057 if not items: 1058 items = [] 1059 # Multivalue Dictionary-like interface. e.g. {'a':1, 'a':2, 1060 # 'b':2} 1061 elif callable_attr(items, 'allitems'): 1062 items = list(items.allitems()) 1063 elif callable_attr(items, 'iterallitems'): 1064 items = list(items.iterallitems()) 1065 # Dictionary-like interface. e.g. {'a':1, 'b':2, 'c':3} 1066 elif callable_attr(items, 'items'): 1067 items = list(items.items()) 1068 elif callable_attr(items, 'iteritems'): 1069 items = list(items.iteritems()) 1070 # Encoded query string. e.g. 'a=1&b=2&c=3' 1071 elif isinstance(items, six.string_types): 1072 items = self._extract_items_from_querystr(items) 1073 # Default to list of key:value items interface. e.g. [('a','1'), 1074 # ('b','2')] 1075 else: 1076 items = list(items) 1077 1078 return items 1079 1080 def _extract_items_from_querystr(self, querystr): 1081 items = [] 1082 1083 pairstrs = [s2 for s1 in querystr.split('&') for s2 in s1.split(';')] 1084 pairs = [item.split('=', 1) for item in pairstrs] 1085 pairs = [(p[0], lget(p, 1, '')) for p in pairs] # Pad with value ''. 1086 1087 for pairstr, (key, value) in six.moves.zip(pairstrs, pairs): 1088 valid_key = is_valid_encoded_query_key(key) 1089 valid_value = is_valid_encoded_query_value(value) 1090 if self.strict and (not valid_key or not valid_value): 1091 msg = ( 1092 "Incorrectly percent encoded query string received: '%s'. " 1093 "Proceeding, but did you mean '%s'?" % 1094 (querystr, urllib.parse.urlencode(pairs))) 1095 warnings.warn(msg, UserWarning) 1096 1097 key_decoded = unquote(key.replace('+', ' ')) 1098 # Empty value without a '=', e.g. '?sup'. 1099 if key == pairstr: 1100 value_decoded = None 1101 else: 1102 value_decoded = unquote(value.replace('+', ' ')) 1103 1104 items.append((key_decoded, value_decoded)) 1105 1106 return items 1107 1108 1109@six.add_metaclass(abc.ABCMeta) 1110class QueryCompositionInterface(object): 1111 1112 """ 1113 Abstract class interface for a parent class that contains a Query. 1114 """ 1115 1116 def __init__(self, strict=False): 1117 self._query = Query(strict=strict) 1118 1119 @property 1120 def query(self): 1121 return self._query 1122 1123 @property 1124 def querystr(self): 1125 """This method is deprecated. Use str(furl.query) instead.""" 1126 s = ('furl.querystr is deprecated. Use str(furl.query) instead. There ' 1127 'should be one, and preferably only one, obvious way to serialize' 1128 ' a Query object to a string.') 1129 warnings.warn(s, DeprecationWarning) 1130 return str(self._query) 1131 1132 @property 1133 def args(self): 1134 """ 1135 Shortcut method to access the query parameters, self._query.params. 1136 """ 1137 return self._query.params 1138 1139 def __setattr__(self, attr, value): 1140 """ 1141 Returns: True if this attribute is handled and set here, False 1142 otherwise. 1143 """ 1144 if attr == 'args' or attr == 'query': 1145 self._query.load(value) 1146 return True 1147 return False 1148 1149 1150class Fragment(FragmentPathCompositionInterface, QueryCompositionInterface): 1151 1152 """ 1153 Represents a URL fragment, comprised internally of a Path and Query 1154 optionally separated by a '?' character. 1155 1156 http://tools.ietf.org/html/rfc3986#section-3.5 1157 1158 Attributes: 1159 path: Path object from FragmentPathCompositionInterface. 1160 query: Query object from QueryCompositionInterface. 1161 separator: Boolean whether or not a '?' separator should be 1162 included in the string representation of this fragment. When 1163 False, a '?' character will not separate the fragment path from 1164 the fragment query in the fragment string. This is useful to 1165 build fragments like '#!arg1=val1&arg2=val2', where no 1166 separating '?' is desired. 1167 """ 1168 1169 def __init__(self, fragment='', strict=False): 1170 FragmentPathCompositionInterface.__init__(self, strict=strict) 1171 QueryCompositionInterface.__init__(self, strict=strict) 1172 self.strict = strict 1173 self.separator = True 1174 1175 self.load(fragment) 1176 1177 def load(self, fragment): 1178 self.path.load('') 1179 self.query.load('') 1180 1181 if fragment is None: 1182 fragment = '' 1183 1184 toks = fragment.split('?', 1) 1185 if len(toks) == 0: 1186 self._path.load('') 1187 self._query.load('') 1188 elif len(toks) == 1: 1189 # Does this fragment look like a path or a query? Default to 1190 # path. 1191 if '=' in fragment: # Query example: '#woofs=dogs'. 1192 self._query.load(fragment) 1193 else: # Path example: '#supinthisthread'. 1194 self._path.load(fragment) 1195 else: 1196 # Does toks[1] actually look like a query? Like 'a=a' or 1197 # 'a=' or '=a'? 1198 if '=' in toks[1]: 1199 self._path.load(toks[0]) 1200 self._query.load(toks[1]) 1201 # If toks[1] doesn't look like a query, the user probably 1202 # provided a fragment string like 'a?b?' that was intended 1203 # to be adopted as-is, not a two part fragment with path 'a' 1204 # and query 'b?'. 1205 else: 1206 self._path.load(fragment) 1207 1208 def add(self, path=_absent, args=_absent): 1209 if path is not _absent: 1210 self.path.add(path) 1211 if args is not _absent: 1212 self.query.add(args) 1213 1214 return self 1215 1216 def set(self, path=_absent, args=_absent, separator=_absent): 1217 if path is not _absent: 1218 self.path.load(path) 1219 if args is not _absent: 1220 self.query.load(args) 1221 if separator is True or separator is False: 1222 self.separator = separator 1223 1224 return self 1225 1226 def remove(self, fragment=_absent, path=_absent, args=_absent): 1227 if fragment is True: 1228 self.load('') 1229 if path is not _absent: 1230 self.path.remove(path) 1231 if args is not _absent: 1232 self.query.remove(args) 1233 1234 return self 1235 1236 def asdict(self): 1237 return { 1238 'encoded': str(self), 1239 'separator': self.separator, 1240 'path': self.path.asdict(), 1241 'query': self.query.asdict(), 1242 } 1243 1244 def __eq__(self, other): 1245 return str(self) == str(other) 1246 1247 def __ne__(self, other): 1248 return not self == other 1249 1250 def __setattr__(self, attr, value): 1251 if (not PathCompositionInterface.__setattr__(self, attr, value) and 1252 not QueryCompositionInterface.__setattr__(self, attr, value)): 1253 object.__setattr__(self, attr, value) 1254 1255 def __bool__(self): 1256 return bool(self.path) or bool(self.query) 1257 __nonzero__ = __bool__ 1258 1259 def __str__(self): 1260 path, query = str(self._path), str(self._query) 1261 1262 # If there is no query or self.separator is False, decode all 1263 # '?' characters in the path from their percent encoded form 1264 # '%3F' to '?'. This allows for fragment strings containg '?'s, 1265 # like '#dog?machine?yes'. 1266 if path and (not query or not self.separator): 1267 path = path.replace('%3F', '?') 1268 1269 separator = '?' if path and query and self.separator else '' 1270 1271 return path + separator + query 1272 1273 def __repr__(self): 1274 return "%s('%s')" % (self.__class__.__name__, str(self)) 1275 1276 1277@six.add_metaclass(abc.ABCMeta) 1278class FragmentCompositionInterface(object): 1279 1280 """ 1281 Abstract class interface for a parent class that contains a 1282 Fragment. 1283 """ 1284 1285 def __init__(self, strict=False): 1286 self._fragment = Fragment(strict=strict) 1287 1288 @property 1289 def fragment(self): 1290 return self._fragment 1291 1292 @property 1293 def fragmentstr(self): 1294 """This method is deprecated. Use str(furl.fragment) instead.""" 1295 s = ('furl.fragmentstr is deprecated. Use str(furl.fragment) instead. ' 1296 'There should be one, and preferably only one, obvious way to ' 1297 'serialize a Fragment object to a string.') 1298 warnings.warn(s, DeprecationWarning) 1299 return str(self._fragment) 1300 1301 def __setattr__(self, attr, value): 1302 """ 1303 Returns: True if this attribute is handled and set here, False 1304 otherwise. 1305 """ 1306 if attr == 'fragment': 1307 self.fragment.load(value) 1308 return True 1309 return False 1310 1311 1312class furl(URLPathCompositionInterface, QueryCompositionInterface, 1313 FragmentCompositionInterface, UnicodeMixin): 1314 1315 """ 1316 Object for simple parsing and manipulation of a URL and its 1317 components. 1318 1319 scheme://username:password@host:port/path?query#fragment 1320 1321 Attributes: 1322 strict: Boolean whether or not UserWarnings should be raised if 1323 improperly encoded path, query, or fragment strings are provided 1324 to methods that take such strings, like load(), add(), set(), 1325 remove(), etc. 1326 username: Username string for authentication. Initially None. 1327 password: Password string for authentication with 1328 <username>. Initially None. 1329 scheme: URL scheme. A string ('http', 'https', '', etc) or None. 1330 All lowercase. Initially None. 1331 host: URL host (hostname, IPv4 address, or IPv6 address), not 1332 including port. All lowercase. Initially None. 1333 port: Port. Valid port values are 1-65535, or None meaning no port 1334 specified. 1335 netloc: Network location. Combined host and port string. Initially 1336 None. 1337 path: Path object from URLPathCompositionInterface. 1338 query: Query object from QueryCompositionInterface. 1339 fragment: Fragment object from FragmentCompositionInterface. 1340 """ 1341 1342 def __init__(self, url='', args=_absent, path=_absent, fragment=_absent, 1343 scheme=_absent, netloc=_absent, origin=_absent, 1344 fragment_path=_absent, fragment_args=_absent, 1345 fragment_separator=_absent, host=_absent, port=_absent, 1346 query=_absent, query_params=_absent, username=_absent, 1347 password=_absent, strict=False): 1348 """ 1349 Raises: ValueError on invalid URL or invalid URL component(s) provided. 1350 """ 1351 URLPathCompositionInterface.__init__(self, strict=strict) 1352 QueryCompositionInterface.__init__(self, strict=strict) 1353 FragmentCompositionInterface.__init__(self, strict=strict) 1354 self.strict = strict 1355 1356 self.load(url) # Raises ValueError on invalid URL. 1357 self.set( # Raises ValueError on invalid URL component(s). 1358 args=args, path=path, fragment=fragment, scheme=scheme, 1359 netloc=netloc, origin=origin, fragment_path=fragment_path, 1360 fragment_args=fragment_args, fragment_separator=fragment_separator, 1361 host=host, port=port, query=query, query_params=query_params, 1362 username=username, password=password) 1363 1364 def load(self, url): 1365 """ 1366 Parse and load a URL. 1367 1368 Raises: ValueError on invalid URL, like a malformed IPv6 address 1369 or invalid port. 1370 """ 1371 self.username = self.password = None 1372 self._host = self._port = self._scheme = None 1373 1374 if url is None: 1375 url = '' 1376 if not isinstance(url, six.string_types): 1377 url = str(url) 1378 1379 # urlsplit() raises a ValueError on malformed IPv6 addresses in 1380 # Python 2.7+. 1381 tokens = urlsplit(url) 1382 1383 self.netloc = tokens.netloc # Raises ValueError in Python 2.7+. 1384 self.scheme = tokens.scheme 1385 if not self.port: 1386 self._port = DEFAULT_PORTS.get(self.scheme) 1387 self.path.load(tokens.path) 1388 self.query.load(tokens.query) 1389 self.fragment.load(tokens.fragment) 1390 1391 return self 1392 1393 @property 1394 def scheme(self): 1395 return self._scheme 1396 1397 @scheme.setter 1398 def scheme(self, scheme): 1399 if callable_attr(scheme, 'lower'): 1400 scheme = scheme.lower() 1401 self._scheme = scheme 1402 1403 @property 1404 def host(self): 1405 return self._host 1406 1407 @host.setter 1408 def host(self, host): 1409 """ 1410 Raises: ValueError on invalid host or malformed IPv6 address. 1411 """ 1412 # Invalid IPv6 literal. 1413 urllib.parse.urlsplit('http://%s/' % host) # Raises ValueError. 1414 1415 # Invalid host string. 1416 resembles_ipv6_literal = ( 1417 host is not None and lget(host, 0) == '[' and ':' in host and 1418 lget(host, -1) == ']') 1419 if (host is not None and not resembles_ipv6_literal and 1420 not is_valid_host(host)): 1421 errmsg = ( 1422 "Invalid host '%s'. Host strings must have at least one " 1423 "non-period character, can't contain any of '%s', and can't " 1424 "have adjacent periods.") 1425 raise ValueError(errmsg % (host, INVALID_HOST_CHARS)) 1426 1427 if callable_attr(host, 'lower'): 1428 host = host.lower() 1429 if callable_attr(host, 'startswith') and host.startswith('xn--'): 1430 host = idna_decode(host) 1431 self._host = host 1432 1433 @property 1434 def port(self): 1435 return self._port or DEFAULT_PORTS.get(self.scheme) 1436 1437 @port.setter 1438 def port(self, port): 1439 """ 1440 The port value can be 1-65535 or None, meaning no port specified. If 1441 <port> is None and self.scheme is a known scheme in DEFAULT_PORTS, 1442 the default port value from DEFAULT_PORTS will be used. 1443 1444 Raises: ValueError on invalid port. 1445 """ 1446 if port is None: 1447 self._port = DEFAULT_PORTS.get(self.scheme) 1448 elif is_valid_port(port): 1449 self._port = int(str(port)) 1450 else: 1451 raise ValueError("Invalid port '%s'." % port) 1452 1453 @property 1454 def netloc(self): 1455 userpass = quote(utf8(self.username) or '', safe='') 1456 if self.password is not None: 1457 userpass += ':' + quote(utf8(self.password), safe='') 1458 if userpass or self.username is not None: 1459 userpass += '@' 1460 1461 netloc = idna_encode(self.host) 1462 if self.port and self.port != DEFAULT_PORTS.get(self.scheme): 1463 netloc = (netloc or '') + (':' + str(self.port)) 1464 1465 if userpass or netloc: 1466 netloc = (userpass or '') + (netloc or '') 1467 1468 return netloc 1469 1470 @netloc.setter 1471 def netloc(self, netloc): 1472 """ 1473 Params: 1474 netloc: Network location string, like 'google.com' or 1475 'user:pass@google.com:99'. 1476 Raises: ValueError on invalid port or malformed IPv6 address. 1477 """ 1478 # Raises ValueError on malformed IPv6 addresses. 1479 urllib.parse.urlsplit('http://%s/' % netloc) 1480 1481 username = password = host = port = None 1482 1483 if netloc and '@' in netloc: 1484 userpass, netloc = netloc.split('@', 1) 1485 if ':' in userpass: 1486 username, password = userpass.split(':', 1) 1487 else: 1488 username = userpass 1489 1490 if netloc and ':' in netloc: 1491 # IPv6 address literal. 1492 if ']' in netloc: 1493 colonpos, bracketpos = netloc.rfind(':'), netloc.rfind(']') 1494 if colonpos > bracketpos and colonpos != bracketpos + 1: 1495 raise ValueError("Invalid netloc '%s'." % netloc) 1496 elif colonpos > bracketpos and colonpos == bracketpos + 1: 1497 host, port = netloc.rsplit(':', 1) 1498 else: 1499 host = netloc 1500 else: 1501 host, port = netloc.rsplit(':', 1) 1502 host = host 1503 else: 1504 host = netloc 1505 1506 # Avoid side effects by assigning self.port before self.host so 1507 # that if an exception is raised when assigning self.port, 1508 # self.host isn't updated. 1509 self.port = port # Raises ValueError on invalid port. 1510 self.host = host 1511 self.username = None if username is None else unquote(username) 1512 self.password = None if password is None else unquote(password) 1513 1514 @property 1515 def origin(self): 1516 port = '' 1517 scheme = self.scheme or '' 1518 host = idna_encode(self.host) or '' 1519 if self.port and self.port != DEFAULT_PORTS.get(self.scheme): 1520 port = ':%s' % self.port 1521 origin = '%s://%s%s' % (scheme, host, port) 1522 1523 return origin 1524 1525 @origin.setter 1526 def origin(self, origin): 1527 toks = origin.split('://', 1) 1528 if len(toks) == 1: 1529 host_port = origin 1530 else: 1531 self.scheme, host_port = toks 1532 1533 if ':' in host_port: 1534 self.host, self.port = host_port.split(':', 1) 1535 else: 1536 self.host = host_port 1537 1538 @property 1539 def url(self): 1540 return self.tostr() 1541 1542 @url.setter 1543 def url(self, url): 1544 return self.load(url) 1545 1546 def add(self, args=_absent, path=_absent, fragment_path=_absent, 1547 fragment_args=_absent, query_params=_absent): 1548 """ 1549 Add components to a URL and return this furl instance, <self>. 1550 1551 If both <args> and <query_params> are provided, a UserWarning is 1552 raised because <args> is provided as a shortcut for 1553 <query_params>, not to be used simultaneously with 1554 <query_params>. Nonetheless, providing both <args> and 1555 <query_params> behaves as expected, with query keys and values 1556 from both <args> and <query_params> added to the query - <args> 1557 first, then <query_params>. 1558 1559 Parameters: 1560 args: Shortcut for <query_params>. 1561 path: A list of path segments to add to the existing path 1562 segments, or a path string to join with the existing path 1563 string. 1564 query_params: A dictionary of query keys and values or list of 1565 key:value items to add to the query. 1566 fragment_path: A list of path segments to add to the existing 1567 fragment path segments, or a path string to join with the 1568 existing fragment path string. 1569 fragment_args: A dictionary of query keys and values or list 1570 of key:value items to add to the fragment's query. 1571 1572 Returns: <self>. 1573 1574 Raises: UserWarning if redundant and possibly conflicting <args> and 1575 <query_params> were provided. 1576 """ 1577 if args is not _absent and query_params is not _absent: 1578 s = ('Both <args> and <query_params> provided to furl.add(). ' 1579 '<args> is a shortcut for <query_params>, not to be used ' 1580 'with <query_params>. See furl.add() documentation for more ' 1581 'details.') 1582 warnings.warn(s, UserWarning) 1583 1584 if path is not _absent: 1585 self.path.add(path) 1586 if args is not _absent: 1587 self.query.add(args) 1588 if query_params is not _absent: 1589 self.query.add(query_params) 1590 if fragment_path is not _absent or fragment_args is not _absent: 1591 self.fragment.add(path=fragment_path, args=fragment_args) 1592 1593 return self 1594 1595 def set(self, args=_absent, path=_absent, fragment=_absent, scheme=_absent, 1596 netloc=_absent, origin=_absent, fragment_path=_absent, 1597 fragment_args=_absent, fragment_separator=_absent, host=_absent, 1598 port=_absent, query=_absent, query_params=_absent, 1599 username=_absent, password=_absent): 1600 """ 1601 Set components of a url and return this furl instance, <self>. 1602 1603 If any overlapping, and hence possibly conflicting, parameters 1604 are provided, appropriate UserWarning's will be raised. The 1605 groups of parameters that could potentially overlap are 1606 1607 <scheme> and <origin> 1608 <origin>, <netloc>, and/or (<host> or <port>) 1609 <fragment> and (<fragment_path> and/or <fragment_args>) 1610 any two or all of <query>, <args>, and/or <query_params> 1611 1612 In all of the above groups, the latter parameter(s) take 1613 precedence over the earlier parameter(s). So, for example 1614 1615 furl('http://google.com/').set( 1616 netloc='yahoo.com:99', host='bing.com', port=40) 1617 1618 will result in a UserWarning being raised and the url becoming 1619 1620 'http://bing.com:40/' 1621 1622 not 1623 1624 'http://yahoo.com:99/ 1625 1626 Parameters: 1627 args: Shortcut for <query_params>. 1628 path: A list of path segments or a path string to adopt. 1629 fragment: Fragment string to adopt. 1630 scheme: Scheme string to adopt. 1631 netloc: Network location string to adopt. 1632 origin: Scheme and netloc. 1633 query: Query string to adopt. 1634 query_params: A dictionary of query keys and values or list of 1635 key:value items to adopt. 1636 fragment_path: A list of path segments to adopt for the 1637 fragment's path or a path string to adopt as the fragment's 1638 path. 1639 fragment_args: A dictionary of query keys and values or list 1640 of key:value items for the fragment's query to adopt. 1641 fragment_separator: Boolean whether or not there should be a 1642 '?' separator between the fragment path and fragment query. 1643 host: Host string to adopt. 1644 port: Port number to adopt. 1645 username: Username string to adopt. 1646 password: Password string to adopt. 1647 Raises: 1648 ValueError on invalid port. 1649 UserWarning if <scheme> and <origin> are provided. 1650 UserWarning if <origin>, <netloc> and/or (<host> and/or <port>) are 1651 provided. 1652 UserWarning if <query>, <args>, and/or <query_params> are provided. 1653 UserWarning if <fragment> and (<fragment_path>, 1654 <fragment_args>, and/or <fragment_separator>) are provided. 1655 Returns: <self>. 1656 """ 1657 def present(v): 1658 return v is not _absent 1659 1660 if present(scheme) and present(origin): 1661 s = ('Possible parameter overlap: <scheme> and <origin>. See ' 1662 'furl.set() documentation for more details.') 1663 warnings.warn(s, UserWarning) 1664 provided = [ 1665 present(netloc), present(origin), present(host) or present(port)] 1666 if sum(provided) >= 2: 1667 s = ('Possible parameter overlap: <origin>, <netloc> and/or ' 1668 '(<host> and/or <port>) provided. See furl.set() ' 1669 'documentation for more details.') 1670 warnings.warn(s, UserWarning) 1671 if sum(present(p) for p in [args, query, query_params]) >= 2: 1672 s = ('Possible parameter overlap: <query>, <args>, and/or ' 1673 '<query_params> provided. See furl.set() documentation for ' 1674 'more details.') 1675 warnings.warn(s, UserWarning) 1676 provided = [fragment_path, fragment_args, fragment_separator] 1677 if present(fragment) and any(present(p) for p in provided): 1678 s = ('Possible parameter overlap: <fragment> and ' 1679 '(<fragment_path>and/or <fragment_args>) or <fragment> ' 1680 'and <fragment_separator> provided. See furl.set() ' 1681 'documentation for more details.') 1682 warnings.warn(s, UserWarning) 1683 1684 # Guard against side effects on exception. 1685 original_url = self.url 1686 try: 1687 if username is not _absent: 1688 self.username = username 1689 if password is not _absent: 1690 self.password = password 1691 if netloc is not _absent: 1692 # Raises ValueError on invalid port or malformed IP. 1693 self.netloc = netloc 1694 if origin is not _absent: 1695 # Raises ValueError on invalid port or malformed IP. 1696 self.origin = origin 1697 if scheme is not _absent: 1698 self.scheme = scheme 1699 if host is not _absent: 1700 # Raises ValueError on invalid host or malformed IP. 1701 self.host = host 1702 if port is not _absent: 1703 self.port = port # Raises ValueError on invalid port. 1704 1705 if path is not _absent: 1706 self.path.load(path) 1707 if query is not _absent: 1708 self.query.load(query) 1709 if args is not _absent: 1710 self.query.load(args) 1711 if query_params is not _absent: 1712 self.query.load(query_params) 1713 if fragment is not _absent: 1714 self.fragment.load(fragment) 1715 if fragment_path is not _absent: 1716 self.fragment.path.load(fragment_path) 1717 if fragment_args is not _absent: 1718 self.fragment.query.load(fragment_args) 1719 if fragment_separator is not _absent: 1720 self.fragment.separator = fragment_separator 1721 except Exception: 1722 self.load(original_url) 1723 raise 1724 1725 return self 1726 1727 def remove(self, args=_absent, path=_absent, fragment=_absent, 1728 query=_absent, query_params=_absent, port=False, 1729 fragment_path=_absent, fragment_args=_absent, username=False, 1730 password=False): 1731 """ 1732 Remove components of this furl's URL and return this furl 1733 instance, <self>. 1734 1735 Parameters: 1736 args: Shortcut for query_params. 1737 path: A list of path segments to remove from the end of the 1738 existing path segments list, or a path string to remove from 1739 the end of the existing path string, or True to remove the 1740 path portion of the URL entirely. 1741 query: A list of query keys to remove from the query, if they 1742 exist, or True to remove the query portion of the URL 1743 entirely. 1744 query_params: A list of query keys to remove from the query, 1745 if they exist. 1746 port: If True, remove the port from the network location 1747 string, if it exists. 1748 fragment: If True, remove the fragment portion of the URL 1749 entirely. 1750 fragment_path: A list of path segments to remove from the end 1751 of the fragment's path segments or a path string to remove 1752 from the end of the fragment's path string. 1753 fragment_args: A list of query keys to remove from the 1754 fragment's query, if they exist. 1755 username: If True, remove the username, if it exists. 1756 password: If True, remove the password, if it exists. 1757 Returns: <self>. 1758 """ 1759 if username is True: 1760 self.username = None 1761 if password is True: 1762 self.password = None 1763 if port is True: 1764 self.port = None 1765 if path is not _absent: 1766 self.path.remove(path) 1767 1768 if args is not _absent: 1769 self.query.remove(args) 1770 if query is not _absent: 1771 self.query.remove(query) 1772 if query_params is not _absent: 1773 self.query.remove(query_params) 1774 1775 if fragment is not _absent: 1776 self.fragment.remove(fragment) 1777 if fragment_path is not _absent: 1778 self.fragment.path.remove(fragment_path) 1779 if fragment_args is not _absent: 1780 self.fragment.query.remove(fragment_args) 1781 1782 return self 1783 1784 def tostr(self, query_delimiter='&', query_quote_plus=True, 1785 query_dont_quote=''): 1786 encoded_query = self.query.encode( 1787 query_delimiter, query_quote_plus, query_dont_quote) 1788 url = urllib.parse.urlunsplit(( 1789 self.scheme or '', # Must be text type in Python 3. 1790 self.netloc, 1791 str(self.path), 1792 encoded_query, 1793 str(self.fragment), 1794 )) 1795 1796 # Differentiate between '' and None values for scheme and netloc. 1797 if self.scheme == '': 1798 url = ':' + url 1799 1800 if self.netloc == '': 1801 if self.scheme is None: 1802 url = '//' + url 1803 elif strip_scheme(url) == '': 1804 url = url + '//' 1805 1806 return str(url) 1807 1808 def join(self, *urls): 1809 for url in urls: 1810 if not isinstance(url, six.string_types): 1811 url = str(url) 1812 newurl = urljoin(self.url, url) 1813 self.load(newurl) 1814 return self 1815 1816 def copy(self): 1817 return self.__class__(self) 1818 1819 def asdict(self): 1820 return { 1821 'url': self.url, 1822 'scheme': self.scheme, 1823 'username': self.username, 1824 'password': self.password, 1825 'host': self.host, 1826 'host_encoded': idna_encode(self.host), 1827 'port': self.port, 1828 'netloc': self.netloc, 1829 'origin': self.origin, 1830 'path': self.path.asdict(), 1831 'query': self.query.asdict(), 1832 'fragment': self.fragment.asdict(), 1833 } 1834 1835 def __truediv__(self, path): 1836 return self.copy().add(path=path) 1837 1838 def __eq__(self, other): 1839 try: 1840 return self.url == other.url 1841 except AttributeError: 1842 return None 1843 1844 def __ne__(self, other): 1845 return not self == other 1846 1847 def __setattr__(self, attr, value): 1848 if (not PathCompositionInterface.__setattr__(self, attr, value) and 1849 not QueryCompositionInterface.__setattr__(self, attr, value) and 1850 not FragmentCompositionInterface.__setattr__(self, attr, value)): 1851 object.__setattr__(self, attr, value) 1852 1853 def __unicode__(self): 1854 return self.tostr() 1855 1856 def __repr__(self): 1857 return "%s('%s')" % (self.__class__.__name__, str(self)) 1858