1# -*- coding: utf-8 -*- 2# cython: language_level=3, always_allow_keywords=True 3 4## Copyright 1999-2018 by LivingLogic AG, Bayreuth/Germany 5## Copyright 1999-2018 by Walter Dörwald 6## 7## All Rights Reserved 8## 9## See ll/xist/__init__.py for the license 10 11 12""" 13:mod:`ll.url` contains an :rfc:`2396` compliant implementation of URLs and 14classes for accessing resource metadata as well as file like classes for 15reading and writing resource data. 16 17These three levels of functionality are implemented in three classes: 18 19:class:`URL` 20 :class:`URL` objects are the names of resources and can be used and 21 modified, regardless of the fact whether these resources actually exits. 22 :class:`URL` objects never hits the hard drive or the net. 23 24:class:`Connection` 25 :class:`Connection` objects contain functionality that accesses and 26 changes file metadata (like last modified date, permission bits, 27 directory structure etc.). A connection object can be created by calling 28 the :meth:`connect` method on a :class:`URL` object. 29 30:class:`Resource` 31 :class:`Resource` objects are file like objects that work with the actual 32 bytes that make up the file data. This functionality lives in the 33 :class:`Resource` class and its subclasses. Creating a resource is done 34 by calling the :meth:`open` method on a :class:`Connection` or a 35 :class:`URL`. 36""" 37 38 39import os, urllib.request, urllib.error, urllib.parse as urlparse, mimetypes, io, warnings 40import datetime, cgi, re, fnmatch, pickle, errno, threading 41import email 42from email import utils 43 44default_ssh_python = os.environ.get("LL_URL_SSH_PYTHON") 45 46# don't fail when :mod:`pwd` or :mod:`grp` can't be imported, because if this 47# doesn't work, we're probably on Windows and :func:`os.chown` won't work anyway. 48try: 49 import pwd, grp 50except ImportError: 51 pass 52 53try: 54 import execnet 55except ImportError: 56 pass 57 58try: 59 from PIL import Image 60except ImportError: 61 pass 62 63from ll import misc 64 65 66__docformat__ = "reStructuredText" 67 68 69def mime2dt(s): 70 return datetime.datetime(*utils.parsedate(s)[:7]) 71 72 73weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] 74monthname = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] 75 76 77def httpdate(dt): 78 """ 79 Return a string suitable for a "Last-Modified" and "Expires" header. 80 81 :obj:`dt` is a :class:`datetime.datetime` object in UTC. 82 """ 83 return f"{weekdayname[dt.weekday()]}, {dt.day:02d} {monthname[dt.month]:3} {dt.year:4} {dt.hour:02}:{dt.minute:02}:{dt.second:02} GMT" 84 85 86def _normalizepath(path_segments): 87 """ 88 Internal helper function for normalizing a path list. 89 90 Should be equivalent to RFC2396, Section 5.2 (6) (c)-(f) with the exception 91 of removing empty path_segments. 92 """ 93 new_path_segments = [] 94 l = len(path_segments) 95 for i in range(l): 96 segment = path_segments[i] 97 if not segment or segment == ".": 98 if i == l-1: 99 new_path_segments.append("") 100 elif segment == ".." and len(new_path_segments) and new_path_segments[-1] != "..": 101 new_path_segments.pop() 102 if i == l-1: 103 new_path_segments.append("") 104 else: 105 new_path_segments.append(segment) 106 return new_path_segments 107 108 109def _escape(s, safe="".join(chr(c) for c in range(128))): 110 return urlparse.quote(s, safe) 111 112 113_unescape = urlparse.unquote 114 115 116alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 117alphanum = alpha + "0123456789" 118mark = "-_.!~*'()" 119additionalsafe = "[]" 120safe = alphanum + mark + additionalsafe 121pathsafe = safe + ":@&=$,;+" + "|" # add "|" for Windows paths 122querysafe = alphanum 123fragsafe = alphanum 124 125schemecharfirst = alpha 126schemechar = alphanum + "+-." 127 128 129def _urlencode(query_parts): 130 if query_parts is not None: 131 res = [] 132 # generate a canonical order for the names 133 items = sorted(query_parts.items()) 134 for (name, values) in items: 135 if not isinstance(values, (list, tuple)): 136 values = (values,) 137 else: 138 # generate a canonical order for the values 139 values.sort() 140 for value in values: 141 res.append(f"{_escape(name, querysafe)}={_escape(value, querysafe)}") 142 return "&".join(res) 143 else: 144 return None 145 146 147def compilepattern(pattern, ignorecase=False): 148 if pattern is None: 149 return None 150 elif isinstance(pattern, str): 151 return (re.compile(fnmatch.translate(pattern), re.I if ignorecase else 0).match,) 152 else: 153 return tuple(re.compile(fnmatch.translate(p), re.I if ignorecase else 0).match for p in pattern) 154 155 156def matchpatterns(name, include, exclude): 157 if include and not any(matcher(name) is not None for matcher in include): 158 return False 159 if exclude and any(matcher(name) is not None for matcher in exclude): 160 return False 161 return True 162 163 164class Context(object): 165 """ 166 Working with URLs (e.g. calling :meth:`URL.open` or :meth:`URL.connect`) 167 involves :class:`Connection` objects. To avoid constantly creating new 168 connections you can pass a :class:`Context` object to those methods. 169 Connections will be stored in the :class:`Context` object and will be 170 reused by those methods. 171 172 A :class:`Context` object can also be used as a context manager. This context 173 object will be used for all :meth:`open` and :meth:`connect` calls inside the 174 :keyword:`with` block. (Note that after the end of the :keyword:`with` block 175 all connections will be closed.) 176 """ 177 def __init__(self): 178 self.schemes = {} 179 180 def closeall(self): 181 """ 182 Close and drop all connections in this context. 183 """ 184 for scheme in self.schemes: 185 schemereg[scheme].closeall(self) 186 self.schemes = {} 187 188 def __enter__(self): 189 self.prev = threadlocalcontext.context 190 threadlocalcontext.context = self 191 192 def __exit__(self, *exc_info): 193 threadlocalcontext.context = self.prev 194 del self.prev 195 self.closeall() 196 197 198class ThreadLocalContext(threading.local): 199 context = Context() 200 201threadlocalcontext = ThreadLocalContext() 202 203def getcontext(context): 204 if context is None: 205 return threadlocalcontext.context 206 return context 207 208 209### 210### Cursor for the :meth:`walk` method 211### 212 213class Cursor(object): 214 """ 215 A :class:`Cursor` object is used by the :meth:`walk` method during directory 216 traversal. It contains information about the state of the traversal and can 217 be used to influence which directories are traversed and in which order. 218 219 Information about the state of the traversal is provided in the following 220 attributes: 221 222 ``rooturl`` 223 The URL where traversal has been started (i.e. the object for which the 224 :meth:`walk` method has been called) 225 226 ``url`` 227 The current URL being traversed. 228 229 ``event`` 230 A string that specifies which event is currently handled. Possible values 231 are: ``"beforedir"``, ``"afterdir"`` and ``"file"``. A ``"beforedir"`` 232 event is emitted before a directory is entered. ``"afterdir"`` 233 is emitted after a directory has been entered. ``"file"`` is emitted when 234 a file is encountered. 235 236 ``isdir`` 237 True if ``url`` refers to a directory. 238 239 ``isfile`` 240 Tur if ``url`` refers to a regular file. 241 242 The following attributes specify which part of the tree should be traversed: 243 244 ``beforedir`` 245 Should the generator yield ``"beforedir"`` events? 246 247 ``afterdir`` 248 Should the generator yield ``"afterdir"`` events? 249 250 ``file`` 251 Should the generator yield ``"file"`` events? 252 253 ``enterdir`` 254 Should the directory be entered? 255 256 Note that if any of these attributes is changed by the code consuming the 257 generator, this new value will be used for the next traversal step once the 258 generator is resumed and will be reset to its initial value (specified in 259 the constructor) afterwards. 260 """ 261 def __init__(self, url, beforedir=True, afterdir=False, file=True, enterdir=False): 262 """ 263 Create a new :class:`Cursor` object for a tree traversal rooted at the node 264 :obj:`node`. 265 266 The arguments :obj:`beforedir`, :obj:`afterdir`, :obj:`file` and 267 :obj:`enterdir` are used as the initial values for the attributes of 268 the same name. (see the class docstring for info about their use). 269 """ 270 self.rooturl = self.url = url 271 self.event = None 272 self.beforedir = self._beforedir = beforedir 273 self.afterdir = self._afterdir = afterdir 274 self.file = self._file = file 275 self.enterdir = self._enterdir = enterdir 276 self.isdir = self.isfile = None 277 278 def restore(self): 279 """ 280 Restore the attributes ``beforedir``, ``afterdir``, ``file`` and 281 ``enterdir`` to their initial value. 282 """ 283 self.beforedir = self._beforedir 284 self.afterdir = self._afterdir 285 self.file = self._file 286 self.enterdir = self._enterdir 287 288 289class Connection(object): 290 """ 291 A :class:`Connection` object is used for accessing and modifying the 292 metadata associated with a file. It is created by calling the 293 :meth:`connect` method on a :class:`URL` object. 294 """ 295 296 @misc.notimplemented 297 def stat(self, url): 298 """ 299 Return the result of a :func:`stat` call on the file :obj:`url`. 300 """ 301 302 @misc.notimplemented 303 def lstat(self, url): 304 """ 305 Return the result of a :func:`stat` call on the file :obj:`url`. Like 306 :meth:`stat`, but does not follow symbolic links. 307 """ 308 309 @misc.notimplemented 310 def chmod(self, url, mode): 311 """ 312 Set the access mode of the file :obj:`url` to :obj:`mode`. 313 """ 314 315 @misc.notimplemented 316 def chown(self, url, owner=None, group=None): 317 """ 318 Change the owner and/or group of the file :obj:`url`. 319 """ 320 321 @misc.notimplemented 322 def lchown(self, url, owner=None, group=None): 323 """ 324 Change the owner and/or group of the file :obj:`url` 325 (ignoring symbolic links). 326 """ 327 328 @misc.notimplemented 329 def uid(self, url): 330 """ 331 Return the user id of the owner of the file :obj:`url`. 332 """ 333 334 @misc.notimplemented 335 def gid(self, url): 336 """ 337 Return the group id the file :obj:`url` belongs to. 338 """ 339 340 @misc.notimplemented 341 def owner(self, url): 342 """ 343 Return the name of the owner of the file :obj:`url`. 344 """ 345 346 @misc.notimplemented 347 def group(self, url): 348 """ 349 Return the name of the group the file :obj:`url` belongs to. 350 """ 351 352 def mimetype(self, url): 353 """ 354 Return the mimetype of the file :obj:`url`. 355 """ 356 name = self._url2filename(url) 357 mimetype = mimetypes.guess_type(name)[0] 358 return mimetype or "application/octet-stream" 359 360 @misc.notimplemented 361 def exists(self, url): 362 """ 363 Test whether the file :obj:`url` exists. 364 """ 365 366 @misc.notimplemented 367 def isfile(self, url): 368 """ 369 Test whether the resource :obj:`url` is a file. 370 """ 371 372 @misc.notimplemented 373 def isdir(self, url): 374 """ 375 Test whether the resource :obj:`url` is a directory. 376 """ 377 378 @misc.notimplemented 379 def islink(self, url): 380 """ 381 Test whether the resource :obj:`url` is a link. 382 """ 383 384 @misc.notimplemented 385 def ismount(self, url): 386 """ 387 Test whether the resource :obj:`url` is a mount point. 388 """ 389 390 @misc.notimplemented 391 def access(self, url, mode): 392 """ 393 Test for access to the file/resource :obj:`url`. 394 """ 395 396 def size(self, url): 397 """ 398 Return the size of the file :obj:`url`. 399 """ 400 return self.stat(url).st_size 401 402 def imagesize(self, url): 403 """ 404 Return the size of the image :obj:`url` (if the resource is an image file) 405 as a ``(width, height)`` tuple. This requires the PIL__. 406 407 __ http://www.pythonware.com/products/pil/ 408 """ 409 stream = self.open(url, mode="rb") 410 img = Image.open(stream) # Requires PIL 411 imagesize = img.size 412 stream.close() 413 return imagesize 414 415 def cdate(self, url): 416 """ 417 Return the "metadate change" date of the file/resource :obj:`url` 418 as a :class:`datetime.datetime` object in UTC. 419 """ 420 return datetime.datetime.utcfromtimestamp(self.stat(url).st_ctime) 421 422 def adate(self, url): 423 """ 424 Return the last access date of the file/resource :obj:`url` as a 425 :class:`datetime.datetime` object in UTC. 426 """ 427 return datetime.datetime.utcfromtimestamp(self.stat(url).st_atime) 428 429 def mdate(self, url): 430 """ 431 Return the last modification date of the file/resource :obj:`url` 432 as a :class:`datetime.datetime` object in UTC. 433 """ 434 return datetime.datetime.utcfromtimestamp(self.stat(url).st_mtime) 435 436 def resheaders(self, url): 437 """ 438 Return the MIME headers for the file/resource :obj:`url`. 439 """ 440 return email.message_from_string(f"Content-Type: {self.mimetype(url)}\nContent-Length: {self.size(url)}\nLast-modified: {httpdate(self.mdate(url))}\n") 441 442 @misc.notimplemented 443 def remove(self, url): 444 """ 445 Remove the file :obj:`url`. 446 """ 447 448 @misc.notimplemented 449 def rmdir(self, url): 450 """ 451 Remove the directory :obj:`url`. 452 """ 453 454 @misc.notimplemented 455 def rename(self, url, target): 456 """ 457 Renames :obj:`url` to :obj:`target`. This might not work if :obj:`target` 458 has a different scheme than :obj:`url` (or is on a different server). 459 """ 460 461 @misc.notimplemented 462 def link(self, url, target): 463 """ 464 Create a hard link from :obj:`url` to :obj:`target`. This will not work 465 if :obj:`target` has a different scheme than :obj:`url` (or is on a 466 different server). 467 """ 468 469 @misc.notimplemented 470 def symlink(self, url, target): 471 """ 472 Create a symbolic link from :obj:`url` to :obj:`target`. This will not 473 work if :obj:`target` has a different scheme than :obj:`url` (or is on a 474 different server). 475 """ 476 477 @misc.notimplemented 478 def chdir(self, url): 479 """ 480 Change the current directory to :obj:`url`. 481 """ 482 os.chdir(self.name) 483 484 @misc.notimplemented 485 def mkdir(self, url, mode=0o777): 486 """ 487 Create the directory :obj:`url`. 488 """ 489 490 @misc.notimplemented 491 def makedirs(self, url, mode=0o777): 492 """ 493 Create the directory :obj:`url` and all intermediate ones. 494 """ 495 496 @misc.notimplemented 497 def walk(self, url, beforedir=True, afterdir=False, file=True, enterdir=True): 498 """ 499 Return an iterator for traversing the directory hierarchy rooted at 500 the directory :obj:`url`. 501 502 Each item produced by the iterator is a :class:`Cursor` object. 503 It contains information about the state of the traversal and can be used 504 to influence which parts of the directory hierarchy are traversed and in 505 which order. 506 507 The arguments :obj:`beforedir`, :obj:`afterdir`, 508 :obj:`file` and :obj:`enterdir` specify how the directory hierarchy should 509 be traversed. For more information see the :class:`Cursor` class. 510 511 Note that the :class:`Cursor` object is reused by :meth:`walk`, so you 512 can't rely on any attributes remaining the same across calls to 513 :func:`next`. 514 515 The following example shows how to traverse the current directory, print 516 all files except those in certain directories:: 517 518 from ll import url 519 520 for cursor in url.here().walk(beforedir=True, afterdir=False, file=True): 521 if cursor.isdir: 522 if cursor.url.path[-2] in (".git", "build", "dist", "__pycache__"): 523 cursor.enterdir = False 524 else: 525 print(cursor.url) 526 """ 527 528 def listdir(self, url, include=None, exclude=None, ignorecase=False): 529 """ 530 Iterates over items in the directory :obj:`url`. The items produced are 531 :class:`URL` objects relative to :obj:`url`. 532 533 With the optional :obj:`include` argument, this only lists items whose 534 names match the given pattern. Items matching the optional pattern 535 :obj:`exclude` will not be listed. :obj:`include` and :obj:`exclude` can 536 be strings (which will be interpreted as :mod:`fnmatch` style filename 537 patterns) or lists of strings. If :obj:`ignorecase` is true 538 case-insensitive name matching will be performed. 539 """ 540 include = compilepattern(include, ignorecase) 541 exclude = compilepattern(exclude, ignorecase) 542 for cursor in self.walk(url, beforedir=True, afterdir=False, file=True, enterdir=False): 543 if matchpatterns(cursor.url.path[-1-cursor.isdir], include, exclude): 544 yield cursor.url 545 546 def files(self, url, include=None, exclude=None, ignorecase=False): 547 """ 548 Iterates over files in the directory :obj:`url`. The items produced 549 are :class:`URL` objects relative to :obj:`url`. 550 551 With the optional :obj:`include` argument, this only lists files whose 552 names match the given pattern. Files matching the optional pattern 553 :obj:`exclude` will not be listed. :obj:`include` and :obj:`exclude` can 554 be strings (which will be interpreted as :mod:`fnmatch` style filename 555 patterns) or lists of strings. If :obj:`ignorecase` is true 556 case-insensitive name matching will be performed. 557 """ 558 include = compilepattern(include, ignorecase) 559 exclude = compilepattern(exclude, ignorecase) 560 for cursor in self.walk(url, beforedir=False, afterdir=False, file=True, enterdir=False): 561 if cursor.isfile and matchpatterns(cursor.url.path[-1], include, exclude): 562 yield cursor.url 563 564 def dirs(self, url, include=None, exclude=None, ignorecase=False): 565 """ 566 Iterates over directories in the directory :obj:`url`. The items produced 567 are :class:`URL` objects relative to :obj:`url`. 568 569 With the optional :obj:`include` argument, this only directories items 570 whose names match the given pattern. Directories matching the optional 571 pattern :obj:`exclude` will not be listed. :obj:`include` and 572 :obj:`exclude` can be strings (which will be interpreted as :mod:`fnmatch` 573 style filename patterns) or lists of strings. If :obj:`ignorecase` is 574 true case-insensitive name matching will be performed. 575 """ 576 include = compilepattern(include, ignorecase) 577 exclude = compilepattern(exclude, ignorecase) 578 for cursor in self.walk(url, beforedir=True, afterdir=False, file=False, enterdir=False): 579 if cursor.isdir and matchpatterns(cursor.url.path[-2], include, exclude): 580 yield cursor.url 581 582 def walkall(self, url, include=None, exclude=None, enterdir=None, skipdir=None, ignorecase=False): 583 """ 584 Recursively iterate over files and subdirectories. The iterator 585 yields :class:`URL` objects naming each child URL of the directory 586 :obj:`url` and its descendants relative to :obj:`url`. This performs 587 a depth-first traversal, returning each directory before all its children. 588 589 With the optional :obj:`include` argument, only yield items whose 590 names match the given pattern. Items matching the optional pattern 591 :obj:`exclude` will not be listed. Directories that don't match the 592 optional pattern :obj:`enterdir` or match the pattern :obj:`skipdir` 593 will not be traversed. :obj:`include`, :obj:`exclude`, :obj:`enterdir` 594 and :obj:`skipdir` can be strings (which will be interpreted as 595 :mod:`fnmatch` style filename patterns) or lists of strings. 596 If :obj:`ignorecase` is true case-insensitive name matching will be 597 performed. 598 """ 599 include = compilepattern(include, ignorecase) 600 exclude = compilepattern(exclude, ignorecase) 601 enterdir = compilepattern(enterdir, ignorecase) 602 skipdir = compilepattern(skipdir, ignorecase) 603 for cursor in self.walk(url, beforedir=True, afterdir=False, file=True, enterdir=True): 604 name = cursor.url.path[-1-cursor.isdir] 605 if matchpatterns(name, include, exclude): 606 yield cursor.url 607 if cursor.isdir: 608 cursor.enterdir = matchpatterns(name, enterdir, skipdir) 609 610 def walkfiles(self, url, include=None, exclude=None, enterdir=None, skipdir=None, ignorecase=False): 611 """ 612 Return a recursive iterator over files in the directory :obj:`url`. 613 614 With the optional :obj:`include` argument, only yield files whose names 615 match the given pattern. Files matching the optional pattern 616 :obj:`exclude` will not be listed. Directories that don't match the 617 optional pattern :obj:`enterdir` or match the pattern :obj:`skipdir` 618 will not be traversed. :obj:`include`, :obj:`exclude`, :obj:`enterdir` 619 and :obj:`skipdir` can be strings (which will be interpreted as 620 :mod:`fnmatch` style filename patterns) or lists of strings. 621 If :obj:`ignorecase` is true case-insensitive name matching will be 622 performed. 623 """ 624 include = compilepattern(include, ignorecase) 625 exclude = compilepattern(exclude, ignorecase) 626 enterdir = compilepattern(enterdir, ignorecase) 627 skipdir = compilepattern(skipdir, ignorecase) 628 for cursor in self.walk(url, beforedir=True, afterdir=False, file=True, enterdir=True): 629 if cursor.isfile: 630 if matchpatterns(cursor.url.path[-1], include, exclude): 631 yield cursor.url 632 else: 633 cursor.enterdir = matchpatterns(cursor.url.path[-2], enterdir, skipdir) 634 635 def walkdirs(self, url, include=None, exclude=None, enterdir=None, skipdir=None, ignorecase=False): 636 """ 637 Return a recursive iterator over subdirectories in the directory 638 :obj:`url`. 639 640 With the optional :obj:`include` argument, only yield directories whose 641 names match the given pattern. Items matching the optional pattern 642 :obj:`exclude` will not be listed. Directories that don't match the 643 optional pattern :obj:`enterdir` or match the pattern :obj:`skipdir` 644 will not be traversed. :obj:`include`, :obj:`exclude`, :obj:`enterdir` 645 and :obj:`skipdir` can be strings (which will be interpreted as 646 :mod:`fnmatch` style filename patterns) or lists of strings. 647 If :obj:`ignorecase` is true case-insensitive name matching will be 648 performed. 649 """ 650 include = compilepattern(include, ignorecase) 651 exclude = compilepattern(exclude, ignorecase) 652 enterdir = compilepattern(enterdir, ignorecase) 653 skipdir = compilepattern(skipdir, ignorecase) 654 for cursor in self.walk(url, beforedir=True, afterdir=False, file=False, enterdir=True): 655 name = cursor.url.path[-2] 656 if matchpatterns(name, include, exclude): 657 yield cursor.url 658 cursor.enterdir = matchpatterns(name, enterdir, skipdir) 659 660 @misc.notimplemented 661 def open(self, url, *args, **kwargs): 662 """ 663 Open :obj:`url` for reading or writing. :meth:`open` returns a 664 :class:`Resource` object. 665 666 Which additional parameters are supported depends on the actual 667 resource created. Some common parameters are: 668 669 :obj:`mode` : string 670 A string indicating how the file is to be opened (just like the 671 mode argument for the builtin :func:`open` (e.g. ``"rb"`` or 672 ``"wb"``). 673 674 :obj:`headers` : mapping 675 Additional headers to use for an HTTP request. 676 677 :obj:`data` : byte string 678 Request body to use for an HTTP POST request. 679 680 :obj:`python` : string or :const:`None` 681 Name of the Python interpreter to use on the remote side (used by 682 ``ssh`` URLs) 683 684 :obj:`nice` : int or :const:`None` 685 Nice level for the remote python (used by ``ssh`` URLs) 686 """ 687 688 689class LocalConnection(Connection): 690 def _url2filename(self, url): 691 return os.path.expanduser(url.local()) 692 693 def stat(self, url): 694 return os.stat(self._url2filename(url)) 695 696 def lstat(self, url): 697 return os.lstat(self._url2filename(url)) 698 699 def chmod(self, url, mode): 700 name = self._url2filename(url) 701 os.chmod(name, mode) 702 703 def _chown(self, func, url, owner, group): 704 name = self._url2filename(url) 705 if owner is not None or group is not None: 706 if owner is None or group is None: 707 stat = os.stat(name) 708 if owner is None: 709 owner = stat.st_uid 710 elif isinstance(owner, str): 711 owner = pwd.getpwnam(owner)[2] 712 if group is None: 713 group = stat.st_gid 714 elif isinstance(group, str): 715 group = grp.getgrnam(group)[2] 716 func(name, owner, group) 717 718 def chown(self, url, owner=None, group=None): 719 self._chown(os.chown, url, owner, group) 720 721 def lchown(self, url, owner=None, group=None): 722 self._chown(os.lchown, url, owner, group) 723 724 def chdir(self, url): 725 os.chdir(self._url2filename(url)) 726 727 def mkdir(self, url, mode=0o777): 728 os.mkdir(self._url2filename(url), mode) 729 730 def makedirs(self, url, mode=0o777): 731 os.makedirs(self._url2filename(url), mode) 732 733 def uid(self, url): 734 return self.stat(url).st_uid 735 736 def gid(self, url): 737 return self.stat(url).st_gid 738 739 def owner(self, url): 740 return pwd.getpwuid(self.uid(url))[0] 741 742 def group(self, url): 743 return grp.getgrgid(self.gid(url))[0] 744 745 def exists(self, url): 746 return os.path.exists(self._url2filename(url)) 747 748 def isfile(self, url): 749 return os.path.isfile(self._url2filename(url)) 750 751 def isdir(self, url): 752 return os.path.isdir(self._url2filename(url)) 753 754 def islink(self, url): 755 return os.path.islink(self._url2filename(url)) 756 757 def ismount(self, url): 758 return os.path.ismount(self._url2filename(url)) 759 760 def access(self, url, mode): 761 return os.access(self._url2filename(url), mode) 762 763 def remove(self, url): 764 return os.remove(self._url2filename(url)) 765 766 def rmdir(self, url): 767 return os.rmdir(self._url2filename(url)) 768 769 def rename(self, url, target): 770 name = self._url2filename(url) 771 if not isinstance(target, URL): 772 target = URL(target) 773 target = self._url2filename(target) 774 os.rename(name, target) 775 776 def link(self, url, target): 777 name = self._url2filename(url) 778 if not isinstance(target, URL): 779 target = URL(target) 780 target = self._url2filename(target) 781 os.link(name, target) 782 783 def symlink(self, url, target): 784 name = self._url2filename(url) 785 if not isinstance(target, URL): 786 target = URL(target) 787 target = self._url2filename(target) 788 os.symlink(name, target) 789 790 def _walk(self, cursor, base, name): 791 def _event(url, event): 792 cursor.url = url 793 cursor.event = event 794 cursor.isdir = event != "file" 795 cursor.isfile = not cursor.isdir 796 return cursor 797 798 if name: 799 fullname = os.path.join(base, name) 800 else: 801 fullname = base 802 for childname in sorted(os.listdir(fullname)): 803 fullchildname = os.path.join(fullname, childname) 804 isdir = os.path.isdir(fullchildname) 805 relchildname = os.path.join(name, childname) if name else childname 806 emitbeforedir = cursor.beforedir 807 emitafterdir = cursor.afterdir 808 emitfile = cursor.file 809 enterdir = cursor.enterdir 810 if isdir: 811 if emitbeforedir or emitafterdir: 812 dirurl = Dir(relchildname, scheme=None) 813 if emitbeforedir: 814 yield _event(dirurl, "beforedir") 815 # The user may have altered ``cursor`` attributes outside the generator, so we refetch them 816 emitbeforedir = cursor.beforedir 817 emitafterdir = cursor.afterdir 818 emitfile = cursor.file 819 enterdir = cursor.enterdir 820 cursor.restore() 821 if enterdir: 822 yield from self._walk(cursor, base, relchildname) 823 if emitafterdir: 824 yield _event(dirurl, "afterdir") 825 cursor.restore() 826 else: 827 if emitfile: 828 yield _event(File(relchildname, scheme=None), "file") 829 cursor.restore() 830 831 def walk(self, url, beforedir=True, afterdir=False, file=True, enterdir=True): 832 cursor = Cursor(url, beforedir=beforedir, afterdir=afterdir, file=file, enterdir=enterdir) 833 return self._walk(cursor, url.local(), "") 834 835 def open(self, url, *args, **kwargs): 836 return FileResource(url, *args, **kwargs) 837 838 839class SshConnection(Connection): 840 remote_code = """ 841 import sys, os, pickle, re, fnmatch 842 try: 843 from urllib import request 844 except ImportError: 845 import urllib as request 846 try: 847 next 848 except NameError: 849 def next(iter): 850 return iter.next() 851 try: 852 unicode 853 except NameError: 854 unicode = str 855 856 files = {} 857 858 def ownergroup(filename, owner=None, group=None): 859 if owner is not None or group is not None: 860 if owner is None or group is None: 861 if isinstance(filename, unicode): 862 stat = os.stat(filename) 863 else: 864 stat = os.fstat(files[filename].fileno()) 865 if owner is None: 866 owner = stat.st_uid 867 elif isinstance(owner, unicode): 868 import pwd 869 owner = pwd.getpwnam(owner)[2] 870 871 if group is None: 872 group = stat.st_gid 873 elif isinstance(group, unicode): 874 import grp 875 group = grp.getgrnam(group)[2] 876 return (owner, group) 877 878 def compilepattern(pattern, ignorecase=False): 879 if pattern is None: 880 return None 881 elif isinstance(pattern, unicode): 882 return (re.compile(fnmatch.translate(pattern), re.I if ignorecase else 0).match,) 883 else: 884 return tuple(re.compile(fnmatch.translate(p), re.I if ignorecase else 0).match for p in pattern) 885 886 def matchpatterns(name, include, exclude): 887 if include and not any(matcher(name) is not None for matcher in include): 888 return False 889 if exclude and any(matcher(name) is not None for matcher in exclude): 890 return False 891 return True 892 893 def listdir(dirname): 894 result = [] 895 for childname in sorted(os.listdir(dirname)): 896 fullchildname = os.path.join(dirname, childname) 897 isdir = os.path.isdir(fullchildname) 898 result.append((isdir, childname)) 899 return result 900 901 while True: 902 (filename, cmdname, args, kwargs) = channel.receive() 903 if isinstance(filename, unicode): 904 filename = os.path.expanduser(request.url2pathname(filename)) 905 data = None 906 try: 907 if cmdname == "open": 908 try: 909 stream = open(filename, *args, **kwargs) 910 except IOError: 911 exc = sys.exc_info()[1] 912 if args: 913 mode = args[0] 914 else: 915 mode = kwargs.get("mode", "rb") 916 if "w" not in mode or exc.errno != 2: # didn't work for some other reason than a non existing directory 917 raise 918 (splitpath, splitname) = os.path.split(filename) 919 if splitpath: 920 os.makedirs(splitpath) 921 stream = open(filename, *args, **kwargs) 922 else: 923 raise # we don't have a directory to make so pass the error on 924 data = id(stream) 925 files[data] = stream 926 elif cmdname == "stat": 927 if isinstance(filename, unicode): 928 data = tuple(os.stat(filename)) 929 else: 930 data = tuple(os.fstat(files[filename].fileno())) 931 elif cmdname == "lstat": 932 data = os.lstat(filename) 933 elif cmdname == "close": 934 try: 935 stream = files[filename] 936 except KeyError: 937 pass 938 else: 939 stream.close() 940 del files[filename] 941 elif cmdname == "chmod": 942 data = os.chmod(filename, *args, **kwargs) 943 elif cmdname == "chown": 944 (owner, group) = ownergroup(filename, *args, **kwargs) 945 if owner is not None: 946 data = os.chown(filename, owner, group) 947 elif cmdname == "lchown": 948 (owner, group) = ownergroup(filename, *args, **kwargs) 949 if owner is not None: 950 data = os.lchown(filename, owner, group) 951 elif cmdname == "uid": 952 stat = os.stat(filename) 953 data = stat.st_uid 954 elif cmdname == "gid": 955 stat = os.stat(filename) 956 data = stat.st_gid 957 elif cmdname == "owner": 958 import pwd 959 stat = os.stat(filename) 960 data = unicode(pwd.getpwuid(stat.st_uid)[0]) 961 elif cmdname == "group": 962 import grp 963 stat = os.stat(filename) 964 data = unicode(grp.getgrgid(stat.st_gid)[0]) 965 elif cmdname == "exists": 966 data = os.path.exists(filename) 967 elif cmdname == "isfile": 968 data = os.path.isfile(filename) 969 elif cmdname == "isdir": 970 data = os.path.isdir(filename) 971 elif cmdname == "islink": 972 data = os.path.islink(filename) 973 elif cmdname == "ismount": 974 data = os.path.ismount(filename) 975 elif cmdname == "access": 976 data = os.access(filename, *args, **kwargs) 977 elif cmdname == "remove": 978 data = os.remove(filename) 979 elif cmdname == "rmdir": 980 data = os.rmdir(filename) 981 elif cmdname == "rename": 982 data = os.rename(filename, os.path.expanduser(args[0])) 983 elif cmdname == "link": 984 data = os.link(filename, os.path.expanduser(args[0])) 985 elif cmdname == "symlink": 986 data = os.symlink(filename, os.path.expanduser(args[0])) 987 elif cmdname == "chdir": 988 data = os.chdir(filename) 989 elif cmdname == "mkdir": 990 data = os.mkdir(filename) 991 elif cmdname == "makedirs": 992 data = os.makedirs(filename) 993 elif cmdname == "makefifo": 994 data = os.makefifo(filename) 995 elif cmdname == "listdir": 996 data = listdir(filename) 997 elif cmdname == "next": 998 data = next(files[filename]) 999 else: 1000 data = getattr(files[filename], cmdname) 1001 data = data(*args, **kwargs) 1002 except StopIteration: 1003 exc = sys.exc_info()[1] 1004 channel.send((True, pickle.dumps(exc))) 1005 except Exception: 1006 exc = sys.exc_info()[1] 1007 channel.send((True, pickle.dumps(exc))) 1008 else: 1009 channel.send((False, data)) 1010 """ 1011 1012 def __init__(self, context, server, python=None, nice=None): 1013 # We don't have to store the context (this avoids cycles) 1014 self.server = server 1015 self.python = python 1016 self.nice = nice 1017 self._channel = None 1018 1019 def close(self): 1020 if self._channel is not None and not self._channel.isclosed(): 1021 self._channel.close() 1022 self._channel.gateway.exit() 1023 self._channel.gateway.join() 1024 1025 def _url2filename(self, url): 1026 if url.scheme != "ssh": 1027 raise ValueError(f"URL {url!r} is not an ssh URL") 1028 filename = str(url.path) 1029 if filename.startswith("/~"): 1030 filename = filename[1:] 1031 return filename 1032 1033 def _send(self, filename, cmd, *args, **kwargs): 1034 if self._channel is None: 1035 server = f"ssh={self.server}" 1036 python = self.python 1037 if python is None: 1038 python = default_ssh_python 1039 if python is not None: 1040 server += f"//python={python}" 1041 if self.nice is not None: 1042 server += f"//nice={self.nice}" 1043 gateway = execnet.makegateway(server) # This requires ``execnet`` (http://codespeak.net/execnet/) 1044 gateway.reconfigure(py2str_as_py3str=False, py3str_as_py2str=False) 1045 self._channel = gateway.remote_exec(self.remote_code) 1046 self._channel.send((filename, cmd, args, kwargs)) 1047 (isexc, data) = self._channel.receive() 1048 if isexc: 1049 raise pickle.loads(data, fix_imports=True) 1050 else: 1051 return data 1052 1053 def stat(self, url): 1054 filename = self._url2filename(url) 1055 data = self._send(filename, "stat") 1056 return os.stat_result(data) # channel returned a tuple => wrap it 1057 1058 def lstat(self, url): 1059 filename = self._url2filename(url) 1060 data = self._send(filename, "lstat") 1061 return os.stat_result(data) # channel returned a tuple => wrap it 1062 1063 def chmod(self, url, mode): 1064 return self._send(self._url2filename(url), "chmod", mode) 1065 1066 def chown(self, url, owner=None, group=None): 1067 return self._send(self._url2filename(url), "chown", owner, group) 1068 1069 def lchown(self, url, owner=None, group=None): 1070 return self._send(self._url2filename(url), "lchown", owner, group) 1071 1072 def chdir(self, url): 1073 return self._send(self._url2filename(url), "chdir") 1074 1075 def mkdir(self, url, mode=0o777): 1076 return self._send(self._url2filename(url), "mkdir", mode) 1077 1078 def makedirs(self, url, mode=0o777): 1079 return self._send(self._url2filename(url), "makedirs", mode) 1080 1081 def uid(self, url): 1082 return self._send(self._url2filename(url), "uid") 1083 1084 def gid(self, url): 1085 return self._send(self._url2filename(url), "gid") 1086 1087 def owner(self, url): 1088 return self._send(self._url2filename(url), "owner") 1089 1090 def group(self, url): 1091 return self._send(self._url2filename(url), "group") 1092 1093 def exists(self, url): 1094 return self._send(self._url2filename(url), "exists") 1095 1096 def isfile(self, url): 1097 return self._send(self._url2filename(url), "isfile") 1098 1099 def isdir(self, url): 1100 return self._send(self._url2filename(url), "isdir") 1101 1102 def islink(self, url): 1103 return self._send(self._url2filename(url), "islink") 1104 1105 def ismount(self, url): 1106 return self._send(self._url2filename(url), "ismount") 1107 1108 def access(self, url, mode): 1109 return self._send(self._url2filename(url), "access", mode) 1110 1111 def remove(self, url): 1112 return self._send(self._url2filename(url), "remove") 1113 1114 def rmdir(self, url): 1115 return self._send(self._url2filename(url), "rmdir") 1116 1117 def _cmdwithtarget(self, cmdname, url, target): 1118 filename = self._url2filename(url) 1119 if not isinstance(target, URL): 1120 target = URL(target) 1121 targetname = self._url2filename(target) 1122 if target.server != url.server: 1123 raise OSError(errno.EXDEV, os.strerror(errno.EXDEV)) 1124 return self._send(filename, cmdname, targetname) 1125 1126 def rename(self, url, target): 1127 return self._cmdwithtarget("rename", url, target) 1128 1129 def link(self, url, target): 1130 return self._cmdwithtarget("link", url, target) 1131 1132 def symlink(self, url, target): 1133 return self._cmdwithtarget("symlink", url, target) 1134 1135 def _walk(self, cursor, base, name): 1136 def _event(url, event): 1137 cursor.url = url 1138 cursor.event = event 1139 cursor.isdir = event != "file" 1140 cursor.isfile = not cursor.isdir 1141 return cursor 1142 1143 if name: 1144 fullname = os.path.join(base, name) 1145 else: 1146 fullname = base 1147 for (isdir, childname) in self._send(fullname, "listdir"): 1148 fullchildname = os.path.join(fullname, childname) 1149 relchildname = os.path.join(name, childname) if name else childname 1150 emitbeforedir = cursor.beforedir 1151 emitafterdir = cursor.afterdir 1152 emitfile = cursor.file 1153 enterdir = cursor.enterdir 1154 if isdir: 1155 if emitbeforedir or emitafterdir: 1156 dirurl = Dir(relchildname, scheme=None) 1157 if emitbeforedir: 1158 yield _event(dirurl, "beforedir") 1159 # The user may have altered ``cursor`` attributes outside the generator, so we refetch them 1160 emitbeforedir = cursor.beforedir 1161 emitafterdir = cursor.afterdir 1162 emitfile = cursor.file 1163 enterdir = cursor.enterdir 1164 cursor.restore() 1165 if enterdir: 1166 yield from self._walk(cursor, base, relchildname) 1167 if emitafterdir: 1168 yield _event(dirurl, "afterdir") 1169 cursor.restore() 1170 else: 1171 if emitfile: 1172 yield _event(File(relchildname, scheme=None), "file") 1173 cursor.restore() 1174 1175 def walk(self, url, beforedir=True, afterdir=False, file=True, enterdir=True): 1176 cursor = Cursor(url, beforedir=beforedir, afterdir=afterdir, file=file, enterdir=enterdir) 1177 return self._walk(cursor, self._url2filename(url), "") 1178 1179 def open(self, url, *args, **kwargs): 1180 return RemoteFileResource(self, url, *args, **kwargs) 1181 1182 def __repr__(self): 1183 return f"<{self.__class__.__module__}.{self.__class__.__name__} to {self.server!r} at {id(self):#x}>" 1184 1185 1186class URLConnection(Connection): 1187 def mimetype(self, url): 1188 return url.open().mimetype() 1189 1190 def size(self, url): 1191 return url.open().size() 1192 1193 def imagesize(self, url): 1194 return url.open(mode="rb").imagesize() 1195 1196 def mdate(self, url): 1197 return url.open(mode="rb").mdate() 1198 1199 def resheaders(self, url): 1200 return url.open(mode="rb").resheaders() 1201 1202 def isdir(self, url): 1203 # URLs never are directories (even if they might be (for URLs ending in ``/``), there's no way to call :meth:`listdir`) 1204 return False 1205 1206 def open(self, url, mode="rb", headers=None, data=None): 1207 if mode != "rb": 1208 raise NotImplementedError(f"mode {mode!r} not supported") 1209 return URLResource(url, headers=headers, data=data) 1210 1211 1212def here(scheme="file"): 1213 """ 1214 Return the current directory as an :class:`URL` object. 1215 """ 1216 return Dir(os.getcwd(), scheme) 1217 1218 1219def home(user="", scheme="file"): 1220 """ 1221 Return the home directory of the current user (or the user named :obj:`user`, 1222 if :obj:`user` is specified) as an :class:`URL` object:: 1223 1224 >>> url.home() 1225 URL('file:/home/walter/') 1226 >>> url.home("andreas") 1227 URL('file:/home/andreas/') 1228 """ 1229 return Dir(f"~{user}", scheme) 1230 1231 1232def root(): 1233 """ 1234 Return a blank ``root`` :class:`URL`, i.e. ``URL("root:")``. 1235 """ 1236 return URL("root:") 1237 1238 1239def File(name, scheme="file"): 1240 """ 1241 Turn a filename into an :class:`URL` object:: 1242 1243 >>> url.File("a#b") 1244 URL('file:a%23b') 1245 """ 1246 name = urllib.request.pathname2url(os.path.expanduser(name)) 1247 if name.startswith("///"): 1248 name = name[2:] 1249 url = URL(name) 1250 url.scheme = scheme 1251 return url 1252 1253 1254def Dir(name, scheme="file"): 1255 """ 1256 Turns a directory name into an :class:`URL` object, just like :func:`File`, 1257 but ensures that the path is terminated with a ``/``:: 1258 1259 >>> url.Dir("a#b") 1260 URL('file:a%23b/') 1261 """ 1262 name = urllib.request.pathname2url(os.path.expanduser(name)) 1263 if name: 1264 if not name.endswith("/"): 1265 name += "/" 1266 else: 1267 name = "./" 1268 if name.startswith("///"): 1269 name = name[2:] 1270 url = URL(name) 1271 url.scheme = scheme 1272 return url 1273 1274 1275def Ssh(user, host, path="~/"): 1276 """ 1277 Return a ssh :class:`URL` for the user :obj:`user` on the host :obj:`host` 1278 with the path :obj:`path`.:obj:`path` (defaulting to the users home 1279 directory) must be a path in URL notation (i.e. use ``/`` as directory 1280 separator):: 1281 1282 >>> url.Ssh("root", "www.example.com", "~joe/public_html/index.html") 1283 URL('ssh://root@www.example.com/~joe/public_html/index.html') 1284 1285 If the path starts with ``~/`` it is relative to this users home directory, 1286 if it starts with ``~user`` it's relative to the home directory of the user 1287 ``user``. In all othercases the path is considered to be absolute. 1288 """ 1289 url = URL() 1290 url.scheme = "ssh" 1291 url.userinfo = user 1292 url.host = host 1293 if path.startswith("~"): 1294 path = "/" + path 1295 url.path = path 1296 return url 1297 1298 1299def first(urls): 1300 """ 1301 Return the first URL from :obj:`urls` that exists as a real file or 1302 directory. :const:`None` entries in :obj:`urls` will be skipped. 1303 """ 1304 for url in urls: 1305 if url is not None: 1306 if url.exists(): 1307 return url 1308 1309 1310def firstdir(urls): 1311 """ 1312 Return the first URL from :obj:`urls` that exists as a real directory. 1313 :const:`None` entries in :obj:`urls` will be skipped. 1314 """ 1315 for url in urls: 1316 if url is not None: 1317 if url.isdir(): 1318 return url 1319 1320 1321def firstfile(urls): 1322 """ 1323 Return the first URL from :obj:`urls` that exists as a real file. 1324 :const:`None` entries in :obj:`urls` will be skipped. 1325 """ 1326 for url in urls: 1327 if url is not None: 1328 if url.isfile(): 1329 return url 1330 1331 1332class Resource(object): 1333 """ 1334 A :class:`Resource` is a base class that provides a file-like interface 1335 to local and remote files, URLs and other resources. 1336 1337 Each resource object has the following attributes: 1338 1339 :attr:`url` 1340 The URL for which this resource has been opened (i.e. 1341 ``foo.open().url is foo`` if ``foo`` is a :class:`URL` object); 1342 1343 :attr:`name` 1344 A string version of :attr:`url`; 1345 1346 :attr:`closed` 1347 A :class:`bool` specifying whether the resource has been closed 1348 (i.e. whether the :meth:`close` method has been called). 1349 1350 In addition to file methods (like :meth:`read`, :meth:`readlines`, 1351 :meth:`write` and :meth:`close`) a resource object might provide the 1352 following methods: 1353 1354 :meth:`finalurl` 1355 Return the real URL of the resource (this might be different from the 1356 :attr:`url` attribute in case of a redirect). 1357 1358 :meth:`size` 1359 Return the size of the file/resource. 1360 1361 :meth:`mdate` 1362 Return the last modification date of the file/resource as a 1363 :class:`datetime.datetime` object in UTC. 1364 1365 :meth:`mimetype` 1366 Return the mimetype of the file/resource. 1367 1368 :meth:`imagesize` 1369 Return the size of the image (if the resource is an image file) as a 1370 ``(width, height)`` tuple. This requires the PIL__. 1371 1372 __ http://www.pythonware.com/products/pil/ 1373 """ 1374 1375 def finalurl(self): 1376 return self.url 1377 1378 def imagesize(self): 1379 pos = self.tell() 1380 self.seek(0) 1381 img = Image.open(self) # Requires PIL 1382 imagesize = img.size 1383 self.seek(pos) 1384 return imagesize 1385 1386 def __enter__(self): 1387 return self 1388 1389 def __exit__(self, *exc_info): 1390 self.close() 1391 1392 def __repr__(self): 1393 return f"<{'closed' if self.closed else 'open'} {self.__class__.__module__}.{self.__class__.__name__} {self.name}, mode {self.mode!r} at {id(self):#x}>" 1394 1395 1396class FileResource(Resource): 1397 """ 1398 A subclass of :class:`Resource` that handles local files. 1399 """ 1400 def __init__(self, url, mode="rb", *args, **kwargs): 1401 url = URL(url) 1402 self.name = os.path.expanduser(url.local()) 1403 self.mode = mode 1404 try: 1405 file = open(self.name, mode, *args, **kwargs) 1406 except IOError as exc: 1407 if "w" not in mode or exc.errno != 2: # didn't work for some other reason than a non existing directory 1408 raise 1409 (splitpath, splitname) = os.path.split(self.name) 1410 if splitpath: 1411 os.makedirs(splitpath) 1412 file = open(self.name, mode, *args, **kwargs) 1413 else: 1414 raise # we don't have a directory to make so pass the error on 1415 self.file = file 1416 self.url = url 1417 1418 def __getattr__(self, name): 1419 return getattr(self.file, name) 1420 1421 def __iter__(self): 1422 return iter(self.file) 1423 1424 def close(self): 1425 if self.file is not None: 1426 self.file.close() 1427 self.file = None 1428 1429 @property 1430 def closed(self): 1431 return self.file is None 1432 1433 def size(self): 1434 # Forward to the connection 1435 return LocalSchemeDefinition._connection.size(self.url) 1436 1437 def mdate(self): 1438 # Forward to the connection 1439 return LocalSchemeDefinition._connection.mdate(self.url) 1440 1441 def mimetype(self): 1442 # Forward to the connection 1443 return LocalSchemeDefinition._connection.mimetype(self.url) 1444 1445 1446class RemoteFileResource(Resource): 1447 """ 1448 A subclass of :class:`Resource` that handles remote files (those using 1449 the ``ssh`` scheme). 1450 """ 1451 def __init__(self, connection, url, mode="rb", *args, **kwargs): 1452 self.connection = connection 1453 self.url = URL(url) 1454 self.mode = mode 1455 self.args = args 1456 self.kwargs = kwargs 1457 filename = self.connection._url2filename(url) 1458 self.name = str(self.url) 1459 self.remoteid = self._send(filename, "open", mode, *args, **kwargs) 1460 1461 def __repr__(self): 1462 return f"<{'closed' if self.connection is None else 'open'} {self.__class__.__module__}.{self.__class__.__name__} {self.name}, mode {self.mode!r} at {id(self):#x}>" 1463 1464 def _send(self, filename, cmd, *args, **kwargs): 1465 if self.connection is None: 1466 raise ValueError("I/O operation on closed file") 1467 return self.connection._send(filename, cmd, *args, **kwargs) 1468 1469 def close(self): 1470 if self.connection is not None: 1471 self._send(self.remoteid, "close") 1472 self.connection = None # close the channel too as there are no longer any meaningful operations 1473 1474 @property 1475 def closed(self): 1476 return self.connection is None 1477 1478 def read(self, size=None): 1479 return self._send(self.remoteid, "read", size) if size is not None else self._send(self.remoteid, "read") 1480 1481 def readline(self, size=-1): 1482 return self._send(self.remoteid, "readline", size) if size is not None else self._send(self.remoteid, "readline") 1483 1484 def readlines(self, size=-1): 1485 return self._send(self.remoteid, "readlines", size) if size is not None else self._send(self.remoteid, "readlines") 1486 1487 def __iter__(self): 1488 return self 1489 1490 def __next__(self): 1491 return self._send(self.remoteid, "next") 1492 1493 def seek(self, offset, whence=0): 1494 return self._send(self.remoteid, "seek", offset, whence) 1495 1496 def tell(self): 1497 return self._send(self.remoteid, "tell") 1498 1499 def truncate(self, size=None): 1500 if size is None: 1501 return self._send(self.remoteid, "truncate") 1502 else: 1503 return self._send(self.remoteid, "truncate", size) 1504 1505 def write(self, string): 1506 return self._send(self.remoteid, "write", string) 1507 1508 def writelines(self, strings): 1509 return self._send(self.remoteid, "writelines", strings) 1510 1511 def flush(self): 1512 return self._send(self.remoteid, "flush") 1513 1514 def size(self): 1515 # Forward to the connection 1516 return self.connection.size(self.url) 1517 1518 def mdate(self): 1519 # Forward to the connection 1520 return self.connection.mdate(self.url) 1521 1522 def mimetype(self): 1523 # Forward to the connection 1524 return self.connection.mimetype(self.url) 1525 1526 1527class URLResource(Resource): 1528 """ 1529 A subclass of :class:`Resource` that handles HTTP, FTP and other URLs 1530 (i.e. those that are not handled by :class:`FileResource` or 1531 :class:`RemoteFileResource`. 1532 """ 1533 def __init__(self, url, mode="rb", headers=None, data=None): 1534 if "w" in mode: 1535 raise ValueError(f"writing mode {mode!r} not supported") 1536 self.url = URL(url) 1537 self.name = str(self.url) 1538 self.mode = mode 1539 self.reqheaders = headers 1540 self.reqdata = data 1541 self._finalurl = None 1542 if data is not None: 1543 data = urlparse.urlencode(data) 1544 if headers is None: 1545 headers = {} 1546 req = urllib.request.Request(url=self.name, data=data, headers=headers) 1547 self._stream = urllib.request.urlopen(req) 1548 self._finalurl = URL(self._stream.url) # Remember the final URL in case of a redirect 1549 self._resheaders = self._stream.info() 1550 self._mimetype = None 1551 self._encoding = None 1552 contenttype = self._resheaders.get("Content-Type") 1553 if contenttype is not None: 1554 (mimetype, options) = cgi.parse_header(contenttype) 1555 self._mimetype = mimetype 1556 self._encoding = options.get("charset") 1557 1558 cl = self._resheaders.get("Content-Length") 1559 if cl: 1560 cl = int(cl) 1561 self._size = cl 1562 lm = self._resheaders.get("Last-Modified") 1563 if lm is not None: 1564 lm = mime2dt(lm) 1565 self._mdate = lm 1566 self._buffer = io.BytesIO() 1567 1568 def __getattr__(self, name): 1569 function = getattr(self._stream, name) 1570 def call(*args, **kwargs): 1571 return function(*args, **kwargs) 1572 return call 1573 1574 def close(self): 1575 if self._stream is not None: 1576 self._stream.close() 1577 self._stream = None 1578 1579 @property 1580 def closed(self): 1581 return self._stream is None 1582 1583 def finalurl(self): 1584 return self._finalurl 1585 1586 def mimetype(self): 1587 return self._mimetype 1588 1589 def resheaders(self): 1590 return self._resheaders 1591 1592 def encoding(self): 1593 return self._encoding 1594 1595 def mdate(self): 1596 return self._mdate 1597 1598 def size(self): 1599 return self._size 1600 1601 def read(self, size=None): 1602 data = self._stream.read(size) if size is not None else self._stream.read() 1603 self._buffer.write(data) 1604 return data 1605 1606 def readline(self, size=None): 1607 data = self._stream.readline(size) if size is not None else self._stream.readline() 1608 self._buffer.write(data) 1609 return data 1610 1611 def resdata(self): 1612 data = self._stream.read() 1613 self._buffer.write(data) 1614 return self._buffer.getvalue() 1615 1616 def imagesize(self): 1617 img = Image.open(io.BytesIO(self.resdata())) # Requires PIL 1618 return img.size 1619 1620 def __iter__(self): 1621 while True: 1622 data = self._stream.readline() 1623 if not data: 1624 break 1625 self._buffer.write(data) 1626 yield data 1627 1628 1629class SchemeDefinition(object): 1630 """ 1631 A :class:`SchemeDefinition` instance defines the properties of a particular 1632 URL scheme. 1633 """ 1634 _connection = URLConnection() 1635 1636 def __init__(self, scheme, usehierarchy, useserver, usefrag, islocal=False, isremote=False, defaultport=None): 1637 """ 1638 Create a new :class:`SchemeDefinition` instance. Arguments are: 1639 1640 * :obj:`scheme`: The name of the scheme; 1641 1642 * :obj:`usehierarchy`: Specifies whether this scheme uses hierarchical 1643 URLs or opaque URLs (i.e. whether ``hier_part`` or ``opaque_part`` 1644 from the BNF in :rfc:`2396` is used); 1645 1646 * :obj:`useserver`: Specifies whether this scheme uses an Internet-based 1647 server :attr:`authority` component or a registry of naming authorities 1648 (only for hierarchical URLs); 1649 1650 * :obj:`usefrag`: Specifies whether this scheme uses fragments 1651 (according to the BNF in :rfc:`2396` every scheme does, but it doesn't 1652 make sense for e.g. ``"javascript"``, ``"mailto"`` or ``"tel"``); 1653 1654 * :obj:`islocal`: Specifies whether URLs with this scheme refer to 1655 local files; 1656 1657 * :obj:`isremote`: Specifies whether URLs with this scheme refer to 1658 remote files (there may be schemes which are neither local nor remote, 1659 e.g. ``"mailto"``); 1660 1661 * :obj:`defaultport`: The default port for this scheme (only for schemes 1662 using server based authority). 1663 """ 1664 self.scheme = scheme 1665 self.usehierarchy = usehierarchy 1666 self.useserver = useserver 1667 self.usefrag = usefrag 1668 self.islocal = islocal 1669 self.isremote = isremote 1670 self.defaultport = defaultport 1671 1672 def connect(self, url, context=None, **kwargs): 1673 """ 1674 Create a :class:`Connection` for the :class:`URL` :obj:`url` (which must 1675 have :obj:`self` as the scheme). 1676 """ 1677 return self._connect(url, context, **kwargs)[0] 1678 1679 def _connect(self, url, context=None, **kwargs): 1680 # Returns a tuple ``(connect, kwargs)`` (some of the keyword arguments 1681 # might have been consumed by the connect call, the rest can be passed 1682 # on the whatever call will be made on the connection itself) 1683 # We can always use the same connection here, because the connection for 1684 # local files and real URLs doesn't use any resources. 1685 # This will be overwritten by :class:`SshSchemeDefinition` 1686 return (self._connection, kwargs) 1687 1688 def open(self, *args, **kwargs): 1689 return URLConnection(*args, **kwargs) 1690 1691 def closeall(self, context): 1692 """ 1693 Close all connections active for this scheme in the context :obj:`context`. 1694 """ 1695 1696 def __repr__(self): 1697 return f"<{self.__class__.__name__} instance scheme={self.scheme!r} usehierarchy={self.usehierarchy!r} useserver={self.useserver!r} usefrag={self.usefrag!r} at {id(self):#x}>" 1698 1699 1700class LocalSchemeDefinition(SchemeDefinition): 1701 # Use a different connection than the base class (but still one single connection for all URLs) 1702 _connection = LocalConnection() 1703 1704 def open(self, *args, **kwargs): 1705 return FileResource(*args, **kwargs) 1706 1707 1708class SshSchemeDefinition(SchemeDefinition): 1709 def _connect(self, url, context=None, **kwargs): 1710 if "python" in kwargs or "nice" in kwargs: 1711 kwargs = kwargs.copy() 1712 python = kwargs.pop("python", None) 1713 nice = kwargs.pop("nice", None) 1714 else: 1715 python = None 1716 nice = None 1717 1718 context = getcontext(context) 1719 if context is threadlocalcontext.__class__.context: 1720 raise ValueError("ssh URLs need a custom context") 1721 # Use one :class:`SshConnection` for each user/host/python combination 1722 server = url.server 1723 try: 1724 connections = context.schemes["ssh"] 1725 except KeyError: 1726 connections = context.schemes["ssh"] = {} 1727 try: 1728 connection = connections[(server, python, nice)] 1729 except KeyError: 1730 connection = connections[(server, python, nice)] = SshConnection(context, server, python, nice) 1731 return (connection, kwargs) 1732 1733 def open(self, url, mode="rb", context=None, python=None, nice=None): 1734 (connection, kwargs) = self._connect(url, context=context, python=python, nice=nice) 1735 return RemoteFileResource(connection, url, mode, **kwargs) 1736 1737 def closeall(self, context): 1738 for connection in context.schemes["ssh"].values(): 1739 connection.close() 1740 1741 1742schemereg = { 1743 "http": SchemeDefinition("http", usehierarchy=True, useserver=True, usefrag=True, isremote=True, defaultport=80), 1744 "https": SchemeDefinition("https", usehierarchy=True, useserver=True, usefrag=True, isremote=True, defaultport=443), 1745 "ftp": SchemeDefinition("ftp", usehierarchy=True, useserver=True, usefrag=True, isremote=True, defaultport=21), 1746 "file": LocalSchemeDefinition("file", usehierarchy=True, useserver=False, usefrag=True, islocal=True), 1747 "root": LocalSchemeDefinition("root", usehierarchy=True, useserver=False, usefrag=True, islocal=True), 1748 "javascript": SchemeDefinition("javascript", usehierarchy=False, useserver=False, usefrag=False), 1749 "mailto": SchemeDefinition("mailto", usehierarchy=False, useserver=False, usefrag=False), 1750 "tel": SchemeDefinition("tel", usehierarchy=False, useserver=False, usefrag=False), 1751 "fax": SchemeDefinition("fax", usehierarchy=False, useserver=False, usefrag=False), 1752 "ssh": SshSchemeDefinition("ssh", usehierarchy=True, useserver=True, usefrag=True, islocal=False, isremote=True), 1753} 1754defaultreg = LocalSchemeDefinition("", usehierarchy=True, useserver=True, islocal=True, usefrag=True) 1755 1756 1757class Path(object): 1758 __slots__ = ("_path", "_segments") 1759 1760 def __init__(self, path=None): 1761 self._path = "" 1762 self._segments = [] 1763 self.path = path 1764 1765 def _prefix(cls, path): 1766 if path.startswith("/"): 1767 return "/" 1768 else: 1769 return "" 1770 1771 def insert(self, index, *others): 1772 segments = self.segments 1773 segments[index:index] = map(_unescape, others) 1774 self.segments = segments 1775 1776 def startswith(self, prefix): 1777 """ 1778 Return whether :obj:`self` starts with the path :obj:`prefix`. 1779 :obj:`prefix` will be converted to a :class:`Path` if it isn't one. 1780 """ 1781 if not isinstance(prefix, Path): 1782 prefix = Path(prefix) 1783 segments = prefix.segments 1784 if self.isabs != prefix.isabs: 1785 return False 1786 if segments and not segments[-1] and len(self.segments) > len(segments): 1787 return self.segments[:len(segments)-1] == segments[:-1] 1788 else: 1789 return self.segments[:len(segments)] == segments 1790 1791 def endswith(self, suffix): 1792 """ 1793 Return whether :obj:`self` ends with the path :obj:`suffix`. :obj:`suffix` 1794 will be converted to a :class:`Path` if it isn't one. If :obj:`suffix` is 1795 absolute a normal comparison will be done. 1796 """ 1797 if not isinstance(suffix, Path): 1798 suffix = Path(suffix) 1799 if suffix.isabs: 1800 return self == suffix 1801 else: 1802 segments = suffix.segments 1803 return self.segments[-len(segments):] == segments 1804 1805 def clone(self): 1806 return Path(self) 1807 1808 def __repr__(self): 1809 return f"Path({self._path!r})" 1810 1811 def __str__(self): 1812 return self.path 1813 1814 def __eq__(self, other): 1815 if not isinstance(other, Path): 1816 other = Path(other) 1817 return self._path == other._path 1818 1819 def __ne__(self, other): 1820 return not self == other 1821 1822 def __hash__(self): 1823 return hash(self._path) 1824 1825 def __len__(self): 1826 return len(self.segments) 1827 1828 def __getitem__(self, index): 1829 if isinstance(index, slice): 1830 # Return of slice of the path. The resulting path will always be relative, i.e. the leading ``/`` will be dropped. 1831 return Path(self.segments[index]) 1832 else: 1833 return self.segments[index] 1834 1835 def __setitem__(self, index, value): 1836 segments = self.segments 1837 if isinstance(index, slice): 1838 segments[index] = map(_unescape, value) 1839 self._path = self._prefix(self._path) + self._segments2path(segments) 1840 else: 1841 segments[index] = _unescape(value) 1842 self._path = self._prefix(self._path) + self._segments2path(segments) 1843 self._segments = segments 1844 1845 def __delitem__(self, index): 1846 if isinstance(index, slice): 1847 del self.segments[index] 1848 else: 1849 segments = self.segments 1850 del segments[index] 1851 self._path = self._segments2path(segments) 1852 self._segments = segments 1853 1854 def __contains__(self, item): 1855 return _unescape(item) in self.segments 1856 1857 class isabs(misc.propclass): 1858 """ 1859 Is the path absolute? 1860 """ 1861 def __get__(self): 1862 return self._path.startswith("/") 1863 1864 def __set__(self, isabs): 1865 isabs = bool(isabs) 1866 if isabs != self._path.startswith("/"): 1867 if isabs: 1868 self._path = "/" + self._path 1869 else: 1870 self._path = self._path[1:] 1871 1872 def __delete__(self): 1873 if self._path.startswith("/"): 1874 self._path = self._path[1:] 1875 1876 @classmethod 1877 def _segments2path(cls, segments): 1878 return "/".join(_escape(segment, pathsafe) for segment in segments) 1879 1880 @classmethod 1881 def _path2segments(cls, path): 1882 if path.startswith("/"): 1883 path = path[1:] 1884 return list(map(_unescape, path.split("/"))) 1885 1886 def _setpathorsegments(self, path): 1887 if path is None: 1888 self._path = "" 1889 self._segments = [] 1890 elif isinstance(path, Path): 1891 self._path = path._path 1892 self._segments = None 1893 elif isinstance(path, (list, tuple)): 1894 self._segments = list(map(_unescape, path)) 1895 self._path = self._prefix(self._path) + self._segments2path(self._segments) 1896 else: 1897 path = _escape(path) 1898 prefix = self._prefix(path) 1899 if prefix: 1900 path = path[1:] 1901 self._segments = self._path2segments(path) 1902 self._path = prefix + self._segments2path(self._segments) 1903 1904 class path(misc.propclass): 1905 """ 1906 The complete path as a string. 1907 """ 1908 def __get__(self): 1909 return self._path 1910 1911 def __set__(self, path): 1912 self._setpathorsegments(path) 1913 1914 def __delete__(self): 1915 self.clear() 1916 1917 class segments(misc.propclass): 1918 """ 1919 The path as a list of (name, param) tuples. 1920 """ 1921 def __get__(self): 1922 if self._segments is None: 1923 self._segments = self._path2segments(self._path) 1924 return self._segments 1925 1926 def __set__(self, path): 1927 self._setpathorsegments(path) 1928 1929 def __delete__(self): 1930 self._path = self._prefix(self._path) 1931 self._segments = [] 1932 1933 class file(misc.propclass): 1934 """ 1935 The filename without the path, i.e. the name part of the last component 1936 of :attr:`path`. The ``baz.html`` part of 1937 ``http://user@www.example.com/bar/baz.html;xyzzy?spam=eggs#frag``. 1938 """ 1939 def __get__(self): 1940 try: 1941 return self[-1] 1942 except IndexError: 1943 return None 1944 1945 def __set__(self, file): 1946 """ 1947 Setting the filename preserves the parameter in the last segment. 1948 """ 1949 if file is None: 1950 del self.file 1951 segments = self.segments 1952 if segments: 1953 self[-1] = file 1954 else: 1955 self.segments = [file] 1956 1957 def __delete__(self): 1958 """ 1959 Deleting the filename preserves the parameter in the last segment. 1960 """ 1961 segments = self.segments 1962 if segments: 1963 self[-1] = "" 1964 1965 class ext(misc.propclass): 1966 """ 1967 The filename extension of the last segment of the path. The ``html`` part 1968 of ``http://user@www.example.com/bar/baz.html;xyzzy?spam=eggs#frag``. 1969 """ 1970 def __get__(self): 1971 ext = None 1972 segments = self.segments 1973 if segments: 1974 segment = segments[-1] 1975 pos = segment.rfind(".") 1976 if pos != -1: 1977 ext = segment[pos+1:] 1978 return ext 1979 1980 def __set__(self, ext): 1981 if ext is None: 1982 del self.ext 1983 segments = self.segments 1984 if segments: 1985 segment = segments[-1] 1986 pos = segment.rfind(".") 1987 if pos != -1: 1988 segment = segment[:pos+1] + ext 1989 else: 1990 segment = segment + "." + ext 1991 self[-1] = segment 1992 1993 def __delete__(self): 1994 segments = self.segments 1995 if segments: 1996 segment = segments[-1] 1997 pos = segment.rfind(".") 1998 if pos != -1: 1999 segment = segment[:pos] 2000 self[-1] = segment 2001 2002 def withext(self, ext): 2003 """ 2004 Return a new :class:`Path` where the filename extension has been replaced 2005 with :obj:`ext`. 2006 """ 2007 path = self.clone() 2008 path.ext = ext 2009 return path 2010 2011 def withoutext(self): 2012 """ 2013 Return a new :class:`Path` where the filename extension has been removed. 2014 """ 2015 if "/" not in self._path and self._path.rfind(".")==0: 2016 return Path("./") 2017 else: 2018 path = self.clone() 2019 del path.ext 2020 return path 2021 2022 def withfile(self, file): 2023 """ 2024 Return a new :class:`Path` where the filename (i.e. the name of the last 2025 component of :attr:`segments`) has been replaced with :obj:`file`. 2026 """ 2027 path = self.clone() 2028 path.file = file 2029 return path 2030 2031 def withoutfile(self): 2032 """ 2033 Return a new :class:`Path` where the filename (i.e. the name of the last 2034 component of :attr:`segments`) has been removed. 2035 """ 2036 if "/" not in self._path: 2037 return Path("./") 2038 else: 2039 path = Path(self) 2040 del path.file 2041 return path 2042 2043 def clear(self): 2044 self._path = "" 2045 self._segments = [] 2046 2047 def __truediv__(self, other): 2048 """ 2049 Join two paths. 2050 """ 2051 if isinstance(other, str): 2052 other = Path(other) 2053 if isinstance(other, Path): 2054 newpath = Path() 2055 # RFC2396, Section 5.2 (5) 2056 if other.isabs: 2057 newpath._path = other._path 2058 newpath._segments = None 2059 else: 2060 # the following should be equivalent to RFC2396, Section 5.2 (6) (c)-(f) 2061 newpath._path = self._prefix(self._path) + self._segments2path( 2062 _normalizepath( 2063 self.segments[:-1] + # RFC2396, Section 5.2 (6) (a) 2064 other.segments # RFC2396, Section 5.2 (6) (b) 2065 ) 2066 ) 2067 newpath._segments = None 2068 return newpath 2069 elif isinstance(other, (list, tuple)): # this makes path/list possible 2070 return other.__class__(self/path for path in other) 2071 else: # this makes path/generator possible 2072 return (self/path for path in other) 2073 2074 def __rtruediv__(self, other): 2075 """ 2076 Right hand version of :meth:`__div__`. This supports list and generators 2077 as the left hand side too. 2078 """ 2079 if isinstance(other, str): 2080 other = Path(other) 2081 if isinstance(other, Path): 2082 return other/self 2083 elif isinstance(other, (list, tuple)): 2084 return other.__class__(path/self for path in other) 2085 else: 2086 return (path/self for path in other) 2087 2088 def relative(self, basepath): 2089 """ 2090 Return an relative :class:`Path` :obj:`rel` such that 2091 ``basepath/rel == self``, i.e. this is the inverse operation of 2092 :meth:`__div__`. 2093 2094 If :obj:`self` is relative, an identical copy of :obj:`self` will be 2095 returned. 2096 """ 2097 # if :obj:`self` is relative don't do anything 2098 if not self.isabs: 2099 pass # FIXME return self.clone() 2100 basepath = Path(basepath) # clone/coerce 2101 self_segments = _normalizepath(self.segments) 2102 base_segments = _normalizepath(basepath.segments) 2103 while len(self_segments) > 1 and len(base_segments) > 1 and self_segments[0] == base_segments[0]: 2104 del self_segments[0] 2105 del base_segments[0] 2106 # build a path from one file to the other 2107 self_segments[:0] = [".."]*(len(base_segments)-1) 2108 if not len(self_segments) or self_segments == [""]: 2109 self_segments = [".", ""] 2110 return Path(self._segments2path(self_segments)) 2111 2112 def reverse(self): 2113 segments = self.segments 2114 segments.reverse() 2115 if segments and not segments[0]: 2116 del segments[0] 2117 segments.append("") 2118 self.segments = segments 2119 2120 def normalize(self): 2121 self.segments = _normalizepath(self.segments) 2122 2123 def normalized(self): 2124 new = self.clone() 2125 new.normalize() 2126 return new 2127 2128 def local(self): 2129 """ 2130 Return :obj:`self` converted to a filename using the file naming 2131 conventions of the OS. Parameters will be dropped in the resulting string. 2132 """ 2133 localpath = _unescape(self._path) 2134 if self._path.endswith("/") and not (localpath.endswith(os.sep) or (os.altsep is not None and localpath.endswith(os.altsep))): 2135 localpath += os.sep 2136 return localpath 2137 2138 def abs(self): 2139 """ 2140 Return an absolute version of :obj:`self`. 2141 """ 2142 path = os.path.abspath(self.local()) 2143 path = path.rstrip(os.sep) 2144 if path.startswith("///"): 2145 path = path[2:] 2146 path = urllib.request.pathname2url(path) 2147 if len(self) and not self.segments[-1]: 2148 path += "/" 2149 return Path(path) 2150 2151 def real(self): 2152 """ 2153 Return the canonical version of :obj:`self`, eliminating all symbolic 2154 links. 2155 """ 2156 path = os.path.realpath(self.local()) 2157 path = path.rstrip(os.sep) 2158 path = urllib.request.pathname2url(path) 2159 if path.startswith("///"): 2160 path = path[2:] 2161 if len(self) and not self.segments[-1]: 2162 path += "/" 2163 return Path(path) 2164 2165 2166class Query(dict): 2167 __slots__ = () 2168 2169 def __init__(self, arg=None, **kwargs): 2170 if arg is not None: 2171 if isinstance(arg, dict): 2172 for (key, value) in arg.items(): 2173 self.add(key, value) 2174 else: 2175 for (key, value) in arg: 2176 self.add(key, value) 2177 for (key, value) in kwargs.items(): 2178 self.add(key, value) 2179 2180 def __setitem__(self, key, value): 2181 dict.__setitem__(self, str(key), [str(value)]) 2182 2183 def add(self, key, *values): 2184 key = str(key) 2185 values = map(str, values) 2186 self.setdefault(key, []).extend(values) 2187 2188 2189class URL(object): 2190 """ 2191 An :rfc:`2396` compliant URL. 2192 """ 2193 def __init__(self, url=None): 2194 """ 2195 Create a new :class:`URL` instance. :obj:`url` may be a :class:`str` 2196 object, or an :class:`URL` (in which case you'll get a copy of :obj:`url`), 2197 or :const:`None` (which will create an :class:`URL` referring to the 2198 "current document"). 2199 """ 2200 self.url = url 2201 2202 def _clear(self): 2203 # internal helper method that makes :obj:`self` empty. 2204 self.reg = defaultreg 2205 self._scheme = None 2206 self._userinfo = None 2207 self._host = None 2208 self._port = None 2209 self._path = Path() 2210 self._reg_name = None 2211 self._query = None 2212 self._query_parts = None 2213 self._opaque_part = None 2214 self._frag = None 2215 2216 def clone(self): 2217 """ 2218 Return an identical copy :obj:`self`. 2219 """ 2220 return URL(self) 2221 2222 @staticmethod 2223 def _checkscheme(scheme): 2224 # Check whether :obj:`scheme` contains only legal characters. 2225 if not scheme or scheme[0] not in schemecharfirst: 2226 return False 2227 for c in scheme[1:]: 2228 if c not in schemechar: 2229 return False 2230 return True 2231 2232 class scheme(misc.propclass): 2233 """ 2234 The URL scheme (e.g. ``ftp``, ``ssh``, ``http`` or ``mailto``). The 2235 scheme will be :const:`None` if the URL is a relative one. 2236 """ 2237 def __get__(self): 2238 return self._scheme 2239 def __set__(self, scheme): 2240 """ 2241 The scheme will be converted to lowercase on setting (if :obj:`scheme` 2242 is not :const:`None`, otherwise the scheme will be deleted). 2243 """ 2244 if scheme is None: 2245 self._scheme = None 2246 else: 2247 scheme = scheme.lower() 2248 # check if the scheme only has allowed characters 2249 if not self._checkscheme(scheme): 2250 raise ValueError(f"Illegal scheme char in scheme {scheme!r}") 2251 self._scheme = scheme 2252 self.reg = schemereg.get(scheme, defaultreg) 2253 def __delete__(self): 2254 """ 2255 Deletes the scheme, i.e. makes the URL relative. 2256 """ 2257 self._scheme = None 2258 self.reg = defaultreg 2259 2260 class userinfo(misc.propclass): 2261 """ 2262 The user info part of the :class:`URL`; i.e. the ``user`` part of 2263 ``http://user@www.example.com:8080/bar/baz.html;xyzzy?spam=eggs#frag``. 2264 """ 2265 def __get__(self): 2266 return self._userinfo 2267 def __set__(self, userinfo): 2268 self._userinfo = userinfo 2269 def __delete__(self): 2270 self._userinfo = None 2271 2272 class host(misc.propclass): 2273 """ 2274 The host part of the :class:`URL`; i.e. the ``www.example.com`` part of 2275 ``http://user@www.example.com:8080/bar/baz.html;xyzzy?spam=eggs#frag``. 2276 """ 2277 def __get__(self): 2278 return self._host 2279 def __set__(self, host): 2280 if host is not None: 2281 host = host.lower() 2282 self._host = host 2283 def __delete__(self): 2284 self._host = None 2285 2286 class port(misc.propclass): 2287 """ 2288 The port number of the :class:`URL` (as an :class:`int`) or :const:`None` 2289 if the :class:`URL` has none. The ``8080`` in 2290 ``http://user@www.example.com:8080/bar/baz.html;xyzzy?spam=eggs#frag``. 2291 """ 2292 def __get__(self): 2293 return self._port 2294 def __set__(self, port): 2295 if port is not None: 2296 port = int(port) 2297 self._port = port 2298 def __delete__(self): 2299 self._port = None 2300 2301 class hostport(misc.propclass): 2302 """ 2303 The host and (if specified) the port number of the :class:`URL`, i.e. the 2304 ``www.example.com:8080`` in 2305 ``http://user@www.example.com:8080/bar/baz.html;xyzzy?spam=eggs#frag``. 2306 """ 2307 def __get__(self): 2308 if self.host is not None: 2309 hostport = _escape(self.host, safe) 2310 if self.port is not None: 2311 hostport += f":{self.port}" 2312 return hostport 2313 else: 2314 return None 2315 def __set__(self, hostport): 2316 # find the port number (RFC2396, Section 3.2.2) 2317 if hostport is None: 2318 del self.hostport 2319 else: 2320 del self.port 2321 pos = hostport.rfind(":") 2322 if pos != -1: 2323 if pos != len(hostport)-1: 2324 self.port = hostport[pos+1:] 2325 hostport = hostport[:pos] 2326 self.host = _unescape(hostport) 2327 def __delete__(self): 2328 del self.host 2329 del self.port 2330 2331 class server(misc.propclass): 2332 """ 2333 The server part of the :class:`URL`; i.e. the ``user@www.example.com`` 2334 part of ``http://user@www.example.com/bar/baz.html;xyzzy?spam=eggs#frag``. 2335 """ 2336 def __get__(self): 2337 if self.hostport is not None: 2338 userinfo = self.userinfo 2339 if userinfo is not None: 2340 return _escape(userinfo, safe) + "@" + self.hostport 2341 else: 2342 return self.hostport 2343 else: 2344 return None 2345 def __set__(self, server): 2346 """ 2347 Setting the server always works even if the current :attr:`scheme` 2348 does use :attr:`opaque_part` or :attr:`reg_name` but will be ignored 2349 when reassembling the URL for the :attr:`url` property. 2350 """ 2351 if server is None: 2352 del self.server 2353 else: 2354 # find the userinfo (RFC2396, Section 3.2.2) 2355 pos = server.find("@") 2356 if pos != -1: 2357 self.userinfo = _unescape(server[:pos]) 2358 server = server[pos+1:] 2359 else: 2360 del self.userinfo 2361 self.hostport = server 2362 def __delete__(self): 2363 del self.userinfo 2364 del self.hostport 2365 2366 class reg_name(misc.propclass): 2367 """ 2368 The reg_name part of the :class:`URL` for hierarchical schemes that use 2369 a name based :attr:`authority` instead of :attr:`server`. 2370 """ 2371 def __get__(self): 2372 return self._reg_name 2373 def __set__(self, reg_name): 2374 if reg_name is None: 2375 del self.reg_name 2376 else: 2377 self._reg_name = reg_name 2378 def __delete__(self): 2379 self._reg_name = None 2380 2381 class authority(misc.propclass): 2382 """ 2383 The authority part of the :class:`URL` for hierarchical schemes. 2384 Depending on the scheme, this is either :attr:`server` or 2385 :attr:`reg_name`. 2386 """ 2387 def __get__(self): 2388 if self.reg.useserver: 2389 return self.server 2390 else: 2391 return self.reg_name 2392 def __set__(self, authority): 2393 if self.reg.useserver: 2394 self.server = authority 2395 else: 2396 self.reg_name = authority 2397 def __delete__(self): 2398 if self.reg.useserver: 2399 del self.server 2400 else: 2401 del self.reg_name 2402 2403 class isabspath(misc.propclass): 2404 """ 2405 Specifies whether the path of a hierarchical :class:`URL` is absolute, 2406 (i.e. it has a leading ``"/"``). Note that the path will always be 2407 absolute if an :attr:`authority` is specified. 2408 """ 2409 def __get__(self): 2410 return (self.authority is not None) or self.path.isabs 2411 def __set__(self, isabspath): 2412 self.path.isabs = isabspath 2413 2414 class path(misc.propclass): 2415 """ 2416 The path segments of a hierarchical :class:`URL` as a :class:`Path` object. 2417 """ 2418 def __get__(self): 2419 return self._path 2420 def __set__(self, path): 2421 self._path = Path(path) 2422 def __delete__(self): 2423 self._path = Path() 2424 2425 class file(misc.propclass): 2426 """ 2427 The filename without the path, i.e. the name part of the last component 2428 of :attr:`path`. The ``baz.html`` part of 2429 ``http://user@www.example.com/bar/baz.html;xyzzy?spam=eggs#frag``. 2430 """ 2431 def __get__(self): 2432 return self.path.file 2433 def __set__(self, file): 2434 """ 2435 Setting the filename preserves the parameter in the last segment. 2436 """ 2437 self.path.file = file 2438 def __delete__(self): 2439 """ 2440 Deleting the filename preserves the parameter in the last segment. 2441 """ 2442 del self.path.file 2443 2444 class ext(misc.propclass): 2445 """ 2446 The filename extension of the last segment of the path. The ``html`` part 2447 of ``http://user@www.example.com/bar/baz.html;xyzzy?spam=eggs#frag``. 2448 """ 2449 def __get__(self): 2450 return self.path.ext 2451 def __set__(self, ext): 2452 """ 2453 Setting the extension preserves the parameter in the last segment. 2454 """ 2455 self.path.ext = ext 2456 def __delete__(self): 2457 """ 2458 Deleting the extension preserves the parameter in the last segment. 2459 """ 2460 del self.path.ext 2461 2462 class query_parts(misc.propclass): 2463 """ 2464 The query component as a dictionary, i.e. ``{u"spam": u"eggs"}`` from 2465 ``http://user@www.example.com/bar/baz.html;xyzzy?spam=eggs#frag``. 2466 2467 If the query component couldn't be parsed, ``query_parts`` will be 2468 :const:`False`. 2469 """ 2470 def __get__(self): 2471 return self._query_parts 2472 def __set__(self, query_parts): 2473 self._query = _urlencode(query_parts) 2474 self._query_parts = query_parts 2475 def __delete__(self): 2476 self._query = None 2477 self._query_parts = None 2478 2479 class query(misc.propclass): 2480 """ 2481 The query component, i.e. the ``spam=eggs`` part of 2482 ``http://user@www.example.com/bar/baz.html;xyzzy?spam=eggs#frag``. 2483 """ 2484 def __get__(self): 2485 if self._query_parts is False: 2486 return self._query 2487 else: 2488 return _urlencode(self._query_parts) 2489 def __set__(self, query): 2490 self._query = query 2491 if query is not None: 2492 parts = {} 2493 for part in query.split("&"): 2494 namevalue = part.split("=", 1) 2495 name = _unescape(namevalue[0].replace("+", " ")) 2496 if len(namevalue) == 2: 2497 value = _unescape(namevalue[1].replace("+", " ")) 2498 parts.setdefault(name, []).append(value) 2499 else: 2500 parts = False 2501 break 2502 query = parts 2503 self._query_parts = query 2504 def __delete__(self): 2505 self._query = None 2506 self._query_parts = None 2507 2508 class opaque_part(misc.propclass): 2509 """ 2510 The opaque part (for schemes like ``mailto`` that are not hierarchical). 2511 """ 2512 def __get__(self): 2513 return self._opaque_part 2514 def __set__(self, opaque_part): 2515 self._opaque_part = opaque_part 2516 def __delete__(self): 2517 self._opaque_part = None 2518 2519 class frag(misc.propclass): 2520 """ 2521 The fragment identifier, which references a part of the resource, i.e. 2522 the ``frag`` part of 2523 ``http://user@www.example.com/bar/baz.html;xyzzy?spam=eggs#frag``. 2524 """ 2525 def __get__(self): 2526 return self._frag 2527 def __set__(self, frag): 2528 self._frag = frag 2529 def __delete__(self): 2530 self._frag = None 2531 2532 class url(misc.propclass): 2533 """ 2534 The complete URL 2535 """ 2536 def __get__(self): 2537 """ 2538 Getting :attr:`url` reassembles the URL from the components. 2539 """ 2540 result = "" 2541 if self.scheme is not None: 2542 result += self.scheme + ":" 2543 if self.reg.usehierarchy: 2544 if self.authority is not None: 2545 result += "//" + self.authority 2546 if not self.path.isabs: 2547 result += "/" 2548 result += str(self.path) 2549 if self.query is not None: 2550 result += "?" + self.query 2551 else: 2552 result += self.opaque_part 2553 if self.reg.usefrag and self.frag is not None: 2554 result += "#" + _escape(self.frag, fragsafe) 2555 return result 2556 2557 def __set__(self, url): 2558 """ 2559 Setting :attr:`url` parses :obj:`url` into the components. :obj:`url` 2560 may also be an :class:`URL` instance, in which case the URL will be 2561 copied. 2562 """ 2563 self._clear() 2564 if url is None: 2565 return 2566 elif isinstance(url, URL): 2567 self.scheme = url.scheme 2568 self.userinfo = url.userinfo 2569 self.host = url.host 2570 self.port = url.port 2571 self.path = url.path.clone() 2572 self.reg_name = url.reg_name 2573 self.opaque_part = url.opaque_part 2574 self.query = url.query 2575 self.frag = url.frag 2576 else: 2577 url = _escape(url) 2578 # find the scheme (RFC2396, Section 3.1) 2579 pos = url.find(":") 2580 if pos != -1: 2581 scheme = url[:pos] 2582 if self._checkscheme(scheme): # if the scheme is illegal assume there is none (e.g. "/foo.php?x=http://www.bar.com", will *not* have the scheme "/foo.php?x=http") 2583 self.scheme = scheme # the info about what we have to expect in the rest of the URL can be found in self.reg now 2584 url = url[pos+1:] 2585 2586 # find the fragment (RFC2396, Section 4.1) 2587 if self.reg.usefrag: 2588 # the fragment itself may not contain a "#", so find the last "#" 2589 pos = url.rfind("#") 2590 if pos != -1: 2591 self.frag = _unescape(url[pos+1:]) 2592 url = url[:pos] 2593 2594 if self.reg.usehierarchy: 2595 # find the query (RFC2396, Section 3.4) 2596 pos = url.rfind("?") 2597 if pos != -1: 2598 self.query = url[pos+1:] 2599 url = url[:pos] 2600 if url.startswith("//"): 2601 url = url[2:] 2602 # find the authority part (RFC2396, Section 3.2) 2603 pos = url.find("/") 2604 if pos != -1: 2605 authority = url[:pos] 2606 url = url[pos:] # keep the "/" 2607 else: 2608 authority = url 2609 url = "/" 2610 self.authority = authority 2611 self.path = Path(url) 2612 else: 2613 self.opaque_part = url 2614 2615 def __delete__(self): 2616 """ 2617 After deleting the URL the resulting object will refer to the 2618 "current document". 2619 """ 2620 self._clear() 2621 2622 def withext(self, ext): 2623 """ 2624 Return a new :class:`URL` where the filename extension has been replaced 2625 with :obj:`ext`. 2626 """ 2627 url = URL(self) 2628 url.path = url.path.withext(ext) 2629 return url 2630 2631 def withoutext(self): 2632 """ 2633 Return a new :class:`URL` where the filename extension has been removed. 2634 """ 2635 url = URL(self) 2636 url.path = url.path.withoutext() 2637 return url 2638 2639 def withfile(self, file): 2640 """ 2641 Return a new :class:`URL` where the filename (i.e. the name of last 2642 component of :attr:`path_segments`) has been replaced with 2643 :obj:`file`. 2644 """ 2645 url = URL(self) 2646 url.path = url.path.withfile(file) 2647 return url 2648 2649 def withoutfile(self): 2650 url = URL(self) 2651 url.path = url.path.withoutfile() 2652 return url 2653 2654 def withfrag(self, frag): 2655 """ 2656 Return a new :class:`URL` where the fragment has been replaced with 2657 :obj:`frag`. 2658 """ 2659 url = URL(self) 2660 url.frag = frag 2661 return url 2662 2663 def withoutfrag(self): 2664 """ 2665 Return a new :class:`URL` where the frag has been dropped. 2666 """ 2667 url = URL(self) 2668 del url.frag 2669 return url 2670 2671 def __truediv__(self, other): 2672 """ 2673 Join :obj:`self` with another (possible relative) :class:`URL` 2674 :obj:`other`, to form a new :class:`URL`. 2675 2676 :obj:`other` may be a :class:`str` or :class:`URL` object. It may be 2677 :const:`None` (referring to the "current document") in which case 2678 :obj:`self` will be returned. It may also be a list or other iterable. 2679 For this case a list (or iterator) will be returned where 2680 :meth:`__div__` will be applied to every item in the list/iterator. E.g. 2681 the following expression returns all the files in the current directory 2682 as absolute URLs (see the method :meth:`files` and the function 2683 :func:`here` for further explanations):: 2684 2685 >>> here = url.here() 2686 >>> for f in here/here.files(): 2687 ... print(f) 2688 """ 2689 if isinstance(other, str): 2690 other = URL(other) 2691 if isinstance(other, URL): 2692 newurl = URL() 2693 # RFC2396, Section 5.2 (2) 2694 if other.scheme is None and other.authority is None and not str(other.path) and other.query is None: 2695 newurl = URL(self) 2696 newurl.frag = other.frag 2697 return newurl 2698 if not self.reg.usehierarchy: # e.g. "mailto:x@y"/"file:foo" 2699 return other 2700 # In violation of RFC2396 we treat file URLs as relative ones (if the base is a local URL) 2701 if other.scheme == "file" and self.islocal(): 2702 other = URL(other) 2703 del other.scheme 2704 del other.authority 2705 # RFC2396, Section 5.2 (3) 2706 if other.scheme is not None: 2707 return other 2708 newurl.scheme = self.scheme 2709 newurl.query = other.query 2710 newurl.frag = other.frag 2711 # RFC2396, Section 5.2 (4) 2712 if other.authority is None: 2713 newurl.authority = self.authority 2714 # RFC2396, Section 5.2 (5) & (6) (a) (b) 2715 newurl._path = self._path/other._path 2716 else: 2717 newurl.authority = other.authority 2718 newurl._path = other._path.clone() 2719 return newurl 2720 elif isinstance(other, (list, tuple)): # this makes path/list possible 2721 return other.__class__(self/path for path in other) 2722 else: # this makes path/generator possible 2723 return (self/path for path in other) 2724 2725 def __rtruediv__(self, other): 2726 """ 2727 Right hand version of :meth:`__div__`. This supports lists and iterables 2728 as the left hand side too. 2729 """ 2730 if isinstance(other, str): 2731 other = URL(other) 2732 if isinstance(other, URL): 2733 return other/self 2734 elif isinstance(other, (list, tuple)): 2735 return other.__class__(item/self for item in other) 2736 else: 2737 return (item/self for item in other) 2738 2739 def relative(self, baseurl, allowschemerel=False): 2740 """ 2741 Return an relative :class:`URL` :obj:`rel` such that 2742 ``baseurl/rel == self``, i.e. this is the inverse operation of 2743 :meth:`__div__`. 2744 2745 If :obj:`self` is relative, has a different :attr:`scheme` or 2746 :attr:`authority` than :obj:`baseurl` or a non-hierarchical scheme, an 2747 identical copy of :obj:`self` will be returned. 2748 2749 If :obj:`allowschemerel` is true, scheme relative URLs are allowed, i.e. 2750 if both :obj:`self` and :obj:`baseurl` use the same hierarchical scheme, 2751 both a different authority (i.e. server), a scheme relative url 2752 (``//server/path/file.html``) will be returned. 2753 """ 2754 # if :obj:`self` is relative don't do anything 2755 if self.scheme is None: 2756 return URL(self) 2757 # javascript etc. 2758 if not self.reg.usehierarchy: 2759 return URL(self) 2760 baseurl = URL(baseurl) # clone/coerce 2761 newurl = URL(self) # clone 2762 # only calculate a new URL if to the same scheme/server, else use the original (or a scheme relative one) 2763 if self.authority != baseurl.authority: 2764 if self.scheme == baseurl.scheme and allowschemerel: 2765 del newurl.scheme 2766 return newurl 2767 elif self.scheme != baseurl.scheme: 2768 return newurl 2769 del newurl.scheme 2770 del newurl.authority 2771 selfpath_segments = _normalizepath(self._path.segments) 2772 basepath_segments = _normalizepath(baseurl._path.segments) 2773 while len(selfpath_segments) > 1 and len(basepath_segments) > 1 and selfpath_segments[0] == basepath_segments[0]: 2774 del selfpath_segments[0] 2775 del basepath_segments[0] 2776 # does the URL go to the same file? 2777 if selfpath_segments == basepath_segments and self.query == baseurl.query: 2778 # only return the frag 2779 del newurl.path 2780 del newurl.query 2781 else: 2782 # build a path from one file to the other 2783 selfpath_segments[:0] = [".."]*(len(basepath_segments)-1) 2784 if not len(selfpath_segments) or selfpath_segments == [""]: 2785 selfpath_segments = [".", ""] 2786 newurl._path.segments = selfpath_segments 2787 newurl._path = self.path.relative(baseurl.path) 2788 newurl._path.isabs = False 2789 return newurl 2790 2791 def __str__(self): 2792 return self.url 2793 2794 def __repr__(self): 2795 return f"URL({self.url!r})" 2796 2797 def __bool__(self): 2798 """ 2799 Return whether the :class:`URL` is not empty, i.e. whether it is not the 2800 :class:`URL` referring to the start of the current document. 2801 """ 2802 return self.url != "" 2803 2804 def __eq__(self, other): 2805 """ 2806 Return whether two :class:`URL` objects are equal. Note that only 2807 properties relevant for the current scheme will be compared. 2808 """ 2809 if self.__class__ != other.__class__: 2810 return False 2811 if self.scheme != other.scheme: 2812 return False 2813 if self.reg.usehierarchy: 2814 if self.reg.useserver: 2815 selfport = self.port or self.reg.defaultport 2816 otherport = other.port or other.reg.defaultport 2817 if self.userinfo != other.userinfo or self.host != other.host or selfport != otherport: 2818 return False 2819 else: 2820 if self.reg_name != other.reg_name: 2821 return False 2822 if self._path != other._path: 2823 return False 2824 else: 2825 if self.opaque_part != other.opaque_part: 2826 return False 2827 # Use canonical version of (i.e. sorted names and values) 2828 if self.query != other.query: 2829 return False 2830 if self.frag != other.frag: 2831 return False 2832 return True 2833 2834 def __ne__(self, other): 2835 """ 2836 Return whether two :class:`URL` objects are *not* equal. 2837 """ 2838 return not self == other 2839 2840 def __hash__(self): 2841 """ 2842 Return a hash value for :obj:`self`, to be able to use :class:`URL` 2843 objects as dictionary keys. You must be careful not to modify an 2844 :class:`URL` as soon as you use it as a dictionary key. 2845 """ 2846 res = hash(self.scheme) 2847 if self.reg.usehierarchy: 2848 if self.reg.useserver: 2849 res ^= hash(self.userinfo) 2850 res ^= hash(self.host) 2851 res ^= hash(self.port or self.reg.defaultport) 2852 else: 2853 res ^= hash(self.reg_name) 2854 res ^= hash(self._path) 2855 else: 2856 res ^= hash(self.opaque_part) 2857 res ^= hash(self.query) 2858 res ^= hash(self.frag) 2859 return res 2860 2861 def abs(self, scheme=-1): 2862 """ 2863 Return an absolute version of :obj:`self` (works only for local URLs). 2864 2865 If the argument :obj:`scheme` is specified, it will be used for the 2866 resulting URL otherwise the result will have the same scheme as 2867 :obj:`self`. 2868 """ 2869 self._checklocal() 2870 new = self.clone() 2871 new.path = self.path.abs() 2872 if scheme != -1: 2873 new.scheme = scheme 2874 return new 2875 2876 def real(self, scheme=-1): 2877 """ 2878 Return the canonical version of :obj:`self`, eliminating all symbolic 2879 links (works only for local URLs). 2880 2881 If the argument :obj:`scheme` is specified, it will be used for the 2882 resulting URL otherwise the result will have the same scheme as 2883 :obj:`self`. 2884 """ 2885 self._checklocal() 2886 new = self.clone() 2887 new.path = self.path.real() 2888 if scheme != -1: 2889 new.scheme = scheme 2890 return new 2891 2892 def islocal(self): 2893 """ 2894 Return whether :obj:`self` refers to a local file, i.e. whether 2895 :obj:`self` is a relative :class:`URL` or the scheme is ``root`` or 2896 ``file``). 2897 """ 2898 return self.reg.islocal 2899 2900 def _checklocal(self): 2901 if not self.islocal(): 2902 raise ValueError(f"URL {self!r} is not local") 2903 2904 def local(self): 2905 """ 2906 Return :obj:`self` as a local filename (which will only works if 2907 :obj:`self` is local (see :meth:`islocal`). 2908 """ 2909 self._checklocal() 2910 return self.path.local() 2911 2912 def _connect(self, context=None, **kwargs): 2913 return self.reg._connect(self, context=context, **kwargs) 2914 2915 def connect(self, context=None, **kwargs): 2916 """ 2917 Return a :class:`Connection` object for accessing and modifying the 2918 metadata of :obj:`self`. 2919 2920 Whether you get a new connection object, or an existing one depends on 2921 the scheme, the URL itself, and the context passed in (as the 2922 :obj:`context` argument). 2923 """ 2924 return self._connect(context, **kwargs)[0] 2925 2926 def open(self, *args, **kwargs): 2927 """ 2928 Open :obj:`self` for reading or writing. :meth:`open` returns a 2929 :class:`Resource` object. 2930 2931 Which additional parameters are supported depends on the actual resource 2932 created. Some common parameters are: 2933 2934 :obj:`mode` (supported by all resources) 2935 A string indicating how the file is to be opened (just like the 2936 mode argument for the builtin :func:`open`; e.g. ``"rb"`` or 2937 ``"wb"``). 2938 2939 :obj:`context` (supported by all resources) 2940 :meth:`open` needs a :class:`Connection` for this URL which it gets 2941 from a :class:`Context` object. 2942 2943 :obj:`headers` 2944 Additional headers to use for an HTTP request. 2945 2946 :obj:`data` 2947 Request body to use for an HTTP POST request. 2948 2949 :obj:`python` 2950 Name of the Python interpreter to use on the remote side 2951 (used by ``ssh`` URLs) 2952 2953 :obj:`nice` 2954 Nice level for the remove python (used by ``ssh`` URLs) 2955 """ 2956 (connection, kwargs) = self._connect(**kwargs) 2957 if "context" in kwargs: 2958 kwargs = kwargs.copy() 2959 del kwargs["context"] 2960 return connection.open(self, *args, **kwargs) 2961 2962 def openread(self, *args, **kwargs): 2963 return self.open(mode="rb", *args, **kwargs) 2964 2965 def openwrite(self, *args, **kwargs): 2966 return self.open(mode="wb", *args, **kwargs) 2967 2968 def import_(self, name=None): 2969 """ 2970 Import the content of the URL :obj:`self` as a Python module. 2971 2972 :obj:`name` can be used the specify the module name (i.e. the ``__name__`` 2973 attribute of the module). The default determines it from the URL. 2974 """ 2975 if self.islocal(): 2976 filename = self.real().local() 2977 else: 2978 filename = f"/{self.scheme}/{self.server}{self.path}" 2979 return misc.module(self.openread().read(), filename, name) 2980 2981 def __iter__(self): 2982 try: 2983 isdir = self.isdir() 2984 except AttributeError: 2985 isdir = False 2986 if isdir: 2987 return iter(self/self.listdir()) 2988 else: 2989 return iter(self.open()) 2990 2991 # All the following methods need a connection and simply forward the operation to the connection 2992 def stat(self, **kwargs): 2993 return self.connect(**kwargs).stat(self) 2994 2995 def lstat(self, **kwargs): 2996 return self.connect(**kwargs).lstat(self) 2997 2998 def chmod(self, mode, **kwargs): 2999 return self.connect(**kwargs).chmod(self, mode) 3000 3001 def chown(self, owner=None, group=None, **kwargs): 3002 return self.connect(**kwargs).chown(self, owner=owner, group=group) 3003 3004 def lchown(self, owner=None, group=None, **kwargs): 3005 return self.connect(**kwargs).lchown(self, owner=owner, group=group) 3006 3007 def uid(self, **kwargs): 3008 return self.connect(**kwargs).uid(self) 3009 3010 def gid(self, **kwargs): 3011 return self.connect(**kwargs).gid(self) 3012 3013 def owner(self, **kwargs): 3014 return self.connect(**kwargs).owner(self) 3015 3016 def group(self, **kwargs): 3017 return self.connect(**kwargs).group(self) 3018 3019 def mimetype(self, **kwargs): 3020 return self.connect(**kwargs).mimetype(self) 3021 3022 def exists(self, **kwargs): 3023 return self.connect(**kwargs).exists(self) 3024 3025 def isfile(self, **kwargs): 3026 return self.connect(**kwargs).isfile(self) 3027 3028 def isdir(self, **kwargs): 3029 return self.connect(**kwargs).isdir(self) 3030 3031 def islink(self, **kwargs): 3032 return self.connect(**kwargs).islink(self) 3033 3034 def ismount(self, **kwargs): 3035 return self.connect(**kwargs).ismount(self) 3036 3037 def access(self, mode, **kwargs): 3038 return self.connect(**kwargs).access(self, mode) 3039 3040 def size(self, **kwargs): 3041 return self.connect(**kwargs).size(self) 3042 3043 def imagesize(self, **kwargs): 3044 return self.connect(**kwargs).imagesize(self) 3045 3046 def cdate(self, **kwargs): 3047 return self.connect(**kwargs).cdate(self) 3048 3049 def adate(self, **kwargs): 3050 return self.connect(**kwargs).adate(self) 3051 3052 def mdate(self, **kwargs): 3053 return self.connect(**kwargs).mdate(self) 3054 3055 def resheaders(self, **kwargs): 3056 return self.connect(**kwargs).resheaders(self) 3057 3058 def remove(self, **kwargs): 3059 return self.connect(**kwargs).remove(self) 3060 3061 def rmdir(self, **kwargs): 3062 return self.connect(**kwargs).rmdir(self) 3063 3064 def rename(self, target, **kwargs): 3065 return self.connect(**kwargs).rename(self, target) 3066 3067 def link(self, target, **kwargs): 3068 return self.connect(**kwargs).link(self, target) 3069 3070 def symlink(self, target, **kwargs): 3071 return self.connect(**kwargs).symlink(self, target) 3072 3073 def chdir(self, **kwargs): 3074 return self.connect(**kwargs).chdir(self) 3075 3076 def mkdir(self, mode=0o777, **kwargs): 3077 return self.connect(**kwargs).mkdir(self, mode=mode) 3078 3079 def makedirs(self, mode=0o777, **kwargs): 3080 return self.connect(**kwargs).makedirs(self, mode=mode) 3081 3082 def walk(self, beforedir=True, afterdir=False, file=True, enterdir=True, **kwargs): 3083 return self.connect(**kwargs).walk(self, beforedir=beforedir, afterdir=afterdir, file=file, enterdir=enterdir) 3084 3085 def listdir(self, include=None, exclude=None, ignorecase=False, **kwargs): 3086 return self.connect(**kwargs).listdir(self, include=include, exclude=exclude, ignorecase=ignorecase) 3087 3088 def files(self, include=None, exclude=None, ignorecase=False, **kwargs): 3089 return self.connect(**kwargs).files(self, include=include, exclude=exclude, ignorecase=ignorecase) 3090 3091 def dirs(self, include=None, exclude=None, ignorecase=False, **kwargs): 3092 return self.connect(**kwargs).dirs(self, include=include, exclude=exclude, ignorecase=ignorecase) 3093 3094 def walkall(self, include=None, exclude=None, enterdir=None, skipdir=None, ignorecase=False, **kwargs): 3095 return self.connect(**kwargs).walkall(self, include=include, exclude=exclude, enterdir=enterdir, skipdir=skipdir, ignorecase=ignorecase) 3096 3097 def walkfiles(self, include=None, exclude=None, enterdir=None, skipdir=None, ignorecase=False, **kwargs): 3098 return self.connect(**kwargs).walkfiles(self, include=include, exclude=exclude, enterdir=enterdir, skipdir=skipdir, ignorecase=ignorecase) 3099 3100 def walkdirs(self, include=None, exclude=None, enterdir=None, skipdir=None, ignorecase=False, **kwargs): 3101 return self.connect(**kwargs).walkdirs(self, include=include, exclude=exclude, enterdir=enterdir, skipdir=skipdir, ignorecase=ignorecase) 3102 3103warnings.filterwarnings("always", module="url") 3104