# -*- test-case-name: nevow.test.test_url -*-
# Copyright (c) 2004-2007 Divmod.
# See LICENSE for details.

"""
URL parsing, construction and rendering.
"""

import weakref
import urlparse
import urllib

from zope.interface import implements

from twisted.web.util import redirectTo

from nevow import inevow, flat
from nevow.stan import raw
from nevow.flat import serialize
from nevow.context import WovenContext


def _uqf(query):
    """
    Generate decoded C{(name, value)} pairs from a raw query string.

    The string is split on C{'&'}.  A piece containing C{'='} is split on
    its first C{'='} and both halves are decoded with
    C{urllib.unquote_plus}; a non-empty piece without C{'='} yields
    C{(name, None)}.  Empty pieces are skipped.
    """
    for x in query.split('&'):
        if '=' in x:
            yield tuple([urllib.unquote_plus(s) for s in x.split('=', 1)])
        elif x:
            yield (urllib.unquote_plus(x), None)


def unquerify(query):
    """
    Parse a raw query string into a list of decoded C{(name, value)} tuples.

    C{value} is C{None} for arguments given without an C{'='}.
    """
    return list(_uqf(query))


def _requestURI(request):
    """
    Return C{request.prePathURL()} with the request's query string appended.

    The query string is everything after the FIRST C{'?'} in C{request.uri},
    so a literal C{'?'} inside the query string is preserved rather than
    truncating it.
    """
    uri = request.prePathURL()
    if '?' in request.uri:
        uri += '?' + request.uri.split('?', 1)[1]
    return uri


class URL(object):
    """
    Represents a URL and provides a convenient API for modifying its parts.

    A URL is split into a number of distinct parts: scheme, netloc (domain
    name), path segments, query parameters and fragment identifier.

    Methods are provided to modify many of the parts of the URL, especially
    the path and query parameters. Values can be passed to methods as-is;
    encoding and escaping is handled automatically.

    There are a number of ways to create a URL:
        - Standard Python creation, i.e. __init__.
        - fromString, a class method that parses a string.
        - fromContext, a class method that creates a URL to represent the
          current URL in the path traversal process.

    URL instances can be used in a stan tree or to fill template slots. They can
    also be used as a redirect mechanism - simply return an instance from an
    IResource method. See URLRedirectAdapter for details.

    URL subclasses with different constructor signatures should override
    L{cloneURL} to ensure that the numerous instance methods which return
    copies do so correctly. Additionally, the L{fromString}, L{fromContext}
    and L{fromRequest} class methods need overriding.

    @type fragment: C{str}
    @ivar fragment: The fragment portion of the URL, decoded.
    """

    def __init__(self, scheme='http', netloc='localhost', pathsegs=None,
                 querysegs=None, fragment=None):
        """
        @param scheme: the URL scheme, e.g. C{'http'}
        @param netloc: the network location (host and optional port)
        @param pathsegs: list of path segments; defaults to C{['']}.  The
            list is stored as given, not copied.
        @param querysegs: list of C{(name, value)} tuples; defaults to
            C{[]}.  The list is stored as given, not copied.
        @param fragment: the fragment; C{None} is stored as C{''}
        """
        self.scheme = scheme
        self.netloc = netloc
        if pathsegs is None:
            pathsegs = ['']
        self._qpathlist = pathsegs
        if querysegs is None:
            querysegs = []
        self._querylist = querysegs
        if fragment is None:
            fragment = ''
        self.fragment = fragment


    def path():
        def get(self):
            return '/'.join([
                    # Note that this set of safe things is pretty arbitrary.
                    # It is this particular set in order to match that used by
                    # nevow.flat.flatstan.StringSerializer, so that url.path
                    # will give something which is contained by flatten(url).
                    urllib.quote(seg, safe="-_.!*'()")
                    for seg in self._qpathlist])
        doc = """
        The path portion of the URL.
        """
        return get, None, None, doc
    path = property(*path())

    def __eq__(self, other):
        """
        Two URLs are equal when scheme, netloc, path segments, query list
        and fragment are all equal.  Comparison with an object that is not
        an instance of this URL's class is left to the other operand.
        """
        if not isinstance(other, self.__class__):
            return NotImplemented
        for attr in ['scheme', 'netloc', '_qpathlist', '_querylist', 'fragment']:
            if getattr(self, attr) != getattr(other, attr):
                return False
        return True

    def __ne__(self, other):
        """
        Inverse of L{__eq__}.
        """
        if not isinstance(other, self.__class__):
            return NotImplemented
        return not self.__eq__(other)

    def query():
        def get(self):
            parts = []
            for (name, value) in self._querylist:
                # An explicit test instead of the "cond and a or b" idiom,
                # which wrongly falls through to the join (and fails on the
                # None value) when the name is an empty string.
                if value is None:
                    parts.append(name)
                else:
                    parts.append('='.join((name, value)))
            return parts
        doc = """
        The query arguments as a list of strings: C{'name=value'}, or just
        C{'name'} for arguments without a value.  No quoting is applied.
        """
        return get, None, None, doc
    query = property(*query())

    def _pathMod(self, newpathsegs, newqueryparts):
        """
        Return a clone of this URL with the given path segments and query
        list, keeping scheme, netloc and fragment.
        """
        return self.cloneURL(self.scheme,
                             self.netloc,
                             newpathsegs,
                             newqueryparts,
                             self.fragment)


    def cloneURL(self, scheme, netloc, pathsegs, querysegs, fragment):
        """
        Make a new instance of C{self.__class__}, passing along the given
        arguments to its constructor.
        """
        return self.__class__(scheme, netloc, pathsegs, querysegs, fragment)


    ## class methods used to build URL objects ##

    def fromString(klass, st):
        """
        Parse the string C{st} into a new instance of C{klass}.

        Path segments and the fragment are unquoted; the query string is
        parsed with L{unquerify}.
        """
        scheme, netloc, path, query, fragment = urlparse.urlsplit(st)
        u = klass(
            scheme, netloc,
            [urllib.unquote(seg) for seg in path.split('/')[1:]],
            unquerify(query), urllib.unquote(fragment))
        return u
    fromString = classmethod(fromString)

    def fromRequest(klass, request):
        """
        Create a new L{URL} instance which is the same as the URL represented
        by C{request} except that it includes only the path segments which have
        already been processed.
        """
        return klass.fromString(_requestURI(request))
    fromRequest = classmethod(fromRequest)

    def fromContext(klass, context):
        '''Create a URL object that represents the current URL in the traversal
        process.'''
        request = inevow.IRequest(context)
        return klass.fromString(_requestURI(request))
    fromContext = classmethod(fromContext)

    ## path manipulations ##

    def pathList(self, unquote=False, copy=True):
        """
        Return the list of path segments.

        @param unquote: if true, return a new list with every segment
            passed through C{urllib.unquote}
        @param copy: if true (the default), return a copy.  With both
            C{unquote} and C{copy} false the internal list itself is
            returned, so the caller must not modify it.
        """
        result = self._qpathlist
        if unquote:
            result = [urllib.unquote(seg) for seg in result]
        if copy:
            result = result[:]
        return result

    def _editablePathList(self):
        """
        Return a copy of the path segments that always has a last element.

        A URL parsed from a string without any path (for example
        C{'http://foo.com'}) has no segments at all; it is treated like the
        root path C{['']} so that callers may safely use index C{-1}.
        """
        return self.pathList() or ['']

    def sibling(self, path):
        """Construct a url where the given path segment is a sibling of this url
        """
        l = self._editablePathList()
        l[-1] = path
        return self._pathMod(l, self.queryList(0))

    def child(self, path):
        """Construct a url where the given path segment is a child of this url
        """
        l = self._editablePathList()
        if l[-1] == '':
            # A trailing empty segment (trailing slash) is replaced rather
            # than followed, so no doubled slash is produced.
            l[-1] = path
        else:
            l.append(path)
        return self._pathMod(l, self.queryList(0))

    def isRoot(self, pathlist):
        """
        Return true if C{pathlist} is empty or is exactly C{['']}.
        """
        return (pathlist == [''] or not pathlist)

    def parent(self):
        """
        Deprecated alias of L{parentdir}; emits a C{DeprecationWarning}.
        """
        import warnings
        warnings.warn(
            "[v0.4] URL.parent has been deprecated and replaced with parentdir (which does what parent used to do) and up (which does what you probably thought parent would do ;-))",
            DeprecationWarning,
            stacklevel=2)
        return self.parentdir()

    def curdir(self):
        """Construct a url which is a logical equivalent to '.'
        of the current url. For example:

        >>> print URL.fromString('http://foo.com/bar').curdir()
        http://foo.com/
        >>> print URL.fromString('http://foo.com/bar/').curdir()
        http://foo.com/bar/
        """
        l = self._editablePathList()
        if l[-1] != '':
            l[-1] = ''
        return self._pathMod(l, self.queryList(0))

    def up(self):
        """Pop a URL segment from this url.
        """
        l = self.pathList()
        if len(l):
            l.pop()
        return self._pathMod(l, self.queryList(0))

    def parentdir(self):
        """Construct a url which is the parent of this url's directory;
        This is logically equivalent to '..' of the current url.
        For example:

        >>> print URL.fromString('http://foo.com/bar/file').parentdir()
        http://foo.com/
        >>> print URL.fromString('http://foo.com/bar/dir/').parentdir()
        http://foo.com/bar/
        """
        l = self._editablePathList()
        if not self.isRoot(l) and l[-1] == '':
            # we are a directory: drop the segment before the trailing ''
            del l[-2]
        else:
            # we are a file, such as http://example.com/foo/bar our
            # parent directory is http://example.com/
            l.pop()
            if self.isRoot(l):
                l.append('')
            else:
                l[-1] = ''
        return self._pathMod(l, self.queryList(0))

    def click(self, href):
        """Build a path by merging 'href' and this path.

        Return a path which is the URL where a browser would presumably
        take you if you clicked on a link with an 'href' as given.
        """
        scheme, netloc, path, query, fragment = urlparse.urlsplit(href)

        if (scheme, netloc, path, query, fragment) == ('', '', '', '', ''):
            return self

        query = unquerify(query)

        if scheme:
            # Absolute URL: nothing is inherited from this URL.
            if path and path[0] == '/':
                path = path[1:]
            return self.cloneURL(
                scheme, netloc, [raw(seg) for seg in path.split('/')],
                query, fragment)
        else:
            scheme = self.scheme

        if not netloc:
            netloc = self.netloc
            if not path:
                path = self.path
                if not query:
                    query = self._querylist
                    if not fragment:
                        fragment = self.fragment
            else:
                if path[0] == '/':
                    path = path[1:]
                else:
                    # Relative path: replace the last segment of this URL.
                    l = self._editablePathList()
                    l[-1] = path
                    path = '/'.join(l)
        elif path and path[0] == '/':
            # Network-path reference ('//host/path'): drop the leading
            # slash, as for absolute URLs, so that splitting below does not
            # produce a spurious empty first segment.
            path = path[1:]

        path = normURLPath(path)
        return self.cloneURL(
            scheme, netloc, [raw(seg) for seg in path.split('/')],
            query, fragment)

    ## query manipulation ##

    def queryList(self, copy=True):
        """Return current query as a list of tuples."""
        if copy:
            return self._querylist[:]
        return self._querylist

    # FIXME: here we call str() on query arg values: is this right?

    def add(self, name, value=None):
        """Add a query argument with the given value
        None indicates that the argument has no value
        """
        q = self.queryList()
        q.append((name, value))
        return self._pathMod(self.pathList(copy=False), q)

    def replace(self, name, value=None):
        """
        Remove all existing occurrences of the query argument 'name', *if it
        exists*, then add the argument with the given value.

        C{None} indicates that the argument has no value.
        """
        ql = self.queryList(False)
        ## Preserve the original position of the query key in the list
        i = 0
        for (k, v) in ql:
            if k == name:
                break
            i += 1
        # If the name was not found, i == len(ql) and the insert appends.
        q = [pair for pair in ql if pair[0] != name]
        q.insert(i, (name, value))
        return self._pathMod(self.pathList(copy=False), q)

    def remove(self, name):
        """Remove all query arguments with the given name
        """
        return self._pathMod(
            self.pathList(copy=False),
            [pair for pair in self.queryList(False) if pair[0] != name])

    def clear(self, name=None):
        """Remove all existing query arguments

        If C{name} is given, only the arguments with that name are removed.
        """
        if name is None:
            q = []
        else:
            q = [pair for pair in self.queryList(False) if pair[0] != name]
        return self._pathMod(self.pathList(copy=False), q)

    ## scheme manipulation ##

    def secure(self, secure=True, port=None):
        """Modify the scheme to https/http and return the new URL.

        @param secure: choose between https and http, default to True (https)
        @param port: port, override the scheme's normal port
        """

        # Choose the scheme and default port.
        if secure:
            scheme, defaultPort = 'https', 443
        else:
            scheme, defaultPort = 'http', 80

        # Rebuild the netloc with port if not default.
        netloc = self.netloc.split(':', 1)[0]
        if port is not None and port != defaultPort:
            netloc = '%s:%d' % (netloc, port)

        return self.cloneURL(
            scheme, netloc, self._qpathlist, self._querylist, self.fragment)

    ## fragment/anchor manipulation

    def anchor(self, anchor=None):
        """
        Modify the fragment/anchor and return a new URL. An anchor of
        C{None} (the default) or C{''} (the empty string) will remove the
        current anchor.
        """
        return self.cloneURL(
            self.scheme, self.netloc, self._qpathlist, self._querylist, anchor)

    ## object protocol override ##

    def __str__(self):
        """
        Return the flattened (serialized) form of this URL.
        """
        return str(flat.flatten(self))

    def __repr__(self):
        return (
            '%s(scheme=%r, netloc=%r, pathsegs=%r, querysegs=%r, fragment=%r)'
            % (self.__class__,
               self.scheme,
               self.netloc,
               self._qpathlist,
               self._querylist,
               self.fragment))


def normURLPath(path):
    """
    Normalise the URL path by resolving segments of '.' and '..'.
    """
    segs = []

    pathSegs = path.split('/')

    for seg in pathSegs:
        if seg == '.':
            pass
        elif seg == '..':
            # '..' at the top of the path is silently dropped.
            if segs:
                segs.pop()
        else:
            segs.append(seg)

    # A path ending in '.' or '..' names a directory: keep a trailing slash.
    if pathSegs[-1:] in (['.'], ['..']):
        segs.append('')

    return '/'.join(segs)


class URLOverlay(object):
    def __init__(self, urlaccessor, doc=None, dolater=None, keep=None):
        """A Proto like object for abstractly specifying urls in stan trees.

        @param urlaccessor: a function which takes context and returns a URL

        @param doc: a string documenting this URLOverlay instance's usage

        @param dolater: a list of tuples of (command, args, kw) where
        command is a string, args is a tuple and kw is a dict; when the
        URL is returned from urlaccessor during rendering, these
        methods will be applied to the URL in order

        @param keep: a list of request argument names; during rendering
        every value the request has for each of these names is added to
        the URL as a query argument
        """
        if doc is not None:
            self.__doc__ = doc
        self.urlaccessor = urlaccessor
        if dolater is None:
            dolater = []
        self.dolater = dolater
        if keep is None:
            keep = []
        self._keep = keep

    def addCommand(self, cmd, args, kw):
        """
        Return a new overlay with C{(cmd, args, kw)} appended to the list
        of deferred commands.  This overlay is not modified.
        """
        dl = self.dolater[:]
        dl.append((cmd, args, kw))
        return self.__class__(self.urlaccessor, dolater=dl, keep=self._keep[:])

    def keep(self, *args):
        """A list of arguments to carry over from the previous url.
        """
        K = self._keep[:]
        K.extend(args)
        return self.__class__(self.urlaccessor, dolater=self.dolater[:], keep=K)


def createForwarder(cmd):
    """
    Return a method for L{URLOverlay} which records a call to the URL
    method named C{cmd} (via C{addCommand}) instead of performing it.
    """
    return lambda self, *args, **kw: self.addCommand(cmd, args, kw)


# Give URLOverlay a recording counterpart for each of these method names.
for cmd in [
    'sibling', 'child', 'parent', 'here', 'curdir', 'click', 'add',
    'replace', 'clear', 'remove', 'secure', 'anchor', 'up', 'parentdir'
    ]:
    setattr(URLOverlay, cmd, createForwarder(cmd))


def hereaccessor(context):
    """
    Return the URL of the current request with all query arguments removed.
    """
    return URL.fromContext(context).clear()
here = URLOverlay(
    hereaccessor,
    "A lazy url construction object representing the current page's URL. "
    "The URL which will be used will be determined at render time by "
    "looking at the request. Any query parameters will be "
    "cleared automatically.")


def gethereaccessor(context):
    """
    Return the URL of the current request, query arguments included.
    """
    return URL.fromContext(context)
gethere = URLOverlay(gethereaccessor,
    "A lazy url construction object like 'here' except query parameters "
    "are preserved. Useful for constructing a URL to this same object "
    "when query parameters need to be preserved but modified slightly.")



def viewhereaccessor(context):
    """
    Return the L{hereaccessor} URL with the commands of every
    C{IViewParameters} found in the context applied to it.

    The context is searched at increasing depths, starting at 1, until
    C{context.locate} raises C{KeyError}.
    """
    U = hereaccessor(context)
    i = 1
    while True:
        try:
            params = context.locate(inevow.IViewParameters, depth=i)
        except KeyError:
            break
        for (cmd, args, kw) in iter(params):
            U = getattr(U, cmd)(*args, **kw)
        i += 1
    return U
viewhere = URLOverlay(viewhereaccessor,
    "A lazy url construction object like 'here' IViewParameters objects "
    "are looked up in the context during rendering. Commands provided by "
    "any found IViewParameters objects are applied to the URL object before "
    "rendering it.")


def rootaccessor(context):
    """
    Return the URL given by the request's C{getRootURL()}, or, when that
    is C{None}, the current URL clicked to C{'/'}.
    """
    req = context.locate(inevow.IRequest)
    root = req.getRootURL()
    if root is None:
        return URL.fromContext(context).click('/')
    return URL.fromString(root)
root = URLOverlay(rootaccessor,
    "A lazy URL construction object representing the root of the "
    "application. Normally, this will just be the logical '/', but if "
    "request.rememberRootURL() has previously been used in "
    "the request traversal process, the url of the resource "
    "where rememberRootURL was called will be used instead.")


def URLSerializer(original, context):
    """
    Serialize the given L{URL}.

    Unicode path, query and fragment components are handled according to the
    IRI standard (RFC 3987).
    """
    def _maybeEncode(s):
        if isinstance(s, unicode):
            s = s.encode('utf-8')
        return s
    urlContext = WovenContext(parent=context, precompile=context.precompile, inURL=True)
    if original.scheme:
        # TODO: handle Unicode (see #2409)
        yield "%s://%s" % (original.scheme, original.netloc)
    for pathsegment in original._qpathlist:
        yield '/'
        yield serialize(_maybeEncode(pathsegment), urlContext)
    query = original._querylist
    if query:
        yield '?'
        first = True
        for key, value in query:
            if not first:
                # xhtml can't handle unescaped '&'
                if context.isAttrib is True:
                    yield '&amp;'
                else:
                    yield '&'
            else:
                first = False
            yield serialize(_maybeEncode(key), urlContext)
            if value is not None:
                yield '='
                yield serialize(_maybeEncode(value), urlContext)
    if original.fragment:
        yield "#"
        yield serialize(_maybeEncode(original.fragment), urlContext)


def URLOverlaySerializer(original, context):
    """
    Serialize the given L{URLOverlay}.

    While precompiling, the overlay itself is yielded unchanged.  Otherwise
    the URL is obtained from the overlay's accessor, the recorded commands
    are applied in order, the values of the kept request arguments are
    added, and the resulting URL is serialized.
    """
    if context.precompile:
        yield original
    else:
        url = original.urlaccessor(context)
        for (cmd, args, kw) in original.dolater:
            url = getattr(url, cmd)(*args, **kw)
        req = context.locate(inevow.IRequest)
        for key in original._keep:
            for value in req.args.get(key, []):
                url = url.add(key, value)
        yield serialize(url, context)


## This is totally unfinished and doesn't work yet.
#class IURLGenerator(compy.Interface):
#    pass


class URLGenerator:
    """
    A weak-keyed mapping from objects to the location they are mounted at.

    The mapping is not included in the pickled state; it is empty after
    unpickling.
    """
    #implements(IURLGenerator)

    def __init__(self):
        self._objmap = weakref.WeakKeyDictionary()

    def objectMountedAt(self, obj, at):
        """
        Record that C{obj} is mounted at C{at}.
        """
        self._objmap[obj] = at

    def url(self, obj):
        """
        Return the location recorded for C{obj}, or C{None} if there is
        none or the lookup raises C{TypeError}.
        """
        try:
            return self._objmap.get(obj, None)
        except TypeError:
            return None

    __call__ = url

    def __getstate__(self):
        d = self.__dict__.copy()
        del d['_objmap']
        return d

    def __setstate__(self, state):
        self.__dict__ = state
        self._objmap = weakref.WeakKeyDictionary()


class URLRedirectAdapter:
    """
    Adapter for URL and URLOverlay instances that results in an HTTP
    redirect.

    Whenever a URL or URLOverlay instance is returned from locateChild or
    renderHTTP an HTTP response is generated that causes a redirect to
    the adapted URL. Any remaining segments of the current request are
    consumed.

    Note that URLOverlay instances are lazy so their use might not be entirely
    obvious when returned from locateChild, i.e. url.here means the request's
    URL and not the URL of the resource that is self.

    Here are some examples::

        def renderHTTP(self, ctx):
            # Redirect to my immediate parent
            return url.here.up()

        def locateChild(self, ctx, segments):
            # Redirect to the URL of this resource
            return url.URL.fromContext(ctx)
    """
    implements(inevow.IResource)

    def __init__(self, original):
        self.original = original

    def locateChild(self, ctx, segments):
        # Returning no remaining segments consumes the rest of the path.
        return self, ()

    def renderHTTP(self, ctx):
        # The URL may contain deferreds so we need to flatten it using
        # flattenFactory that will collect the bits into the bits list and
        # call flattened to finish.
        bits = []
        def flattened(spam):
            # Join the bits to make a complete URL.
            u = ''.join(bits)
            # It might also be relative so resolve it against the current URL
            # and flatten it again.
            u = flat.flatten(URL.fromContext(ctx).click(u), ctx)
            return redirectTo(u, inevow.IRequest(ctx))
        return flat.flattenFactory(self.original, ctx, bits.append, flattened)