1# -*- test-case-name: nevow.test.test_url -*-
2# Copyright (c) 2004-2007 Divmod.
3# See LICENSE for details.
4
5"""
6URL parsing, construction and rendering.
7"""
8
9import weakref
10import urlparse
11import urllib
12
13from zope.interface import implements
14
15from twisted.web.util import redirectTo
16
17from nevow import inevow, flat
18from nevow.stan import raw
19from nevow.flat import serialize
20from nevow.context import WovenContext
21
22def _uqf(query):
23    for x in query.split('&'):
24        if '=' in x:
25            yield tuple( [urllib.unquote_plus(s) for s in x.split('=', 1)] )
26        elif x:
27            yield (urllib.unquote_plus(x), None)
28unquerify = lambda query: list(_uqf(query))
29
30
class URL(object):
    """
    Represents a URL and provides a convenient API for modifying its parts.

    A URL is split into a number of distinct parts: scheme, netloc (domain
    name), path segments, query parameters and fragment identifier.

    Methods are provided to modify many of the parts of the URL, especially
    the path and query parameters. Values can be passed to methods as-is;
    encoding and escaping is handled automatically.

    There are a number of ways to create a URL:
        - Standard Python creation, i.e. __init__.
        - fromString, a class method that parses a string.
        - fromRequest, a class method that builds a URL from a request.
        - fromContext, a class method that creates a URL to represent the
          current URL in the path traversal process.

    URL instances can be used in a stan tree or to fill template slots. They can
    also be used as a redirect mechanism - simply return an instance from an
    IResource method. See URLRedirectAdapter for details.

    The modifying methods never change the instance they are called on; they
    return a new instance built with L{cloneURL}.

    URL subclasses with different constructor signatures should override
    L{cloneURL} to ensure that the numerous instance methods which return
    copies do so correctly.  Additionally, the L{fromString}, L{fromContext}
    and L{fromRequest} class methods need overriding.

    @type fragment: C{str}
    @ivar fragment: The fragment portion of the URL, decoded.
    """

    def __init__(self, scheme='http', netloc='localhost', pathsegs=None,
                 querysegs=None, fragment=None):
        """
        @param scheme: the URL scheme.
        @param netloc: the network location part of the URL.
        @param pathsegs: a list of path segments; C{None} means the single
            empty segment C{['']}.  The list is stored as given, not copied.
        @param querysegs: a list of C{(name, value)} tuples, where a value
            of C{None} marks an argument without a value; C{None} means no
            query arguments.  The list is stored as given, not copied.
        @param fragment: the fragment identifier; C{None} is stored as
            C{''}.
        """
        self.scheme = scheme
        self.netloc = netloc
        if pathsegs is None:
            pathsegs = ['']
        self._qpathlist = pathsegs
        if querysegs is None:
            querysegs = []
        self._querylist = querysegs
        if fragment is None:
            fragment = ''
        self.fragment = fragment


    def path():
        def get(self):
            return '/'.join([
                    # Note that this set of safe things is pretty arbitrary.
                    # It is this particular set in order to match that used by
                    # nevow.flat.flatstan.StringSerializer, so that url.path
                    # will give something which is contained by flatten(url).
                    urllib.quote(seg, safe="-_.!*'()") for seg in self._qpathlist])
        doc = """
        The path portion of the URL.
        """
        return get, None, None, doc
    path = property(*path())

    def __eq__(self, other):
        """
        Two URLs are equal when scheme, netloc, path segments, query
        arguments and fragment all compare equal.  Comparison against
        anything that is not an instance of this URL's class is left to the
        other operand.
        """
        if not isinstance(other, self.__class__):
            return NotImplemented
        for attr in ['scheme', 'netloc', '_qpathlist', '_querylist', 'fragment']:
            if getattr(self, attr) != getattr(other, attr):
                return False
        return True

    def __ne__(self, other):
        """
        The inverse of L{__eq__}.
        """
        if not isinstance(other, self.__class__):
            return NotImplemented
        return not self.__eq__(other)

    def query():
        def get(self):
            result = []
            for (name, value) in self._querylist:
                if value is None:
                    # An explicit test, rather than the and/or idiom, so
                    # that an empty name without a value is returned as-is
                    # instead of being joined with None.
                    result.append(name)
                else:
                    result.append('='.join((name, value)))
            return result
        doc = """
        The query arguments as a list of strings: C{'name=value'} for
        arguments which have a value and just C{'name'} for those which do
        not.
        """
        return get, None, None, doc
    query = property(*query())

    def _pathMod(self, newpathsegs, newqueryparts):
        """
        Return a copy of this URL with the path segments and query
        arguments replaced by the given lists.
        """
        return self.cloneURL(self.scheme,
                             self.netloc,
                             newpathsegs,
                             newqueryparts,
                             self.fragment)


    def cloneURL(self, scheme, netloc, pathsegs, querysegs, fragment):
        """
        Make a new instance of C{self.__class__}, passing along the given
        arguments to its constructor.
        """
        return self.__class__(scheme, netloc, pathsegs, querysegs, fragment)


    def _navigablePathList(self):
        """
        Return a copy of the path segments which always has a last segment
        to work on.

        A URL without any path (for example one parsed from
        C{'http://example.com'}, or the result of L{up} on a root URL) has
        an empty segment list.  For navigation it is treated like the root,
        C{['']}, so that callers indexing the last segment do not fail.
        """
        l = self.pathList()
        if not l:
            l = ['']
        return l


    ## class methods used to build URL objects ##

    def _requestURI(request):
        """
        Return C{request.prePathURL()} followed by the query string of
        C{request.uri}, if it has one.

        The query string is everything after the I{first} C{'?'}, because
        the query itself may contain further C{'?'} characters.
        """
        uri = request.prePathURL()
        if '?' in request.uri:
            uri += '?' + request.uri.split('?', 1)[1]
        return uri
    _requestURI = staticmethod(_requestURI)

    def fromString(klass, st):
        """
        Parse the string C{st} into a new instance.

        The path is split on C{'/'} and the piece before the first slash is
        dropped; the remaining segments and the fragment are unquoted, and
        the query is decoded with L{unquerify}.
        """
        scheme, netloc, path, query, fragment = urlparse.urlsplit(st)
        u = klass(
            scheme, netloc,
            [urllib.unquote(seg) for seg in path.split('/')[1:]],
            unquerify(query), urllib.unquote(fragment))
        return u
    fromString = classmethod(fromString)

    def fromRequest(klass, request):
        """
        Create a new L{URL} instance which is the same as the URL represented
        by C{request} except that it includes only the path segments which have
        already been processed.
        """
        return klass.fromString(klass._requestURI(request))
    fromRequest = classmethod(fromRequest)

    def fromContext(klass, context):
        '''Create a URL object that represents the current URL in the traversal
        process.'''
        request = inevow.IRequest(context)
        return klass.fromString(klass._requestURI(request))
    fromContext = classmethod(fromContext)

    ## path manipulations ##

    def pathList(self, unquote=False, copy=True):
        """
        Return the list of path segments.

        @param unquote: if true, return a new list with each segment passed
            through C{urllib.unquote}.
        @param copy: if true (the default), return a copy; otherwise, when
            C{unquote} is false, the internal list itself is returned and
            must not be modified.
        """
        result = self._qpathlist
        if unquote:
            result = [urllib.unquote(seg) for seg in result]
        if copy:
            result = result[:]
        return result

    def sibling(self, path):
        """Construct a url where the given path segment is a sibling of this url
        """
        l = self._navigablePathList()
        l[-1] = path
        return self._pathMod(l, self.queryList(0))

    def child(self, path):
        """Construct a url where the given path segment is a child of this url
        """
        l = self._navigablePathList()
        if l[-1] == '':
            # A trailing empty segment is the "directory" slot; fill it
            # rather than producing an empty segment in the middle.
            l[-1] = path
        else:
            l.append(path)
        return self._pathMod(l, self.queryList(0))

    def isRoot(self, pathlist):
        """
        Return a true value if C{pathlist} is empty or is the single empty
        segment C{['']}.
        """
        return (pathlist == [''] or not pathlist)

    def parent(self):
        """
        Deprecated; issues a C{DeprecationWarning} and returns
        L{parentdir}().
        """
        import warnings
        warnings.warn(
            "[v0.4] URL.parent has been deprecated and replaced with parentdir (which does what parent used to do) and up (which does what you probably thought parent would do ;-))",
            DeprecationWarning,
            stacklevel=2)
        return self.parentdir()

    def curdir(self):
        """Construct a url which is a logical equivalent to '.'
        of the current url. For example:

        >>> print URL.fromString('http://foo.com/bar').curdir()
        http://foo.com/
        >>> print URL.fromString('http://foo.com/bar/').curdir()
        http://foo.com/bar/
        """
        l = self._navigablePathList()
        if l[-1] != '':
            l[-1] = ''
        return self._pathMod(l, self.queryList(0))

    def up(self):
        """Pop a URL segment from this url.
        """
        l = self.pathList()
        if len(l):
            l.pop()
        return self._pathMod(l, self.queryList(0))

    def parentdir(self):
        """Construct a url which is the parent of this url's directory;
        This is logically equivalent to '..' of the current url.
        For example:

        >>> print URL.fromString('http://foo.com/bar/file').parentdir()
        http://foo.com/
        >>> print URL.fromString('http://foo.com/bar/dir/').parentdir()
        http://foo.com/bar/
        """
        l = self._navigablePathList()
        if not self.isRoot(l) and l[-1] == '':
            del l[-2]
        else:
            # we are a file, such as http://example.com/foo/bar our
            # parent directory is http://example.com/
            l.pop()
            if self.isRoot(l): l.append('')
            else: l[-1] = ''
        return self._pathMod(l, self.queryList(0))

    def click(self, href):
        """Build a path by merging 'href' and this path.

        Return a path which is the URL where a browser would presumably
        take you if you clicked on a link with an 'href' as given.
        """
        scheme, netloc, path, query, fragment = urlparse.urlsplit(href)

        if (scheme, netloc, path, query, fragment) == ('', '', '', '', ''):
            return self

        query = unquerify(query)

        if scheme:
            # An href with its own scheme replaces this URL entirely.
            if path and path[0] == '/':
                path = path[1:]
            return self.cloneURL(
                scheme, netloc, [raw(seg) for seg in path.split('/')],
                query, fragment)
        else:
            scheme = self.scheme

        if not netloc:
            netloc = self.netloc
            if not path:
                # Each part missing from the href is inherited from this
                # URL, but only while every part before it is missing too.
                path = self.path
                if not query:
                    query = self._querylist
                    if not fragment:
                        fragment = self.fragment
            else:
                if path[0] == '/':
                    path = path[1:]
                else:
                    # A relative path replaces this URL's last segment.
                    l = self._navigablePathList()
                    l[-1] = path
                    path = '/'.join(l)

        path = normURLPath(path)
        return self.cloneURL(
            scheme, netloc, [raw(seg) for seg in path.split('/')],
            query, fragment)

    ## query manipulation ##

    def queryList(self, copy=True):
        """Return current query as a list of tuples."""
        if copy:
            return self._querylist[:]
        return self._querylist

    # FIXME: here we call str() on query arg values: is this right?

    def add(self, name, value=None):
        """Add a query argument with the given value
        None indicates that the argument has no value
        """
        q = self.queryList()
        q.append((name, value))
        return self._pathMod(self.pathList(copy=False), q)

    def replace(self, name, value=None):
        """
        Remove all existing occurrences of the query argument 'name', *if it
        exists*, then add the argument with the given value.

        C{None} indicates that the argument has no value.
        """
        ql = self.queryList(False)
        ## Preserve the original position of the query key in the list
        i = 0
        for (k, v) in ql:
            if k == name:
                break
            i += 1
        q = [item for item in ql if item[0] != name]
        q.insert(i, (name, value))
        return self._pathMod(self.pathList(copy=False), q)

    def remove(self, name):
        """Remove all query arguments with the given name
        """
        return self._pathMod(
            self.pathList(copy=False),
            [item for item in self.queryList(False) if item[0] != name])

    def clear(self, name=None):
        """Remove all existing query arguments

        If C{name} is given, only the arguments with that name are removed.
        """
        if name is None:
            q = []
        else:
            q = [item for item in self.queryList(False) if item[0] != name]
        return self._pathMod(self.pathList(copy=False), q)

    ## scheme manipulation ##

    def secure(self, secure=True, port=None):
        """Modify the scheme to https/http and return the new URL.

        Any port already present in the netloc is dropped.

        @param secure: choose between https and http, default to True (https)
        @param port: port, override the scheme's normal port
        """

        # Choose the scheme and default port.
        if secure:
            scheme, defaultPort = 'https', 443
        else:
            scheme, defaultPort = 'http', 80

        # Strip an existing port.  Only a trailing ':digits' (or a bare
        # trailing ':') is a port; looking at the last colon keeps IPv6
        # literals such as '[::1]' and 'user:password@' prefixes intact.
        netloc = self.netloc
        colon = netloc.rfind(':')
        if colon != -1:
            tail = netloc[colon + 1:]
            if tail == '' or tail.isdigit():
                netloc = netloc[:colon]

        # Rebuild the netloc with port if not default.
        if port is not None and port != defaultPort:
            netloc = '%s:%d' % (netloc, port)

        return self.cloneURL(
            scheme, netloc, self._qpathlist, self._querylist, self.fragment)

    ## fragment/anchor manipulation

    def anchor(self, anchor=None):
        """
        Modify the fragment/anchor and return a new URL. An anchor of
        C{None} (the default) or C{''} (the empty string) will remove the
        current anchor.
        """
        return self.cloneURL(
            self.scheme, self.netloc, self._qpathlist, self._querylist, anchor)

    ## object protocol override ##

    def __str__(self):
        """
        Return the flattened form of this URL as a string.
        """
        return str(flat.flatten(self))

    def __repr__(self):
        return (
            '%s(scheme=%r, netloc=%r, pathsegs=%r, querysegs=%r, fragment=%r)'
            % (self.__class__,
               self.scheme,
               self.netloc,
               self._qpathlist,
               self._querylist,
               self.fragment))
380
381
def normURLPath(path):
    """
    Resolve the C{'.'} and C{'..'} segments of a C{'/'}-separated path.

    A C{'.'} segment is dropped.  A C{'..'} segment drops the segment kept
    before it, if there is one.  When the path ends in C{'.'} or C{'..'} an
    empty segment is appended, so the result ends with a slash.
    """
    pieces = path.split('/')
    resolved = []

    for piece in pieces:
        if piece == '..':
            # With nothing left to climb out of, '..' is simply ignored.
            if resolved:
                resolved.pop()
        elif piece != '.':
            resolved.append(piece)

    # str.split always returns at least one piece, so pieces[-1] is safe.
    if pieces[-1] == '.' or pieces[-1] == '..':
        resolved.append('')

    return '/'.join(resolved)
403
404
class URLOverlay(object):
    # Records a URL accessor together with the commands and query keys to
    # apply to the URL it returns.  The recording methods return a new
    # overlay and leave the one they are called on untouched.
    #
    # The class deliberately has no docstring of its own: an instance
    # created without a `doc` argument falls back to the class __doc__.

    def __init__(self, urlaccessor, doc=None, dolater=None, keep=None):
        """A Proto like object for abstractly specifying urls in stan trees.

        @param urlaccessor: a function which takes context and returns a URL

        @param doc: a string documenting this URLOverlay instance's usage;
        when given it becomes the instance's C{__doc__}

        @param dolater: a list of tuples of (command, args, kw) where
        command is a string, args is a tuple and kw is a dict; when the
        URL is returned from urlaccessor during rendering, these
        methods will be applied to the URL in order

        @param keep: a list of query argument names to carry over (see
        L{keep})
        """
        self.urlaccessor = urlaccessor
        if doc is not None:
            self.__doc__ = doc
        if keep is None:
            keep = []
        if dolater is None:
            dolater = []
        self.dolater = dolater
        self._keep = keep

    def addCommand(self, cmd, args, kw):
        """
        Return a new overlay whose command list is this one's followed by
        C{(cmd, args, kw)}.
        """
        queued = self.dolater + [(cmd, args, kw)]
        return self.__class__(
            self.urlaccessor, dolater=queued, keep=self._keep[:])

    def keep(self, *args):
        """A list of arguments to carry over from the previous url.
        """
        carried = self._keep + list(args)
        return self.__class__(
            self.urlaccessor, dolater=self.dolater[:], keep=carried)
439
440
def createForwarder(cmd):
    """
    Return a method-style function which, called as
    C{f(self, *args, **kw)}, returns C{self.addCommand(cmd, args, kw)}.
    """
    def forward(self, *args, **kw):
        return self.addCommand(cmd, args, kw)
    return forward
443
444
# Install one forwarding method on URLOverlay per command name, so that
# calling e.g. overlay.child('x') records the command through addCommand
# rather than running it straight away.
# NOTE(review): 'here' is listed, but the URL class in this file defines no
# method of that name -- confirm whether it is still meant to be forwarded.
for cmd in (
    'sibling', 'child', 'parent', 'here', 'curdir', 'click', 'add',
    'replace', 'clear', 'remove', 'secure', 'anchor', 'up', 'parentdir',
    ):
    setattr(URLOverlay, cmd, createForwarder(cmd))
450
451
def hereaccessor(context):
    """
    Return L{URL.fromContext} of C{context} with all of its query arguments
    cleared.
    """
    current = URL.fromContext(context)
    return current.clear()
# Lazy overlay built on hereaccessor.
here = URLOverlay(
    hereaccessor,
    doc=(
    "A lazy url construction object representing the current page's URL. "
    "The URL which will be used will be determined at render time by "
    "looking at the request. Any query parameters will be "
    "cleared automatically."))
460
461
def gethereaccessor(context):
    """
    Return L{URL.fromContext} of C{context} unchanged, query arguments
    included.
    """
    current = URL.fromContext(context)
    return current
# Lazy overlay built on gethereaccessor.
gethere = URLOverlay(
    gethereaccessor,
    doc=(
    "A lazy url construction object like 'here' except query parameters "
    "are preserved. Useful for constructing a URL to this same object "
    "when query parameters need to be preserved but modified slightly."))
468
469
470
def viewhereaccessor(context):
    """
    Return the L{hereaccessor} URL for C{context} after applying the
    commands of every C{inevow.IViewParameters} found in the context.

    The context is searched with C{depth} 1, 2, 3, ... until
    C{context.locate} raises C{KeyError}.  Each object found is iterated
    for C{(cmd, args, kw)} tuples, and each tuple is applied to the URL as
    a method call.
    """
    result = hereaccessor(context)
    depth = 1
    while True:
        # Only the lookup is guarded: a KeyError raised while applying a
        # command must still propagate to the caller.
        try:
            params = context.locate(inevow.IViewParameters, depth=depth)
        except KeyError:
            return result
        for (method, args, kw) in params:
            result = getattr(result, method)(*args, **kw)
        depth += 1
# Lazy overlay built on viewhereaccessor.
viewhere = URLOverlay(
    viewhereaccessor,
    doc=(
    "A lazy url construction object like 'here' IViewParameters objects "
    "are looked up in the context during rendering. Commands provided by "
    "any found IViewParameters objects are applied to the URL object before "
    "rendering it."))
488
489
def rootaccessor(context):
    """
    Return the URL of the application root for C{context}.

    If the request's C{getRootURL()} returns something other than C{None},
    that value is parsed with L{URL.fromString}.  Otherwise the result is
    the context's URL with C{'/'} clicked on it.
    """
    request = context.locate(inevow.IRequest)
    remembered = request.getRootURL()
    if remembered is not None:
        return URL.fromString(remembered)
    return URL.fromContext(context).click('/')
# Lazy overlay built on rootaccessor.
root = URLOverlay(
    rootaccessor,
    doc=(
    "A lazy URL construction object representing the root of the "
    "application. Normally, this will just be the logical '/', but if "
    "request.rememberRootURL() has previously been used in "
    "the request traversal process, the url of the resource "
    "where rememberRootURL was called will be used instead."))
502
503
def URLSerializer(original, context):
    """
    Serialize the given L{URL}.

    Unicode path, query and fragment components are handled according to the
    IRI standard (RFC 3987).

    This is a generator of the pieces of the serialized URL: the scheme and
    netloc (only if the URL has a scheme), then each path segment preceded
    by C{'/'}, then the query arguments and finally the fragment, if any.

    @param original: the L{URL} to serialize.
    @param context: the context being rendered in.  Components are
        serialized in a child context created with C{inURL=True}.
    """
    def _maybeEncode(s):
        # Unicode components are sent as UTF-8 bytes; anything else is
        # passed through unchanged.
        if isinstance(s, unicode):
            s = s.encode('utf-8')
        return s
    urlContext = WovenContext(parent=context, precompile=context.precompile, inURL=True)
    if original.scheme:
        # TODO: handle Unicode (see #2409)
        yield "%s://%s" % (original.scheme, original.netloc)
    for pathsegment in original._qpathlist:
        yield '/'
        yield serialize(_maybeEncode(pathsegment), urlContext)
    query = original._querylist
    if query:
        yield '?'
        first = True
        for key, value in query:
            if not first:
                # xhtml can't handle unescaped '&', so inside an attribute
                # value the separator is written as an entity reference.
                if context.isAttrib is True:
                    yield '&amp;'
                else:
                    yield '&'
            else:
                first = False
            yield serialize(_maybeEncode(key), urlContext)
            if value is not None:
                # An argument whose value is None is written as a bare name.
                yield '='
                yield serialize(_maybeEncode(value), urlContext)
    if original.fragment:
        yield "#"
        yield serialize(_maybeEncode(original.fragment), urlContext)
542
543
def URLOverlaySerializer(original, context):
    """
    Serialize a L{URLOverlay}.

    While C{context.precompile} is true the overlay itself is yielded
    untouched.  Otherwise the overlay's accessor is called to get a URL,
    its recorded commands are applied in order, every value of each kept
    query key found in the request's C{args} is added, and the serialized
    result is yielded.
    """
    if context.precompile:
        yield original
        return

    url = original.urlaccessor(context)
    for (method, args, kw) in original.dolater:
        url = getattr(url, method)(*args, **kw)

    request = context.locate(inevow.IRequest)
    for key in original._keep:
        # A kept key which is absent from the request adds nothing.
        for value in request.args.get(key, []):
            url = url.add(key, value)

    yield serialize(url, context)
556
557
558## This is totally unfinished and doesn't work yet.
559#class IURLGenerator(compy.Interface):
560#    pass
561
562
class URLGenerator:
    # Remembers where objects have been mounted, using a
    # weakref.WeakKeyDictionary keyed on the objects themselves.
    #implements(IURLGenerator)

    def __init__(self):
        self._objmap = weakref.WeakKeyDictionary()

    def objectMountedAt(self, obj, at):
        """
        Record C{at} as the location of C{obj}.
        """
        self._objmap[obj] = at

    def url(self, obj):
        """
        Return the location recorded for C{obj}, or C{None} if there is
        none.  A C{TypeError} from the lookup is also reported as C{None}.
        """
        try:
            location = self._objmap.get(obj, None)
        except TypeError:
            # Presumably raised for objects which cannot be weakly
            # referenced; such an object cannot be a key of the map.
            location = None
        return location

    __call__ = url

    def __getstate__(self):
        """
        Return a copy of the instance dictionary without the weak map.
        """
        state = dict(self.__dict__)
        state.pop('_objmap')
        return state

    def __setstate__(self, state):
        """
        Adopt C{state} as the instance dictionary and start again with an
        empty weak map.
        """
        self.__dict__ = state
        self._objmap = weakref.WeakKeyDictionary()
588
589
class URLRedirectAdapter:
    """
    Resource adapter which turns a URL or URLOverlay instance into an HTTP
    redirect.

    When locateChild or renderHTTP returns a URL or URLOverlay instance, the
    response generated is a redirect to the adapted URL. Whatever segments
    of the current request remain are consumed.

    URLOverlay instances are lazy, so what they mean when returned from
    locateChild may not be obvious: url.here is the URL of the request, not
    the URL of the resource that is self.

    Here are some examples::

        def renderHTTP(self, ctx):
            # Redirect to my immediate parent
            return url.here.up()

        def locateChild(self, ctx, segments):
            # Redirect to the URL of this resource
            return url.URL.fromContext(ctx)
    """
    implements(inevow.IResource)

    def __init__(self, original):
        self.original = original

    def locateChild(self, ctx, segments):
        # Answer for every remaining segment with this same resource.
        return self, ()

    def renderHTTP(self, ctx):
        # The adapted object may contain deferreds, so it is flattened with
        # flattenFactory: each piece produced is appended to `collected`,
        # and `finish` is called once flattening is complete.
        collected = []

        def finish(ignored):
            # The collected pieces together make up the target URL.
            relative = ''.join(collected)
            # The target may be relative, so it is resolved against the
            # current URL and then flattened again.
            absolute = URL.fromContext(ctx).click(relative)
            target = flat.flatten(absolute, ctx)
            return redirectTo(target, inevow.IRequest(ctx))

        return flat.flattenFactory(
            self.original, ctx, collected.append, finish)
635