1# encoding: utf-8
2###############################################################################
3# libproxy - A library for proxy configuration
4# Copyright (C) 2006 Nathaniel McCallum <nathaniel@natemccallum.com>
5#
6# This library is free software; you can redistribute it and/or
7# modify it under the terms of the GNU Lesser General Public
8# License as published by the Free Software Foundation; either
9# version 2.1 of the License, or (at your option) any later version.
10#
11# This library is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14# Lesser General Public License for more details.
15#
16# You should have received a copy of the GNU Lesser General Public
17# License along with this library; if not, write to the Free Software
18# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
19###############################################################################
20
21"A library for proxy configuration and autodetection."
22
23import ctypes
24import ctypes.util
25import platform
26
27import sys
28
29def _load(name, *versions):
30    for ver in versions:
31        try: return ctypes.cdll.LoadLibrary('lib%s.so.%s' % (name, ver))
32        except: pass
33    name_ver = ctypes.util.find_library(name)
34    if name_ver:
35        return ctypes.cdll.LoadLibrary(name_ver)
36    raise ImportError("Unable to find %s library" % name)
37
# Load libproxy (the versioned name for version 1 is tried first, then a
# lookup by bare library name).
_libproxy = _load("proxy", 1)
# ctypes assumes an int return type unless told otherwise, so the entry
# points that return pointers are declared explicitly below.
# px_proxy_factory_new() returns the factory handle as a pointer.
_libproxy.px_proxy_factory_new.restype = ctypes.POINTER(ctypes.c_void_p)
# px_proxy_factory_free() takes a single untyped pointer argument.
_libproxy.px_proxy_factory_free.argtypes = [ctypes.c_void_p]
# px_proxy_factory_get_proxies() returns a pointer to an array of pointers;
# ProxyFactory.getProxies() walks it until it reaches a NULL entry.
_libproxy.px_proxy_factory_get_proxies.restype = ctypes.POINTER(ctypes.c_void_p)
# px_proxy_factory_free_proxies() takes a pointer of the same type that
# px_proxy_factory_get_proxies() returns.
_libproxy.px_proxy_factory_free_proxies.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
44
class ProxyFactory(object):
    """A ProxyFactory object is used to provide potential proxies to use
    in order to reach a given URL (via 'getProxies(url)').

    This instance should be kept around as long as possible as it contains
    cached data to increase performance.  Memory usage should be minimal (cache
    is small) and the cache lifespan is handled automatically.

    Usage is pretty simple:
        pf = libproxy.ProxyFactory()
        for url in urls:
            proxies = pf.getProxies(url)
            for proxy in proxies:
                if proxy == "direct://":
                    # Fetch URL without using a proxy
                elif proxy.startswith("http://"):
                    # Fetch URL using an HTTP proxy
                elif proxy.startswith("socks://"):
                    # Fetch URL using a SOCKS proxy

                if fetchSucceeded:
                    break
    """

    class ProxyResolutionError(RuntimeError):
        """Exception raised when proxy cannot be resolved generally
           due to invalid URL"""
        pass

    def __init__(self):
        """Create the native libproxy factory handle used by this object."""
        self._pf = _libproxy.px_proxy_factory_new()

    def getProxies(self, url):
        """Given a URL, returns a list of proxies in priority order to be used
        to reach that URL.

        A list of proxy strings is returned.  If the first proxy fails, the
        second should be tried, etc...  On success, at least one entry in the
        list is expected.

        Raises TypeError if 'url' is not a str, and
        ProxyFactory.ProxyResolutionError if libproxy returns no result for
        the URL (generally because the URL is invalid).

        Regarding performance: this method always blocks and may be called
        in a separate thread (is thread-safe).  In most cases, the time
        required to complete this function call is simply the time required
        to read the configuration (e.g. from GConf, Kconfig, etc).

        In the case of PAC, if no valid PAC is found in the cache (i.e.
        configuration has changed, cache is invalid, etc), the PAC file is
        downloaded and inserted into the cache. This is the most expensive
        operation as the PAC is retrieved over the network. Once a PAC exists
        in the cache, it is merely a JavaScript invocation to evaluate the PAC.
        One should note that DNS can be called from within a PAC during
        JavaScript invocation.

        In the case of WPAD, WPAD is used to automatically locate a PAC on the
        network.  Currently, we only use DNS for this, but other methods may
        be implemented in the future.  Once the PAC is located, normal PAC
        performance (described above) applies.

        """
        if not isinstance(url, str):
            raise TypeError("url must be a string!")

        if isinstance(url, bytes):
            # Python 2: str is bytes
            url_bytes = url
        else:
            # Python 3: str is unicode
            # TODO: Does this need to be encoded from IRI to ASCII (ACE) URI,
            # for example http://кц.рф/пример ->
            # http://xn--j1ay.xn--p1ai/%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80?
            # Or is libproxy designed to accept IRIs like
            # http://кц.рф/пример? Passing in an IRI does seem to work
            # acceptably in practice, so do that for now.
            url_bytes = url.encode('utf-8')

        array = _libproxy.px_proxy_factory_get_proxies(self._pf, url_bytes)

        # A NULL pointer is falsy in ctypes and means no result was produced.
        if not array:
            raise ProxyFactory.ProxyResolutionError(
                    "Can't resolve proxy for '%s'" % url)

        proxies = []
        try:
            # The array is terminated by a NULL entry.
            i = 0
            while array[i]:
                # .value copies the C string, so the result stays valid
                # after the native array has been released.
                proxy_bytes = ctypes.cast(array[i], ctypes.c_char_p).value
                if isinstance(proxy_bytes, str):
                    # Python 2
                    proxies.append(proxy_bytes)
                else:
                    # Python 3
                    proxies.append(
                        proxy_bytes.decode('utf-8', errors='replace'))
                i += 1
        finally:
            # Always hand the array back to libproxy, even if copying the
            # results failed part-way, so that it is not leaked.
            _libproxy.px_proxy_factory_free_proxies(array)

        return proxies

    def __del__(self):
        """Release the native factory handle, if one was created."""
        # __init__ may have raised before _pf was assigned; in that case
        # there is nothing to free.
        pf = getattr(self, '_pf', None)
        if pf is None:
            return
        # The module global may already be cleared at interpreter shutdown.
        if _libproxy:
            # Drop the reference first so a repeated call cannot free the
            # same handle twice.
            self._pf = None
            _libproxy.px_proxy_factory_free(pf)
145
146