# encoding: utf-8
###############################################################################
# libproxy - A library for proxy configuration
# Copyright (C) 2006 Nathaniel McCallum <nathaniel@natemccallum.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
###############################################################################

"A library for proxy configuration and autodetection."

import ctypes
import ctypes.util
import platform

import sys


def _load(name, *versions):
    """Load the shared library 'name' and return its ctypes handle.

    Each entry of 'versions' is tried first as 'lib<name>.so.<version>'.
    If none of those can be loaded, ctypes.util.find_library() is used to
    locate the library by its bare name.

    Raises ImportError if the library cannot be located at all.
    """
    for ver in versions:
        try:
            return ctypes.cdll.LoadLibrary('lib%s.so.%s' % (name, ver))
        except OSError:
            # This versioned soname is not loadable; try the next candidate.
            # Only OSError is swallowed so that KeyboardInterrupt/SystemExit
            # are never hidden.
            pass
    name_ver = ctypes.util.find_library(name)
    if name_ver:
        return ctypes.cdll.LoadLibrary(name_ver)
    raise ImportError("Unable to find %s library" % name)


# Load libproxy
_libproxy = _load("proxy", 1)

# Declare the C prototypes so that pointers are not truncated to C ints.
_libproxy.px_proxy_factory_new.restype = ctypes.POINTER(ctypes.c_void_p)
_libproxy.px_proxy_factory_free.argtypes = [ctypes.c_void_p]
_libproxy.px_proxy_factory_get_proxies.argtypes = [ctypes.c_void_p,
                                                   ctypes.c_char_p]
_libproxy.px_proxy_factory_get_proxies.restype = ctypes.POINTER(ctypes.c_void_p)
_libproxy.px_proxy_factory_free_proxies.argtypes = [ctypes.POINTER(ctypes.c_void_p)]


class ProxyFactory(object):
    """A ProxyFactory object is used to provide potential proxies to use
    in order to reach a given URL (via 'getProxies(url)').

    This instance should be kept around as long as possible as it contains
    cached data to increase performance.  Memory usage should be minimal (cache
    is small) and the cache lifespan is handled automatically.

    Usage is pretty simple:
        pf = libproxy.ProxyFactory()
        for url in urls:
            proxies = pf.getProxies(url)
            for proxy in proxies:
                if proxy == "direct://":
                    # Fetch URL without using a proxy
                elif proxy.startswith("http://"):
                    # Fetch URL using an HTTP proxy
                elif proxy.startswith("socks://"):
                    # Fetch URL using a SOCKS proxy

                if fetchSucceeded:
                    break
    """

    class ProxyResolutionError(RuntimeError):
        """Exception raised when proxy cannot be resolved generally
           due to invalid URL"""
        pass

    def __init__(self):
        """Create the underlying native proxy factory."""
        self._pf = _libproxy.px_proxy_factory_new()

    def getProxies(self, url):
        """Given a URL, returns a list of proxies in priority order to be used
        to reach that URL.

        A list of proxy strings is returned.  If the first proxy fails, the
        second should be tried, etc...  On success, at least one entry in the
        list will be returned.

        Raises TypeError if 'url' is not a str, and
        ProxyFactory.ProxyResolutionError if libproxy returns no result for
        the URL.

        Regarding performance: this method always blocks and may be called
        in a separate thread (is thread-safe).  In most cases, the time
        required to complete this function call is simply the time required
        to read the configuration (e.g  from GConf, Kconfig, etc).

        In the case of PAC, if no valid PAC is found in the cache (i.e.
        configuration has changed, cache is invalid, etc), the PAC file is
        downloaded and inserted into the cache.  This is the most expensive
        operation as the PAC is retrieved over the network.  Once a PAC exists
        in the cache, it is merely a JavaScript invocation to evaluate the PAC.
        One should note that DNS can be called from within a PAC during
        JavaScript invocation.

        In the case of WPAD, WPAD is used to automatically locate a PAC on the
        network.  Currently, we only use DNS for this, but other methods may
        be implemented in the future.  Once the PAC is located, normal PAC
        performance (described above) applies.

        """
        if not isinstance(url, str):
            raise TypeError("url must be a string!")

        if isinstance(url, bytes):
            # Python 2: str is bytes
            url_bytes = url
        else:
            # Python 3: str is unicode
            # TODO: Does this need to be encoded from IRI to ASCII (ACE) URI,
            # for example http://кц.рф/пример ->
            # http://xn--j1ay.xn--p1ai/%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80?
            # Or is libproxy designed to accept IRIs like
            # http://кц.рф/пример? Passing in an IRI does seem to work
            # acceptably in practice, so do that for now.
            url_bytes = url.encode('utf-8')

        array = _libproxy.px_proxy_factory_get_proxies(self._pf, url_bytes)

        # A NULL result pointer is falsy.
        if not bool(array):
            raise ProxyFactory.ProxyResolutionError(
                "Can't resolve proxy for '%s'" % url)

        proxies = []
        try:
            # The native array is NULL-terminated.
            i = 0
            while array[i]:
                proxy_bytes = ctypes.cast(array[i], ctypes.c_char_p).value
                if isinstance(proxy_bytes, str):
                    # Python 2
                    proxies.append(proxy_bytes)
                else:
                    # Python 3
                    proxies.append(proxy_bytes.decode('utf-8', errors='replace'))
                i += 1
        finally:
            # Always hand the array back to libproxy, even if the conversion
            # above fails part-way through.
            _libproxy.px_proxy_factory_free_proxies(array)

        return proxies

    def __del__(self):
        """Release the native proxy factory, if one was created."""
        # __init__ may have raised before _pf was assigned.
        pf = getattr(self, '_pf', None)
        # _libproxy may already have been cleared during interpreter shutdown;
        # a NULL/absent handle needs no freeing.
        if _libproxy and pf:
            # Drop the reference first so the handle is never freed twice.
            self._pf = None
            _libproxy.px_proxy_factory_free(pf)