1# -*- coding: utf-8 -*-
2
3from __future__ import absolute_import
4from __future__ import print_function
5from __future__ import with_statement
6
7import glob
8import os
9import hmac
10import hashlib
11import shutil
12import subprocess
13import ipaddress
14import re
15import six
16
17from twisted.internet import defer
18from twisted.internet.interfaces import IProtocolFactory
19from twisted.internet.endpoints import serverFromString
20from twisted.web.http_headers import Headers
21
22from zope.interface import implementer
23from zope.interface import Interface
24
25if six.PY3:
26    import asyncio
27
28try:
29    import GeoIP as _GeoIP
30    GeoIP = _GeoIP
31except ImportError:
32    GeoIP = None
33
# Module-wide GeoIP database handles. They start out as None and are
# re-assigned further down in this module once the loader helpers exist.
city = country = asn = None
37
38
def create_tbb_web_headers():
    """
    Build a fresh `twisted.web.http_headers.Headers` instance whose
    `User-Agent`, `Accept`, `Accept-Language` and `Accept-Encoding`
    values are chosen to mimic Tor Browser.

    :return: a new `Headers` object on every call.
    """
    browser_like = {
        b"User-Agent": [b"Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0"],
        b"Accept": [b"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"],
        b"Accept-Language": [b"en-US,en;q=0.5"],
        b"Accept-Encoding": [b"gzip, deflate"],
    }
    return Headers(browser_like)
51
52
def version_at_least(version_string, major, minor, micro, patch):
    """
    This returns True if the version_string represents a Tor version
    of at least ``major``.``minor``.``micro``.``patch`` version,
    ignoring any trailing specifiers.

    :param version_string: a string starting with four dot-separated
        integers, e.g. ``"0.2.8.9"`` or ``"0.3.1.2-alpha (git-abc)"``

    :param major, minor, micro, patch: the components of the minimum
        acceptable version (anything ``int()`` accepts)

    :return: True if the parsed version is greater than or equal to
        the requested one, otherwise False

    :raises ValueError: if ``version_string`` doesn't begin with four
        dot-separated integers
    """
    parts = re.match(
        r'^([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+).*$',
        version_string,
    )
    if parts is None:
        # re.match() gives None on failure; fail with a clear message
        # instead of an AttributeError on None
        raise ValueError(
            "Can't parse Tor version from '{}'".format(version_string)
        )

    found = tuple(int(piece) for piece in parts.group(1, 2, 3, 4))
    wanted = (int(major), int(minor), int(micro), int(patch))
    # tuples compare component by component, most-significant first
    return found >= wanted
69
70
def create_geoip(fname):
    """
    Open the GeoIP database stored at ``fname``.

    :param fname: path to a GeoIP database file

    :return: whatever ``GeoIP.open(fname, GeoIP.GEOIP_STANDARD)``
        returns, or None if the GeoIP module couldn't be imported

    :raises IOError: if ``fname`` is not an existing file
    """
    # Waiting for GeoIP to fail would be more "pythonic", but GeoIP
    # prints its own "Can't open..." message, which we don't want; so
    # the existence check is done up-front.
    if os.path.isfile(fname):
        if GeoIP is not None:
            # any error from GeoIP itself is allowed to propagate
            return GeoIP.open(fname, GeoIP.GEOIP_STANDARD)
        return None

    raise IOError("Can't find %s" % fname)
83
84
def maybe_create_db(path):
    """
    Same as create_geoip(), except that an IOError (for example a
    missing database file) results in None instead of an exception.

    :param path: path to a GeoIP database file
    :return: the opened database, or None
    """
    try:
        database = create_geoip(path)
    except IOError:
        database = None
    return database
90
91
# Try to open the GeoIP databases under /usr/share/GeoIP. A name ends
# up as None when its database isn't available (missing file, or no
# GeoIP module).
city, asn, country = (
    maybe_create_db("/usr/share/GeoIP/GeoLiteCity.dat"),
    maybe_create_db("/usr/share/GeoIP/GeoIPASNum.dat"),
    maybe_create_db("/usr/share/GeoIP/GeoIP.dat"),
)
95
96
def is_executable(path):
    """
    Tell whether ``path`` names an existing regular file which the
    current process is allowed to execute.
    """
    if not os.path.isfile(path):
        return False
    return os.access(path, os.X_OK)
100
101
def find_tor_binary(globs=('/usr/sbin/', '/usr/bin/',
                           '/Applications/TorBrowser_*.app/Contents/MacOS/'),
                    system_tor=True):
    """
    Tries to find the tor executable using the shell first or in the
    paths whose glob-patterns are in the given 'globs'-tuple.

    :param globs:
        A tuple of shell-style globs of directories to use to find tor
        (TODO consider making that globs to actual tor binary?)

    :param system_tor:
        This controls whether the shell is used to search for 'tor' or
        not. If False, we skip that check and use only the 'globs'
        tuple.

    :return:
        The path of the first tor executable found, or None. A path
        found via the shell is returned exactly as read from the
        subprocess pipe (so it is bytes on Python 3); a path found via
        'globs' is a native string.
    """

    # Try to find the tor executable using the shell
    if system_tor:
        try:
            proc = subprocess.Popen(
                ('which tor'),
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                shell=True
            )
        except OSError:
            pass
        else:
            stdout, _ = proc.communicate()
            found = stdout.strip()
            # Test by truthiness rather than comparing to '': the pipe
            # yields bytes on Python 3, which never equal a str, so an
            # empty result would otherwise slip through.
            if proc.returncode == 0 and found:
                return found

    # the shell may not provide 'which' and tor is usually not on PATH
    # when using the browser-bundle. Look in specific places
    for pattern in globs:
        for path in glob.glob(pattern):
            torbin = os.path.join(path, 'tor')
            if is_executable(torbin):
                return torbin
    return None
142
143
def maybe_ip_addr(addr):
    """
    Parse ``addr`` as an IP address if possible.

    :return: an ``ipaddress`` address object when ``addr`` is a valid
        IP address, otherwise ``str(addr)``.

    TODO consider explicitly checking for .exit or .onion at the end?
    """
    candidate = addr
    # on Python2 a native str is promoted to unicode before it is
    # handed to ipaddress.ip_address()
    if six.PY2 and isinstance(candidate, str):
        candidate = unicode(candidate)  # noqa

    try:
        parsed = ipaddress.ip_address(candidate)
    except ValueError:
        return str(candidate)
    return parsed
158
159
def find_keywords(args, key_filter=lambda x: not x.startswith("$")):
    """
    Collect every ``name=value`` item of ``args`` into a dict. Quoting
    is NOT understood, so something like key="value with space" will
    not work.

    With the default ``key_filter``, any key starting with a single
    dollar sign is left OUT: the way Tor encodes nodes ("$hash=name")
    looks like a keyword argument but isn't one. Pass your own
    ``key_filter`` if you want different behaviour.

    :param args: a list of strings, each with one key=value pair

    :param key_filter: called with each candidate key; only keys for
        which it returns a true value are kept

    :return:
        a dict of key->value (both strings) of all name=value type
        keywords found in args. Only the first '=' splits, so values
        may themselves contain '='.
    """
    keywords = {}
    for arg in args:
        key, separator, value = arg.partition('=')
        # an empty separator means there was no '=' in this item
        if separator and key_filter(key):
            keywords[key] = value
    return keywords
178
179
def delete_file_or_tree(*args):
    """
    Best-effort removal of every path given. Each one is first
    unlinked as a file; if that fails it is removed as a directory
    tree. Deletion errors are ignored.

    :param args: any number of filesystem paths
    """
    for target in args:
        try:
            os.unlink(target)
        except OSError:
            # not removable as a plain file; try it as a directory tree
            shutil.rmtree(target, ignore_errors=True)
191
192
def process_from_address(addr, port, torstate=None):
    """
    Determines the PID from the address/port provided by using lsof
    and returns it as an int (or None if it couldn't be
    determined). In the special case the addr is '(Tor_internal)' then
    the PID of the Tor process (as gotten from the torstate object) is
    returned (or 0 if unavailable, e.g. a Tor which doesn't implement
    'GETINFO process/pid'). In this case if no TorState instance is
    given, None is returned.

    :param addr: the address to look up, '(Tor_internal)' (compared
        case-insensitively) or None
    :param port: the TCP port to look up
    :param torstate: optional object whose ``tor_pid`` attribute is
        used for the '(Tor_internal)' case

    :return: the PID as an int, or None if it couldn't be determined
        (which includes lsof output we can't parse)

    :raises OSError: if the ``lsof`` program can't be run
    """

    if addr is None:
        return None

    if "(tor_internal)" == str(addr).lower():
        if torstate is None:
            return None
        return int(torstate.tor_pid)

    proc = subprocess.Popen(['lsof', '-i', '4tcp@%s:%s' % (addr, port)],
                            stdout=subprocess.PIPE)
    stdout, _ = proc.communicate()
    lines = stdout.split(b'\n')
    if len(lines) > 1:
        # the first line is skipped; the PID is taken from the second
        # whitespace-separated column of the line after it
        try:
            return int(lines[1].split()[1])
        except (IndexError, ValueError):
            # e.g. a single line of output followed by a newline:
            # there is no row to take a PID from
            return None
    return None
218
219
def hmac_sha256(key, msg):
    """
    Compute the HMAC of ``msg`` under ``key`` using SHA256 as the
    hash. Adapted from rransom's tor-utils git repository.

    :return: the raw (binary) digest
    """
    mac = hmac.new(key, msg, digestmod=hashlib.sha256)
    return mac.digest()
227
228
CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE = os.urandom(32)


def compare_via_hash(x, y):
    """
    Compare ``x`` and ``y`` for equality without comparing them
    directly: both are first run through HMAC-SHA256 keyed with a
    random nonce generated at import time, and only the resulting
    digests are compared. The aim is to do this in something
    resembling constant time (or at least, not leaking timing info?).
    Taken from rransom's tor-utils git repository.

    :return: True if the two digests are equal, else False
    """
    nonce = CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE
    digest_of_x = hmac_sha256(nonce, x)
    digest_of_y = hmac_sha256(nonce, y)
    return digest_of_x == digest_of_y
240
241
class NetLocation(object):
    """
    Where an IP address is located, as far as the loaded GeoIP
    databases can tell: city-level if the city database is loaded,
    otherwise country-level. The ASN is filled in too when the ASN
    database is available.

    Attributes: ``ip``, ``latlng`` (a 2-tuple), ``countrycode``,
    ``city`` (a 2-tuple of city and region, or None) and ``asn``.
    """

    def __init__(self, ipaddr):
        """ipaddr should be a dotted-quad"""
        self.ip = ipaddr
        self.latlng = (None, None)
        self.countrycode = None
        self.city = None
        self.asn = None

        # nothing can be looked up for these; keep the defaults
        if self.ip is None or self.ip == 'unknown':
            return

        self._lookup_place()
        self._lookup_asn()

    def _lookup_place(self):
        """Fill in countrycode (and latlng, city) from the city or country db."""
        if not city:
            # no city-level data: fall back to the country database,
            # or to an empty country-code when that is missing as well
            if country:
                self.countrycode = country.country_code_by_addr(self.ip)
            else:
                self.countrycode = ''
            return

        try:
            record = city.record_by_addr(self.ip)
        except Exception:
            record = None
        if record is None:
            return

        self.countrycode = record['country_code']
        self.latlng = (record['latitude'], record['longitude'])
        # records lacking 'region_code' are expected to have 'region_name'
        try:
            self.city = (record['city'], record['region_code'])
        except KeyError:
            self.city = (record['city'], record['region_name'])

    def _lookup_asn(self):
        """Fill in asn from the ASN db, if loaded; any lookup error gives None."""
        if not asn:
            return
        try:
            self.asn = asn.org_by_addr(self.ip)
        except Exception:
            self.asn = None
284
285
@implementer(IProtocolFactory)
class NoOpProtocolFactory:
    """
    An IProtocolFactory whose methods all do nothing. Used for
    testing, and by :func:`available_tcp_port`.
    """
    def noop(self, *args, **kw):
        """Accept any arguments, do nothing and return None."""
        return None

    # all three factory methods share the do-nothing implementation
    doStop = doStart = buildProtocol = noop
297
298
@defer.inlineCallbacks
def available_tcp_port(reactor):
    """
    Find a TCP port on localhost which is currently available.

    This listens on port 0 of 127.0.0.1, notes which port number was
    assigned, and stops listening again.

    :param reactor: the reactor to create the listening endpoint on
    :return: a Deferred firing with the port number
    """

    loopback = serverFromString(reactor, 'tcp:0:interface=127.0.0.1')
    listening = yield loopback.listen(NoOpProtocolFactory())
    assigned = listening.getHost().port
    yield listening.stopListening()
    defer.returnValue(assigned)
312
313
def unescape_quoted_string(string):
    r'''
    Unescape a QuotedString as described by the tor control-spec,

        QuotedString = DQUOTE *qcontent DQUOTE

    including the behaviour the spec recommends for compatibility with
    older tor versions:

      * Read \n \t \r and \0 ... \377 as C escapes.
      * Treat a backslash followed by any other character as that character.

    :param string: The escaped quoted string (including the
                   surrounding double-quotes).
    :returns: The unescaped string.
    :raises ValueError: If the string is in an invalid form
                        (e.g. a single backslash)
    '''
    quoted = re.match(r'''^"((?:[^"\\]|\\.)*)"$''', string)
    if quoted is None:
        raise ValueError("Invalid quoted string", string)
    content = quoted.group(1)

    # Drop the backslash in front of every character that is not one
    # of n, t, r, an octal digit or another backslash, so that the
    # escape-decoding below only sees the escapes we want handled
    # (e.g. a backslash followed by 'x' becomes a plain 'x' instead of
    # starting a hex escape).
    content = re.sub(r'((?:^|[^\\])(?:\\\\)*)\\([^ntr0-7\\])', r'\1\2', content)

    if not six.PY3:
        return content.decode('string-escape')
    # XXX hmmm?
    return bytes(content, 'ascii').decode('unicode-escape')
344
345
def default_control_port():
    """
    Return the control port to use by default: the integer value of
    the environment variable `TX_CONTROL_PORT` if it is set, otherwise
    9151 (the Tor Browser Bundle default).

    You shouldn't use this in "normal" code, this is a convenience for
    the examples.
    """
    configured = os.environ.get('TX_CONTROL_PORT')
    if configured is None:
        return 9151
    return int(configured)
359
360
class IListener(Interface):
    """
    A set of callbacks which can be added to, removed from, and
    notified all at once.
    """
    def add(callback):
        """
        Add a listener. The arguments to the callback are determined by whomever calls notify()
        """

    def remove(callback):
        """
        Remove a listener which was previously passed to add().
        """

    def notify(*args, **kw):
        """
        Calls every listener with the given args and keyword-args.

        XXX errors? just log?
        """
378
379
def maybe_coroutine(obj):
    """
    On Python3, turn a coroutine object into a Deferred by wrapping it
    in ensureDeferred; anything else (and everything on Python2) is
    handed back untouched.

    (This is to insert in all callback chains from user code, in case
    that user code is Python3 and used 'async def')
    """
    if not six.PY3:
        return obj
    if asyncio.iscoroutine(obj):
        return defer.ensureDeferred(obj)
    return obj
391
392
@implementer(IListener)
class _Listener(object):
    """
    Internal helper.

    Holds a set of callbacks and calls all of them on notify().
    """

    def __init__(self):
        self._listeners = set()

    def add(self, callback):
        """
        Add a callback to this listener
        """
        self._listeners.add(callback)

    __call__ = add  #: alias for "add"

    def remove(self, callback):
        """
        Remove a callback from this listener

        :raises KeyError: if the callback isn't currently registered
        """
        self._listeners.remove(callback)

    def notify(self, *args, **kw):
        """
        Calls all listeners with the specified args.

        A listener which fails has its traceback printed; the failure
        is not propagated to the caller.

        Returns a Deferred which callbacks when all the listeners
        which return Deferreds have themselves completed.
        """
        calls = []

        def failed(fail):
            # XXX use logger
            fail.printTraceback()

        # Iterate over a snapshot: listeners run synchronously inside
        # maybeDeferred() and may add() or remove() callbacks (e.g. a
        # one-shot listener removing itself), which would otherwise
        # raise "Set changed size during iteration".
        for cb in tuple(self._listeners):
            d = defer.maybeDeferred(cb, *args, **kw)
            d.addCallback(maybe_coroutine)
            d.addErrback(failed)
            calls.append(d)
        return defer.DeferredList(calls)
435
436
437class _ListenerCollection(object):
438    """
439    Internal helper.
440
441    This collects all your valid event listeners together in one
442    object if you want.
443    """
444    def __init__(self, valid_events):
445        self._valid_events = valid_events
446        for e in valid_events:
447            setattr(self, e, _Listener())
448
449    def __call__(self, event, callback):
450        if event not in self._valid_events:
451            raise Exception("Invalid event '{}'".format(event))
452        getattr(self, event).add(callback)
453
454    def remove(self, event, callback):
455        if event not in self._valid_events:
456            raise Exception("Invalid event '{}'".format(event))
457        getattr(self, event).remove(callback)
458
459    def notify(self, event, *args, **kw):
460        if event not in self._valid_events:
461            raise Exception("Invalid event '{}'".format(event))
462        getattr(self, event).notify(*args, **kw)
463
464
465# similar to OneShotObserverList in Tahoe-LAFS
class SingleObserver(object):
    """
    A helper for ".when_*()" sort of functions: hands out Deferreds
    which all fire with the one value given to fire().
    """
    # marker meaning "fire() has not been called yet"
    _NotFired = object()

    def __init__(self):
        self._observers = []
        self._fired = self._NotFired

    def when_fired(self):
        """
        :return: a new Deferred; already fired with the value if
            fire() has happened, otherwise fired when it does.
        """
        waiter = defer.Deferred()
        if self._fired is self._NotFired:
            self._observers.append(waiter)
            return waiter
        waiter.callback(self._fired)
        return waiter

    def fire(self, value):
        """
        Fire all waiting Deferreds with 'value'. Only the first call
        has any effect; later calls return None.

        :return: 'value' (on the first call)
        """
        if self._observers is None:
            return  # raise RuntimeError("already fired") ?
        self._fired = value
        for waiter in self._observers:
            waiter.callback(self._fired)
        self._observers = None
        return value  # so we're transparent if used as a callback
492
493
494class _Version(object):
495    """
496    Replacement for incremental.Version until
497    https://github.com/meejah/txtorcon/issues/233 and/or
498    https://github.com/hawkowl/incremental/issues/31 is fixed.
499    """
500    # as of latest incremental, it should only access .package and
501    # .short() via the getVersionString() method that Twisted's
502    # deprecated() uses...
503
504    def __init__(self, package, major, minor, patch):
505        self.package = package
506        self.major = major
507        self.minor = minor
508        self.patch = patch
509
510    def short(self):
511        return '{}.{}.{}'.format(self.major, self.minor, self.patch)
512
513
514# originally from magic-wormhole code
def _is_non_public_numeric_address(host):
    """
    returns True if 'host' is a numeric address which is not public
    (loopback, multicast, private, reserved or unspecified); returns
    False for public addresses and for anything that isn't a numeric
    address at all.
    """
    try:
        numeric = ipaddress.ip_address(six.text_type(host))
    except ValueError:
        # non-numeric (e.g. a hostname), let Tor try it
        return False

    # no Tor exit node will be able to reach these kinds of address
    # (e.g. RFC1918 ones), so they're too weird to connect to
    non_public = (
        numeric.is_loopback or
        numeric.is_multicast or
        numeric.is_private or
        numeric.is_reserved or
        numeric.is_unspecified
    )
    return True if non_public else False
529