1import logging
2import os
3import platform
4from collections import OrderedDict
5
6from requests import __version__ as requests_version
7from stem import __version__ as stem_version
8
9from sbws import __version__
10
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# NOTE(review): these version constants are not used in the code visible
# here; presumably they identify the stored result format, the wire format
# and the bandwidth file specification respectively -- confirm at the call
# sites.
RESULT_VERSION = 4
WIRE_VERSION = 1
SPEC_VERSION = "1.5.0"
16
# This is a dictionary of torrc options we always want to set when launching
# Tor and that do not depend on any runtime configuration.
# Options that are only known at runtime (from the configuration file) are
# added in utils/stem.py launch_tor.
TORRC_STARTING_POINT = {
    # We will find out the port via the ControlPort, and not setting
    # something static means a lower chance of conflict.
    "SocksPort": "auto",
    # Easier than password authentication
    "CookieAuthentication": "1",
    # To avoid path bias warnings
    "UseEntryGuards": "0",
    # Because we need things from full server descriptors (namely for now: the
    # bandwidth line)
    "UseMicrodescriptors": "0",
    # Useful logging options for clients that don't care about anonymity
    "SafeLogging": "0",
    "LogTimeGranularity": "1",
    "ProtocolWarnings": "1",
    # To be able to respond to MaxAdvertisedBandwidth as soon as possible.
    # If `FetchDirInfoExtraEarly` is set, but not `FetchDirInfoEarly`,
    # Tor will throw this error:
    # `FetchDirInfoExtraEarly requires that you also set FetchDirInfoEarly`
    "FetchDirInfoEarly": "1",
    "FetchDirInfoExtraEarly": "1",
    # To make Tor keep fetching descriptors, even when idle.
    "FetchUselessDescriptors": "1",
    # Things needed to make circuits fail a little faster.
    # NOTE(review): the original note here says that circuit_timeout is
    # obtained as a string instead of an int on purpose, because stem only
    # accepts strings, but no timeout option is set in this dictionary;
    # presumably it is added at launch time -- confirm.
    "LearnCircuitBuildTimeout": "0",
}
# Options that need to be set at runtime.
TORRC_RUNTIME_OPTIONS = {
    # The scanner builds the circuits to download the data itself,
    # so do not let Tor build them.
    "__DisablePredictedCircuits": "1",
    # The scanner attaches the streams to the circuits itself,
    # so do not let Tor attach them.
    "__LeaveStreamsUnattached": "1",
}
# Options that can be set at runtime and can fail with some Tor versions.
# The ones that fail will be ignored.
# The OrderedDict is built from (option, value) pairs rather than from a dict
# literal, so that the order stays explicit when more options are added.
TORRC_OPTIONS_CAN_FAIL = OrderedDict(
    [
        # Since scanner anonymity is currently not the goal, ConnectionPadding
        # is disabled so that no extra traffic is sent.
        ("ConnectionPadding", "0"),
    ]
)
67
# Absolute path of the directory containing this module; the default
# configuration files are located relative to it.
PKG_DIR = os.path.abspath(os.path.dirname(__file__))
DEFAULT_CONFIG_PATH = os.path.join(PKG_DIR, "config.default.ini")
DEFAULT_LOG_CONFIG_PATH = os.path.join(PKG_DIR, "config.log.default.ini")
# Per-user configuration file, in the home directory of the current user.
USER_CONFIG_PATH = os.path.join(os.path.expanduser("~"), ".sbws.ini")
# NOTE(review): fixed system-wide locations; presumably used when sbws runs
# under a service supervisor -- confirm at the call sites.
SUPERVISED_USER_CONFIG_PATH = "/etc/sbws/sbws.ini"
SUPERVISED_RUN_DPATH = "/run/sbws/tor"
74
SOCKET_TIMEOUT = 60  # seconds

# NOTE(review): the scaling constants below carry no documentation in this
# file; their names suggest that they select and tune the scaling method --
# confirm at the call sites before relying on that.
SBWS_SCALE_CONSTANT = 7500
TORFLOW_SCALING = 1
SBWS_SCALING = 2
TORFLOW_BW_MARGIN = 0.05
TORFLOW_OBS_LAST = 0
TORFLOW_OBS_MEAN = 1
TORFLOW_OBS_DECAYING = 3
TORFLOW_ROUND_DIG = 3
PROP276_ROUND_DIG = 2
# Number of seconds by which the measurements for a relay have to be apart
# from each other; otherwise the relay is excluded from the relays to vote on.
# Ideally, this should be 86400 seconds (1 day).
# To have sbws vote on approximately the same number of relays as Torflow,
# leave it as None, to not exclude measurements.
DAY_SECS = None
# Minimum number of measurements for a relay to be included as a relay to vote
# on.
# Ideally, this should be 2.
# As with the constant above, leave it as 1 to not exclude measurements.
NUM_MIN_RESULTS = 1
MIN_REPORT = 60
# Maximum difference, in percentage, between the total consensus bandwidth
# and the total in the bandwidth lines.
MAX_BW_DIFF_PERC = 50

# With the new KeyValues in #29591, the lines are longer than 510.
# Tor already accepts lines of any size, but the limit is kept anyway.
BW_LINE_SIZE = 1022
105
# RelayList, ResultDump
# For how many seconds in the past the relays and measurements data are kept/
# considered valid (5 days, expressed in seconds).
# This is currently set by default in config.default.ini as ``data_period``,
# and used in ResultDump.
# In a future refactor, constants in config.default.ini should be moved here,
# or calculated in settings, so that there's no need to pass the configuration
# to all the functions.
MEASUREMENTS_PERIOD = 5 * 24 * 60 * 60

# #40017: To make sbws behave similarly to Torflow, the period of raw past
# measurements used when generating the Bandwidth File has to be 28 days,
# not 5 (expressed in seconds, like MEASUREMENTS_PERIOD).
# Note that this is different from the period of raw past measurements used
# when measuring, which are used for the monitoring values and storing json.
GENERATE_PERIOD = 28 * 24 * 60 * 60
121
# Metadata to send in every request, so that data servers can know which
# scanners are using them.
# In Requests these keys are case insensitive.
HTTP_HEADERS = {
    # This would be ignored if changing to HTTP/2
    "Connection": "keep-alive",
    # The value ends with "Tor/" and no version: the Tor version needs to be
    # obtained from the controller.
    "User-Agent": "sbws/{} ({}) Python/{} Requests/{} Stem/{} Tor/".format(
        __version__,
        platform.platform(),
        platform.python_version(),
        requests_version,
        stem_version,
    ),
    # Organization defined names (:rfc:`7239`)
    # The nickname needs to be obtained from the user config file.
    # NOTE(review): the "{}" values are presumably placeholders that are
    # formatted once the actual values are known -- confirm at the call sites.
    "Tor-Bandwidth-Scanner-Nickname": "{}",
    "Tor-Bandwidth-Scanner-UUID": "{}",
    # In case of including IP address.
    # 'Forwarded': 'for={}'  # IPv6 part, if there's
}
# In the case of having IPv6, this is concatenated to the ``Forwarded``
# header value.
IPV6_FORWARDED = ', for="[{}]"'

# NOTE(review): presumably extra headers for the GET requests, with the
# ``Range`` value formatted per request -- confirm at the call sites.
HTTP_GET_HEADERS = {
    "Range": "{}",
    "Accept-Encoding": "identity",
}
DESTINATION_VERIFY_CERTIFICATE = True
# This number might need to be adjusted depending on the percentage of
# circuit and HTTP request failures.
# NOTE(review): it is not clear from this file which constant the sentence
# above refers to -- confirm.

# Number of attempts to use a destination that are stored, in order to decide
# whether the destination is functional or not.
NUM_DESTINATION_ATTEMPTS_STORED = 10
# Time to wait before trying again a destination that wasn't functional.
# Because of intermittent failures with CDN destinations, start trying again
# after 5 min.
DELTA_SECONDS_RETRY_DESTINATION = 60 * 5
# No matter what, do not increase the wait time between destination retries
# past this value (3 hours, in seconds).
MAX_SECONDS_RETRY_DESTINATION = 60 * 60 * 3
# Number of consecutive times a destination can fail before considering it
# not functional.
MAX_NUM_DESTINATION_FAILURES = 3
# Factor by which to multiply DELTA_SECONDS_RETRY_DESTINATION when the
# destination fails again.
FACTOR_INCREMENT_DESTINATION_RETRY = 2
170
# Constants to check health KeyValues in the bandwidth file
# MEASUREMENTS_PERIOD converted from seconds to whole days.
PERIOD_DAYS = int(MEASUREMENTS_PERIOD / (24 * 60 * 60))
MAX_RECENT_CONSENSUS_COUNT = PERIOD_DAYS * 24  # 120
# XXX: This was only defined in `config.default.ini`, it should be read from
# here.
FRACTION_RELAYS = 0.05
# A priority list currently takes more than 3h, ideally it should only take 1h.
MIN_HOURS_PRIORITY_LIST = 1
# As of 2020, there are fewer than 7000 relays.
MAX_RELAYS = 8000
# 120
MAX_RECENT_PRIORITY_LIST_COUNT = int(
    PERIOD_DAYS * 24 / MIN_HOURS_PRIORITY_LIST
)
MAX_RELAYS_PER_PRIORITY_LIST = int(MAX_RELAYS * FRACTION_RELAYS)  # 400
# 48000
MAX_RECENT_PRIORITY_RELAY_COUNT = (
    MAX_RECENT_PRIORITY_LIST_COUNT * MAX_RELAYS_PER_PRIORITY_LIST
)
190
191
def fail_hard(*a, **kw):
    """
    Log something at critical level and then exit as fast as possible.

    All the arguments are passed unchanged to ``log.critical``, so they follow
    the usual logging convention of a message followed by its arguments.

    :param a: positional arguments for ``log.critical``
    :param kw: keyword arguments for ``log.critical``
    :raises SystemExit: always, with exit status 1
    """
    # The ``exit`` builtin is injected by the ``site`` module for interactive
    # use and is missing when ``site`` is not loaded (e.g. ``python -S``),
    # while ``sys.exit`` is always available.
    import sys

    log.critical(*a, **kw)
    sys.exit(1)
196
197
def touch_file(fname, times=None):
    """
    If **fname** exists, update its last access and modified times to now. If
    **fname** does not exist, create it. If **times** are specified, pass them
    to os.utime for use.

    The content of an existing file is left untouched.

    :param str fname: Name of file to update or create
    :param tuple times: 2-tuple of floats for access time and modified time
        respectively
    """
    log.debug("Touching %s", fname)
    # Append mode creates the file when it is missing and does not truncate
    # it when it already exists.
    with open(fname, "a") as touched:
        # Prefer the descriptor, so that the file that was just opened is the
        # one that gets updated. Not every platform lets os.utime take a
        # descriptor, so fall back to the path where it is not supported.
        if os.utime in os.supports_fd:
            target = touched.fileno()
        else:
            target = fname
        os.utime(target, times=times)
211