1#!/usr/bin/env python
2
3"""
4Copyright (c) 2014-2021 Maltrail developers (https://github.com/stamparm/maltrail/)
5See the file 'LICENSE' for copying permission
6"""
7
8from __future__ import print_function  # Requires: Python >= 2.6
9
10import sys
11
12sys.dont_write_bytecode = True
13
14import cProfile
15import inspect
16import math
17import mmap
18import optparse
19import os
20import platform
21import re
22import socket
23import subprocess
24import struct
25import threading
26import time
27import traceback
28import warnings
29
30from core.addr import inet_ntoa6
31from core.addr import addr_port
32from core.attribdict import AttribDict
33from core.common import check_connection
34from core.common import check_sudo
35from core.common import check_whitelisted
36from core.common import get_ex_message
37from core.common import get_text
38from core.common import is_local
39from core.common import load_trails
40from core.common import patch_parser
41from core.compat import xrange
42from core.datatype import LRUDict
43from core.enums import BLOCK_MARKER
44from core.enums import CACHE_TYPE
45from core.enums import PROTO
46from core.enums import TRAIL
47from core.log import create_log_directory
48from core.log import flush_condensed_events
49from core.log import get_error_log_handle
50from core.log import log_error
51from core.log import log_event
52from core.parallel import worker
53from core.parallel import write_block
54from core.settings import config
55from core.settings import CAPTURE_TIMEOUT
56from core.settings import CHECK_CONNECTION_MAX_RETRIES
57from core.settings import CONFIG_FILE
58from core.settings import CONSONANTS
59from core.settings import DLT_OFFSETS
60from core.settings import DNS_EXHAUSTION_THRESHOLD
61from core.settings import GENERIC_SINKHOLE_REGEX
62from core.settings import HOMEPAGE
63from core.settings import HOURLY_SECS
64from core.settings import HTTP_TIME_FORMAT
65from core.settings import IGNORE_DNS_QUERY_SUFFIXES
66from core.settings import IPPROTO_LUT
67from core.settings import IS_WIN
68from core.settings import LOCALHOST_IP
69from core.settings import LOCAL_SUBDOMAIN_LOOKUPS
70from core.settings import MAX_CACHE_ENTRIES
71from core.settings import MMAP_ZFILL_CHUNK_LENGTH
72from core.settings import NAME
73from core.settings import NO_SUCH_NAME_COUNTERS
74from core.settings import NO_SUCH_NAME_PER_HOUR_THRESHOLD
75from core.settings import INFECTION_SCANNING_THRESHOLD
76from core.settings import PORT_SCANNING_THRESHOLD
77from core.settings import POTENTIAL_INFECTION_PORTS
78from core.settings import read_config
79from core.settings import REGULAR_SENSOR_SLEEP_TIME
80from core.settings import SNAP_LEN
81from core.settings import SUSPICIOUS_CONTENT_TYPES
82from core.settings import SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS
83from core.settings import SUSPICIOUS_DIRECT_IP_URL_REGEX
84from core.settings import SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD
85from core.settings import SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD
86from core.settings import SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD
87from core.settings import SUSPICIOUS_HTTP_PATH_REGEXES
88from core.settings import SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION
89from core.settings import SUSPICIOUS_HTTP_REQUEST_REGEXES
90from core.settings import SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS
91from core.settings import SUSPICIOUS_PROXY_PROBE_PRE_CONDITION
92from core.settings import SUSPICIOUS_UA_REGEX
93from core.settings import VALID_DNS_NAME_REGEX
94from core.settings import trails
95from core.settings import VERSION
96from core.settings import WEB_SCANNING_THRESHOLD
97from core.settings import WHITELIST
98from core.settings import WHITELIST_DIRECT_DOWNLOAD_KEYWORDS
99from core.settings import WHITELIST_LONG_DOMAIN_NAME_KEYWORDS
100from core.settings import WHITELIST_HTTP_REQUEST_PATHS
101from core.settings import WHITELIST_UA_REGEX
102from core.update import update_ipcat
103from core.update import update_trails
104from thirdparty import six
105from thirdparty.six.moves import urllib as _urllib
106
# NOTE: https://github.com/helpsystems/pcapy/pull/67/files
warnings.filterwarnings(category=DeprecationWarning, action="ignore")

# --- capture/worker bookkeeping (consumed by code outside of this part of the file) ---
_buffer = _multiprocessing = _n = None
_caps = []
_count = 0
_done_count = 0
_done_lock = threading.Lock()
_locks = AttribDict()  # optional locks looked up by attribute (e.g. _locks.connect_sec in _process_packet)

# --- per-second heuristic accumulators (filled and flushed by _process_packet) ---
_connect_sec = 0  # 'sec' of the packet most recently seen by the heuristic housekeeping
_connect_src_dst = {}  # "src_ip~dst_ip" -> set of dst ports; "src_ip~dst_port" -> set of dst IPs
_connect_src_details = {}  # same keys -> set of (sec, usec, src_port, dst_port or dst_ip)
_path_src_dst = {}  # "src_ip~dst_ip" -> set of first URL path segments
_path_src_dst_details = {}  # "src_ip~dst_ip" -> set of (sec, usec, src_port, dst_port, path)

# --- burst suppression ---
_last_syn = _last_logged_syn = None  # (sec, src_ip, src_port, dst_ip, dst_port) of last seen/last logged SYN
_last_udp = _last_logged_udp = None  # presumably the UDP counterparts - handled outside of the visible part

# --- caches ---
_result_cache = LRUDict(MAX_CACHE_ENTRIES)  # keyed by (CACHE_TYPE.<kind>, value)
_local_cache = LRUDict(MAX_CACHE_ENTRIES)  # keyed by plain strings (e.g. "code execution")

# --- DNS related state (used outside of the visible part) ---
_subdomains = {}
_subdomains_sec = None
_dns_exhausted_domains = set()
131
132class _set(set):
133    pass
134
# NOTE: pcapy is a hard requirement - when it can't be imported, stop right away with an installation hint
try:
    import pcapy
except ImportError:
    # NOTE: sys.exit() is used instead of exit() as the latter is a helper injected by the 'site' module
    # (meant for the interactive shell) and is not guaranteed to exist (e.g. 'python -S', frozen builds)
    if IS_WIN:
        sys.exit("[!] please install 'WinPcap' (e.g. 'http://www.winpcap.org/install/') and Pcapy (e.g. 'https://breakingcode.wordpress.com/?s=pcapy')")
    else:
        sys.exit("[!] please install 'Pcapy' (e.g. 'sudo pip%s install pcapy-ng')" % ('3' if six.PY3 else '2'))
144
145def _check_domain_member(query, domains):
146    parts = query.lower().split('.')
147
148    for i in xrange(0, len(parts)):
149        domain = '.'.join(parts[i:])
150        if domain in domains:
151            return True
152
153    return False
154
def _check_domain_whitelisted(query):
    """
    Tells whether query is (or falls under) an entry of WHITELIST

    Only the leading part of query made of letters, digits, '.', '_' and '-' is taken into
    account (i.e. anything from the first other character on is ignored). Answers are memoized
    in _result_cache under (CACHE_TYPE.DOMAIN_WHITELISTED, query)
    """

    cache_key = (CACHE_TYPE.DOMAIN_WHITELISTED, query)
    cached = _result_cache.get(cache_key)

    if cached is not None:
        return cached

    name = re.split(r"(?i)[^A-Z0-9._-]", query or "")[0]
    verdict = _check_domain_member(name, WHITELIST)
    _result_cache[cache_key] = verdict

    return verdict
163
164def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto, packet=None):
165    if query:
166        query = query.lower()
167        if ':' in query:
168            query = query.split(':', 1)[0]
169
170    if query.replace('.', "").isdigit():  # IP address
171        return
172
173    if _result_cache.get((CACHE_TYPE.DOMAIN, query)) is False:
174        return
175
176    result = False
177    if re.search(VALID_DNS_NAME_REGEX, query) is not None and not _check_domain_whitelisted(query):
178        parts = query.split('.')
179
180        if query.endswith(".ip-adress.com"):  # Reference: https://www.virustotal.com/gui/domain/ip-adress.com/relations
181            _ = '.'.join(parts[:-2])
182            trail = "%s(.ip-adress.com)" % _
183            if _ in trails:
184                result = True
185                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[_][0], trails[_][1]), packet)
186
187        if not result:
188            for i in xrange(0, len(parts)):
189                domain = '.'.join(parts[i:])
190                if domain in trails:
191                    if domain == query:
192                        trail = domain
193                    else:
194                        _ = ".%s" % domain
195                        trail = "(%s)%s" % (query[:-len(_)], _)
196
197                    if not (re.search(r"(?i)\A([rd]?ns|nf|mx|nic)\d*\.", query) and any(_ in trails.get(domain, " ")[0] for _ in ("suspicious", "sinkhole"))):  # e.g. ns2.nobel.su
198                        if not ((query == trail or parts[0] == "www") and any(_ in trails.get(domain, " ")[0] for _ in ("dynamic", "free web"))):  # e.g. noip.com
199                            result = True
200                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]), packet)
201                            break
202
203        if not result and config.USE_HEURISTICS:
204            if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
205                trail = None
206
207                if len(parts) > 2:
208                    trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
209                elif len(parts) == 2:
210                    trail = "(%s).%s" % (parts[0], parts[1])
211                else:
212                    trail = query
213
214                if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
215                    result = True
216                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain (suspicious)", "(heuristic)"), packet)
217
218        if not result and trails._regex:
219            match = re.search(trails._regex, query)
220            if match:
221                group, trail = [_ for _ in match.groupdict().items() if _[1] is not None][0]
222                candidate = trails._regex.split("(?P<")[int(group[1:]) + 1]
223                candidate = candidate.split('>', 1)[-1].rstrip('|')[:-1]
224                if candidate in trails:
225                    result = True
226                    trail = match.group(0)
227
228                    prefix, suffix = query[:match.start()], query[match.end():]
229                    if prefix:
230                        trail = "(%s)%s" % (prefix, trail)
231                    if suffix:
232                        trail = "%s(%s)" % (trail, suffix)
233
234                    trail = trail.replace(".)", ").")
235
236                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[candidate][0], trails[candidate][1]), packet)
237
238        if not result and ".onion." in query:
239            trail = re.sub(r"(\.onion)(\..*)", r"\1(\2)", query)
240            _ = trail.split('(')[0]
241            if _ in trails:
242                result = True
243                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[_][0], trails[_][1]), packet)
244
245    if result is False:
246        _result_cache[(CACHE_TYPE.DOMAIN, query)] = False
247
def _get_local_prefix():
    """
    Returns the most common address prefix among the sources currently held in _connect_src_dst

    Prefix is the source address with its trailing "<digits>.<digits>" removed (e.g. "192.168."
    for "192.168.1.5"). In case of a tie, the greatest (count, prefix) pair wins. Non-empty
    outcome is stored into _result_cache, which is consulted when nothing can be derived from
    the current sources. When there is still nothing to return, '_' is returned
    """

    tally = {}

    # NOTE: each distinct source (part of the key in front of '~') is counted once
    for source in set(key.split('~')[0] for key in _connect_src_dst.keys()):
        prefix = re.sub(r"\d+\.\d+\Z", "", source)
        tally[prefix] = tally.get(prefix, 0) + 1

    if tally:
        result = max((count, prefix) for prefix, count in tally.items())[1]
    else:
        result = ""

    cache_key = (CACHE_TYPE.LOCAL_PREFIX, "")

    if result:
        _result_cache[cache_key] = result
    else:
        result = _result_cache.get(cache_key)

    return result or '_'
260
261def _process_packet(packet, sec, usec, ip_offset):
262    """
263    Processes single (raw) IP layer data
264    """
265
266    global _connect_sec
267    global _last_syn
268    global _last_logged_syn
269    global _last_udp
270    global _last_logged_udp
271    global _subdomains_sec
272
273    try:
274        if config.USE_HEURISTICS:
275            if _locks.connect_sec:
276                _locks.connect_sec.acquire()
277
278            connect_sec = _connect_sec
279            _connect_sec = sec
280
281            if _locks.connect_sec:
282                _locks.connect_sec.release()
283
284            if sec > connect_sec:
285                for key in _connect_src_dst:
286                    _src_ip, _dst = key.split('~')
287                    if not _dst.isdigit() and len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
288                        if not check_whitelisted(_src_ip):
289                            _dst_ip = _dst
290                            for _ in _connect_src_details[key]:
291                                log_event((sec, usec, _src_ip, _[2], _dst_ip, _[3], PROTO.TCP, TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"), packet)
292                    elif len(_connect_src_dst[key]) > INFECTION_SCANNING_THRESHOLD:
293                        _dst_port = _dst
294                        _dst_ip = [_[-1] for _ in _connect_src_details[key]]
295                        _src_port = [_[-2] for _ in _connect_src_details[key]]
296
297                        if len(_dst_ip) == len(set(_dst_ip)):
298                            if _src_ip.startswith(_get_local_prefix()):
299                                log_event((sec, usec, _src_ip, _src_port[0], _dst_ip[0], _dst_port, PROTO.TCP, TRAIL.PORT, _dst_port, "potential infection", "(heuristic)"), packet)
300
301                _connect_src_dst.clear()
302                _connect_src_details.clear()
303
304                for key in _path_src_dst:
305                    if len(_path_src_dst[key]) > WEB_SCANNING_THRESHOLD:
306                        _src_ip, _dst_ip = key.split('~')
307                        _sec, _usec, _src_port, _dst_port, _path = _path_src_dst_details[key].pop()
308                        log_event((_sec, _usec, _src_ip, _src_port, _dst_ip, _dst_port, PROTO.TCP, TRAIL.PATH, "*", "potential web scanning", "(heuristic)"), packet)
309
310                _path_src_dst.clear()
311                _path_src_dst_details.clear()
312
313        ip_data = packet[ip_offset:]
314        ip_version = ord(ip_data[0:1]) >> 4
315        localhost_ip = LOCALHOST_IP[ip_version]
316
317        if ip_version == 0x04:  # IPv4
318            ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])
319            fragment_offset = ip_header[4] & 0x1fff
320            if fragment_offset != 0:
321                return
322            iph_length = (ip_header[0] & 0xf) << 2
323            protocol = ip_header[6]
324            src_ip = socket.inet_ntoa(ip_header[8])
325            dst_ip = socket.inet_ntoa(ip_header[9])
326        elif ip_version == 0x06:  # IPv6
327            # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/
328            ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40])
329            iph_length = 40
330            protocol = ip_header[4]
331            src_ip = inet_ntoa6(ip_header[6])
332            dst_ip = inet_ntoa6(ip_header[7])
333        else:
334            return
335
336        if protocol == socket.IPPROTO_TCP:  # TCP
337            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length + 14])
338
339            if flags != 2 and config.plugin_functions:
340                if dst_ip in trails:
341                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True)
342                elif src_ip in trails and dst_ip != localhost_ip:
343                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True)
344
345            if flags == 2:  # SYN set (only)
346                _ = _last_syn
347                _last_syn = (sec, src_ip, src_port, dst_ip, dst_port)
348                if _ == _last_syn:  # skip bursts
349                    return
350
351                if dst_ip in trails or addr_port(dst_ip, dst_port) in trails:
352                    _ = _last_logged_syn
353                    _last_logged_syn = _last_syn
354                    if _ != _last_logged_syn:
355                        trail = addr_port(dst_ip, dst_port)
356                        if trail not in trails:
357                            trail = dst_ip
358                        if not any(_ in trails[trail][0] for _ in ("attacker",)) and not ("parking site" in trails[trail][0] and dst_port not in (80, 443)):
359                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP if ':' not in trail else TRAIL.IPORT, trail, trails[trail][0], trails[trail][1]), packet)
360
361                elif (src_ip in trails or addr_port(src_ip, src_port) in trails) and dst_ip != localhost_ip:
362                    _ = _last_logged_syn
363                    _last_logged_syn = _last_syn
364                    if _ != _last_logged_syn:
365                        trail = addr_port(src_ip, src_port)
366                        if trail not in trails:
367                            trail = src_ip
368                        if not any(_ in trails[trail][0] for _ in ("malware",)):
369                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP if ':' not in trail else TRAIL.IPORT, trail, trails[trail][0], trails[trail][1]), packet)
370
371                if config.USE_HEURISTICS:
372                    if dst_ip != localhost_ip:
373                        key = "%s~%s" % (src_ip, dst_ip)
374                        if key not in _connect_src_dst:
375                            _connect_src_dst[key] = set()
376                            _connect_src_details[key] = set()
377                        _connect_src_dst[key].add(dst_port)
378                        _connect_src_details[key].add((sec, usec, src_port, dst_port))
379
380                        if dst_port in POTENTIAL_INFECTION_PORTS:
381                            key = "%s~%s" % (src_ip, dst_port)
382                            if key not in _connect_src_dst:
383                                _connect_src_dst[key] = set()
384                                _connect_src_details[key] = set()
385                            _connect_src_dst[key].add(dst_ip)
386                            _connect_src_details[key].add((sec, usec, src_port, dst_ip))
387            else:
388                tcph_length = doff_reserved >> 4
389                h_size = iph_length + (tcph_length << 2)
390                tcp_data = get_text(ip_data[h_size:])
391
392                if tcp_data.startswith("HTTP/"):
393                    match = re.search(GENERIC_SINKHOLE_REGEX, tcp_data[:2000])
394                    if match:
395                        trail = match.group(0)
396                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "sinkhole response (malware)", "(heuristic)"), packet)
397                    else:
398                        index = tcp_data.find("<title>")
399                        if index >= 0:
400                            title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)]
401                            if re.search(r"domain name has been seized by|Domain Seized|Domain Seizure", title):
402                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, title, "seized domain (suspicious)", "(heuristic)"), packet)
403
404                    content_type = None
405                    first_index = tcp_data.find("\r\nContent-Type:")
406                    if first_index >= 0:
407                        first_index = first_index + len("\r\nContent-Type:")
408                        last_index = tcp_data.find("\r\n", first_index)
409                        if last_index >= 0:
410                            content_type = tcp_data[first_index:last_index].strip().lower()
411
412                    if content_type and content_type in SUSPICIOUS_CONTENT_TYPES:
413                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, content_type, "content type (suspicious)", "(heuristic)"), packet)
414
415                method, path = None, None
416
417                if " HTTP/" in tcp_data:
418                    index = tcp_data.find("\r\n")
419                    if index >= 0:
420                        line = tcp_data[:index]
421                        if line.count(' ') == 2 and " HTTP/" in line:
422                            method, path, _ = line.split(' ')
423
424                if method and path:
425                    post_data = None
426                    host = dst_ip
427                    first_index = tcp_data.find("\r\nHost:")
428                    path = path.lower()
429
430                    if first_index >= 0:
431                        first_index = first_index + len("\r\nHost:")
432                        last_index = tcp_data.find("\r\n", first_index)
433                        if last_index >= 0:
434                            host = tcp_data[first_index:last_index]
435                            host = host.strip().lower()
436                            if host.endswith(":80"):
437                                host = host[:-3]
438                            if host and host[0].isalpha() and dst_ip in trails:
439                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet)
440                            elif re.search(r"\A\d+\.[0-9.]+\Z", host or "") and re.search(SUSPICIOUS_DIRECT_IP_URL_REGEX, "%s%s" % (host, path)):
441                                if not dst_ip.startswith(_get_local_prefix()):
442                                    trail = "(%s)%s" % (host, path)
443                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "potential iot-malware download (suspicious)", "(heuristic)"), packet)
444                                    return
445                            elif config.CHECK_HOST_DOMAINS:
446                                _check_domain(host, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
447                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
448                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet)
449
450                    index = tcp_data.find("\r\n\r\n")
451                    if index >= 0:
452                        post_data = tcp_data[index + 4:]
453
454                    url = None
455                    if config.USE_HEURISTICS and path.startswith('/'):
456                        _path = path.split('/')[1]
457
458                        key = "%s~%s" % (src_ip, dst_ip)
459                        if key not in _path_src_dst:
460                            _path_src_dst[key] = set()
461                        _path_src_dst[key].add(_path)
462
463                        if key not in _path_src_dst_details:
464                            _path_src_dst_details[key] = set()
465                        _path_src_dst_details[key].add((sec, usec, src_port, dst_port, path))
466
467                    elif config.USE_HEURISTICS and dst_port == 80 and path.startswith("http://") and any(_ in path for _ in SUSPICIOUS_PROXY_PROBE_PRE_CONDITION) and not _check_domain_whitelisted(path.split('/')[2]):
468                        trail = re.sub(r"(http://[^/]+/)(.+)", r"\g<1>(\g<2>)", path)
469                        trail = re.sub(r"(http://)([^/(]+)", lambda match: "%s%s" % (match.group(1), match.group(2).split(':')[0].rstrip('.')), trail)
470                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "potential proxy probe (suspicious)", "(heuristic)"), packet)
471                        return
472                    elif "://" in path:
473                        unquoted_path = _urllib.parse.unquote(path)
474
475                        key = "code execution"
476                        if key not in _local_cache:
477                            _local_cache[key] = next(_[1] for _ in SUSPICIOUS_HTTP_REQUEST_REGEXES if "code execution" in _[0])
478
479                        if re.search(_local_cache[key], unquoted_path, re.I) is None:    # NOTE: to prevent malware domain FPs in case of outside scanners
480                            url = path.split("://", 1)[1]
481
482                            if '/' not in url:
483                                url = "%s/" % url
484
485                            host, path = url.split('/', 1)
486                            if host.endswith(":80"):
487                                host = host[:-3]
488                            path = "/%s" % path
489                            proxy_domain = host.split(':')[0]
490                            _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
491                    elif method == "CONNECT":
492                        if '/' in path:
493                            host, path = path.split('/', 1)
494                            path = "/%s" % path
495                        else:
496                            host, path = path, '/'
497                        if host.endswith(":80"):
498                            host = host[:-3]
499                        url = "%s%s" % (host, path)
500                        proxy_domain = host.split(':')[0]
501                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
502
503                    if url is None:
504                        url = "%s%s" % (host, path)
505
506                    if config.USE_HEURISTICS:
507                        user_agent, result = None, None
508
509                        first_index = tcp_data.find("\r\nUser-Agent:")
510                        if first_index >= 0:
511                            first_index = first_index + len("\r\nUser-Agent:")
512                            last_index = tcp_data.find("\r\n", first_index)
513                            if last_index >= 0:
514                                user_agent = tcp_data[first_index:last_index]
515                                user_agent = _urllib.parse.unquote(user_agent).strip()
516
517                        if user_agent:
518                            result = _result_cache.get((CACHE_TYPE.USER_AGENT, user_agent))
519                            if result is None:
520                                if re.search(WHITELIST_UA_REGEX, user_agent, re.I) is None:
521                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
522                                    if match:
523                                        def _(value):
524                                            return value.rstrip('\\').replace('(', "\\(").replace(')', "\\)")
525
526                                        parts = user_agent.split(match.group(0), 1)
527
528                                        if len(parts) > 1 and parts[0] and parts[-1]:
529                                            result = _result_cache[(CACHE_TYPE.USER_AGENT, user_agent)] = "%s (%s)" % (_(match.group(0)), _(user_agent))
530                                        else:
531                                            result = _result_cache[(CACHE_TYPE.USER_AGENT, user_agent)] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts)
532                                if not result:
533                                    _result_cache[(CACHE_TYPE.USER_AGENT, user_agent)] = False
534
535                            if result:
536                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet)
537
538                    if not _check_domain_whitelisted(host):
539                        path = path.replace("//", '/')
540
541                        unquoted_path = _urllib.parse.unquote(path)
542                        unquoted_post_data = _urllib.parse.unquote(post_data or "")
543
544                        checks = [path.rstrip('/')]
545
546                        if '?' in path:
547                            checks.append(path.split('?')[0].rstrip('/'))
548
549                            if '=' in path:
550                                checks.append(path[:path.index('=') + 1])
551
552                            _ = re.sub(r"(\w+=)[^&=]+", r"\g<1>", path)
553                            if _ not in checks:
554                                checks.append(_)
555                                if _.count('/') > 1:
556                                    checks.append("/%s" % _.split('/')[-1])
557                        elif post_data:
558                            checks.append("%s?%s" % (path, unquoted_post_data.lower()))
559
560                        if checks[-1].count('/') > 1:
561                            checks.append(checks[-1][:checks[-1].rfind('/')])
562                            checks.append(checks[0][checks[0].rfind('/'):].split('?')[0])
563
564                        for check in filter(None, checks):
565                            for _ in ("", host):
566                                check = "%s%s" % (_, check)
567                                if check in trails:
568                                    if '?' not in path and '?' in check and post_data:
569                                        trail = "%s(%s \\(%s %s\\))" % (host, path, method, post_data.strip())
570                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, trails[check][0], trails[check][1]))
571                                    else:
572                                        parts = url.split(check)
573                                        other = ("(%s)" % _ if _ else _ for _ in parts)
574                                        trail = check.join(other)
575                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1]))
576
577                                    return
578
579                        if "%s/" % host in trails:
580                            trail = "%s/" % host
581                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[trail][0], trails[trail][1]))
582                            return
583
584                        if config.USE_HEURISTICS:
585                            match = re.search(r"\b(CF-Connecting-IP|True-Client-IP|X-Forwarded-For):\s*([0-9.]+)".encode(), packet, re.I)
586                            if match:
587                                src_ip = "%s,%s" % (src_ip, match.group(1))
588
589                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
590                                replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char]
591                                path = path.replace(char, replacement)
592                                if post_data:
593                                    post_data = post_data.replace(char, replacement)
594
595                            if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS):
596                                if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
597                                    found = _result_cache.get((CACHE_TYPE.PATH, unquoted_path))
598                                    if found is None:
599                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
600                                            if re.search(regex, unquoted_path, re.I | re.DOTALL):
601                                                found = desc
602                                                break
603                                        _result_cache[(CACHE_TYPE.PATH, unquoted_path)] = found or ""
604                                    if found and not ("data leakage" in found and is_local(dst_ip)):
605                                        trail = "%s(%s)" % (host, path)
606                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % found, "(heuristic)"), packet)
607                                        return
608
609                                if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
610                                    found = _result_cache.get((CACHE_TYPE.POST_DATA, unquoted_post_data))
611                                    if found is None:
612                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
613                                            if re.search(regex, unquoted_post_data, re.I | re.DOTALL):
614                                                found = desc
615                                                break
616                                        _result_cache[(CACHE_TYPE.POST_DATA, unquoted_post_data)] = found or ""
617                                    if found:
618                                        trail = "%s(%s \\(%s %s\\))" % (host, path, method, post_data.strip())
619                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "%s (suspicious)" % found, "(heuristic)"), packet)
620                                        return
621
622                            if '.' in path:
623                                _ = _urllib.parse.urlparse("http://%s" % url)  # dummy scheme
624                                path = path.lower()
625                                filename = _.path.split('/')[-1]
626                                name, extension = os.path.splitext(filename)
627                                trail = "%s(%s)" % (host, path)
628                                if extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not is_local(dst_ip) and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and '=' not in _.query and len(name) < 10:
629                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet)
630                                else:
631                                    for desc, regex in SUSPICIOUS_HTTP_PATH_REGEXES:
632                                        if re.search(regex, filename, re.I):
633                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % desc, "(heuristic)"), packet)
634                                            break
635
636        elif protocol == socket.IPPROTO_UDP:  # UDP
637            _ = ip_data[iph_length:iph_length + 4]
638            if len(_) < 4:
639                return
640
641            src_port, dst_port = struct.unpack("!HH", _)
642
643            _ = _last_udp
644            _last_udp = (sec, src_ip, src_port, dst_ip, dst_port)
645            if _ == _last_udp:  # skip bursts
646                return
647
648            if src_port != 53 and dst_port != 53:  # not DNS
649                if dst_ip in trails:
650                    trail = dst_ip
651                elif src_ip in trails:
652                    trail = src_ip
653                else:
654                    trail = None
655
656                if trail:
657                    _ = _last_logged_udp
658                    _last_logged_udp = _last_udp
659                    if _ != _last_logged_udp:
660                        if not any(_ in trails[trail][0] for _ in ("malware",)):
661                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet)
662
663            else:
664                dns_data = ip_data[iph_length + 8:]
665
666                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
667                if len(dns_data) > 6:
668                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
669                    if qdcount > 0:
670                        offset = 12
671                        query = ""
672
673                        while len(dns_data) > offset:
674                            length = ord(dns_data[offset:offset + 1])
675                            if not length:
676                                query = query[:-1]
677                                break
678                            query += get_text(dns_data[offset + 1:offset + length + 1]) + '.'
679                            offset += length + 1
680
681                        query = query.lower()
682
683                        if not query or re.search(VALID_DNS_NAME_REGEX, query) is None or any(_ in query for _ in (".intranet.",)) or query.split('.')[-1] in IGNORE_DNS_QUERY_SUFFIXES:
684                            return
685
686                        parts = query.split('.')
687
688                        if ord(dns_data[2:3]) & 0xfa == 0x00:  # standard query (both recursive and non-recursive)
689                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])
690
691                            if len(parts) > 2:
692                                if len(parts) > 3 and len(parts[-2]) <= 3:
693                                    domain = '.'.join(parts[-3:])
694                                else:
695                                    domain = '.'.join(parts[-2:])
696
697                                if not _check_domain_whitelisted(domain):  # e.g. <hash>.hashserver.cs.trendmicro.com
698                                    if (sec - (_subdomains_sec or 0)) > HOURLY_SECS:
699                                        _subdomains.clear()
700                                        _dns_exhausted_domains.clear()
701                                        _subdomains_sec = sec
702
703                                    subdomains = _subdomains.get(domain)
704
705                                    if not subdomains:
706                                        subdomains = _subdomains[domain] = _set()
707                                        subdomains._start = sec
708
709                                    if not re.search(r"\A\d+\-\d+\-\d+\-\d+\Z", parts[0]):
710                                        if sec - subdomains._start > 60:
711                                            subdomains._start = sec
712                                            subdomains.clear()
713                                        elif len(subdomains) < DNS_EXHAUSTION_THRESHOLD:
714                                            subdomains.add('.'.join(parts[:-2]))
715                                        else:
716                                            trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
717                                            if re.search(r"bl\b", trail) is None:                                               # generic check for DNSBLs
718                                                if not any(_ in subdomains for _ in LOCAL_SUBDOMAIN_LOOKUPS):                   # generic check for local DNS resolutions
719                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "potential dns exhaustion (suspicious)", "(heuristic)"), packet)
720                                                    _dns_exhausted_domains.add(domain)
721
722                                            return
723
724                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
725                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
726                                if addr_port(dst_ip, dst_port) in trails:
727                                    trail = addr_port(dst_ip, dst_port)
728                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IPORT, "%s (%s)" % (dst_ip, query), trails[trail][0], trails[trail][1]), packet)
729                                elif dst_ip in trails:
730                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet)
731                                elif src_ip in trails:
732                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)
733
734                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet)
735
736                        elif config.USE_HEURISTICS:
737                            if ord(dns_data[2:3]) & 0x80:  # standard response
738                                if ord(dns_data[3:4]) == 0x80:  # recursion available, no error
739                                    _ = offset + 5
740                                    try:
741                                        while _ < len(dns_data):
742                                            if ord(dns_data[_:_ + 1]) & 0xc0 != 0 and dns_data[_ + 2] == "\00" and dns_data[_ + 3] == "\x01":  # Type A
743                                                break
744                                            else:
745                                                _ += 12 + struct.unpack("!H", dns_data[_ + 10: _ + 12])[0]
746
747                                        _ = dns_data[_ + 12:_ + 16]
748                                        if _:
749                                            answer = socket.inet_ntoa(_)
750                                            if answer in trails and not _check_domain_whitelisted(query):
751                                                _ = trails[answer]
752                                                if "sinkhole" in _[0]:
753                                                    trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:]))
754                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "sinkholed by %s (malware)" % _[0].split(" ")[1], "(heuristic)"), packet)  # (e.g. kitro.pl, devomchart.com, jebena.ananikolic.su, vuvet.cn)
755                                                elif "parking" in _[0]:
756                                                    trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:]))
757                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "parked site (suspicious)", "(heuristic)"), packet)
758                                    except IndexError:
759                                        pass
760
761                                elif ord(dns_data[3:4]) == 0x83:  # recursion available, no such name
762                                    if '.'.join(parts[-2:]) not in _dns_exhausted_domains and not _check_domain_whitelisted(query) and not _check_domain_member(query, trails):
763                                        if parts[-1].isdigit():
764                                            return
765
766                                        if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):  # generic check for DNSBL IP lookups
767                                            if not is_local(dst_ip):  # prevent FPs caused by local queries
768                                                for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)):
769                                                    if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec // 3600:
770                                                        NO_SUCH_NAME_COUNTERS[_] = [sec // 3600, 1, set()]
771                                                    else:
772                                                        NO_SUCH_NAME_COUNTERS[_][1] += 1
773                                                        NO_SUCH_NAME_COUNTERS[_][2].add(query)
774
775                                                        if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD:
776                                                            if _.startswith("*."):
777                                                                trail = "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:])
778                                                                if not any(subdomain in trail for subdomain in LOCAL_SUBDOMAIN_LOOKUPS):  # generic check for local DNS resolutions
779                                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "excessive no such domain (suspicious)", "(heuristic)"), packet)
780                                                                for item in NO_SUCH_NAME_COUNTERS[_][2]:
781                                                                    try:
782                                                                        del NO_SUCH_NAME_COUNTERS[item]
783                                                                    except KeyError:
784                                                                        pass
785                                                            else:
786                                                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet)
787
788                                                            try:
789                                                                del NO_SUCH_NAME_COUNTERS[_]
790                                                            except KeyError:
791                                                                pass
792
793                                                            break
794
795                                            if len(parts) == 2 and parts[0] and '-' not in parts[0]:
796                                                part = parts[0]
797                                                trail = "(%s).%s" % (parts[0], parts[1])
798
799                                                result = _result_cache.get(part)
800
801                                                if result is None:
802                                                    # Reference: https://github.com/exp0se/dga_detector
803                                                    probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part))
804                                                    entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
805                                                    if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
806                                                        result = "entropy threshold no such domain (suspicious)"
807
808                                                    if not result:
809                                                        if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
810                                                            result = "consonant threshold no such domain (suspicious)"
811
812                                                    _result_cache[part] = result or False
813
814                                                if result:
815                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet)
816
817        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
818            if protocol == socket.IPPROTO_ICMP:
819                if ord(ip_data[iph_length:iph_length + 1]) != 0x08:  # Non-echo request
820                    return
821            elif protocol == socket.IPPROTO_ICMPV6:
822                if ord(ip_data[iph_length:iph_length + 1]) != 0x80:  # Non-echo request
823                    return
824
825            if dst_ip in trails:
826                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)
827            elif src_ip in trails:
828                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)
829
830    except struct.error:
831        pass
832
833    except Exception:
834        if config.SHOW_DEBUG:
835            traceback.print_exc()
836
def init():
    """
    Performs sensor initialization

    In order: decides whether multiprocessing is going to be used, prepares
    logging, loads/updates trails (re-scheduling the update periodically),
    checks for root privileges, loads plugin scripts, opens capture handles
    (pcap files or live interfaces), validates remote logging related
    configuration values, applies the capture filter, starts worker processes
    and (on non-Windows platforms) tries to adjust CPU scheduling by usage of
    'schedtool'.

    Note: unrecoverable problems terminate the program through exit()
    """

    global _multiprocessing

    try:
        import multiprocessing

        if config.PROCESS_COUNT > 1 and not config.profile:
            _multiprocessing = multiprocessing
    except (ImportError, OSError, NotImplementedError):
        pass

    def update_timer():
        # Refreshes content of 'trails' and re-arms itself (config.UPDATE_PERIOD)
        retries = 0
        if not config.offline:
            while retries < CHECK_CONNECTION_MAX_RETRIES and not check_connection():
                sys.stdout.write("[!] can't update because of lack of Internet connection (waiting..." if not retries else '.')
                sys.stdout.flush()
                time.sleep(10)
                retries += 1

            if retries:
                print(")")

        if config.offline or retries == CHECK_CONNECTION_MAX_RETRIES:
            if retries == CHECK_CONNECTION_MAX_RETRIES:
                print("[x] going to continue without online update")
            _ = update_trails(offline=True)
        else:
            _ = update_trails()
            update_ipcat()

        if _:
            trails.clear()
            trails.update(_)
        elif not trails:
            _ = load_trails()
            trails.update(_)

        # Static trails that look like (valid) regular expressions are joined into one alternation of named groups
        _regex = ""
        for trail in trails:
            if "static" in trails[trail][1]:
                if re.search(r"[\].][*+]|\[[a-z0-9_.\-]+\]", trail, re.I):
                    try:
                        re.compile(trail)
                    except re.error:
                        pass
                    else:
                        if re.escape(trail) != trail:
                            index = _regex.count("(?P<g")
                            if index < 100:  # Reference: https://stackoverflow.com/questions/478458/python-regular-expressions-with-more-than-100-groups
                                _regex += "|(?P<g%s>%s)" % (index, trail)

        trails._regex = _regex.strip('|')

        thread = threading.Timer(config.UPDATE_PERIOD, update_timer)
        thread.daemon = True
        thread.start()

    create_log_directory()
    get_error_log_handle()

    msg = "[i] using '%s' for trail storage" % config.TRAILS_FILE
    if os.path.isfile(config.TRAILS_FILE):
        mtime = time.gmtime(os.path.getmtime(config.TRAILS_FILE))
        msg += " (last modification: '%s')" % time.strftime(HTTP_TIME_FORMAT, mtime)

    print(msg)

    update_timer()

    if not config.DISABLE_CHECK_SUDO and check_sudo() is False:
        exit("[!] please run '%s' with root privileges" % __file__)

    if config.plugins:
        config.plugin_functions = []

        # Note: inspect.getargspec() does not exist in recent Python 3 versions
        getargspec = getattr(inspect, "getfullargspec", None) or getattr(inspect, "getargspec")

        for plugin in re.split(r"[,;]", config.plugins):
            plugin = plugin.strip()
            found = False

            for _ in (plugin, os.path.join("plugins", plugin), os.path.join("plugins", "%s.py" % plugin)):
                if os.path.isfile(_):
                    plugin = _
                    found = True
                    break

            if not found:
                exit("[!] plugin script '%s' not found" % plugin)
            else:
                dirname, filename = os.path.split(plugin)
                dirname = os.path.abspath(dirname)
                if not os.path.exists(os.path.join(dirname, '__init__.py')):
                    exit("[!] empty file '__init__.py' required inside directory '%s'" % dirname)

                if not filename.endswith(".py"):
                    exit("[!] plugin script '%s' should have an extension '.py'" % filename)

                if dirname not in sys.path:
                    sys.path.insert(0, dirname)

                try:
                    module = __import__(filename[:-3])
                except (ImportError, SyntaxError) as ex:
                    exit("[!] unable to import plugin script '%s' (%s)" % (filename, ex))

                found = False
                for name, function in inspect.getmembers(module, inspect.isfunction):
                    # Note: both argument names have to be present in the signature of function 'plugin'
                    if name == "plugin" and set(("event_tuple", "packet")).issubset(getargspec(function).args):
                        found = True
                        config.plugin_functions.append(function)
                        function.__name__ = module.__name__

                if not found:
                    exit("[!] missing function 'plugin(event_tuple, packet)' in plugin script '%s'" % filename)

    if config.pcap_file:
        for _ in config.pcap_file.split(','):
            _caps.append(pcapy.open_offline(_))
    else:
        # Note: missing MONITOR_INTERFACE ends with the "interface not found" message (below) instead of an AttributeError
        interfaces = set(_.strip() for _ in (config.MONITOR_INTERFACE or "").split(','))

        if (config.MONITOR_INTERFACE or "").lower() == "any":
            if IS_WIN or "any" not in pcapy.findalldevs():
                print("[x] virtual interface 'any' missing. Replacing it with all interface names")
                interfaces = pcapy.findalldevs()
            else:
                print("[?] in case of any problems with packet capture on virtual interface 'any', please put all monitoring interfaces to promiscuous mode manually (e.g. 'sudo ifconfig eth0 promisc')")

        for interface in interfaces:
            if interface.lower() != "any" and re.sub(r"(?i)\Anetmap:", "", interface) not in pcapy.findalldevs():
                hint = "[?] available interfaces: '%s'" % ",".join(pcapy.findalldevs())
                exit("[!] interface '%s' not found\n%s" % (interface, hint))

            print("[i] opening interface '%s'" % interface)
            try:
                _caps.append(pcapy.open_live(interface, SNAP_LEN, True, CAPTURE_TIMEOUT))
            except (socket.error, pcapy.PcapError):
                if "permitted" in str(sys.exc_info()[1]):
                    exit("[!] permission problem occurred ('%s')" % sys.exc_info()[1])
                elif "No such device" in str(sys.exc_info()[1]):
                    exit("[!] no such device '%s'" % interface)
                else:
                    raise

    if config.LOG_SERVER and ':' not in config.LOG_SERVER:
        exit("[!] invalid configuration value for 'LOG_SERVER' ('%s')" % config.LOG_SERVER)

    if config.SYSLOG_SERVER and not len(config.SYSLOG_SERVER.split(':')) == 2:
        exit("[!] invalid configuration value for 'SYSLOG_SERVER' ('%s')" % config.SYSLOG_SERVER)

    if config.LOGSTASH_SERVER and not len(config.LOGSTASH_SERVER.split(':')) == 2:
        exit("[!] invalid configuration value for 'LOGSTASH_SERVER' ('%s')" % config.LOGSTASH_SERVER)

    if config.REMOTE_SEVERITY_REGEX:
        try:
            re.compile(config.REMOTE_SEVERITY_REGEX)
        except re.error:
            exit("[!] invalid configuration value for 'REMOTE_SEVERITY_REGEX' ('%s')" % config.REMOTE_SEVERITY_REGEX)

    if config.CAPTURE_FILTER:
        print("[i] setting capture filter '%s'" % config.CAPTURE_FILTER)
        for _cap in _caps:
            try:
                _cap.setfilter(config.CAPTURE_FILTER)
            except Exception as ex:  # best-effort (capture continues unfiltered), but user should be aware of it
                print("[x] unable to set capture filter '%s' ('%s')" % (config.CAPTURE_FILTER, ex))

    if _multiprocessing:
        _init_multiprocessing()

    if not IS_WIN and not config.DISABLE_CPU_AFFINITY:
        try:
            try:
                # Note: subprocess output is compared/split as bytes so that the same code works in both Python 2 and 3
                mod = int(subprocess.check_output("grep -c ^processor /proc/cpuinfo", stderr=subprocess.STDOUT, shell=True).strip())
                used = subprocess.check_output("for pid in $(ps aux | grep python | grep sensor.py | grep -E -o 'root[ ]*[0-9]*' | tr -d '[:alpha:] '); do schedtool $pid; done | grep -E -o 'AFFINITY .*' | cut -d ' ' -f 2 | grep -v 0xf", stderr=subprocess.STDOUT, shell=True).strip().split(b'\n')
                max_used = max(int(_, 16) for _ in used)
                affinity = max(1, (max_used << 1) % 2 ** mod)
            except Exception:
                affinity = 1
            p = subprocess.Popen("schedtool -n -2 -M 2 -p 10 -a 0x%02x %d" % (affinity, os.getpid()), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            _, stderr = p.communicate()
            if b"not found" in stderr:
                msg = "[?] please install 'schedtool' for better CPU scheduling"

                # Note: platform.linux_distribution() has been removed in Python 3.8 (install example is skipped in that case)
                linux_distribution = getattr(platform, "linux_distribution", None)
                distro_name = linux_distribution()[0].lower() if linux_distribution else ""

                for distro, install in {("fedora", "centos"): "sudo yum install schedtool", ("debian", "ubuntu"): "sudo apt-get install schedtool"}.items():
                    if distro_name in distro:
                        msg += " (e.g. '%s')" % install
                        break
                print(msg)
        except Exception:  # CPU scheduling adjustment is optional
            pass
1030
def _init_multiprocessing():
    """
    Prepares the shared capture buffer and starts worker processes
    (used only when multiprocessing mode has been selected)

    Note: in case that worker processes can't be created (TypeError) both
    '_buffer' and '_multiprocessing' are reset to None
    """

    global _buffer
    global _multiprocessing
    global _n

    if not _multiprocessing:
        return

    print("[i] preparing capture buffer...")
    try:
        _buffer = mmap.mmap(-1, config.CAPTURE_BUFFER)  # http://www.alexonlinux.com/direct-io-in-python

        # Zeroes are written through the buffer in chunks of MMAP_ZFILL_CHUNK_LENGTH bytes
        zero_chunk = b"\x00" * MMAP_ZFILL_CHUNK_LENGTH
        chunk_count = config.CAPTURE_BUFFER // MMAP_ZFILL_CHUNK_LENGTH
        for _ in xrange(chunk_count):
            _buffer.write(zero_chunk)
        _buffer.seek(0)
    except KeyboardInterrupt:
        raise
    except:
        exit("[!] unable to allocate network capture buffer. Please adjust value of 'CAPTURE_BUFFER'")

    # Shared counter handed over (together with the buffer) to every worker
    _n = _multiprocessing.Value('L', lock=False)

    try:
        worker_count = config.PROCESS_COUNT - 1
        for index in xrange(worker_count):
            process = _multiprocessing.Process(
                target=worker,
                name=str(index),
                args=(_buffer, _n, index, worker_count, _process_packet),
            )
            process.daemon = True
            process.start()
    except TypeError:   # Note: https://github.com/stamparm/maltrail/issues/11823
        _buffer = None
        _multiprocessing = None
    else:
        print("[i] created %d more processes (out of total %d)" % (worker_count, config.PROCESS_COUNT))
1066
def monitor():
    """
    Sniffs/monitors given capturing interface

    One capture loop is run per opened capture handle (entry in '_caps'). Each
    captured packet is either stored to the shared buffer (multiprocessing mode)
    or processed directly by '_process_packet'. On exit, worker processes are
    signaled to end (BLOCK_MARKER.END) and, in case of pcap file(s), condensed
    events are flushed.
    """

    print("[^] running...")

    def packet_handler(datalink, header, packet):
        # Locates the start of IP data for given datalink type and dispatches the packet
        global _count

        ip_offset = None
        try:
            dlt_offset = DLT_OFFSETS[datalink]
        except KeyError:
            log_error("Received unexpected datalink (%d)" % datalink, single=True)
            return

        try:
            if datalink == pcapy.DLT_RAW:
                ip_offset = dlt_offset

            elif datalink == pcapy.DLT_PPP:
                if packet[2:4] in (b"\x00\x21", b"\x00\x57"):  # (IPv4, IPv6)
                    ip_offset = dlt_offset

            elif datalink == pcapy.DLT_NULL:
                if packet[0:4] in (b"\x02\x00\x00\x00", b"\x23\x00\x00\x00"):  # (IPv4, IPv6)
                    ip_offset = dlt_offset

            elif dlt_offset >= 2:
                if packet[dlt_offset - 2:dlt_offset] == b"\x81\x00":  # VLAN
                    dlt_offset += 4
                if packet[dlt_offset - 2:dlt_offset] in (b"\x08\x00", b"\x86\xdd"):  # (IPv4, IPv6)
                    ip_offset = dlt_offset

        except IndexError:
            pass

        if ip_offset is None:
            return

        try:
            if six.PY3:  # https://github.com/helpsystems/pcapy/issues/37#issuecomment-530795813
                sec, usec = [int(_) for _ in ("%.6f" % time.time()).split('.')]
            else:
                sec, usec = header.getts()

            if _multiprocessing:
                block = struct.pack("=III", sec, usec, ip_offset) + packet

                # Note: lock (set only in case of multiple capture threads) has to be released
                # even if storing of the block fails, otherwise other capture threads would hang
                lock = _locks.count
                if lock:
                    lock.acquire()

                try:
                    write_block(_buffer, _count, block)
                    _n.value = _count = _count + 1
                finally:
                    if lock:
                        lock.release()
            else:
                _process_packet(packet, sec, usec, ip_offset)

        except socket.timeout:
            pass

    try:
        def _(_cap):
            # Capture loop for a single capture handle
            global _done_count

            datalink = _cap.datalink()


#
# NOTE: currently an issue with pcapy-png and loop()
#
#            if six.PY3 and not config.pcap_file:  # https://github.com/helpsystems/pcapy/issues/37#issuecomment-530795813
#                def _loop_handler(header, packet):
#                    packet_handler(datalink, header, packet)
#
#                _cap.loop(-1, _loop_handler)
#            else:

            while True:
                success = False
                try:
                    (header, packet) = _cap.next()
                    if header is not None:
                        success = True
                        packet_handler(datalink, header, packet)
                    elif config.pcap_file:  # end of pcap file
                        with _done_lock:
                            _done_count += 1
                        break
                except (pcapy.PcapError, socket.timeout):
                    pass

                if not success:
                    time.sleep(REGULAR_SENSOR_SLEEP_TIME)

        if config.profile and len(_caps) == 1:
            print("[=] will store profiling results to '%s'..." % config.profile)
            _(_caps[0])
        else:
            if len(_caps) > 1:
                if _multiprocessing:
                    _locks.count = threading.Lock()
                _locks.connect_sec = threading.Lock()

            for _cap in _caps:
                threading.Thread(target=_, args=(_cap,)).start()

            # Waits until every pcap file has been read to the end (never fulfilled in case of live capture)
            while _caps and not _done_count == (config.pcap_file or "").count(',') + 1:
                time.sleep(1)

        if not config.pcap_file:
            print("[i] all capturing interfaces closed")
    except SystemError as ex:
        if "error return without" in str(ex):
            print("\r[x] stopping (Ctrl-C pressed)")
        else:
            raise
    except KeyboardInterrupt:
        print("\r[x] stopping (Ctrl-C pressed)")
    finally:
        print("\r[i] cleaning up...")

        if _multiprocessing:
            try:
                # One END marker is written per worker process
                for _ in xrange(config.PROCESS_COUNT - 1):
                    write_block(_buffer, _n.value, b"", BLOCK_MARKER.END)
                    _n.value = _n.value + 1
                while _multiprocessing.active_children():
                    time.sleep(REGULAR_SENSOR_SLEEP_TIME)
            except KeyboardInterrupt:
                pass

        if config.pcap_file:
            flush_condensed_events(True)
1204
def main():
    """
    Entry point of the sensor

    Pre-processes sys.argv, prints the banner, parses command line options,
    reads the configuration file, copies string/boolean option values into
    `config`, validates given pcap file(s) and privileges and finally runs
    init() followed by monitor() (optionally under cProfile)

    Raises SystemExit on '--version', on missing pcap file and on missing
    root privileges (the latter two carry a message for the user)
    """

    # Note: sys.argv is adjusted here, before the option parser sees it
    for i in xrange(1, len(sys.argv)):
        if sys.argv[i] == "-q":
            # silence regular output right away so that not even the banner is printed
            sys.stdout = open(os.devnull, 'w')
        if sys.argv[i] == "-i":
            # fold the run of existing file paths following the option's value into that
            # value (comma-separated), blanking the consumed arguments
            # NOTE(review): the parser below registers '-r' for pcap file(s); confirm that '-i' is still the intended switch here
            for j in xrange(i + 2, len(sys.argv)):
                value = sys.argv[j]
                if os.path.isfile(value):
                    sys.argv[i + 1] += ",%s" % value
                    sys.argv[j] = ''
                else:
                    break

    print("%s (sensor) #v%s {%s}\n" % (NAME, VERSION, HOMEPAGE))

    # the banner already contains the version, hence nothing more to do
    if "--version" in sys.argv:
        raise SystemExit

    parser = optparse.OptionParser(version=VERSION)
    parser.add_option("-c", dest="config_file", default=CONFIG_FILE, help="configuration file (default: '%s')" % os.path.split(CONFIG_FILE)[-1])
    parser.add_option("-r", dest="pcap_file", help="pcap file for offline analysis")
    parser.add_option("-p", dest="plugins", help="plugin(s) to be used per event")
    parser.add_option("-q", "--quiet", dest="quiet", action="store_true", help="turn off regular output")
    parser.add_option("--console", dest="console", action="store_true", help="print events to console")
    parser.add_option("--offline", dest="offline", action="store_true", help="disable (online) trail updates")
    parser.add_option("--debug", dest="debug", action="store_true", help=optparse.SUPPRESS_HELP)
    parser.add_option("--profile", dest="profile", help=optparse.SUPPRESS_HELP)

    patch_parser(parser)

    options, _ = parser.parse_args()

    print("[*] starting @ %s\n" % time.strftime("%X /%Y-%m-%d/"))

    read_config(options.config_file)

    # command line values (strings and booleans only) override the ones read from the configuration file
    for option in dir(options):
        if isinstance(getattr(options, option), (six.string_types, bool)) and not option.startswith('_'):
            config[option] = getattr(options, option)

    if options.debug:
        config.console = True
        config.PROCESS_COUNT = 1
        config.SHOW_DEBUG = True

    if options.pcap_file:
        if options.pcap_file == '-':
            print("[i] using STDIN")
        else:
            for _ in options.pcap_file.split(','):
                if not os.path.isfile(_):
                    # sys.exit() is used instead of the site-provided exit() helper, which is not guaranteed to exist (e.g. 'python -S')
                    sys.exit("[!] missing pcap file '%s'" % _)

            print("[i] using pcap file(s) '%s'" % options.pcap_file)

    if not config.DISABLE_CHECK_SUDO and not check_sudo():
        sys.exit("[!] please run '%s' with root privileges" % __file__)

    try:
        init()
        if config.profile:
            # create/truncate the profiling results file; bytes are required as the file is
            # opened in binary mode (writing str raises TypeError on Python 3)
            with open(config.profile, "w+b") as f:
                f.write(b"")
            cProfile.run("monitor()", config.profile)
        else:
            monitor()
    except KeyboardInterrupt:
        print("\r[x] stopping (Ctrl-C pressed)")
1272
if __name__ == "__main__":
    # process exit status (changed to 1 by any of the failure branches below)
    code = 0

    try:
        main()
    except SystemExit as ex:
        # only a string message other than a run of '0' characters is reported as a failure
        message = get_ex_message(ex)
        if isinstance(message, six.string_types) and message.strip('0'):
            print(message)
            code = 1
    except IOError:
        log_error("\n\n[!] session abruptly terminated\n[?] (hint: \"https://stackoverflow.com/a/20997655\")")
        code = 1
    except Exception:
        msg = "\r[!] unhandled exception occurred ('%s')" % sys.exc_info()[1]
        msg += "\n[x] please report the following details at 'https://github.com/stamparm/maltrail/issues':\n---\n'%s'\n---" % traceback.format_exc()
        log_error("\n\n%s" % msg.replace("\r", ""))

        print(msg)
        code = 1
    finally:
        try:
            if not any(_ in sys.argv for _ in ("--version", "-h", "--help")):
                print("\n[*] ending @ %s" % time.strftime("%X /%Y-%m-%d/"))

            # os._exit() terminates without flushing stdio buffers, hence pending
            # output has to be flushed explicitly (otherwise it can get lost when redirected)
            sys.stdout.flush()
            sys.stderr.flush()
        except (IOError, OSError, ValueError):
            # output stream is no longer usable (e.g. broken pipe or closed file); nothing
            # more can be reported at this point and the process is terminating anyway
            pass
        finally:
            # forced exit has to happen even if the closing output above failed
            os._exit(code)
1297