1import abc
2import re
3from typing import List
4from typing import Optional
5from typing import TYPE_CHECKING
6
7from ddtrace.internal.processor.trace import TraceProcessor
8
9from .ext import http
10
11
12if TYPE_CHECKING:
13    from ddtrace import Span
14
15
class TraceFilter(TraceProcessor):
    """Base class for trace processors that may drop a trace before export."""

    @abc.abstractmethod
    def process_trace(self, trace):
        # type: (List[Span]) -> Optional[List[Span]]
        """Process a trace and decide whether it should be exported.

        Subclasses override this method. Returning ``None`` prevents the
        trace from being exported; otherwise the returned list of spans is
        what gets passed along.
        """
25
26
class FilterRequestsOnUrl(TraceFilter):
    r"""Filter out traces from incoming http requests based on the request's url.

    This class takes as argument a list of regular expression patterns
    representing the urls to be excluded from tracing. A trace will be excluded
    if its root span contains a ``http.url`` tag and if this tag matches any of
    the provided regular expressions using the standard python regexp match
    semantic (https://docs.python.org/3/library/re.html#re.match), i.e. each
    pattern is anchored at the beginning of the url only.

    :param list regexps: a list of regular expressions (or a single string) defining
                         the urls that should be filtered out.

    Examples:
    To filter out http calls to domain api.example.com::

        FilterRequestsOnUrl(r'http://api\.example\.com')

    To filter out http calls to all first level subdomains from example.com::

        FilterRequestsOnUrl(r'http://[^./]+\.example\.com')

    To filter out calls to both http://test.example.com and http://example.com/healthcheck::

        FilterRequestsOnUrl([r'http://test\.example\.com', r'http://example\.com/healthcheck'])
    """

    def __init__(self, regexps):
        """Compile the given pattern(s) once so they can be reused for every trace.

        :param regexps: a single pattern string, or an iterable of patterns
                        accepted by ``re.compile``.
        """
        # A lone string is treated as a one-element list; without this the
        # comprehension below would iterate over its individual characters.
        if isinstance(regexps, str):
            regexps = [regexps]
        self._regexps = [re.compile(regexp) for regexp in regexps]

    def process_trace(self, trace):
        # type: (List[Span]) -> Optional[List[Span]]
        """Drop the trace if a root span's url matches one of the patterns.

        When the filter is registered in the tracer, process_trace is called
        on each trace before it is sent to the agent; the returned value will
        be fed to the next filter in the list. If process_trace returns None,
        the whole trace is discarded.

        :param trace: the spans making up the trace.
        :returns: ``None`` when a span without a parent carries a ``http.url``
                  tag matching any configured pattern, otherwise ``trace``
                  unchanged.
        """
        for span in trace:
            # Only root spans (no parent) are considered.
            if span.parent_id is not None:
                continue

            # Look the tag up once; spans without a url tag cannot match.
            url = span.get_tag(http.URL)
            if url is None:
                continue

            if any(regexp.match(url) for regexp in self._regexps):
                return None
        return trace
73