1import datetime
2import functools
3import logging
4import os
5import re
6from collections import namedtuple
7from typing import Any, Callable, Dict, Iterable, List, Tuple  # noqa
8
9from .abc import AbstractAccessLogger
10from .web_request import BaseRequest
11from .web_response import StreamResponse
12
13KeyMethod = namedtuple("KeyMethod", "key method")
14
15
16class AccessLogger(AbstractAccessLogger):
17    """Helper object to log access.
18
19    Usage:
20        log = logging.getLogger("spam")
21        log_format = "%a %{User-Agent}i"
22        access_logger = AccessLogger(log, log_format)
23        access_logger.log(request, response, time)
24
25    Format:
26        %%  The percent sign
27        %a  Remote IP-address (IP-address of proxy if using reverse proxy)
28        %t  Time when the request was started to process
29        %P  The process ID of the child that serviced the request
30        %r  First line of request
31        %s  Response status code
32        %b  Size of response in bytes, including HTTP headers
33        %T  Time taken to serve the request, in seconds
34        %Tf Time taken to serve the request, in seconds with floating fraction
35            in .06f format
36        %D  Time taken to serve the request, in microseconds
37        %{FOO}i  request.headers['FOO']
38        %{FOO}o  response.headers['FOO']
39        %{FOO}e  os.environ['FOO']
40
41    """
42
43    LOG_FORMAT_MAP = {
44        "a": "remote_address",
45        "t": "request_start_time",
46        "P": "process_id",
47        "r": "first_request_line",
48        "s": "response_status",
49        "b": "response_size",
50        "T": "request_time",
51        "Tf": "request_time_frac",
52        "D": "request_time_micro",
53        "i": "request_header",
54        "o": "response_header",
55    }
56
57    LOG_FORMAT = '%a %t "%r" %s %b "%{Referer}i" "%{User-Agent}i"'
58    FORMAT_RE = re.compile(r"%(\{([A-Za-z0-9\-_]+)\}([ioe])|[atPrsbOD]|Tf?)")
59    CLEANUP_RE = re.compile(r"(%[^s])")
60    _FORMAT_CACHE = {}  # type: Dict[str, Tuple[str, List[KeyMethod]]]
61
62    def __init__(self, logger: logging.Logger, log_format: str = LOG_FORMAT) -> None:
63        """Initialise the logger.
64
65        logger is a logger object to be used for logging.
66        log_format is a string with apache compatible log format description.
67
68        """
69        super().__init__(logger, log_format=log_format)
70
71        _compiled_format = AccessLogger._FORMAT_CACHE.get(log_format)
72        if not _compiled_format:
73            _compiled_format = self.compile_format(log_format)
74            AccessLogger._FORMAT_CACHE[log_format] = _compiled_format
75
76        self._log_format, self._methods = _compiled_format
77
78    def compile_format(self, log_format: str) -> Tuple[str, List[KeyMethod]]:
79        """Translate log_format into form usable by modulo formatting
80
81        All known atoms will be replaced with %s
82        Also methods for formatting of those atoms will be added to
83        _methods in appropriate order
84
85        For example we have log_format = "%a %t"
86        This format will be translated to "%s %s"
87        Also contents of _methods will be
88        [self._format_a, self._format_t]
89        These method will be called and results will be passed
90        to translated string format.
91
92        Each _format_* method receive 'args' which is list of arguments
93        given to self.log
94
95        Exceptions are _format_e, _format_i and _format_o methods which
96        also receive key name (by functools.partial)
97
98        """
99        # list of (key, method) tuples, we don't use an OrderedDict as users
100        # can repeat the same key more than once
101        methods = list()
102
103        for atom in self.FORMAT_RE.findall(log_format):
104            if atom[1] == "":
105                format_key1 = self.LOG_FORMAT_MAP[atom[0]]
106                m = getattr(AccessLogger, "_format_%s" % atom[0])
107                key_method = KeyMethod(format_key1, m)
108            else:
109                format_key2 = (self.LOG_FORMAT_MAP[atom[2]], atom[1])
110                m = getattr(AccessLogger, "_format_%s" % atom[2])
111                key_method = KeyMethod(format_key2, functools.partial(m, atom[1]))
112
113            methods.append(key_method)
114
115        log_format = self.FORMAT_RE.sub(r"%s", log_format)
116        log_format = self.CLEANUP_RE.sub(r"%\1", log_format)
117        return log_format, methods
118
119    @staticmethod
120    def _format_i(
121        key: str, request: BaseRequest, response: StreamResponse, time: float
122    ) -> str:
123        if request is None:
124            return "(no headers)"
125
126        # suboptimal, make istr(key) once
127        return request.headers.get(key, "-")
128
129    @staticmethod
130    def _format_o(
131        key: str, request: BaseRequest, response: StreamResponse, time: float
132    ) -> str:
133        # suboptimal, make istr(key) once
134        return response.headers.get(key, "-")
135
136    @staticmethod
137    def _format_a(request: BaseRequest, response: StreamResponse, time: float) -> str:
138        if request is None:
139            return "-"
140        ip = request.remote
141        return ip if ip is not None else "-"
142
143    @staticmethod
144    def _format_t(request: BaseRequest, response: StreamResponse, time: float) -> str:
145        now = datetime.datetime.utcnow()
146        start_time = now - datetime.timedelta(seconds=time)
147        return start_time.strftime("[%d/%b/%Y:%H:%M:%S +0000]")
148
149    @staticmethod
150    def _format_P(request: BaseRequest, response: StreamResponse, time: float) -> str:
151        return "<%s>" % os.getpid()
152
153    @staticmethod
154    def _format_r(request: BaseRequest, response: StreamResponse, time: float) -> str:
155        if request is None:
156            return "-"
157        return "{} {} HTTP/{}.{}".format(
158            request.method,
159            request.path_qs,
160            request.version.major,
161            request.version.minor,
162        )
163
164    @staticmethod
165    def _format_s(request: BaseRequest, response: StreamResponse, time: float) -> int:
166        return response.status
167
168    @staticmethod
169    def _format_b(request: BaseRequest, response: StreamResponse, time: float) -> int:
170        return response.body_length
171
172    @staticmethod
173    def _format_T(request: BaseRequest, response: StreamResponse, time: float) -> str:
174        return str(round(time))
175
176    @staticmethod
177    def _format_Tf(request: BaseRequest, response: StreamResponse, time: float) -> str:
178        return "%06f" % time
179
180    @staticmethod
181    def _format_D(request: BaseRequest, response: StreamResponse, time: float) -> str:
182        return str(round(time * 1000000))
183
184    def _format_line(
185        self, request: BaseRequest, response: StreamResponse, time: float
186    ) -> Iterable[Tuple[str, Callable[[BaseRequest, StreamResponse, float], str]]]:
187        return [(key, method(request, response, time)) for key, method in self._methods]
188
189    def log(self, request: BaseRequest, response: StreamResponse, time: float) -> None:
190        try:
191            fmt_info = self._format_line(request, response, time)
192
193            values = list()
194            extra = dict()
195            for key, value in fmt_info:
196                values.append(value)
197
198                if key.__class__ is str:
199                    extra[key] = value
200                else:
201                    k1, k2 = key  # type: ignore
202                    dct = extra.get(k1, {})  # type: ignore
203                    dct[k2] = value  # type: ignore
204                    extra[k1] = dct  # type: ignore
205
206            self.logger.info(self._log_format % tuple(values), extra=extra)
207        except Exception:
208            self.logger.exception("Error in logging")
209