# emacs: -*- mode: python; coding: utf-8; py-indent-offset: 4; indent-tabs-mode: t -*-
# vi: set ft=python sts=4 ts=4 sw=4 noet :

# This file is part of Fail2Ban.
#
# Fail2Ban is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Fail2Ban is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Fail2Ban; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# Author: Cyril Jaquier
#

__author__ = "Cyril Jaquier"
__copyright__ = "Copyright (c) 2004 Cyril Jaquier"
__license__ = "GPL"

import re, time
from abc import abstractmethod

from .strptime import reGroupDictStrptime, timeRE, getTimePatternRE
from ..helpers import getLogger

logSys = getLogger(__name__)

# check already grouped contains "(", but ignores char "\(" and conditional "(?(id)...)":
RE_GROUPED = re.compile(r'(?<!(?:\(\?))(?<!\\)\((?!\?)')
# (pattern, replacement) pair wrapping the body of a regex into a capture group,
# leaving optional leading inline flags / "^" and an optional trailing "$" outside:
RE_GROUP = ( re.compile(r'^((?:\(\?\w+\))?\^?(?:\(\?\w+\))?)(.*?)(\$?)$'), r"\1(\2)\3" )

# literal "{UNB}" marker at the beginning of a pattern:
RE_EXLINE_NO_BOUNDS = re.compile(r'^\{UNB\}')
# literal "{^LN-BEG}" marker at the beginning of a pattern:
RE_EXLINE_BOUND_BEG = re.compile(r'^\{\^LN-BEG\}')
# literal text "(^|\b|\W)" resp. "(?:^|\b|\W)" at the beginning of a pattern:
RE_EXSANC_BOUND_BEG = re.compile(r'^\((?:\?:)?\^\|\\b\|\\W\)')
# literal text "(?=\b|\W|$)" at the end of a pattern:
RE_EXEANC_BOUND_BEG = re.compile(r'\(\?=\\b\|\\W\|\$\)$')
# regex begins (after optional "(" and inline flags) with "^", "**" or "(^" / "(?:^":
RE_NO_WRD_BOUND_BEG = re.compile(r'^\(*(?:\(\?\w+\))?(?:\^|\(*\*\*|\((?:\?:)?\^)')
# regex ends with an unescaped "$", "$)", "\b", "\s" or "**" (optionally followed by ")"):
RE_NO_WRD_BOUND_END = re.compile(r'(?<!\\)(?:\$\)?|\\b|\\s|\*\*\)*)$')
# (pattern, replacement) pair stripping the special "**" from front and end of a regex:
RE_DEL_WRD_BOUNDS = ( re.compile(r'^\(*(?:\(\?\w+\))?\(*\*\*|(?<!\\)\*\*\)*$'),
	lambda m: m.group().replace('**', '') )

# regex begins (after optional inline flags) with "^" or "(^" / "(?:^" not followed by "|":
RE_LINE_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\((?:\?:)?\^(?!\|))')
# regex ends with "$" or "$)" that is not preceded by "\" or "|":
RE_LINE_BOUND_END = re.compile(r'(?<![\\\|])(?:\$\)?)$')

# strptime fields %a %A %b %B %p %c (not preceded by another "%"):
RE_ALPHA_PATTERN = re.compile(r'(?<!\%)\%[aAbBpc]')

# unescaped placeholder "{EPOCH}" resp. "{LEPOCH}" (case-insensitive):
RE_EPOCH_PATTERN = re.compile(r"(?<!\\)\{L?EPOCH\}", re.IGNORECASE)


class DateTemplate(object):
	"""A template which searches for and returns a date from a log line.

	This is a non-functional abstract class which other templates should
	inherit from.

	Attributes
	----------
	name
		Name of the template (empty string initially).
	regex
		Regex used to search for the date (see `setRegex`).
	flags
		Bit mask of LINE_BEGIN, LINE_END, WORD_BEGIN and WORD_END, calculated
		by `setRegex`.
	hits
		Counter incremented by `matchDate` on every successful match.
	weight, time
		Initialized to 1.0 resp. 0 here; not used elsewhere in this module.
	"""

	LINE_BEGIN = 8
	LINE_END = 4
	WORD_BEGIN = 2
	WORD_END = 1

	def __init__(self):
		self.name = ""
		self.weight = 1.0
		self.flags = 0
		self.hits = 0
		self.time = 0
		self._regex = ""
		# compiled regex, created lazily by _compileRegex():
		self._cRegex = None

	def getRegex(self):
		"""Return the regex string as constructed by `setRegex`.
		"""
		return self._regex

	def setRegex(self, regex, wordBegin=True, wordEnd=True):
		r"""Sets regex to use for searching for date in log line.

		The regex is only stored here; it is compiled on first use (see
		`_compileRegex`), so an invalid expression is reported at that time
		and not by this method.

		Parameters
		----------
		regex : str
			The regex the template will use for searching for a date.
		wordBegin : bool or str
			Defines whether the regex should be modified to search at beginning of a
			word, by adding special boundary r'(?=^|\b|\W)' to start of regex.
			The special value 'start' anchors the regex at the beginning of the
			line instead (prefix r'^(?:\W{0,2})?') and prepends '{^LN-BEG}' to
			the template name.
			Can be disabled with specifying of ** at front of regex.
			Default True.
		wordEnd : bool
			Defines whether the regex should be modified to search at end of a word,
			by adding special boundary r'(?=\b|\W|$)' to end of regex.
			Can be disabled with specifying of ** at end of regex.
			Default True.
		"""
		# Warning: don't use lookahead for line-begin boundary,
		# (e. g. r"^(?:\W{0,2})?" is much faster than r"(?:^|(?<=^\W)|(?<=^\W{2}))")
		# because it may be very slow in negative case (by long log-lines not matching pattern)

		regex = regex.strip()
		# boundaries are added only if requested and the regex is not already anchored:
		boundBegin = wordBegin and not RE_NO_WRD_BOUND_BEG.search(regex)
		boundEnd = wordEnd and not RE_NO_WRD_BOUND_END.search(regex)
		# if no group add it now, should always have a group(1):
		if not RE_GROUPED.search(regex):
			regex = RE_GROUP[0].sub(RE_GROUP[1], regex)
		self.flags = 0
		# if word or line start boundary:
		if boundBegin:
			if wordBegin != 'start':
				self.flags |= DateTemplate.WORD_BEGIN
				regex = r'(?=^|\b|\W)' + regex
			else:
				self.flags |= DateTemplate.LINE_BEGIN
				regex = r"^(?:\W{0,2})?" + regex
				if not self.name.startswith('{^LN-BEG}'):
					self.name = '{^LN-BEG}' + self.name
		# if word end boundary:
		if boundEnd:
			self.flags |= DateTemplate.WORD_END
			regex += r'(?=\b|\W|$)'
		# detect line anchors which are already part of the (resulting) regex:
		if not (self.flags & DateTemplate.LINE_BEGIN) and RE_LINE_BOUND_BEG.search(regex):
			self.flags |= DateTemplate.LINE_BEGIN
		if not (self.flags & DateTemplate.LINE_END) and RE_LINE_BOUND_END.search(regex):
			self.flags |= DateTemplate.LINE_END
		# remove possible special pattern "**" in front and end of regex:
		regex = RE_DEL_WRD_BOUNDS[0].sub(RE_DEL_WRD_BOUNDS[1], regex)
		self._regex = regex
		logSys.log(7, ' constructed regex %s', regex)
		# force recompilation by next usage:
		self._cRegex = None

	regex = property(getRegex, setRegex, doc=
		"""Regex used to search for date.
		""")

	def _compileRegex(self):
		"""Compile regex by first usage.

		Raises
		------
		Exception
			Whatever `re.compile` raised for an invalid expression; the error
			is logged and then re-raised unchanged.
		"""
		if not self._cRegex:
			try:
				self._cRegex = re.compile(self.regex)
			except Exception:
				logSys.error('Compile %r failed, expression %r', self.name, self.regex)
				# bare raise re-raises the active exception with its original traceback:
				raise

	def matchDate(self, line, *args):
		"""Check if regex for date matches on a log line.

		Parameters
		----------
		line : str
			Log line to search in.
		*args
			Optional `pos` and `endpos`, passed through to the `search` method
			of the compiled regex.

		Returns
		-------
		match object or None
			Result of the search; `hits` is incremented if it matched.
		"""
		if not self._cRegex:
			self._compileRegex()
		dateMatch = self._cRegex.search(line, *args)  # pos, endpos
		if dateMatch:
			self.hits += 1
		return dateMatch

	@abstractmethod
	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Abstract method, which should return the date for a log line

		This should return the date for a log line, typically taking the
		date from the part of the line which matched the templates regex.
		This requires abstraction, therefore just raises exception.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : match object, optional
			Already found match for the line.
		default_tz: if no explicit time zone is present in the line
			passing this will interpret it as in that time zone.

		Raises
		------
		NotImplementedError
			Abstract method, therefore always raises this.
		"""
		raise NotImplementedError("getDate() is abstract")

	@staticmethod
	def unboundPattern(pattern):
		"""Return pattern without special markers and boundary expressions.

		Removes in this order: leading '{UNB}', leading '{^LN-BEG}', a leading
		start-boundary expression (RE_EXSANC_BOUND_BEG) and a trailing
		end-boundary expression (RE_EXEANC_BOUND_BEG).
		"""
		pattern = RE_EXLINE_NO_BOUNDS.sub('', pattern)
		pattern = RE_EXLINE_BOUND_BEG.sub('', pattern)
		pattern = RE_EXSANC_BOUND_BEG.sub('', pattern)
		return RE_EXEANC_BOUND_BEG.sub('', pattern)


class DateEpoch(DateTemplate):
	"""A date template which searches for Unix timestamps.

	This includes Unix timestamps which appear at start of a line, optionally
	within square braces (nsd), or on SELinux audit log lines.

	Parameters
	----------
	lineBeginOnly : bool
		If set (and no pattern given), the timestamp is searched at begin of
		the line only (optionally within square braces).
	pattern : str, optional
		Custom pattern, in which the placeholder {EPOCH} resp. {LEPOCH} is
		replaced with the regex of the timestamp; also used as template name.
	longFrm : bool
		If set, timestamps may additionally be specified in milliseconds
		resp. microseconds (long form).

	Attributes
	----------
	name
	regex
	"""

	def __init__(self, lineBeginOnly=False, pattern=None, longFrm=False):
		DateTemplate.__init__(self)
		self.name = "Epoch" if not pattern else pattern
		self._longFrm = longFrm
		# index of the group containing the timestamp itself:
		self._grpIdx = 1
		epochRE = r"\d{10,11}\b(?:\.\d{3,6})?"
		if longFrm:
			self.name = "LongEpoch" if not pattern else pattern
			epochRE = r"\d{10,11}(?:\d{3}(?:\.\d{1,6}|\d{3})?)?"
		if pattern:
			# pattern should capture/cut out the whole match:
			regex = "(" + RE_EPOCH_PATTERN.sub(lambda v: "(%s)" % epochRE, pattern) + ")"
			# group 1 is the whole match now, the timestamp is the group 2:
			self._grpIdx = 2
			self.setRegex(regex)
		elif not lineBeginOnly:
			regex = r"((?:^|(?P<square>(?<=^\[))|(?P<selinux>(?<=\baudit\()))%s)(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))" % epochRE
			# already line begin resp. word begin anchored
			self.setRegex(regex, wordBegin=False)
		else:
			regex = r"((?P<square>(?<=^\[))?%s)(?(square)(?=\]))" % epochRE
			self.setRegex(regex, wordBegin='start', wordEnd=True)

	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Method to return the date for a log line.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : match object, optional
			Already found match for the line; if not given the line is
			searched with `matchDate`.
		default_tz: ignored, Unix timestamps are time zone independent

		Returns
		-------
		(float, match object) or None
			Tuple containing a Unix timestamp, and the match of the date
			which was in turn used to calculate the timestamp. None if the
			line does not match.
		"""
		if not dateMatch:
			dateMatch = self.matchDate(line)
		if dateMatch:
			v = dateMatch.group(self._grpIdx)
			# extract part of format which represents seconds since epoch
			if self._longFrm and len(v) >= 13:
				if len(v) >= 16 and '.' not in v:
					# 16 and more digits without fraction - microseconds:
					v = float(v) / 1000000
				else:
					# otherwise milliseconds (with optional fraction):
					v = float(v) / 1000
			return (float(v), dateMatch)
		return None


class DatePatternRegex(DateTemplate):
	"""Date template, with regex/pattern

	Parameters
	----------
	pattern : str
		Sets the date templates pattern.
	**kwargs
		Passed to `setRegex` together with the pattern (wordBegin, wordEnd).

	Attributes
	----------
	name
	regex
	pattern
	"""

	_patternRE, _patternName = getTimePatternRE()
	_patternRE = re.compile(_patternRE)

	def __init__(self, pattern=None, **kwargs):
		super(DatePatternRegex, self).__init__()
		self._pattern = None
		if pattern is not None:
			self.setRegex(pattern, **kwargs)

	@property
	def pattern(self):
		"""The pattern used for regex with strptime "%" time fields.

		This should be a valid regular expression, of which matching string
		will be extracted from the log line. strptime style "%" fields will
		be replaced by appropriate regular expressions, or custom regex
		groups with names as per the strptime fields can also be used
		instead.
		"""
		return self._pattern

	@pattern.setter
	def pattern(self, pattern):
		self.setRegex(pattern)

	def setRegex(self, pattern, wordBegin=True, wordEnd=True):
		"""Sets the pattern and builds name and regex of the template from it.

		Parameters
		----------
		pattern : str
			Date pattern. A leading '{UNB}' disables both word boundaries,
			a leading '{^LN-BEG}' anchors the pattern at begin of the line
			(unless wordBegin is disabled).
		wordBegin, wordEnd
			Passed to `DateTemplate.setRegex` (possibly adjusted as described
			above).

		Raises
		------
		TypeError
			If the pattern cannot be converted to a regex (any exception
			raised during the conversion is wrapped).
		"""
		# original pattern:
		self._pattern = pattern
		# if unbound signalled - reset boundaries left and right:
		if RE_EXLINE_NO_BOUNDS.search(pattern):
			pattern = RE_EXLINE_NO_BOUNDS.sub('', pattern)
			wordBegin = wordEnd = False
		# if explicit given {^LN-BEG} - remove it from pattern and set 'start' in wordBegin:
		if wordBegin and RE_EXLINE_BOUND_BEG.search(pattern):
			pattern = RE_EXLINE_BOUND_BEG.sub('', pattern)
			wordBegin = 'start'
		try:
			# wrap to regex:
			fmt = self._patternRE.sub(r'%(\1)s', pattern)
			self.name = fmt % self._patternName
			regex = fmt % timeRE
			# if expected add (?iu) for "ignore case" and "unicode":
			if RE_ALPHA_PATTERN.search(pattern):
				regex = r'(?iu)' + regex
			super(DatePatternRegex, self).setRegex(regex, wordBegin, wordEnd)
		except Exception as e:
			raise TypeError("Failed to set datepattern '%s' (may be an invalid format or unescaped percent char): %s" % (pattern, e))

	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Method to return the date for a log line.

		This uses a custom version of strptime, using the named groups
		from the instances `pattern` property.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : match object, optional
			Already found match for the line; if not given the line is
			searched with `matchDate`.
		default_tz: optionally used to correct timezone

		Returns
		-------
		(result of reGroupDictStrptime, match object) or None
			Tuple containing the value calculated by `reGroupDictStrptime`
			from the named groups (presumably a Unix timestamp), and the match
			of the date which was used to calculate it. None if the line does
			not match.
		"""
		if not dateMatch:
			dateMatch = self.matchDate(line)
		if dateMatch:
			return (reGroupDictStrptime(dateMatch.groupdict(), default_tz=default_tz),
				dateMatch)
		return None


class DateTai64n(DateTemplate):
	"""A date template which matches TAI64N format timestamps.

	Parameters
	----------
	wordBegin : bool
		Passed to `setRegex`, default False.

	Attributes
	----------
	name
	regex
	"""

	def __init__(self, wordBegin=False):
		DateTemplate.__init__(self)
		self.name = "TAI64N"
		# We already know the format for TAI64N
		self.setRegex("@[0-9a-f]{24}", wordBegin=wordBegin)

	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Method to return the date for a log line.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : match object, optional
			Already found match for the line; if not given the line is
			searched with `matchDate`.
		default_tz: ignored, since TAI is time zone independent

		Returns
		-------
		(int, match object) or None
			Tuple containing a Unix timestamp, and the match of the date
			which was in turn used to calculate the timestamp. None if the
			line does not match.
		"""
		if not dateMatch:
			dateMatch = self.matchDate(line)
		if dateMatch:
			# extract part of format which represents seconds since epoch
			value = dateMatch.group(1)
			# skip "@" and the first hex digit, take the following 15 hex digits
			# NOTE(review): first digit presumably carries the TAI64 2^62 offset - confirm against spec
			seconds_since_epoch = value[2:17]
			# convert seconds from HEX into local time stamp
			return (int(seconds_since_epoch, 16), dateMatch)
		return None