1# emacs: -*- mode: python; coding: utf-8; py-indent-offset: 4; indent-tabs-mode: t -*-
2# vi: set ft=python sts=4 ts=4 sw=4 noet :
3
4# This file is part of Fail2Ban.
5#
6# Fail2Ban is free software; you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation; either version 2 of the License, or
9# (at your option) any later version.
10#
11# Fail2Ban is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with Fail2Ban; if not, write to the Free Software
18# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
19
20# Author: Cyril Jaquier
21#
22
23__author__ = "Cyril Jaquier"
24__copyright__ = "Copyright (c) 2004 Cyril Jaquier"
25__license__ = "GPL"
26
27import re, time
28from abc import abstractmethod
29
30from .strptime import reGroupDictStrptime, timeRE, getTimePatternRE
31from ..helpers import getLogger
32
33logSys = getLogger(__name__)
34
# Detects an already present plain group: an opening "(" which is neither
# escaped ("\("), nor followed by "?" (extension notation such as "(?:...)"),
# nor the inner bracket of a conditional "(?(id)...)":
RE_GROUPED = re.compile(
	r'(?<!(?:\(\?))(?<!\\)\((?!\?)'
)
# (matcher, replacement) pair wrapping the body of an expression in a group;
# leading inline flags like "(?i)", a leading "^" and a trailing "$" are kept
# outside of the new group:
RE_GROUP = (
	re.compile(r'^((?:\(\?\w+\))?\^?(?:\(\?\w+\))?)(.*?)(\$?)$'),
	r"\1(\2)\3",
)

# Prefix "{UNB}" at start of a pattern:
RE_EXLINE_NO_BOUNDS = re.compile(r'^\{UNB\}')
# Prefix "{^LN-BEG}" at start of a pattern:
RE_EXLINE_BOUND_BEG = re.compile(r'^\{\^LN-BEG\}')
# Literal "(^|\b|\W)" resp. "(?:^|\b|\W)" at start of a pattern:
RE_EXSANC_BOUND_BEG = re.compile(r'^\((?:\?:)?\^\|\\b\|\\W\)')
# Literal "(?=\b|\W|$)" at end of a pattern:
RE_EXEANC_BOUND_BEG = re.compile(r'\(\?=\\b\|\\W\|\$\)$')
# Expression begins (after optional "(" and inline flags) with "^", "**",
# "(^" or "(?:^":
RE_NO_WRD_BOUND_BEG = re.compile(r'^\(*(?:\(\?\w+\))?(?:\^|\(*\*\*|\((?:\?:)?\^)')
# Expression ends with an unescaped "$", "$)", "\b", "\s" or "**" (the
# latter optionally followed by closing brackets):
RE_NO_WRD_BOUND_END = re.compile(r'(?<!\\)(?:\$\)?|\\b|\\s|\*\*\)*)$')


def _stripWrdBoundMarks(m):
	"""Returns the text matched by `m` with every "**" marker removed."""
	return m.group().replace('**', '')

# (matcher, replacement function) pair removing the special "**" markers at
# begin and end of an expression (surrounding brackets/flags are retained):
RE_DEL_WRD_BOUNDS = (
	re.compile(r'^\(*(?:\(\?\w+\))?\(*\*\*|(?<!\\)\*\*\)*$'),
	_stripWrdBoundMarks,
)

# Expression begins (after optional inline flags) with "^", or with "(^" resp.
# "(?:^" not directly followed by "|":
RE_LINE_BOUND_BEG = re.compile(r'^(?:\(\?\w+\))?(?:\^|\((?:\?:)?\^(?!\|))')
# Expression ends with "$" resp. "$)" not preceded by "\" or "|":
RE_LINE_BOUND_END = re.compile(r'(?<![\\\|])(?:\$\)?)$')

# One of the directives %a %A %b %B %p %c, not preceded by another "%":
RE_ALPHA_PATTERN = re.compile(r'(?<!\%)\%[aAbBpc]')

# Unescaped placeholder "{EPOCH}" or "{LEPOCH}" (case-insensitive):
RE_EPOCH_PATTERN = re.compile(r"(?<!\\)\{L?EPOCH\}", re.IGNORECASE)
54
55
class DateTemplate(object):
	"""A template which searches for and returns a date from a log line.

	This is a non-functional abstract base class which other templates
	should inherit from; `getDate` has to be implemented by subclasses.

	Attributes
	----------
	name : str
		Name of the template (gets prefix '{^LN-BEG}' if the regex is
		anchored to line begin via ``wordBegin='start'``).
	regex : str
		Regular expression used to search for the date; assigning to it
		calls `DateTemplate.setRegex` with default arguments.
	flags : int
		Bit mask of LINE_BEGIN, LINE_END, WORD_BEGIN and WORD_END describing
		the boundaries of the current regex (recalculated by `setRegex`).
	hits : int
		Number of successful matches found by `matchDate`.
	weight, time
		Initialized to 1.0 resp. 0; not used inside of this class.
	"""

	# bits of attribute `flags`:
	LINE_BEGIN = 8
	LINE_END =   4
	WORD_BEGIN = 2
	WORD_END =   1

	def __init__(self):
		self.name = ""
		self.weight = 1.0
		self.flags = 0
		self.hits = 0
		self.time = 0
		self._regex = ""
		# compiled version of _regex, created lazy by first usage:
		self._cRegex = None

	def getRegex(self):
		"""Returns the (constructed) regex as string.
		"""
		return self._regex

	def setRegex(self, regex, wordBegin=True, wordEnd=True):
		r"""Sets regex to use for searching for date in log line.

		The expression is only constructed and stored here, it will be
		compiled by first usage (see `_compileRegex`), so an invalid
		expression is reported (as re.error) first there.

		Parameters
		----------
		regex : str
			The regex the template will use for searching for a date.
		wordBegin : bool or str
			Defines whether the regex should be modified to search at beginning of a
			word, by adding special boundary r'(?=^|\b|\W)' to start of regex.
			If it is the string 'start', the regex will be anchored to begin of
			line instead (prefix r'^(?:\W{0,2})?') and the name of template
			gets the prefix '{^LN-BEG}'.
			Can be disabled with specifying of ** at front of regex.
			Default True.
		wordEnd : bool
			Defines whether the regex should be modified to search at end of a word,
			by adding special boundary r'(?=\b|\W|$)' to end of regex.
			Can be disabled with specifying of ** at end of regex.
			Default True.
		"""
		# Warning: don't use lookahead for line-begin boundary,
		# (e. g. r"^(?:\W{0,2})?" is much faster as r"(?:^|(?<=^\W)|(?<=^\W{2}))")
		# because it may be very slow in negative case (by long log-lines not matching pattern)

		regex = regex.strip()
		# boundaries are wanted only if the expression is not already bound itself:
		boundBegin = wordBegin and not RE_NO_WRD_BOUND_BEG.search(regex)
		boundEnd = wordEnd and not RE_NO_WRD_BOUND_END.search(regex)
		# if no group add it now, should always have a group(1):
		if not RE_GROUPED.search(regex):
			regex = RE_GROUP[0].sub(RE_GROUP[1], regex)
		self.flags = 0
		# if word or line start boundary:
		if boundBegin:
			if wordBegin != 'start':
				self.flags |= DateTemplate.WORD_BEGIN
				regex = r'(?=^|\b|\W)' + regex
			else:
				self.flags |= DateTemplate.LINE_BEGIN
				regex = r"^(?:\W{0,2})?" + regex
				if not self.name.startswith('{^LN-BEG}'):
					self.name = '{^LN-BEG}' + self.name
		# if word end boundary:
		if boundEnd:
			self.flags |= DateTemplate.WORD_END
			regex += r'(?=\b|\W|$)'
		# recognize line boundaries specified by the expression itself:
		if not (self.flags & DateTemplate.LINE_BEGIN) and RE_LINE_BOUND_BEG.search(regex):
			self.flags |= DateTemplate.LINE_BEGIN
		if not (self.flags & DateTemplate.LINE_END) and RE_LINE_BOUND_END.search(regex):
			self.flags |= DateTemplate.LINE_END
		# remove possible special pattern "**" in front and end of regex:
		regex = RE_DEL_WRD_BOUNDS[0].sub(RE_DEL_WRD_BOUNDS[1], regex)
		self._regex = regex
		logSys.log(7, '  constructed regex %s', regex)
		# invalidate compiled version (will be compiled by next usage):
		self._cRegex = None

	regex = property(getRegex, setRegex, doc=
		"""Regex used to search for date.
		""")

	def _compileRegex(self):
		"""Compile regex by first usage.

		Raises
		------
		re.error
			If regular expression fails to compile (the failure is logged
			and the original exception is re-raised).
		"""
		if not self._cRegex:
			try:
				self._cRegex = re.compile(self.regex)
			except Exception:
				logSys.error('Compile %r failed, expression %r', self.name, self.regex)
				# re-raise the active exception unchanged:
				raise

	def matchDate(self, line, *args):
		"""Check if regex for date matches on a log line.

		Parameters
		----------
		line : str
			Log line to search in.
		*args
			Optional `pos` and `endpos`, passed through to the `search`
			method of the compiled expression.

		Returns
		-------
		match object of the first found date, or None if nothing found;
		each successful match increments the attribute `hits`.
		"""
		if not self._cRegex:
			self._compileRegex()
		dateMatch = self._cRegex.search(line, *args) # pos, endpos
		if dateMatch:
			self.hits += 1
		return dateMatch

	@abstractmethod
	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Abstract method, which should return the date for a log line

		This should return the date for a log line, typically taking the
		date from the part of the line which matched the templates regex.
		This requires abstraction, therefore just raises exception.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : match object, optional
			Result of a previous `matchDate` for this line (if already known).
		default_tz : optional
			If no explicit time zone is present in the line, passing this
			will interpret it as in that time zone.

		Raises
		------
		NotImplementedError
			Abstract method, therefore always raised here.
		"""
		raise NotImplementedError("getDate() is abstract")

	@staticmethod
	def unboundPattern(pattern):
		"""Returns pattern without special boundary markers.

		Removes (in this order) the prefix '{UNB}', the prefix '{^LN-BEG}',
		a leading r'(^|\\b|\\W)' resp. r'(?:^|\\b|\\W)' and a trailing
		r'(?=\\b|\\W|$)'.
		"""
		pattern = RE_EXLINE_NO_BOUNDS.sub('', pattern)
		pattern = RE_EXLINE_BOUND_BEG.sub('', pattern)
		pattern = RE_EXSANC_BOUND_BEG.sub('', pattern)
		return RE_EXEANC_BOUND_BEG.sub('', pattern)
197
198
class DateEpoch(DateTemplate):
	"""A date template which searches for Unix timestamps.

	Without a custom pattern it finds timestamps at start of a line
	(optionally enclosed in square brackets), and - unless restricted to
	line begin - also inside of "audit(...)" as used in SELinux audit logs.

	Attributes
	----------
	name
	regex
	"""

	def __init__(self, lineBeginOnly=False, pattern=None, longFrm=False):
		"""Creates the template and constructs its regex.

		Parameters
		----------
		lineBeginOnly : bool
			If set (and no pattern given), search at begin of line only.
		pattern : str, optional
			Custom expression, where each placeholder {EPOCH} resp. {LEPOCH}
			is replaced by a group matching the timestamp; it is also used
			as name of the template.
		longFrm : bool
			If set, additionally accept timestamps extended by 3 (milliseconds)
			or 6 (microseconds) digits.
		"""
		DateTemplate.__init__(self)
		self._longFrm = longFrm
		# index of the group containing the timestamp itself:
		self._grpIdx = 1
		if longFrm:
			dfltName = "LongEpoch"
			epochRE = r"\d{10,11}(?:\d{3}(?:\.\d{1,6}|\d{3})?)?"
		else:
			dfltName = "Epoch"
			epochRE = r"\d{10,11}\b(?:\.\d{3,6})?"
		self.name = pattern if pattern else dfltName
		if pattern:
			# pattern should capture/cut out the whole match (group 1),
			# so the timestamp is expected in the next group:
			inner = RE_EPOCH_PATTERN.sub(lambda v: "(%s)" % epochRE, pattern)
			self._grpIdx = 2
			self.setRegex("(" + inner + ")")
		elif lineBeginOnly:
			self.setRegex(
				r"((?P<square>(?<=^\[))?%s)(?(square)(?=\]))" % epochRE,
				wordBegin='start', wordEnd=True)
		else:
			# already line begin resp. word begin anchored:
			self.setRegex(
				r"((?:^|(?P<square>(?<=^\[))|(?P<selinux>(?<=\baudit\()))%s)(?:(?(selinux)(?=:\d+\)))|(?(square)(?=\])))" % epochRE,
				wordBegin=False)

	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Method to return the date for a log line.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : match object, optional
			Already known match for the line; if omitted the line is
			searched with `matchDate`.
		default_tz: ignored, Unix timestamps are time zone independent

		Returns
		-------
		(float, match object)
			Tuple containing a Unix timestamp, and the match of the date
			which was used to calculate the timestamp; None if no date
			was found in the line.
		"""
		if not dateMatch:
			dateMatch = self.matchDate(line)
		if not dateMatch:
			return None
		v = dateMatch.group(self._grpIdx)
		# long form: scale milli- resp. microseconds down to seconds
		if self._longFrm and len(v) >= 13:
			if len(v) >= 16 and '.' not in v:
				divisor = 1000000
			else:
				divisor = 1000
			return (float(v) / divisor, dateMatch)
		return (float(v), dateMatch)
258
259
class DatePatternRegex(DateTemplate):
	"""Date template, with regex/pattern

	Parameters
	----------
	pattern : str
		Sets the date templates pattern.
	**kwargs
		Passed through to `setRegex` (wordBegin, wordEnd).

	Attributes
	----------
	name
	regex
	pattern
	"""

	# expression finding the time fields inside of a pattern, and the names
	# used for them in the name of template (both supplied by getTimePatternRE):
	_patternRE, _patternName = getTimePatternRE()
	_patternRE = re.compile(_patternRE)

	def __init__(self, pattern=None, **kwargs):
		super(DatePatternRegex, self).__init__()
		self._pattern = None
		if pattern is None:
			return
		self.setRegex(pattern, **kwargs)

	@property
	def pattern(self):
		"""The pattern used for regex with strptime "%" time fields.

		This should be a valid regular expression, of which matching string
		will be extracted from the log line. strptime style "%" fields will
		be replaced by appropriate regular expressions, or custom regex
		groups with names as per the strptime fields can also be used
		instead.
		"""
		return self._pattern

	@pattern.setter
	def pattern(self, pattern):
		self.setRegex(pattern)

	def setRegex(self, pattern, wordBegin=True, wordEnd=True):
		"""Sets the pattern and constructs name and regex of template from it.

		Parameters
		----------
		pattern : str
			Pattern with "%" time fields; prefix {UNB} disables both
			boundaries, prefix {^LN-BEG} anchors it to begin of line.
		wordBegin, wordEnd
			Boundary options, see `DateTemplate.setRegex`.

		Raises
		------
		TypeError
			If the pattern could not be converted resp. set.
		"""
		# remember the pattern as it was supplied:
		self._pattern = pattern
		# unbound signalled - strip marker, reset boundaries left and right:
		pattern, found = RE_EXLINE_NO_BOUNDS.subn('', pattern)
		if found:
			wordBegin = wordEnd = False
		# explicit given {^LN-BEG} - strip marker, set 'start' in wordBegin:
		if wordBegin:
			pattern, found = RE_EXLINE_BOUND_BEG.subn('', pattern)
			if found:
				wordBegin = 'start'
		try:
			# convert time fields to format placeholders and fill them in:
			fmt = self._patternRE.sub(r'%(\1)s', pattern)
			self.name = fmt % self._patternName
			regex = fmt % timeRE
			# if expected add (?iu) for "ignore case" and "unicode":
			flagsPrefix = r'(?iu)' if RE_ALPHA_PATTERN.search(pattern) else ''
			super(DatePatternRegex, self).setRegex(flagsPrefix + regex, wordBegin, wordEnd)
		except Exception as e:
			raise TypeError("Failed to set datepattern '%s' (may be an invalid format or unescaped percent char): %s" % (pattern, e))

	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Method to return the date for a log line.

		This uses a custom version of strptime, using the named groups
		from the instances `pattern` property.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : match object, optional
			Already known match for the line; if omitted the line is
			searched with `matchDate`.
		default_tz: optionally used to correct timezone

		Returns
		-------
		tuple
			Result of `reGroupDictStrptime` for the named groups of match,
			and the match of the date itself; None if no date was found.
		"""
		if not dateMatch:
			dateMatch = self.matchDate(line)
		if not dateMatch:
			return None
		timestamp = reGroupDictStrptime(dateMatch.groupdict(), default_tz=default_tz)
		return (timestamp, dateMatch)
346
347
class DateTai64n(DateTemplate):
	"""A date template which matches TAI64N format timestamps.

	Attributes
	----------
	name
	regex
	"""

	def __init__(self, wordBegin=False):
		"""Creates the template; `wordBegin` is passed through to `setRegex`.
		"""
		DateTemplate.__init__(self)
		self.name = "TAI64N"
		# the format is fixed: "@" followed by 24 hex digits
		self.setRegex("@[0-9a-f]{24}", wordBegin=wordBegin)

	def getDate(self, line, dateMatch=None, default_tz=None):
		"""Method to return the date for a log line.

		Parameters
		----------
		line : str
			Log line, of which the date should be extracted from.
		dateMatch : match object, optional
			Already known match for the line; if omitted the line is
			searched with `matchDate`.
		default_tz: ignored, since TAI is time zone independent

		Returns
		-------
		(int, match object)
			Tuple containing the timestamp (seconds), and the match of
			the date which was used to calculate it; None if no date was
			found in the line.
		"""
		if not dateMatch:
			dateMatch = self.matchDate(line)
		if not dateMatch:
			return None
		label = dateMatch.group(1)
		# skip "@" and the first hex digit, the following 15 hex digits
		# are taken as the seconds part and converted from HEX:
		return (int(label[2:17], 16), dateMatch)
386