1package log
2
3import (
4	"bytes"
5	"fmt"
6	"regexp"
7	"regexp/syntax"
8
9	"github.com/prometheus/prometheus/pkg/labels"
10)
11
12// Filterer is a interface to filter log lines.
13type Filterer interface {
14	Filter(line []byte) bool
15	ToStage() Stage
16}
17
18// LineFilterFunc is a syntax sugar for creating line filter from a function
19type FiltererFunc func(line []byte) bool
20
21func (f FiltererFunc) Filter(line []byte) bool {
22	return f(line)
23}
24
25type trueFilter struct{}
26
27func (trueFilter) Filter(_ []byte) bool { return true }
28func (trueFilter) ToStage() Stage       { return NoopStage }
29
30// TrueFilter is a filter that returns and matches all log lines whatever their content.
31var TrueFilter = trueFilter{}
32
33type notFilter struct {
34	Filterer
35}
36
37func (n notFilter) Filter(line []byte) bool {
38	return !n.Filterer.Filter(line)
39}
40
41func (n notFilter) ToStage() Stage {
42	return StageFunc{
43		process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) {
44			return line, n.Filter(line)
45		},
46	}
47}
48
49// newNotFilter creates a new filter which matches only if the base filter doesn't match.
50// If the base filter is a `or` it will recursively simplify with `and` operations.
51func newNotFilter(base Filterer) Filterer {
52	// not(a|b) = not(a) and not(b) , and operation can't benefit from this optimization because both legs always needs to be executed.
53	if or, ok := base.(orFilter); ok {
54		return NewAndFilter(newNotFilter(or.left), newNotFilter(or.right))
55	}
56	return notFilter{Filterer: base}
57}
58
59type andFilter struct {
60	left  Filterer
61	right Filterer
62}
63
64// NewAndFilter creates a new filter which matches only if left and right matches.
65func NewAndFilter(left Filterer, right Filterer) Filterer {
66	// Make sure we take care of panics in case a nil or noop filter is passed.
67	if right == nil || right == TrueFilter {
68		return left
69	}
70
71	if left == nil || left == TrueFilter {
72		return right
73	}
74
75	return andFilter{
76		left:  left,
77		right: right,
78	}
79}
80
81func (a andFilter) Filter(line []byte) bool {
82	return a.left.Filter(line) && a.right.Filter(line)
83}
84
85func (a andFilter) ToStage() Stage {
86	return StageFunc{
87		process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) {
88			return line, a.Filter(line)
89		},
90	}
91}
92
93type orFilter struct {
94	left  Filterer
95	right Filterer
96}
97
98// newOrFilter creates a new filter which matches only if left or right matches.
99func newOrFilter(left Filterer, right Filterer) Filterer {
100	if left == nil || left == TrueFilter {
101		return right
102	}
103
104	if right == nil || right == TrueFilter {
105		return left
106	}
107
108	return orFilter{
109		left:  left,
110		right: right,
111	}
112}
113
114// chainOrFilter is a syntax sugar to chain multiple `or` filters. (1 or many)
115func chainOrFilter(curr, new Filterer) Filterer {
116	if curr == nil {
117		return new
118	}
119	return newOrFilter(curr, new)
120}
121
122func (a orFilter) Filter(line []byte) bool {
123	return a.left.Filter(line) || a.right.Filter(line)
124}
125
126func (a orFilter) ToStage() Stage {
127	return StageFunc{
128		process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) {
129			return line, a.Filter(line)
130		},
131	}
132}
133
134type regexpFilter struct {
135	*regexp.Regexp
136}
137
138// newRegexpFilter creates a new line filter for a given regexp.
139// If match is false the filter is the negation of the regexp.
140func newRegexpFilter(re string, match bool) (Filterer, error) {
141	reg, err := regexp.Compile(re)
142	if err != nil {
143		return nil, err
144	}
145	f := regexpFilter{reg}
146	if match {
147		return f, nil
148	}
149	return newNotFilter(f), nil
150}
151
152func (r regexpFilter) Filter(line []byte) bool {
153	return r.Match(line)
154}
155
156func (r regexpFilter) ToStage() Stage {
157	return StageFunc{
158		process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) {
159			return line, r.Filter(line)
160		},
161	}
162}
163
164type containsFilter struct {
165	match           []byte
166	caseInsensitive bool
167}
168
169func (l containsFilter) Filter(line []byte) bool {
170	if l.caseInsensitive {
171		line = bytes.ToLower(line)
172	}
173	return bytes.Contains(line, l.match)
174}
175
176func (l containsFilter) ToStage() Stage {
177	return StageFunc{
178		process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) {
179			return line, l.Filter(line)
180		},
181	}
182}
183
184func (l containsFilter) String() string {
185	return string(l.match)
186}
187
188// newContainsFilter creates a contains filter that checks if a log line contains a match.
189func newContainsFilter(match []byte, caseInsensitive bool) Filterer {
190	if len(match) == 0 {
191		return TrueFilter
192	}
193	if caseInsensitive {
194		match = bytes.ToLower(match)
195	}
196	return containsFilter{
197		match:           match,
198		caseInsensitive: caseInsensitive,
199	}
200}
201
202// NewFilter creates a new line filter from a match string and type.
203func NewFilter(match string, mt labels.MatchType) (Filterer, error) {
204	switch mt {
205	case labels.MatchRegexp:
206		return parseRegexpFilter(match, true)
207	case labels.MatchNotRegexp:
208		return parseRegexpFilter(match, false)
209	case labels.MatchEqual:
210		return newContainsFilter([]byte(match), false), nil
211	case labels.MatchNotEqual:
212		return newNotFilter(newContainsFilter([]byte(match), false)), nil
213	default:
214		return nil, fmt.Errorf("unknown matcher: %v", match)
215	}
216}
217
218// parseRegexpFilter parses a regexp and attempt to simplify it with only literal filters.
219// If not possible it will returns the original regexp filter.
220func parseRegexpFilter(re string, match bool) (Filterer, error) {
221	reg, err := syntax.Parse(re, syntax.Perl)
222	if err != nil {
223		return nil, err
224	}
225	reg = reg.Simplify()
226
227	// attempt to improve regex with tricks
228	f, ok := simplify(reg)
229	if !ok {
230		return newRegexpFilter(re, match)
231	}
232	if match {
233		return f, nil
234	}
235	return newNotFilter(f), nil
236}
237
238// simplify a regexp expression by replacing it, when possible, with a succession of literal filters.
239// For example `(foo|bar)` will be replaced by  `containsFilter(foo) or containsFilter(bar)`
240func simplify(reg *syntax.Regexp) (Filterer, bool) {
241	switch reg.Op {
242	case syntax.OpAlternate:
243		return simplifyAlternate(reg)
244	case syntax.OpConcat:
245		return simplifyConcat(reg, nil)
246	case syntax.OpCapture:
247		clearCapture(reg)
248		return simplify(reg)
249	case syntax.OpLiteral:
250		return newContainsFilter([]byte(string((reg.Rune))), isCaseInsensitive(reg)), true
251	case syntax.OpStar:
252		if reg.Sub[0].Op == syntax.OpAnyCharNotNL {
253			return TrueFilter, true
254		}
255	case syntax.OpEmptyMatch:
256		return TrueFilter, true
257	}
258	return nil, false
259}
260
261func isCaseInsensitive(reg *syntax.Regexp) bool {
262	return (reg.Flags & syntax.FoldCase) != 0
263}
264
265// clearCapture removes capture operation as they are not used for filtering.
266func clearCapture(regs ...*syntax.Regexp) {
267	for _, r := range regs {
268		if r.Op == syntax.OpCapture {
269			*r = *r.Sub[0]
270		}
271	}
272}
273
274// simplifyAlternate simplifies, when possible, alternate regexp expressions such as:
275// (foo|bar) or (foo|(bar|buzz)).
276func simplifyAlternate(reg *syntax.Regexp) (Filterer, bool) {
277	clearCapture(reg.Sub...)
278	// attempt to simplify the first leg
279	f, ok := simplify(reg.Sub[0])
280	if !ok {
281		return nil, false
282	}
283	// merge the rest of the legs
284	for i := 1; i < len(reg.Sub); i++ {
285		f2, ok := simplify(reg.Sub[i])
286		if !ok {
287			return nil, false
288		}
289		f = newOrFilter(f, f2)
290	}
291	return f, true
292}
293
294// simplifyConcat attempt to simplify concat operations.
295// Concat operations are either literal and star such as foo.* .*foo.* .*foo
296// which is a literalFilter.
297// Or a literal and alternates operation (see simplifyConcatAlternate), which represent a multiplication of alternates.
298// Anything else is rejected.
299func simplifyConcat(reg *syntax.Regexp, baseLiteral []byte) (Filterer, bool) {
300	clearCapture(reg.Sub...)
301	// we support only simplication of concat operation with 3 sub expressions.
302	// for instance .*foo.*bar contains 4 subs (.*+foo+.*+bar) and can't be simplified.
303	if len(reg.Sub) > 3 {
304		return nil, false
305	}
306
307	var curr Filterer
308	var ok bool
309	literals := 0
310	for _, sub := range reg.Sub {
311		if sub.Op == syntax.OpLiteral {
312			// only one literal is allowed.
313			if literals != 0 {
314				return nil, false
315			}
316			literals++
317			baseLiteral = append(baseLiteral, []byte(string(sub.Rune))...)
318			continue
319		}
320		// if we have an alternate we must also have a base literal to apply the concatenation with.
321		if sub.Op == syntax.OpAlternate && baseLiteral != nil {
322			if curr, ok = simplifyConcatAlternate(sub, baseLiteral, curr); !ok {
323				return nil, false
324			}
325			continue
326		}
327		if sub.Op == syntax.OpStar && sub.Sub[0].Op == syntax.OpAnyCharNotNL {
328			continue
329		}
330		return nil, false
331	}
332
333	// if we have a filter from concat alternates.
334	if curr != nil {
335		return curr, true
336	}
337
338	// if we have only a concat with literals.
339	if baseLiteral != nil {
340		return newContainsFilter(baseLiteral, isCaseInsensitive(reg)), true
341	}
342
343	return nil, false
344}
345
346// simplifyConcatAlternate simplifies concat alternate operations.
347// A concat alternate is found when a concat operation has a sub alternate and is preceded by a literal.
348// For instance bar|b|buzz is expressed as b(ar|(?:)|uzz) => b concat alternate(ar,(?:),uzz).
349// (?:) being an OpEmptyMatch and b being the literal to concat all alternates (ar,(?:),uzz) with.
350func simplifyConcatAlternate(reg *syntax.Regexp, literal []byte, curr Filterer) (Filterer, bool) {
351	for _, alt := range reg.Sub {
352		switch alt.Op {
353		case syntax.OpEmptyMatch:
354			curr = chainOrFilter(curr, newContainsFilter(literal, isCaseInsensitive(reg)))
355		case syntax.OpLiteral:
356			// concat the root literal with the alternate one.
357			altBytes := []byte(string(alt.Rune))
358			altLiteral := make([]byte, 0, len(literal)+len(altBytes))
359			altLiteral = append(altLiteral, literal...)
360			altLiteral = append(altLiteral, altBytes...)
361			curr = chainOrFilter(curr, newContainsFilter(altLiteral, isCaseInsensitive(reg)))
362		case syntax.OpConcat:
363			f, ok := simplifyConcat(alt, literal)
364			if !ok {
365				return nil, false
366			}
367			curr = chainOrFilter(curr, f)
368		case syntax.OpStar:
369			if alt.Sub[0].Op != syntax.OpAnyCharNotNL {
370				return nil, false
371			}
372			curr = chainOrFilter(curr, newContainsFilter(literal, isCaseInsensitive(reg)))
373		default:
374			return nil, false
375		}
376	}
377	if curr != nil {
378		return curr, true
379	}
380	return nil, false
381}
382