1package log 2 3import ( 4 "bytes" 5 "fmt" 6 "regexp" 7 "regexp/syntax" 8 9 "github.com/prometheus/prometheus/pkg/labels" 10) 11 12// Filterer is a interface to filter log lines. 13type Filterer interface { 14 Filter(line []byte) bool 15 ToStage() Stage 16} 17 18// LineFilterFunc is a syntax sugar for creating line filter from a function 19type FiltererFunc func(line []byte) bool 20 21func (f FiltererFunc) Filter(line []byte) bool { 22 return f(line) 23} 24 25type trueFilter struct{} 26 27func (trueFilter) Filter(_ []byte) bool { return true } 28func (trueFilter) ToStage() Stage { return NoopStage } 29 30// TrueFilter is a filter that returns and matches all log lines whatever their content. 31var TrueFilter = trueFilter{} 32 33type notFilter struct { 34 Filterer 35} 36 37func (n notFilter) Filter(line []byte) bool { 38 return !n.Filterer.Filter(line) 39} 40 41func (n notFilter) ToStage() Stage { 42 return StageFunc{ 43 process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) { 44 return line, n.Filter(line) 45 }, 46 } 47} 48 49// newNotFilter creates a new filter which matches only if the base filter doesn't match. 50// If the base filter is a `or` it will recursively simplify with `and` operations. 51func newNotFilter(base Filterer) Filterer { 52 // not(a|b) = not(a) and not(b) , and operation can't benefit from this optimization because both legs always needs to be executed. 53 if or, ok := base.(orFilter); ok { 54 return NewAndFilter(newNotFilter(or.left), newNotFilter(or.right)) 55 } 56 return notFilter{Filterer: base} 57} 58 59type andFilter struct { 60 left Filterer 61 right Filterer 62} 63 64// NewAndFilter creates a new filter which matches only if left and right matches. 65func NewAndFilter(left Filterer, right Filterer) Filterer { 66 // Make sure we take care of panics in case a nil or noop filter is passed. 67 if right == nil || right == TrueFilter { 68 return left 69 } 70 71 if left == nil || left == TrueFilter { 72 return right 73 } 74 75 return andFilter{ 76 left: left, 77 right: right, 78 } 79} 80 81func (a andFilter) Filter(line []byte) bool { 82 return a.left.Filter(line) && a.right.Filter(line) 83} 84 85func (a andFilter) ToStage() Stage { 86 return StageFunc{ 87 process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) { 88 return line, a.Filter(line) 89 }, 90 } 91} 92 93type orFilter struct { 94 left Filterer 95 right Filterer 96} 97 98// newOrFilter creates a new filter which matches only if left or right matches. 99func newOrFilter(left Filterer, right Filterer) Filterer { 100 if left == nil || left == TrueFilter { 101 return right 102 } 103 104 if right == nil || right == TrueFilter { 105 return left 106 } 107 108 return orFilter{ 109 left: left, 110 right: right, 111 } 112} 113 114// chainOrFilter is a syntax sugar to chain multiple `or` filters. (1 or many) 115func chainOrFilter(curr, new Filterer) Filterer { 116 if curr == nil { 117 return new 118 } 119 return newOrFilter(curr, new) 120} 121 122func (a orFilter) Filter(line []byte) bool { 123 return a.left.Filter(line) || a.right.Filter(line) 124} 125 126func (a orFilter) ToStage() Stage { 127 return StageFunc{ 128 process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) { 129 return line, a.Filter(line) 130 }, 131 } 132} 133 134type regexpFilter struct { 135 *regexp.Regexp 136} 137 138// newRegexpFilter creates a new line filter for a given regexp. 139// If match is false the filter is the negation of the regexp. 140func newRegexpFilter(re string, match bool) (Filterer, error) { 141 reg, err := regexp.Compile(re) 142 if err != nil { 143 return nil, err 144 } 145 f := regexpFilter{reg} 146 if match { 147 return f, nil 148 } 149 return newNotFilter(f), nil 150} 151 152func (r regexpFilter) Filter(line []byte) bool { 153 return r.Match(line) 154} 155 156func (r regexpFilter) ToStage() Stage { 157 return StageFunc{ 158 process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) { 159 return line, r.Filter(line) 160 }, 161 } 162} 163 164type containsFilter struct { 165 match []byte 166 caseInsensitive bool 167} 168 169func (l containsFilter) Filter(line []byte) bool { 170 if l.caseInsensitive { 171 line = bytes.ToLower(line) 172 } 173 return bytes.Contains(line, l.match) 174} 175 176func (l containsFilter) ToStage() Stage { 177 return StageFunc{ 178 process: func(line []byte, _ *LabelsBuilder) ([]byte, bool) { 179 return line, l.Filter(line) 180 }, 181 } 182} 183 184func (l containsFilter) String() string { 185 return string(l.match) 186} 187 188// newContainsFilter creates a contains filter that checks if a log line contains a match. 189func newContainsFilter(match []byte, caseInsensitive bool) Filterer { 190 if len(match) == 0 { 191 return TrueFilter 192 } 193 if caseInsensitive { 194 match = bytes.ToLower(match) 195 } 196 return containsFilter{ 197 match: match, 198 caseInsensitive: caseInsensitive, 199 } 200} 201 202// NewFilter creates a new line filter from a match string and type. 203func NewFilter(match string, mt labels.MatchType) (Filterer, error) { 204 switch mt { 205 case labels.MatchRegexp: 206 return parseRegexpFilter(match, true) 207 case labels.MatchNotRegexp: 208 return parseRegexpFilter(match, false) 209 case labels.MatchEqual: 210 return newContainsFilter([]byte(match), false), nil 211 case labels.MatchNotEqual: 212 return newNotFilter(newContainsFilter([]byte(match), false)), nil 213 default: 214 return nil, fmt.Errorf("unknown matcher: %v", match) 215 } 216} 217 218// parseRegexpFilter parses a regexp and attempt to simplify it with only literal filters. 219// If not possible it will returns the original regexp filter. 220func parseRegexpFilter(re string, match bool) (Filterer, error) { 221 reg, err := syntax.Parse(re, syntax.Perl) 222 if err != nil { 223 return nil, err 224 } 225 reg = reg.Simplify() 226 227 // attempt to improve regex with tricks 228 f, ok := simplify(reg) 229 if !ok { 230 return newRegexpFilter(re, match) 231 } 232 if match { 233 return f, nil 234 } 235 return newNotFilter(f), nil 236} 237 238// simplify a regexp expression by replacing it, when possible, with a succession of literal filters. 239// For example `(foo|bar)` will be replaced by `containsFilter(foo) or containsFilter(bar)` 240func simplify(reg *syntax.Regexp) (Filterer, bool) { 241 switch reg.Op { 242 case syntax.OpAlternate: 243 return simplifyAlternate(reg) 244 case syntax.OpConcat: 245 return simplifyConcat(reg, nil) 246 case syntax.OpCapture: 247 clearCapture(reg) 248 return simplify(reg) 249 case syntax.OpLiteral: 250 return newContainsFilter([]byte(string((reg.Rune))), isCaseInsensitive(reg)), true 251 case syntax.OpStar: 252 if reg.Sub[0].Op == syntax.OpAnyCharNotNL { 253 return TrueFilter, true 254 } 255 case syntax.OpEmptyMatch: 256 return TrueFilter, true 257 } 258 return nil, false 259} 260 261func isCaseInsensitive(reg *syntax.Regexp) bool { 262 return (reg.Flags & syntax.FoldCase) != 0 263} 264 265// clearCapture removes capture operation as they are not used for filtering. 266func clearCapture(regs ...*syntax.Regexp) { 267 for _, r := range regs { 268 if r.Op == syntax.OpCapture { 269 *r = *r.Sub[0] 270 } 271 } 272} 273 274// simplifyAlternate simplifies, when possible, alternate regexp expressions such as: 275// (foo|bar) or (foo|(bar|buzz)). 276func simplifyAlternate(reg *syntax.Regexp) (Filterer, bool) { 277 clearCapture(reg.Sub...) 278 // attempt to simplify the first leg 279 f, ok := simplify(reg.Sub[0]) 280 if !ok { 281 return nil, false 282 } 283 // merge the rest of the legs 284 for i := 1; i < len(reg.Sub); i++ { 285 f2, ok := simplify(reg.Sub[i]) 286 if !ok { 287 return nil, false 288 } 289 f = newOrFilter(f, f2) 290 } 291 return f, true 292} 293 294// simplifyConcat attempt to simplify concat operations. 295// Concat operations are either literal and star such as foo.* .*foo.* .*foo 296// which is a literalFilter. 297// Or a literal and alternates operation (see simplifyConcatAlternate), which represent a multiplication of alternates. 298// Anything else is rejected. 299func simplifyConcat(reg *syntax.Regexp, baseLiteral []byte) (Filterer, bool) { 300 clearCapture(reg.Sub...) 301 // we support only simplication of concat operation with 3 sub expressions. 302 // for instance .*foo.*bar contains 4 subs (.*+foo+.*+bar) and can't be simplified. 303 if len(reg.Sub) > 3 { 304 return nil, false 305 } 306 307 var curr Filterer 308 var ok bool 309 literals := 0 310 for _, sub := range reg.Sub { 311 if sub.Op == syntax.OpLiteral { 312 // only one literal is allowed. 313 if literals != 0 { 314 return nil, false 315 } 316 literals++ 317 baseLiteral = append(baseLiteral, []byte(string(sub.Rune))...) 318 continue 319 } 320 // if we have an alternate we must also have a base literal to apply the concatenation with. 321 if sub.Op == syntax.OpAlternate && baseLiteral != nil { 322 if curr, ok = simplifyConcatAlternate(sub, baseLiteral, curr); !ok { 323 return nil, false 324 } 325 continue 326 } 327 if sub.Op == syntax.OpStar && sub.Sub[0].Op == syntax.OpAnyCharNotNL { 328 continue 329 } 330 return nil, false 331 } 332 333 // if we have a filter from concat alternates. 334 if curr != nil { 335 return curr, true 336 } 337 338 // if we have only a concat with literals. 339 if baseLiteral != nil { 340 return newContainsFilter(baseLiteral, isCaseInsensitive(reg)), true 341 } 342 343 return nil, false 344} 345 346// simplifyConcatAlternate simplifies concat alternate operations. 347// A concat alternate is found when a concat operation has a sub alternate and is preceded by a literal. 348// For instance bar|b|buzz is expressed as b(ar|(?:)|uzz) => b concat alternate(ar,(?:),uzz). 349// (?:) being an OpEmptyMatch and b being the literal to concat all alternates (ar,(?:),uzz) with. 350func simplifyConcatAlternate(reg *syntax.Regexp, literal []byte, curr Filterer) (Filterer, bool) { 351 for _, alt := range reg.Sub { 352 switch alt.Op { 353 case syntax.OpEmptyMatch: 354 curr = chainOrFilter(curr, newContainsFilter(literal, isCaseInsensitive(reg))) 355 case syntax.OpLiteral: 356 // concat the root literal with the alternate one. 357 altBytes := []byte(string(alt.Rune)) 358 altLiteral := make([]byte, 0, len(literal)+len(altBytes)) 359 altLiteral = append(altLiteral, literal...) 360 altLiteral = append(altLiteral, altBytes...) 361 curr = chainOrFilter(curr, newContainsFilter(altLiteral, isCaseInsensitive(reg))) 362 case syntax.OpConcat: 363 f, ok := simplifyConcat(alt, literal) 364 if !ok { 365 return nil, false 366 } 367 curr = chainOrFilter(curr, f) 368 case syntax.OpStar: 369 if alt.Sub[0].Op != syntax.OpAnyCharNotNL { 370 return nil, false 371 } 372 curr = chainOrFilter(curr, newContainsFilter(literal, isCaseInsensitive(reg))) 373 default: 374 return nil, false 375 } 376 } 377 if curr != nil { 378 return curr, true 379 } 380 return nil, false 381} 382