#!python
# Copyright (c) gocept gmbh & co. kg
# See also LICENSE.txt

"""haproxy log check for request time and error rate.

This check shows an advanced programming technique: we allow the user to
define the thresholds dynamically. You can specify a list of conditions
like:

* the N1th percentile of t_tot must match range R1
* the N2th percentile of t_tot must match range R2

Implementation-wise, the command line parameter "percentiles" is used to
compute both metric and context names. The default is to check for the
50th and 95th percentile. The `MultiArg` class is used to specify sets
of thresholds. It has the nice property to fill up missing values so the
user is free in how many thresholds they specify.
"""

import argparse
import itertools
import nagiosplugin
import numpy
import re


class HAProxyLog(nagiosplugin.Resource):
    """haproxy.log parser.

    Goes through a haproxy log file and extracts total request time
    (t_tot) and error status for each request. The error status is used
    to compute the error rate.
    """

    # Group 1 captures the last of the five slash-separated timer fields
    # (used as t_tot), group 2 the three-digit status code following it.
    r_logline = re.compile(
        r'haproxy.*: [0-9.:]+ \[\S+\] .* \d+/\d+/\d+/\d+/(\d+) (\d\d\d) ')

    def __init__(self, logfile, statefile, percentiles):
        """Store the check configuration.

        :param logfile: path of the haproxy log file to read
        :param statefile: path of the cookie file handed to
            `nagiosplugin.Cookie`
        :param percentiles: iterable of percentile specifications
            (strings such as ``'50'`` or ``'99.9'``); each one yields a
            metric named ``ttot<pct>``
        """
        self.logfile = logfile
        self.statefile = statefile
        self.percentiles = percentiles

    def parse_log(self):
        """Yields ttot and error status for each log line.

        Lines are read through `nagiosplugin.LogTail` using the cookie
        kept in the state file. Lines that do not match `r_logline` are
        skipped. A request counts as an error unless its status code
        starts with ``2`` or ``3``.
        """
        cookie = nagiosplugin.Cookie(self.statefile)
        with nagiosplugin.LogTail(self.logfile, cookie) as lf:
            for line in lf:
                # lines are decoded before matching against the str pattern
                match = self.r_logline.search(line.decode())
                if not match:
                    continue
                ttot, stat = match.groups()
                err = not stat.startswith(('2', '3'))
                yield int(ttot), err

    def probe(self):
        """Computes error rate and t_tot percentiles.

        Returns a list of metrics: one ``ttot<pct>`` metric per
        configured percentile (only if at least one request was parsed),
        followed by ``error_rate`` (in percent) and ``request_total``.
        """
        d = numpy.fromiter(self.parse_log(), dtype=[
            ('ttot', numpy.int32), ('err', numpy.uint16)])
        requests = len(d['err'])
        metrics = []
        error_rate = 0
        if requests:
            for pct in self.percentiles:
                # float() accepts fractional percentiles like '99.9' and
                # gives the same result as before for integer input.
                # The value is scaled by 1/1000 and reported with unit 's'.
                metrics.append(nagiosplugin.Metric(
                    'ttot%s' % pct,
                    numpy.percentile(d['ttot'], float(pct)) / 1000.0,
                    's', 0))
            # Sum the integer error flags first and divide once in floating
            # point; dividing every element before summing is wasteful and
            # breaks under integer-division semantics.
            error_rate = 100.0 * numpy.sum(d['err']) / requests
        metrics += [nagiosplugin.Metric('error_rate', error_rate, '%', 0, 100),
                    nagiosplugin.Metric('request_total', requests, min=0,
                                        context='default')]
        return metrics


def parse_args():
    """Define the command line interface and return the parsed arguments."""
    argp = argparse.ArgumentParser()
    argp.add_argument('logfile')
    argp.add_argument('--ew', '--error-warning', metavar='RANGE', default='')
    argp.add_argument('--ec', '--error-critical', metavar='RANGE', default='')
    argp.add_argument('--tw', '--ttot-warning', metavar='RANGE[,RANGE,...]',
                      type=nagiosplugin.MultiArg, default='')
    argp.add_argument('--tc', '--ttot-critical', metavar='RANGE[,RANGE,...]',
                      type=nagiosplugin.MultiArg, default='')
    argp.add_argument('-p', '--percentiles', metavar='N,N,...',
                      default='50,95', help='check Nth percentiles of '
                      'total time (default: %(default)s)')
    argp.add_argument('-v', '--verbose', action='count', default=0,
                      help='increase output verbosity (use up to 3 times)')
    argp.add_argument('-t', '--timeout', default=30,
                      help='abort execution after TIMEOUT seconds')
    argp.add_argument('-s', '--state-file', default='check_haproxy_log.state',
                      help='cookie file to save last log file position '
                      '(default: "%(default)s")')
    return argp.parse_args()


@nagiosplugin.guarded
def main():
    """Build the check from the command line arguments and run it.

    One `ScalarContext` is registered for the error rate and one per
    requested percentile; the i-th percentile is paired with the i-th
    warning/critical range from ``--tw``/``--tc``.
    """
    args = parse_args()
    # Strip whitespace so that "50, 95" does not produce metric and
    # context names containing a space.
    percentiles = [pct.strip() for pct in args.percentiles.split(',')]
    check = nagiosplugin.Check(
        HAProxyLog(args.logfile, args.state_file, percentiles),
        nagiosplugin.ScalarContext('error_rate', args.ew, args.ec))
    for i, pct in enumerate(percentiles):
        check.add(nagiosplugin.ScalarContext(
            'ttot%s' % pct, args.tw[i], args.tc[i],
            'total time (%s.pct) is {valueunit}' % pct))
    check.main(args.verbose, args.timeout)

if __name__ == '__main__':
    main()