#!/bin/sh
#
# plugin for munin to monitor usage of unbound servers.
# To install copy this to /usr/local/share/munin/plugins/unbound_munin_
# and use munin-node-configure (--suggest, --shell).
#
# (C) 2008 W.C.A. Wijngaards. BSD Licensed.
#
# To install; enable statistics and unbound-control in unbound.conf
#	server:		extended-statistics: yes
#			statistics-cumulative: no
#			statistics-interval: 0
#	remote-control:	control-enable: yes
# Run the command unbound-control-setup to generate the key files.
#
# Environment variables for this script
#	unbound_conf	- where the unbound.conf file is located.
#	unbound_control	- where to find unbound-control executable.
#	spoof_warn	- what level to warn about spoofing
#	spoof_crit	- what level to crit about spoofing
#
# You can set them in your munin/plugin-conf.d/plugins.conf file
# with:
# [unbound*]
# user root
# env.unbound_conf /usr/local/etc/unbound/unbound.conf
# env.unbound_control /usr/local/sbin/unbound-control
# env.spoof_warn 1000
# env.spoof_crit 100000
#
# This plugin can create different graphs depending on what name
# you link it as (with ln -s) into the plugins directory
# You can link it multiple times.
# If you are only a casual user, the _hits and _by_type are most interesting,
# possibly followed by _by_rcode.
#
#	unbound_munin_hits	- base volume, cache hits, unwanted traffic
#	unbound_munin_queue	- to monitor the internal requestlist
#	unbound_munin_memory	- memory usage
#	unbound_munin_by_type	- incoming queries by type
#	unbound_munin_by_class	- incoming queries by class
#	unbound_munin_by_opcode	- incoming queries by opcode
#	unbound_munin_by_rcode	- answers by rcode, validation status
#	unbound_munin_by_flags	- incoming queries by flags
#	unbound_munin_histogram	- histogram of query resolving times
#
# Magic markers - optional - used by installation scripts and
# munin-config: (originally contrib family but munin-node-configure ignores it)
#
#%# family=auto
#%# capabilities=autoconf suggest

# POD documentation
: <<=cut
=head1 NAME

unbound_munin_ - Munin plugin to monitor the Unbound DNS resolver.

=head1 APPLICABLE SYSTEMS

System with unbound daemon.

=head1 CONFIGURATION

  [unbound*]
  user root
  env.unbound_conf /usr/local/etc/unbound/unbound.conf
  env.unbound_control /usr/local/sbin/unbound-control
  env.spoof_warn 1000
  env.spoof_crit 100000

Use the .env settings to override the defaults.

=head1 USAGE

Can be used to present different graphs. Use ln -s for that name in
the plugins directory to enable the graph.
unbound_munin_hits	- base volume, cache hits, unwanted traffic
unbound_munin_queue	- to monitor the internal requestlist
unbound_munin_memory	- memory usage
unbound_munin_by_type	- incoming queries by type
unbound_munin_by_class	- incoming queries by class
unbound_munin_by_opcode	- incoming queries by opcode
unbound_munin_by_rcode	- answers by rcode, validation status
unbound_munin_by_flags	- incoming queries by flags
unbound_munin_histogram	- histogram of query resolving times

=head1 AUTHOR

Copyright 2008 W.C.A. Wijngaards

=head1 LICENSE

BSD

=cut

state="${MUNIN_PLUGSTATE}/unbound.state"
seentags="${MUNIN_PLUGSTATE}/unbound-seentags.state"
conf=${unbound_conf:-/usr/local/etc/unbound/unbound.conf}
ctrl=${unbound_control:-/usr/local/sbin/unbound-control}
warn=${spoof_warn:-1000}
crit=${spoof_crit:-100000}
lock=$state.lock

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# to keep things within 19 characters
# (a list of sed arguments; it is expanded unquoted on purpose so that
# every -e expression becomes its own word)
ABBREV="-e s/total/t/ -e s/thread/t/ -e s/num/n/ -e s/query/q/ -e s/answer/a/ -e s/unwanted/u/ -e s/requestlist/ql/ -e s/type/t/ -e s/class/c/ -e s/opcode/o/ -e s/rcode/r/ -e s/edns/e/ -e s/mem/m/ -e s/cache/c/ -e s/mod/m/"

# get value from $1 into return variable $value
# looks up the line "$1=..." in the state file; $value is "0" when the
# name is not present.
get_value ( ) {
	value="$(grep "^$1=" "$state" | sed -e 's/^.*=//')"
	if test "$value"x = ""x; then
		value="0"
	fi
}

# Update list of seen query types etc to seentags file. This is run while
# holding the lock, after the state file is updated.
# The previous list, a few always-present names and every num* name from
# the current state are merged.  The result is written to a temporary file
# and renamed into place, so a reader never sees a partially written list.
update_seentags() {
	tmptags="$seentags.tmp.$$"
	tmplist="$(cat "${seentags}" 2> /dev/null)
num.query.type.A
num.query.class.IN
num.query.opcode.QUERY
num.answer.rcode.NOERROR
"
	if (printf '%s\n' "${tmplist}"; grep ^num "${state}" | sed -e 's/=.*//') | sort -u > "${tmptags}"; then
		mv -f "${tmptags}" "${seentags}"
	else
		rm -f "${tmptags}"
	fi
}

# download the state from the unbound server.
# Exits the script with status 1 if the lock cannot be obtained or the
# statistics cannot be fetched.
get_state ( ) {
	# obtain lock for fetching the state
	# because there is a race condition in fetching and writing to file

	# see if the lock is stale, if so, take it
	if test -f "$lock"; then
		pid="$(cat "$lock" 2>&1)"
		if ! kill -0 "$pid" >/dev/null 2>&1 && test "$pid" != "$$"; then
			echo $$ >"$lock"
		fi
	fi

	i=0
	while test ! -f "$lock" || test "$(cat "$lock" 2>&1)" != "$$"; do
		while test -f "$lock"; do
			# wait
			i=$((i + 1))
			if test "$i" -gt 1000; then
				sleep 1
			fi
			if test "$i" -gt 1500; then
				echo "error locking $lock = $(cat "$lock")"
				rm -f "$lock"
				exit 1
			fi
		done
		# try to get it
		if echo $$ >"$lock"; then : ; else break; fi
	done
	# do not refetch if the file exists and only LEE seconds old
	if test -f "$state"; then
		now=$(date +%s)
		get_value "time.now"
		# drop the fractional part of the timestamp
		value="$(echo "$value" | sed -e 's/\..*$//')"
		if test "$now" -lt "$((value + lee))"; then
			rm -f "$lock"
			return
		fi
	fi
	# Fetch into a temporary file in the same directory and rename it over
	# the state file.  A failed fetch then leaves the previous state
	# untouched instead of an empty or half-written file.
	tmpstate="$state.tmp.$$"
	# $ctrl is expanded unquoted on purpose, it may carry extra arguments.
	if $ctrl -c "$conf" stats > "$tmpstate" && mv -f "$tmpstate" "$state"; then
		update_seentags
		rm -f "$lock"
	else
		rm -f "$tmpstate"
		echo "error retrieving data from unbound server"
		rm -f "$lock"
		exit 1
	fi
}

if test "$1" = "autoconf"; then
	if test ! -f "$conf"; then
		echo no "($conf does not exist)"
		exit 0
	fi
	statedir=$(dirname "$state")
	if test ! -d "$statedir"; then
		echo no "($statedir directory does not exist)"
		exit 0
	fi
	echo yes
	exit 0
fi

if test "$1" = "suggest"; then
	echo "hits"
	echo "queue"
	echo "memory"
	echo "by_type"
	echo "by_class"
	echo "by_opcode"
	echo "by_rcode"
	echo "by_flags"
	echo "histogram"
	exit 0
fi

# determine my type, by name
id=$(echo "$0" | sed -e 's/^.*unbound_munin_//')
if test "$id"x = ""x; then
	# some default to keep people sane.
	id="hits"
fi

# if $1 exists in statefile, config is echoed with label $2
exist_config ( ) {
	mn=$(echo "$1" | sed $ABBREV | tr . _)
	if grep "^$1=" "$state" >/dev/null 2>&1; then
		echo "$mn.label $2"
		echo "$mn.min 0"
		echo "$mn.type ABSOLUTE"
	fi
}

# print label and min 0 for a name $1 in unbound format
# $2 is the label, $3 the munin data type.
p_config ( ) {
	mn=$(echo "$1" | sed $ABBREV | tr . _)
	echo "$mn.label $2"
	echo "$mn.min 0"
	echo "$mn.type $3"
}

# print config for every name in the seentags file that starts with $1.
# The label is the name with that prefix and the following dot removed.
seen_config ( ) {
	for nm in $(grep "^$1" "$seentags"); do
		p_config "$nm" "${nm#"$1".}" "ABSOLUTE"
	done
}

# print config for histogram field $1 with label $2, draw style $3 and
# colour $4.
h_config ( ) {
	echo "$1.label $2"
	echo "$1.min 0"
	echo "$1.type ABSOLUTE"
	echo "$1.draw $3"
	echo "$1.colour $4"
}

if test "$1" = "config"; then
	if test ! -f "$state"; then
		get_state
	fi
	case "$id" in
	hits)
		echo "graph_title Unbound DNS traffic and cache hits"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category dns"
		for x in $(grep "^thread[0-9][0-9]*\.num\.queries=" "$state" |
			sed -e 's/=.*//'); do
			exist_config "$x" "queries handled by $(basename "$x" .num.queries)"
		done
		p_config "total.num.queries" "total queries from clients" "ABSOLUTE"
		p_config "total.num.cachehits" "cache hits" "ABSOLUTE"
		p_config "total.num.prefetch" "cache prefetch" "ABSOLUTE"
		p_config "num.query.tcp" "TCP queries" "ABSOLUTE"
		p_config "num.query.tcpout" "TCP out queries" "ABSOLUTE"
		p_config "num.query.tls" "TLS queries" "ABSOLUTE"
		p_config "num.query.tls.resume" "TLS resumes" "ABSOLUTE"
		p_config "num.query.ipv6" "IPv6 queries" "ABSOLUTE"
		p_config "unwanted.queries" "queries that failed acl" "ABSOLUTE"
		p_config "unwanted.replies" "unwanted or unsolicited replies" "ABSOLUTE"
		# u_replies is the abbreviated field name of unwanted.replies
		echo "u_replies.warning $warn"
		echo "u_replies.critical $crit"
		echo "graph_info DNS queries to the recursive resolver. The unwanted replies could be innocent duplicate packets, late replies, or spoof threats."
		;;
	queue)
		echo "graph_title Unbound requestlist size"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel number of queries"
		echo "graph_scale no"
		echo "graph_category dns"
		p_config "total.requestlist.avg" "Average size of queue on insert" "GAUGE"
		p_config "total.requestlist.max" "Max size of queue (in 5 min)" "GAUGE"
		p_config "total.requestlist.overwritten" "Number of queries replaced by new ones" "GAUGE"
		p_config "total.requestlist.exceeded" "Number of queries dropped due to lack of space" "GAUGE"
		echo "graph_info The queries that did not hit the cache and need recursion service take up space in the requestlist. If there are too many queries, first queries get overwritten, and at last resort dropped."
		;;
	memory)
		echo "graph_title Unbound memory usage"
		echo "graph_args --base 1024 -l 0"
		echo "graph_vlabel memory used in bytes"
		echo "graph_category dns"
		p_config "mem.cache.rrset" "RRset cache memory" "GAUGE"
		p_config "mem.cache.message" "Message cache memory" "GAUGE"
		p_config "mem.mod.iterator" "Iterator module memory" "GAUGE"
		p_config "mem.mod.validator" "Validator module and key cache memory" "GAUGE"
		p_config "msg.cache.count" "msg cache count" "GAUGE"
		p_config "rrset.cache.count" "rrset cache count" "GAUGE"
		p_config "infra.cache.count" "infra cache count" "GAUGE"
		p_config "key.cache.count" "key cache count" "GAUGE"
		echo "graph_info The memory used by unbound."
		;;
	by_type)
		echo "graph_title Unbound DNS queries by type"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category dns"
		seen_config "num.query.type"
		echo "graph_info queries by DNS RR type queried for"
		;;
	by_class)
		echo "graph_title Unbound DNS queries by class"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category dns"
		seen_config "num.query.class"
		echo "graph_info queries by DNS RR class queried for."
		;;
	by_opcode)
		echo "graph_title Unbound DNS queries by opcode"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category dns"
		seen_config "num.query.opcode"
		echo "graph_info queries by opcode in the query packet."
		;;
	by_rcode)
		echo "graph_title Unbound DNS answers by return code"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel answer packets / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category dns"
		seen_config "num.answer.rcode"
		p_config "num.answer.secure" "answer secure" "ABSOLUTE"
		p_config "num.answer.bogus" "answer bogus" "ABSOLUTE"
		p_config "num.rrset.bogus" "num rrsets marked bogus" "ABSOLUTE"
		echo "graph_info answers sorted by return value. rrsets bogus is the number of rrsets marked bogus per \${graph_period} by the validator"
		;;
	by_flags)
		echo "graph_title Unbound DNS incoming queries by flags"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category dns"
		p_config "num.query.flags.QR" "QR (query reply) flag" "ABSOLUTE"
		p_config "num.query.flags.AA" "AA (auth answer) flag" "ABSOLUTE"
		p_config "num.query.flags.TC" "TC (truncated) flag" "ABSOLUTE"
		p_config "num.query.flags.RD" "RD (recursion desired) flag" "ABSOLUTE"
		p_config "num.query.flags.RA" "RA (rec avail) flag" "ABSOLUTE"
		p_config "num.query.flags.Z" "Z (zero) flag" "ABSOLUTE"
		p_config "num.query.flags.AD" "AD (auth data) flag" "ABSOLUTE"
		p_config "num.query.flags.CD" "CD (check disabled) flag" "ABSOLUTE"
		p_config "num.query.edns.present" "EDNS OPT present" "ABSOLUTE"
		p_config "num.query.edns.DO" "DO (DNSSEC OK) flag" "ABSOLUTE"
		echo "graph_info This graphs plots the flags inside incoming queries. For example, if QR, AA, TC, RA, Z flags are set, the query can be rejected. RD, AD, CD and DO are legitimately set by some software."
		;;
	histogram)
		echo "graph_title Unbound DNS histogram of reply time"
		echo "graph_args --base 1000 -l 0"
		echo "graph_vlabel queries / \${graph_period}"
		echo "graph_scale no"
		echo "graph_category dns"
		h_config hcache "cache hits" AREA 999999
		h_config h64ms "0 msec - 66 msec" STACK 0000FF
		h_config h128ms "66 msec - 131 msec" STACK 1F00DF
		h_config h256ms "131 msec - 262 msec" STACK 3F00BF
		h_config h512ms "262 msec - 524 msec" STACK 5F009F
		h_config h1s "524 msec - 1 sec" STACK 7F007F
		h_config h2s "1 sec - 2 sec" STACK 9F005F
		h_config h4s "2 sec - 4 sec" STACK BF003F
		h_config h8s "4 sec - 8 sec" STACK DF001F
		h_config h16s "8 sec - ..." STACK FF0000
		echo "graph_info Histogram of the reply times for queries."
		;;
	esac

	exit 0
fi

# do the stats itself
get_state

# get the time elapsed
get_value "time.elapsed"
if test "$value" = 0 || test "$value" = "0.000000"; then
	echo "error: time elapsed 0 or could not retrieve data"
	exit 1
fi
# not referenced again in this script; kept as in the original
elapsed="$value"

# print value for $1
print_value ( ) {
	mn=$(echo "$1" | sed $ABBREV | tr . _)
	get_value "$1"
	echo "$mn.value $value"
}

# print value if line already found in $2
# (not called anywhere in this script; kept as in the original)
print_value_line ( ) {
	mn=$(echo "$1" | sed $ABBREV | tr . _)
	value="$(echo "$2" | sed -e 's/^.*=//')"
	echo "$mn.value $value"
}

# print the value of every name in the seentags file that starts with $1
seen_values ( ) {
	for nm in $(grep "^$1" "$seentags"); do
		print_value "$nm"
	done
}

# add up the values of all names given as arguments, result in $value
sum_values ( ) {
	total=0
	for nm in "$@"; do
		get_value "$nm"
		total=$((total + value))
	done
	value=$total
}

case "$id" in
hits)
	for x in $(grep "^thread[0-9][0-9]*\.num\.queries=" "$state" |
		sed -e 's/=.*//') total.num.queries \
		total.num.cachehits total.num.prefetch num.query.tcp \
		num.query.tcpout num.query.tls num.query.tls.resume \
		num.query.ipv6 unwanted.queries \
		unwanted.replies; do
		# only names that are present in the state are printed
		if grep "^$x=" "$state" >/dev/null 2>&1; then
			print_value "$x"
		fi
	done
	;;
queue)
	for x in total.requestlist.avg total.requestlist.max \
		total.requestlist.overwritten total.requestlist.exceeded; do
		print_value "$x"
	done
	;;
memory)
	for x in mem.cache.rrset mem.cache.message mem.mod.iterator \
		mem.mod.validator msg.cache.count rrset.cache.count \
		infra.cache.count key.cache.count; do
		print_value "$x"
	done
	;;
by_type)
	seen_values "num.query.type"
	;;
by_class)
	seen_values "num.query.class"
	;;
by_opcode)
	seen_values "num.query.opcode"
	;;
by_rcode)
	seen_values "num.answer.rcode"
	print_value "num.answer.secure"
	print_value "num.answer.bogus"
	print_value "num.rrset.bogus"
	;;
by_flags)
	for x in num.query.flags.QR num.query.flags.AA num.query.flags.TC \
		num.query.flags.RD num.query.flags.RA num.query.flags.Z \
		num.query.flags.AD num.query.flags.CD num.query.edns.present \
		num.query.edns.DO; do
		print_value "$x"
	done
	;;
histogram)
	get_value total.num.cachehits
	echo "hcache.value $value"
	# all buckets below 0.065536 sec are reported as one field
	sum_values histogram.000000.000000.to.000000.000001 \
		histogram.000000.000001.to.000000.000002 \
		histogram.000000.000002.to.000000.000004 \
		histogram.000000.000004.to.000000.000008 \
		histogram.000000.000008.to.000000.000016 \
		histogram.000000.000016.to.000000.000032 \
		histogram.000000.000032.to.000000.000064 \
		histogram.000000.000064.to.000000.000128 \
		histogram.000000.000128.to.000000.000256 \
		histogram.000000.000256.to.000000.000512 \
		histogram.000000.000512.to.000000.001024 \
		histogram.000000.001024.to.000000.002048 \
		histogram.000000.002048.to.000000.004096 \
		histogram.000000.004096.to.000000.008192 \
		histogram.000000.008192.to.000000.016384 \
		histogram.000000.016384.to.000000.032768 \
		histogram.000000.032768.to.000000.065536
	echo "h64ms.value $value"
	get_value histogram.000000.065536.to.000000.131072
	echo "h128ms.value $value"
	get_value histogram.000000.131072.to.000000.262144
	echo "h256ms.value $value"
	get_value histogram.000000.262144.to.000000.524288
	echo "h512ms.value $value"
	get_value histogram.000000.524288.to.000001.000000
	echo "h1s.value $value"
	get_value histogram.000001.000000.to.000002.000000
	echo "h2s.value $value"
	get_value histogram.000002.000000.to.000004.000000
	echo "h4s.value $value"
	get_value histogram.000004.000000.to.000008.000000
	echo "h8s.value $value"
	# all buckets from 8 sec upwards are reported as one field
	sum_values histogram.000008.000000.to.000016.000000 \
		histogram.000016.000000.to.000032.000000 \
		histogram.000032.000000.to.000064.000000 \
		histogram.000064.000000.to.000128.000000 \
		histogram.000128.000000.to.000256.000000 \
		histogram.000256.000000.to.000512.000000 \
		histogram.000512.000000.to.001024.000000 \
		histogram.001024.000000.to.002048.000000 \
		histogram.002048.000000.to.004096.000000 \
		histogram.004096.000000.to.008192.000000 \
		histogram.008192.000000.to.016384.000000 \
		histogram.016384.000000.to.032768.000000 \
		histogram.032768.000000.to.065536.000000 \
		histogram.065536.000000.to.131072.000000 \
		histogram.131072.000000.to.262144.000000 \
		histogram.262144.000000.to.524288.000000
	echo "h16s.value $value"
	;;
esac