#!/bin/sh
#
# plugin for munin to monitor usage of unbound servers.
# To install copy this to /usr/local/share/munin/plugins/unbound_munin_
# and use munin-node-configure (--suggest, --shell).
#
# (C) 2008 W.C.A. Wijngaards. BSD Licensed.
#
# To install; enable statistics and unbound-control in unbound.conf
#   server:          extended-statistics: yes
#                    statistics-cumulative: no
#                    statistics-interval: 0
#   remote-control:  control-enable: yes
# Run the command unbound-control-setup to generate the key files.
#
# Environment variables for this script
#   statefile       - where to put temporary statefile.
#   unbound_conf    - where the unbound.conf file is located.
#   unbound_control - where to find unbound-control executable.
#   spoof_warn      - what level to warn about spoofing
#   spoof_crit      - what level to crit about spoofing
#
# You can set them in your munin/plugin-conf.d/plugins.conf file
# with:
# [unbound*]
# user root
# env.statefile /usr/local/var/munin/plugin-state/unbound-state
# env.unbound_conf /usr/local/etc/unbound/unbound.conf
# env.unbound_control /usr/local/sbin/unbound-control
# env.spoof_warn 1000
# env.spoof_crit 100000
#
# This plugin can create different graphs depending on what name
# you link it as (with ln -s) into the plugins directory
# You can link it multiple times.
# If you are only a casual user, the _hits and _by_type are most interesting,
# possibly followed by _by_rcode.
#
#   unbound_munin_hits      - base volume, cache hits, unwanted traffic
#   unbound_munin_queue     - to monitor the internal requestlist
#   unbound_munin_memory    - memory usage
#   unbound_munin_by_type   - incoming queries by type
#   unbound_munin_by_class  - incoming queries by class
#   unbound_munin_by_opcode - incoming queries by opcode
#   unbound_munin_by_rcode  - answers by rcode, validation status
#   unbound_munin_by_flags  - incoming queries by flags
#   unbound_munin_histogram - histogram of query resolving times
#
# Magic markers - optional - used by installation scripts and
# munin-config: (originally contrib family but munin-node-configure ignores it)
#
#%# family=auto
#%# capabilities=autoconf suggest

# POD documentation
: <<=cut
=head1 NAME

unbound_munin_ - Munin plugin to monitor the Unbound DNS resolver.

=head1 APPLICABLE SYSTEMS

System with unbound daemon.

=head1 CONFIGURATION

  [unbound*]
  user root
  env.statefile /usr/local/var/munin/plugin-state/unbound-state
  env.unbound_conf /usr/local/etc/unbound/unbound.conf
  env.unbound_control /usr/local/sbin/unbound-control
  env.spoof_warn 1000
  env.spoof_crit 100000

Use the .env settings to override the defaults.

=head1 USAGE

Can be used to present different graphs. Use ln -s for that name in
the plugins directory to enable the graph.
unbound_munin_hits      - base volume, cache hits, unwanted traffic
unbound_munin_queue     - to monitor the internal requestlist
unbound_munin_memory    - memory usage
unbound_munin_by_type   - incoming queries by type
unbound_munin_by_class  - incoming queries by class
unbound_munin_by_opcode - incoming queries by opcode
unbound_munin_by_rcode  - answers by rcode, validation status
unbound_munin_by_flags  - incoming queries by flags
unbound_munin_histogram - histogram of query resolving times

=head1 AUTHOR

Copyright 2008 W.C.A. Wijngaards

=head1 LICENSE

BSD

=cut

state=${statefile:-/usr/local/var/munin/plugin-state/unbound-state}
conf=${unbound_conf:-/usr/local/etc/unbound/unbound.conf}
ctrl=${unbound_control:-/usr/local/sbin/unbound-control}
warn=${spoof_warn:-1000}
crit=${spoof_crit:-100000}
lock=$state.lock

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# to keep things within 19 characters
ABBREV="-e s/total/t/ -e s/thread/t/ -e s/num/n/ -e s/query/q/ -e s/answer/a/ -e s/unwanted/u/ -e s/requestlist/ql/ -e s/type/t/ -e s/class/c/ -e s/opcode/o/ -e s/rcode/r/ -e s/edns/e/ -e s/mem/m/ -e s/cache/c/ -e s/mod/m/"

# Convert the unbound statistic name $1 into a munin field name, stored in
# the global $mn: the ABBREV substitutions are applied, then dots become
# underscores.
field_name() {
  # $ABBREV is deliberately unquoted: it holds several sed arguments that
  # must be split into separate words.
  mn=$(echo "$1" | sed $ABBREV | tr . _)
}

# Succeeds when the statefile contains a line whose key is exactly $1.
# The key is compared as a literal string, not as a regular expression.
has_key() {
  awk -F= -v key="$1" '$1 == key { found = 1; exit } END { exit !found }' \
    "$state" 2>/dev/null
}

# get value from $1 into return variable $value
# Only the first line with that exact key is used; $value is "0" when the
# key is absent or its value is empty.
get_value() {
  value=$(awk -F= -v key="$1" \
    '$1 == key { sub(/^.*=/, ""); print; exit }' "$state")
  if test -z "$value"; then
    value="0"
  fi
}

# download the state from the unbound server.
# On any failure an error is printed on stderr, the lock is removed and the
# script exits with status 1.
get_state() {
  # obtain lock for fetching the state
  # because there is a race condition in fetching and writing to file

  # see if the lock is stale, if so, take it
  if test -f "$lock"; then
    pid=$(cat "$lock" 2>&1)
    if ! kill -0 "$pid" >/dev/null 2>&1 && test "$pid" != "$$"; then
      echo $$ >"$lock"
    fi
  fi

  i=0
  while test ! -f "$lock" || test "$(cat "$lock" 2>&1)" != "$$"; do
    while test -f "$lock"; do
      # wait: the first 1000 rounds spin, the next 500 sleep a second each
      i=$((i + 1))
      if test "$i" -gt 1000; then
        sleep 1
      fi
      if test "$i" -gt 1500; then
        echo "error locking $lock" "=" "$(cat "$lock")" >&2
        rm -f "$lock"
        exit 1
      fi
    done
    # try to get it; if the lock cannot be written, carry on without it
    if echo $$ >"$lock"; then :; else break; fi
  done

  # do not refetch if the file exists and only LEE seconds old
  if test -f "$state"; then
    now=$(date +%s)
    get_value "time.now"
    value=${value%%.*}
    # anything that is not a plain number counts as "very old"
    case $value in
      '' | *[!0-9]*) value=0 ;;
    esac
    if test "$now" -lt "$((value + lee))"; then
      rm -f "$lock"
      return
    fi
  fi

  # Fetch into a temporary file and rename it into place, so that a failed
  # fetch never truncates the statefile and readers that do not hold the
  # lock never see a partially written file.
  tmp="$state.$$.tmp"
  # $ctrl is deliberately unquoted, as in the original, so a value that
  # carries extra words is still split into command and arguments.
  if $ctrl -c "$conf" stats >"$tmp" && mv -f "$tmp" "$state"; then
    rm -f "$lock"
  else
    echo "error retrieving data from unbound server" >&2
    rm -f "$tmp" "$lock"
    exit 1
  fi
}

if test "$1" = "autoconf"; then
  # NOTE(review): exit status is 0 for "no" as well as "yes", following the
  # Munin plugin convention for autoconf; the original exited 1 on "no".
  if test ! -f "$conf"; then
    echo no "($conf does not exist)"
    exit 0
  fi
  state_dir=$(dirname "$state")
  if test ! -d "$state_dir"; then
    echo no "($state_dir directory does not exist)"
    exit 0
  fi
  echo yes
  exit 0
fi

if test "$1" = "suggest"; then
  for graph in hits queue memory by_type by_class by_opcode by_rcode \
    by_flags histogram; do
    echo "$graph"
  done
  exit 0
fi

# determine my type, by name
id=${0##*unbound_munin_}
if test -z "$id"; then
  # some default to keep people sane.
  id="hits"
fi

# if $1 exists in statefile, config is echoed with label $2
exist_config() {
  field_name "$1"
  if has_key "$1"; then
    echo "$mn.label $2"
    echo "$mn.min 0"
    echo "$mn.type ABSOLUTE"
  fi
}

# print label $2, min 0 and type $3 for a name $1 in unbound format
p_config() {
  field_name "$1"
  echo "$mn.label $2"
  echo "$mn.min 0"
  echo "$mn.type $3"
}

# print an ABSOLUTE field config for every statefile line matching the
# regex prefix $1; the label is the part of the name after "$1."
p_config_prefix() {
  # word splitting is intended: one "name=value" word per statefile line
  for entry in $(grep "^$1" "$state"); do
    nm=${entry%%=*}
    p_config "$nm" "${nm#"$1".}" "ABSOLUTE"
  done
}

# print the config of histogram field $1: label $2, draw style $3, colour $4
h_config() {
  echo "$1.label $2"
  echo "$1.min 0"
  echo "$1.type ABSOLUTE"
  echo "$1.draw $3"
  echo "$1.colour $4"
}

if test "$1" = "config"; then
  if test ! -f "$state"; then
    get_state
  fi
  case $id in
    hits)
      echo "graph_title Unbound DNS traffic and cache hits"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel queries / \${graph_period}"
      echo "graph_scale no"
      echo "graph_category DNS"
      for x in $(grep "^thread[0-9][0-9]*\.num\.queries=" "$state" |
        sed -e 's/=.*//'); do
        exist_config "$x" "queries handled by ${x%.num.queries}"
      done
      p_config "total.num.queries" "total queries from clients" "ABSOLUTE"
      p_config "total.num.cachehits" "cache hits" "ABSOLUTE"
      p_config "total.num.prefetch" "cache prefetch" "ABSOLUTE"
      p_config "num.query.tcp" "TCP queries" "ABSOLUTE"
      p_config "num.query.tcpout" "TCP out queries" "ABSOLUTE"
      p_config "num.query.tls" "TLS queries" "ABSOLUTE"
      p_config "num.query.tls.resume" "TLS resumes" "ABSOLUTE"
      p_config "num.query.ipv6" "IPv6 queries" "ABSOLUTE"
      p_config "unwanted.queries" "queries that failed acl" "ABSOLUTE"
      p_config "unwanted.replies" "unwanted or unsolicited replies" "ABSOLUTE"
      # u_replies is the abbreviated field name of unwanted.replies
      echo "u_replies.warning $warn"
      echo "u_replies.critical $crit"
      echo "graph_info DNS queries to the recursive resolver. The unwanted replies could be innocent duplicate packets, late replies, or spoof threats."
      ;;
    queue)
      echo "graph_title Unbound requestlist size"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel number of queries"
      echo "graph_scale no"
      echo "graph_category DNS"
      p_config "total.requestlist.avg" "Average size of queue on insert" "GAUGE"
      p_config "total.requestlist.max" "Max size of queue (in 5 min)" "GAUGE"
      p_config "total.requestlist.overwritten" "Number of queries replaced by new ones" "GAUGE"
      p_config "total.requestlist.exceeded" "Number of queries dropped due to lack of space" "GAUGE"
      echo "graph_info The queries that did not hit the cache and need recursion service take up space in the requestlist. If there are too many queries, first queries get overwritten, and at last resort dropped."
      ;;
    memory)
      echo "graph_title Unbound memory usage"
      echo "graph_args --base 1024 -l 0"
      echo "graph_vlabel memory used in bytes"
      echo "graph_category DNS"
      p_config "mem.cache.rrset" "RRset cache memory" "GAUGE"
      p_config "mem.cache.message" "Message cache memory" "GAUGE"
      p_config "mem.mod.iterator" "Iterator module memory" "GAUGE"
      p_config "mem.mod.validator" "Validator module and key cache memory" "GAUGE"
      p_config "msg.cache.count" "msg cache count" "GAUGE"
      p_config "rrset.cache.count" "rrset cache count" "GAUGE"
      p_config "infra.cache.count" "infra cache count" "GAUGE"
      p_config "key.cache.count" "key cache count" "GAUGE"
      echo "graph_info The memory used by unbound."
      ;;
    by_type)
      echo "graph_title Unbound DNS queries by type"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel queries / \${graph_period}"
      echo "graph_scale no"
      echo "graph_category DNS"
      p_config_prefix "num.query.type"
      echo "graph_info queries by DNS RR type queried for"
      ;;
    by_class)
      echo "graph_title Unbound DNS queries by class"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel queries / \${graph_period}"
      echo "graph_scale no"
      echo "graph_category DNS"
      p_config_prefix "num.query.class"
      echo "graph_info queries by DNS RR class queried for."
      ;;
    by_opcode)
      echo "graph_title Unbound DNS queries by opcode"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel queries / \${graph_period}"
      echo "graph_scale no"
      echo "graph_category DNS"
      p_config_prefix "num.query.opcode"
      echo "graph_info queries by opcode in the query packet."
      ;;
    by_rcode)
      echo "graph_title Unbound DNS answers by return code"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel answer packets / \${graph_period}"
      echo "graph_scale no"
      echo "graph_category DNS"
      p_config_prefix "num.answer.rcode"
      p_config "num.answer.secure" "answer secure" "ABSOLUTE"
      p_config "num.answer.bogus" "answer bogus" "ABSOLUTE"
      p_config "num.rrset.bogus" "num rrsets marked bogus" "ABSOLUTE"
      echo "graph_info answers sorted by return value. rrsets bogus is the number of rrsets marked bogus per \${graph_period} by the validator"
      ;;
    by_flags)
      echo "graph_title Unbound DNS incoming queries by flags"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel queries / \${graph_period}"
      echo "graph_scale no"
      echo "graph_category DNS"
      p_config "num.query.flags.QR" "QR (query reply) flag" "ABSOLUTE"
      p_config "num.query.flags.AA" "AA (auth answer) flag" "ABSOLUTE"
      p_config "num.query.flags.TC" "TC (truncated) flag" "ABSOLUTE"
      p_config "num.query.flags.RD" "RD (recursion desired) flag" "ABSOLUTE"
      p_config "num.query.flags.RA" "RA (rec avail) flag" "ABSOLUTE"
      p_config "num.query.flags.Z" "Z (zero) flag" "ABSOLUTE"
      p_config "num.query.flags.AD" "AD (auth data) flag" "ABSOLUTE"
      p_config "num.query.flags.CD" "CD (check disabled) flag" "ABSOLUTE"
      p_config "num.query.edns.present" "EDNS OPT present" "ABSOLUTE"
      p_config "num.query.edns.DO" "DO (DNSSEC OK) flag" "ABSOLUTE"
      echo "graph_info This graphs plots the flags inside incoming queries. For example, if QR, AA, TC, RA, Z flags are set, the query can be rejected. RD, AD, CD and DO are legitimately set by some software."
      ;;
    histogram)
      echo "graph_title Unbound DNS histogram of reply time"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel queries / \${graph_period}"
      echo "graph_scale no"
      echo "graph_category DNS"
      h_config hcache "cache hits" AREA 999999
      h_config h64ms "0 msec - 66 msec" STACK 0000FF
      h_config h128ms "66 msec - 131 msec" STACK 1F00DF
      h_config h256ms "131 msec - 262 msec" STACK 3F00BF
      h_config h512ms "262 msec - 524 msec" STACK 5F009F
      h_config h1s "524 msec - 1 sec" STACK 7F007F
      h_config h2s "1 sec - 2 sec" STACK 9F005F
      h_config h4s "2 sec - 4 sec" STACK BF003F
      h_config h8s "4 sec - 8 sec" STACK DF001F
      h_config h16s "8 sec - ..." STACK FF0000
      echo "graph_info Histogram of the reply times for queries."
      ;;
  esac

  exit 0
fi

# do the stats itself
get_state

# get the time elapsed
get_value "time.elapsed"
if test "$value" = 0 || test "$value" = "0.000000"; then
  echo "error: time elapsed 0 or could not retrieve data" >&2
  exit 1
fi
# kept from the original; nothing in this script reads it afterwards
elapsed="$value"

# print value for $1
print_value() {
  field_name "$1"
  get_value "$1"
  echo "$mn.value $value"
}

# print value if line already found in $2
print_value_line() {
  field_name "$1"
  value=${2##*=}
  echo "$mn.value $value"
}

# print the value of every statefile line matching the regex prefix $1
print_value_prefix() {
  # word splitting is intended: one "name=value" word per statefile line
  for entry in $(grep "^$1" "$state"); do
    print_value_line "${entry%%=*}" "$entry"
  done
}

# add up the values of all statistic names given as arguments into $sum;
# a value that is not a plain number counts as 0
sum_values() {
  sum=0
  for key in "$@"; do
    get_value "$key"
    case $value in
      *[!0-9]*) value=0 ;;
    esac
    sum=$((sum + value))
  done
}

case $id in
  hits)
    for x in $(grep "^thread[0-9][0-9]*\.num\.queries=" "$state" |
      sed -e 's/=.*//') total.num.queries \
      total.num.cachehits total.num.prefetch num.query.tcp \
      num.query.tcpout num.query.tls num.query.tls.resume \
      num.query.ipv6 unwanted.queries \
      unwanted.replies; do
      if has_key "$x"; then
        print_value "$x"
      fi
    done
    ;;
  queue)
    for x in total.requestlist.avg total.requestlist.max \
      total.requestlist.overwritten total.requestlist.exceeded; do
      print_value "$x"
    done
    ;;
  memory)
    for x in mem.cache.rrset mem.cache.message mem.mod.iterator \
      mem.mod.validator msg.cache.count rrset.cache.count \
      infra.cache.count key.cache.count; do
      print_value "$x"
    done
    ;;
  by_type)
    print_value_prefix "num.query.type"
    ;;
  by_class)
    print_value_prefix "num.query.class"
    ;;
  by_opcode)
    print_value_prefix "num.query.opcode"
    ;;
  by_rcode)
    print_value_prefix "num.answer.rcode"
    print_value "num.answer.secure"
    print_value "num.answer.bogus"
    print_value "num.rrset.bogus"
    ;;
  by_flags)
    for x in num.query.flags.QR num.query.flags.AA num.query.flags.TC \
      num.query.flags.RD num.query.flags.RA num.query.flags.Z \
      num.query.flags.AD num.query.flags.CD num.query.edns.present \
      num.query.edns.DO; do
      print_value "$x"
    done
    ;;
  histogram)
    get_value total.num.cachehits
    echo "hcache.value $value"
    # every bucket below 0.065536 sec is folded into h64ms
    sum_values \
      histogram.000000.000000.to.000000.000001 \
      histogram.000000.000001.to.000000.000002 \
      histogram.000000.000002.to.000000.000004 \
      histogram.000000.000004.to.000000.000008 \
      histogram.000000.000008.to.000000.000016 \
      histogram.000000.000016.to.000000.000032 \
      histogram.000000.000032.to.000000.000064 \
      histogram.000000.000064.to.000000.000128 \
      histogram.000000.000128.to.000000.000256 \
      histogram.000000.000256.to.000000.000512 \
      histogram.000000.000512.to.000000.001024 \
      histogram.000000.001024.to.000000.002048 \
      histogram.000000.002048.to.000000.004096 \
      histogram.000000.004096.to.000000.008192 \
      histogram.000000.008192.to.000000.016384 \
      histogram.000000.016384.to.000000.032768 \
      histogram.000000.032768.to.000000.065536
    echo "h64ms.value $sum"
    get_value histogram.000000.065536.to.000000.131072
    echo "h128ms.value $value"
    get_value histogram.000000.131072.to.000000.262144
    echo "h256ms.value $value"
    get_value histogram.000000.262144.to.000000.524288
    echo "h512ms.value $value"
    get_value histogram.000000.524288.to.000001.000000
    echo "h1s.value $value"
    get_value histogram.000001.000000.to.000002.000000
    echo "h2s.value $value"
    get_value histogram.000002.000000.to.000004.000000
    echo "h4s.value $value"
    get_value histogram.000004.000000.to.000008.000000
    echo "h8s.value $value"
    # every bucket from 8 sec upwards is folded into h16s
    sum_values \
      histogram.000008.000000.to.000016.000000 \
      histogram.000016.000000.to.000032.000000 \
      histogram.000032.000000.to.000064.000000 \
      histogram.000064.000000.to.000128.000000 \
      histogram.000128.000000.to.000256.000000 \
      histogram.000256.000000.to.000512.000000 \
      histogram.000512.000000.to.001024.000000 \
      histogram.001024.000000.to.002048.000000 \
      histogram.002048.000000.to.004096.000000 \
      histogram.004096.000000.to.008192.000000 \
      histogram.008192.000000.to.016384.000000 \
      histogram.016384.000000.to.032768.000000 \
      histogram.032768.000000.to.065536.000000 \
      histogram.065536.000000.to.131072.000000 \
      histogram.131072.000000.to.262144.000000 \
      histogram.262144.000000.to.524288.000000
    echo "h16s.value $sum"
    ;;
esac