#!/bin/sh
#
# plugin for munin to monitor usage of NSD.
#
# (C) 2008 W.C.A. Wijngaards.  BSD Licensed.
#
# To install; compile with --enable-bind8-stats (enabled by default)
# and enable nsd-control in nsd.conf with the line
#	remote-control: control-enable: yes
# Run the command nsd-control-setup as root to generate the key files.
#
# Environment variables for this script
#	statefile	- where to put temporary statefile.
#	nsd_conf	- where the nsd.conf file is located.
#	nsd_control	- where to find nsd-control executable.
#	nsd_checkconf	- where to find nsd-checkconf executable.
#
# You can set them in your munin/plugin-conf.d/plugins.conf file
# with:
# [nsd_munin*]
# user root
# env.statefile /usr/local/var/munin/plugin-state/nsd-state
# env.nsd_conf /usr/local/etc/nsd.conf
# env.nsd_control /usr/local/sbin/nsd-control
# env.nsd_checkconf /usr/local/sbin/nsd-checkconf
#
# This plugin can create different graphs depending on what name
# you link it as (with ln -s) into the plugins directory
# You can link it multiple times.
# If you are only a casual user, the _hits and _by_type are most interesting,
# possibly followed by _by_rcode.
#
#	nsd_munin_hits		- base volume, transport type, failures
#	nsd_munin_memory	- memory usage
#	nsd_munin_by_type	- incoming queries by type
#	nsd_munin_by_class	- incoming queries by class
#	nsd_munin_by_opcode	- incoming queries by opcode
#	nsd_munin_by_rcode	- answers by rcode
#	nsd_munin_zones		- number of zones
#
# Magic markers - optional - used by installation scripts and
# munin-config:
#
#%# family=contrib
#%# capabilities=autoconf suggest

# POD documentation
: <<=cut

=head1 NAME

nsd_munin_ - Munin plugin to monitor the NSD server.

=head1 APPLICABLE SYSTEMS

System with NSD daemon.

=head1 CONFIGURATION

  [nsd_munin*]
  user root
  env.statefile /usr/local/var/munin/plugin-state/nsd-state
  env.nsd_conf /usr/local/etc/nsd.conf
  env.nsd_control /usr/local/sbin/nsd-control
  env.nsd_checkconf /usr/local/sbin/nsd-checkconf

Use the .env settings to override the defaults.

=head1 USAGE

Can be used to present different graphs. Use ln -s for that name in
the plugins directory to enable the graph.

  nsd_munin_hits       - base volume, transport type, failures
  nsd_munin_memory     - memory usage
  nsd_munin_by_type    - incoming queries by type
  nsd_munin_by_class   - incoming queries by class
  nsd_munin_by_opcode  - incoming queries by opcode
  nsd_munin_by_rcode   - answers by rcode
  nsd_munin_zones      - number of zones

=head1 AUTHOR

Copyright 2008 W.C.A. Wijngaards

=head1 LICENSE

BSD

=cut

state=${statefile:-/usr/local/var/munin/plugin-state/nsd-state}
conf=${nsd_conf:-/usr/local/etc/nsd.conf}
ctrl=${nsd_control:-/usr/local/sbin/nsd-control}
# kept for configuration compatibility; not used by the code below.
chkconf=${nsd_checkconf:-/usr/local/sbin/nsd-checkconf}
lock=$state.lock

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# succeed if $1 is a non-empty string of decimal digits only.
is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# convert an NSD statistic name $1 into a munin field name on stdout.
# abbreviates the common words to keep things within 19 characters
# and replaces dots with underscores.
munin_name() {
  printf '%s\n' "$1" |
    sed -e 's/num/n/' -e 's/type/t/' -e 's/opcode/o/' \
      -e 's/rcode/r/' -e 's/class/c/' |
    tr . _
}

# succeed if a line "$1=..." exists in the statefile.
# the key is compared as an exact string, not as a regular expression.
has_key() {
  awk -F= -v k="$1" '$1 == k { found = 1; exit } END { exit !found }' \
    "$state" 2>/dev/null
}

# get value of key $1 from the statefile into return variable $value.
# $value is "0" if the key is missing or has an empty value.
get_value() {
  value=$(awk -F= -v k="$1" \
    '$1 == k { sub(/^[^=]*=/, ""); print; exit }' "$state" 2>/dev/null)
  if test -z "$value"; then
    value="0"
  fi
}

# download the state from NSD into the statefile, unless the statefile
# is younger than $lee seconds.  exits the script with status 1 if the
# lock cannot be obtained or the statistics cannot be retrieved.
get_state() {
  # obtain lock for fetching the state
  # because there is a race condition in fetching and writing to file

  # see if the lock is stale, if so, take it
  if test -f "$lock"; then
    pid=$(cat "$lock" 2>/dev/null)
    if ! kill -0 "$pid" >/dev/null 2>&1 && test "$pid" != "$$"; then
      echo "$$" >"$lock"
    fi
  fi

  i=0
  while test ! -f "$lock" || test "$(cat "$lock" 2>&1)" != "$$"; do
    while test -f "$lock"; do
      # wait: spin for the first 1000 rounds, then one second per round
      i=$((i + 1))
      if test "$i" -gt 1000; then
        sleep 1
      fi
      if test "$i" -gt 1500; then
        echo "error locking $lock = $(cat "$lock" 2>/dev/null)" >&2
        rm -f "$lock"
        exit 1
      fi
    done
    # try to get it
    echo "$$" >"$lock"
  done

  # do not refetch if the file exists and only LEE seconds old
  if test -f "$state"; then
    now=$(date +%s)
    get_value "timestamp"
    # a damaged timestamp counts as "very old" and forces a refetch
    is_uint "$value" || value=0
    if test "$now" -lt $((value + lee)); then
      rm -f "$lock"
      return
    fi
  fi

  # fetch into a temporary file and rename it into place, so that readers
  # never see a partial statefile and a failed fetch leaves the previous
  # statefile untouched.
  tmpstate="$state.tmp.$$"
  if ! "$ctrl" -c "$conf" stats >"$tmpstate"; then
    echo "error retrieving data from the server" >&2
    rm -f "$tmpstate" "$lock"
    exit 1
  fi
  echo "timestamp=$(date +%s)" >>"$tmpstate"
  if ! mv -f "$tmpstate" "$state"; then
    echo "error writing $state" >&2
    rm -f "$tmpstate" "$lock"
    exit 1
  fi
  rm -f "$lock"
}

# print label $2, min 0 and type $3 for a name $1 in nsd format
p_config() {
  mn=$(munin_name "$1")
  printf '%s.label %s\n' "$mn" "$2"
  printf '%s.min 0\n' "$mn"
  printf '%s.type %s\n' "$mn" "$3"
}

# print the munin value line for statistic $1, read from the statefile
print_value() {
  get_value "$1"
  printf '%s.value %s\n' "$(munin_name "$1")" "$value"
}

# list the statefile lines "serverN.queries=..." (one per server process)
server_lines() {
  grep '^server[0-9][0-9]*\.queries=' "$state" 2>/dev/null
}

# list the statefile lines "num.$1.<name>=..." ($1 is type, class,
# opcode or rcode)
kind_lines() {
  grep "^num\\.$1\\." "$state" 2>/dev/null
}

# print config for every "num.$1.<name>" statistic in the statefile,
# labelled with <name>
config_by_kind() {
  kind_lines "$1" | while IFS='=' read -r key rest; do
    p_config "$key" "${key#num."$1".}" "ABSOLUTE"
  done
}

# print the value of every "num.$1.<name>" statistic in the statefile
values_by_kind() {
  kind_lines "$1" | while IFS='=' read -r key val; do
    printf '%s.value %s\n' "$(munin_name "$key")" "$val"
  done
}

# print $1 (a size in kilobytes as reported by ps) in bytes, or 0 if $1
# is not a plain number (for example because the process went away).
kb_to_bytes() {
  if is_uint "$1"; then
    echo $(($1 * 1024))
  else
    echo 0
  fi
}

if test "$1" = "autoconf"; then
  # munin expects "yes" or "no (reason)" on stdout and exit status 0
  # in both cases.
  if test ! -f "$conf"; then
    echo "no ($conf does not exist)"
    exit 0
  fi
  statedir=$(dirname "$state")
  if test ! -d "$statedir"; then
    mkdir -p "$statedir"
    if test ! -d "$statedir"; then
      echo "no ($state directory does not exist)"
      exit 0
    fi
  fi
  echo yes
  exit 0
fi

if test "$1" = "suggest"; then
  echo "hits"
  echo "memory"
  echo "by_type"
  echo "by_class"
  echo "by_opcode"
  echo "by_rcode"
  echo "zones"
  exit 0
fi

# determine my type, by name
id=${0##*nsd_munin_}
if test -z "$id"; then
  # some default to keep people sane.
  id="hits"
fi

if test "$1" = "config"; then
  if test ! -f "$state"; then
    get_state
  fi
  case $id in
    hits)
      echo "graph_title NSD traffic"
      echo "graph_args --base 1000 -l 0"
      echo 'graph_vlabel queries / ${graph_period}'
      echo "graph_scale no"
      echo "graph_category DNS"
      # one field per server process present in the statefile
      server_lines | while IFS='=' read -r key rest; do
        p_config "$key" "queries handled by ${key%.queries}" "ABSOLUTE"
      done
      p_config "num.queries" "total queries" "ABSOLUTE"
      p_config "num.udp" "UDP ip4 queries" "ABSOLUTE"
      p_config "num.udp6" "UDP ip6 queries" "ABSOLUTE"
      p_config "num.tcp" "TCP ip4 queries" "ABSOLUTE"
      p_config "num.tcp6" "TCP ip6 queries" "ABSOLUTE"
      p_config "num.edns" "queries with EDNS OPT" "ABSOLUTE"
      p_config "num.ednserr" "queries failed EDNS parse" "ABSOLUTE"
      p_config "num.answer_wo_aa" "nonauthor. queries (referrals)" "ABSOLUTE"
      p_config "num.rxerr" "receive failed" "ABSOLUTE"
      p_config "num.txerr" "transmit failed" "ABSOLUTE"
      p_config "num.truncated" "truncated replies with TC" "ABSOLUTE"
      p_config "num.raxfr" "AXFR from allowed client" "ABSOLUTE"
      p_config "num.rixfr" "IXFR from allowed client" "ABSOLUTE"
      p_config "num.dropped" "dropped due to sanity check" "ABSOLUTE"
      echo "graph_info DNS queries."
      ;;
    memory)
      echo "graph_title NSD memory usage"
      echo "graph_args --base 1024 -l 0"
      echo "graph_vlabel memory used in bytes"
      echo "graph_category DNS"
      p_config "size.vsz" "Total virtual memory (VSZ)" "GAUGE"
      p_config "size.rss" "Total resident memory (RSS)" "GAUGE"
      p_config "size.db.mem" "data in memory" "GAUGE"
      p_config "size.xfrd.mem" "xfr and notify memory" "GAUGE"
      p_config "size.config.mem" "config memory" "GAUGE"
      p_config "size.db.disk" "mmap of nsd.db file" "GAUGE"
      p_config "size.config.disk" "config zonelist on disk" "GAUGE"
      echo "graph_info The memory used by NSD, xfrd and config. Disk size of nsd.db and zonelist."
      ;;
    by_type)
      echo "graph_title NSD queries by type"
      echo "graph_args --base 1000 -l 0"
      echo 'graph_vlabel queries / ${graph_period}'
      echo "graph_scale no"
      echo "graph_category DNS"
      config_by_kind "type"
      echo "graph_info queries by DNS RR type queried for"
      ;;
    by_class)
      echo "graph_title NSD queries by class"
      echo "graph_args --base 1000 -l 0"
      echo 'graph_vlabel queries / ${graph_period}'
      echo "graph_scale no"
      echo "graph_category DNS"
      config_by_kind "class"
      echo "graph_info queries by DNS RR class queried for."
      ;;
    by_opcode)
      echo "graph_title NSD queries by opcode"
      echo "graph_args --base 1000 -l 0"
      echo 'graph_vlabel queries / ${graph_period}'
      echo "graph_scale no"
      echo "graph_category DNS"
      config_by_kind "opcode"
      echo "graph_info queries by opcode in the query packet."
      ;;
    by_rcode)
      echo "graph_title NSD answers by return code"
      echo "graph_args --base 1000 -l 0"
      echo 'graph_vlabel answer packets / ${graph_period}'
      echo "graph_scale no"
      echo "graph_category DNS"
      config_by_kind "rcode"
      echo "graph_info answers split out by return value."
      ;;
    zones)
      echo "graph_title NSD number of zones"
      echo "graph_args --base 1000 -l 0"
      echo "graph_vlabel zone count"
      echo "graph_category DNS"
      p_config "zone.total" "total zones" "GAUGE"
      p_config "zone.master" "master zones" "GAUGE"
      p_config "zone.slave" "slave zones" "GAUGE"
      echo "graph_info number of zones served by NSD."
      ;;
  esac

  exit 0
fi

# do the stats itself
get_state

# get the time elapsed; a zero or missing value means the statefile
# does not hold usable statistics.
get_value "time.elapsed"
if test "$value" = "0" || test "$value" = "0.000000"; then
  echo "error: time elapsed 0 or could not retrieve data" >&2
  exit 1
fi

case $id in
  hits)
    server_lines | while IFS='=' read -r key val; do
      printf '%s.value %s\n' "$(munin_name "$key")" "$val"
    done
    for x in num.queries num.udp num.udp6 num.tcp num.tcp6 \
      num.edns num.ednserr num.answer_wo_aa num.rxerr num.txerr \
      num.truncated num.raxfr num.rixfr num.dropped; do
      if has_key "$x"; then
        print_value "$x"
      fi
    done
    ;;
  memory)
    # get the total memory for NSD
    serverpid=$("$ctrl" -c "$conf" serverpid 2>/dev/null | tr -d ' \t\r')
    # small race condition, if reload happens between the line above and
    # the ps calls below; detected by checking that the output is a number,
    # in which case 0 is reported.
    rssval=""
    vszval=""
    if is_uint "$serverpid"; then
      rssval=$(ps -p "$serverpid" -o rss= 2>/dev/null | tr -d ' \t')
      vszval=$(ps -p "$serverpid" -o vsz= 2>/dev/null | tr -d ' \t')
    fi
    echo "size_vsz.value $(kb_to_bytes "$vszval")"
    echo "size_rss.value $(kb_to_bytes "$rssval")"
    for x in size.db.mem size.xfrd.mem size.config.mem \
      size.db.disk size.config.disk; do
      print_value "$x"
    done
    ;;
  by_type)
    values_by_kind "type"
    ;;
  by_class)
    values_by_kind "class"
    ;;
  by_opcode)
    values_by_kind "opcode"
    ;;
  by_rcode)
    values_by_kind "rcode"
    ;;
  zones)
    get_value "zone.master"
    nummas="$value"
    is_uint "$nummas" || nummas=0
    get_value "zone.slave"
    numsla="$value"
    is_uint "$numsla" || numsla=0
    # the total is computed here from the master and slave counts
    echo "zone_total.value $((nummas + numsla))"
    echo "zone_master.value $nummas"
    echo "zone_slave.value $numsla"
    ;;
esac