#!%TCLSH% # # Network graph building daemon # # When a modification is done on an equipment (either by a human or by # a program when a user is using web interface to change characteristics # of an interface), this modification is detected by: # - detectconfmod script which reads syslog output # - radius daemon # They both write an entry in topo.modeq table of the database. # # The topographd daemon exploit this topo.modeq table. Its algorithm # is as follows. # # Infinite loop # once by night, fetches all equipment configuration files # (full rancid), analyzes every configuration file and # rebuild the graph. # # when an equipment specification is modified in topo.eq # table, generates the router.db rancid file), fetches # all equipments (full rancid), analyzes all configuration # files and rebuild the graph. # # when a vlan specification is modified in topo.vlan table, # generates the vlan.eq file, and rebuild the graph. # # when an equipment is modified (topo.modeq table), fetches # its configuration, analyzes it, and rebuild the graph. # When graph is rebuilt, mark the entry in topo.modeq as # "processed". # # History # 2010/02/16 : pda/jean : design # 2010/12/18 : pda : rework installation # 2012/01/18 : pda : ranciddb -> ranciddir # 2012/03/27 : pda/jean : daemonization # set conf(extractcoll) {extractcoll -a -s -w -i -p} set conf(start-rancid) {start-rancid %1$s} set conf(anaconf) {anaconf} source %LIBNETMAGIS% ############################################################################## # Graph updating ############################################################################## # # Detect separator in file using the following heuristic: # the expected format of the file is: # # + # Separator is the first non-alphanumeric and non-dot character # (ie. a hostname or fully qualified domain name) # found after the first field in the file. # # Input: # - filename : file name to open # Output: # - return value : character used as separator (":", ";" etc.), # - msg: error message if # # History # 2015/09/08 : jean/boggia : design # # proc detect-separator {filename _msg} { upvar $_msg msg set separator "" set msg "" if { [catch {open $filename "r"} fd] } then { set msg "detect-separator: cannot open $filename: $fd" } else { if {[gets $fd line] >= 0} then { if {![regexp {^[.a-zA-Z0-9-]+([^.a-zA-Z0-9])} $line dummy separator]} then { set msg "detect-separator: separator not found in $filename" } } else { set msg "detect-separator: could not read first line of $filename" } } return $separator } # Generate a new router.db rancid file # # Input: # - none # Output: # - return value : empty string or error message # # History # 2010/12/13 : pda/jean : design # 2012/04/25 : pda : create file if it doesn't exist (even if no modif) # proc update-routerdb {} { global ctxt set sql "SELECT * FROM topo.modeq WHERE eq = '_routerdb' AND processed = 0" set found 0 if {! [toposqlselect $sql tab { set found 1 }]} then { return "Cannot read equipment modification from database" } if {$found || ! [file exists $ctxt(routerdb)]} then { set sql "SELECT e.eq, t.type, e.up FROM topo.eq e, topo.eqtype t WHERE e.idtype = t.idtype" set leq {} if {! [toposqlselect $sql t { lappend leq [list $t(eq) $t(type) $t(up)] }]} then { return "Cannot read equipment list from database" } set msg "" set sep [detect-separator $ctxt(routerdb) msg] if {$msg ne ""} then { return $msg } set new "$ctxt(routerdb).new" if {[catch {set fd [open $new "w"]} msg]} then { return "Cannot create $new ($msg)" } foreach e $leq { lassign $e eq type up if {$up} then { set up "up" } else { set up "down" } puts $fd "$eq$sep$type$sep$up" } if {[catch {close $fd} msg]} then { return "Cannot close $new ($msg)" } if {[catch {file rename -force $new $ctxt(routerdb)} msg]} then { return "Cannot move $new to $ctxt(routerdb) ($msg)" } set sql "UPDATE topo.modeq SET processed = 1 WHERE eq = '_routerdb'" if {! [toposqlexec $sql]} then { return "Cannot update equipment modification for _routerdb" } } return "" } # # Guess if a full rancid run is needed # # Input: # - routerdbmod : in return, result from detect-filemod # Output: # - return value : # -1 : error # 0 : no full rancid needed # 1 : full rancid needed # - parameter routerdbmod : detect-filemod result # # History # 2010/10/15 : pda/jean : design # proc full-rancid-needed {_routerdbmod} { global ctxt upvar $_routerdbmod routerdbmod set msg [update-routerdb] if {$msg ne ""} then { keep-state-mail "router.db" $msg return -1 } set sql "SELECT topo.lastrun.date IS NULL OR ( (date_trunc('day',topo.lastrun.date) <> date_trunc('day',now()) AND extract(hour from now())>=$ctxt(fullrancidmin) AND extract(hour from now())<=$ctxt(fullrancidmax)) ) AS result FROM topo.lastrun" # if selects succeeds, returns the result of SQL query, # while translating it to 1 (true) or 0 (false) set r2 1 set r [toposqlselect $sql tab { set r2 [expr $tab(result) ? 1 : 0]}] if {$r} then { set r $r2 # detect if router.db has been modified set routerdbmod {} set fmod [detect-filemod $ctxt(routerdb)] if {[llength $fmod] > 0} then { lassign $fmod code path date switch $code { err { set msg $date set r -1 } add { set msg "File router.db added" set r 1 } mod { set msg "Resuming normal operation" set r 1 } del { set msg "File router.db deleted" set r -1 } } keep-state-mail "router.db" $msg if {$r == 1} then { set routerdbmod $fmod } } else { keep-state-mail "router.db" "Resuming normal operation" } } return $r } # # Update topo graph from equipment configuration files # # Input: # - full : 1 if a full rancid is needed # - routerdbmod : result from detect-filemod, or empty # - leq : list of modified equipments (may be with a fake "_vlan" equipement) # - leqvirt : modified virtual equipments, whose date must be reset in # database. See detect-dirmod for the format of this list. # Output: # - return value : 1 if ok, 0 if error # # History # 2010/10/15 : pda/jean : design # 2010/10/20 : pda/jean : coding # 2010/11/12 : pda/jean : add leqvirt # proc update-graph {full routerdbmod leq leqvirt} { global conf # # Reset equipments marked as modified in the topo.modeq table # if {! [toposqllock]} then { return 0 } if {[llength $leq] == 0} then { set sql "UPDATE topo.modeq SET processed = 1" } else { set inlist [join $leq "', '"] set sql "UPDATE topo.modeq SET processed = 1 WHERE eq IN ('$inlist')" } if {! [toposqlexec $sql]} then { return 0 } # # Run rancid and send a mail if needed # if {$full} then { set callrancid 1 set leqrancid {} } else { # It is not a full rancid run. # Remove dummy equipment "_vlan". If, after this removal, there # is no equipment, distinguish from the "full-rancid" case. set pos [lsearch -exact $leq "_vlan"] if {$pos != -1} then { set leqrancid [lreplace $leq $pos $pos] } else { set leqrancid $leq } if {[llength $leqrancid] == 0} then { set callrancid 0 } else { set callrancid 1 } } if {$callrancid} then { if {! [rancid $leqrancid]} then { toposqlunlock "abort" return 0 } } # # Update modification time of router.db if needed # if {[llength $routerdbmod] > 0} then { if {! [sync-filemonitor [list $routerdbmod]]} then { toposqlunlock "abort" return 0 } } # If it is not a "full anaconf" run, add virtual equipments if {$full} then { set leqanaconf {} } else { set leqanaconf $leq foreach meq $leqvirt { topo-verbositer "processing $meq" 9 lassign $meq code path date if {$code eq "add" || $code eq "mod"} then { if {[regexp {([^/]+)\.eq$} $path bidon eq]} then { topo-verbositer "adding virtual $eq to leqanaconf" 9 lappend leqanaconf $eq } } } } # # Update graph and send a mail if needed # if {! [anaconf $leqanaconf]} then { toposqlunlock "abort" return 0 } # # Update modification time of virtual equipments # if {! [sync-filemonitor $leqvirt]} then { toposqlunlock "abort" return 0 } # # Update sensor list # if {! [sensors]} then { toposqlunlock "abort" return 0 } # # Update date of last full rancid/anaconf # if {[llength $leq] == 0} then { set sql "DELETE FROM topo.lastrun ; INSERT INTO topo.lastrun (date) VALUES (NOW ())" if {! [toposqlexec $sql]} then { return 0 } } toposqlunlock "commit" return 1 } # # Call rancid # # Input: # - leq (optional) : modified equipment list # Output: # - return value : 1 if ok, 0 if error # # History # 2010/10/20 : pda/jean : design # proc rancid {{leq {}}} { global conf global ctxt if {[llength $leq] == 0} then { set-status "Ranciding all equipements" } else { set-status "Ranciding $leq" } # # Call rancid # set cmd [format "$ctxt(topobindir)/$conf(start-rancid)" $leq] topo-verbositer "rancid : cmd=<$cmd>" 2 if {[catch {exec sh -c $cmd} msg]} then { # erreur set msg "Error while running '$cmd'\n$msg" set r 0 } else { # No error: msg contains rancid output if {$msg eq ""} then { set msg "Resuming normal operation" } set r 1 } # # Send a mail if needed # if {[llength $leq] == 0} then { set ev "fullrancid" } else { set ev "rancid" } keep-state-mail $ev $msg return $r } # # Call anaconf to build the graph # # Input: # - leq (optional) : modified equipment list # Output: # - return value : 1 if ok, 0 if error # # History # 2010/10/20 : pda/jean : design # proc anaconf {{leq {}}} { global conf global ctxt if {[llength $leq] == 0} then { set-status "Building graph for all equipements" } else { set-status "Building graph for $leq" } set text "" set cmd "$ctxt(topobindir)/$conf(anaconf)" set r 1 foreach eq $leq { append cmd " $eq" } topo-verbositer "anaconf : cmd=<$cmd>" 2 if {[catch {exec sh -c $cmd} msg]} then { set msg "Error in $cmd\n$msg" set r 0 } else { # no error } set text $msg # # Send a mail if needed # keep-state-mail "anaconf" $msg return $r } # # Read sensor list and update it in database # # Input: none # Output: # - return value : 1 if ok, 0 if error # # History # 2010/11/09 : pda/jean : design # proc sensors {} { global conf global ctxt set-status "Updating sensor list" # # Read existing sensors in database # set sql "SELECT * FROM topo.sensor" set r [toposqlselect $sql tab { set id $tab(id) set told($id) [list $tab(type) $tab(eq) \ $tab(comm) $tab(iface) $tab(param)] } ] if {! $r} then { keep-state-mail "sensors" "Cannot read sensor list from database" return 0 } # # Red new sensor list from the graph # if {! [read-coll tnew msg]} then { keep-state-mail "sensors" "Cannot read sensor list from graph\n$msg" return 0 } if {$msg ne ""} then { keep-state-mail "sensors" "Inconsistent sensors:\n$msg\nSensor list not updated." return 1 } # # Difference analysis # set lunmod {} set sql {} foreach id [array names tnew] { lassign $tnew($id) type eq comm iface param set qtype [::pgsql::quote $type] set qid [::pgsql::quote $id] set qeq [::pgsql::quote $eq] set qcomm [::pgsql::quote $comm] set qiface [::pgsql::quote $iface] set qparam [::pgsql::quote $param] if {[info exists told($id)]} then { # # Update common sensors # if {$tnew($id) eq $told($id)} then { # # Same : just update lastseen date # lappend lunmod "'$qid'" } else { # # Not the same: update all fields # lappend sql "UPDATE topo.sensor SET type = '$qtype', eq = '$qeq', comm = '$qcomm', iface = '$qiface', param = '$qparam', lastmod = DEFAULT, lastseen = DEFAULT WHERE id = '$qid'" } unset told($id) } else { # # New sensor # lappend sql \ "INSERT INTO topo.sensor (id, type, eq, comm, iface, param) VALUES ('$qid','$qtype','$qeq','$qcomm','$qiface','$qparam')" } } # # Update date of sensors seen, but not modified # if {[llength $lunmod] > 0} then { set l [join $lunmod ","] lappend sql "UPDATE topo.sensor SET lastseen = DEFAULT WHERE id IN ($l)" } # # Remove old sensors, after some delay # lappend sql "DELETE FROM topo.sensor WHERE lastseen + interval '$ctxt(sensorexpire) days' < now()" # # Send the huuuuuge SQL command # if {[llength $sql] > 0} then { set sql [join $sql ";"] if {! [toposqlexec $sql]} then { keep-state-mail "sensors" "Cannot write sensors in database" return 0 } } # # Send a mail if needed # keep-state-mail "sensors" "Resuming normal operation" return 1 } # # Read lines from "extractcoll" and get sensor list # # Input: # - _tab : in return, information extracted # - _msg : in return, empty string or error/warning message # Output: # - return value : 1 if ok, 0 if error # - parameter _tab: array, indexed by sensor names, containing a list # { [ []]} # # Note : # Format of input file is: # trafic # nbassocwifi # nbauthwifi # port {} # ipmac # # History # 2008/07/28 : pda/boggia : design # 2008/07/30 : pda : adapt to new input format # 2010/11/09 : pda/jean : topographd integration # 2010/12/19 : pda : use call-topo # 2010/12/21 : pda : any message is not automatically an error # 2011/12/08 : jean : portmac and ipmac collector integration # proc read-coll {_tab _msg} { global conf upvar $_tab tab upvar $_msg msg set cmd $conf(extractcoll) if {! [call-topo $cmd msg]} then { return 0 } set lwarn {} foreach line [split $msg "\n"] { set l [split $line] switch [lindex $l 0] { trafic { lassign $l kw id eq comm iface vlan if {$vlan ne "-"} then { set iface "$iface.$vlan" } set sensor [list $kw $eq $comm $iface {}] } nbassocwifi - nbauthwifi { lassign $l kw id eq comm iface ssid set sensor [list $kw $eq $comm $iface $ssid] } portmac { lassign $l kw id eq comm eqtype ifacelist vlanid set sensor [list "portmac.$eqtype" $eq $comm $ifacelist $vlanid] } ipmac { lassign $l kw id eq comm eqtype set sensor [list "ipmac" $eq $comm {} {}] } default { lappend lwarn "Unknown sensor type ($l)" set sensor "" } } if {$sensor ne ""} then { if {[info exists tab($id)]} then { # same format for all sensor types, until now lassign $tab($id) okw oeq ocomm oiface lappend lwarn "Sensor '$id' seen more than once ($eq/$iface and $oeq/$oiface)" } set tab($id) $sensor } } set msg [join $lwarn "\n"] return 1 } ############################################################################## # Detection of equipment modified ############################################################################## # # Detect modifications on equipments # # Output: # - return value : list of modified equipments, or empty list # # History # 2010/10/21 : pda/jean : design # 2011/01/05 : jean : known equipements are pulled from rancid # proc detect-mod {} { set l {} set sql "SELECT DISTINCT(eq) AS eq FROM topo.modeq WHERE processed = 0" if {! [toposqlselect $sql tab { lappend l $tab(eq) }]} then { return {} } set sql "SELECT eq FROM topo.eq WHERE up=1" if {! [toposqlselect $sql tab {set rancideq($tab(eq)) 1}]} then { return {} } # # Check that equipment is managed by rancid # Note: according to equipment types, syslogd versions, and equipment # local configuration, names may be short names (not fqdn). In those # cases, we suppose that equipment is not managed (it is surely # an error in the detection script) # set leq {} set lunk {} foreach eq $l { if {[info exists rancideq($eq)]} then { lappend leq $eq } elseif {$eq eq "_vlan"} then { lappend leq $eq } elseif {$eq ne "_routerdb"} then { lappend lunk $eq } } if {[llength $lunk] == 0} then { keep-state-mail "detectunknw" "Resuming normal operation" } else { keep-state-mail "detectunknw" \ "Change detected on unknown equipments ($lunk)" } return $leq } ############################################################################## # Main program ############################################################################## # The -z option is reserved for internal use set usage {usage: %1$s [-h][-f][-v ] -h : display this text -f : run in foreground -v : verbose level (0 = none, 1 = minimum, 99 = max) } proc usage {argv0} { global usage puts stderr [format $usage $argv0] } # # Main program # proc main {argv0 argv} { global conf global ctxt set ctxt(dbfd1) "" set ctxt(dbfd2) "" set verbose 0 set foreground 0 set daemonized 0 # # Get configuration values from local file # set-log [get-local-conf "logger"] set ctxt(routerdb) [get-local-conf "ranciddir"] append ctxt(routerdb) "/router.db" set eqvirtdir [get-local-conf "eqvirtdir"] set ctxt(topobindir) [get-local-conf "topobindir"] # # Get configuration values from database # config ::dnsconfig lazy-connect set delay [dnsconfig get "topographddelay"] set delay [expr $delay*1000] set ctxt(maxstatus) [dnsconfig get "topomaxstatus"] set ctxt(sensorexpire) [dnsconfig get "sensorexpire"] set ctxt(modeqexpire) [dnsconfig get "modeqexpire"] set ctxt(fullrancidmin) [dnsconfig get "fullrancidmin"] set ctxt(fullrancidmax) [dnsconfig get "fullrancidmax"] # # Argument analysis # while {[llength $argv] > 0} { switch -glob -- [lindex $argv 0] { -h { usage $argv0 return 0 } -f { set foreground 1 set argv [lreplace $argv 0 0] } -z { # This option is not meant to be used by a human # It implies that the program is being rerun in order to be # daemonized set daemonized 1 set argv [lreplace $argv 0 0] } -v { set verbose [lindex $argv 1] set argv [lreplace $argv 0 1] } -* { usage $argv0 return 1 } default { break } } } if {[llength $argv] != 0} then { usage $argv0 return 1 } if {! $foreground && ! $daemonized} then { set argstr {} if {$verbose > 0} then { lappend argstr -v $verbose } lappend argstr "-z" run-as-daemon $argv0 [join $argstr " "] } reset-status set-status "Starting topographd" # # Default values # topo-set-verbose $verbose if {$verbose > 0} then { set-trace {toposqlselect toposqlexec toposqllock toposqlunlock keep-state-mail update-routerdb full-rancid-needed update-graph rancid anaconf sensors read-coll read-eq-type detect-mod detect-filemod detect-dirmod sync-filemonitor} } # # Daemon main loop # set first 1 while {true} { # # Except first time, wait for the delay # topo-verbositer "delay : first=$first delay=$delay" 10 if {! $first} then { after $delay } set first 0 # # Detect if a full rancid is needed (i.e. if no full rancid # has been done since 2 o'clock this morning, for example). # switch [full-rancid-needed routerdbmod] { -1 { # error continue } 0 { # not needed } 1 { # graph must be updated # check modification times of virtual equipments # in order to update them after configuration read. set leqvirt [detect-dirmod $eqvirtdir err] if {$err ne ""} then { keep-state-mail "eqvirt" $err continue } if {! [update-graph 1 $routerdbmod {} $leqvirt]} then { continue } } } # # Search for modified equipments and rebuild the graph # # virtual equipments set leqvirt [detect-dirmod $eqvirtdir err] if {$err ne ""} then { keep-state-mail "eqvirt" $err continue } set leq [detect-mod] if {[llength $leq] > 0 || [llength $leqvirt] > 0} then { update-graph 0 {} $leq $leqvirt } } } exit [main $argv0 $argv]