1#!/usr/bin/perl 2# vim:ts=4 3# nagios: -epn 4# 5# check_vmware.pl 6# Version 0.1 : Steve Shipway, The University of Auckland 7# 0.2 : Change syntax, and generation of configs, better error traps 8# 0.3 : Persistent sessions 9# 0.4 : Correct for later versions of VI API 10# 0.5 : Perfparse stats, NSCA 11# 0.6, 0.7 : NSCA for CPU and Memory 12# 0.8 : Parameterise all the thresholds 13# 0.9 : Fix percentages for multi-CPUs, fix memactive output, 14# add helpful suggestions on critical messages 15# 0.10 : Memory private usage was incorrect, active was redundant 16# 0.11 : Check $totspace to prevent /0 error 17# 18# This script performs general checks and data extractions for monitoring 19# ESX servers via the Virtual Centre API. Output can be for MRTG or Nagios 20# 21# You will need to install: 22# VI Perl Toolkit (download from VMWare website) 23# Class::MethodMaker 24# SOAP::Lite 25# XML::LibXML 26# ... and all dependent modules 27# You need the latest version of HTTP::Message! 28# 29# TO DO: 30# swap statistics 31# network statistics (lvl 3) 32# disk activity statistics (lvl 3) 33# query tools 34########################################################################## 35 36use strict; 37use VMware::VIRuntime; 38use VMware::VILib; 39my($VERSION) = "0.12"; 40 41########################################################################## 42# Default thresholds for Nagios checks 43my($WARNSPACE,$CRITSPACE) = ( 5, 3); # in GB 44my($WARNCPU,$CRITCPU) = (80,90); # percent max (VC defaults) 45my($WARNMEM,$CRITMEM) = (80,90); # percent max (VC defaults) 46my($WARNFAIR,$CRITFAIR) = (90,80); # percent min 47#my($WARNACTIVE,$CRITACTIVE)=(70,80); # percent max 48my($WARNREADY,$CRITREADY) = ( 5,10); # percent max (VMware recommended level) 49########################################################################## 50# Other configurable options 51my($TIMEOUT) = 5; # response time in secods 52my($DEBUG) = 0; # set to 1 for extra output 53my($SESSIONFILE)=""; # default place to 
save session file 54# if these 2 are set, and --nsca is given, then the external send_nsca 55# will be used instead of the internal code. 56my($NSCA) = "/usr/local/nrpe/send_nsca"; 57my($NSCACFG) = "/usr/local/nrpe/send_nsca.cfg"; 58my($MAXNSCA) = 10; 59my($MAXGUESTCPUS) = 4; # guests cant have more than this many CPUs 60my($NEWLINE) = "<BR>"; # use \\n for nag3, <BR> for nag2 61########################################################################## 62########################################################################## 63$Util::script_version = $VERSION; 64 65my($isnagios) = 1; # default reporting mode 66my($report,$rv,$begin,$vm); 67my($mode) = 0; # different Nagios/MRTG modes 68my( $havensca ) = 0; 69 70my($perfmgr); 71my(%perfkeys) = (); 72my($entity); 73my(@queries) = (); 74my(@metricids) = (); 75my($perfdata); 76my($interval) = 0; 77my($servicecontent); 78 79my($MSG,$A,$B,$STATUS,$PERF) = ("","UNKNOWN","UNKNOWN",3,"|"); 80 81# Format for perfdata: 82# |[<name>=<value><unit>;<warn>;<crit>;<min>;<max> ]+ 83# where all but name and value can be blank, and name must be quoted if it 84# contains embedded spaces or symbols. 85 86$Util::script_version = "1.0"; 87$|=1; 88$SIG{CHLD} = sub { print "SIGCHLD\n" if($DEBUG); }; 89 90my( %opts ) = ( 91 guest => { type => "=s", 92 help => "Name, hostname, or IP address of the Guest, if reporting for a specific guest rather than for a datacentre, cluster or host", 93 required => 0, 94 }, 95 host => { type => "=s", 96 help => "Hostname of the ESX Server (optional). Default is all.", 97 required => 0, 98 }, 99 datacenter => { type => "=s", 100 help => "Name of the Datacenter (optional). Default is all.", 101 required => 0, 102 }, 103 cluster => { type => "=s", 104 help => "Name of the Cluster (optional). 
Default is all.", 105 required => 0, 106 }, 107 debug => { type => ":i", 108 help => "Debug level.", 109 required => 0, 110 }, 111 generate => { type => "", 112 help => "Set this flag to attempt to generate configuration files for the active type", 113 required => 0, 114 }, 115 mode => { type => "=s", 116 help => "Nagios (default) or MRTG", 117 required => 0, 118 }, 119 report => { type => "=s", 120 help => "Report type: state (default), cpu, memory, disk, net. With optional numerical suffix for different MRTG reports. The suffix is only meaningful for 'cpu' and 'memory' report types. EG: state, memory, memory2, memory3, etc.", 121 required => 0, 122 }, 123 instance => { type => "=s", 124 help => "Disk or Network device name if required. This is similar to include but more efficient if you have a single instance to select. This only has an effect if the report is 'net' or 'disk'.", 125 required => 0, 126 }, 127 include => { type => "=s", 128 help => "Exclude Disk or Network device names. Regexp, default all. Excludes are processed after Includes. This only has an effect if the report is 'net' or 'disk'.", 129 required => 0, 130 }, 131 exclude => { type => "=s", 132 help => "Include Disk or Network device names. Regexp, default none. Excludes are processed after Includes. This only has an effect if the report is 'net' or 'disk'.", 133 required => 0, 134 }, 135 timeout => { type => "=i", 136 help => "Maximum number of seconds for response from VirtualCentre(defined $TIMEOUT).", 137 required => 0, 138 }, 139 nscaserver => { type => "=s", 140 help => "Specify NSCA server name. Default localhost", 141 required => 0, 142 }, 143 nscastrip => { type => "=s", 144 help => "Regular expression to strip from extracted hostname before submitting to NSCA. This is how to convert a FQDN to the Nagios hostname. For example, this could be your site's domain name. 
Default is nothing.", 145 required => 0, 146 }, 147 tolower => { type => "", 148 help => "Force guest hostnames to all lower case before sending to NSCA.", 149 required => 0, 150 }, 151 canon => { type => "", 152 help => "Canonicalise guest hostname before using nscastrip, tolower and sending to NSCA.", 153 required => 0, 154 }, 155 nsca => { type => "", 156 help => "Enable NSCA mode", 157 required => 0, 158 }, 159 warn => { type => "=i", 160 help => "Warning threshold (Currently: CPU=$WARNCPU\%, MEM=$WARNMEM\%, DISKSPACE=$WARNSPACE GB).", 161 required => 0, 162 }, 163 crit => { type => "=i", 164 help => "Critical threshold (Currently: CPU=$CRITCPU\%, MEM=$CRITMEM\%, DISKSPACE=$CRITSPACE GB).", 165 required => 0, 166 }, 167 warnready => { type => "=i", 168 help => "Warning threshold for CPU Ready time (Currently $WARNREADY\%).", 169 required => 0, 170 }, 171 critready => { type => "=i", 172 help => "Critical threshold for CPU Ready time (Currently $CRITREADY\%).", 173 required => 0, 174 }, 175# warnactive=> { type => "=i", 176# help => "Warning threshold for Active memory (Currently $WARNACTIVE\%).", 177# required => 0, 178# }, 179# critactive=> { type => "=i", 180# help => "Critical threshold for Active memory (Currently $CRITACTIVE\%).", 181# required => 0, 182# }, 183 184); 185 186######################################################################### 187# Error handler 188sub dounknown($) { 189 my($msg) = $_[0]; 190 Util::trace(1, "$msg\n"); 191 Util::disconnect(); 192 if($isnagios) { 193 print "UNKNOWN: $msg$PERF\n"; 194 exit 3; 195 } 196 print "UNKNOWN\nUNKNOWN\n\nERROR: $msg\n"; 197 close NSCAPROC if($havensca); 198 exit 0; 199} 200sub doerror($) { 201 my($msg) = $_[0]; 202 Util::trace(1, "$msg\n"); 203 Util::disconnect(); 204 if($isnagios) { 205 print "ERROR: $msg$PERF\n"; 206 exit 2; 207 } 208 print "UNKNOWN\nUNKNOWN\n\nERROR: $msg\n"; 209 close NSCAPROC if($havensca); 210 exit 0; 211} 212sub canonical($) { 213 my($host) = $_[0]; 214 my($nscastrip); 215 
216 print "Processing [$host]\n" if($DEBUG); 217 218 if( Opts::option_is_set('canon') ) { 219 # DNS magic: canonicalise the hostname, if we can 220 my ( $lhname, $aliases, $addrtype, $length, @addrs) 221 = gethostbyname( $host ); 222 print "Canonicalised $host -> $lhname\n" 223 if($DEBUG and $lhname and ($host ne $lhname)); 224 $host = $lhname if($lhname); 225 } 226 227 if( Opts::option_is_set('nscastrip') ) { 228 $nscastrip = Opts::get_option('nscastrip'); 229 print "Stripping [$nscastrip]\n" if($DEBUG); 230 $host =~ s/$nscastrip//i; 231 } 232 $host =~ s/\.$//; 233 $host = lc $host if(Opts::option_is_set('tolower')); 234 235 return $host; 236} 237######################################################################### 238# NSCA client 239sub sendnsca($$$$) { 240 my($h,$s,$stat,$text) = @_; 241 my($DEVNULL) = " >/dev/null 2>&1 "; 242 243 $DEVNULL = "" if($DEBUG or $^O=~/Win/); 244 245 if(!$havensca) { 246 my($NSCAHOST)="localhost"; 247 if( ! -x $NSCA or ! -r $NSCACFG ) { 248 print "Cannot run $NSCA or cannot read $NSCACFG\n" if($DEBUG); 249 return; 250 } 251 if( Opts::option_is_set('nscaserver') ) { 252 $NSCAHOST = Opts::get_option('nscaserver'); 253 } 254 open NSCAPROC,"|$NSCA -H $NSCAHOST -c $NSCACFG $DEVNULL" or do { 255 print "Cannot run: $NSCA -H $NSCAHOST -c $NSCACFG\n" if($DEBUG); 256 return; 257 }; 258 } 259 print "Sending NSCA message.\n" if($DEBUG); 260 print NSCAPROC "$h\t$s\t$stat\t$text\n"; 261 $havensca += 1; 262 if($havensca > $MAXNSCA) { close NSCAPROC; $havensca = 0; } 263} 264######################################################################### 265# Option processing 266sub validate() { 267 my($valid) = 1; 268 if (Opts::option_is_set('instance')) { 269 if (Opts::option_is_set('report')) { 270 if(Opts::get_option('report') !~ /disk|net/ ) { 271 Util::trace(1, "You can only specify an instance if reporting on 'disk' or 'net'.\n" ); 272 $valid = 0; 273 dounknown("You can only specify an instance if reporting on 'disk' or 'net'."); 274 } 275 } 
else { 276 Util::trace(1, "You can only specify an instance if reporting on 'disk' or 'net'.\n" ); 277 $valid = 0; 278 dounknown("You can only specify an instance if reporting on 'disk' or 'net'."); 279 } 280 } 281# if (Opts::option_is_set('guest')) { 282# if ( Opts::option_is_set('host') or 283# Opts::option_is_set('datacenter') or 284# Opts::option_is_set('cluster')) { 285# Util::trace(1, "\nYou cannot specify a guest name in conjunction with host, datacenter or cluster." ); 286# $valid = 0; 287# } 288# } 289 290 return $valid; 291} 292 293######################################################################### 294sub getalarms($) { 295 my($mo) = $_[0]; 296 my($rv) = ""; 297 my($stat) = 0; 298 my($s); 299 my($aentity,$alarm); 300 my($tas); 301 my($withnsca) = Opts::option_is_set('nsca'); 302 my($nscahost,$nscaservice) = ("",""); 303 my($nscastatus) = 0; 304 305 $tas = $mo->triggeredAlarmState; 306 return(0,"") if(!$tas); 307 foreach my $a (@$tas) { 308 $s = $a->overallStatus->val; 309 next unless($s eq 'red' or $s eq 'yellow'); 310 $stat = 1 if($s eq 'yellow' and $stat < 1); 311 $stat = 2 if($s eq 'red' and $stat < 2); 312 $aentity = Vim::get_view(mo_ref=>$a->entity); 313 $alarm = Vim::get_view(mo_ref=>$a->alarm ); 314 $rv .= " $NEWLINE " if($rv); 315 $rv .= "[".$aentity->name."] " 316 .$alarm->info->name." 
is ".$a->overallStatus->val; 317 if( $withnsca ) { 318 # obtain FQDN of host 319 if($DEBUG){print "Type=".(ref $aentity)."\n";} 320 $nscahost = ""; 321 if( (ref $aentity) eq 'VirtualMachine' ) { 322 $nscahost = $aentity->guest->hostName; 323 } 324 $nscahost = $aentity->name if(!$nscahost); 325 $nscahost = canonical($nscahost); 326 # deduce servicedesc 327 if( $alarm->info->name =~ /\s(\S+)\s+usage/i ) { 328 $nscaservice = "VMware: Alarms: $1"; 329 } else { $nscaservice = "VMware: Alarms"; } 330 # send NSCA alert 331 if($a->overallStatus->val eq 'red') { $nscastatus=2; } 332 elsif($a->overallStatus->val eq 'yellow') { $nscastatus=1; } 333 elsif($a->overallStatus->val eq 'green') { $nscastatus=0; } 334 else { $nscastatus=3; } 335 print "NSCA: [$nscahost/$nscaservice] is $nscastatus\n" if($DEBUG); 336 sendnsca($nscahost,$nscaservice,$nscastatus, 337 $alarm->info->name." is ".$a->overallStatus->val ); 338 } 339 } 340 341 return ($stat,$rv); 342} 343 344######################################################################### 345sub getcounters($) { 346 my($type) = $_[0]; 347 # we need to identify which counter is which 348 my $perfCounterInfo = $perfmgr->perfCounter; 349 print "Identifying perfcounter IDs\n" if($DEBUG>1); 350 foreach ( @$perfCounterInfo ) { 351 next if($_->groupInfo->key !~ /$type/); # optimise 352 if($_->rollupType->val =~ /average|summation|latest/) { 353 $perfkeys{$_->groupInfo->key.":".$_->nameInfo->key}=$_->key; 354 $perfkeys{$_->key} = $_->groupInfo->key.":".$_->nameInfo->key; 355 } 356 } 357} 358sub getinterval() { 359 # We try to get the interval closest to 5min (the normal polling 360 # interval for MRTG) 361 print "Retrieving interval data...\n" if($DEBUG>1); 362 my $hi = $perfmgr->historicalInterval; 363 foreach (@$hi) { 364 $interval = $_->samplingPeriod if(!$interval); 365 if($_->samplingPeriod == 300) { $interval = 300; last; } 366 } 367 print "Selected interval is: $interval\n" if($DEBUG); 368} 369sub makequery() { 370 @queries = (); 371 
foreach my $e ( @$entity ) { 372 if($DEBUG) { 373 if( defined $e->{value} ) { 374 print "Creating query for MORef ".$e->{value}."\n" ; 375 } else { 376 print "Creating query for ".$e->name."\n" ; 377 } 378 } 379 my $perfquery; 380 my (@t) = gmtime(time-300); # 5 mins ago 381 my $start = sprintf("%04d-%02d-%02dT%02d:%02d:00Z", 382 (1900+$t[5]),(1+$t[4]),$t[3],$t[2],$t[1]); 383 @t = gmtime(time); 384 my $end = sprintf("%04d-%02d-%02dT%02d:%02d:00Z", 385 (1900+$t[5]),(1+$t[4]),$t[3],$t[2],$t[1]); 386 print "Start time: $start\nEnd time : $end\n" if($DEBUG); 387 $perfquery = PerfQuerySpec->new(entity => $e, 388 metricId => \@metricids, intervalId => $interval, 389 startTime => $start, endTime => $end ); 390 push @queries,$perfquery; 391 } 392} 393sub runquery() { 394 print "Retrieving data...\n" if($DEBUG); 395 eval { $perfdata = $perfmgr->QueryPerf(querySpec => \@queries); }; 396 if ($@) { 397 if (ref($@) eq 'SoapFault') { 398 if (ref($@->detail) eq 'InvalidArgument') { 399 print "Error: $@\n" if($DEBUG); 400 print "Error: ".$@->detail."\n" if($DEBUG); 401 $MSG="Perf stats not available : Increase Perf logging level to 2 or higher."; 402 $STATUS=3; 403 return 1; 404 } 405 } 406 my($msg) = $@; $msg =~ s/^[\n\s]*//; $msg =~ s/\n/$NEWLINE/g; 407 if($msg =~ /SOAP Fault/i) { 408 print "Error: $msg\n" if($DEBUG); 409# dounknown("CPU Perf stats not available : Increase Perf logging level to 2 or higher."); 410 $MSG="Perf stats not available : Increase Perf logging level to 2 or higher."; 411 $STATUS=3; 412 return 1; 413 } 414# dounknown("Error: $msg"); 415 $MSG="Error: $msg"; 416 $STATUS=3; 417 return 1; 418 } 419 if(! 
@$perfdata) { 420# dounknown("Perf stats not available at required interval (300s) or invalid instance."); 421 $MSG="Perf stats not available at required interval (300s) or invalid instance."; 422 $STATUS=3; 423 return 1; 424 } 425 return 0; 426} 427 428######################################################################### 429# Various reporting modes 430 431# CPU report: either for avg of hosts(s) or for a VM 432# For nagios mode, we check ready time as well as cpu. 433# For mrtg mode, we give percentage use and ready time 434# MRTG: vm : 435sub cpureport() { 436 my(%results) = (); 437 my(%rcount) = (); 438 my($mycpus) = 0; 439 440 print "Retrieving PerfMgr data\n" if($DEBUG); 441 $perfmgr = Vim::get_view(mo_ref =>$servicecontent->perfManager) 442 if(!$perfmgr); 443 444 getinterval(); 445 446 # now we have the polling interval, we need to 447 # identify the things to retrieve 448 if($vm) { 449 $entity = $vm; # actually a list of refs 450 } elsif( Opts::option_is_set('host') ) { 451 $entity = $begin; # actually a list of refs 452 } else { 453 print "Retrieving list of hosts...\n" if($DEBUG); 454# $entity = Vim::find_entity_views (view_type => 'HostSystem', 455# begin_entity => @$begin); 456 my @e = (); 457 my $view_type = 'HostSystem'; 458 print "Making new propertyspec\n" if($DEBUG); 459 my $property_spec = PropertySpec->new(all => 0, 460 type => $view_type->get_backing_type(), pathSet => [] 461 ); 462 print "Making new filterspec from ".(ref $view_type)."\n" if($DEBUG); 463 my $property_filter_spec = 464 $view_type->get_search_filter_spec(@$begin, [$property_spec]); 465 print "Retrieving vim_service\n" if($DEBUG); 466 my $service = Vim::get_vim_service(); 467 print "Retrieving properties from ".(ref $service)."\n" if($DEBUG); 468 my $obj_contents = $service->RetrieveProperties( 469 _this => $servicecontent->propertyCollector, 470 specSet => $property_filter_spec); 471 print "Checking faults on ".(ref $obj_contents)."\n" if($DEBUG); 472 my $result = 
Util::check_fault($obj_contents); 473 foreach ( @$result ) { push @e, $_->obj; } 474 $entity = \@e; 475 } 476 if($DEBUG) { 477 print "Processing entities:\n"; 478 foreach my $ee ( @$entity ) { 479 if( defined $ee->{value} ) { 480 print " ".$ee->{value}."\n" ; 481 } else { 482 print " ".$ee->name."\n" ; 483 } 484 } 485 } 486 487 # we need to identify which is the CPU usage counter. 488 getcounters('cpu|cluster|mem'); 489 490 # now we know the counter numbers (although they may not be active!) 491 # which we retrieve depends on if we're monitoring hosts(s) or a VM 492 # hosts we get cpu:usage, cpu:usagemhz 493 # vms we get cpu:usage, cpu:{used,ready,system,wait} 494 # if in MRTG mode, we get other stats as well. 495 496 # we can probably optimise this in MRTG mode to only get the ones 497 # we want to graph this time 498 foreach ( qw/cpu:usage mem:usage cpu:usagemhz/ ) { 499 push @metricids, PerfMetricId->new (counterId => $perfkeys{$_}, 500 instance => '' ) 501 if(defined $perfkeys{$_}); 502 } 503 if($vm) { 504 foreach my $k ( qw/cpu:used cpu:ready cpu:system cpu:wait/ ) { 505 # We're asking for data for 4 vCPUs, although probably only 506 # 1 of them will actually be there and return data. 
507 foreach my $vcpu ( 1..$MAXGUESTCPUS ) { 508 if(defined $perfkeys{$k}) { 509 push @metricids, PerfMetricId->new ( 510 counterId => $perfkeys{$k}, instance => ($vcpu-1)) ; 511 } 512 } 513 } 514 } else { 515 foreach ( qw/rescpu:actav5 clusterServices:cpufairness clusterServices:memfairness/ ) { 516 push @metricids, PerfMetricId->new (counterId => $perfkeys{$_}, 517 instance => '') if(defined $perfkeys{$_}); 518 } 519 } 520 foreach ( @metricids ) { 521 print $_->counterId.": ".$perfkeys{$_->counterId}."(" 522 .$_->instance.")\n" if($DEBUG>1); 523 $rcount{$_} = 0; 524 $results{$_} = 0; 525 } 526 527 makequery(); 528 return if(runquery()); 529 530 print "Perfstats retrieved...\n" if($DEBUG); 531 my($idx) = 0; 532 foreach my $pd (@$perfdata) { 533 if($DEBUG) { 534 if( defined $queries[$idx]->entity->{value} ) { 535 print "Results for ".$queries[$idx]->entity->{value}."\n" 536 } else { 537 print "Results for ".$queries[$idx]->entity->name."\n" 538 } 539 } 540 my $time_stamps = $pd->sampleInfo; 541 my $values = $pd->value; 542 next if(!$time_stamps or !$values); 543 my $nval = $#$time_stamps; 544 next if($nval<0); 545 print "Perfdata object: ".$time_stamps->[$nval]->timestamp."\n" if($DEBUG); 546 foreach my $v (@$values) { 547 print $perfkeys{$v->id->counterId}."=".$v->value->[$nval]."\n" 548 if($DEBUG>1); 549 $rcount{$v->id->counterId} += 1; 550 $results{$v->id->counterId} += $v->value->[$nval]; 551 } 552 $idx+=1; 553 } 554 # Now, we have a total of the various statistics. Some may need 555 # to be averages, some can remain totals. Basically, the %ages 556 # need to be averaged and the rest can remain as totals. 
557 foreach ( qw/rescpu:actav5 clusterServices:cpufairness clusterServices:memfairness mem:usage cpu:usage/ ) { 558 next if(!defined $results{$perfkeys{$_}}); 559 $results{$perfkeys{$_}} /= $rcount{$perfkeys{$_}} 560 if($rcount{$perfkeys{$_}}); 561 } 562 # also, usage is a special case 563 $results{$perfkeys{'cpu:usage'}} /= 100 564 if(defined $results{$perfkeys{'cpu:usage'}}); 565 $results{$perfkeys{'mem:usage'}} /= 100 566 if(defined $results{$perfkeys{'mem:usage'}}); 567 if($vm) { 568 # These are in milliseconds total per interval 569 # we also divide by the number of CPUs to get the percentage... 570 # we convert to percentages by % = value/ncpus/interval/1000*100% 571 # sys + wait + ready + used = 100% 572 foreach ( qw/cpu:used cpu:ready cpu:system cpu:wait/ ) { 573 next if(!defined $results{$perfkeys{$_}}); 574 print "Perf $_ = ".$results{$perfkeys{$_}} 575 ." interval=".($interval*10) 576 ." count=".$rcount{$perfkeys{$_}}."\n" if($DEBUG>1); 577 $results{$perfkeys{$_}} /= ($interval*10); 578 $results{$perfkeys{$_}} /= $rcount{$perfkeys{$_}} 579 if($rcount{$perfkeys{$_}}); 580 } 581 $mycpus = $rcount{$perfkeys{'cpu:used'}} 582 if($rcount{$perfkeys{'cpu:used'}}); 583 } 584 585 # Finally, we have all the results! Now we have to do some thresholding 586 # for Nagios, or get the correct values for MRTG 587 # At this point, we could be looking at data for a host, a group of hosts, 588 # or a guest. 
589 if($isnagios) { 590 my($cpuavg) = $results{$perfkeys{'cpu:usage'}}; 591 if(defined $cpuavg) { 592 $PERF .= "cpu=$cpuavg\%;$WARNCPU;$CRITCPU;0;100 "; 593 } else { 594 $PERF .= "cpu=;$WARNCPU;$CRITCPU;0;100 "; 595 } 596 if(!defined $cpuavg) { 597 $STATUS = 3; 598 $MSG = "CPU usage is unknown?"; 599 } elsif($cpuavg > $CRITCPU) { 600 $STATUS = 2; 601 $MSG = "CRIT: CPU usage at ".(int($cpuavg*100)/100)."\% (need more CPU allocation?)"; 602 } elsif($cpuavg > $WARNCPU) { 603 $STATUS = 1; 604 $MSG = "WARN: CPU usage at ".(int($cpuavg*100)/100)."\%"; 605 } else { 606 $STATUS = 0; 607 $MSG = "CPU usage at ".(int($cpuavg*100)/100)."\%"; 608 } 609 if($vm) { 610 $MSG .= "$NEWLINE Guest CPUs: $mycpus" if($mycpus); 611 if( defined $results{$perfkeys{'cpu:used'}} ) { 612 my $cpuu = int($results{$perfkeys{'cpu:used'}}*100)/100; 613 my $cpur = int($results{$perfkeys{'cpu:ready'}}*100)/100; 614 my $cpus = int($results{$perfkeys{'cpu:system'}}*100)/100; 615 $PERF .= "ready=$cpur\%;$WARNREADY;$CRITREADY;0;100 "; 616 $PERF .= "user=$cpuu\%;;;0;100 "; 617 $PERF .= "sys=$cpus\%;;;0;100 "; 618 if( $cpur > $CRITREADY ) { 619 $STATUS = 2; 620 $MSG .= "$NEWLINE CRIT: Ready time is $cpur\% (Cluster is overloaded, or guest has too much I/O)"; 621 } elsif( $cpur > $WARNREADY ) { 622 $STATUS = 1 if($STATUS < 1); 623 $MSG .= "$NEWLINE WARN: Ready time is $cpur\%"; 624# } else { 625 } 626 $MSG .= "$NEWLINE CPU stats: Used/System/Ready = $cpuu\%/$cpus\%/$cpur\%"; 627 } else { 628 $MSG .= "$NEWLINE No detailed CPU statistics available (raise logging level to 2)"; 629 } 630 } else { 631 my $cpufair = int($results{$perfkeys{'clusterServices:cpufairness'}}*100)/100; 632 $PERF .= "fair=$cpufair;$WARNFAIR;$CRITFAIR;0; " if($cpufair); 633 if( !$cpufair ) { 634 $MSG .= "$NEWLINE (No CPU fairness data)"; 635 } elsif( $cpufair < $CRITFAIR ) { 636 $MSG .= "$NEWLINE CRIT: CPU Fairness at $cpufair\% (check DRS or rebalance guest allocation in cluster)"; 637 $STATUS = 2; 638 } elsif( $cpufair < $WARNFAIR ) { 
639 $MSG .= "$NEWLINE WARN: CPU Fairness at $cpufair\%"; 640 $STATUS = 1 if($STATUS < 1); 641 } else { $MSG .= "$NEWLINE CPU Fairness at $cpufair\%"; } 642 if($#$entity>0) { 643 # multiple hosts 644 my(@f) = (); my($avgf)=0; 645 my($sdf) = 0; 646 foreach ( @$entity ) { 647 next if(defined $_->{value}); # its a moref 648 $avgf += $_->summary->quickStats->distributedCpuFairness; 649 push @f,$_->summary->quickStats->distributedCpuFairness; 650 } 651 if($#f > -1) { 652 $avgf /= ( $#f + 1 ); 653 foreach (@f) { $sdf += ($_-$avgf)*($_-$avgf); } 654 $sdf = sqrt($sdf)/1000; 655 $MSG .= "$NEWLINE Distributed fairness SD is ".(int($sdf*100)/100); 656 } 657 } 658 } 659 } else { 660 $A = $results{$perfkeys{'cpu:usage'}}; 661 $B = $results{$perfkeys{'mem:usage'}}; 662 $A = "UNKNOWN" if(!defined $A); 663 $B = "UNKNOWN" if(!defined $B); 664 $MSG = "Avg CPU usage: ".(int($A*100)/100) 665 ."\%, Avg Memory usage: ".(int($B*100)/100)."\%"; 666 if($mode == 1) { 667 if($vm) { 668 $A = $results{$perfkeys{'cpu:used'}}; 669 $B = $results{$perfkeys{'cpu:ready'}}; 670 $A = "UNKNOWN" if(!defined $A); 671 $B = "UNKNOWN" if(!defined $B); 672 $MSG = "CPU Used: ".(int($A*100)/100) 673 ."\%, Ready: ".(int($B*100)/100)."\%"; 674 } else { 675 $A = $results{$perfkeys{'clusterServices:cpufairness'}}; 676 $B = $results{$perfkeys{'clusterServices:memfairness'}}; 677 $A = "UNKNOWN" if(!defined $A); 678 $B = "UNKNOWN" if(!defined $B); 679 $MSG = "CPU fairness: ".(int($A*100)/100) 680 ."\%, MEM fairness: ".(int($B*100)/100)."\%"; 681 } 682 } elsif($mode == 2) { 683 if($vm) { 684 $A = $results{$perfkeys{'cpu:system'}}; 685 $B = $results{$perfkeys{'cpu:wait'}}; 686 $A = "UNKNOWN" if(!defined $A); 687 $B = "UNKNOWN" if(!defined $B); 688 $MSG = "CPU System: ".(int($A*100)/100) 689 ."\%, Wait: ".(int($B*100)/100)."\%"; 690 } 691 } 692 } 693} 694 695 696# Memory report: either for hosts(s) or for a VM 697sub memreport() { 698 my(%results) = (); 699 my(%rcount) = (); 700 701 print "Running memory report\n" 
if($DEBUG); 702 print "Retrieving PerfMgr data\n" if($DEBUG); 703 $perfmgr = Vim::get_view(mo_ref => $servicecontent->perfManager) 704 if(!$perfmgr); 705 706 getinterval(); 707 708 # now we have the polling interval, we need to 709 # identify the things to retrieve 710 if($vm) { 711 $entity = $vm; # actually a list of refs 712 } elsif( Opts::option_is_set('host') ) { 713 $entity = $begin; # actually a list of refs 714 } else { 715 print "Retrieving list of hosts...\n" if($DEBUG); 716# $entity = Vim::find_entity_views (view_type => 'HostSystem', 717# begin_entity => @$begin); 718 my @e = (); 719 my $view_type = 'HostSystem'; 720 print "Making new propertyspec\n" if($DEBUG); 721 my $property_spec = PropertySpec->new(all => 0, 722 type => $view_type->get_backing_type(), pathSet => [] 723 ); 724 print "Making new filterspec from ".(ref $view_type)."\n" if($DEBUG); 725 my $property_filter_spec = 726 $view_type->get_search_filter_spec(@$begin, [$property_spec]); 727 print "Retrieving vim_service\n" if($DEBUG); 728 my $service = Vim::get_vim_service(); 729 print "Retrieving properties from ".(ref $service)."\n" if($DEBUG); 730 my $obj_contents = $service->RetrieveProperties( 731 _this => $servicecontent->propertyCollector, 732 specSet => $property_filter_spec); 733 print "Checking faults on ".(ref $obj_contents)."\n" if($DEBUG); 734 my $result = Util::check_fault($obj_contents); 735 foreach ( @$result ) { push @e, $_->obj; } 736 $entity = \@e; 737 } 738 if($DEBUG) { 739 print "Processing entities:\n"; 740 foreach my $ee ( @$entity ) { 741 if( defined $ee->{value} ) { 742 print " ".$ee->{value}."\n" ; 743 } else { 744 print " ".$ee->name."\n" ; 745 } 746 } 747 } 748 749 getcounters('mem|cluster|cpu'); 750 751 # now we know the counter numbers (although they may not be active!) 
752 # which we retrieve depends on if we're monitoring hosts(s) or a VM 753 # we can probably optimise this in MRTG mode to only get the ones 754 # we want to graph this time 755 foreach ( qw/cpu:usage mem:usage/ ) { 756 push @metricids, PerfMetricId->new (counterId => $perfkeys{$_}, 757 instance => '' ) 758 if(defined $perfkeys{$_}); 759 } 760 if($vm) { 761 foreach ( qw/mem:granted mem:vmmemctl mem:active mem:shared mem:swapped mem:overhead mem:consumed mem:zero/ ) { 762 push @metricids, PerfMetricId->new (counterId => $perfkeys{$_}, 763 instance => '' ) 764 if(defined $perfkeys{$_}); 765 } 766 } else { 767 foreach ( qw/clusterServices:memfairness clusterServices:cpufairness mem:swapused/ ) { 768 push @metricids, PerfMetricId->new (counterId => $perfkeys{$_}, 769 instance => '' ) 770 if(defined $perfkeys{$_}); 771 } 772 } 773 foreach ( @metricids ) { 774 print $_->counterId.": ".$perfkeys{$_->counterId}."(" 775 .$_->instance.")\n" if($DEBUG>1); 776 $rcount{$_} = 0; 777 $results{$_} = 0; 778 } 779 780 makequery(); 781 return if(runquery()); 782 783 print "Perfstats retrieved...\n" if($DEBUG); 784 my($idx) = 0; 785 foreach my $pd (@$perfdata) { 786 if($DEBUG) { 787 if(defined $queries[$idx]->entity->{value}) { 788 print "Results for ".$queries[$idx]->entity->{value}."\n"; 789 } else { 790 print "Results for ".$queries[$idx]->entity->name."\n"; 791 } 792 } 793 my $time_stamps = $pd->sampleInfo; 794 my $values = $pd->value; 795 next if(!$time_stamps or !$values); 796 my $nval = $#$time_stamps; 797 next if($nval<0); 798 print "Perfdata object: ".$time_stamps->[$nval]->timestamp."\n" if($DEBUG); 799 foreach my $v (@$values) { 800 print $perfkeys{$v->id->counterId}."=".$v->value->[$nval]."\n" 801 if($DEBUG>1); 802 $rcount{$v->id->counterId} += 1; 803 $results{$v->id->counterId} += $v->value->[$nval]; 804 } 805 $idx+=1; 806 } 807 # Now, we have a total of the various statistics. Some may need 808 # to be averages, some can remain totals. 
Basically, the %ages 809 # need to be averaged and the rest can remain as totals. 810 foreach ( qw/clusterServices:cpufairness clusterServices:memfairness cpu:usage mem:usage/ ) { 811 next if(!defined $results{$perfkeys{$_}}); 812 $results{$perfkeys{$_}} /= $rcount{$perfkeys{$_}} 813 if($rcount{$perfkeys{$_}}); 814 } 815 # also, usage is a special case as it is in hundredths of a % 816 $results{$perfkeys{'mem:usage'}} /= 100 817 if(defined $results{$perfkeys{'mem:usage'}}); 818 $results{$perfkeys{'cpu:usage'}} /= 100 819 if(defined $results{$perfkeys{'cpu:usage'}}); 820 foreach ( qw/mem:granted mem:vmmemctl mem:active mem:shared mem:swapped mem:overhead mem:consumed/ ) { 821 next if(!defined $results{$perfkeys{$_}}); 822 $results{$perfkeys{$_}} *= 1024; 823 } 824 825 # Finally, we have all the results! Now we have to do some thresholding 826 # for Nagios, or get the correct values for MRTG 827 # At this point, we could be looking at data for a host, a group of hosts, 828 # or a guest. 829 if($isnagios) { 830 my($memavg) = int($results{$perfkeys{'mem:usage'}}*100)/100; 831 if(defined $memavg) { 832 $PERF.="mem=$memavg\%;$WARNMEM;$CRITMEM;0;100 "; 833 } else { 834 $PERF.="mem=;$WARNMEM;$CRITMEM;0;100 "; 835 } 836 if(!defined $memavg) { 837 $STATUS = 3; 838 $MSG = "Memory usage is unknown?"; 839 } elsif($memavg > $CRITMEM) { 840 $STATUS = 2; 841 $MSG = "CRIT: Memory usage at $memavg\% (Insufficient memory for ESX server or Guest)"; 842 } elsif($memavg > $WARNMEM) { 843 $STATUS = 1; 844 $MSG = "WARN: Memory usage at $memavg\%"; 845 } else { 846 $STATUS = 0; 847 $MSG = "Memory usage at $memavg\%"; 848 } 849 if($vm) { 850 # here we check for swap activity, vmmemctl and swapped too high 851 my($actvpc,$pvtpc,$shrpc,$balloonpc,$swappc) = (0,0,0,0,0); 852 my($configmem) = 0; 853 foreach my $v ( @$vm ) { 854 $configmem += $v->runtime->maxMemoryUsage; 855 } 856 $MSG .= "$NEWLINE GuestMemory: $configmem MB" if($configmem); 857 $configmem *= 1024*1024; 858 if($configmem) { 859 
$balloonpc = int($results{$perfkeys{'mem:vmmemctl'}}/$configmem*10000)/100; 860 $swappc = int($results{$perfkeys{'mem:swapped'}}/$configmem*10000)/100; 861 $pvtpc = int(($configmem-$results{$perfkeys{'mem:vmmemctl'}}-$results{$perfkeys{'mem:swapped'}}-$results{$perfkeys{'mem:shared'}})/$configmem*10000)/100; 862 $pvtpc = 0 if($pvtpc<0); 863 $shrpc = int($results{$perfkeys{'mem:shared'}}/$configmem*10000)/100; 864 $MSG .= "$NEWLINE Memory split pvt/shr/bal/swp = $pvtpc\%/$shrpc\%/$balloonpc\%/$swappc\%"; 865# $actvpc = int($results{$perfkeys{'mem:active'}}/$configmem*10000)/100; 866 $MSG .= "$NEWLINE Maybe guest has too much memory?" if($balloonpc>5 and $memavg<25); 867 $PERF.="balloon=$balloonpc\%;;;0;100 "; 868 $PERF.="swap=$swappc\%;;;0;100 "; 869 $PERF.="private=$pvtpc\%;;;0;100 "; 870 $PERF.="shared=$shrpc\%;;;0;100 "; 871# usage = active/total x 100% so we already have this 872# $PERF.="active=$actvpc\%;$WARNACTIVE;$CRITACTIVE;0;100 "; 873# if($actvpc > $CRITACTIVE) { 874# $STATUS = 2; 875# $MSG .= "$NEWLINE CRIT: Memory activity at $actvpc\% (need more memory in guest)"; 876# } elsif ($actvpc > $WARNACTIVE) { 877# $STATUS = 1 if($STATUS<1); 878# $MSG .= "$NEWLINE WARN: Memory activity at $actvpc\%"; 879# } else { 880# $MSG .= "$NEWLINE Memory activity at $actvpc\%"; 881# } 882 } else { 883 $MSG .= "$NEWLINE No detailed Memory stats available (raise logging level to 2)"; 884 } 885 } else { 886 my $memfair = int($results{$perfkeys{'clusterServices:memfairness'}}*100)/100; 887 $PERF.="fair=$memfair;$WARNFAIR;$CRITFAIR;0; " if($memfair); 888 if( !$memfair ) { 889 $MSG .= "$NEWLINE (No MEM fairness data)"; 890 } elsif( $memfair < $CRITFAIR ) { 891 $MSG .= "$NEWLINE CRIT: MEM Fairness at $memfair\% (Check DRS or manually rebalance cluster)"; 892 $STATUS = 2; 893 } elsif( $memfair < $WARNFAIR ) { 894 $MSG .= "$NEWLINE WARN: MEM Fairness at $memfair\%"; 895 $STATUS = 1 if($STATUS < 1); 896 } else { $MSG .= "$NEWLINE MEM Fairness at $memfair\%"; } 897 if($#$entity>0) 
{ 898 # multiple hosts 899 my(@f) = (); my($avgf)=0; 900 my($sdf) = 0; 901 foreach ( @$entity ) { 902 next if(defined $_->{value}); # its a moref 903 $avgf += $_->summary->quickStats->distributedMemoryFairness; 904 push @f,$_->summary->quickStats->distributedMemoryFairness; 905 } 906 if($#f > -1) { 907 $avgf /= ( $#f + 1 ); 908 foreach (@f) { $sdf += ($_-$avgf)*($_-$avgf); } 909 $sdf = sqrt($sdf)/1000; 910 $MSG .= "$NEWLINE Distributed fairness SD is ".(int($sdf*100)/100); 911 } 912 } 913 } 914 } else { ### MRTG mode... 915 $B = $results{$perfkeys{'cpu:usage'}}; 916 $A = $results{$perfkeys{'mem:usage'}}; 917 $A = "UNKNOWN" if(!defined $A); 918 $B = "UNKNOWN" if(!defined $B); 919 $MSG = "Avg Memory usage: ".(int($A*100)/100) 920 ."\%, Avg CPU usage: ".(int($B*100)/100)."\%"; 921 if($mode == 1) { 922 if($vm) { 923 $A = $results{$perfkeys{'mem:active'}}; 924 $B = $results{$perfkeys{'mem:granted'}}; 925 $A = "UNKNOWN" if(!defined $A); 926 $B = "UNKNOWN" if(!defined $B); 927 $MSG = "Memory active: ".(int($A/1024000)) 928 ."MB, from granted: ".(int($B/1024000))."MB"; 929 } else { 930 $A = $results{$perfkeys{'clusterServices:cpufairness'}}; 931 $B = $results{$perfkeys{'clusterServices:memfairness'}}; 932 $A = "UNKNOWN" if(!defined $A); 933 $B = "UNKNOWN" if(!defined $B); 934 $MSG = "CPU fairness: ".(int($A*100)/100) 935 ."\%, MEM fairness: ".(int($B*100)/100)."\%"; 936 } 937 } elsif($mode == 2) { 938 if($vm) { 939 $A = $B = "UNKNOWN"; 940 $A = $results{$perfkeys{'mem:consumed'}}/$results{$perfkeys{'mem:granted'}}*100 if($results{$perfkeys{'mem:granted'}}); 941 $B = $results{$perfkeys{'mem:shared'}}/$results{$perfkeys{'mem:granted'}}*100 if($results{$perfkeys{'mem:granted'}}); 942 $A = "UNKNOWN" if(!defined $A); 943 $B = "UNKNOWN" if(!defined $B); 944 $MSG = "Memory private: ".(int($A*100)/100) 945 ."\%, shared: ".(int($B*100)/100)."\%"; 946 } 947 } elsif($mode == 3) { 948 if($vm) { 949 $A = $B = "UNKNOWN"; 950 $A = 
$results{$perfkeys{'mem:vmmemctl'}}/$results{$perfkeys{'mem:granted'}}*100 if($results{$perfkeys{'mem:granted'}});
                $B = $results{$perfkeys{'mem:swapped'}}/$results{$perfkeys{'mem:granted'}}*100 if($results{$perfkeys{'mem:granted'}});
                $A = "UNKNOWN" if(!defined $A);
                $B = "UNKNOWN" if(!defined $B);
                $MSG = "Memory balloon: ".(int($A*100)/100)
                    ."\%, swapped: ".(int($B*100)/100)."\%";
            }
        }
    }
}

# Disk space report
#
# Collects the datastores attached to every entity in @$begin (looking one
# level down through childEntity when an entity has no datastore accessor),
# applies the optional --instance / --include / --exclude selection, and then:
#   Nagios mode: flags every datastore whose free space is below
#                $CRITSPACE / $WARNSPACE (in GB) and emits summed free/total
#                perfdata for all selected datastores.
#   MRTG mode:   sets $A = space used, $B = total space, summed over all
#                selected datastores.
# Results are left in the globals $MSG, $STATUS, $PERF, $A and $B.
sub diskreport() {
    my($totspace,$freespace) = (0,0);
    my($disks);
    my(@dsa) = ();
    my($instance,$include,$exclude) = ('','','');
    my($cnt) = 0;
    # Historical scaling factors; kept as they are so that existing
    # thresholds keep triggering at exactly the same free-space values.
    my($GB,$MB) = (1024000000,1024000);

    print "Running disk report\n" if($DEBUG);
    $instance = Opts::get_option('instance')
        if(Opts::option_is_set('instance'));
    $include = Opts::get_option('include')
        if(Opts::option_is_set('include'));
    $exclude = Opts::get_option('exclude')
        if(Opts::option_is_set('exclude'));

    print "Identifying datastores...\n" if($DEBUG);
    foreach my $base (@$begin) {
        print "N=".$base->name."\n" if($DEBUG);
        eval {
            push @dsa,@{$base->datastore} if(defined $base->datastore);
        };
        if($@) {
            # This entity has no usable datastore accessor, so collect the
            # datastores of its direct children instead.
            my( $r ) = $base->childEntity;
            print "Identifying children for ".$base->name."\n" if($DEBUG);
            my $children = Vim::get_views( mo_ref_array => $r );
            foreach my $child (@$children) {
                print " N=".$child->name."\n" if($DEBUG);
                push @dsa,@{$child->datastore} if(defined $child->datastore);
            }
        }
    }

    print "Extracting disks\n" if($DEBUG);
    $disks = Vim::get_views( mo_ref_array => \@dsa );
    if(!@$disks) {
        if($instance) {
            dounknown("Disk instance $instance not found.");
        } elsif($include or $exclude) {
            dounknown("No matching disk instances found.");
        } else {
            dounknown("No datastores found.");
        }
    }

    $MSG = ""; $STATUS = 0;
    foreach my $ds ( @$disks ) {
        my $name = $ds->info->name;
        print "Checking $name\n" if($DEBUG>1);
        next if($instance and $name ne $instance);
        next if($include and $name !~ /$include/);
        next if($exclude and $name =~ /$exclude/);
        $cnt+=1;

        my $capacity = $ds->summary->capacity;
        my $free     = $ds->summary->freeSpace;
        # Accumulate in both modes so that the Nagios perfdata describes all
        # selected datastores rather than only the last one examined.
        $totspace  += $capacity;
        $freespace += $free;

        if($isnagios) {
            my $flag = "";
            if($free < $CRITSPACE*$GB) {
                $STATUS = 2;
                $flag = "[C]";
            } elsif($free < $WARNSPACE*$GB) {
                $STATUS = 1 if($STATUS<2);
                $flag = "[W]";
            }
            if($flag) {
                $MSG .= "$NEWLINE " if($MSG);
                # The percentage is only shown when the capacity is known
                # (non-zero), which also avoids a division by zero.
                $MSG .= "$flag $name: ".int($free/$MB)."MB"
                    .($capacity ? " (".(int($free/$capacity*1000)/10)."\%)" : "")
                    ." free";
            }
        } else {
            print "So far: ".int($totspace/$GB)."GB $name\n" if($DEBUG);
        }
    }
    if(!$cnt) {
        if($instance) { dounknown("Disk instance $instance not found."); }
        dounknown("No matching disk instances found.");
    }
    if($isnagios) {
        $MSG = "All filesystems within parameters" if(!$MSG);
        $PERF .= "free=$freespace;;;0; total=$totspace;;;0; ";
    } else {
        # For MRTG, we show space used, so that the peak is more meaningful
        ($A,$B) = (($totspace-$freespace),$totspace);
        $MSG = "All datastores: ".int($A/$GB)."GB used from ".int($totspace/$GB)."GB";
    }
}

# Network interface report
#
# Not implemented yet (see the TO DO list at the top of the file).  The
# status globals are left alone and only an explanatory message is set, so
# that the plugin does not print an empty line.
sub netreport() {
    $MSG = "Network report is not implemented";
}

# State report
#
# Nagios mode: reports any selected guest that is not powered on as critical,
#              then merges in the alarms returned by getalarms() for every
#              checked object (the guests if given, otherwise @$begin).
# MRTG mode:   sets $A = number of powered-on guests, $B = number of guests.
sub statereport() {
    my($numup) = 0;
    my($totvms) = 0;
    my(@statobj) = ();

    print "Running state report\n" if($DEBUG);
    if(!$vm) {
        if(!$isnagios) {
            print "Extracting VMs\n" if($DEBUG);
            $vm = _find_under_begin(view_type => 'VirtualMachine');
        }
        push @statobj,@$begin;
    } else {
        push @statobj,@$vm;
    }
    $STATUS = 0;
    if($isnagios) {
        if($vm) {
            # we're checking a VM, so need to check if it is up
            foreach my $v (@$vm) {
                my $state = $v->runtime->powerState->val;
                next if($state eq 'poweredOn');
                $MSG .= "$NEWLINE " if($MSG);
                $MSG .= "Guest ".$v->name." is ".$state;
                $STATUS = 2;
            }
        }
        print "Checking alarms...\n" if($DEBUG);
        foreach my $obj ( @statobj ) {
            my($s,$text) = getalarms($obj);
            $STATUS = $s if($s>$STATUS);
            if($text) { $MSG.="$NEWLINE " if($MSG); $MSG.=$text; }
        }
        if(!$MSG) { $MSG = "No alarms detected."; }
    } else {
        print "Processing...\n" if($DEBUG);
        foreach my $v ( @$vm ) {
            print "\rProcessing ".$v->name if ($DEBUG);
            $totvms += 1;
            $numup += 1 if( $v->runtime->powerState->val eq 'poweredOn' );
        }
        print "\rDone. \n" if($DEBUG);
        ($A,$B)=($numup,$totvms);
        $MSG = "$numup guests from $totvms are running";
    }
    print "$MSG\n" if($DEBUG);
}
#########################################################################
# Generate appropriate configuration files?

# Print an example Nagios configuration for the object selected by the
# --guest / --datacenter / --cluster / --host options.  When several are
# given, the last one in that order names the Nagios host object.
sub makenagioscfg() {
    my($cmdopt) = "";
    my($hostobj) = "VMWARE";
    my($address) = "put your VirtualCentre IP address in here";
    my($alias) = "";
    # Alias prefix per scope option; a guest gets no alias.
    my(%aliasfor) = (
        datacenter => "VMWare datacentre ",
        cluster    => "VMWare cluster ",
        host       => "VMWare server ",
    );

    print <<"_END_";
# This is an autogenerated Nagios configuration file
# You may wish to modify it before using!
#
# This is an example of the required checkcommand definition:
#define command {
#    command_name check_vmware
#    command_line \$USER1\$/check_vmware --mode=nagios --config=\$USER1\$/vmware.cfg \$ARG1\$
#}
#
# You also need to have a service template called 'generic-service'
#
# The vmware.cfg file must contain the necessary lines to define your
# VirtualCentre server and authentication parameters:
#VI_PASSWORD=secretpassword
#VI_SERVER=vmware-vc-server.auckland.ac.nz
#VI_USERNAME=adminuser
#
_END_

    # Now, if we have a guest defined, then we output a guest
    # configuration. Similarly for host and farm.
    foreach my $opt (qw(guest datacenter cluster host)) {
        next if( !Opts::option_is_set($opt) );
        my $value = Opts::get_option($opt);
        $cmdopt .= "--$opt=\"".$value."\" ";
        $hostobj = $value;
        $alias   = $aliasfor{$opt}.$value if(exists $aliasfor{$opt});
        $address = $value if($opt eq 'host');
    }

    if ( Opts::option_is_set('guest') ) {
        print <<"_END_";
# Check guest status
define service {
    use generic-service
    host_name $hostobj
    service_description VMWare: Status
    check_command check_vmware!$cmdopt --report=status
}
# Check guest memory
define service {
    use generic-service
    host_name $hostobj
    service_description VMWare: Memory
    check_command check_vmware!$cmdopt --report=memory
}
# Check guest CPU
define service {
    use generic-service
    host_name $hostobj
    service_description VMWare: CPU
    check_command check_vmware!$cmdopt --report=cpu
}
_END_
    } else {
        print <<"_END_";
# Dummy Host object for the datacenter/cluster, or ESX server host object
define host {
    use generic-host
    host_name $hostobj
    alias $alias
    address $address
}
# Check host/cluster/datacenter status
define service {
    use generic-service
    host_name $hostobj
    service_description VMWare: Status
    check_command check_vmware!$cmdopt --report=status
}
# Check host/cluster/datacenter memory
define service {
    use generic-service
    host_name $hostobj
    service_description VMWare: Memory
    check_command check_vmware!$cmdopt --report=memory
}
# Check host/cluster/datacenter CPU
define service {
    use generic-service
    host_name $hostobj
    service_description VMWare: CPU
    check_command check_vmware!$cmdopt --report=cpu
}
# Check host/cluster/datacenter disk space
define service {
    use generic-service
    host_name $hostobj
    service_description VMWare: Datastores
    check_command check_vmware!$cmdopt --report=disk
}
_END_
    }
}

# Print an example MRTG configuration for the object selected by the
# --guest / --datacenter / --cluster / --host options.  When none is given
# the targets are named after the placeholder "VMWARE", matching the Nagios
# generator, instead of being built from an undefined name.
sub makemrtgcfg() {
    my($cmdopt) = "";
    my($hostobj) = "VMWARE";

    print <<"_END_";
# This is an autogenerated MRTG configuration file
# You may wish to modify it before using!
#
# The vmware.cfg file must contain the necessary lines to define your
# VirtualCentre server and authentication parameters:
#VI_PASSWORD=secretpassword
#VI_SERVER=vmware-vc-server.auckland.ac.nz
#VI_USERNAME=adminuser
#
_END_
    $cmdopt = "--config=/usr/local/etc/vmware.cfg ";

    # Now, if we have a guest defined, then we output a guest
    # configuration. Similarly for host and farm.
    foreach my $opt (qw(guest datacenter cluster host)) {
        next if( !Opts::option_is_set($opt) );
        my $value = Opts::get_option($opt);
        $cmdopt .= "--$opt=\"".$value."\" ";
        $hostobj = $value;
    }

    if ( Opts::option_is_set('guest') ) {
        print <<"_END_";
# VMWare guest
# graph the CPU and Memory usage figures, plus detailed memory breakdown
# You may wish to add a --config= option to the check_vmware.pl call
# Resources graph
Target[$hostobj-res-vm]: `check_vmware.pl --mode=mrtg --report=cpu $cmdopt`
Title[$hostobj-res-vm]: $hostobj Resource Usage
MaxBytes[$hostobj-res-vm]: 100
PageTop[$hostobj-res-vm]: null
LegendI[$hostobj-res-vm]: cpu:
LegendO[$hostobj-res-vm]: mem:
Options[$hostobj-res-vm]: gauge growright
Ylegend[$hostobj-res-vm]: percent
ShortLegend[$hostobj-res-vm]: %
Legend1[$hostobj-res-vm]: CPU utilisation
Legend2[$hostobj-res-vm]: Memory utilisation
Legend3[$hostobj-res-vm]: Peak CPU utilisation
Legend4[$hostobj-res-vm]: Peak memory utilisation
routers.cgi*ShortDesc[$hostobj-res-vm]: VM: Resources
routers.cgi*Options[$hostobj-res-vm]: fixunit nototal nopercent
routers.cgi*Icon[$hostobj-res-vm]: chip-sm.gif
routers.cgi*InMenu[$hostobj-res-vm]: yes
routers.cgi*InCompact[$hostobj-res-vm]: yes
routers.cgi*InSummary[$hostobj-res-vm]: yes
# Detail CPU graph
Target[$hostobj-cpu-vm]: `check_vmware.pl --mode=mrtg --report=cpu1 $cmdopt`
Title[$hostobj-cpu-vm]: $hostobj CPU Usage
MaxBytes[$hostobj-cpu-vm]: 100
PageTop[$hostobj-cpu-vm]: null
LegendI[$hostobj-cpu-vm]: used:
LegendO[$hostobj-cpu-vm]: ready:
Options[$hostobj-cpu-vm]: gauge growright
Ylegend[$hostobj-cpu-vm]: percent
ShortLegend[$hostobj-cpu-vm]: %
Legend1[$hostobj-cpu-vm]: Used time
Legend2[$hostobj-cpu-vm]: Ready time
Legend3[$hostobj-cpu-vm]: Peak used
Legend4[$hostobj-cpu-vm]: Peak ready
routers.cgi*ShortDesc[$hostobj-cpu-vm]: VM: CPU
routers.cgi*Options[$hostobj-cpu-vm]: fixunit nototal nopercent
routers.cgi*Icon[$hostobj-cpu-vm]: chip-sm.gif
routers.cgi*InMenu[$hostobj-cpu-vm]: yes
routers.cgi*InCompact[$hostobj-cpu-vm]: yes
routers.cgi*InSummary[$hostobj-cpu-vm]: yes

# Memory active graph
Target[$hostobj-mem-active]: `check_vmware.pl --mode=mrtg --report=memory1 $cmdopt`
Title[$hostobj-mem-active]: $hostobj Active Memory
MaxBytes[$hostobj-mem-active]: 100000000000
PageTop[$hostobj-mem-active]: null
LegendI[$hostobj-mem-active]: active:
LegendO[$hostobj-mem-active]: memory:
Options[$hostobj-mem-active]: gauge growright dorelpercent
Ylegend[$hostobj-mem-active]: percent
ShortLegend[$hostobj-mem-active]: %
Legend1[$hostobj-mem-active]: Active memory
Legend2[$hostobj-mem-active]: Total memory
Legend3[$hostobj-mem-active]: Peak active memory
Legend4[$hostobj-mem-active]: Peak total memory
routers.cgi*ShortDesc[$hostobj-mem-active]: VM: Act Mem
routers.cgi*Options[$hostobj-mem-active]: fixunit nototal nopercent
routers.cgi*Icon[$hostobj-mem-active]: chip-sm.gif
routers.cgi*InMenu[$hostobj-mem-active]: yes
routers.cgi*InCompact[$hostobj-mem-active]: yes
routers.cgi*InSummary[$hostobj-mem-active]: yes

# Detailed Memory graph
Target[$hostobj-mem-ps]: `check_vmware.pl --mode=mrtg --report=memory2 $cmdopt`
Title[$hostobj-mem-ps]: $hostobj Memory Usage
MaxBytes[$hostobj-mem-ps]: 100
PageTop[$hostobj-mem-ps]: null
LegendI[$hostobj-mem-ps]: pvt:
LegendO[$hostobj-mem-ps]: shr:
Options[$hostobj-mem-ps]: gauge growright
Ylegend[$hostobj-mem-ps]: percent
ShortLegend[$hostobj-mem-ps]: %
Legend1[$hostobj-mem-ps]: Private memory
Legend2[$hostobj-mem-ps]: Shared memory
Legend3[$hostobj-mem-ps]: Peak private memory
Legend4[$hostobj-mem-ps]: Peak shared memory
routers.cgi*ShortDesc[$hostobj-mem-ps]: VM: Memory (pvt/shr)
routers.cgi*Options[$hostobj-mem-ps]: fixunit nototal nopercent
routers.cgi*Icon[$hostobj-mem-ps]: chip-sm.gif
routers.cgi*InMenu[$hostobj-mem-ps]: no
routers.cgi*InCompact[$hostobj-mem-ps]: yes
routers.cgi*InSummary[$hostobj-mem-ps]: no
routers.cgi*Graph[$hostobj-mem-ps]: $hostobj-vmem

Target[$hostobj-mem-bs]: `check_vmware.pl --mode=mrtg --report=memory3 $cmdopt`
Title[$hostobj-mem-bs]: $hostobj Memory Usage
MaxBytes[$hostobj-mem-bs]: 100
PageTop[$hostobj-mem-bs]: null
LegendI[$hostobj-mem-bs]: bal:
LegendO[$hostobj-mem-bs]: swp:
Options[$hostobj-mem-bs]: gauge growright
Ylegend[$hostobj-mem-bs]: percent
ShortLegend[$hostobj-mem-bs]: %
Legend1[$hostobj-mem-bs]: Balloon memory
Legend2[$hostobj-mem-bs]: Swapped memory
Legend3[$hostobj-mem-bs]: Peak balloon memory
Legend4[$hostobj-mem-bs]: Peak swapped memory
routers.cgi*ShortDesc[$hostobj-mem-bs]: VM: Memory (bal/swp)
routers.cgi*Options[$hostobj-mem-bs]: fixunit nototal nopercent
routers.cgi*Icon[$hostobj-mem-bs]: chip-sm.gif
routers.cgi*InMenu[$hostobj-mem-bs]: no
routers.cgi*InCompact[$hostobj-mem-bs]: yes
routers.cgi*InSummary[$hostobj-mem-bs]: no
routers.cgi*Graph[$hostobj-mem-bs]: $hostobj-vmem

routers.cgi*Desc[$hostobj-vmem]: $hostobj Memory Usage
routers.cgi*ShortDesc[$hostobj-vmem]: VM: Memory
routers.cgi*Icon[$hostobj-vmem]: chip-sm.gif
routers.cgi*InMenu[$hostobj-vmem]: yes
routers.cgi*InSummary[$hostobj-vmem]: yes
routers.cgi*GraphStyle[$hostobj-vmem]: stack
_END_
    } else {
        # For now we do the default, but really we should set up some
        # combined graphs for all hosts if datacenter or cluster is set
        print <<"_END_";
# VMWare datacenter/cluster/host
# You may wish to add a --config= directive to the command
# Graph CPU and Memory usage figures
# plus fairness figures
# And datastore (disk) space figures
# And count of active guests

# Resources graph
Target[$hostobj--res-cl]: `check_vmware.pl --mode=mrtg --report=cpu $cmdopt`
Title[$hostobj--res-cl]: $hostobj Resource Usage
MaxBytes[$hostobj--res-cl]: 100
PageTop[$hostobj--res-cl]: null
LegendI[$hostobj--res-cl]: cpu:
LegendO[$hostobj--res-cl]: mem:
Options[$hostobj--res-cl]: gauge growright
Ylegend[$hostobj--res-cl]: percent
ShortLegend[$hostobj--res-cl]: %
Legend1[$hostobj--res-cl]: CPU utilisation
Legend2[$hostobj--res-cl]: Memory utilisation
Legend3[$hostobj--res-cl]: Peak CPU utilisation
Legend4[$hostobj--res-cl]: Peak memory utilisation
routers.cgi*ShortDesc[$hostobj--res-cl]: VM: Resources
routers.cgi*Options[$hostobj--res-cl]: fixunit nototal nopercent
routers.cgi*Icon[$hostobj--res-cl]: chip-sm.gif
routers.cgi*InMenu[$hostobj--res-cl]: yes
routers.cgi*InCompact[$hostobj--res-cl]: yes
routers.cgi*InSummary[$hostobj--res-cl]: yes

# VMs active
Target[$hostobj--vm-actv]: `check_vmware.pl --mode=mrtg --report=status $cmdopt`
Title[$hostobj--vm-actv]: $hostobj Active Guests
MaxBytes[$hostobj--vm-actv]: 100000
PageTop[$hostobj--vm-actv]: null
LegendI[$hostobj--vm-actv]: active :
LegendO[$hostobj--vm-actv]: defined:
Options[$hostobj--vm-actv]: gauge growright integer
Ylegend[$hostobj--vm-actv]: Guests
ShortLegend[$hostobj--vm-actv]:
Legend1[$hostobj--vm-actv]: Active guests
Legend2[$hostobj--vm-actv]: Defined guests
Legend3[$hostobj--vm-actv]: Peak active guests
Legend4[$hostobj--vm-actv]: Peak defined guests
routers.cgi*ShortDesc[$hostobj--vm-actv]: VM: Guests
routers.cgi*Options[$hostobj--vm-actv]: fixunit nototal nopercent nomax
routers.cgi*Icon[$hostobj--vm-actv]: server-sm.gif
routers.cgi*InMenu[$hostobj--vm-actv]: yes
routers.cgi*InCompact[$hostobj--vm-actv]: yes
routers.cgi*InSummary[$hostobj--vm-actv]: yes

# Datastores
Target[$hostobj--vm-ds]: `check_vmware.pl --mode=mrtg --report=disk $cmdopt`
Title[$hostobj--vm-ds]: $hostobj Datastores
MaxBytes[$hostobj--vm-ds]: 1000000000000000
PageTop[$hostobj--vm-ds]: null
LegendI[$hostobj--vm-ds]: used :
LegendO[$hostobj--vm-ds]: total:
Options[$hostobj--vm-ds]: gauge growright dorelpercent
Ylegend[$hostobj--vm-ds]: Percent
ShortLegend[$hostobj--vm-ds]: %
Legend1[$hostobj--vm-ds]: Space used
Legend2[$hostobj--vm-ds]: Space available
Legend3[$hostobj--vm-ds]: Peak space used
Legend4[$hostobj--vm-ds]: Peak space available
routers.cgi*ShortDesc[$hostobj--vm-ds]: VM: Datastores
routers.cgi*Options[$hostobj--vm-ds]: fixunit nototal nopercent
routers.cgi*Icon[$hostobj--vm-ds]: disk-sm.gif
routers.cgi*InMenu[$hostobj--vm-ds]: yes
routers.cgi*InCompact[$hostobj--vm-ds]: yes
routers.cgi*InSummary[$hostobj--vm-ds]: yes

_END_
    }
}
#########################################################################
# Private helpers used by the reports and by MAIN

# Run Vim::find_entity_views once for every base entity in @$begin and
# return a reference to the combined list of views.  The named arguments
# (view_type, filter, ...) are passed through unchanged.  begin_entity takes
# one entity per call, so the search is repeated rather than flattening
# @$begin into the argument list.
sub _find_under_begin {
    my(%args) = @_;
    my(@found) = ();
    foreach my $base ( @$begin ) {
        my $views = Vim::find_entity_views( %args, begin_entity => $base );
        push @found,@$views if($views);
    }
    return \@found;
}

# Given a reference to a list of guest views, return a one-element list
# holding the first powered-on guest; if none is powered on, return the list
# unchanged.
sub _prefer_powered_on {
    my($views) = @_;
    foreach my $v ( @$views ) { # we may have several matching guests
        my $state = $v->runtime->powerState->val;
        print "Guest is $state\n" if($DEBUG);
        return [ $v ] if($state eq 'poweredOn'); # Just keep the active one
    }
    return $views;
}

# Run the given report sub once for every powered-on guest below @$begin and
# submit each result through sendnsca() under the service name $service.
# Afterwards the status globals are reset and $vm is cleared, ready for the
# real (host/cluster level) report that the caller runs next.
sub _nsca_sweep_guests {
    my($reportsub,$service) = @_;

    # loop through ALL guests in this host/farm
    print "Finding full list of guests\n" if($DEBUG);
    my $glist = _find_under_begin( view_type => 'VirtualMachine' );
    foreach my $v ( @$glist ) {
        next if( $v->runtime->powerState->val ne 'poweredOn' );
        $vm = [ $v ];
        $MSG = ""; $STATUS=0;
        my $guestname = $v->guest->hostName;
        $guestname = $v->name if(!$guestname);
        $guestname =~ s/^\s+//;
        $guestname =~ s/\s+.*$//;
        next if(!$guestname);
        $guestname = canonical($guestname);

        print "Looping for $guestname\n" if($DEBUG);

        $reportsub->();

        $MSG = "All OK" if(!$MSG);
        print "\n$guestname is [$STATUS] $MSG\n\n" if($DEBUG);
        sendnsca($guestname,$service,$STATUS,$MSG) if($STATUS<3);
        %perfkeys = (); @metricids = (); @queries = ();
    }
    $PERF = "|"; $MSG = ""; $STATUS=0; $vm = 0;
    print "*** Now running the real report...\n" if($DEBUG);
}
#########################################################################
# MAIN

Opts::add_options(%opts);
Opts::parse();
if( Opts::option_is_set('debug') ) {
    $DEBUG=Opts::get_option('debug');
    $DEBUG=1 if(!$DEBUG);
}
$mode = 0;
if( Opts::option_is_set('mode') ) {
    if( Opts::get_option('mode') =~ /mrtg/i ) {
        $isnagios = 0;
        $mode = $1 if( Opts::get_option('mode') =~ /(\d+)/i ); # historical
    }
}
Opts::validate(\&validate);
print "Starting.\n" if($DEBUG);
$report = Opts::get_option('report');
$report = '' if(!defined $report); # no report given means the state report
$mode = $1 if( $report =~ /(\d+)/i );

if( Opts::option_is_set('generate') ) {
    # generate config mode!
    if($isnagios) { makenagioscfg(); } else { makemrtgcfg(); }
    exit 0;
}


#if( Opts::option_is_set('warnactive') ) {
#   $WARNACTIVE = Opts::get_option('warnactive');
#   if($WARNACTIVE<1 or $WARNACTIVE>99) {
#       print "Usage: 0<warnactive<100\%\n"; exit 3; }
#}
#if( Opts::option_is_set('critactive') ) {
#   $CRITACTIVE = Opts::get_option('critactive');
#   if($CRITACTIVE<$WARNACTIVE or $CRITACTIVE>99) {
#       print "Usage: warnactive<critactive<100\%\n"; exit 3; }
#}
if( Opts::option_is_set('warnready') ) {
    $WARNREADY = Opts::get_option('warnready');
    if($WARNREADY<1 or $WARNREADY>99) {
        print "Usage: 0<warnready<100\%\n"; exit 3; }
}
if( Opts::option_is_set('critready') ) {
    $CRITREADY = Opts::get_option('critready');
    if($CRITREADY<$WARNREADY or $CRITREADY>99) {
        print "Usage: warnready<critready<100\%\n"; exit 3; }
}
# --warn and --crit apply to whichever report has been requested
if( Opts::option_is_set('warn') ) {
    if($report =~ /cpu/) {
        $WARNCPU = Opts::get_option('warn');
        if($WARNCPU<1 or $WARNCPU>99) {
            print "Usage: 0<warn<100\%\n"; exit 3; }
    } elsif($report =~ /mem/ ) {
        $WARNMEM = Opts::get_option('warn');
        if($WARNMEM<1 or $WARNMEM>99) {
            print "Usage: 0<warn<100\%\n"; exit 3; }
    } elsif($report =~ /disk|data/ ) {
        $WARNSPACE = Opts::get_option('warn');
        if($WARNSPACE<0) { print "Usage: warn >= 0GB\n"; exit 3; }
    }
}
if( Opts::option_is_set('crit') ) {
    if($report =~ /cpu/) {
        $CRITCPU = Opts::get_option('crit');
        if($CRITCPU<$WARNCPU or $CRITCPU>99) {
            print "Usage: warn<crit<100\%\n"; exit 3; }
    } elsif($report =~ /mem/ ) {
        $CRITMEM = Opts::get_option('crit');
        if($CRITMEM<$WARNMEM or $CRITMEM>99) {
            print "Usage: warn<crit<100\%\n"; exit 3; }
    } elsif($report =~ /disk|data/ ) {
        $CRITSPACE= Opts::get_option('crit');
        if($CRITSPACE>$WARNSPACE or $CRITSPACE<0) {
            print "Usage: warn > crit >= 0GB\n"; exit 3; }
    }
}


if(Opts::option_is_set('savesessionfile')) {
    $SESSIONFILE=Opts::get_option('savesessionfile');
}
if(Opts::option_is_set('sessionfile')) {
    $SESSIONFILE=Opts::get_option('sessionfile');
}
if( $SESSIONFILE and -f $SESSIONFILE
    and ( ! -w $SESSIONFILE or ! -r $SESSIONFILE ) ) {
    dounknown("Unable to read/write session file $SESSIONFILE");
}

# First, connect to VI
if($SESSIONFILE and -f $SESSIONFILE) {
    my(@s) = stat $SESSIONFILE;
    if( (time-$s[9])>1200 ) {
        # session file is >20mins old, lets reconnect
        unlink $SESSIONFILE;
        print "Expiring old session file\n" if($DEBUG);
    }
}
if($SESSIONFILE and -f $SESSIONFILE) {
    # load the saved session instead
    print "Loading session file\n" if($DEBUG);
    Vim::load_session(session_file=>$SESSIONFILE);
} else {
    print "Connecting\n" if($DEBUG);
    eval {
        $SIG{ALRM} = sub { die('TIMEOUT'); };
        alarm($TIMEOUT);
        Util::connect();
        alarm(0);
    };
    my $err = $@;
    alarm(0); # cancel the timer if connect() died before reaching alarm(0)
    if($err) {
        dounknown("No response from VirtualCentre server") if($err =~ /TIMEOUT/);
        dounknown("You need to upgrade HTTP::Message!") if($err =~ /HTTP::Message/);
        dounknown("Login to VirtualCentre server failed: $err.");
    }
    print "Connected\n" if($DEBUG);
}
if($DEBUG) {
    my $si_moref = ManagedObjectReference->new(type => 'ServiceInstance',
        value => 'ServiceInstance');
    my $si_view = Vim::get_view(mo_ref => $si_moref);
    print "Server Time : ". $si_view->CurrentTime()."\n";
}

$servicecontent = Vim::get_service_content();

# Now, try and work out the 'begin' entity - host>cluster>datacenter>top
# @$begin is a list of the base to search in.
if ( Opts::option_is_set('datacenter') ) {
    my($dc) = Opts::get_option('datacenter');
    $begin = Vim::find_entity_views (view_type => 'Datacenter',
        filter => {name => $dc });
    unless (@$begin) { dounknown("Datacenter '$dc' not found."); }
#   if ($#{$begin} != 0) { doerror("Datacenter <$dc> not unique."); }
} else {
#   @$begin = ( $servicecontent->rootFolder );
    $begin = [ Vim::get_view( mo_ref=>$servicecontent->rootFolder ) ];
}
if(!@$begin) {
    dounknown("Unable to obtain root folder");
}
if ( Opts::option_is_set('cluster') ) {
    my($cl) = Opts::get_option('cluster');
    $begin = _find_under_begin(view_type => 'ClusterComputeResource',
        filter => {name => $cl });
    unless (@$begin) { dounknown("Cluster '$cl' not found."); }
#   if ($#{$begin} != 0) { doerror("Cluster <$cl> not unique."); }
}
if ( Opts::option_is_set('host') ) {
    my($ho) = Opts::get_option('host');
    $begin = _find_under_begin(view_type => 'HostSystem',
        filter => {name => $ho });
    unless (@$begin) { dounknown("Host system '$ho' not found."); }
#   if ($#{$begin} != 0) { doerror("Host system <$ho> not unique."); }
}

# Do we need to identify a VM?
if ( Opts::option_is_set('guest') ) {
    my($gu) = Opts::get_option('guest');
    print "Trying to locate $gu\n" if($DEBUG);
    $vm = _find_under_begin(view_type => 'VirtualMachine',
        filter => {name => $gu });
    unless(@$vm) {
        print "Now trying as hostname...\n" if($DEBUG);
        # \Q..\E: match the given name literally (case-insensitive substring),
        # so dots and other metacharacters in a hostname are not treated as
        # regex syntax.
        $vm = _prefer_powered_on( _find_under_begin(
            view_type => 'VirtualMachine',
            filter => { 'guest.hostName' => qr/\Q$gu\E/i }) );
    }
    unless(@$vm) {
        print "Now trying as IP address...\n" if($DEBUG);
        $vm = _prefer_powered_on( _find_under_begin(
            view_type => 'VirtualMachine',
            filter => { 'guest.ipAddress' => $gu }) );
#           filter => { 'guest.net[0].ipAddress' => $gu });
    }
    unless(@$vm) {
        if(!$report or $report =~ /state/i) {
            doerror("Guest '$gu' not found.");
        }
        dounknown("Guest '$gu' not found.");
    }
}

# Right, now we know where to start from. Possibly this is identifying
# a unique host, but not necessarily. We may also have a VM but not
# necessarily.

print "Report type requested is [$report]\n" if($DEBUG);
print "Base is ".$begin->[0]->name."\n" if($DEBUG);
#foreach ( keys %{$begin->[0]} ) {
#   print $_." = ".$begin->[0]{$_}."\n";
#}
#exit 0;

# Now, if we DONT have a guest set, but DO have nsca set, and are using
# Nagios mode plus a report type of CPU or MEM then we're going to query
# ALL the guests and feed the results back in via NSCA
#


# Now, depending on what we're asking for, call a different function
if($report =~ /cpu/i) {
    if($isnagios and !$vm and Opts::option_is_set('nsca')) {
        _nsca_sweep_guests(\&cpureport,"VMware: Resources: CPU");
    }
    cpureport();
} elsif($report =~ /mem/i) {
    if($isnagios and !$vm and Opts::option_is_set('nsca')) {
        _nsca_sweep_guests(\&memreport,"VMware: Resources: Memory");
    }
    memreport();
} elsif($report =~ /dis[ck]|datastore/i) {
    diskreport();
} elsif($report =~ /net/i) {
    netreport();
} else { # state
    statereport();
}

# Now disconnect from VI
if(Opts::option_is_set('savesessionfile')) {
    Vim::save_session(session_file=>Opts::get_option('savesessionfile'));
} else {
    print "Disconnecting...\n" if($DEBUG);
    Util::disconnect();
}

# clean up
if($havensca) { print "Closing NSCA connection\n" if($DEBUG); close NSCAPROC; }
$PERF="" if($PERF eq '|'); # no perf stats

# And output the status
if($isnagios) {
    print "Exiting with status ($STATUS)\n" if($DEBUG);
    print "$MSG$PERF\n";
    exit($STATUS);
}

print "$A\n$B\n\n$MSG\n";
exit 0;