1#!/usr/bin/env perl 2 3# 4# Copyright © 2013-2017 Inria. All rights reserved. 5# 6# See COPYING in top-level directory. 7# 8# $HEADER$ 9# 10 11use strict; 12 13use Getopt::Long; 14 15my $HWLOC_top_builddir = "@HWLOC_top_builddir@"; 16my $prefix = "@prefix@"; 17my $exec_prefix = "@exec_prefix@"; 18my $bindir = "@bindir@"; 19# this will be changed into $bindir/... during make install 20my $locallstopo = "$HWLOC_top_builddir/utils/lstopo/lstopo-no-graphics"; 21 22my $hwlocdir = undef; 23my $outdir = undef; 24my @forcesubnets; 25my $needsudo = 0; 26my $ibnetdiscover = "/usr/sbin/ibnetdiscover"; 27my $ibroute = "/usr/sbin/ibroute"; 28my $verbose = 0; 29my $force = 0; 30my $dryrun = 0; 31my $ignoreerrors = 0; 32my $help = 0; 33my $sleep = 0; 34 35my $sleepcount = 0; 36sub sleep_between_probes { 37 my $indent = shift; 38 if ($sleep) { 39 print $indent."... Sleeping $sleep seconds between probes ...\n" unless $sleepcount; 40 sleep $sleep if $sleepcount; 41 $sleepcount++; 42 } 43} 44 45&Getopt::Long::Configure("bundling"); 46my $ok = Getopt::Long::GetOptions( 47 "hwloc-dir=s" => \$hwlocdir, 48 "force-subnet=s" => \@forcesubnets, 49 "sudo" => \$needsudo, 50 "ibnetdiscover=s" => \$ibnetdiscover, 51 "ibroute=s" => \$ibroute, 52 "sleep=i" => \$sleep, 53 "ignore-errors" => \$ignoreerrors, 54 "verbose|v" => \$verbose, 55 "force|f" => \$force, 56 "dry-run" => \$dryrun, 57 "help|h" => \$help 58 ); 59 60if ($help or !$ok or !defined $ARGV[0]) { 61 print "$0 [options] <outdir>\n"; 62 print " Dumps topology information to <outdir>/ib-raw/\n"; 63 print " Subnets are guessed from the <outdir>/hwloc/ directory where\n"; 64 print " the hwloc XML exports of some nodes are stored.\n"; 65 print "Options:\n"; 66 print " --sudo\n"; 67 print " Pass sudo to internal ibnetdiscover and ibroute invocations.\n"; 68 print " Useful when the entire script cannot run as root.\n"; 69 print " --hwloc-dir <dir>\n"; 70 print " Use <dir> instead of <outdir>/hwloc/ for hwloc XML exports.\n"; 71 print " --force-subnet [<subnet>:]<board>:<port> to force the discovery\n"; 72 print " Do not guess subnets from hwloc XML exports.\n"; 73 print " Force discovery on local board <board> port <port>\n"; 74 print " and optionally force the subnet id <subnet>\n"; 75 print " instead of reading it from the first GID.\n"; 76 print " Examples: --force-subnet mlx4_0:1\n"; 77 print " --force-subnet fe80:0000:0000:0000:mlx4_0:1\n"; 78 print " --ibnetdiscover /path/to/ibnetdiscover\n"; 79 print " --ibroute /path/to/ibroute\n"; 80 print " Specify exact location of programs. Default is /usr/bin/<program>\n"; 81 print " --sleep <n>\n"; 82 print " Sleep for <n> seconds between invocations of programs probing the network\n"; 83 print " --ignore-errors\n"; 84 print " Ignore errors from ibnetdiscover and ibroute, assume their outputs are ok\n"; 85 print " --force -f\n"; 86 print " Always rediscover to overwrite existing files without asking\n"; 87 print " --verbose -v\n"; 88 print " Add verbose messages\n"; 89 print " --dry-run\n"; 90 print " Do not actually run programs or modify anything\n"; 91 print " --help -h\n"; 92 print " Show this help\n"; 93 exit(1); 94} 95 96my $outdir = $ARGV[0]; 97 98mkdir $outdir unless $dryrun; 99die "$outdir isn't a directory\n" unless -d $outdir; 100mkdir "$outdir/ib-raw" unless $dryrun; 101die "$outdir/ib-raw isn't a directory\n" unless -d "$outdir/ib-raw"; 102 103my $sudo = $needsudo ? "sudo" : ""; 104 105if (`id -u` ne 0 and !$sudo and !$dryrun) { 106 print "WARNING: Not running as root.\n"; 107} 108 109# subnets that will be discovered locally 110my %subnets_todiscover; 111 112######################################### 113# Read forced subnets 114if (@forcesubnets) { 115 print "Enforcing list of subnets to discover:\n"; 116 foreach my $subnetstring (@forcesubnets) { 117 if ($subnetstring =~ /^([0-9a-fA-F:]{19}):([0-9a-z_-]+):([0-9]+)$/) { 118 my $subnet = $1; 119 my $boardname = $2; 120 my $portnum = $3; 121 print " Subnet $subnet from local board $boardname port $portnum.\n"; 122 $subnets_todiscover{$subnet}->{localboardname} = $boardname; 123 $subnets_todiscover{$subnet}->{localportnum} = $portnum; 124 125 } elsif ($subnetstring =~ /^([0-9a-z_-]+):([0-9]+)$/) { 126 my $boardname = $1; 127 my $portnum = $2; 128 my $subnet; 129 print " Unknown subnet from local board $boardname port $portnum.\n"; 130 my $filename = "/sys/class/infiniband/$boardname/ports/$portnum/gids/0"; 131 if (open FILE, $filename) { 132 my $line = <FILE>; 133 if ($line =~ /^([0-9a-fA-F:]{19}):([0-9a-fA-F:]{19})$/) { 134 $subnet = $1 135 } 136 close FILE; 137 } 138 if (defined $subnet) { 139 print " Found subnet $subnet from first GID.\n"; 140 $subnets_todiscover{$subnet}->{localboardname} = $boardname; 141 $subnets_todiscover{$subnet}->{localportnum} = $portnum; 142 } else { 143 print " Couldn't read subnet from GID $filename, ignoring.\n"; 144 } 145 146 } else { 147 print " Cannot parse --force-subnet $subnetstring, ignoring.\n"; 148 } 149 } 150 print "\n"; 151 152} else { 153 ######################################### 154 # Guess subnets from hwloc 155 if (!defined $hwlocdir) { 156 $hwlocdir = "${outdir}/hwloc"; 157 print "Using $hwlocdir as hwloc lstopo XML directory.\n\n"; 158 } 159 160 mkdir $hwlocdir unless $dryrun; 161 die "$hwlocdir isn't a directory\n" unless -d $hwlocdir; 162 163 # at least get the local hwloc XML 164 my $hostname = `hostname`; 165 chomp $hostname; 166 my $lstopooutput = "$hwlocdir/${hostname}.xml"; 167 if (!-f $lstopooutput) { 168 print "Exporting local node hwloc XML...\n"; 169 print " Running lstopo-no-graphics...\n"; 170 my $cmd = "$locallstopo -f $lstopooutput"; 171 if ($dryrun) { 172 print " NOT running $cmd\n" if $verbose; 173 } else { 174 my $ret = system "$cmd" ; 175 if ($ret and !$ignoreerrors) { 176 print " Failed (exit code $ret).\n"; 177 } 178 } 179 print "\n"; 180 } 181 182 # $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidnum}->{subnet} and ->{guid} = xxxx:xxxx:xxxx:xxxx 183 # $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{invalid} = 1 184 # $servers{$hostname}->{subnets}->{$subnet} = 1 185 my %servers; 186 187 # $subnets{$subnet}->{$hostname} = 1; 188 my %subnets; 189 190 opendir DIR, $hwlocdir 191 or die "Failed to open hwloc directory ($!).\n"; 192 # list subnets by ports 193 while (my $hwlocfile = readdir DIR) { 194 my $hostname; 195 if ($hwlocfile =~ /(.+).xml$/) { 196 $hostname = $1; 197 } else { 198 next; 199 } 200 201 open FILE, "$hwlocdir/$hwlocfile" or next; 202 my $boardname = undef; 203 my $portnum = undef; 204 while (my $line = <FILE>) { 205 if ($line =~ /<object type=\"OSDev\".* name=\"((?!hfi)(?!usnic).+)\".* osdev_type=\"3\".*>/) { 206 $boardname = $1; 207 } elsif (defined $boardname) { 208 if ($line =~ /<\/object>/) { 209 $boardname = undef; 210 } elsif ($line =~ /<info name=\"Port([0-9]+)GID([0-9]+)\".* value=\"([0-9a-fA-F:]{19}):([0-9a-fA-F:]{19})\"\/.*>/) { 211 $servers{$hostname}->{gids}->{$boardname}->{$1}->{$2}->{subnet} = $3; 212 $servers{$hostname}->{gids}->{$boardname}->{$1}->{$2}->{guid} = $4; 213 } elsif ($line =~ /<info name=\"Port([0-9]+)LID\" value=\"(0x[0-9a-fA-F]+)\"\/>/) { 214 # lid must be between 0x1 and 0xbfff 215 if ((hex $2) < 1 or (hex $2) > 49151) { 216 $servers{$hostname}->{gids}->{$boardname}->{$1}->{invalid} = 1; 217 } 218 } elsif ($line =~ /<info name=\"Port([0-9]+)State\" value=\"([0-9])\"\/>/) { 219 # state must be active = 4 220 if ($2 != 4) { 221 $servers{$hostname}->{gids}->{$boardname}->{$1}->{invalid} = 1; 222 } 223 } 224 } 225 } 226 close FILE; 227 } 228 closedir DIR; 229 230 # remove down/inactive ports/servers/... 231 foreach my $hostname (keys %servers) { 232 foreach my $boardname (keys %{$servers{$hostname}->{gids}}) { 233 foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) { 234 delete $servers{$hostname}->{gids}->{$boardname}->{$portnum} 235 if exists $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{invalid}; 236 } 237 delete $servers{$hostname}->{gids}->{$boardname} 238 unless keys %{$servers{$hostname}->{gids}->{$boardname}}; 239 } 240 delete $servers{$hostname} 241 unless keys %{$servers{$hostname}->{gids}}; 242 } 243 244 # fill list of hostnames per subnets and subnets per hostnames 245 foreach my $hostname (keys %servers) { 246 foreach my $boardname (keys %{$servers{$hostname}->{gids}}) { 247 foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) { 248 foreach my $gidid (keys %{$servers{$hostname}->{gids}->{$boardname}->{$portnum}}) { 249 my $subnet = $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidid}->{subnet}; 250 $servers{$hostname}->{subnets}->{$subnet} = 1; 251 $subnets{$subnet}->{$hostname} = 1; 252 } 253 } 254 } 255 } 256 257 my $nrsubnets = scalar (keys %subnets); 258 print "Found $nrsubnets subnets in hwloc directory:\n"; 259 # find local subnets 260 my $localhostname = `hostname`; chomp $localhostname; 261 { 262 my $hostname = $localhostname; 263 foreach my $boardname (keys %{$servers{$hostname}->{gids}}) { 264 foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) { 265 foreach my $gidid (keys %{$servers{$hostname}->{gids}->{$boardname}->{$portnum}}) { 266 my $subnet = $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidid}->{subnet}; 267 if (!exists $subnets_todiscover{$subnet}) { 268 print " Subnet $subnet is locally accessible from board $boardname port $portnum.\n"; 269 $subnets_todiscover{$subnet}->{localboardname} = $boardname; 270 $subnets_todiscover{$subnet}->{localportnum} = $portnum; 271 } elsif ($verbose) { 272 print " Subnet $subnet is also locally accessible from board $boardname port $portnum.\n"; 273 } 274 } 275 } 276 } 277 } 278 # find non-locally accessible subnets 279 foreach my $subnet (keys %subnets) { 280 next if exists $subnets{$subnet}->{$localhostname}; 281 print " Subnet $subnet is NOT locally accessible.\n"; 282 my @hostnames = (keys %{$subnets{$subnet}}); 283 if ($verbose) { 284 print " Subnet $subnet is accessible from nodes:\n"; 285 foreach my $hostname (@hostnames) { 286 print " $hostname\n"; 287 } 288 } else { 289 print " Subnet $subnet is accessible from node ".$hostnames[0]; 290 print " (and ".(@hostnames-1)." others)" if (@hostnames > 1); 291 print "\n"; 292 } 293 } 294 print "\n"; 295 296 # list nodes that are connected to all subnets, if the local isn't 297 if (scalar keys %{$servers{$localhostname}->{subnets}} != $nrsubnets) { 298 my @fullyconnectedhostnames; 299 foreach my $hostname (keys %servers) { 300 if (scalar keys %{$servers{$hostname}->{subnets}} == $nrsubnets) { 301 push @fullyconnectedhostnames, $hostname; 302 } 303 } 304 if (@fullyconnectedhostnames) { 305 if ($verbose) { 306 print "All subnets are accessible from nodes:\n"; 307 foreach my $hostname (@fullyconnectedhostnames) { 308 print " $hostname\n"; 309 } 310 } else { 311 print "All subnets are accessible from node ".$fullyconnectedhostnames[0]; 312 print " (and ".(@fullyconnectedhostnames-1)." others)" if (@fullyconnectedhostnames > 1); 313 print "\n"; 314 } 315 } else { 316 print "No node is connected to all subnets.\n"; 317 } 318 print "\n"; 319 } 320} 321 322########################### 323# Discovery routines 324 325# ibnetdiscover has GUIDs in the form of 0xXXXXXXXXXXXXXXXX, but hwloc 326# has GUIDs in the form of XXXX:XXXX:XXXX:XXXX. So just arbitrarily 327# choose hwloc's form and convert everything to that format. 328sub normalize_guid { 329 my ($guid) = @_; 330 331 return "" 332 if ($guid eq ""); 333 334 $guid =~ m/0x(.{4})(.{4})(.{4})(.{4})/; 335 return "$1:$2:$3:$4"; 336} 337 338sub getroutes { 339 my $subnet = shift; 340 my $boardname = shift; 341 my $portnum = shift; 342 my $ibnetdiscoveroutput = shift; 343 my $ibrouteoutdir = shift; 344 my $lids; 345 346 if (!open(FILE, $ibnetdiscoveroutput)) { 347 print " Couldn't open $ibnetdiscoveroutput\n"; 348 return; 349 } 350 351 while (<FILE>) { 352 # We only need lines that begin with SW 353 next 354 if (! /^SW /); 355 356 # Split out the columns. Yay regexps. One form of line has 357 # both source and destination information. The other form 358 # only has source information (because it's not hooked up to 359 # anything -- usually a switch port that doesn't have anything 360 # plugged in to it). 361 chomp; 362 my $line = $_; 363 364 my ($have_peer, $src_name, $src_type, $src_lid, $src_port_id, 365 $src_guid, $width, $speed, $dest_type, $dest_lid, $dest_port_id, 366 $dest_guid, $dest_name); 367 368 # First, assume that the line has both a port and a peer. 369 if ($line !~ m/^SW\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+(\d+x)\s([^\s]*)\s+-\s+(CA|SW)\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+\(\s+'(.+?)'\s+-\s+'(.+?)'\s\)/) { 370 # If we get here, there was no peer -- just a port. 371 $have_peer = 0; 372 373 if ($line !~ m/^SW\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+(\d+x)\s([^\s]*)\s+'(.+?)'/) { 374 print "Line cannot be parsed:\n$line\n"; 375 next; 376 } 377 $src_lid = $1; # This is a decimal number 378 $src_port_id = $2; # This is a decimal number 379 $src_guid = $3; 380 $width = $4; 381 $speed = $5; 382 $src_name = $6; 383 } else { 384 $have_peer = 1; 385 386 $src_lid = $1; # This is a decimal number 387 $src_port_id = $2; # This is a decimal number 388 $src_guid = $3; 389 $width = $4; 390 $speed = $5; 391 $dest_type = $6; 392 $dest_lid = $7; # This is a decimal number 393 $dest_port_id = $8; # This is a decimal number 394 $dest_guid = $9; 395 $src_name = $10; 396 $dest_name = $11; 397 } 398 399 # Convert GUIDs to the form xxxx:xxxx:xxxx:xxxx 400 $src_guid = normalize_guid($src_guid); 401 $dest_guid = normalize_guid($dest_guid) 402 if ($have_peer); 403 404 # If the source switch LID already exists, then just keep 405 # going. 406 next 407 if (exists($lids->{$src_lid})); 408 409 # Run ibroute on this switch LID 410 my $ibrouteoutput = "$ibrouteoutdir/ibroute-$subnet-$src_lid.txt"; 411 print " Running ibroute for switch '$src_name' LID $src_lid...\n"; 412 my $cmd = "$sudo $ibroute -C $boardname -P $portnum $src_lid"; 413 if ($dryrun) { 414 print " NOT running $cmd\n" if $verbose; 415 } else { 416 sleep_between_probes (" "); 417 my $ret = system "$cmd > ${ibrouteoutput}.new" ; 418 if (!$ret or $ignoreerrors) { 419 unlink ${ibrouteoutput}; 420 rename "${ibrouteoutput}.new", "${ibrouteoutput}"; 421 } else { 422 unlink "${ibrouteoutput}.new"; 423 print " Failed (exit code $ret).\n"; 424 next; 425 } 426 } 427 428 $lids->{$src_lid} = 1; 429 } 430 431 close FILE; 432} 433 434##############################" 435# Discover subnets for real 436 437foreach my $subnet (keys %subnets_todiscover) { 438 my $boardname = $subnets_todiscover{$subnet}->{localboardname}; 439 my $portnum = $subnets_todiscover{$subnet}->{localportnum}; 440 441 print "Looking at $subnet (through local board $boardname port $portnum)...\n"; 442 443 my $ibnetdiscoveroutput = "$outdir/ib-raw/ib-subnet-$subnet.txt"; 444 my $ibrouteoutdir = "$outdir/ib-raw/ibroutes-$subnet"; 445 446 if (-f $ibnetdiscoveroutput and !$force) { 447 print " $ibnetdiscoveroutput already exists, discover again? (y/n) "; 448 my $answer = <STDIN>; 449 next if $answer !~ /^y/; 450 } 451 452 print " Running ibnetdiscover...\n"; 453 my $cmd = "$sudo $ibnetdiscover -s -l -g -H -S -R -p -C $boardname -P $portnum"; 454 if ($dryrun) { 455 print " NOT running $cmd\n" if $verbose; 456 } else { 457 sleep_between_probes (" "); 458 print " $cmd\n" if $verbose; 459 my $ret = system "$cmd > ${ibnetdiscoveroutput}.new" ; 460 if (!$ret or $ignoreerrors) { 461 unlink ${ibnetdiscoveroutput}; 462 rename "${ibnetdiscoveroutput}.new", "${ibnetdiscoveroutput}"; 463 } else { 464 unlink "${ibnetdiscoveroutput}.new"; 465 print " Failed (exit code $ret).\n"; 466 next; 467 } 468 } 469 470 print " Getting routes...\n"; 471 if (!$dryrun) { 472 system("rm -rf $ibrouteoutdir"); 473 mkdir $ibrouteoutdir unless $dryrun; 474 die "$ibrouteoutdir isn't a directory\n" unless -d $ibrouteoutdir; 475 } 476 getroutes $subnet, $boardname, $portnum, $ibnetdiscoveroutput, $ibrouteoutdir; 477} 478