1#!/usr/bin/env perl
2
3#
4# Copyright © 2013-2017 Inria.  All rights reserved.
5#
6# See COPYING in top-level directory.
7#
8# $HEADER$
9#
10
11use strict;
12
13use Getopt::Long;
14
15my $HWLOC_top_builddir = "@HWLOC_top_builddir@";
16my $prefix = "@prefix@";
17my $exec_prefix = "@exec_prefix@";
18my $bindir = "@bindir@";
19# this will be changed into $bindir/... during make install
20my $locallstopo = "$HWLOC_top_builddir/utils/lstopo/lstopo-no-graphics";
21
22my $hwlocdir = undef;
23my $outdir = undef;
24my @forcesubnets;
25my $needsudo = 0;
26my $ibnetdiscover = "/usr/sbin/ibnetdiscover";
27my $ibroute = "/usr/sbin/ibroute";
28my $verbose = 0;
29my $force = 0;
30my $dryrun = 0;
31my $ignoreerrors = 0;
32my $help = 0;
33my $sleep = 0;
34
35my $sleepcount = 0;
36sub sleep_between_probes {
37  my $indent = shift;
38  if ($sleep) {
39    print $indent."... Sleeping $sleep seconds between probes ...\n" unless $sleepcount;
40    sleep $sleep if $sleepcount;
41    $sleepcount++;
42  }
43}
44
45&Getopt::Long::Configure("bundling");
46my $ok = Getopt::Long::GetOptions(
47	"hwloc-dir=s" => \$hwlocdir,
48	"force-subnet=s" => \@forcesubnets,
49	"sudo" => \$needsudo,
50	"ibnetdiscover=s" => \$ibnetdiscover,
51	"ibroute=s" => \$ibroute,
52	"sleep=i" => \$sleep,
53        "ignore-errors" => \$ignoreerrors,
54	"verbose|v" => \$verbose,
55	"force|f" => \$force,
56	"dry-run" => \$dryrun,
57	"help|h" => \$help
58    );
59
60if ($help or !$ok or !defined $ARGV[0]) {
61    print "$0 [options] <outdir>\n";
62    print "  Dumps topology information to <outdir>/ib-raw/\n";
63    print "  Subnets are guessed from the <outdir>/hwloc/ directory where\n";
64    print "  the hwloc XML exports of some nodes are stored.\n";
65    print "Options:\n";
66    print " --sudo\n";
67    print "    Pass sudo to internal ibnetdiscover and ibroute invocations.\n";
68    print "    Useful when the entire script cannot run as root.\n";
69    print " --hwloc-dir <dir>\n";
70    print "    Use <dir> instead of <outdir>/hwloc/ for hwloc XML exports.\n";
71    print " --force-subnet [<subnet>:]<board>:<port> to force the discovery\n";
72    print "    Do not guess subnets from hwloc XML exports.\n";
73    print "    Force discovery on local board <board> port <port>\n";
74    print "    and optionally force the subnet id <subnet>\n";
75    print "    instead of reading it from the first GID.\n";
76    print "    Examples: --force-subnet mlx4_0:1\n";
77    print "              --force-subnet fe80:0000:0000:0000:mlx4_0:1\n";
78    print " --ibnetdiscover /path/to/ibnetdiscover\n";
79    print " --ibroute /path/to/ibroute\n";
80    print "    Specify exact location of programs. Default is /usr/bin/<program>\n";
81    print " --sleep <n>\n";
82    print "    Sleep for <n> seconds between invocations of programs probing the network\n";
83    print " --ignore-errors\n";
84    print "    Ignore errors from ibnetdiscover and ibroute, assume their outputs are ok\n";
85    print " --force -f\n";
86    print "    Always rediscover to overwrite existing files without asking\n";
87    print " --verbose -v\n";
88    print "    Add verbose messages\n";
89    print " --dry-run\n";
90    print "    Do not actually run programs or modify anything\n";
91    print " --help -h\n";
92    print "    Show this help\n";
93    exit(1);
94}
95
96my $outdir = $ARGV[0];
97
98mkdir $outdir unless $dryrun;
99die "$outdir isn't a directory\n" unless -d $outdir;
100mkdir "$outdir/ib-raw" unless $dryrun;
101die "$outdir/ib-raw isn't a directory\n" unless -d "$outdir/ib-raw";
102
103my $sudo = $needsudo ? "sudo" : "";
104
105if (`id -u` ne 0 and !$sudo and !$dryrun) {
106    print "WARNING: Not running as root.\n";
107}
108
109# subnets that will be discovered locally
110my %subnets_todiscover;
111
112#########################################
113# Read forced subnets
114if (@forcesubnets) {
115  print "Enforcing list of subnets to discover:\n";
116  foreach my $subnetstring (@forcesubnets) {
117    if ($subnetstring =~ /^([0-9a-fA-F:]{19}):([0-9a-z_-]+):([0-9]+)$/) {
118      my $subnet = $1;
119      my $boardname = $2;
120      my $portnum = $3;
121      print " Subnet $subnet from local board $boardname port $portnum.\n";
122      $subnets_todiscover{$subnet}->{localboardname} = $boardname;
123      $subnets_todiscover{$subnet}->{localportnum} = $portnum;
124
125    } elsif ($subnetstring =~ /^([0-9a-z_-]+):([0-9]+)$/) {
126      my $boardname = $1;
127      my $portnum = $2;
128      my $subnet;
129      print " Unknown subnet from local board $boardname port $portnum.\n";
130      my $filename = "/sys/class/infiniband/$boardname/ports/$portnum/gids/0";
131      if (open FILE, $filename) {
132        my $line = <FILE>;
133        if ($line =~ /^([0-9a-fA-F:]{19}):([0-9a-fA-F:]{19})$/) {
134	  $subnet = $1
135        }
136        close FILE;
137      }
138      if (defined $subnet) {
139	print "  Found subnet $subnet from first GID.\n";
140	$subnets_todiscover{$subnet}->{localboardname} = $boardname;
141	$subnets_todiscover{$subnet}->{localportnum} = $portnum;
142      } else {
143	print "  Couldn't read subnet from GID $filename, ignoring.\n";
144      }
145
146    } else {
147      print " Cannot parse --force-subnet $subnetstring, ignoring.\n";
148    }
149  }
150  print "\n";
151
152} else {
153  #########################################
154  # Guess subnets from hwloc
155  if (!defined $hwlocdir) {
156    $hwlocdir = "${outdir}/hwloc";
157    print "Using $hwlocdir as hwloc lstopo XML directory.\n\n";
158  }
159
160  mkdir $hwlocdir unless $dryrun;
161  die "$hwlocdir isn't a directory\n" unless -d $hwlocdir;
162
163  # at least get the local hwloc XML
164  my $hostname = `hostname`;
165  chomp $hostname;
166  my $lstopooutput = "$hwlocdir/${hostname}.xml";
167  if (!-f $lstopooutput) {
168    print "Exporting local node hwloc XML...\n";
169    print "  Running lstopo-no-graphics...\n";
170    my $cmd = "$locallstopo -f $lstopooutput";
171    if ($dryrun) {
172      print "   NOT running $cmd\n" if $verbose;
173    } else {
174      my $ret = system "$cmd" ;
175      if ($ret and !$ignoreerrors) {
176	print "   Failed (exit code $ret).\n";
177      }
178    }
179    print "\n";
180  }
181
182  # $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidnum}->{subnet} and ->{guid} = xxxx:xxxx:xxxx:xxxx
183  # $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{invalid} = 1
184  # $servers{$hostname}->{subnets}->{$subnet} = 1
185  my %servers;
186
187  # $subnets{$subnet}->{$hostname} = 1;
188  my %subnets;
189
190  opendir DIR, $hwlocdir
191    or die "Failed to open hwloc directory ($!).\n";
192  # list subnets by ports
193  while (my $hwlocfile = readdir DIR) {
194    my $hostname;
195    if ($hwlocfile =~ /(.+).xml$/) {
196      $hostname = $1;
197    } else {
198      next;
199    }
200
201    open FILE, "$hwlocdir/$hwlocfile" or next;
202    my $boardname = undef;
203    my $portnum = undef;
204    while (my $line = <FILE>) {
205      if ($line =~ /<object type=\"OSDev\".* name=\"((?!hfi)(?!usnic).+)\".* osdev_type=\"3\".*>/) {
206        $boardname = $1;
207      } elsif (defined $boardname) {
208        if ($line =~ /<\/object>/) {
209          $boardname = undef;
210        } elsif ($line =~ /<info name=\"Port([0-9]+)GID([0-9]+)\".* value=\"([0-9a-fA-F:]{19}):([0-9a-fA-F:]{19})\"\/.*>/) {
211          $servers{$hostname}->{gids}->{$boardname}->{$1}->{$2}->{subnet} = $3;
212          $servers{$hostname}->{gids}->{$boardname}->{$1}->{$2}->{guid} = $4;
213        } elsif ($line =~ /<info name=\"Port([0-9]+)LID\" value=\"(0x[0-9a-fA-F]+)\"\/>/) {
214          # lid must be between 0x1 and 0xbfff
215          if ((hex $2) < 1 or (hex $2) > 49151) {
216            $servers{$hostname}->{gids}->{$boardname}->{$1}->{invalid} = 1;
217          }
218        } elsif ($line =~ /<info name=\"Port([0-9]+)State\" value=\"([0-9])\"\/>/) {
219          # state must be active = 4
220          if ($2 != 4) {
221            $servers{$hostname}->{gids}->{$boardname}->{$1}->{invalid} = 1;
222          }
223        }
224      }
225    }
226    close FILE;
227  }
228  closedir DIR;
229
230  # remove down/inactive ports/servers/...
231  foreach my $hostname (keys %servers) {
232    foreach my $boardname (keys %{$servers{$hostname}->{gids}}) {
233      foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) {
234        delete $servers{$hostname}->{gids}->{$boardname}->{$portnum}
235	  if exists $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{invalid};
236      }
237      delete $servers{$hostname}->{gids}->{$boardname}
238        unless keys %{$servers{$hostname}->{gids}->{$boardname}};
239    }
240    delete $servers{$hostname}
241      unless keys %{$servers{$hostname}->{gids}};
242  }
243
244  # fill list of hostnames per subnets and subnets per hostnames
245  foreach my $hostname (keys %servers) {
246    foreach my $boardname (keys %{$servers{$hostname}->{gids}}) {
247      foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) {
248	foreach my $gidid (keys %{$servers{$hostname}->{gids}->{$boardname}->{$portnum}}) {
249	  my $subnet  = $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidid}->{subnet};
250	  $servers{$hostname}->{subnets}->{$subnet} = 1;
251	  $subnets{$subnet}->{$hostname} = 1;
252	}
253      }
254    }
255  }
256
257  my $nrsubnets = scalar (keys %subnets);
258  print "Found $nrsubnets subnets in hwloc directory:\n";
259  # find local subnets
260  my $localhostname = `hostname`; chomp $localhostname;
261  {
262    my $hostname = $localhostname;
263    foreach my $boardname (keys %{$servers{$hostname}->{gids}}) {
264      foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) {
265        foreach my $gidid (keys %{$servers{$hostname}->{gids}->{$boardname}->{$portnum}}) {
266          my $subnet = $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidid}->{subnet};
267	  if (!exists $subnets_todiscover{$subnet}) {
268	    print " Subnet $subnet is locally accessible from board $boardname port $portnum.\n";
269	    $subnets_todiscover{$subnet}->{localboardname} = $boardname;
270	    $subnets_todiscover{$subnet}->{localportnum} = $portnum;
271	  } elsif ($verbose) {
272	    print " Subnet $subnet is also locally accessible from board $boardname port $portnum.\n";
273	  }
274        }
275      }
276    }
277  }
278  # find non-locally accessible subnets
279  foreach my $subnet (keys %subnets) {
280    next if exists $subnets{$subnet}->{$localhostname};
281    print " Subnet $subnet is NOT locally accessible.\n";
282    my @hostnames = (keys %{$subnets{$subnet}});
283    if ($verbose) {
284      print "  Subnet $subnet is accessible from nodes:\n";
285      foreach my $hostname (@hostnames) {
286	print "   $hostname\n";
287      }
288    } else {
289      print "  Subnet $subnet is accessible from node ".$hostnames[0];
290      print " (and ".(@hostnames-1)." others)" if (@hostnames > 1);
291      print "\n";
292    }
293  }
294  print "\n";
295
296  # list nodes that are connected to all subnets, if the local isn't
297  if (scalar keys %{$servers{$localhostname}->{subnets}} != $nrsubnets) {
298    my @fullyconnectedhostnames;
299    foreach my $hostname (keys %servers) {
300      if (scalar keys %{$servers{$hostname}->{subnets}} == $nrsubnets) {
301	push @fullyconnectedhostnames, $hostname;
302      }
303    }
304    if (@fullyconnectedhostnames) {
305      if ($verbose) {
306	print "All subnets are accessible from nodes:\n";
307	foreach my $hostname (@fullyconnectedhostnames) {
308	  print " $hostname\n";
309	}
310      } else {
311	print "All subnets are accessible from node ".$fullyconnectedhostnames[0];
312	print " (and ".(@fullyconnectedhostnames-1)." others)" if (@fullyconnectedhostnames > 1);
313	print "\n";
314      }
315    } else {
316      print "No node is connected to all subnets.\n";
317    }
318    print "\n";
319  }
320}
321
322###########################
323# Discovery routines
324
325# ibnetdiscover has GUIDs in the form of 0xXXXXXXXXXXXXXXXX, but hwloc
326# has GUIDs in the form of XXXX:XXXX:XXXX:XXXX.  So just arbitrarily
327# choose hwloc's form and convert everything to that format.
328sub normalize_guid {
329    my ($guid) = @_;
330
331    return ""
332        if ($guid eq "");
333
334    $guid =~ m/0x(.{4})(.{4})(.{4})(.{4})/;
335    return "$1:$2:$3:$4";
336}
337
338sub getroutes {
339    my $subnet = shift;
340    my $boardname = shift;
341    my $portnum = shift;
342    my $ibnetdiscoveroutput = shift;
343    my $ibrouteoutdir = shift;
344    my $lids;
345
346    if (!open(FILE, $ibnetdiscoveroutput)) {
347      print "  Couldn't open $ibnetdiscoveroutput\n";
348      return;
349    }
350
351    while (<FILE>) {
352        # We only need lines that begin with SW
353        next
354            if (! /^SW /);
355
356        # Split out the columns.  Yay regexps.  One form of line has
357        # both source and destination information.  The other form
358        # only has source information (because it's not hooked up to
359        # anything -- usually a switch port that doesn't have anything
360        # plugged in to it).
361        chomp;
362        my $line = $_;
363
364        my ($have_peer, $src_name, $src_type, $src_lid, $src_port_id,
365            $src_guid, $width, $speed, $dest_type, $dest_lid, $dest_port_id,
366            $dest_guid, $dest_name);
367
368        # First, assume that the line has both a port and a peer.
369        if ($line !~ m/^SW\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+(\d+x)\s([^\s]*)\s+-\s+(CA|SW)\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+\(\s+'(.+?)'\s+-\s+'(.+?)'\s\)/) {
370            # If we get here, there was no peer -- just a port.
371            $have_peer = 0;
372
373            if ($line !~ m/^SW\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+(\d+x)\s([^\s]*)\s+'(.+?)'/) {
374                print "Line cannot be parsed:\n$line\n";
375                next;
376            }
377            $src_lid = $1; # This is a decimal number
378            $src_port_id = $2; # This is a decimal number
379            $src_guid = $3;
380            $width = $4;
381            $speed = $5;
382            $src_name = $6;
383        } else {
384            $have_peer = 1;
385
386            $src_lid = $1; # This is a decimal number
387            $src_port_id = $2; # This is a decimal number
388            $src_guid = $3;
389            $width = $4;
390            $speed = $5;
391            $dest_type = $6;
392            $dest_lid = $7; # This is a decimal number
393            $dest_port_id = $8; # This is a decimal number
394            $dest_guid = $9;
395            $src_name = $10;
396            $dest_name = $11;
397        }
398
399        # Convert GUIDs to the form xxxx:xxxx:xxxx:xxxx
400        $src_guid = normalize_guid($src_guid);
401        $dest_guid = normalize_guid($dest_guid)
402            if ($have_peer);
403
404        # If the source switch LID already exists, then just keep
405        # going.
406        next
407            if (exists($lids->{$src_lid}));
408
409        # Run ibroute on this switch LID
410	my $ibrouteoutput = "$ibrouteoutdir/ibroute-$subnet-$src_lid.txt";
411        print "  Running ibroute for switch '$src_name' LID $src_lid...\n";
412	my $cmd = "$sudo $ibroute -C $boardname -P $portnum $src_lid";
413	if ($dryrun) {
414	  print "   NOT running $cmd\n" if $verbose;
415	} else {
416	  sleep_between_probes ("   ");
417	  my $ret = system "$cmd > ${ibrouteoutput}.new" ;
418	  if (!$ret or $ignoreerrors) {
419	    unlink ${ibrouteoutput};
420	    rename "${ibrouteoutput}.new", "${ibrouteoutput}";
421	  } else {
422	    unlink "${ibrouteoutput}.new";
423	    print "   Failed (exit code $ret).\n";
424	    next;
425	  }
426	}
427
428        $lids->{$src_lid} = 1;
429    }
430
431    close FILE;
432}
433
434##############################"
435# Discover subnets for real
436
437foreach my $subnet (keys %subnets_todiscover) {
438  my $boardname = $subnets_todiscover{$subnet}->{localboardname};
439  my $portnum = $subnets_todiscover{$subnet}->{localportnum};
440
441  print "Looking at $subnet (through local board $boardname port $portnum)...\n";
442
443  my $ibnetdiscoveroutput = "$outdir/ib-raw/ib-subnet-$subnet.txt";
444  my $ibrouteoutdir = "$outdir/ib-raw/ibroutes-$subnet";
445
446  if (-f $ibnetdiscoveroutput and !$force) {
447    print " $ibnetdiscoveroutput already exists, discover again? (y/n) ";
448    my $answer = <STDIN>;
449    next if $answer !~ /^y/;
450  }
451
452  print " Running ibnetdiscover...\n";
453  my $cmd = "$sudo $ibnetdiscover -s -l -g -H -S -R -p -C $boardname -P $portnum";
454  if ($dryrun) {
455    print "  NOT running $cmd\n" if $verbose;
456  } else {
457    sleep_between_probes ("  ");
458    print "  $cmd\n" if $verbose;
459    my $ret = system "$cmd > ${ibnetdiscoveroutput}.new" ;
460    if (!$ret or $ignoreerrors) {
461      unlink ${ibnetdiscoveroutput};
462      rename "${ibnetdiscoveroutput}.new", "${ibnetdiscoveroutput}";
463    } else {
464      unlink "${ibnetdiscoveroutput}.new";
465      print "  Failed (exit code $ret).\n";
466      next;
467    }
468  }
469
470  print " Getting routes...\n";
471  if (!$dryrun) {
472    system("rm -rf $ibrouteoutdir");
473    mkdir $ibrouteoutdir unless $dryrun;
474    die "$ibrouteoutdir isn't a directory\n" unless -d $ibrouteoutdir;
475  }
476  getroutes $subnet, $boardname, $portnum, $ibnetdiscoveroutput, $ibrouteoutdir;
477}
478