1#!/usr/bin/env perl
2use warnings;
3use strict;
4
5use POSIX qw/getcwd/;
6use File::Basename;
7use File::Temp qw/tempdir tempfile mktemp/;
8use List::Util qw/max/;
9# do not include Data::Dumper except for debugging.
10# use Data::Dumper;
11use Fcntl;
12use Carp;
13
14# This companion program serves as a helper for running bwc programs.
15#
16# It should be rewritten someday, perhaps in python. At least this has to
17# be more modular (to be honest, it used to be much worse).
18
19# MPI_BINDIR=/opt/mpich2-1.1a2-1.fc10.x86_64/usr/bin ./bwc.pl :complete matrix=c72 mn=64 ys=0..64 mpi=4x4
20
21# {{{ usage
22sub usage {
23    print STDERR <<EOF;
24Usage: ./bwc.pl <action> <parameters>
25
26# 20140901: some revamping in progress. The instructions below may be
27# partly inaccurate.
28
29The action to be performed is either:
30- the path to a program, in which case the command line eventually called
31  is quite similar to ``<action> <parameters>'', with some substitutions
32  performed. Depending on the program name, some options are discarded
33  because they are not meaningful.
34- one of the special actions defined by the script. These actions are
35  prepended by a colon.
36    :complete -- do a complete linear system solving. The solution ends
37                 up in \$wdir/W.
38    :bench    -- does prep, secure, and krylov with
39                 an exceedingly large finish bound in order to perform
40                 some timings.
41
42Parameters are specified in the form <key>=<value>.
43All parameters having a meaning for the bwc programs are accepted. Some
44parameters control the wrapping script itself.
45
46mpi thr              same meaning as for bwc programs.
47mn m n prime         same meaning as for bwc programs.
48nullspace            same meaning as for bwc programs.
49ys                   same meaning as for bwc programs (krylov)
50solutions            same meaning as for bwc programs (mksol, gather)
51simd                 SIMD width to be used for krylov (ys=) and mksol,
52                     gather (solutions=) programs. Defaults to 64 if
53                     prime=2 or 1 otherwise
54lingen_mpi           like mpi, but for lingen only
55
56matrix               input matrix file. Must be a complete path. Binary, 32-b LE
57rhs                  rhs file for inhomogeneous systems (ascii with header)
58                     Note that inhomogeneous systems over GF(2) are not
59                     supported yet (but we're not that far), and the file
60                     format for the RHS file is not completely decided
61                     yet.
62wdir                 working directory.
63                     If unset, derived from 'matrix', 'mpi' 'thr'
64mpiexec              Command to invoke for mpiexec
65hosts                Comma-separated list of hosts. 'hosts' may be supplied
66                     several times, so that lists concatenate. Thus one
67                     may use the shell to supply hosts=node{1,3,5,7}.
68hostfile             path to a host list file to be understood by mpiexec
69
70Amongst parameters, some options with leading dashes may be set:
71
72-d  Do a dry run ; show the commands that would be executed.
73    (-d is relevant to the script only)
74-v  Increase verbosity level ; understood by bwc, but meaningless at the moment
75-h  Show this help.
76
77Some environment variables have an impact on this script.
78
79MPI_BINDIR   The path to the directory holding the mpiexec program (the
80             mpiexec= parameter allows to specify the same thing. Note that
81             MPI alone serves as an alias for MPI_BINDIR, but this may
82             become legacy. Normally mpiexec is obtained by cmake
83             substitution, at least this is the default.).
84BWC_BINDIR   The directory holding the bwc binaries. Defaults to the
85             directory holding the current script.
86
87EOF
88
89    if (scalar @_) {
90        print STDERR "Error messages:\n", join("\n", @_), "\n";
91    }
92
93    exit 1;
94}
95# }}}
96
97# $program denotes either a simple command, or something prepended by ':'
98# which indicates something having a special meaning for the script.
99my $main = shift @ARGV or usage;
100
101my $my_cmdline="$0 $main @ARGV";
102
103# ----- cmake substituted variables -----
104## mpiexec is substituted by cmake in case mpi has been used for the
105## compilation. NOTE that this means that a priori, mpiexec _must_ be
106## used for running all programs.
107my $mpiexec='@MPIEXEC@';
108$mpiexec='' if $mpiexec =~ m{^\@.*\@$};
109my $mpiexec_extra_stanzas='@MPIEXEC_EXTRA_STANZAS@';
110$mpiexec_extra_stanzas='' if $mpiexec_extra_stanzas =~ m{^\@.*\@$};
111
112
113my $bindir;
114if (!defined($bindir=$ENV{'BWC_BINDIR'})) {
115    $bindir=$0;
116    $bindir =~ s{/[^/]*$}{};
117    if ($bindir eq $0) {
118        $bindir='.';
119    }
120}
121
122# It's only used so that we can keep a non-zero reference count on
123# temporary filenames which we would like to keep until program exit.
124my @tempfiles;
125
126
127##################################################
128# Parse the command line, and fill the following:
129#  - %$param hash, with special rules for the hosts argument.
130#  - $show_only, @extra_args
131#  - obey -h
132my @extra_args=();
133my $show_only=0;
134my $param={};
135my $param_defaults={
136    # some defaults.
137    prime => 2,         # for factoring.
138    thr => '1x1',
139    mpi => '1x1',
140    lingen_mpi => '1x1',
141};
142# {{{
143while (defined($_ = shift @ARGV)) {
144    if ($_ eq '--') {
145        push @extra_args, splice @ARGV;
146        last;
147    }
148    # -d will never be found as a bw argument, it is only relevant here.
149    if (/^(-d|--show|--dry-run)$/) { $show_only=1; next; }
150    if (/^(-h|--help)$/) { usage; }
151    my ($k,$v);
152    if (/^--([^=]+)$/ && scalar @ARGV) { $k=$1; $v=shift(@ARGV); }
153    elsif (/^(-.*)$/) { $k=$1; $v=undef; }
154    elsif (/^([^=]+)=(.*)$/) { $k=$1; $v=$2; }
155    if (!defined($k)) {
156        usage "Garbage not understood on command line: $_";
157    }
158    if ($k eq 'bwc_bindir') { $bindir=$v; next; }
159    if (!defined($param->{$k})) {
160        $param->{$k}=$v;
161        next;
162    }
163    if ($k eq 'hosts') {
164        if (ref $param->{$k} eq '') {
165            $param->{$k} = [$param->{$k}, $v];
166        } elsif (ref $param->{$k} eq 'ARRAY') {
167            $param->{$k} = [@{$param->{$k}}, $v];
168        } else {
169            die "\$param->{$k} has gone nuts.\n";
170        }
171    } else {
172        usage "parameter $k may not be specified more than once";
173    }
174}
175for my $k (keys %$param_defaults) {
176    next if exists $param->{$k};
177    $param->{$k} = $param_defaults->{$k};
178}
179
180
181# }}}
182
183# {{{ global variables. Much resides in $param, but we do have globals
184# too.
185my @main_args;
186
187my @mpi_split=(1,1);
188my @lingen_mpi_split=(1,1);
189my @thr_split=(1,1);
190my $nh;
191my $nv;
192
193my $wdir;
194my $matrix;
195my $random_matrix;
196my $rhs;
197my $nrhs=0;
198
199## The mpi_extra_args argument is used to pass information to the mpiexec
200## command. The idea is that mpiexec, the mpi driver program, may need
201## additional info to properly setup communications between jobs. As an
202## example, if libvirt-bin is installed on a linux system, openmpi will
203## need the following additions.
204##
205## ./build/x86_64/linalg/bwc/bwc.pl :complete matrix=/tmp/mats/c90b wdir=/tmp/c59 mn=64 mpi=2x1 thr=2x4 hosts=node0,node1 mpi_extra_args='--mca btl_tcp_if_exclude lo,virbr0'
206##
207## This tells mpi not to try routing traffic through either the lo or the
208## virbr0 interface. For the former, it's already openmpi's default
209## behaviour (unless /etc/hosts is screwed up, which this file does tend to
210## be sometimes). For virbr0, it's again some local mess, but each machine
211## having this class C network defined, openmpi can't really tell whether
212## they're connected or not -- and of course they're not.
213##
214## Note that we also obey the $MPI_EXTRA_ARGS environment variable.
215my $mpi_extra_args;
216
217my ($m, $n);
218my $prime;
219my $splitwidth;
220my $simd;
221
222# my $force_complete;
223my $stop_at_step;
224
225my $hostfile;
226my @hosts=();
227my $mpi;
228my $mpi_ver;
229my $needs_mpd;
230# }}}
231#
232# Some more command line interpretation. Here we build @main_args, taking
233# out the arguments which are only of interest to us.
234# Here we read (from $param) the
235# following things.
236#  - $hostfile $mpi_extra_args
237#  - ## broken ## $force_complete
238#  - @hosts $m $n
239#  - @mpi_split @thr_split
240#  - $nh $nv
241#    --> dimensions of the split of the matrix (horizontal chunks,
242#    vertical chunks).
243#  - $prime
244#    --> Linear system is over GF(p) for that prime. For factoring, which
245#    is the default, we have prime==2
246#  - $splitwidth
247#    --> Number of vectors which are put together in a file. This is only
248#    inferred from $prime, and set to 64 for prime==2, 1 otherwise.
249#
250# {{{
251sub set_mpithr_param { # {{{ utility
252    my $v = shift @_;
253    my @s = (1,$v);
254    if ($v=~/^(\d+)$/) {
255        my $nthreads = $1;
256        my $s = int(sqrt($nthreads));
257        for (; $s >= 1; $s--) {
258            next if $nthreads % $s > 0;
259            @s = ($s, $nthreads / $s);
260            last;
261        }
262        die unless $s[0]*$s[1] == $v;
263    } elsif ($v=~/(\d+)x(\d+)$/) {
264        @s = ($1, $2);
265    } else {
266        usage "bad splitting value '$v'";
267    }
268    return @s;
269} # }}}
270
271my $my_verbose_flags = {
272    cmdline => 1,
273    checks => 1,
274    sections => 1,
275};
276
277while (my ($k,$v) = each %$param) {
278    # Some parameters are relevant to us just like they're relevant to
279    # the bwc programs, so we'll set a variable based on their value, and
280    # also copy them to the @main_args array.
281    if ($k eq 'matrix') { $matrix=$v; }
282    if ($k eq 'random_matrix') { $random_matrix=$v; }
283    # Some parameters which are simply _not_ relevant to the bwc programs.
284    if ($k eq 'hostfile') { $hostfile=$v; next; }
285    if ($k eq 'simd') { $simd=$v; next; }
286    if ($k eq 'mpi_extra_args') { $mpi_extra_args=$v; next; }
287    ## if ($k eq 'force_complete') { $force_complete=$v; next; }
288    if ($k eq 'stop_at_step') {
289        die "stop_at_step requires :complete" unless $main =~ /^(?::srun)?:complete/;
290        $stop_at_step=$v;
291        next;
292    }
293    if ($k eq 'hosts') {
294        $v=[$v] if (ref $v eq '');
295        for (@$v) { push @hosts, split(',',$_); }
296        next;
297    }
298    # Some of the command-line arguments are modified before being put
299    # into the main argument list.
300    if ($k eq 'mpi') { @mpi_split = set_mpithr_param $v; $param->{$k} = $v = join "x", @mpi_split;}
301    if ($k eq 'thr') { @thr_split = set_mpithr_param $v; $param->{$k} = $v = join "x", @thr_split; }
302    if ($k eq 'lingen_mpi') { @lingen_mpi_split = set_mpithr_param $v; $param->{$k} = $v = join "x", @lingen_mpi_split;}
303    if ($k eq 'verbose_flags') {
304        my @heritage;
305        for my $f (split(',', $v)) {
306            my $w=1;
307            my $f0=$f;
308            $w = 0 if $f =~ s/^[\^!]//;
309            if ($f =~ s/^perl-//) {
310                $my_verbose_flags->{$f}=$w;
311            } else {
312                push @heritage, $f0;
313            }
314        }
315        $v=join(',',@heritage);
316    }
317
318    # The rest is passed to subprograms, unless explicitly discarded on a
319    # per-program basis.
320    if (defined($v)) {
321        push @main_args, "$k=$v";
322    } else {
323        # This is for switches (like -v) which don't have a "value"
324        # associated with them
325        push @main_args, "$k";
326    }
327
328    # Yet it does make sense to read some parameters, as we are
329    # interested by their value.
330
331    if ($k eq 'wdir') { $wdir=$v; next; }
332    if ($k eq 'mn') { $m=$n=$v; next; }
333    if ($k eq 'n') { $n=$v; next; }
334    if ($k eq 'm') { $m=$v; next; }
335    if ($k eq 'prime') { $prime=$v; next; }
336    if ($k eq 'rhs') { $rhs=$v; next; }
337
338    # Ok, probably there's a fancy argument we don't care about.
339}
340
341$nh = $mpi_split[0] * $thr_split[0];
342$nv = $mpi_split[1] * $thr_split[1];
343$splitwidth = ($prime == 2) ? 64 : 1;
344
345if (!defined($simd)) {
346    $simd = $splitwidth;
347}
348
349# }}}
350
351print "$my_cmdline\n" if $my_verbose_flags->{'cmdline'};
352
353if ($main =~ /^:(?:mpi|s)run(?:_single)?$/) {
354    # ok, this is really an ugly ugly hack. We have some mpi detection
355    # magic in this script, which we would like to use. So the :mpirun
356    # meta-command is just for that. Of course the argument requirements
357    # are mostly waived in this case.
358    $matrix=$param->{'matrix'}=$0;
359    $param->{'prime'}=2;
360    $param->{'simd'}=$simd=64;
361    $m=$n=64;
362    $wdir=$param->{'wdir'}="/";
363    if ($main =~ /^:srun/) {
364        # Setting this will trigger mpi_needed, mpi detection check,
365        # and ultimately passing -n
366        $mpiexec='srun';
367    }
368}
369
370# {{{ Some important argument checks
371{
372    my @miss = grep { !defined $param->{$_}; } (qw/prime/);
373    die "Missing argument(s): @miss" if @miss;
374    if (!defined($param->{'matrix'}) && !defined($param->{'random_matrix'})) {
375        die "Missing parameter: matrix (or random_matrix)";
376    }
377    if (!defined($m) || !defined($n)) {
378        die "Missing parameters: m and/or n";
379    }
380}
381
382if (defined($param->{'matrix'})) {
383    $param->{'matrix'} =~ s/~/$ENV{HOME}/g;
384    die "$param->{'matrix'}: $!" unless -f $param->{'matrix'};
385}
386
387if ($prime == 2 && (($m % 64 != 0) || ($n % 64 != 0))) {
388    die "Currently for p=2 bwc supports only block sizes which are multiples of 64";
389}
390if (($m % $splitwidth != 0) || ($n % $splitwidth != 0)) {
391    die "Currently bwc supports only block sizes which are multiples of $splitwidth";
392}
393if ($prime == 2 && defined($rhs)) {
394    die "inhomogeneous systems currently supported only for p>2";
395}
396if (defined($rhs)) {
397    # Try to read the first line.
398    my $header = eval { open F, $rhs or die "$rhs: $!"; <F>; };
399    if ($header !~ /^(\d+) (\d+) (\d+)$/) {
400        die "$rhs does not seem to be ascii with header, we can't proceed.\n";
401    }
402    $param->{'nrhs'} = $nrhs = $2;
403    print "$rhs is ascii file with header. Getting nrhs=$nrhs from there\n";
404}
405if ($nrhs > $n || $nrhs > $m) {
406    die "nrhs > n or m is not supported";
407}
408
409if ($prime == 2 && ($lingen_mpi_split[0] != 1 || $lingen_mpi_split[1] != 1)) {
410    die "Current binary lingen code does not support mpi";
411}
412
413if ($lingen_mpi_split[0] != $lingen_mpi_split[1]) {
414    die "lingen_mpi ($lingen_mpi_split[0]x$lingen_mpi_split[1]) must be a square split";
415}
416
417if ($m % $lingen_mpi_split[0] != 0 || $n % $lingen_mpi_split[0] != 0) {
418    die "lingen_mpi must divide gcd(m,n)"
419}
420
421# }}}
422
423# {{{ wdir preliminary (pre-mpi) checks.
424#  - Check that wdir is defined (or define it to some default);
425#  - Check that it exists if it has to exist (:complete is entitled to
426#  create $wdir
427
428# Selection of the working directory -- there are three relevant options.
429# The 'matrix' option gives the file name of the matrix to be used.
430# The working directory used is either specified as a parameter on its
431# own named 'wdir', or inferred from the basename of the matrix file,
432# appended with some information on the splitting, e.g.  c160-4x4.
433
434# Note that when doing random matrices, we don't even need a wdir.
435
436if (!defined($random_matrix)) {
437    if (!defined($param->{'wdir'}) && defined($param->{'matrix'})) {
438        # ok, we're doing this a bit ahead of time
439        $wdir=$param->{'wdir'}="$param->{'matrix'}-${nh}x${nv}";
440        push @main_args, "wdir=$wdir";
441    }
442
443    if ($main !~ /^(?::srun)?:(?:complete|bench)$/ && !-d $param->{'wdir'}) {
444        die "$param->{'wdir'}: no such directory";
445    }
446}
447# }}}
448
449# Some quick shortcuts in case the user failed to specify some arguments
450#  - ys=
451#    --> this is valid only for krylov, for *one* sequence run.
452#    In general, this must correspond to an interval defining a sequence
453#    (i.o.w. two consecutive multiples of simd width).
454#    For interleaving, this is a bit trickier, since two
455#    contiguous intervals are to be treated.
456# The code below is just setting sensible defaults.
457# {{{
458##################################################
459# Some more default values to set. Setting separately ys= and solutions=
460# is quite awkward in the case of a single-site test.
461if ((!defined($m) || !defined($n))) {
462    usage "The parameters m and n must be set";
463}
464
465if (!defined($param->{'solutions'})) {
466    $param->{'solutions'}=["0-$simd"];
467    if ($param->{'interleaving'}) {
468        $param->{'solutions'} = [ "0-" . (2*$simd) ];
469    }
470} else {
471    # make that a list.
472    $param->{'solutions'} = [ split(',',$param->{'solutions'}) ];
473}
474
475for (@{$param->{'solutions'}}) {
476    /^\d+-\d+$/ or die "the 'solutions' parameter must match \\d+-\\d+";
477}
478
479# Default settings for ys= --> see krylov / mksol / gather.
480# }}}
481
482# Done playing around with what we've been given.
483
484##################################################
485# System interaction.
486# - set environment variables with my_setenv (which does a bit more)
487# - run commands with dosystem
488# - ssh to nodes with ssh_program ; this is MPI-backend-dependent.
489# {{{
490my $env_strings = "";
491sub my_setenv
492{
493    return if (exists($ENV{$_[0]}) && $ENV{$_[0]} eq $_[1]);
494    $env_strings .= "$_[0]=$_[1] ";
495    $ENV{$_[0]}=$_[1];
496}
497
498sub dosystem
499{
500    my $nofatal=0;
501    my $prg = shift @_;
502    if ($prg eq '-nofatal') {
503        $nofatal=1;
504        $prg=shift @_;
505    }
506    my @args = @_;
507    print STDERR '#' x 77, "\n" if $my_verbose_flags->{'sections'};
508    my $msg = "$env_strings$prg " . (join(' ', @args)) . "\n";
509
510    if ($show_only) {
511        print $msg;
512        return 0;
513    }
514
515    print STDERR $msg if $my_verbose_flags->{'cmdline'};
516    my $rc = system $prg, @args;
517    return if $rc == 0;
518    if ($rc == -1) {
519        print STDERR "Cannot execute $prg\n";
520    } elsif ($rc & 127) {
521        my $sig = $rc & 127;
522        my $coreinfo = ($rc & 128) ? 'with' : 'without';
523        print STDERR "$prg: died with signal $sig, $coreinfo coredump\n";
524    } else {
525        my $ret = $rc >> 8;
526        print STDERR "$prg: exited with status $ret\n";
527    }
528    die "aborted on subprogram error";
529}
530
531sub ssh_program
532{
533    my $ssh;
534    if ($ssh = $ENV{'SSH'}) {
535        return $ssh;
536    }
537    $ssh='ssh';
538    if (exists($ENV{'OAR_JOBID'})) {
539        $ssh="/usr/bin/oarsh";
540    }
541    return $ssh
542}
543# }}}
544
545if (!defined($random_matrix) && !-d $wdir) {        # create $wdir on script node.
546    if ($show_only) {
547        print "mkdir $wdir\n";
548    } else {
549        mkdir $wdir;
550    }
551}
552
553##################################################
554# MPI-specific detection code. Beyond probing the environment variables
555# to see which MPI middleware we're running, this code sets the following
556# important argument lists:
557#  - @mpi_precmd @mpi_precmd_single
558#    --> Simply put, the former is prepended before all important
559#    mpi-level programs (secure krylov mksol gather), while the
560#    other is of course for leader-node-only programs.
561#  - @mpi_precmd_lingen
562#    --> use lingen_mpi (no lingen_thr exists at this point)
563#  - $mpi_needed
564#    --> to be used to check whether we want mpi. This is important
565#    before calling dosystem.
566# {{{
567
568# If we've been built with mpi, then we _need_ mpi for running. Otherwise
569# we run into shared libraries mess.
570
571my $mpi_needed = $mpiexec ne '';
572
573# @mpi_precmd_single is something silly; we want provision for the case
574# where mpi is used for running non-mpi jobs. It's something which does
575# not officially work, yet it always does. And some programs do turn out
576# to be compiled with mpi, so we need the mpi libraries at runtime... So
577# short of a more accurate solution, this is a hack.
578#
579# Note also that @mpi_precmd_single is absolutely needed when we run a
580# non-mpi binary, yet that does load all of the mpi infrastructure, in a
581# context where that mpi infrastructure smartly reads the environment
582# variables and detects that we need X jobs (use case: bwccheck does
583# bw_common_init_mpi out of convenience, but really isn't an mpi program).
584my @mpi_precmd;
585my @mpi_precmd_single;
586my @mpi_precmd_lingen;
587
588
589sub detect_mpi {
590    if (defined($_=$ENV{'MPI_BINDIR'}) && -x "$_/mpiexec") {
591        $mpi=$_;
592    } elsif (defined($_=$ENV{'MPI'}) && -x "$_/mpiexec") {
593        $mpi=$_;
594    } elsif (defined($_=$ENV{'MPI'}) && -x "$_/bin/mpiexec") {
595        $mpi="$_/bin";
596    } elsif ($mpiexec && -x $mpiexec) {
597        my($basename, $dirname) = fileparse($mpiexec);
598        $mpi=$dirname;
599    } else {
600        my @path = split(':',$ENV{'PATH'});
601        for my $d (@path) {
602            if (-x "$d/mpiexec") {
603                $mpi=$d;
604                print STDERR "Auto-detected MPI_BINDIR=$mpi\n";
605                last;
606            }
607        }
608    }
609
610    my $maybe_mvapich2=1;
611    # mpich versions implementing the mpi-2 standard were named mpich2.
612    # From standard mpi-3 on, the name has returned to mpich.
613    my $maybe_mpich=1;
614    my $maybe_openmpi=1;
615
616    if (defined($mpi)) {
617        SEVERAL_CHECKS: {
618            # first check the alternatives system, which is fairly
619            # commonplace.
620            my $mpiexec = "$mpi/mpiexec";
621            while (-l $mpiexec) {
622                my $target=readlink($mpiexec);
623                print STDERR "readlink($mpiexec)->$target\n";
624                if ($target =~ m{^/}) {
625                    $mpiexec = $target;
626                } else {
627                    $mpiexec =~ s{[^/]+$}{$target};
628                }
629                if ($mpiexec =~ /openmpi/) {
630                    print STDERR "Auto-detecting openmpi based on alternatives\n";
631                    $maybe_mvapich2=0;
632                    $maybe_mpich=0;
633                    last;
634                } elsif ($mpiexec =~ /mpich2/) {
635                    print STDERR "Auto-detecting mpich2(old) based on alternatives\n";
636                    $maybe_mpich='mpich2';
637                    $maybe_mvapich2=0;
638                    $maybe_openmpi=0;
639                    last;
640                } elsif ($mpiexec =~ /mpich/) {
641                    print STDERR "Auto-detecting mpich based on alternatives\n";
642                    $maybe_mvapich2=0;
643                    $maybe_openmpi=0;
644                    last;
645                } elsif ($mpiexec =~ /hydra/) {
646                    # Newer mvapich2 uses hydra as well...
647                    print STDERR "Auto-detecting mpich or mvapich2 (hydra) or intel mpi (hydra) based on alternatives\n";
648                    $maybe_mvapich2='hydra';
649                    $maybe_mpich='hydra';
650                    $maybe_openmpi=0;
651                    last;
652                } elsif ($mpiexec =~ /mvapich2/) {
653                    print STDERR "Auto-detecting mvapich2 based on alternatives\n";
654                    $maybe_mpich=0;
655                    $maybe_openmpi=0;
656                    last;
657                }
658            }
659            CHECK_MVAPICH2: {
660                if ($maybe_mvapich2 && -x "$mpi/mpiname") {
661                    my $v = `$mpi/mpiname -n -v`;
662                    chomp($v);
663                    if ($v =~ /MVAPICH2\s+([\d\.]+)((?:\D\w*)?)/) {
664                        # Presently all versions of mvapich2 up
665                        # until 1.6rc3 included need mpd daemons.
666                        # Released version 1.6 uses hydra.
667                        $mpi_ver="mvapich2-$1$2";
668                        if (($1 < 1.6) || ($1 == 1.6 && $2 =~ /^rc\d/)) {
669                            $needs_mpd = 1;
670                        } else {
671                            $mpi_ver .= "+hydra" unless $mpi_ver =~ /hydra/;
672                        }
673                        last SEVERAL_CHECKS;
674                    }
675                }
676            }
677            CHECK_MPICH_VERSION: {
678                if ($maybe_mpich =~ /^(hydra|mpich2)/ && -x "$mpi/mpich2version") {
679                    my $v = `$mpi/mpich2version -v`;
680                    chomp($v);
681                    if ($v =~ /MPICH2 Version:\s*(\d.*)$/) {
682                        $mpi_ver="mpich2-$1";
683                        # Versions above 1.3 use hydra.
684                        $needs_mpd=($mpi_ver =~ /^mpich2-(0|1\.[012])/);
685                    } else {
686                        $mpi_ver="mpich2-UNKNOWN";
687                        $needs_mpd=1;
688                    }
689                    $v = `$mpi/mpich2version -c`;
690                    chomp($v);
691                    if ($v =~ /--with-pm=hydra/) {
692                        $mpi_ver .= "+hydra";
693                        $needs_mpd=0;
694                    }
695                    if ($maybe_mpich eq 'hydra') {
696                        $mpi_ver .= "+hydra" unless $mpi_ver =~ /hydra/;
697                        $needs_mpd=0;
698                    }
699                    last SEVERAL_CHECKS;
700                } elsif ($maybe_mpich && -x "$mpi/mpichversion") {
701                    my $v = `$mpi/mpichversion -v`;
702                    chomp($v);
703                    if ($v =~ /MPICH Version:\s*(\d.*)$/) {
704                        $mpi_ver="mpich-$1";
705                        # Only antique mpich-1.x versions don't use
706                        # hydra.
707                        $needs_mpd=($mpi_ver =~ /^mpich-[01]\./);
708                    } else {
709                        $mpi_ver="mpich-UNKNOWN";
710                        $needs_mpd=1;
711                    }
712                    last SEVERAL_CHECKS;
713                } elsif ($maybe_mpich && -x "$mpi/mpivars.sh") {
714                    my $v = `unset I_MPI_ROOT ; . $mpi/mpivars.sh  ; echo \$I_MPI_ROOT`;
715                    if ($v) {
716                        $mpi_ver="Intel MPI";
717#       comment out, as this requires mpicc on the compute nodes (at least for debugging), and that isn't always available...
718#			my $v = `$mpi/mpicc -v`;
719#			chomp($v);
720#			if ($v =~ /Intel \(R\) MPI Library (.*?) for/) {
721#			    $mpi_ver="$1";
722#			    $mpi_ver=~/^(\d+)/;
723#                            my $mpi_year=$1;
724#                            $mpi_ver=~s/\s+/_/g;
725#			    $mpi_ver="intel-$mpi_ver";
726#                            $needs_mpd=0;
727#			} else {
728#                            warn "Could not correctly recognize mpi version (perhaps intel ?)";
729#                        }
730                        $needs_mpd=0;
731                    }
732                    last SEVERAL_CHECKS;
733                }
734            }
735            CHECK_OMPI_VERSION: {
736                if ($maybe_openmpi && -x "$mpi/ompi_info") {
737                    my @v = `$mpi/ompi_info`;
738                    my @vv = grep { /Open MPI:/; } @v;
739                    last CHECK_OMPI_VERSION unless scalar @vv == 1;
740                    $needs_mpd=0;
741                    if ($vv[0] =~ /Open MPI:\s*(\d\S*)$/) {
742                        $mpi_ver="openmpi-$1";
743                        last SEVERAL_CHECKS;
744                    } else {
745                        $mpi_ver="openmpi-UNKNOWN";
746                        last SEVERAL_CHECKS;
747                    }
748                }
749            }
750        }
751
752        if (defined($mpi_ver)) {
753            print STDERR "Using $mpi_ver, MPI_BINDIR=$mpi\n";
754        } else {
755            print STDERR "Using UNKNOWN mpi, MPI_BINDIR=$mpi\n";
756            if (defined($needs_mpd=$ENV{NEEDS_MPD})) {
757                warn "Assuming needs_mpd=$needs_mpd as per env variable.\n";
758            } else {
759                $needs_mpd=1;
760                warn "Assuming needs_mpd=$needs_mpd ; " .
761                    "modify env variable \$NEEDS_MPD to " .
762                    "change fallback behaviour\n";
763            }
764        }
765    }
766
767    if (!defined($mpi)) {
768        print STDERR <<EOMSG;
769***ERROR*** No mpi library was detected. Arrange for mpiexec to be in
770***ERROR*** your PATH, or set the MPI_BINDIR environment variable.
771EOMSG
772        exit 1;
773    }
774
775    # Some final tweaks.
776    if ($mpi_ver =~ /^mvapich2/) {
777        print "## setting MV2_ENABLE_AFFINITY=0 (for mvapich2)\n";
778        $ENV{'MV2_ENABLE_AFFINITY'}=0;
779    }
780}
781
782# Starting daemons for mpich2 1.[012].x and mvapich2 ; we're assuming
783# this works the same for older mpich2's, although this has never been
784# checked.
785sub check_mpd_daemons
786{
787    return unless $needs_mpd;
788
789    my $ssh = ssh_program();
790
791    my $rc = system "$mpi/mpdtrace > /dev/null 2>&1";
792    if ($rc == 0) {
793        print "mpi daemons seem to be ok\n";
794        return;
795    }
796
797    if ($rc == -1) {
798        die "Cannot execute $mpi/mpdtrace";
799    } elsif ($rc & 127) {
800        die "$mpi/mpdtrace died with signal ", ($rc&127);
801    } else {
802        print "No mpi daemons found by $mpi/mpdtrace, restarting\n";
803    }
804
805    open F, $hostfile;
806    my %hosts;
807    while (<F>) {
808        $hosts{$_}=1;
809    }
810    close F;
811    my $n = scalar keys %hosts;
812    print "Running $n mpi daemons\n";
813    dosystem "$mpi/mpdboot -n $n -r $ssh -f $hostfile -v";
814}
815
816sub get_mpi_hosts_oar {
817    my @x = split /^/, eval {
818		local $/=undef;
819		open F, "$ENV{OAR_NODEFILE}";
820		<F> };
821    @hosts=();
822    my %h=();
823    for (@x) {
824        push @hosts, $_ unless $h{$_}++;
825    }
826}
827
828sub get_mpi_hosts_slurm {
829    @hosts=();
830    my %h=();
831    for (`scontrol show hostname`) {
832        chomp($_);
833        push @hosts, $_ unless $h{$_}++;
834    }
835    if (!scalar @hosts) {
836        die "no hosts found with \"scontrol show hostname\"; is scontrol in \$PATH ?";
837    }
838}
839
840sub get_mpi_hosts_torque {
841    my @x = split /^/, eval {
842		local $/=undef;
843		open F, "$ENV{PBS_NODEFILE}";
844		<F> };
845    my $nth = $thr_split[0] * $thr_split[1];
846    @hosts=();
847    while (scalar @x) {
848        my @z = splice @x, 0, $nth;
849        my %h=();
850        $h{$_}=1 for @z;
851        die "\$PBS_NODEFILE not consistent mod $nth\n" unless scalar
852			keys %h == 1;
853        my $c = $z[0];
854        chomp($c);
855        push @hosts, $c;
856    }
857}
858
859sub get_mpi_hosts_sge {
860    print STDERR "Building hosts file from $ENV{PE_HOSTFILE}\n";
861    my @x = split /^/, eval { local $/=undef; open F, "$ENV{PE_HOSTFILE}"; <F> };
862    my $cores_on_node = {};
863    for my $line (@x) {
864        my ($node, $ncores, $toto, $tata) = split ' ', $line;
865        print STDERR "$node: +$ncores cores\n";
866        $cores_on_node->{$node}+=$ncores;
867    }
868    my $values_for_cores_on_node = {};
869    local $_;
870    $values_for_cores_on_node->{$_}=1 for (values %$cores_on_node);
871
872    die "Not always the same number of cores obtained on the different nodes, as per \$PE_HOSTFILE" if keys %$values_for_cores_on_node != 1;
873
874    my $ncores_obtained = (keys %$values_for_cores_on_node)[0];
875    my $nnodes = scalar keys %$cores_on_node;
876    print STDERR "Obtained $ncores_obtained cores on $nnodes nodes\n";
877
878    my $nthr = $thr_split[0] * $thr_split[1];
879    my $nmpi = $mpi_split[0] * $mpi_split[1];
880
881    die "Not enough cores ($ncores_obtained) obtained: want $nthr\n" if $nthr > $ncores_obtained;
882
883    @hosts=();
884    push @hosts, $_ for keys %$cores_on_node;
885
886    die "Not enough mpi nodes ($nnodes): want $nmpi\n" if $nmpi > $nnodes;
887}
888
889# {{{ utilities
890sub version_ge {
891    my ($a, $b) = @_;
892    my @a = split(/[\.-]/, $a);
893    my @b = split(/[\.-]/, $b);
894    while (@a && @b) {
895        $a = shift @a;
896        $b = shift @b;
897        $a =~ /^(\d*)(.*)$/; my ($an, $at) = ($1, $2);
898        $b =~ /^(\d*)(.*)$/; my ($bn, $bt) = ($1, $2);
899        $an=0 if length($an)==0;
900        $bn=0 if length($bn)==0;
901        return 1 if $an > $bn;
902        return 0 if $an < $bn;
903        return 1 if $at gt $bt;
904        return 0 if $at lt $bt;
905    }
906    return 0 if @b;
907    return 1;
908}
909
910# }}}
911
912open STDIN,  '<',  '/dev/null' or die "can't redirect STDIN: $!\n";
913
914if ($mpi_needed) {
915    # This is useful for debugging in case we see new MPI environments.
916    # print STDERR "Inherited environment:\n";
917    # print STDERR "$_=$ENV{$_}\n" for keys %ENV;
918    detect_mpi;
919
920    # quirk. If called with srun, then we want to avoid mpiexec anyway.
921    if ($main =~ /^:srun/) {
922        push @mpi_precmd, 'srun', '--cpu-bind=verbose,none', '-u';
923	if ($mpi_ver =~ /^openmpi/) {
924            # srun might like that we give it the hint that we're running
925            # openmpi.
926            push @mpi_precmd, "--mpi=openmpi";
927	}
928        # quirk
929        $main =~ s/^:srun://;
930        $main =~ s/^complete/:complete/;
931    } else {
932        # Otherwise we'll start via mpiexec, and we need to be informed
933        # on the list of nodes.
934        push @mpi_precmd, "$mpi/mpiexec";
935
936        my $auto_hostfile_pattern="/tmp/cado-nfs.hosts_XXXXXXXX";
937
938        # Need hosts. Put that to the list @hosts first.
939        if ($main =~ /^:srun/) {
940            print STDERR "srun environment detected, not detecting hostfile.\n";
941        } elsif (exists($ENV{'OAR_JOBID'}) && !defined($hostfile) && !scalar @hosts) {
942            print STDERR "OAR environment detected, setting hostfile.\n";
943            get_mpi_hosts_oar;
944            $auto_hostfile_pattern="/tmp/cado-nfs.hosts.$ENV{'OAR_JOBID'}.XXXXXXX";
945        } elsif (exists($ENV{'PBS_JOBID'}) && !defined($hostfile) && !scalar @hosts ) {
946            print STDERR "Torque/OpenPBS environment detected, setting hostfile.\n";
947            get_mpi_hosts_torque;
948            $auto_hostfile_pattern="/tmp/cado-nfs.hosts.$ENV{'PBS_JOBID'}.XXXXXXX";
949        } elsif (exists($ENV{'PE_HOSTFILE'}) && exists($ENV{'NSLOTS'}) && !defined($hostfile) && !scalar @hosts) {
950            print STDERR "Oracle/SGE environment detected, setting hostfile.\n";
951            get_mpi_hosts_sge;
952            # } elsif (exists($ENV{'SLURM_STEP_NODELIST'})) {
953            # not clear that I want this detection to be done _only_ within a
954            # job step. I do see cases where mpiexec from the batch itself seems
955            # to make sense
956        } elsif (exists($ENV{'SLURM_JOBID'}) && !defined($hostfile) && !scalar @hosts) {
957            get_mpi_hosts_slurm;
958        }
959
960        if (scalar @hosts) {
961            # I think that when doing so, the file will get deleted at
962            # program exit only.
963            my $fh = File::Temp->new(TEMPLATE=>$auto_hostfile_pattern);
964            $hostfile = $fh->filename;
965            push @tempfiles, $fh;
966            for my $h (@hosts) { print $fh "$h\n"; }
967            close $fh;
968        }
969
970        if (defined($hostfile)) {
971            if ($needs_mpd) {
972                # Assume daemons do the job.
973            } elsif ($mpi_ver =~ /^\+hydra/) {
974                my_setenv 'HYDRA_HOST_FILE', $hostfile;
975                # I used to have various setups using --hostfile for openmpi,
976                # --file in some other cases and so on. I think that
977                # -machinefile is documented in the published standard, so
978                # it's better to stick to it.
979                #        } elsif ($mpi_ver =~ /^openmpi/) {
980                #            push @mpi_precmd, "--hostfile", $hostfile;
981                #        } else {
982                #            push @mpi_precmd, "-file", $hostfile;
983            } else {
984                push @mpi_precmd, "-machinefile", $hostfile;
985            }
986
987        }
988        if (!defined($hostfile)) {
989            # At this point we're going to run processes on localhost.
990            if ($mpi_ver =~ /^\+hydra/) {
991                my_setenv 'HYDRA_USE_LOCALHOST', 1;
992            }
993            # Otherwise we'll assume that the simple setup will work fine.
994        }
995        check_mpd_daemons();
996        if (!$needs_mpd) {
997            # Then we must configure ssh
998            # Note that openmpi changes the name of the option pretty
999            # frequently.
1000            if ($mpi_ver =~ /^openmpi-1\.2/) {
1001                push @mpi_precmd, qw/--mca pls_rsh_agent/, ssh_program();
1002            } elsif ($mpi_ver =~ /^openmpi-1\.[34]/) {
1003                push @mpi_precmd, qw/--mca plm_rsh_agent/, ssh_program();
1004            } elsif ($mpi_ver =~ /^openmpi-1\.[56]/) {
1005                push @mpi_precmd, qw/--mca orte_rsh_agent/, ssh_program();
1006            } elsif ($mpi_ver =~ /^openmpi/) {
1007                push @mpi_precmd, qw/--mca plm_rsh_agent/, ssh_program();
1008                if (version_ge($mpi_ver, "openmpi-1.8")) {
1009                    # This is VERY important for bwc ! The default policy for
1010                    # openmpi 1.8 seems to be by slot, which obviously
1011                    # schedules most of the desired jobs on only one node...
1012                    # which doesn't work too well.
1013                    if (!$param->{'only_mpi'}) {
1014                        # with only_mpi=1, the default policy works fine.
1015                        push @mpi_precmd, qw/--map-by node/;
1016
1017                        # I don't exactly when the --bind-to argument
1018                        # appeared.
1019                        push @mpi_precmd, qw/--bind-to none/;
1020                    }
1021                }
1022            } elsif ($mpi_ver =~ /^mpich2/ || $mpi_ver =~ /^mvapich2/) {
1023                # Not older mpich2's, which need a daemon.
1024                push @mpi_precmd, qw/-launcher ssh -launcher-exec/, ssh_program();
1025            }
1026        }
1027        # End of section where we try to set up the proper options so
1028        # that mpiexec reaches the desired nodes correctly.
1029    }
1030    if (defined($mpi_extra_args)) {
1031        my @a = split(' ', $mpi_extra_args);
1032        my @b;
1033        while (defined($_=shift(@a))) {
1034            if (/^(--map-by|bind-to)$/) {
1035                shift @a;
1036                next;
1037            }
1038            if (/^plm_rsh_agent$/) {
1039                pop @b;
1040                shift @a;
1041                next;
1042            }
1043            push @b, $_;
1044        }
1045        # filter out stuff that we already provide (there are cases where
1046        # we supply this information redundantly).
1047        push @mpi_precmd, @b;
1048    }
1049    push @mpi_precmd, split(' ', $mpiexec_extra_stanzas);
1050    push @mpi_precmd, split(' ', $ENV{'MPI_EXTRA_ARGS'}) if $ENV{'MPI_EXTRA_ARGS'};
1051
1052    @mpi_precmd_single = @mpi_precmd;
1053    @mpi_precmd_lingen = @mpi_precmd;
1054    if (!$param->{'only_mpi'}) {
1055        push @mpi_precmd, '-n', $mpi_split[0] * $mpi_split[1];
1056    } else {
1057        push @mpi_precmd, '-n', $nh * $nv;
1058    }
1059    push @mpi_precmd_lingen, '-n', $lingen_mpi_split[0] * $lingen_mpi_split[1];
1060    push @mpi_precmd_single, '-n', 1;
1061}
1062
1063# }}}
1064
1065if ($mpi_needed) {
1066    if ($ENV{'DISPLAY'}) {
1067        print "## removing the DISPLAY environment variable, as it interacts badly with MPI startup\n";
1068        delete $ENV{'DISPLAY'};
1069    }
1070    if ($ENV{'SSH_AUTH_SOCK'}) {
1071        if ($ENV{'OAR_JOBID'}) {
1072            print "## removing the SSH_AUTH_SOCK environment variable, as it interacts badly with MPI startup, and OAR does not need it.\n";
1073            delete $ENV{'SSH_AUTH_SOCK'};
1074        } else {
1075            print "## WARNING: the environment variable SSH_AUTH_SOCK is set. If it so happens that you do *not* need it, you should unset it for faster job startup.\n";
1076        }
1077    }
1078
1079    # openmpi seems to properly propagate the path to mpirun as the path
1080    # to be forwarded on the remote nodes, so no manual propagation of
1081    # LD_LIBRARY_PATH or --prefix is needed.
1082
1083    # mkdir must not be marked fatal, because if the command terminates
1084    # without having ever tried to join in an mpi collective like
1085    # mpi_init(), there's potential for the mpirun command to complain.
1086    dosystem('-nofatal', @mpi_precmd, split(' ', "mkdir -p $wdir"))
1087        unless defined($random_matrix) || $wdir eq '/';
1088}
1089
1090if ($main eq ':mpirun') {
1091    # we don't even put @main_args in, because we're tinkering with it
1092    # somewhat.
1093    dosystem(@mpi_precmd, @extra_args);
1094    exit 0;
1095}
1096
1097if ($main eq ':mpirun_single') {
1098    # we don't even put @main_args in, because we're tinkering with it
1099    # somewhat.
1100    dosystem(@mpi_precmd_single, @extra_args);
1101    exit 0;
1102}
1103
1104if ($main eq ':srun') {
1105    # we don't even put @main_args in, because we're tinkering with it
1106    # somewhat.
1107    s/mpiexec/srun/ for (@mpi_precmd);
1108    dosystem(@mpi_precmd, @extra_args);
1109    exit 0;
1110}
1111
1112if ($main eq ':srun_single') {
1113    # we don't even put @main_args in, because we're tinkering with it
1114    # somewhat.
1115    s/mpiexec/srun/ for (@mpi_precmd);
1116    dosystem(@mpi_precmd_single, @extra_args);
1117    exit 0;
1118}
1119
1120##################################################
1121### ok -- now @main_args is something relatively useful.
1122# print "main_args:\n", join("\n", @main_args), "\n";
1123
1124push @main_args, splice @extra_args;
1125
1126# Now we have one function for each of the following macroscopic steps of
1127# the program
1128#
1129# prep
1130# krylov
1131# lingen
1132# mksol
1133# gather
1134
1135# {{{ Some pretty-printing
1136my $current_task;
1137
1138my $terminal_colors = {
1139    BLACK	=> "\e[01;30m",
1140    RED 	=> "\e[01;31m",
1141    GREEN	=> "\e[01;32m",
1142    YELLOW	=> "\e[01;33m",
1143    BLUE	=> "\e[01;34m",
1144    VIOLET	=> "\e[01;35m",
1145    black	=> "\e[00;30m",
1146    red         => "\e[00;31m",
1147    green	=> "\e[00;32m",
1148    yellow	=> "\e[00;33m",
1149    blue	=> "\e[00;34m",
1150    violet	=> "\e[00;35m",
1151    normal      => "\e[0m",
1152};
1153$terminal_colors = {} if !defined($ENV{'TERM'}) || $ENV{'TERM'} !~ /^(xterm|screen|linux)/;
1154
1155sub task_begin_message {
1156    my $blue = $terminal_colors->{'BLUE'} || '';
1157    my $normal = $terminal_colors->{'normal'} || '';
1158    print "## Entering task: ${blue}$current_task${normal}\n";
1159}
1160
1161sub task_check_message {
1162    my $status = shift;
1163    return unless $my_verbose_flags->{'checks'} || $status eq 'error';
1164    my $normal = $terminal_colors->{'normal'} || '';
1165    my $color = {
1166        'ok' => $terminal_colors->{'green'} || '',
1167        'missing' => $terminal_colors->{'YELLOW'} || '',
1168        'error' => $terminal_colors->{'RED'} || '',
1169    };
1170    my @lines;
1171    push @lines, split(/^/m, $_) for @_;
1172    my $head = shift @lines;
1173    chomp($head);
1174    for (@lines) {
1175        chomp($_);
1176        $_ = "\t" . $_;
1177    }
1178    unshift @lines, $head;
1179    my @printme;
1180    push @printme, "## $color->{$status}Check for $current_task$normal: $_\n" for @lines;
1181    die @printme if $status eq 'error';
1182    print @printme;
1183}
1184# }}}
1185
1186# Inspection code for what is currently in $wdir
1187
1188# Some of the inspection code correspond to queries which are done
1189# several times, so it's better to cache the results as long as they are
1190# expected to remain valid.
1191# {{{ cache hash, and corresponding queries
1192my $cache = {
1193    # This corresponds to data which isn't supposed to expire with
1194    # immediate effect.
1195    # Here we'll find things which are inferred from reading the data
1196    # files in $wdir, but would be cumbersome to find otherwise.
1197};
1198sub expire_cache_entry {
1199    my $k = shift;
1200    delete $cache->{$k};
1201}
1202sub store_cached_entry {
1203    my ($k, $v) = @_;
1204    if (defined($cache->{$k}) && $cache->{$k} != $v) {
1205        die "Fatal error: conflict for cache entry $k: previously had $cache->{$k}, now detected $v";
1206    }
1207    $cache->{$k} = $v;
1208}
1209
1210
1211sub get_cached_leadernode_filelist {
1212    my $key = 'leadernode_filelist';
1213    my $opt = shift;
1214    my @x;
1215    if (defined(my $z = $cache->{$key})) {
1216        @x = @$z;
1217    } else {
1218        if (!$hostfile) {
1219            print STDERR "Listing files in $wdir via readdir\n";
1220            # We're running locally, thus there's no need to go to a remote
1221            # place just to run find -printf.
1222            my $dh;
1223            opendir $dh, $wdir;
1224            for (readdir $dh) {
1225                push @x, [$_, (stat "$wdir/$_")[7]];
1226            }
1227            closedir $dh;
1228        } else {
1229            my $foo = join(' ', @mpi_precmd_single, "find $wdir -maxdepth 1 -follow -type f -a -printf '%s %p\\n'");
1230            print STDERR "Listing files in $wdir via $foo\n";
1231            for my $line (`$foo`) {
1232                $line =~ s/^\s*//;
1233                chomp($line);
1234                $line =~ s/^(\d+)\s+//;
1235                my $s = $1;
1236                push @x, [basename($line), $s];
1237            }
1238        }
1239        $cache->{$key}=\@x;
1240    }
1241    if ($opt && $opt eq 'HASH') { # we could also use wantarray
1242        my $h = {};
1243        $h->{$_->[0]} = $_->[1] for @x;
1244        return $h;
1245    } else {
1246        return @x;
1247    }
1248}
1249
1250sub rename_file_on_leader {
1251    my ($old, $new) = @_;
1252    if (!$hostfile) {
1253        # We're running locally, thus there's no need to go to a remote
1254        # place just to run find -printf.
1255        rename $old, $new;
1256    } else {
1257        system(join(' ', @mpi_precmd_single, "mv $old $new"));
1258    }
1259}
1260
1261
1262# {{{ get_cached_bfile -> check for balancing file.
1263sub get_cached_bfile {
1264    my $key = 'balancing';
1265    return undef if defined($random_matrix);
1266    if ($param->{$key}) {
1267        $cache->{$key}=$param->{$key};
1268    }
1269    if (defined(my $z = $cache->{$key})) {
1270        return $z;
1271    }
1272    # We're checking on the leader node only. Because the other nodes
1273    # don't really mind if they don't see the balancing file.
1274    # sense.
1275    my $pat;
1276    my $x = $matrix;
1277    $x =~ s{^(?:.*/)?([^/]+)$}{$1};
1278    $x =~ s/\.(?:bin|txt)$//;
1279    my $bfile = "$wdir/$x.${nh}x${nv}/$x.${nh}x${nv}.bin";
1280    if (!-f $bfile) {
1281        print STDERR "$bfile: not found\n";
1282        return undef;
1283    }
1284    $cache->{$key} = $bfile;
1285    return $bfile;
1286}
1287# }}}
1288
1289sub get_cached_balancing_header {
1290    return undef if defined $random_matrix;
1291    my $key = 'balancing_header';
1292    if (defined(my $z = $cache->{$key})) { return @$z; }
1293    defined(my $balancing = get_cached_bfile) or confess "\$balancing undefined";
1294    sysopen(my $fh, $balancing, O_RDONLY) or die "$balancing: $!";
1295    sysread($fh, my $bhdr, 24);
1296    my @x = unpack("LLLLLL", $bhdr);
1297    my $zero = shift @x;
1298    die "$balancing: no leading 32-bit zero" unless $zero == 0;
1299    my $magic = shift @x;
1300    die "$balancing: bad file magic" unless $magic == 0xba1a0000;
1301    $cache->{$key} = \@x;
1302    close($fh);
1303    return @x;
1304}
1305sub get_nrows_ncols {
1306    my $key = 'nrows_ncols';
1307    if (defined(my $z = $cache->{$key})) {
1308        return @$z;
1309    }
1310    if (defined($random_matrix)) {
1311        my $nrows;
1312        my $ncols;
1313        my @tokens = split(',', $random_matrix);
1314        if ($tokens[0] =~ /^(\d+)/) {
1315            $nrows = $1;
1316            $ncols = $1;
1317        }
1318        if ($tokens[1] =~ /^(\d+)/) {
1319            $ncols = $1;
1320        }
1321        die "parameter random_matrix does not give nrows and ncols ??" unless defined($nrows) && defined($ncols);
1322        $cache->{$key} = [ $nrows, $ncols ];
1323        return @{$cache->{$key}};
1324    }
1325
1326
1327    if (defined(my $z = $cache->{'balancing_header'})) {
1328        my @x = @$z;
1329        shift @x;
1330        shift @x;
1331        $cache->{$key} = \@x;
1332        return @x;
1333    }
1334    if (defined(my $balancing = get_cached_bfile)) {
1335        my ($bnh, $bnv, $bnrows, $bncols) = get_cached_balancing_header;
1336        my @x = ($bnrows, $bncols);
1337        $cache->{$key} = \@x;
1338        return @x;
1339    }
1340    (my $mrw = $matrix) =~ s/(\.(?:bin|txt))$/.rw$1/;
1341    (my $mcw = $matrix) =~ s/(\.(?:bin|txt))$/.cw$1/;
1342    if ($mrw ne $matrix && $mcw ne $matrix && -f $mrw && -f $mcw) {
1343        my $nrows = ((stat $mrw)[7] / 4);
1344        my $ncols = ((stat $mcw)[7] / 4);
1345        my @x = ($nrows, $ncols);
1346        $cache->{$key} = \@x;
1347        return @x;
1348    }
1349    confess "Cannot find nrows and ncols ???";
1350}
1351# }}}
1352
1353# {{{ List the V files in $wdir -- output is given as a hash y-range =>
1354# iterations found. We also return the file size, which has to be
1355# constant.
1356sub list_files_generic {
1357    my $files = {};
1358    my $filesize;
1359    my ($n, $pattern) = @_;
1360    for my $fileinfo (get_cached_leadernode_filelist) {
1361        my ($file, $size) = @$fileinfo;
1362        $file =~ /^$pattern$/ or next;
1363        my $n0 = scalar @-;
1364        die "Found $n0 < $n matches. Bad pattern $pattern ??\n" if $n0 < $n;
1365        # See man perlvar
1366        my @matches = map { 0+substr $file, $-[$_], $+[$_] - $-[$_] } (1..$n0-1);
1367        my @kmatches = splice(@matches, 0, $n);
1368        push @{$files->{join("..", @kmatches)}}, \@matches;
1369        $filesize = $size if !defined($filesize);
1370        if ($filesize != $size) {
1371            task_check_message 'error', "Inconsistency detected for the sizes of the ${pattern} files. We have seen at least $filesize and $size (last seen: $file, $size). Please fix.\n";
1372        }
1373    }
1374    return $files, $filesize;
1375}
1376sub list_vfiles {
1377    my ($f, $filesize) = list_files_generic(2, qr/V(\d+)-(\d+)\.(\d+)/);
1378    if ($filesize) {    # take the occasion to store it.
1379        for my $k (keys %$f) {
1380            $k =~ /^(\d+)\.\.(\d+)$/ or die "Bad key $k returned by list_files_generic";
1381            if ($2-$1 != $splitwidth) {
1382                die "Problem with the width of V files: $1-$2 is not good";
1383            }
1384        }
1385        # Note that it might happen that we haven't computed the
1386        # balancing file yet.
1387        my ($bnrows, $bncols) = get_nrows_ncols;
1388        my $N = $bncols > $bnrows ? $bncols : $bnrows;
1389        eval {store_cached_entry('nbytes_per_splitwidth', $filesize / $N);};
1390        die "Problem with the size of V files ($filesize bytes, $N rows):\n$@" if $@;
1391    }
1392    my $flatten = sub { local $_; map { shift @$_ } @_; };
1393    $f->{$_} = [&$flatten(@{$f->{$_}})] for keys %$f;
1394    @{$f->{$_}} = sort { $a <=> $b } @{$f->{$_}} for keys %$f;
1395    return $f;
1396}
1397sub lexcmp {
1398    my $a = shift;
1399    my $b = shift;
1400    for my $k (0..$#$a) {
1401        my $z = $a->[$k] <=> $b->[$k];
1402        return $z if $z;
1403    }
1404    return 0;
1405}
1406
1407sub list_afiles {
1408    my ($f, $filesize) = list_files_generic(2, qr/A(\d+)-(\d+)\.(\d+)-(\d+)/);
1409    if ($filesize) {    # take the occasion to store it.
1410        for my $k (keys %$f) {
1411            $k =~ /^(\d+)\.\.(\d+)$/ or die "Bad key $k returned by list_files_generic";
1412            # We can tolerate $2-$1 == $n, because we still have acollect
1413            # around.
1414            if ($2-$1 != $splitwidth && $2-$1 != $n) {
1415                die "Problem with the width of A files: $1-$2 is not good";
1416            }
1417            my $length = $m * ($2-$1) * ($f->{$k}->[0]->[1] - $f->{$k}->[0]->[0]);
1418            eval { store_cached_entry('nbytes_per_splitwidth', $filesize / ($length / $splitwidth));};
1419            die "Problem with the size of A files:\n$@" if $@;
1420        }
1421    }
1422    @{$f->{$_}} = sort { lexcmp($a, $b) } @{$f->{$_}} for keys %$f;
1423    return $f;
1424}
1425
1426sub list_sfiles {
1427    my ($f, $filesize) = list_files_generic(2, qr/S\.sols(\d+)-(\d+)\.(\d+)-(\d+)/);
1428    if ($filesize) {    # take the occasion to store it.
1429        for my $k (keys %$f) {
1430            $k =~ /^(\d+)\.\.(\d+)$/ or die "Bad key $k returned by list_files_generic";
1431            if ($2-$1 != $splitwidth) {
1432                die "Problem with the width of S files: $1-$2 is not good";
1433            }
1434        }
1435        my ($bnrows, $bncols) = get_nrows_ncols;
1436        my $N = $bncols > $bnrows ? $bncols : $bnrows;
1437        my $length = $N;
1438        eval { store_cached_entry('nbytes_per_splitwidth', $filesize / $length); };
1439        die "Problem with the size of S files:\n$@" if $@;
1440    }
1441    # Because our S pattern now includes the iteration range, we pick
1442    # $_->[1] for the identifier.
1443    # my $flatten = sub { local $_; map { $_->[1] } @_; };
1444    # $f->{$_} = [&$flatten(@{$f->{$_}})] for keys %$f;
1445    #
1446    my $flatten = sub { local $_; return map { $_->[0] => $_->[1] } @_; };
1447    for (keys %$f) {
1448        my %x = &$flatten(@{$f->{$_}});
1449        $f->{$_} = \%x;
1450    }
1451    # @{$f->{$_}} = sort { $a <=> $b } @{$f->{$_}} for keys %$f;
1452    return $f;
1453}
1454# }}}
1455
1456# {{{ how many bytes per ($splitwidth) element ?
1457sub get_cached_nbytes_per_splitwidth {
1458    my $key = 'nbytes_per_splitwidth';
1459    if (defined(my $z = $cache->{$key})) { return $z; }
1460    # This function should practically never go beyond this point, as
1461    # presumably the list_* functions below, which all fill the cache as
1462    # a side effect, should have been run beforehand.
1463    list_vfiles;
1464    list_afiles;
1465    list_sfiles;
1466    if (defined(my $z = $cache->{$key})) { return $z; }
1467    die "Cannot find the number of bytes needed per finite field element";
1468}
1469# }}}
1470
1471
1472# {{{ inferring max iteration indices.
1473sub max_krylov_iteration {
1474    # read matrix dimension from the balancing header.
1475    my ($bnrows, $bncols) = get_nrows_ncols;
1476    my $length = $bncols > $bnrows ? $bncols : $bnrows;
1477    $length = int(($length+$m-1)/$m) + int(($length+$n-1)/$n);
1478    $length += 2 * int(($m+$n-1)/$m);
1479    $length += 2 * int(($m+$n-1)/$n);
1480    $length += 10;
1481    return $length;
1482}
1483# This one is of course much harder to guess.
1484sub max_mksol_iteration {
1485    my $leader_files = get_cached_leadernode_filelist 'HASH';
1486    my $nbytes_per_splitwidth = get_cached_nbytes_per_splitwidth;
1487    my @x;
1488    for my $file (keys %$leader_files) {
1489        $file =~ /^F\.sols(\d+)-(\d+)\.(\d+)-(\d+)$/ or next;
1490        if ($2-$1 != $splitwidth) {
1491            die "Problem with the width of F files: $1-$2 is not good";
1492        }
1493        if ($4-$3 != $splitwidth) {
1494            die "Problem with the width of F files: $1-$2 is not good";
1495        }
1496        my $size = $leader_files->{$file};
1497        return $size / (($2-$1)*($4-$3)/$splitwidth*$nbytes_per_splitwidth);
1498    }
1499    die "can't find any F file, cannot infer the mksol max iteration";
1500} # }}}
1501
1502# {{{ task_common_run is just a handy proxy.
1503sub task_common_run {
1504    my $program = shift @_;
1505    expire_cache_entry 'leadernode_filelist';
1506    # Some arguments are relevant only to some contexts.
1507    #
1508    # We start with lingen, because it's slightly specific
1509    # take out the ones we don't need (and acollect shares some
1510    # peculiarities).
1511    @_ = grep !/^(skip_bw_early_rank_check|rebuild_cache|cpubinding|balancing.*|interleaving|matrix|mm_impl|mpi|thr)?=/, @_ if $program =~ /(lingen|acollect$)/;
1512    if ($program =~ /lingen/) {
1513        @_ = map { s/^lingen_mpi\b/mpi/; $_; } @_;
1514    } else {
1515        @_ = grep !/^lingen_mpi?=/, @_;
1516    }
1517    # @_ = grep !/lingen_threshold/, @_ unless $program =~ /lingen/;
1518    # @_ = grep !/lingen_mpi_threshold/, @_ unless $program =~ /lingen/;
1519    @_ = grep !/allow_zero_on_rhs/, @_ unless $program =~ /^lingen/;
1520    @_ = grep !/^save_submatrices?=/, @_ unless $program =~ /^(prep|krylov|mksol|gather)$/;
1521    # are we absolutely sure that lingen needs no matrix ?
1522    @_ = grep !/^ys=/, @_ unless $program =~ /(krylov|dispatch)$/;
1523    @_ = grep !/^solutions=/, @_ unless $program =~ /(?:mksol|gather)$/;
1524    @_ = grep !/^rhs=/, @_ unless $program =~ /(?:prep|gather|lingen.*|mksol)$/;
1525    @_ = grep !/(?:precmd|tolerate_failure)/, @_;
1526
1527    $program="$bindir/$program";
1528    unshift @_, $program;
1529
1530    if ($param->{'precmd'}) {
1531        unshift @_, split(' ', $param->{'precmd'});
1532    }
1533
1534    if ($mpi_needed) {
1535        if ($program =~ /\/lingen[^\/]*$/) {
1536            unshift @_, @mpi_precmd_lingen;
1537        } elsif ($program =~ /\/(?:split|acollect|lingen|cleanup)$/) {
1538            unshift @_, @mpi_precmd_single;
1539        } elsif ($program =~ /\/(?:prep|secure|krylov|mksol|gather|dispatch)$/) {
1540            unshift @_, @mpi_precmd;
1541        } else {
1542            die "Don't know the parallel status of program $program ... ?";
1543        }
1544    } else {
1545        @_ = grep { !/^(mpi)=/ } @_;
1546    }
1547
1548    eval { dosystem @_; };
1549
1550    if ($@) {
1551        if (defined(my $tol = $param->{'tolerate_failure'})) {
1552            my $re = qr/$tol/;
1553            if ($program =~ /$re/) {
1554                print STDERR "Not aborting because $program matches tolerate_failure regexp $tol\n";
1555                return;
1556            }
1557        } else {
1558            die;
1559        }
1560    }
1561}
1562# }}}
1563
1564# {{{ SUBTASKS are used by one or several tasks.
1565
1566# {{{ subtask_krylov_todo - the hard checkpoint recovery work
1567sub subtask_krylov_todo {
1568    # For each sequence, this finds the "most advanced" V file. Being
1569    # advanced takes two things. First this means an existing file, and
1570    # the code here checks for this. But we also request that some extra
1571    # function returns true, and this check is provided by the caller.
1572    my $length = shift;
1573    my $morecheck = shift || sub{1;};
1574
1575    my @ys;
1576    if (defined($random_matrix)) {
1577        my @ys=();
1578        my $dx = $simd;
1579        $dx +=  $simd if $param->{'interleaving'};
1580        for(my $x = 0; $x < $n ; $x += $dx) {
1581            my $y = $x + $dx;
1582            push @ys, "ys=$x..$y";
1583        }
1584        return @ys;
1585    }
1586
1587    # We unconditionally do a check of the latest V checkpoint found in
1588    # $wdir, for all vectors.
1589    my @all_ys = map { [ $_*$splitwidth, ($_+1)*$splitwidth ] } (0..$n/$splitwidth - 1);
1590
1591    my $vfiles = list_vfiles;
1592    my $vstarts = {};
1593
1594    print "## complete range list for krylov: ".join(" ", map {"$_->[0]-$_->[1]";} @all_ys)."\n";
1595
1596    for my $y (@all_ys) {
1597        my $yrange = $y->[0] . ".." . $y->[1];
1598        print "## V files for $yrange:";
1599        my @for_this_y =
1600            grep { $_ == 0 || &$morecheck($yrange, $_); } @{$vfiles->{$yrange}}
1601            or do { print " none\n"; next; };
1602        $vstarts->{$yrange} = $for_this_y[$#for_this_y];
1603        $vstarts->{$yrange} .= " (DONE!)" if $vstarts->{$yrange} >= $length;
1604        print " last $current_task checkpoint is $vstarts->{$yrange}\n";
1605    }
1606
1607    my @todo;
1608
1609    my $ys_comment='';
1610    if (@ys = grep { /^ys/ } @main_args) {
1611        @ys = map { /^(?:ys=)?(\d+)\.\.(\d+)$/; $_=[$1, $2]; } @ys;
1612        $ys_comment = ' (from command line)';
1613        # Do we also have a specification for the starting checkpoint ?
1614        if (my @ss = grep { /^start/ } @main_args) {
1615            my ($start) = grep { s/^start=(\d+)/$1/; } @ss;
1616            my $yrange = $ys[0]->[0] . ".." . $ys[0]->[1];
1617            my %ok = map { $_ => 1 } grep { $_ == 0 || &$morecheck($yrange, $_); } @{$vfiles->{$yrange}};
1618            if ($ok{$start}) {
1619                print "## Forcing ys=$yrange start=$start as requested on command line\n";
1620                push @todo, "ys=$yrange start=$start";
1621                return @todo;
1622            } else {
1623                my @avail = sort { $a <=> $b } keys %ok;
1624                task_check_message 'error', "Cannot start at ys=$yrange start=$start: missing checkpoints (available: @avail)";
1625            }
1626        }
1627    } else {
1628        my $y;
1629        for(my $x = 0; $x < $n ; $x = $y) {
1630            $y = $x + $simd;
1631            $y += $simd if $param->{'interleaving'};
1632            $y = $n if $y > $n;
1633            push @ys, [ $x, $y];
1634        }
1635        $ys_comment = "simd=$simd" if $simd > $splitwidth;
1636        $ys_comment .= ", interleaving" if $param->{'interleaving'};
1637        $ys_comment = " ($ys_comment)" if $ys_comment;
1638    }
1639
1640    print "## range list for krylov: ".join(" ", map {"$_->[0]..$_->[1]";} @ys)."$ys_comment\n";
1641
1642    my @impossible;
1643    for my $ab (@ys) {
1644        # most ys are double ranges, except maybe the last one.
1645        my ($a, $b) = @{$ab};
1646        my $yrange = $a . ".." . $b;
1647        my $start;
1648        # now all saved vectors are for width = $splitwidth ; given that
1649        # we have one single start= argument, we need to make sure that
1650        # all the sub-vectors have reached the same checkpoint.
1651        my $discard;
1652        for(my $x = $a; $x < $b ; $x += $splitwidth) {
1653            my $r = $x."..".($x + $splitwidth);
1654            my $s = $vstarts->{$r};
1655            if (!defined($s)) {
1656                task_check_message 'warning',
1657                "No starting vector for range $r"
1658                . " (sub-range from $yrange)\n";
1659                $discard = 1;
1660            } elsif (!defined($start)) {
1661                $start = $s;
1662            } elsif ($s != $start) {
1663                task_check_message 'warning',
1664                "Inconsistent start vectors for the "
1665                . " sub-ranges from $yrange"
1666                . " (found both $start and $s)\n";
1667                $discard = 1;
1668            }
1669        }
1670        undef $start if $discard;
1671        if (!defined($start)) {
1672            print STDERR "## Can't schedule any work for $yrange, as *NO* checkpoint is here\n";
1673            push @impossible, $yrange;
1674            next;
1675        }
1676        next if $start =~ /DONE/;
1677        push @todo, "ys=$yrange start=$start";
1678    }
1679    if (!@todo && @impossible) {
1680        task_check_message 'error', "Cannot schedule remaining work. No checkpoints for ranges:\n", @impossible;
1681    }
1682    return @todo;
1683}
1684# }}}
1685# }}}
1686
1687# {{{ prep
1688
1689sub task_prep_missing_output_files {
1690    my @starts;
1691    for my $i (0..$n/$splitwidth-1) {
1692        my $x0 = $i*$splitwidth;
1693        my $x1 = ($i+1)*$splitwidth;
1694        push @starts, "V${x0}-${x1}.0";
1695    }
1696    my @missing;
1697    my $leader_files = get_cached_leadernode_filelist 'HASH';
1698    unshift @starts, "X";
1699    for my $file (@starts) {
1700        push @missing, $file unless exists $leader_files->{$file};
1701    }
1702    if (@missing == @starts) { return "all", @missing; }
1703    elsif (@missing) { return "some", @missing; }
1704    else { return "none" };
1705}
1706
1707sub task_prep {
1708    task_begin_message;
1709    # This prepares the starting vectors for block Wiedemann.
1710
1711    # input files:
1712    #   - output files from previous tasks: dispatch
1713    #
1714    # output files, created if needed.
1715    #   - X
1716    #   - V${x0}-${x1}.0 ; the different starting vectors.
1717
1718    my ($status, @missing) = task_prep_missing_output_files;
1719
1720    if ($status eq 'none') {
1721        task_check_message 'ok', "All output files for $current_task have been found, good";
1722        return;
1723    }
1724
1725    if ($status eq 'some') {
1726        task_check_message 'error', "Missing output files for $current_task", @missing, "We don't fix this automatically. Please investigate.";
1727    }
1728
1729    task_check_message 'missing', "none of the output files for $current_task have been found, need to run $current_task now. We want to create files:", @missing;
1730
1731    if ($prime == 2) {
1732        task_common_run('prep', @main_args);
1733    } else {
1734        # The prime case is somewhat different. We'll generate random
1735        # data, that's it. We might prepend the RHS if it exists.
1736        task_common_run('prep', @main_args, "ys=0..$splitwidth");
1737    }
1738
1739} # }}}
1740
1741# {{{ secure -- this is now just a subtask of krylov or mksol
1742sub subtask_secure {
1743    return if $param->{'skip_online_checks'};
1744    my $wanted_stops={};
1745    if (defined(my $x = $param->{'interval'})) {
1746        $wanted_stops->{$x}=1;
1747    }
1748    if (defined(my $x = $param->{'check_stops'})) {
1749        my @x = split(',', $x);
1750        $wanted_stops->{$_}=1 for @x;
1751    }
1752    my $leader_files = get_cached_leadernode_filelist 'HASH';
1753
1754    my $mustrun = 0;
1755    if (scalar keys %$wanted_stops) {
1756        $wanted_stops->{0}=1;
1757        for (keys %$wanted_stops) {
1758            my @wnames=("Cv0-$splitwidth.$_");
1759            push @wnames, "Cd0-$splitwidth.$_" if $_ > 0;
1760            for my $name (@wnames) {
1761                next if $leader_files->{$name};
1762                task_check_message 'missing', "missing check vector $name\n";
1763                $mustrun = 1;
1764            }
1765        }
1766    } else {
1767        # we can only check for the existence of _some_ check vector.
1768        unless (grep { /^Cv0-(\d+)\.\d+$/ && ($1 == $splitwidth) && !/^Cv0-\d+\.0$/ } keys %$leader_files) {
1769            task_check_message 'missing', "no check vector (Cv) found\n";
1770            $mustrun = 1;
1771        }
1772        unless (grep { /^Cd0-(\d+)\.\d+$/ && ($1 == $splitwidth) } keys %$leader_files) {
1773            task_check_message 'missing', "no check vector (Cd) found\n";
1774            $mustrun = 1;
1775        }
1776    }
1777    for my $name ("Cr0-$splitwidth.0-$splitwidth", "Ct0-$splitwidth.0-$m") {
1778        next if $leader_files->{$name};
1779        task_check_message 'missing', "missing check vector $name\n";
1780        $mustrun = 1;
1781    }
1782    if ($mustrun) {
1783        task_common_run('secure', @main_args);
1784    } else {
1785        my $x = join(", ", sort { $a <=> $b } keys %$wanted_stops);
1786        task_check_message 'ok', "All auxiliary files for checkpointing are here, good (wanted: $x).\n";
1787    }
1788}
1789# }}}
1790
1791sub task_dispatch {
1792    task_begin_message;
1793    return if defined $random_matrix;
1794    if (defined(get_cached_bfile)) {
1795        task_check_message 'ok', "All balancing files are present, good.\n";
1796        return;
1797    }
1798    # we do need to keep the ys, because those control the width that get
1799    # passed to the cache building procedures.
1800    task_common_run('dispatch', @main_args);
1801}
1802
1803sub task_secure {
1804    task_begin_message;
1805    if (defined($random_matrix)) {
1806        task_check_message 'ok', "No checkpoint verification with random_matrix\n";
1807    } else {
1808        subtask_secure;
1809    }
1810}
1811
1812# {{{ krylov
1813sub task_krylov {
1814    task_begin_message;
1815
1816    # input files:
1817    #   - output files from previous tasks: prep
1818    #     More accurately, we need V${x0}-${x1}.$i for some iteration i.
1819    #
1820    # if ys is found in the command line, then we focus on that sequence
1821    # specifically. Otherwise, we process all sequences one after
1822    # another.
1823    #
1824    # side-effect files, which have no impact on whether this step gets
1825    # re-run or not:
1826    #   - A${x0}-${x1}.* ; dot product files which are fed to lingen.
1827    #   These are write-only as far as this task is concerned.
1828
1829    subtask_secure unless defined $random_matrix;
1830
1831    my $length = max_krylov_iteration;
1832    print "## krylov max iteration is $length\n";
1833
1834    my @todo = subtask_krylov_todo $length;
1835
1836    if (!@todo) {
1837        # Note that we haven't checked for the A files yet !
1838        task_check_message 'ok',
1839                    "All krylov tasks are done, good.\n";
1840        return;
1841    }
1842
1843    task_check_message 'missing',
1844                "Pending tasks for krylov:\n" . join("\n", @todo);
1845    for my $t (@todo) {
1846        # take out ys and start from main_args, put the right ones in place if
1847        # needed. (note that if both were specified, this is essentially
1848        # putting the very same parameters back in place, but with the
1849        # benefit of having performed a check inbetween).
1850        my @args = grep { !/^ys/ && !/^start/ } @main_args;
1851        push @args, split(' ', $t);
1852        task_common_run 'krylov', @args;
1853    }
1854} # }}}
1855
1856# {{{ lingen
1857
1858sub task_lingen_input_errors {
1859    # krylov_length is not documented. It's just here to fool lingen and
1860    # let it believe that it does have the full data set.
1861    my $h = shift;
1862    my $length = $param->{'krylov_length'} || max_krylov_iteration;
1863    my $afiles = list_afiles;
1864    my $astrings = {};
1865    my $ok = 1;
1866    my $nb_afiles = 0;
1867    my $maxz1;
1868    my $minz1;
1869    for my $k (keys %$afiles) {
1870        my @strings;
1871        my ($z0, $z1);
1872        for my $seg (@{$afiles->{$k}}) {
1873            $nb_afiles++;
1874            if (defined($z1)) {
1875                if ($seg->[0] == $z1) {
1876                    $z1 = $seg->[1];
1877                } else {
1878                    push @strings, [$z0, $z1];
1879                    $z0 = undef;
1880                    $z1 = undef;
1881                }
1882                next;
1883            }
1884            ($z0, $z1) = @$seg;
1885        }
1886        $ok = 0 unless scalar @strings == 0 && $z0 == 0 && $z1 >= $length;
1887        push @strings, [$z0, $z1] if defined $z1;
1888        $maxz1 = $z1 unless defined($maxz1) && $z1 < $maxz1;
1889        $minz1 = $z1 unless defined($minz1) && $z1 > $minz1;
1890        $astrings->{$k} = \@strings;
1891    }
1892
1893    $ok = 0 if $minz1 != $maxz1;
1894    my $rlength = $minz1;
1895
1896    if (!$ok) {
1897        my @errors;
1898        push @errors, "Incomplete set of A files found";
1899
1900        for my $k (sort {$a cmp $b} keys %$afiles) {
1901            push @errors, "$k, computed range(s): " . join(" ", map { $_->[0]."..".$_->[1]; } @{$astrings->{$k}}) . "\n";
1902        }
1903        return @errors;
1904    }
1905    $h->{'concatenated_A'} = "A0-$n.0-$rlength";
1906    $h->{'need_acollect'} = $nb_afiles > 1;
1907    return ();
1908}
1909
1910sub task_lingen {
1911    task_begin_message;
1912
1913    # input files:
1914    #   - A.*-*.*-* ; all A files created by krylov.
1915    #     -> this task accumulates them in a single file.
1916    # output_file
1917    #   - F.sols*-*.*-* ; all polynomial entries of the generating
1918    #     matrix.
1919
1920    # The function task_lingen_input_errors has a few side-effects,
1921    # returned back in the hash $h.
1922    my $h = {};
1923    if (my @errors = task_lingen_input_errors $h) {
1924        task_check_message 'error', @errors;
1925    }
1926
1927    my $concatenated_A = $h->{'concatenated_A'};
1928
1929    if ($h->{'need_acollect'}) {
1930        task_check_message 'missing', "Running acollect to create $concatenated_A";
1931        task_common_run("acollect", @main_args, "--remove-old");
1932    }
1933
1934    # We expect lingen to split its output into pieces of predictable
1935    # size.
1936    my $leader_files = get_cached_leadernode_filelist 'HASH';
1937    my @missing;
1938    my @expected;
1939    for my $i (0..$n/$splitwidth-1) {
1940        my $sol = sprintf("sols%d-%d", $i*$splitwidth, ($i+1)*$splitwidth);
1941        for my $j (0..$n/$splitwidth-1) {
1942            my $j0 = $splitwidth * $j;
1943            my $j1 = $splitwidth + $j0;
1944            my $f = "F.$sol.${j0}-${j1}";
1945            push @expected, $f;
1946            push @missing, $f unless exists $leader_files->{$f};
1947            if ($j1 <= $nrhs) {
1948                $f .= ".rhs";
1949                push @expected, $f;
1950                push @missing, $f unless exists $leader_files->{$f};
1951            }
1952        }
1953    }
1954    if (@missing == 0) {
1955        task_check_message 'ok', "lingen result found, good.";
1956        return;
1957    } elsif (@missing < @expected) {
1958        task_check_message 'error', "Incomplete lingen output found. Missing files:\n" , @missing;
1959    }
1960
1961    task_check_message 'missing', "lingen has not run yet. Running now.";
1962    # Now run lingen itself. Which binary we'll run is not totally
1963    # obvious though.
1964    # NOTE: It may be worthwhile to run specifically this step, but
1965    # with adapted mpi and thr parameters.
1966    my @args;
1967    push @args, "split-output-file=1";
1968    push @args, "afile=$concatenated_A";
1969    push @args, "ffile=F";
1970    push @args, grep { /^(?:mn|m|n|wdir|prime|rhs)=/ || /allow_zero_on_rhs/ } @main_args;
1971    if (!$mpi_needed && ($lingen_mpi_split[0]*$lingen_mpi_split[1] != 1)) {
1972        print "## non-MPI build, avoiding multi-node lingen\n";
1973        # We keep thr=
1974        @args = grep { !/^(mpi)=/ } @args;
1975    }
1976    push @args, grep { /^verbose_flags=/ } @main_args;
1977    if (! -f "$wdir/$concatenated_A.gen") {
1978        if ($prime == 2) {
1979            push @args, "tuning_thresholds=recursive:128,ternary:6400,cantor:6400,notiming:0";
1980            task_common_run("lingen_u64k1", @args);
1981        } else {
1982            push @args, "tuning_thresholds=recursive:10,flint:10,notiming:0";
1983            task_common_run("lingen_pz", @args);
1984        }
1985    } else {
1986        task_check_message 'ok', "lingen already has .gen file, good.";
1987    }
1988}
1989# }}}
1990
1991# {{{ mksol
1992sub task_mksol {
1993    task_begin_message;
1994
1995    # input files:
1996    #   - V*-*.<some iteration>, e.g. maybe the starting one generated by
1997    #     prep, or any other.
1998    #   - F.sols*-*.*-* ; all polynomial entries of the generating
1999    #     matrix.
2000    #
2001    # Note that the computation will start at the first iteration number
2002    # for which a V file is here, and no S file yet (for any of the
2003    # solutions considered, which is a number limited by $nrhs if
2004    # specified).
2005    #
2006    # side-effect files, which have no impact on whether this step gets
2007    # re-run or not:
2008    #   - S.sols*-*.*-*.* ; partial sum which are fed to gather.
2009    #   These are write-only as far as this task is concerned.
2010
2011    subtask_secure unless defined $random_matrix;
2012
2013    # We choose to require the S files for considering checkpoints as
2014    # valid. This is because in all likelihood, the V files from krylov
2015    # will still be there, so even though this is a hint as to what can
2016    # be started right now, we can't really use it...
2017    my $sfiles = list_sfiles;
2018    # $sfiles->{$_} = eval { my %x=map {$_=>1;} @{$sfiles->{$_}};\%x;} for keys %$sfiles;
2019    #
2020    my $length = eval { max_mksol_iteration; };
2021    if ($@) {
2022        task_check_message 'error', "Lingen output files missing", $@, "Please run lingen first.";
2023        die "abort";
2024    }
2025
2026    print "## mksol max iteration is $length\n";
2027
2028    my @solutions=@{$param->{'solutions'}};
2029    print "## main solution ranges for mksol ".join(" ", @solutions)."\n";
2030
2031    my @all_solutions = map
2032                        { $_=sprintf("%d-%d",
2033                                $_*$splitwidth,
2034                                ($_+1)*$splitwidth);
2035                        } (0..$n/$splitwidth-1);
2036
2037    my %solutions_importance;
2038    $solutions_importance{$_}=0 for (@all_solutions);
2039    for (@solutions) {
2040        /^(\d+)-(\d+)$/ or die;
2041        my $x=$1;
2042        my $y;
2043        my $z=$2;
2044        for( ; $x < $z ; $x = $y) {
2045            $y = $x + $splitwidth;
2046            my $s = "$x-$y";
2047            die "No solution file defined for $s: we have only @all_solutions" unless defined $solutions_importance{$s};
2048            $solutions_importance{$s}=1;
2049        }
2050    }
2051    my @todo;
2052    my @only_start = grep(/^start=\d+$/, @main_args);
2053
2054    if (@only_start) {
2055        die "start says: @only_start" unless scalar @only_start == 1;
2056        die "solution says: @solutions" unless scalar @solutions == 1;
2057        @todo = "@only_start solutions=@solutions";
2058        task_check_message 'ok', "Command line imposes one specific subtask @todo";
2059    } else {
2060        for my $s (@solutions) {
2061            $s =~ /^(\d+)-(\d+)$/ or die;
2062            my $x=$1;
2063            my $y;
2064            my $z=$2;
2065            my $all_optional=1;
2066            my $n_common;
2067            # scan all subranges.
2068            for( ; $x < $z ; $x = $y) {
2069                $y = $x + $splitwidth;
2070                my $s = "$x-$y";
2071                my $optional = ($solutions_importance{$s} == 0);
2072                $all_optional = 0 unless $optional;
2073                # find latest checkpoint for that subrange
2074                my $n = 0;
2075                $s =~ /^(\d+)-(\d+)$/ or die;
2076                my $graph = $sfiles->{"$1..$2"};
2077                while (defined(my $e = $graph->{$n})) {
2078                    $n = $e;
2079                }
2080                my $msg = "## S files for $s --> last mksol checkpoint is $n";
2081                $msg .= " (DONE!)" if $n >= $length;
2082                $msg .= " (optional)" if $optional;
2083                print "$msg\n";
2084                if (!defined($n_common)) {
2085                    $n_common = $n;
2086                } elsif ($n_common != $n) {
2087                    task_check_message 'warning',
2088                    "Inconsistent latest vectors for the "
2089                    . " sub-ranges from $s"
2090                    . " (found both $n_common and $n)\n";
2091                }
2092            }
2093            if ($n_common < $length) {
2094                push @todo, "solutions=$s start=$n_common" unless $all_optional;
2095            }
2096        }
2097        if ($@) {
2098            task_check_message 'error', "Failure message while checking $current_task files";
2099        }
2100        if (!@todo) {
2101            # Note that we haven't checked for the A files yet !
2102            task_check_message 'ok', "All $current_task tasks are done, good.\n";
2103            return;
2104        }
2105
2106        task_check_message 'missing',
2107                    "Pending tasks for $current_task:\n" . join("\n", @todo);
2108    }
2109
2110    for my $t (@todo) {
2111        # take out ys from main_args, put the right one in place if
2112        # needed.
2113        # print "main_args: @main_args\n";
2114        my @args = grep { !/^(ys|n?rhs|start)/ } @main_args;
2115        push @args, split(' ', $t);
2116
2117        task_common_run 'mksol', @args;
2118    }
2119}
2120# }}}
2121
2122# {{{ gather
2123sub task_gather {
2124    task_begin_message;
2125
2126    # input files:
2127    #   - S.sols*-*.*-*.* ; partial sums computed by mksol
2128
2129    my @missing;
2130    my @todo;
2131    my $leader_files = get_cached_leadernode_filelist 'HASH';
2132    my @solutions=@{$param->{'solutions'}};
2133    my @all_solutions = map
2134                        { $_=sprintf("%d-%d",
2135                                $_*$splitwidth,
2136                                ($_+1)*$splitwidth);
2137                        } (0..$n/$splitwidth-1);
2138    my %solutions_importance;
2139    $solutions_importance{$_}=0 for (@all_solutions);
2140    for my $s (@solutions) {
2141        $s =~ /^(\d+)-(\d+)$/ or die;
2142        my @loc_found;
2143        my @loc_notfound;
2144        my $x=$1;
2145        my $y;
2146        my $z=$2;
2147        for( ; $x < $z ; $x = $y) {
2148            $y = $x + $splitwidth;
2149            my $ls = "$x-$y";
2150            die "No solution file defined for $s: we have only @all_solutions" unless defined $solutions_importance{$ls};
2151            $solutions_importance{$ls}=1;
2152
2153            my $kfile = "K.sols$ls.0";
2154            if (exists $leader_files->{$kfile}) {
2155                push @loc_found, $kfile;
2156            } else {
2157                push @loc_notfound, $kfile;
2158            }
2159        }
2160        if (@loc_found && @loc_notfound) {
2161            task_check_message 'error', "not all files are consistent for solutions=$s : found @loc_found, missing @loc_notfound";
2162        }
2163        next if @loc_found;
2164        push @missing, @loc_notfound;
2165        if ($param->{'interleaving'}) {
2166            # Gather doesn't support interleaving at all, so let's do two
2167            # distinct executions.
2168            $s =~ /^(\d+)-(\d+)$/ or die;
2169            my $x=$1;
2170            my $y=int(($1+$2)/2);
2171            my $z=$2;
2172            push @todo, "solutions=$x-$y";
2173            push @todo, "solutions=$y-$z";
2174        } else {
2175            push @todo, "solutions=$s";
2176        }
2177    }
2178    if (@missing == 0) {
2179        task_check_message 'ok', "All solution files produced by gather seem to be present, good.";
2180        return;
2181    }
2182    task_check_message 'missing', "Need to run gather to create the files: @missing\n";
2183    @missing=();
2184    # {{{ Print the number of files found in a matrix.
2185    my $sfiles = list_sfiles;
2186    my $maxmksol = eval { max_mksol_iteration; };
2187    if ($@) {
2188        task_check_message 'error', "Lingen output files missing", $@, "Please run lingen first.";
2189    }
2190    print "## mksol max iteration is $maxmksol\n";
2191
2192    my $cmat = {};
2193    for my $k (keys %$sfiles) {
2194        $k =~ /^(\d+)\.\.(\d+)$/ or die;
2195        my $graph = $sfiles->{$k};
2196        my $key = "sols$1-$2";
2197        $cmat->{$key}= scalar keys %$graph;
2198        # find max advanced point.
2199        my $n = 0;
2200        while (defined(my $e = $graph->{$n})) {
2201            $n = $e;
2202        }
2203        $cmat->{$key}.= "*" if $n < $maxmksol;
2204    }
2205    my $c0 = 4;
2206    my $c = 0;
2207
2208    for my $s (@solutions) {
2209        $s =~ /^(\d+)-(\d+)$/ or die;
2210        my $x=$1;
2211        my $y;
2212        my $z=$2;
2213        for( ; $x < $z ; $x = $y) {
2214            $y = $x + $splitwidth;
2215            my $ls = "$x-$y";
2216            my $key = "sols$ls";
2217            my $optional = ($solutions_importance{$ls} == 0);
2218            my $l = 4+length($ls); $c0 = $l if $l > $c0;
2219            my $n = $cmat->{$key} || 'NONE'; $cmat->{$key} = $n;
2220            $l = length($n); $c = $l if $l > $c;
2221            next if $optional;
2222            push @missing, "S.sols$s" if $n eq 'NONE' || $n =~ /\*$/;
2223        }
2224    }
2225    print "## Number of S files found:\n";
2226    for my $s (@solutions) {
2227        $s =~ /^(\d+)-(\d+)$/ or die;
2228        my $x=$1;
2229        my $y;
2230        my $z=$2;
2231        for( ; $x < $z ; $x = $y) {
2232            $y = $x + $splitwidth;
2233            my $ls = "$x-$y";
2234            my $optional = ($solutions_importance{$ls} == 0);
2235            print "##    " . sprintf("%${c0}s","$ls") . " | "
2236                . sprintf("%${c}s", $cmat->{"sols$ls"})
2237                . ($optional ? " (optional)" : "")
2238                . "\n";
2239        }
2240    }
2241    # }}}
2242
2243    my $rhs_companion;
2244    if ($param->{'rhs'}) {
2245        for my $s (@solutions) {
2246            for my $j (0..$n/$splitwidth - 1) {
2247                my $y0 = $j * $splitwidth;
2248                my $y1 = $y0 + $splitwidth;
2249                next if $y0 >= $nrhs;
2250                my $f = "F.sols$s.$y0-$y1.rhs";
2251                push @missing, $f unless exists($leader_files->{$f});
2252            }
2253        }
2254    }
2255
2256    if (@missing) {
2257        task_check_message 'error', "Missing files for $current_task:", @missing;
2258    }
2259
2260    task_check_message 'ok', "All required files for gather seem to be present, good.";
2261
2262    my @args = grep { !/^ys/ } @main_args;
2263    for my $t (@todo) {
2264        task_common_run 'gather', @args, $t;
2265    }
2266}
2267# }}}
2268
2269# {{{ cleanup -- For p=2, this extra step produces a RREF solution.
2270sub task_cleanup {
2271    # This is only for p=2 for the moment.
2272    return if $prime ne 2;
2273    task_begin_message;
2274
2275    my @missing;
2276    my $leader_files = get_cached_leadernode_filelist 'HASH';
2277    my $per_solution_files = {};
2278    my $err;
2279    my @todo;
2280    my @solutions=@{$param->{'solutions'}};
2281    for my $sol (@solutions) {
2282        $sol =~ /^(\d+)-(\d+)$/ or die;
2283        my $x=$1;
2284        my $y;
2285        my $z=$2;
2286        for( ; $x < $z ; $x = $y) {
2287            $y = $x + $splitwidth;
2288            my $s = "$x-$y";
2289
2290            my $wfile = "W.sols$s";
2291            if (exists($leader_files->{$wfile})) {
2292                task_check_message 'ok', "Solution $s is already computed in file $wfile, good.\n";
2293                next;
2294            }
2295
2296            my @ks =
2297                sort {
2298                    basename($a)=~/\.(\d+)$/; my $xa=$1;
2299                    basename($b)=~/\.(\d+)$/; my $xb=$1;
2300                    $xa <=> $xb;
2301                }
2302                grep {
2303                    /^(.*)\.\d+$/ && $1 eq "K.sols$s";
2304                }
2305                (keys %$leader_files);
2306
2307            if (!@ks) {
2308                task_check_message 'error', "No files created by gather for solution $sol";
2309                $err=1;
2310            } else {
2311                task_check_message 'missing', "Will run cleanup to compute $wfile from the files:", @ks;
2312            }
2313            push @todo, [$s, $wfile, @ks];
2314        }
2315    }
2316    for my $klist (@todo) {
2317        my $sol = shift @$klist;
2318        my @x = map { "$wdir/$_"; } @$klist;
2319        my $wfile = shift @x;
2320        $sol =~ /^(\d+)-(\d+)$/ or die "$sol: could not parse";
2321        my $nsols = $2-$1;
2322        task_common_run('cleanup', "--ncols", $nsols, "--out", $wfile, @x);
2323    }
2324    if (scalar @todo == 1 && (!-f"$wdir/W" || ((stat "$wdir/W")[7] lt (stat "$wdir/W.sols$solutions[0]")[7]))) {
2325        print STDERR "## Providing $wdir/W as an alias to $wdir/W.sols$solutions[0]\n";
2326        symlink "$wdir/W.sols$solutions[0]", "$wdir/W";
2327    }
2328}
2329# }}}
2330
2331my $tasks = {
2332    prep	=> \&task_prep,
2333    krylov	=> \&task_krylov,
2334    # also keep "secure" and "dispatch", but just as handy proxies.
2335    secure	=> \&task_secure,
2336    dispatch	=> \&task_dispatch,
2337    lingen	=> \&task_lingen,
2338    mksol	=> \&task_mksol,
2339    gather	=> \&task_gather,
2340    cleanup	=> \&task_cleanup,
2341};
2342
2343my @complete = qw(
2344        prep
2345        krylov
2346        lingen
2347        mksol
2348        gather
2349        cleanup
2350    );
2351
2352my @tasks_todo=();
2353
2354if ($main eq ':complete') {
2355    @tasks_todo=@complete;
2356} else {
2357    @tasks_todo=($main);
2358}
2359
2360for (@tasks_todo) {
2361    $current_task = $_;
2362    if ($stop_at_step && $current_task eq $stop_at_step) {
2363        print "Exiting early, because of stop_at_step=$stop_at_step\n";
2364        last;
2365    }
2366    if (defined($tasks->{$current_task})) {
2367        &{$tasks->{$current_task}}(@main_args);
2368    } else {
2369        die "No task by that name: $current_task";
2370    }
2371}
2372