1
2###############################################################################
3 #
4 #  This file is part of canu, a software program that assembles whole-genome
5 #  sequencing reads into contigs.
6 #
7 #  This software is based on:
8 #    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9 #    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10 #
11 #  Except as indicated otherwise, this is a 'United States Government Work',
12 #  and is released in the public domain.
13 #
14 #  File 'README.licenses' in the root directory of this distribution
15 #  contains full conditions and disclaimers.
16 ##
17
18package canu::Grid_LSF;
19
20require Exporter;
21
22@ISA    = qw(Exporter);
23@EXPORT = qw(detectLSF configureLSF);
24
25use strict;
26use warnings "all";
27no  warnings "uninitialized";
28
29use canu::Defaults;
30use canu::Execution;
31
32use canu::Grid "formatAllowedResources";
33
34
35
#  Detect an LSF installation by looking for the 'bsub' executable.
#
#  Does nothing if a grid engine has already been chosen (global 'gridEngine'
#  is defined) or if 'bsub' cannot be found.  Otherwise the detection is
#  reported on STDERR and, unless useGrid is "0", 'gridEngine' is set to "LSF".
#
sub detectLSF () {

    #  Some other engine (or the user) already picked the grid; leave it alone.
    if (defined(getGlobal("gridEngine"))) {
        return;
    }

    my $bsubPath = findExecutable("bsub");

    unless (defined($bsubPath)) {
        return;
    }

    my $gridDisabled = (getGlobal("useGrid") eq "0");

    #  The banner is the same whether or not the grid ends up enabled.
    print STDERR "--\n";
    print STDERR "-- Detected LSF with 'bsub' binary in $bsubPath.\n";

    if ($gridDisabled) {
        print STDERR "--          LSF disabled by useGrid=false\n";
    }
    else {
        setGlobal("gridEngine", "LSF");
    }
}
56
57
#  Convert a memory size as printed by 'lshosts' (e.g. '7.8G', '512M', '1T')
#  into whole gigabytes.
#
#  Returns a two element list: the size in GB (truncated to an integer) and
#  the lower-case unit letter that was found ('t', 'g', 'm' or 'k').  If no
#  unit suffix is recognized, the numeric prefix of the text is returned
#  unchanged (truncated) and the unit is undef.
#
sub _lsfMemoryToGB {
    my ($text) = @_;

    #  Factors that turn a value in the given unit into gigabytes.  All are
    #  exact powers of two, so multiplying gives the same result as dividing.
    my %toGB = ("t" => 1024,
                "g" => 1,
                "m" => 1 / 1024,
                "k" => 1 / 1024 / 1024);

    #  A single digit ('1T', '8G') must be accepted, as must a decimal
    #  fraction ('15.6G').
    if ($text =~ m/^\s*(\d+(?:\.\d+)?)\s*([tgmk])/i) {
        my $value = $1;
        my $unit  = lc($2);

        return(int($value * $toGB{$unit}), $unit);
    }

    no warnings "numeric";

    return(int($text), undef);
}


#  Configure canu to submit jobs to LSF.
#
#  Does nothing unless the global 'gridEngine' is "LSF".  Otherwise:
#   - fills in any grid engine globals the user did not set with LSF defaults;
#   - determines the unit LSF expects for memory requests, from the global
#     'gridEngineMemoryUnits', else from LSF_UNIT_FOR_LIMITS in
#     $LSF_ENVDIR/lsf.conf, else from the units shown by 'lshosts';
#   - runs 'lshosts' to tabulate the hosts available, saving the result in
#     the global 'availableHosts'.
#
#  Exits via caExit() if the memory units are not set and lsf.conf cannot be
#  found or opened.
#
sub configureLSF () {

    return   if (uc(getGlobal("gridEngine")) ne "LSF");

    #  Maximum job array size: the user setting wins, then the environment
    #  variable MAX_JOB_ARRAY_SIZE, then 65535.

    my $maxArraySize = getGlobal("gridEngineArrayMaxJobs");

    if (!defined($maxArraySize)) {
        $maxArraySize = 65535;

        if (defined($ENV{"MAX_JOB_ARRAY_SIZE"})) {
            $maxArraySize = $ENV{"MAX_JOB_ARRAY_SIZE"};
        }
    }

    setGlobalIfUndef("gridEngineSubmitCommand",              "bsub");
    setGlobalIfUndef("gridEngineNameOption",                 "-J");
    setGlobalIfUndef("gridEngineArrayOption",                "");
    setGlobalIfUndef("gridEngineArrayName",                  "ARRAY_NAME\[ARRAY_JOBS\]");
    setGlobalIfUndef("gridEngineArrayMaxJobs",               $maxArraySize);
    setGlobalIfUndef("gridEngineOutputOption",               "-o");
    setGlobalIfUndef("gridEngineResourceOption",             "-R span[hosts=1] -n THREADS -M MEMORY");
    setGlobalIfUndef("gridEngineMemoryPerJob",               "1");
    setGlobalIfUndef("gridEngineNameToJobIDCommand",         "bjobs -A -J \"WAIT_TAG\" | grep -v JOBID");
    setGlobalIfUndef("gridEngineNameToJobIDCommandNoArray",  "bjobs -J \"WAIT_TAG\" | grep -v JOBID");
    setGlobalIfUndef("gridEngineTaskID",                     "LSB_JOBINDEX");
    setGlobalIfUndef("gridEngineArraySubmitID",              "%I");
    setGlobalIfUndef("gridEngineJobID",                      "LSB_JOBID");

    #
    #  LSF has variation in the units used to request memory.
    #  They are defined by the LSF_UNIT_FOR_LIMITS variable in lsf.conf.
    #
    #  Expecting lines like 'LSF_UNIT_FOR_LIMITS=MB'.
    #
    #  Docs say:
    #    Set to MB at time of installation. If LSF_UNIT_FOR_LIMITS
    #    is not defined in lsf.conf, then the default setting is
    #    in KB, and for RUSAGE it is MB.
    #

    my $memUnits = getGlobal("gridEngineMemoryUnits");

    if (!defined($memUnits)) {
        my $lsfroot = $ENV{"LSF_ENVDIR"};
        my $lsfconf = defined($lsfroot) ? "$lsfroot/lsf.conf" : undef;

        #  With LSF_ENVDIR unset there is no lsf.conf to find; do not go
        #  probing '/lsf.conf'.
        if (defined($lsfconf) && (-e $lsfconf)) {
            open(my $conf, "<", $lsfconf) or caExit("can't open '$lsfconf' for reading: $!", undef);

            while (my $line = <$conf>) {
                #  \S+ keeps trailing blanks (or a CR) out of the units.  If
                #  the variable is set more than once, the last one wins.
                if ($line =~ m/^\s*LSF_UNIT_FOR_LIMITS\s*=\s*(\S+)/i) {
                    $memUnits = $1;
                }
            }

            close($conf);

            if (defined($memUnits)) {
                print STDERR "-- Discovered LSF_UNIT_FOR_LIMITS of '$memUnits' from '$lsfconf'.\n";
            }
            else {
                print STDERR "-- No LSF_UNIT_FOR_LIMITS found in '$lsfconf'; will infer memory units from 'lshosts'.\n";
            }
        }

        else {
            print STDERR "--\n";
            print STDERR "-- ERROR:  Can't find '\$LSF_ENVDIR/lsf.conf' to determine the unit to use for memory\n";
            print STDERR "-- ERROR:  sizes.  Set gridEngineMemoryUnits to one of 'k', 'm', 'g', or 't'.\n";
            print STDERR "--\n";

            caExit("can't configure for LSF", undef);
        }
    }

    #  Build a list of the resources available in the grid.  This will contain a list with keys
    #  of "#CPUs-#GBs" and values of the number of nodes with such a config.  Later on, we'll use this
    #  to figure out what specific settings to use for each algorithm.
    #
    #  The list is saved in global{"availableHosts"}
    #
    my %hosts;

    if (open(my $lshosts, "-|", "lshosts")) {
        my $header = <$lshosts>;
        my @h      = defined($header) ? split('\s+', $header) : ();

        #  Column positions to use if the header doesn't name them.
        my $cpuIdx  = 4;
        my $memIdx  = 5;
        my $srvIdx  = 7;

        for (my $ii=0; ($ii < scalar(@h)); $ii++) {
            $cpuIdx  = $ii  if ($h[$ii] eq "ncpus");
            $memIdx  = $ii  if ($h[$ii] eq "maxmem");
            $srvIdx  = $ii  if ($h[$ii] eq "server");
        }

        while (my $line = <$lshosts>) {
            my @v = split '\s+', $line;

            my $cpus  = $v[$cpuIdx];
            my $mem   = $v[$memIdx];
            my $srv   = $v[$srvIdx];

            next if ($mem =~ m/-/);       #  Memory size not reported.
            next if ($srv !~ m/yes/i);    #  Not a server host.

            my ($gb, $unit) = _lsfMemoryToGB($mem);

            #  If we failed to find the units from the configuration, inherit it from the
            #  lshosts output.
            #  NOTE(review): this assumes the unit lshosts displays is the unit LSF expects
            #  in memory requests -- confirm against the site configuration.
            if (!defined($memUnits) && defined($unit)) {
                $memUnits = $unit;
            }

            #  The cpu count is a number; compare it as one, and ignore anything that isn't.
            $hosts{"$cpus-$gb"}++    if (($cpus =~ m/^\d+$/) && ($cpus > 0));
        }

        close($lshosts);
    }
    else {
        #  Best effort: carry on with an empty host list, but say why it is empty.
        print STDERR "--\n";
        print STDERR "-- WARNING:  Failed to run 'lshosts' to discover available hosts: $!\n";
    }

    setGlobal("availableHosts", formatAllowedResources(%hosts, "LSF"));
    setGlobal("gridEngineMemoryUnits", $memUnits);

    #  lsf.conf supplies units like 'MB' while the user/lshosts supply 'm'; strip any
    #  trailing 'B' so the message doesn't read 'MBB'.  The stored global is untouched.
    (my $unitLabel = uc($memUnits)) =~ s/B$//;

    print STDERR "-- \n";
    print STDERR "-- On LSF detected memory is requested in ${unitLabel}B\n";
    print STDERR "-- \n";
}
184
185