###############################################################################
 #
 #  This file is part of canu, a software program that assembles whole-genome
 #  sequencing reads into contigs.
 #
 #  This software is based on:
 #    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
 #    the 'kmer package' r1994 (http://kmer.sourceforge.net)
 #
 #  Except as indicated otherwise, this is a 'United States Government Work',
 #  and is released in the public domain.
 #
 #  File 'README.licenses' in the root directory of this distribution
 #  contains full conditions and disclaimers.
 ##

package canu::Grid_LSF;

require Exporter;

@ISA    = qw(Exporter);
@EXPORT = qw(detectLSF configureLSF);

use strict;
use warnings "all";
no warnings "uninitialized";

use canu::Defaults;
use canu::Execution;

use canu::Grid "formatAllowedResources";



#  Detect LSF by looking for the 'bsub' executable.
#
#  Does nothing if the 'gridEngine' global is already defined, or if 'bsub'
#  cannot be found.  Otherwise the detection is reported on STDERR and, unless
#  the 'useGrid' global is "0", the 'gridEngine' global is set to "LSF".
#
sub detectLSF () {

    return   if (defined(getGlobal("gridEngine")));

    my $bsub = findExecutable("bsub");

    return   if (!defined($bsub));

    #  Read the setting before emitting anything, so the order of the lookup
    #  relative to the output is the same as it has always been.
    my $gridDisabled = (getGlobal("useGrid") eq "0");

    print STDERR "--\n";
    print STDERR "-- Detected LSF with 'bsub' binary in $bsub.\n";

    if ($gridDisabled) {
        print STDERR "-- LSF disabled by useGrid=false\n";
    }
    else {
        setGlobal("gridEngine", "LSF");
    }
}



#  Parse one 'maxmem' field from the output of lshosts.
#
#  Returns a two element list:
#    - the memory size converted to gigabytes and truncated to an integer
#    - the lower-case unit letter ('k', 'm', 'g' or 't') attached to the
#      number, or undef if the number has no recognized unit suffix
#
#  A number with no unit suffix is returned as is (only truncated).  A field
#  that does not start with a number yields a size of zero.
#
#  The number may have any count of digits, with an optional decimal part:
#  '8G', '15.6G', '2T' and '64000M' are all accepted.
#
sub _parseLSFMemory {
    my ($field) = @_;

    #  Multipliers to gigabytes.  All are exact powers of two, so multiplying
    #  by them gives the same result as dividing by 1024 would.
    my %toGB = ( "t" => 1024,
                 "g" => 1,
                 "m" => 1 / 1024,
                 "k" => 1 / 1024 / 1024 );

    return (0, undef)   if ($field !~ m/^\s*(\d+(?:\.\d+)?)\s*([kmgt])?/i);

    my $size = $1;
    my $unit = defined($2) ? lc($2) : undef;

    $size *= $toGB{$unit}   if (defined($unit));

    return (int($size), $unit);
}



#  Configure canu for running under LSF.
#
#  Does nothing unless the 'gridEngine' global is "LSF" (case-insensitive).
#
#  Sets defaults (only where not already set) for the gridEngine* globals that
#  describe how to submit jobs, then determines the unit LSF uses for memory
#  requests ('gridEngineMemoryUnits') and builds the table of available hosts
#  ('availableHosts') from the output of 'lshosts'.
#
#  Exits through caExit() if the memory unit is not set and lsf.conf cannot
#  be found or opened.
#
sub configureLSF () {

    return   if (uc(getGlobal("gridEngine")) ne "LSF");

    #  Maximum array size: an explicit setting wins, then the environment,
    #  then a built-in default.

    my $maxArraySize = getGlobal("gridEngineArrayMaxJobs");

    if (!defined($maxArraySize)) {
        $maxArraySize = 65535;

        if (defined($ENV{"MAX_JOB_ARRAY_SIZE"})) {
            $maxArraySize = $ENV{"MAX_JOB_ARRAY_SIZE"};
        }
    }

    setGlobalIfUndef("gridEngineSubmitCommand",              "bsub");
    setGlobalIfUndef("gridEngineNameOption",                 "-J");
    setGlobalIfUndef("gridEngineArrayOption",                "");
    setGlobalIfUndef("gridEngineArrayName",                  "ARRAY_NAME\[ARRAY_JOBS\]");
    setGlobalIfUndef("gridEngineArrayMaxJobs",               $maxArraySize);
    setGlobalIfUndef("gridEngineOutputOption",               "-o");
    setGlobalIfUndef("gridEngineResourceOption",             "-R span[hosts=1] -n THREADS -M MEMORY");
    setGlobalIfUndef("gridEngineMemoryPerJob",               "1");
    setGlobalIfUndef("gridEngineNameToJobIDCommand",         "bjobs -A -J \"WAIT_TAG\" | grep -v JOBID");
    setGlobalIfUndef("gridEngineNameToJobIDCommandNoArray",  "bjobs -J \"WAIT_TAG\" | grep -v JOBID");
    setGlobalIfUndef("gridEngineTaskID",                     "LSB_JOBINDEX");
    setGlobalIfUndef("gridEngineArraySubmitID",              "%I");
    setGlobalIfUndef("gridEngineJobID",                      "LSB_JOBID");

    #
    #  LSF has variation in the units used to request memory.
    #  They are defined by the LSF_UNIT_FOR_LIMITS variable in lsf.conf.
    #
    #  Expecting lines like 'LSF_UNIT_FOR_LIMITS=MB'.
    #
    #  Docs say:
    #    Set to MB at time of installation. If LSF_UNIT_FOR_LIMITS
    #    is not defined in lsf.conf, then the default setting is
    #    in KB, and for RUSAGE it is MB.
    #
    #  NOTE(review): the value is stored exactly as found in lsf.conf
    #  (e.g. 'MB'); confirm that users of gridEngineMemoryUnits accept that.
    #

    my $memUnits = getGlobal("gridEngineMemoryUnits");

    if (!defined($memUnits)) {
        my $lsfroot = $ENV{"LSF_ENVDIR"};

        #  Without the defined() check an unset LSF_ENVDIR would have us
        #  probing '/lsf.conf'.

        if (defined($lsfroot) && (-e "$lsfroot/lsf.conf")) {
            open(my $conf, "<", "$lsfroot/lsf.conf") or caExit("can't open '$lsfroot/lsf.conf' for reading: $!", undef);

            #  If the key appears more than once, the last one wins.  Only
            #  the first whitespace-delimited word is taken as the value, so
            #  trailing blanks or comments don't end up in the unit.

            while (my $line = <$conf>) {
                if ($line =~ m/^\s*LSF_UNIT_FOR_LIMITS\s*=\s*(\S+)/i) {
                    $memUnits = $1;
                }
            }

            close($conf);

            if (defined($memUnits)) {
                print STDERR "-- Discovered LSF_UNIT_FOR_LIMITS of '$memUnits' from '$lsfroot/lsf.conf'.\n";
            }
            else {
                print STDERR "-- LSF_UNIT_FOR_LIMITS is not set in '$lsfroot/lsf.conf'.\n";
            }
        }

        else {
            print STDERR "--\n";
            print STDERR "-- ERROR:  Can't find '\$LSF_ENVDIR/lsf.conf' to determine the unit to use for memory\n";
            print STDERR "-- ERROR:  sizes.  Set gridEngineMemoryUnits to one of 'k', 'm', 'g', or 't'.\n";
            print STDERR "--\n";

            caExit("can't configure for LSF", undef);
        }
    }

    #  Build a list of the resources available in the grid.  This will contain a list with keys
    #  of "#CPUs-#GBs" and values of the number of nodes with such a config.  Later on, we'll use this
    #  to figure out what specific settings to use for each algorithm.
    #
    #  The list is saved in global{"availableHosts"}
    #
    my %hosts;

    if (open(my $lshosts, "-|", "lshosts")) {
        my $headerLine = <$lshosts>;
        my @header     = split '\s+', $headerLine;

        #  Column positions to use when the header doesn't name the column.

        my $cpuIdx = 4;
        my $memIdx = 5;
        my $srvIdx = 7;

        for my $ii (0 .. $#header) {
            $cpuIdx = $ii   if ($header[$ii] eq "ncpus");
            $memIdx = $ii   if ($header[$ii] eq "maxmem");
            $srvIdx = $ii   if ($header[$ii] eq "server");
        }

        while (my $line = <$lshosts>) {
            my @v = split '\s+', $line;

            my $cpus = $v[$cpuIdx];
            my $mem  = $v[$memIdx];
            my $srv  = $v[$srvIdx];

            next   if ($mem =~ m/-/);       #  Memory size is not known.
            next   if ($srv !~ m/yes/i);    #  Not flagged as a server.

            my ($memGB, $unit) = _parseLSFMemory($mem);

            #  If we failed to find the units from the configuration, inherit it from the lshosts output.
            $memUnits = $unit   if (!defined($memUnits) && defined($unit));

            #  The count must be compared as a number; checking that it is
            #  all digits first also drops placeholders such as '-'.
            $hosts{"$cpus-$memGB"}++   if (($cpus =~ m/^\d+$/) && ($cpus > 0));
        }

        close($lshosts);
    }
    else {
        #  Carry on with an empty host list, as a failed lshosts always did,
        #  but don't hide the reason for it.
        print STDERR "--\n";
        print STDERR "-- WARNING:  Failed to run 'lshosts' to discover available hosts: $!\n";
    }

    #  %hosts is passed directly, exactly as before.  Do not change the form
    #  of this call; NOTE(review): formatAllowedResources() presumably takes
    #  it by reference through a prototype - confirm in canu::Grid.
    setGlobal("availableHosts", formatAllowedResources(%hosts, "LSF"));
    setGlobal("gridEngineMemoryUnits", $memUnits);

    print STDERR "-- \n";
    print STDERR "-- On LSF detected memory is requested in " . uc(${memUnits}) . "B\n";
    print STDERR "-- \n";
}

1;