1#!/usr/local/bin/perl -w 2use strict; 3use warnings; 4 5## general comments: 6## this script is indirectly called by run_casp.pl, see comments therein 7## it calls several functions and mainly functions as a "gluing" script 8## its behavious is mainly controled by the config file read at the beginning 9## in general it does the following: 10## 1) build MSA from sequence via hhblits 11## 2) build hhr-template-list via hhsearch 12## 3) (pre-)select several templates via template selection strategy 13## 4) filter preselected templates and query 14## 5) rank templates with single template NN 15## 6) select final templates 16## 7) generate final (artifical) hhr file with selected templates 17## 8) optionally replace distance restraints 18## 9) call MODELLER and build final model 19## this final model in saved at outdir and will then be sent to 20## the CASP organizers via the run_casp.pl script 21 22 23## more details: 24## all temporary files are written to workingDir 25## temporary files are eg. *.a3m, *.hhm, *.tab, *.hhr 26## amd are named workingDir/queryName.* 27## final results must be copied back to "dirbasename", e.g. outbase.pdb to dirbasename.pdb 28## 29## if filtering is on the filtered files are put in another temporary directory, 30## see filterAlignments.pm 31## the final pdb-file must be in outbase.pdb so that run_casp.pl can find this file and 32## attach it to the reply-e-mail 33## 34 35use File::Basename; 36use File::Spec; 37 38 39BEGIN { 40 my $dirname = dirname(File::Spec->rel2abs(__FILE__)); 41 unshift @INC, $dirname . "/lib"; 42} 43 44use File::Temp; 45 46use config; 47use utilities; 48use Template; 49use TemplateList; 50use TemplateStruct; 51use TemplateListStruct; 52use selectTemplatesHeuristic; 53use filterAlignments; 54use predTMscore; 55use checkTemplates; 56use modeller; 57use QualityAssessModel; 58 59## called via run_casp.pl with these parameters 60my $server = "hhpred"; 61my $outFile = ""; 62my $configFile = undef; 63my $queryEnding = ""; 64my $queryName = ""; 65my $queryFile = ""; 66 67############################# 68## parse command line options 69 70my $options = ""; 71 72for (my $i=0; $i<@ARGV; $i++) { 73 $options .= " $ARGV[$i] "; 74} 75 76if ($options =~ s/-i\s+(\S+)/ /) { 77 $queryFile = $1; 78 my $fpath; 79 ($queryName, $fpath, $queryEnding) = fileparse("$queryFile", qr/\.[^.]*/); 80} 81if ($options =~ s/-o\s+(\S+)/ /) { $outFile = $1; } 82if ($options =~ s/-config\s+(\S+)/ /) { $configFile = $1; } 83 84############################## 85 86print "\n"; 87print "==========================================================\n"; 88print "| HHPRED structure predictor |\n"; 89print "==========================================================\n"; 90print "\n"; 91 92## set default values 93 94#my $queryFile = "$dirbasename" . "$queryEnding"; 95 96my $workingDir = "/tmp/$$"; 97 98## set configuration 99my $config = HHpredConfig->instance($configFile); 100 101my $preselectedTemplatesFile = ""; 102my $allRankedTemplates = TemplateList->new(); 103 104if ($queryFile eq "" or $outFile eq "" or $queryName eq "" or ! -e $queryFile) { 105 print "usage: $0 -i <infile> -o <outfile> [-c <configfile>]!\n"; 106 print " <infile> .a3m or .seq file\n"; 107 print " <outfile> resulting pdb file\n\n"; 108 exit 1; 109} 110 111## create working directory 112if (! -e "$workingDir") { 113 &System("mkdir -p $workingDir"); 114} else { 115 my $dir = File::Temp->newdir("XXXXX", DIR => "$workingDir", CLEANUP => 0); 116 $workingDir = $dir->dirname; 117 &System("chmod 777 $workingDir"); 118} 119 120if (! -e "$workingDir/$queryName") { 121 &System("mkdir -p $workingDir/$queryName"); 122 $workingDir = "$workingDir/$queryName"; 123} 124 125############################# 126$config->print(); 127print "\n"; 128 129############################# 130## set up starting files 131## all files are written to workingDir 132$queryName .= &getRandomString(7); # add random stuff so that queryName != template (needed for MODELLER) 133my $outbase = "$workingDir/$queryName"; 134 135## either a3m or seq file 136if ($queryEnding ne ".a3m") { 137 &System("cp $queryFile $outbase.seq"); 138} else { 139 &System("cp $queryFile $outbase.a3m"); 140} 141 142 143############################# 144## build query alignment if necessary 145if (! -e "$outbase.a3m") { 146 my $command = "HHLIB=" . $config->get_hhlib() 147 . " " 148 . $config->get_hhblits() 149 . " -i $outbase.seq" 150 . " -oalis $outbase" 151 . " -ohhm $outbase.hhm" 152 . " -n " . $config->get_hhblits_rounds() 153 . " -mact " . $config->get_hhblits_mact() 154 . " -oa3m $outbase.a3m" 155 . " -d " . $config->get_uniprot20() 156 . " -cpu " . $config->get_cpus(); 157 &System($command); 158 sleep(1); 159 160 &System($config->get_addss() . " $outbase.a3m"); 161 sleep(1); 162} 163 164&System($config->get_hhmake() . " -i $outbase.a3m -o $outbase.hhm"); 165sleep(1); 166 167############################## 168## search against database via hhsearch 169my $pdbdir = $config->get_pdbdir(); 170 171$pdbdir =~ s/ 172 \/$ # trim trailing slash 173 //x; 174my $pdbdb = "$pdbdir/db/pdb.hhm"; 175 176&System("HHLIB=" . $config->get_hhlib() 177 . " " 178 . $config->get_hhsearch() 179 . " -i $outbase.hhm" 180 . " -d $pdbdb" 181 . " -mact " . $config->get_hhsearch_mact() 182 . " -cpu " . $config->get_cpus() 183 . " -atab $outbase.start.tab"); 184 185while (!(-e "$outbase.hhr")) { 186 sleep(1) 187} 188 189&System("cp $outbase.hhr $outbase.start.hhr"); 190 191############################## 192## start handling of templates 193my $tList = TemplateListStruct->new(); 194$tList->hhr_to_TemplateList("$outbase.hhr"); 195my $queryLength = $tList->get_queryLength(); 196$tList->print(); 197 198############################# 199## preselect template (best according to similarity, sumprobs, probability 200## and fill up rest with heuristic 201if ($config->get_preselectTemplates()) { 202 $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, 100, 1, $tList, $config); 203 204 print "preselected templates:\n"; 205 $tList->print(); 206 print "====================================\n\n"; 207} 208 209############################# 210## filtering 211if ($config->get_doFiltering()) { 212 $tList = &Filtering($queryName, $outbase, $tList, $server, $config); 213 214 print "Filtered templates:\n"; 215 $tList->print(); 216 print "====================================\n\n"; 217} 218 219############################# 220## rank templates with NN 221if ($config->get_rankTemplates()) { 222 my $rankingNN = TMscoreNN->new(); 223 $rankingNN->rank_templates($tList, $queryLength, $config); 224 225 $allRankedTemplates = $tList; 226 227 print "TM score ranked templates\n"; 228 $tList->print(); 229 print "====================================\n\n"; 230} 231 232############################# 233## final template selection 234if ($config->get_multiTemplate()) { 235 my $maxNumTemplates = $config->get_maxNumOfTemplates(); 236 $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, $maxNumTemplates, 2, $tList, $config); 237} else { 238 $tList = &SingleTemplateSelection($tList); 239} 240 241## take same template(s) as in file (instead of the ones selected before); 242## keep previous template(s) if preselected ones are not in template list 243if ($preselectedTemplatesFile ne "") { 244 my $preselectedTemplates = TemplateList->new(); 245 $preselectedTemplates->read_from_file($preselectedTemplatesFile); 246 247 my $newTemplates = TemplateList->new(); 248 for (my $i=0; $i<$preselectedTemplates->size(); $i++) { 249 for (my $j=0; $j<$allRankedTemplates->size(); $j++) { 250 my $preTemplate = $preselectedTemplates->get($i); 251 my $rankTemplate = $allRankedTemplates->get($j); 252 253 if ($preTemplate->get_Hit() eq $rankTemplate->get_Hit()) { 254 ## check overlap 255 my $overlap = &min($preTemplate->get_Qend(), $rankTemplate->get_Qend()) - &max($preTemplate->get_Qstart(), $rankTemplate->get_Qstart()); 256 my $preLen = $preTemplate->get_Qend() - $preTemplate->get_Qstart(); 257 if ($overlap / $preLen > 0.5) { 258 $newTemplates->check_and_add($rankTemplate); 259 last; 260 } 261 } 262 } 263 } 264 265 ## templates found 266 if ($newTemplates->size() > 0) { 267 print "preselected templates could be found\n"; 268 $tList = $newTemplates; 269 } else { 270 ## keep old templates 271 print "preselected templates could NOT be found - keep old templates\n"; 272 } 273} 274 275 276print "final templates:\n"; 277$tList->print(); 278print "====================================\n\n"; 279 280$tList->write_to_file("$outbase.templates"); 281 282############################# 283## create "artificial" hhr file for input, needed by hhmakemodel to build pir alignment 284$tList->templateList_to_hhr($outbase); 285 286############################# 287## generate pir alignment 288## the hhr file is generated/overwritten by the function call before 289## the a3m is generated above using hhblast 290## CheckTemplates possibly creates "corrected" pdb files and saves them 291## in the "working" directory; then calls hhmakemodel to build outbase.pir 292print "checking templates\n"; 293&CheckTemplates($outbase, $workingDir, $config); 294print "\n====================================\n\n"; 295 296# if ($config->get_realignProbcons() && $tList->size() > 1) { 297# my $pirFile = PirFile->new("$outbase.pir"); 298# my $fastaFile = $pirFile->to_fasta(); 299# $fastaFile->write_to_file("$outbase.fas"); 300# my $probcons = $config->get_probcons(); 301 302# &System("$probcons $outbase.fas > $outbase.fas"); 303# sleep(1); 304 305# my $fastaRealigned = FastaFile->new("$outbase.fas"); 306# my $pirRealigned = $fastaRealigned->to_pir(); 307# $pirRealigned->write_to_file("$outbase.pir"); 308# } 309 310############################# 311## Modeller initialization 312print "Modeller initialization\n"; 313&ModellerRSR( 314 queryName => $queryName, 315 workingDir => $workingDir, 316 outbase => $outbase, 317 config => $config 318 ); 319 320############################# 321## replace distance restraints 322## and call Modeller 323if ($config->get_replaceDistanceRestraints()) { 324 my $normalizer = &ChangeDistanceRestraints($outbase, $workingDir, $tList, $config); 325 &ModellerNewDistance( 326 rsrFile => "$outbase.new.rsr", 327 queryName => $queryName, 328 workingDir => $workingDir, 329 outbase => $outbase, 330 config => $config, 331 normalizer => $normalizer); 332} else { 333 &ModellerRaw( 334 rsrFile => "$outbase.rsr", 335 queryName => $queryName, 336 workingDir => $workingDir, 337 outbase => $outbase, 338 config => $config 339 ); 340} 341 342&WriteCASPpdbFile("$outbase.pdb", "$outbase.pdb", $queryName, $server, $tList); 343 344if ($config->get_assessModel()) { 345 $tList->set_queryLength($queryLength); 346 my $assessor = QualityAssessModel->new(tList=>$tList, outbase=>$outbase, pdbFile=>"$outbase.pdb"); 347 $assessor->run(); 348} 349 350############################# 351## clean up 352my @tabs = <$outbase*.tab>; 353for (my $i=0; $i<@tabs; $i++) { 354 system("rm $tabs[$i]"); 355} 356 357############################# 358## copy files of interest back to outdir , e.g. to the tempdir 359&System("cp $outbase.pdb $outFile"); 360 361#&System("rm -r $workingDir"); 362 363## create a directory containing intermediate files 364#my $resultDir = $dirbasename . "Results"; 365#&System("mkdir -p $resultDir"); 366#&System("chmod 777 $resultDir"); 367#&System("cp $outbase.* $resultDir"); 368 369print "END HHpred for $queryName\n"; 370print "=========================\n"; 371