1#!/usr/local/bin/perl -w
2use strict;
3use warnings;
4
5## general comments:
6## this script is indirectly called by run_casp.pl, see comments therein
7## it calls several functions and mainly functions as a "gluing" script
8## its behavious is mainly controled by the config file read at the beginning
9## in general it does the following:
10## 1) build MSA from sequence via hhblits
11## 2) build hhr-template-list via hhsearch
12## 3) (pre-)select several templates via template selection strategy
13## 4) filter preselected templates and query
14## 5) rank templates with single template NN
15## 6) select final templates
16## 7) generate final (artifical) hhr file with selected templates
17## 8) optionally replace distance restraints
18## 9) call MODELLER and build final model
19## this final model in saved at outdir and will then be sent to
20## the CASP organizers via the run_casp.pl script
21
22
23## more details:
24## all temporary files are written to workingDir
25## temporary files are eg. *.a3m, *.hhm, *.tab, *.hhr
26## amd are named workingDir/queryName.*
27## final results must be copied back to "dirbasename", e.g. outbase.pdb to dirbasename.pdb
28##
29## if filtering is on the filtered files are put in another temporary directory,
30## see filterAlignments.pm
31## the final pdb-file must be in outbase.pdb so that run_casp.pl can find this file and
32## attach it to the reply-e-mail
33##
34
35use File::Basename;
36use File::Spec;
37
38
39BEGIN {
40    my $dirname = dirname(File::Spec->rel2abs(__FILE__));
41    unshift @INC, $dirname . "/lib";
42}
43
44use File::Temp;
45
46use config;
47use utilities;
48use Template;
49use TemplateList;
50use TemplateStruct;
51use TemplateListStruct;
52use selectTemplatesHeuristic;
53use filterAlignments;
54use predTMscore;
55use checkTemplates;
56use modeller;
57use QualityAssessModel;
58
59## called via run_casp.pl with these parameters
60my $server      = "hhpred";
61my $outFile     = "";
62my $configFile  = undef;
63my $queryEnding = "";
64my $queryName   = "";
65my $queryFile   = "";
66
67#############################
68## parse command line options
69
70my $options     = "";
71
72for (my $i=0; $i<@ARGV; $i++) {
73    $options .= " $ARGV[$i] ";
74}
75
76if ($options =~ s/-i\s+(\S+)/ /) {
77    $queryFile = $1;
78    my $fpath;
79    ($queryName, $fpath, $queryEnding) = fileparse("$queryFile", qr/\.[^.]*/);
80}
81if ($options =~ s/-o\s+(\S+)/ /) { $outFile = $1; }
82if ($options =~ s/-config\s+(\S+)/ /) { $configFile = $1; }
83
84##############################
85
86print "\n";
87print "==========================================================\n";
88print "|                HHPRED structure predictor              |\n";
89print "==========================================================\n";
90print "\n";
91
92## set default values
93
94#my $queryFile = "$dirbasename" . "$queryEnding";
95
96my $workingDir = "/tmp/$$";
97
98## set configuration
99my $config = HHpredConfig->instance($configFile);
100
101my $preselectedTemplatesFile = "";
102my $allRankedTemplates = TemplateList->new();
103
104if ($queryFile eq "" or $outFile eq "" or $queryName eq "" or ! -e $queryFile) {
105    print "usage: $0 -i <infile> -o <outfile> [-c <configfile>]!\n";
106    print "   <infile>  .a3m or .seq file\n";
107    print "   <outfile> resulting pdb file\n\n";
108    exit 1;
109}
110
111## create working directory
112if (! -e "$workingDir") {
113    &System("mkdir -p $workingDir");
114} else {
115    my $dir = File::Temp->newdir("XXXXX", DIR => "$workingDir", CLEANUP => 0);
116    $workingDir = $dir->dirname;
117    &System("chmod 777 $workingDir");
118}
119
120if (! -e "$workingDir/$queryName") {
121    &System("mkdir -p $workingDir/$queryName");
122    $workingDir = "$workingDir/$queryName";
123}
124
125#############################
126$config->print();
127print "\n";
128
129#############################
130## set up starting files
131## all files are written to workingDir
132$queryName .= &getRandomString(7); # add random stuff so that queryName != template (needed for MODELLER)
133my $outbase = "$workingDir/$queryName";
134
135## either a3m or seq file
136if ($queryEnding ne ".a3m") {
137    &System("cp $queryFile $outbase.seq");
138} else {
139   &System("cp $queryFile $outbase.a3m");
140}
141
142
143#############################
144## build query alignment if necessary
145if (! -e "$outbase.a3m") {
146    my $command = "HHLIB=" . $config->get_hhlib()
147                . " "
148                . $config->get_hhblits()
149                . " -i $outbase.seq"
150                . " -oalis $outbase"
151                . " -ohhm $outbase.hhm"
152                . " -n " . $config->get_hhblits_rounds()
153                . " -mact " . $config->get_hhblits_mact()
154                . " -oa3m $outbase.a3m"
155                . " -d " . $config->get_uniprot20()
156                . " -cpu " . $config->get_cpus();
157    &System($command);
158    sleep(1);
159
160    &System($config->get_addss() . " $outbase.a3m");
161    sleep(1);
162}
163
164&System($config->get_hhmake() . " -i $outbase.a3m -o $outbase.hhm");
165sleep(1);
166
167##############################
168## search against database via hhsearch
169my $pdbdir = $config->get_pdbdir();
170
171$pdbdir =~ s/
172    \/$ # trim trailing slash
173    //x;
174my $pdbdb = "$pdbdir/db/pdb.hhm";
175
176&System("HHLIB=" . $config->get_hhlib()
177        . " "
178        . $config->get_hhsearch()
179        . " -i $outbase.hhm"
180        . " -d $pdbdb"
181        . " -mact " . $config->get_hhsearch_mact()
182        . " -cpu " . $config->get_cpus()
183        . " -atab $outbase.start.tab");
184
185while (!(-e "$outbase.hhr")) {
186    sleep(1)
187}
188
189&System("cp $outbase.hhr $outbase.start.hhr");
190
191##############################
192## start handling of templates
193my $tList = TemplateListStruct->new();
194$tList->hhr_to_TemplateList("$outbase.hhr");
195my $queryLength = $tList->get_queryLength();
196$tList->print();
197
198#############################
199## preselect template (best according to similarity, sumprobs, probability
200## and fill up rest with heuristic
201if ($config->get_preselectTemplates()) {
202    $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, 100, 1, $tList, $config);
203
204    print "preselected templates:\n";
205    $tList->print();
206    print "====================================\n\n";
207}
208
209#############################
210## filtering
211if ($config->get_doFiltering()) {
212    $tList =  &Filtering($queryName, $outbase, $tList, $server, $config);
213
214    print "Filtered templates:\n";
215    $tList->print();
216    print "====================================\n\n";
217}
218
219#############################
220## rank templates with NN
221if ($config->get_rankTemplates()) {
222    my $rankingNN = TMscoreNN->new();
223    $rankingNN->rank_templates($tList, $queryLength, $config);
224
225    $allRankedTemplates = $tList;
226
227    print "TM score ranked templates\n";
228    $tList->print();
229    print "====================================\n\n";
230}
231
232#############################
233## final template selection
234if ($config->get_multiTemplate()) {
235    my $maxNumTemplates = $config->get_maxNumOfTemplates();
236    $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, $maxNumTemplates, 2, $tList, $config);
237} else {
238    $tList = &SingleTemplateSelection($tList);
239}
240
241## take same template(s) as in file (instead of the ones selected before);
242## keep previous template(s) if preselected ones are not in template list
243if ($preselectedTemplatesFile ne "") {
244    my $preselectedTemplates = TemplateList->new();
245    $preselectedTemplates->read_from_file($preselectedTemplatesFile);
246
247    my $newTemplates = TemplateList->new();
248    for (my $i=0; $i<$preselectedTemplates->size(); $i++) {
249        for (my $j=0; $j<$allRankedTemplates->size(); $j++) {
250            my $preTemplate = $preselectedTemplates->get($i);
251            my $rankTemplate = $allRankedTemplates->get($j);
252
253            if ($preTemplate->get_Hit() eq $rankTemplate->get_Hit()) {
254                ## check overlap
255                my $overlap = &min($preTemplate->get_Qend(), $rankTemplate->get_Qend()) - &max($preTemplate->get_Qstart(), $rankTemplate->get_Qstart());
256                my $preLen = $preTemplate->get_Qend() - $preTemplate->get_Qstart();
257                if ($overlap / $preLen > 0.5) {
258                    $newTemplates->check_and_add($rankTemplate);
259                    last;
260                }
261            }
262        }
263    }
264
265    ## templates found
266    if ($newTemplates->size() > 0) {
267        print "preselected templates could be found\n";
268        $tList = $newTemplates;
269    } else {
270        ## keep old templates
271        print "preselected templates could NOT be found - keep old templates\n";
272    }
273}
274
275
276print "final templates:\n";
277$tList->print();
278print "====================================\n\n";
279
280$tList->write_to_file("$outbase.templates");
281
282#############################
283## create "artificial" hhr file for input, needed by hhmakemodel to build pir alignment
284$tList->templateList_to_hhr($outbase);
285
286#############################
287## generate pir alignment
288## the hhr file is generated/overwritten by the function call before
289## the a3m is generated above using hhblast
290## CheckTemplates possibly creates "corrected" pdb files and saves them
291## in the "working" directory; then calls hhmakemodel to build outbase.pir
292print "checking templates\n";
293&CheckTemplates($outbase, $workingDir, $config);
294print "\n====================================\n\n";
295
296# if ($config->get_realignProbcons() && $tList->size() > 1) {
297#     my $pirFile = PirFile->new("$outbase.pir");
298#     my $fastaFile = $pirFile->to_fasta();
299#     $fastaFile->write_to_file("$outbase.fas");
300#     my $probcons = $config->get_probcons();
301
302#     &System("$probcons $outbase.fas > $outbase.fas");
303#     sleep(1);
304
305#     my $fastaRealigned = FastaFile->new("$outbase.fas");
306#     my $pirRealigned = $fastaRealigned->to_pir();
307#     $pirRealigned->write_to_file("$outbase.pir");
308# }
309
310#############################
311## Modeller initialization
312print "Modeller initialization\n";
313&ModellerRSR(
314    queryName => $queryName,
315    workingDir => $workingDir,
316    outbase => $outbase,
317    config => $config
318    );
319
320#############################
321## replace distance restraints
322## and call Modeller
323if ($config->get_replaceDistanceRestraints()) {
324    my $normalizer = &ChangeDistanceRestraints($outbase, $workingDir, $tList, $config);
325    &ModellerNewDistance(
326     rsrFile => "$outbase.new.rsr",
327     queryName => $queryName,
328     workingDir => $workingDir,
329     outbase => $outbase,
330     config => $config,
331     normalizer => $normalizer);
332} else {
333     &ModellerRaw(
334      rsrFile => "$outbase.rsr",
335      queryName => $queryName,
336      workingDir => $workingDir,
337      outbase => $outbase,
338      config => $config
339     );
340}
341
342&WriteCASPpdbFile("$outbase.pdb", "$outbase.pdb", $queryName, $server, $tList);
343
344if ($config->get_assessModel()) {
345    $tList->set_queryLength($queryLength);
346    my $assessor = QualityAssessModel->new(tList=>$tList, outbase=>$outbase, pdbFile=>"$outbase.pdb");
347    $assessor->run();
348}
349
350#############################
351## clean up
352my @tabs = <$outbase*.tab>;
353for (my $i=0; $i<@tabs; $i++) {
354    system("rm $tabs[$i]");
355}
356
357#############################
358## copy files of interest back to outdir , e.g. to the tempdir
359&System("cp $outbase.pdb $outFile");
360
361#&System("rm -r $workingDir");
362
363## create a directory containing intermediate files
364#my $resultDir = $dirbasename . "Results";
365#&System("mkdir -p $resultDir");
366#&System("chmod 777 $resultDir");
367#&System("cp $outbase.* $resultDir");
368
369print "END HHpred for $queryName\n";
370print "=========================\n";
371