1#!/user/bin/perl -w
2use strict;
3
4package TemplateListStruct;
5
6use config;
7use utilities;
8use TemplateStruct;
9
10my $config = HHpredConfig->instance();
11
12sub new {
13    my ($caller, $filename) = @_;
14    my $caller_is_obj = ref($caller);
15    my $class = $caller_is_obj || $caller;
16    no strict "refs";
17    my $self = bless {}, $class;
18
19    $self->{templates} = [];
20    $self->{queryLength} = -1;
21    $self->{query} = "";
22    $self->{neff} = -1;
23
24    if ($caller_is_obj) {
25	my $size = $caller->size();
26	for (my $i=0; $i<$size; $i++) {
27	    $self->{templates}->[$i] = $caller->{templates}->[$i];
28	}
29	$self->{queryLength} = $caller->{queryLength};
30	$self->{query} = $caller->{caller};
31	$self->{neff} = $caller->{neff};
32    }
33
34    if (defined($filename)) {
35	$self->hhr_to_TemplateList($filename);
36    }
37    return $self;
38}
39
40
41sub add_template {
42    my ($self, $template) = @_;
43    my $curSize = $self->size();
44    $self->{templates}->[$curSize] = $template;
45}
46
47
48## before adding template, check whether it is already in list
49sub check_and_add {
50    my ($self, $template) = @_;
51
52    for (my $i=0; $i<$self->size(); $i++) {
53	if ($self->{templates}->[$i]->equals($template)) {
54	    return;
55	}
56    }
57    $self->add_template($template);
58}
59
60
61sub clear {
62    my $self = shift;
63    %{$self} = ();
64    $self->{templates} = [];
65    $self->{query} = "";
66    $self->{queryLength} = -1;
67    $self->{neff} = -1;
68}
69
70
71## delete hit with No "No"
72sub delete_No {
73    my $self = shift;
74    my $No = shift;
75
76    ## get idx for hit with No "No"
77    my $deleteIdx = -1;
78    for (my $i=0; $i<$self->size(); $i++) {
79	if ($self->{templates}->[$i]->get_No() == $No) {
80	    $deleteIdx = $i;
81	    last;
82	}
83    }
84    print "deleting No=$No, idx=$deleteIdx\n";
85    if ($deleteIdx != -1) {
86	splice(@{$self->{templates}}, $deleteIdx, 1);
87    }
88}
89
90
91sub size {
92    my $self = shift;
93    return scalar(@{$self->{templates}});
94}
95
96
97sub get {
98    my ($self, $i) = @_;
99    $self->{templates}->[$i];
100}
101
102
103sub get_last {
104    my $self = shift;
105    $self->{templates}->[$self->size()-1];
106}
107
108
109
110sub to_string {
111    my $self = shift;
112    my $spacer = shift;
113    my $out = "";
114    for (my $i=0; $i<$self->size(); $i++) {
115	$out .= $self->{templates}->[$i]->to_string($spacer) . "\n";
116    }
117    return $out;
118}
119
120
121sub print {
122    my $self = shift;
123    my $out = $self->to_string();
124    print $out;
125}
126
127
128sub to_TemplateList_helper {
129    my $self = shift;
130    my $hhrFile = shift;
131    my @lines = @_;
132
133    my $matchC;
134    my $No;
135    my $filtnr = "start";  ## filter step (start means no filtering)
136    my $spaceLen = 12;
137
138    if ($hhrFile =~ /\.(\d+)\.hhr/) {
139	$filtnr = $1;
140    }
141
142    for (my $i=0; $i<@lines; $i++) {
143	my $line = $lines[$i];
144
145	if ($line =~ /^Match_columns\s*(\S+)/) {
146	    $matchC = $1;
147	    $self->_set_queryLength($matchC);
148	}
149	if ($line =~ /^Query\s+(\S+)/) {
150	    my $query = $1;
151	    $self->_set_query($query);
152	}
153	if ($line =~ /^Neff\s+(\S+)/) {
154	    my $neff = $1;
155	    $self->_set_neff($neff);
156	}
157	## No     Hit       Prob E-val  P-val  Score    SS      Cols  Query(start end) Template(start end) HMM
158	elsif ($line=~/^\s*(\d+)\s+(\S+).+\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\d+)-(\d+)\s+(\d+)-(\d+)\s*\((\S+)\)$/) {
159	    my $No       = $1;
160	    my $Hit      = $2;
161	    my $Prob     = $3;
162	    my $Eval     = $4;
163	    my $Pval     = $5;
164	    my $Score    = $6;
165	    my $SS       = $7;
166	    my $Cols     = $8;
167	    my $Qstart   = $9;
168	    my $Qend     = $10;
169	    my $Tstart   = $11;
170	    my $Tend     = $12;
171	    my $HMM      = $13;
172
173	    my $SSL      = $SS/$matchC;
174	    $SSL = sprintf("%.4f", $SSL);
175
176	    my $template = TemplateStruct->new(Filt => $filtnr,
177					 No => $No,
178					 Hit => $Hit,
179					 Prob => $Prob,
180					 Eval => $Eval,
181					 Pval => $Pval,
182					 Score => $Score,
183					 SS => $SS,
184					 Cols => $Cols,
185					 Qstart => $Qstart,
186					 Qend => $Qend,
187					 Tstart => $Tstart,
188					 Tend => $Tend,
189					 HMM => $HMM);
190	    $self->add_template($template);
191	}
192	elsif($line =~ /^No\s+(\d+)/) {
193	    $No = $1;
194	    $line = $lines[++$i];
195
196	    if ($line !~ /^>(\S+)\s/) {
197		die("Error:: wrong format in \"$line\"\n");
198	    }
199
200	    my $hit = $1;
201	    $line = $lines[++$i];
202
203	    if ($line =~ /COMPACTNESS=(\S+)/i) {
204		$self->get($No-1)->set_Compactness($1);
205	    } else {
206		#print "missing compactness in hhr-file!\n";
207	    }
208
209	    if ($line =~ /CSS=(\S+)/i) {
210		$self->get($No-1)->set_Css($1);
211	    } else {
212		#print "missing css in hhr-file!\n";
213	    }
214
215	    if ($line =~ /CONTACT=(\S+)/i) {
216		$self->get($No-1)->set_Contact($1);
217	    } else {
218		#print "missing contact in hhr-file!\n";
219	    }
220
221	    if ($line =~ /CONTACT_REALIGN=(\S+)/i) {
222		$self->get($No-1)->set_ContactRealign($1);
223	    } else {
224		#print "missing contact realign in hhr-file!\n";
225	    }
226
227	    if ($line !~ /Similarity=(\S+)\s+Sum_probs=(\S+)\s*/) {
228		die("Error: wrong format in \"$line\"\n");
229	    }
230
231	    my $Similarity = $1;
232	    my $SumProbL = $2/$matchC;
233	    $SumProbL = sprintf("%.4f" , $SumProbL);
234
235	    if ($line =~ /Identities=(\S+)%\s/) {
236		$self->get($No-1)->set_Ident($1);
237	    }
238
239	    $self->get($No-1)->set_Sim($Similarity);
240	    $self->get($No-1)->set_SumProbL($SumProbL);
241	}
242	elsif ($line =~ /^T\s+ss_dssp(\s+)(\S+)/) {
243	    $spaceLen = length($1)-1;
244	    my $ss_dssp = $self->get($No-1)->get_ss_dssp();
245	    $self->get($No-1)->set_ss_dssp("$ss_dssp" . $2);
246	}
247	## Confidence line may contain spaces => read number of spaces from ss_dssp line
248	elsif ($line =~ /^Confidence\s{$spaceLen}(.*)\n/) {
249	    my $conf = $self->get($No-1)->get_conf();
250	    $self->get($No-1)->set_conf("$conf" . $1);
251	}
252    }
253}
254
255
256sub str_to_TemplateList {
257    my $self = shift;
258    my $str = shift;
259
260    my @lines;
261    @lines = split(/\n/, $str);
262
263    $self->to_TemplateList_helper("dummy", @lines);
264}
265
266
267sub hhr_to_TemplateList {
268    my ($self, $hhrFile) = @_;
269
270    my @lines;
271    open(HHR,"< $hhrFile") or die("Cant open $hhrFile: $!\n");
272    @lines = <HHR>;
273    close(HHR);
274
275    $self->to_TemplateList_helper($hhrFile, @lines);
276}
277
278
279sub write_to_file {
280    my ($self, $outfile) = @_;
281
282    open (OH, "> $outfile") or die("Cant write to $outfile: $!\n");
283    my $out = $self->to_string('===');
284    print(OH $out);
285    close(OH);
286}
287
288
289sub read_from_file {
290    my ($self, $infile) = @_;
291    my $append = 0;
292    ## append template(s) to already existing ones
293    $append = 1 if (scalar(@_) > 2 && $_[2] == 1);
294
295    $self->clear() if (! $append);
296    open(IH, "< $infile") or die("Cant open $infile: $!\n");
297    while(<IH>) {
298	chomp;
299	if (/(\S+===)+/) {
300	    my @entry = split(/===/, $_);
301	    my $template = TemplateStruct->new(Filt => $entry[0],
302					 No => $entry[1],
303					 Hit => $entry[2],
304					 Prob => $entry[3],
305					 Eval => $entry[4],
306					 Pval => $entry[5],
307					 Score => $entry[6],
308					 SS => $entry[7],
309					 Cols => $entry[8],
310					 Qstart => $entry[9],
311					 Qend => $entry[10],
312					 Tstart => $entry[11],
313					 Tend => $entry[12],
314					 HMM => $entry[13],
315					 Ident => $entry[14],
316					 Sim => $entry[15],
317					 SumProbL => $entry[16],
318					 predTM => $entry[17],
319					 Compactness => $entry[18],
320					 Css => $entry[19],
321					 Contact => $entry[20],
322					 ContactRealign => $entry[21]);
323
324	    $self->add_template($template);
325	}
326    }
327    close(IH);
328}
329
330
331sub set_queryLength {
332    my ($self, $len) = @_;
333    $self->{queryLength} = $len;
334}
335
336sub get_queryLength {
337    my $self = shift;
338    $self->{queryLength};
339}
340
341sub set_query {
342    my ($self, $query) = @_;
343    $self->{query} = $query;
344}
345
346sub get_query {
347    my $self = shift;
348    $self->{query};
349}
350
351sub get_neff {
352    my $self = shift;
353    $self->{neff};
354}
355
356sub set_neff {
357    my ($self, $neff) = @_;
358    $self->{neff} = $neff;
359}
360
361## for backward compatibility ##
362sub _set_queryLength {
363    my ($self, $len) = @_;
364    $self->{queryLength} = $len;
365}
366
367sub _get_queryLength {
368    my $self = shift;
369    $self->{queryLength};
370}
371
372sub _set_query {
373    my ($self, $query) = @_;
374    $self->{query} = $query;
375}
376
377sub _get_query {
378    my $self = shift;
379    $self->{query};
380}
381
382sub _get_neff {
383    my $self = shift;
384    $self->{neff};
385}
386
387sub _set_neff {
388    my ($self, $neff) = @_;
389    $self->{neff} = $neff;
390}
391######
392
393
394
395
396sub sort_by_sim {
397    my $self = shift;
398    @{$self->{templates}} = sort {$b->get_Sim() <=> $a->get_Sim()} @{$self->{templates}};
399}
400
401
402sub sort_by_prob {
403    my $self = shift;
404    @{$self->{templates}} = sort {$b->get_Prob() <=> $a->get_Prob()} @{$self->{templates}};
405}
406
407
408sub sort_by_sumProbL {
409    my $self = shift;
410    @{$self->{templates}} = sort {$b->get_SumProbL() <=> $a->get_SumProbL()} @{$self->{templates}};
411}
412
413
414sub sort_by_predTM {
415    my $self = shift;
416    @{$self->{templates}} = sort {$b->get_predTM() <=> $a->get_predTM()} @{$self->{templates}};
417}
418
419
420sub templateList_to_hhr {
421    my $self = shift;
422    my $outbase = shift;
423
424    my @hhrContent = ();
425    my $hh = $config->get_hh();
426
427    open(HHR, "> $outbase.hhr") or die ("Error in templateList_to_hhr: Cant write $outbase.hhr: $!\n");
428
429    for (my $i=0; $i<$self->size(); $i++) {
430	my $template = $self->get($i);
431
432	## open apropriate hhr file (wrt filter step)
433	my $infile = "$outbase." . $template->get_Filt() . ".hhr";
434	open (IN, "< $infile") or die ("Error: cannot open $infile!\n");
435
436	my $checkedHeader = 0;
437	my $begin;
438	my $e = 0;
439	my $end;
440	my $line;
441	my $hitnr = $i+1;
442
443	while ($line = <IN>) {
444	    ## copy first header lines:
445	    if (($checkedHeader==0) && ($i==0) && ($line !~ /^\s*\d+\s+\S+.+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\d+-\d+\s+\S+\s*\(\S+\)$/)) {
446		if ($line=~ /^Command/) {
447		    $line=~ s/(^Command\s*)(.*)$/$1$hh\/hhsearch artificial hhr file/;
448		}
449
450		## replace P-value against TMscore
451		if ($line=~ /\s+No\s+Hit\s+Prob\s+E-value\s+P-value\s+Score\s+SS\s+Cols\s+Query\s+HMM\s+Template\s+HMM\s*/) {
452		    $line =~ s/(\s*No\s+Hit\s+Prob\s+E-value\s+)(P-value)(\s+Score\s+SS\s+Cols\s+Query\s+HMM\s+Template\s+HMM\s+)/$1TMScore$3/;
453		}
454		print (HHR "$line");
455	    }
456	    else {
457		$checkedHeader = 1;
458	    }
459
460	    ## get hit Info:
461	    my $No = $template->get_No();
462	    if ($line =~ /^\s*$No(\s+\S+.+\s+\S+\s+\S+)\s+\S+(\s+\S+\s+\S+\s+\S+\s+\d+-\d+\s+\S+\s*\(\S+\)$)/)	{
463		## replace P-value by TMScore in hit info
464		$line = sprintf("%3s$1  %1.4f$2\n", $hitnr, $template->get_predTM());
465		print (HHR "$line");
466		last;
467	    }
468	}
469
470	## skip all lines up to alignment block
471	## Find beginning of alignment and replace hit index by new one
472	while ($line = <IN>){
473	    my $No = $template->get_No();
474	    if ($line =~ /^No\s+$No/) {
475		last;
476	    }
477	}
478
479	$line =~ s/^No\s+\d+/No $hitnr/;
480	push(@hhrContent, $line);
481
482	## Push alignment block onto array
483	while ($line = <IN>) {
484	    if(($line =~ /^No\s/)) {
485		last;
486	    }
487	    if ($line =~ /Done!/) {}
488	    else {
489		push(@hhrContent, $line);
490	    }
491	}
492	close (IN);
493
494	## create associated tab file
495	&BuildSingleTabFile("$outbase." . $template->get_Filt() . ".tab", $template->get_No(), $outbase);
496    }
497    print(HHR "\n");
498    print(HHR @hhrContent);
499    print(HHR "Done!\n");
500    close (HHR);
501}
502
503
504## starting from current hhr file, extract some features and save them into resultfile
505## this is needed for benchmark set compilation
506sub createBenchmarkInfoFile {
507    my ($self, $resultFile, $pdbdir) = @_;
508
509    my $TMalign = $config->get_TMalign();
510
511    my $query = $self->_get_query();
512    my $queryPDB = "$pdbdir/$query.pdb";
513
514    my $res = "";
515    $res .= "queryName"."\t"."TMID"."\t"."coverage"."\t"."queryLen"."\t"."templateName"."\t"."TMscore\n";
516
517    ## extract information from max first 50 templates
518    for (my $i=0; $i<50 && $i<$self->size(); $i++) {
519	my $template = $self->get($i);
520
521	my $TMscore = 0;
522	my $TMid = 0;
523
524	my $templatePDB = "$pdbdir/" . $template->get_Hit() . ".pdb";
525	my $tmalignResult = `$TMalign $templatePDB $queryPDB`;
526	if ($tmalignResult =~ /TM-score\s*=\s*(\S+),\s+ID\s*=\s*(\S+)/) {
527	    $TMscore = $1;
528	    $TMid= int(($2*100)+0.5);
529	}
530
531	my $queryLen = $self->_get_queryLength();
532	my $coverage = int(($template->get_Cols()*100/$queryLen)+0.5);
533	my $templateName = $template->get_Hit();
534
535	$res .= "$query\t$TMid\t$coverage\t$queryLen\t$templateName\t$TMscore\n";
536    }
537
538    open(OH, "> $resultFile") or die "Cant write $resultFile: $!\n";
539    print (OH $res);
540    close(OH);
541}
542
5431;
544