1#!/user/bin/perl -w
2use strict;
3
4package TemplateList;
5
6use config;
7use utilities;
8use Template;
9
10my $config = HHpredConfig->instance();
11
## Constructor.
##
## Called on the class (TemplateList->new()) it returns an empty list with
## query "", queryLength -1 and neff -1.
## Called on an existing list ($list->new()) it returns a copy of that list:
## the copy owns a new template array, but the entries in it are the very
## same Template objects as in the original (shallow copy). Query name,
## query length and Neff are taken over from the original.
##
## Further key/value arguments are accepted but not used.
sub new {
    my ($caller, %arg) = @_;
    my $caller_is_obj = ref($caller);
    my $class = $caller_is_obj || $caller;
    my $self = bless {}, $class;

    $self->{templates} = [];
    $self->{queryLength} = -1;
    $self->{query} = "";
    $self->{neff} = -1;

    if ($caller_is_obj) {
        ## new array, same Template objects
        @{$self->{templates}} = @{$caller->{templates}};
        $self->{queryLength} = $caller->{queryLength};
        ## the query name lives under the key "query"; reading the
        ## non-existent key "caller" left every copy without a query name
        $self->{query} = $caller->{query};
        $self->{neff} = $caller->{neff};
    }
    return $self;
}
35
36
## Append $template to the end of the list. The appended template is the
## value of the call.
sub add_template {
    my $self = shift;
    my ($template) = @_;
    my $list = $self->{templates};
    ## the slot right behind the current last element
    return $list->[ scalar(@{$list}) ] = $template;
}
42
43
## Add $template only if it is not in the list yet. A stored template counts
## as "the same" if its equals() method says so for $template; in that case
## nothing is added and nothing is returned.
sub check_and_add {
    my $self = shift;
    my ($template) = @_;

    foreach my $known (@{$self->{templates}}) {
        return if ($known->equals($template));
    }
    return $self->add_template($template);
}
55
56
## Reset the list to the state of a freshly constructed one: every key of
## the object is dropped, then the template array is made empty, the query
## name is set to "" and query length and Neff are set to -1.
sub clear {
    my ($self) = @_;
    %{$self} = (
        templates   => [],
        query       => "",
        queryLength => -1,
    );
    return $self->{neff} = -1;
}
65
66
## Remove the first template whose hit number (get_No()) is equal to $No.
## A progress line is printed in any case; idx=-1 in that line means that no
## template with this number was found and the list is unchanged.
sub delete_No {
    my ($self, $No) = @_;

    ## look up the position of the hit with number "No"
    my $deleteIdx = -1;
    foreach my $idx (0 .. $#{$self->{templates}}) {
        next if ($self->{templates}->[$idx]->get_No() != $No);
        $deleteIdx = $idx;
        last;
    }
    print "deleting No=$No, idx=$deleteIdx\n";
    if ($deleteIdx != -1) {
        splice(@{$self->{templates}}, $deleteIdx, 1);
    }
}
85
86
## Number of templates currently held by the list.
sub size {
    my ($self) = @_;
    my $count = @{$self->{templates}};
    return $count;
}
91
92
## Template at position $i of the list (counting from 0); undef if there is
## no entry at that position.
sub get {
    my $self = shift;
    my $i = shift;
    return $self->{templates}[$i];
}
97
98
## Last template of the list; undef if the list is empty.
sub get_last {
    my ($self) = @_;
    ## index -1 addresses the final element, just like size()-1
    return $self->{templates}[-1];
}
103
104
105
## Text form of the whole list: the to_string($spacer) result of every
## template, in list order, each followed by a newline. An empty list gives
## the empty string.
sub to_string {
    my ($self, $spacer) = @_;
    return join("", map { $_->to_string($spacer) . "\n" } @{$self->{templates}});
}
115
116
## Print the text form of the list (to_string() without spacer) to the
## currently selected output handle.
sub print {
    my ($self) = @_;
    my $text = $self->to_string();
    return print($text);
}
122
123
## Parse the lines of an hhr file (HHsearch result) and append one Template
## per hit to this list.
##
## Arguments:
##   $hhrFile - name of the file the lines come from. It is only used to
##              derive the filter step from a name of the form
##              *.<number>.hhr; "start" is used if there is no such number.
##   @lines   - content of the file, one line per element, with or without
##              trailing newline.
##
## What is read:
##   - "Query", "Match_columns" and "Neff" header lines set query name,
##     query length and Neff of the list.
##   - Every line of the hit list creates a Template which is appended.
##   - Every alignment block ("No <n>", then ">name ...", then the line with
##     Similarity= and Sum_probs=) adds identity, similarity and
##     Sum_probs/Match_columns to the template with hit number <n>.
##   - "T ss_dssp" and "Confidence" lines of a block are concatenated onto
##     the ss_dssp and conf strings of that template.
##
## Dies if an alignment block does not have the expected layout or if
## Sum_probs cannot be normalised because Match_columns is missing or zero.
sub to_TemplateList_helper {
    my $self = shift;
    my $hhrFile = shift;
    my @lines = @_;

    my $matchC;
    my $No;
    my $filtnr = "start";  ## filter step (start means no filtering)
    my $spaceLen = 12;
    my %templateOfNo;      ## hit number => Template created during this call

    if ($hhrFile =~ /\.(\d+)\.hhr/) {
        $filtnr = $1;
    }

    ## Template the current alignment block belongs to. Templates created in
    ## this call are found by hit number, so parsing also works when the list
    ## already held templates before. For blocks without a hit line in this
    ## input the position No-1 in the list is used.
    my $currentTemplate = sub {
        if (defined($No) && exists($templateOfNo{$No})) {
            return $templateOfNo{$No};
        }
        return $self->get($No-1);
    };

    for (my $i=0; $i<@lines; $i++) {
        my $line = $lines[$i];

        if ($line =~ /^Match_columns\s*(\S+)/) {
            $matchC = $1;
            $self->_set_queryLength($matchC);
        }
        elsif ($line =~ /^Query\s+(\S+)/) {
            my $query = $1;
            $self->_set_query($query);
        }
        elsif ($line =~ /^Neff\s+(\S+)/) {
            my $neff = $1;
            $self->_set_neff($neff);
        }
        ## No     Hit       Prob E-val  P-val  Score    SS      Cols  Query(start end) Template(start end) HMM
        elsif ($line=~/^\s*(\d+)\s+(\S+).+\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\d+)-(\d+)\s+(\d+)-(\d+)\s*\((\S+)\)$/) {
            my $hitNo = $1;

            my $template = Template->new(Filt => $filtnr,
                                         No => $hitNo,
                                         Hit => $2,
                                         Prob => $3,
                                         Eval => $4,
                                         Pval => $5,
                                         Score => $6,
                                         SS => $7,
                                         Cols => $8,
                                         Qstart => $9,
                                         Qend => $10,
                                         Tstart => $11,
                                         Tend => $12,
                                         HMM => $13);
            $self->add_template($template);
            $templateOfNo{$hitNo} = $template;
        }
        elsif ($line =~ /^No\s+(\d+)/) {
            $No = $1;

            ## next line: ">name description"
            $line = $lines[++$i];
            $line = "" if (!defined($line));
            ## no whitespace required after the name, so a bare ">name"
            ## without trailing newline is accepted as well
            if ($line !~ /^>\S+/) {
                die("Error:: wrong format in \"$line\"\n");
            }

            ## next line: Probab=... Identities=..% Similarity=... Sum_probs=...
            $line = $lines[++$i];
            $line = "" if (!defined($line));
            if ($line !~ /Similarity=(\S+)\s+Sum_probs=(\S+)\s*/) {
                die("Error: wrong format in \"$line\"\n");
            }
            my $Similarity = $1;
            my $sumProbs = $2;

            if (!$matchC) {
                die("Error: Match_columns missing or zero, cannot normalise Sum_probs in \"$line\"\n");
            }
            my $SumProbL = sprintf("%.4f", $sumProbs/$matchC);

            my $template = $currentTemplate->();
            if ($line =~ /Identities=(\S+)%\s/) {
                $template->set_Ident($1);
            }
            $template->set_Sim($Similarity);
            $template->set_SumProbL($SumProbL);
        }
        elsif ($line =~ /^T\s+ss_dssp(\s+)(\S+)/) {
            my ($gap, $ss) = ($1, $2);
            ## "Confidence" is one character longer than "T ss_dssp", so the
            ## gap in front of its data is one blank shorter
            $spaceLen = length($gap)-1;
            my $template = $currentTemplate->();
            $template->set_ss_dssp($template->get_ss_dssp() . $ss);
        }
        ## Confidence line may contain spaces => read number of spaces from ss_dssp line
        ## ($ instead of \n at the end: lines may come without trailing newline)
        elsif ($line =~ /^Confidence\s{$spaceLen}(.*)$/) {
            my $confPart = $1;
            my $template = $currentTemplate->();
            $template->set_conf($template->get_conf() . $confPart);
        }
    }
}
226
227
## Parse hhr output that is held in a string instead of a file: $str is cut
## into lines at the newlines and handed to to_TemplateList_helper together
## with the placeholder file name "dummy".
sub str_to_TemplateList {
    my ($self, $str) = @_;
    my @lines = split(/\n/, $str);
    return $self->to_TemplateList_helper("dummy", @lines);
}
237
238
## Read the hhr file $hhrFile and add its hits to this list by handing all
## of its lines (newlines included) to to_TemplateList_helper.
## Dies if the file cannot be opened.
sub hhr_to_TemplateList {
    my ($self, $hhrFile) = @_;

    ## three-argument open with a lexical handle: the file name is taken
    ## literally (no mode characters or blanks in it are interpreted) and no
    ## global handle is shared with other code
    open(my $hhr, '<', $hhrFile) or die("Cant open $hhrFile: $!\n");
    my @lines = <$hhr>;
    close($hhr);

    return $self->to_TemplateList_helper($hhrFile, @lines);
}
249
250
## Write the list to $outfile, one template per line with "===" between the
## fields (to_string('===')). An existing file is overwritten.
## Dies if the file cannot be opened or if writing fails.
sub write_to_file {
    my ($self, $outfile) = @_;

    ## three-argument open with a lexical handle: the file name is taken
    ## literally and no global handle is shared with other code
    open(my $oh, '>', $outfile) or die("Cant write to $outfile: $!\n");
    my $out = $self->to_string('===');
    print {$oh} $out;
    ## buffered write errors (e.g. disk full) only show up at close
    close($oh) or die("Cant write to $outfile: $!\n");
}
259
260
## Read templates from $infile, which is expected to hold one template per
## line with "===" between the fields, in this order:
##   Filt No Hit Prob Eval Pval Score SS Cols Qstart Qend Tstart Tend HMM
##   Ident Sim SumProbL predTM
## Lines without a "===" separated field are skipped; fields missing at the
## end of a line are passed to Template->new as undef.
##
## The list is cleared first, unless a third argument equal to 1 is given;
## in that case the templates are appended to the existing ones.
## Dies if the file cannot be opened.
sub read_from_file {
    my ($self, $infile, $appendFlag) = @_;
    ## append template(s) to already existing ones
    my $append = (defined($appendFlag) && $appendFlag == 1) ? 1 : 0;

    ## order of the fields within a line
    my @fields = qw(Filt No Hit Prob Eval Pval Score SS Cols
                    Qstart Qend Tstart Tend HMM Ident Sim SumProbL predTM);

    $self->clear() if (! $append);
    ## three-argument open with a lexical handle: the file name is taken
    ## literally and no global handle is shared with other code
    open(my $ih, '<', $infile) or die("Cant open $infile: $!\n");
    ## lexical line variable: the caller's $_ is left alone
    while (my $line = <$ih>) {
        chomp($line);
        next if ($line !~ /(\S+===)+/);

        my @entry = split(/===/, $line);
        my $template = Template->new(map { $fields[$_] => $entry[$_] } (0 .. $#fields));
        $self->add_template($template);
    }
    close($ih);
}
296
297
## Store $len as length of the query.
sub set_queryLength {
    my $self = shift;
    my ($len) = @_;
    return $self->{queryLength} = $len;
}
302
## Length of the query as stored in the list.
sub get_queryLength {
    my ($self) = @_;
    return $self->{queryLength};
}
307
## Store $query as name of the query.
sub set_query {
    my $self = shift;
    my ($query) = @_;
    return $self->{query} = $query;
}
312
## Name of the query as stored in the list.
sub get_query {
    my ($self) = @_;
    return $self->{query};
}
317
## Neff value as stored in the list.
sub get_neff {
    my ($self) = @_;
    return $self->{neff};
}
322
## Store $neff as Neff value of the list.
sub set_neff {
    my $self = shift;
    my ($neff) = @_;
    return $self->{neff} = $neff;
}
327
328## for backward compatibility ##
## Old name of set_queryLength: store $len as length of the query.
sub _set_queryLength {
    my $self = shift;
    my ($len) = @_;
    return $self->{queryLength} = $len;
}
333
## Old name of get_queryLength: length of the query as stored in the list.
sub _get_queryLength {
    my ($self) = @_;
    return $self->{queryLength};
}
338
## Old name of set_query: store $query as name of the query.
sub _set_query {
    my $self = shift;
    my ($query) = @_;
    return $self->{query} = $query;
}
343
## Old name of get_query: name of the query as stored in the list.
sub _get_query {
    my ($self) = @_;
    return $self->{query};
}
348
## Old name of get_neff: Neff value as stored in the list.
sub _get_neff {
    my ($self) = @_;
    return $self->{neff};
}
353
## Old name of set_neff: store $neff as Neff value of the list.
sub _set_neff {
    my $self = shift;
    my ($neff) = @_;
    return $self->{neff} = $neff;
}
358######
359
360
361
362
## Reorder the templates in place by similarity (get_Sim()), highest first.
sub sort_by_sim {
    my ($self) = @_;
    my $templates = $self->{templates};
    @{$templates} = sort { $b->get_Sim() <=> $a->get_Sim() } @{$templates};
}
367
368
## Reorder the templates in place by probability (get_Prob()), highest first.
sub sort_by_prob {
    my ($self) = @_;
    my $templates = $self->{templates};
    @{$templates} = sort { $b->get_Prob() <=> $a->get_Prob() } @{$templates};
}
373
374
## Reorder the templates in place by get_SumProbL(), highest first.
sub sort_by_sumProbL {
    my ($self) = @_;
    my $templates = $self->{templates};
    @{$templates} = sort { $b->get_SumProbL() <=> $a->get_SumProbL() } @{$templates};
}
379
380
## Reorder the templates in place by predicted TM score (get_predTM()),
## highest first.
sub sort_by_predTM {
    my ($self) = @_;
    my $templates = $self->{templates};
    @{$templates} = sort { $b->get_predTM() <=> $a->get_predTM() } @{$templates};
}
385
386
## Write an artificial hhr file "$outbase.hhr" that holds the templates of
## this list in their current order.
##
## For every template, hit line and alignment block are taken from the hhr
## file of the filter step the template belongs to ("$outbase.<Filt>.hhr").
## Hits are renumbered 1..n by their position in this list, and the P-value
## column of the hit line is replaced by the predicted TM score of the
## template (get_predTM()). The header is copied from the hhr file of the
## first template; its "Command" line and the column title are adapted.
## After each template, BuildSingleTabFile is called for
## "$outbase.<Filt>.tab" with the original hit number and $outbase.
##
## Dies if the output file or one of the input files cannot be opened, or if
## writing the output file fails. A template whose alignment block is not
## found in its hhr file gives a warning; the hit is then written without
## alignment block.
sub templateList_to_hhr {
    my $self = shift;
    my $outbase = shift;

    my $hhsearch = $config->get_hhsearch();

    ## alignment blocks are collected and written after all hit lines
    my @hhrContent = ();

    open(my $hhr, '>', "$outbase.hhr") or die ("Error in templateList_to_hhr: Cant write $outbase.hhr: $!\n");

    for (my $i=0; $i<$self->size(); $i++) {
        my $template = $self->get($i);
        my $No = $template->get_No();   ## hit number in the source hhr file
        my $hitnr = $i+1;               ## hit number in the file written here

        ## open appropriate hhr file (wrt filter step)
        my $infile = "$outbase." . $template->get_Filt() . ".hhr";
        open(my $in, '<', $infile) or die ("Error: cannot open $infile!\n");

        my $checkedHeader = 0;
        my $line;

        while ($line = <$in>) {
            ## copy first header lines (everything in front of the first hit
            ## line), for the first template only:
            if (($checkedHeader==0) && ($i==0) && ($line !~ /^\s*\d+\s+\S+.+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\d+-\d+\s+\S+\s*\(\S+\)$/)) {
                if ($line=~ /^Command/) {
                    $line=~ s/(^Command\s*)(.*)$/$1$hhsearch artificial hhr file/;
                }

                ## replace P-value against TMscore
                if ($line=~ /\s+No\s+Hit\s+Prob\s+E-value\s+P-value\s+Score\s+SS\s+Cols\s+Query\s+HMM\s+Template\s+HMM\s*/) {
                    $line =~ s/(\s*No\s+Hit\s+Prob\s+E-value\s+)(P-value)(\s+Score\s+SS\s+Cols\s+Query\s+HMM\s+Template\s+HMM\s+)/$1TMScore$3/;
                }
                print {$hhr} $line;
            }
            else {
                $checkedHeader = 1;
            }

            ## get hit Info:
            if ($line =~ /^\s*\Q$No\E(\s+\S+.+\s+\S+\s+\S+)\s+\S+(\s+\S+\s+\S+\s+\S+\s+\d+-\d+\s+\S+\s*\(\S+\)$)/) {
                my ($front, $back) = ($1, $2);
                ## replace P-value by TMScore in hit info; the text taken
                ## from the file is passed as data, not as part of the
                ## format, so a "%" in a hit description is harmless
                print {$hhr} sprintf("%3s%s  %1.4f%s\n", $hitnr, $front, $template->get_predTM(), $back);
                last;
            }
        }

        ## skip all lines up to alignment block
        ## (\b: looking for "No 1" must not stop at "No 10")
        while ($line = <$in>) {
            last if ($line =~ /^No\s+\Q$No\E\b/);
        }

        if (defined($line)) {
            ## replace hit index by new one
            $line =~ s/^No\s+\d+/No $hitnr/;
            push(@hhrContent, $line);

            ## Push alignment block onto array, up to the next block
            while ($line = <$in>) {
                last if ($line =~ /^No\s/);
                push(@hhrContent, $line) if ($line !~ /Done!/);
            }
        }
        else {
            warn("Warning: no alignment block for No $No found in $infile\n");
        }
        close ($in);

        ## create associated tab file
        &BuildSingleTabFile("$outbase." . $template->get_Filt() . ".tab", $template->get_No(), $outbase);
    }
    print {$hhr} "\n";
    print {$hhr} @hhrContent;
    print {$hhr} "Done!\n";
    ## buffered write errors only show up at close
    close ($hhr) or die ("Error in templateList_to_hhr: Cant write $outbase.hhr: $!\n");
}
470
471
## Starting from the templates of this list (i.e. the current hhr file),
## extract some features and save them into $resultFile. This is needed for
## benchmark set compilation.
##
## Arguments:
##   $resultFile - file to write; an existing file is overwritten
##   $pdbdir     - directory with "<name>.pdb" files for the query and for
##                 the templates (named after get_Hit())
##
## For at most the first 50 templates the configured TMalign program is run
## on template and query structure. The result file is a tab separated
## table with one header line and one line per template:
##   queryName  TMID  coverage  queryLen  templateName  TMscore
## TMID is the identity reported by TMalign in percent (rounded), coverage
## is Cols*100/queryLen (rounded). TMscore and TMID are 0 if the TMalign
## output does not contain them; coverage is 0 if the query length is 0.
##
## Dies if the result file cannot be written.
sub createBenchmarkInfoFile {
    my ($self, $resultFile, $pdbdir) = @_;

    my $TMalign = $config->get_TMalign();

    my $query = $self->_get_query();
    my $queryPDB = "$pdbdir/$query.pdb";
    my $queryLen = $self->_get_queryLength();

    my $res = "";
    $res .= "queryName"."\t"."TMID"."\t"."coverage"."\t"."queryLen"."\t"."templateName"."\t"."TMscore\n";

    ## extract information from max first 50 templates
    for (my $i=0; $i<50 && $i<$self->size(); $i++) {
        my $template = $self->get($i);

        my $TMscore = 0;
        my $TMid = 0;

        my $templateName = $template->get_Hit();
        my $templatePDB = "$pdbdir/$templateName.pdb";
        ## NOTE(review): the paths are pasted into a shell command line;
        ## names with blanks or shell characters will break this call
        my $tmalignResult = `$TMalign $templatePDB $queryPDB`;
        if ($tmalignResult =~ /TM-score\s*=\s*(\S+),\s+ID\s*=\s*(\S+)/) {
            $TMscore = $1;
            $TMid= int(($2*100)+0.5);
        }

        ## a query length of 0 would be a division by zero
        my $coverage = 0;
        if ($queryLen) {
            $coverage = int(($template->get_Cols()*100/$queryLen)+0.5);
        }

        $res .= "$query\t$TMid\t$coverage\t$queryLen\t$templateName\t$TMscore\n";
    }

    ## three-argument open with a lexical handle: the file name is taken
    ## literally and no global handle is shared with other code
    open(my $oh, '>', $resultFile) or die "Cant write $resultFile: $!\n";
    print {$oh} $res;
    ## buffered write errors only show up at close
    close($oh) or die "Cant write $resultFile: $!\n";
}
510
5111;
512