#!/usr/bin/perl -w
use strict;

## TemplateList
##
## An ordered list of Template objects (the hits of an HHsearch run) plus
## three values describing the query: its name ("query"), its number of
## match columns ("queryLength") and its Neff ("neff").
## The list can be filled from an hhr file or string, written to and read
## from a simple "==="-separated text format, sorted by several template
## scores and written back as an artificial hhr file.
package TemplateList;

use config;
use utilities;
use Template;

my $config = HHpredConfig->instance();


## Constructor. Called on the class it returns an empty list; called on an
## existing TemplateList object it returns a new list that holds the same
## Template objects (shallow copy) and the same query information.
## Additional arguments are accepted but ignored.
sub new {
    my ($caller, %arg) = @_;
    my $caller_is_obj = ref($caller);
    my $class = $caller_is_obj || $caller;
    my $self = bless {}, $class;

    $self->{templates}   = [];
    $self->{queryLength} = -1;
    $self->{query}       = "";
    $self->{neff}        = -1;

    if ($caller_is_obj) {
        my $size = $caller->size();
        for (my $i=0; $i<$size; $i++) {
            $self->{templates}->[$i] = $caller->{templates}->[$i];
        }
        $self->{queryLength} = $caller->{queryLength};
        ## the query name lives under the key "query"
        $self->{query}       = $caller->{query};
        $self->{neff}        = $caller->{neff};
    }
    return $self;
}


## append a template to the end of the list; returns the template
sub add_template {
    my ($self, $template) = @_;
    push(@{$self->{templates}}, $template);
    return $template;
}


## before adding template, check whether it is already in list
## (templates are compared with Template::equals)
sub check_and_add {
    my ($self, $template) = @_;

    foreach my $known (@{$self->{templates}}) {
        return if ($known->equals($template));
    }
    return $self->add_template($template);
}


## remove all templates and reset the query information to its defaults
sub clear {
    my $self = shift;
    %{$self} = ();
    $self->{templates}   = [];
    $self->{query}       = "";
    $self->{queryLength} = -1;
    $self->{neff}        = -1;
}


## delete hit with No "No"
## (only the first template with that No is removed; a message is printed
## to STDOUT in any case, idx=-1 meaning that nothing was found)
sub delete_No {
    my $self = shift;
    my $No   = shift;

    ## get idx for hit with No "No"
    my $deleteIdx = -1;
    for (my $i=0; $i<$self->size(); $i++) {
        if ($self->{templates}->[$i]->get_No() == $No) {
            $deleteIdx = $i;
            last;
        }
    }
    print "deleting No=$No, idx=$deleteIdx\n";
    if ($deleteIdx != -1) {
        splice(@{$self->{templates}}, $deleteIdx, 1);
    }
}


## number of templates in the list
sub size {
    my $self = shift;
    return scalar(@{$self->{templates}});
}


## template at (0-based) index i
sub get {
    my ($self, $i) = @_;
    return $self->{templates}->[$i];
}


## last template of the list (undef if the list is empty)
sub get_last {
    my $self = shift;
    return $self->{templates}->[$self->size()-1];
}


## concatenation of Template::to_string($spacer) of all templates,
## one template per line
sub to_string {
    my $self   = shift;
    my $spacer = shift;
    my $out    = "";
    foreach my $template (@{$self->{templates}}) {
        $out .= $template->to_string($spacer) . "\n";
    }
    return $out;
}


## print the list (to_string without spacer) to the selected output handle
sub print {
    my $self = shift;
    my $out  = $self->to_string();
    print $out;
}


## return the template that belongs to hit number No (stored at index No-1)
## or die with a readable message; $line is only used for the message
sub _template_for_No {
    my ($self, $No, $line) = @_;

    my $template;
    $template = $self->get($No-1) if (defined($No) && $No >= 1);
    if (!defined($template)) {
        die("Error: no entry in hit list for alignment line \"$line\"\n");
    }
    return $template;
}


## parse the lines of an hhr file and fill the list.
##   hhrFile : name of the hhr file; a name of the form *.<number>.hhr
##             sets the filter step of all templates to <number>,
##             otherwise the filter step is "start"
##   lines   : content of the hhr file, with or without trailing newlines
## Header lines set query, queryLength and neff, every line of the hit
## list creates a new template, and the alignment blocks add Ident, Sim,
## SumProbL, ss_dssp and conf to the template stored at index No-1.
## Dies if an alignment block is malformed.
sub to_TemplateList_helper {
    my $self    = shift;
    my $hhrFile = shift;
    my @lines   = @_;

    my $matchC;
    my $No;
    my $filtnr   = "start"; ## filter step (start means no filtering)
    my $spaceLen = 12;

    if ($hhrFile =~ /\.(\d+)\.hhr/) {
        $filtnr = $1;
    }

    for (my $i=0; $i<@lines; $i++) {
        my $line = $lines[$i];

        if ($line =~ /^Match_columns\s*(\S+)/) {
            $matchC = $1;
            $self->_set_queryLength($matchC);
        }
        elsif ($line =~ /^Query\s+(\S+)/) {
            my $query = $1;
            $self->_set_query($query);
        }
        elsif ($line =~ /^Neff\s+(\S+)/) {
            my $neff = $1;
            $self->_set_neff($neff);
        }
        ## No Hit Prob E-val P-val Score SS Cols Query(start end) Template(start end) HMM
        elsif ($line=~/^\s*(\d+)\s+(\S+).+\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\d+)-(\d+)\s+(\d+)-(\d+)\s*\((\S+)\)$/) {
            my $template = Template->new(Filt => $filtnr,
                                         No => $1,
                                         Hit => $2,
                                         Prob => $3,
                                         Eval => $4,
                                         Pval => $5,
                                         Score => $6,
                                         SS => $7,
                                         Cols => $8,
                                         Qstart => $9,
                                         Qend => $10,
                                         Tstart => $11,
                                         Tend => $12,
                                         HMM => $13);
            $self->add_template($template);
        }
        ## start of an alignment block: "No <n>", ">hit ...", statistics line
        elsif ($line =~ /^No\s+(\d+)/) {
            $No = $1;

            $line = $lines[++$i];
            $line = "" if (!defined($line)); ## ran past the end of the input
            ## the name may be the last thing on the line when the input
            ## lines carry no trailing newline
            if ($line !~ /^>\S+/) {
                die("Error:: wrong format in \"$line\"\n");
            }

            $line = $lines[++$i];
            $line = "" if (!defined($line));
            if ($line !~ /Similarity=(\S+)\s+Sum_probs=(\S+)\s*/) {
                die("Error: wrong format in \"$line\"\n");
            }
            ## save the captures before any other match can overwrite them
            my ($Similarity, $sumProbs) = ($1, $2);

            if (!defined($matchC) || $matchC == 0) {
                die("Error: Match_columns missing or zero, cannot normalize Sum_probs in \"$line\"\n");
            }
            my $SumProbL = sprintf("%.4f", $sumProbs/$matchC);

            my $template = $self->_template_for_No($No, $line);
            if ($line =~ /Identities=(\S+)%\s/) {
                $template->set_Ident($1);
            }
            $template->set_Sim($Similarity);
            $template->set_SumProbL($SumProbL);
        }
        elsif ($line =~ /^T\s+ss_dssp(\s+)(\S+)/) {
            my ($gap, $chunk) = ($1, $2);
            ## "Confidence" is one character longer than "T ss_dssp"
            $spaceLen = length($gap)-1;
            my $template = $self->_template_for_No($No, $line);
            $template->set_ss_dssp($template->get_ss_dssp() . $chunk);
        }
        ## Confidence line may contain spaces => read number of spaces from ss_dssp line
        ## ($ instead of \n so that lines without trailing newline match, too)
        elsif ($line =~ /^Confidence\s{$spaceLen}(.*)$/) {
            my $chunk = $1;
            my $template = $self->_template_for_No($No, $line);
            $template->set_conf($template->get_conf() . $chunk);
        }
    }
}


## fill the list from a string holding the content of an hhr file
## (filter step of all templates is "start")
sub str_to_TemplateList {
    my $self = shift;
    my $str  = shift;

    my @lines = split(/\n/, $str);

    $self->to_TemplateList_helper("dummy", @lines);
}


## fill the list from the hhr file hhrFile; dies if it cannot be opened
sub hhr_to_TemplateList {
    my ($self, $hhrFile) = @_;

    open(my $hhr, '<', $hhrFile) or die("Cant open $hhrFile: $!\n");
    my @lines = <$hhr>;
    close($hhr);

    $self->to_TemplateList_helper($hhrFile, @lines);
}


## write the list to outfile, one template per line, fields separated
## by "==="; dies if the file cannot be written
sub write_to_file {
    my ($self, $outfile) = @_;

    open(my $oh, '>', $outfile) or die("Cant write to $outfile: $!\n");
    my $out = $self->to_string('===');
    print {$oh} $out;
    ## buffered write errors only show up when closing
    close($oh) or die("Cant write to $outfile: $!\n");
}


## read templates from a file written by write_to_file.
## The existing content is cleared first unless a third argument with
## value 1 is given, in which case the templates are appended.
sub read_from_file {
    my ($self, $infile) = @_;
    my $append = 0;
    ## append template(s) to already existing ones
    $append = 1 if (scalar(@_) > 2 && $_[2] == 1);

    $self->clear() if (! $append);
    open(my $ih, '<', $infile) or die("Cant open $infile: $!\n");
    while (my $line = <$ih>) {
        chomp($line);
        next if ($line !~ /(\S+===)+/);

        my @entry = split(/===/, $line);
        my $template = Template->new(Filt => $entry[0],
                                     No => $entry[1],
                                     Hit => $entry[2],
                                     Prob => $entry[3],
                                     Eval => $entry[4],
                                     Pval => $entry[5],
                                     Score => $entry[6],
                                     SS => $entry[7],
                                     Cols => $entry[8],
                                     Qstart => $entry[9],
                                     Qend => $entry[10],
                                     Tstart => $entry[11],
                                     Tend => $entry[12],
                                     HMM => $entry[13],
                                     Ident => $entry[14],
                                     Sim => $entry[15],
                                     SumProbL => $entry[16],
                                     predTM => $entry[17]);
        $self->add_template($template);
    }
    close($ih);
}


## accessors for the query information ##
sub set_queryLength {
    my ($self, $len) = @_;
    $self->{queryLength} = $len;
}

sub get_queryLength {
    my $self = shift;
    return $self->{queryLength};
}

sub set_query {
    my ($self, $query) = @_;
    $self->{query} = $query;
}

sub get_query {
    my $self = shift;
    return $self->{query};
}

sub get_neff {
    my $self = shift;
    return $self->{neff};
}

sub set_neff {
    my ($self, $neff) = @_;
    $self->{neff} = $neff;
}

## for backward compatibility ##
sub _set_queryLength {
    my ($self, $len) = @_;
    $self->{queryLength} = $len;
}

sub _get_queryLength {
    my $self = shift;
    return $self->{queryLength};
}

sub _set_query {
    my ($self, $query) = @_;
    $self->{query} = $query;
}

sub _get_query {
    my $self = shift;
    return $self->{query};
}

sub _get_neff {
    my $self = shift;
    return $self->{neff};
}

sub _set_neff {
    my ($self, $neff) = @_;
    $self->{neff} = $neff;
}
######


## the sort_by_* methods reorder the list in place, highest value first ##
sub sort_by_sim {
    my $self = shift;
    @{$self->{templates}} = sort {$b->get_Sim() <=> $a->get_Sim()} @{$self->{templates}};
}


sub sort_by_prob {
    my $self = shift;
    @{$self->{templates}} = sort {$b->get_Prob() <=> $a->get_Prob()} @{$self->{templates}};
}


sub sort_by_sumProbL {
    my $self = shift;
    @{$self->{templates}} = sort {$b->get_SumProbL() <=> $a->get_SumProbL()} @{$self->{templates}};
}


sub sort_by_predTM {
    my $self = shift;
    @{$self->{templates}} = sort {$b->get_predTM() <=> $a->get_predTM()} @{$self->{templates}};
}


## write the list as artificial hhr file "outbase.hhr".
## For every template the hit line and the alignment block are taken from
## "outbase.<filter step>.hhr"; hits are renumbered in list order and the
## P-value column is replaced by the predicted TM score. The header is
## copied from the hhr file of the first template. For every template
## BuildSingleTabFile is called with "outbase.<filter step>.tab", the
## original hit number and outbase.
## Dies if the output file or one of the input hhr files cannot be opened.
sub templateList_to_hhr {
    my $self    = shift;
    my $outbase = shift;

    my $hhsearch = $config->get_hhsearch();

    my @hhrContent = ();

    open(my $hhr, '>', "$outbase.hhr") or die ("Error in templateList_to_hhr: Cant write $outbase.hhr: $!\n");

    for (my $i=0; $i<$self->size(); $i++) {
        my $template = $self->get($i);
        my $No       = $template->get_No();
        my $hitnr    = $i+1;

        ## open apropriate hhr file (wrt filter step)
        my $infile = "$outbase." . $template->get_Filt() . ".hhr";
        open(my $in, '<', $infile) or die ("Error: cannot open $infile!\n");

        my $checkedHeader = 0;
        my $line;

        while ($line = <$in>) {
            ## copy first header lines:
            if (($checkedHeader==0) && ($i==0) && ($line !~ /^\s*\d+\s+\S+.+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\d+-\d+\s+\S+\s*\(\S+\)$/)) {
                if ($line=~ /^Command/) {
                    $line=~ s/(^Command\s*)(.*)$/$1$hhsearch artificial hhr file/;
                }

                ## replace P-value against TMscore
                if ($line=~ /\s+No\s+Hit\s+Prob\s+E-value\s+P-value\s+Score\s+SS\s+Cols\s+Query\s+HMM\s+Template\s+HMM\s*/) {
                    $line =~ s/(\s*No\s+Hit\s+Prob\s+E-value\s+)(P-value)(\s+Score\s+SS\s+Cols\s+Query\s+HMM\s+Template\s+HMM\s+)/$1TMScore$3/;
                }
                print {$hhr} $line;
            }
            else {
                $checkedHeader = 1;
            }

            ## get hit Info:
            if ($line =~ /^\s*\Q$No\E(\s+\S+.+\s+\S+\s+\S+)\s+\S+(\s+\S+\s+\S+\s+\S+\s+\d+-\d+\s+\S+\s*\(\S+\)$)/) {
                my ($head, $tail) = ($1, $2);
                ## replace P-value by TMScore in hit info; the captured text
                ## is passed as argument because it may contain '%'
                $line = sprintf("%3s%s %1.4f%s\n", $hitnr, $head, $template->get_predTM(), $tail);
                print {$hhr} $line;
                last;
            }
        }

        ## skip all lines up to alignment block
        ## Find beginning of alignment and replace hit index by new one
        ## (\b: "No 1" must not match "No 10")
        while ($line = <$in>) {
            last if ($line =~ /^No\s+\Q$No\E\b/);
        }

        if (defined($line)) {
            $line =~ s/^No\s+\d+/No $hitnr/;
            push(@hhrContent, $line);
        }
        else {
            warn("Warning: no alignment block for hit No $No in $infile\n");
        }

        ## Push alignment block onto array
        while ($line = <$in>) {
            last if ($line =~ /^No\s/);
            next if ($line =~ /Done!/);
            push(@hhrContent, $line);
        }
        close($in);

        ## create associated tab file
        BuildSingleTabFile("$outbase." . $template->get_Filt() . ".tab", $No, $outbase);
    }
    print {$hhr} "\n";
    print {$hhr} @hhrContent;
    print {$hhr} "Done!\n";
    close($hhr) or die ("Error in templateList_to_hhr: Cant write $outbase.hhr: $!\n");
}


## starting from current hhr file, extract some features and save them into resultfile
## this is needed for benchmark set compilation
##   resultFile : tab separated table that is written
##   pdbdir     : directory with <name>.pdb for the query and the hits
## For at most the first 50 templates the configured TMalign program is
## run on template and query structure; TM-score and ID are parsed from
## its output (both stay 0 if the output does not match).
sub createBenchmarkInfoFile {
    my ($self, $resultFile, $pdbdir) = @_;

    my $TMalign = $config->get_TMalign();

    my $query    = $self->_get_query();
    my $queryPDB = "$pdbdir/$query.pdb";
    my $queryLen = $self->_get_queryLength();

    my $res = "";
    $res .= "queryName"."\t"."TMID"."\t"."coverage"."\t"."queryLen"."\t"."templateName"."\t"."TMscore\n";

    ## extract information from max first 50 templates
    for (my $i=0; $i<50 && $i<$self->size(); $i++) {
        my $template     = $self->get($i);
        my $templateName = $template->get_Hit();

        my $TMscore = 0;
        my $TMid    = 0;

        my $templatePDB = "$pdbdir/" . $templateName . ".pdb";
        ## NOTE(review): the command line is handed to the shell; hit and
        ## query names are assumed not to contain shell metacharacters
        my $tmalignResult = `$TMalign $templatePDB $queryPDB`;
        if ($tmalignResult =~ /TM-score\s*=\s*(\S+),\s+ID\s*=\s*(\S+)/) {
            $TMscore = $1;
            $TMid    = int(($2*100)+0.5);
        }

        ## a query length of 0 would be a fatal division by zero
        my $coverage = 0;
        if ($queryLen) {
            $coverage = int(($template->get_Cols()*100/$queryLen)+0.5);
        }

        $res .= "$query\t$TMid\t$coverage\t$queryLen\t$templateName\t$TMscore\n";
    }

    open(my $oh, '>', $resultFile) or die "Cant write $resultFile: $!\n";
    print {$oh} $res;
    close($oh) or die "Cant write $resultFile: $!\n";
}

1;