1#!/user/bin/perl -w 2use strict; 3 4package TemplateListStruct; 5 6use config; 7use utilities; 8use TemplateStruct; 9 10my $config = HHpredConfig->instance(); 11 12sub new { 13 my ($caller, $filename) = @_; 14 my $caller_is_obj = ref($caller); 15 my $class = $caller_is_obj || $caller; 16 no strict "refs"; 17 my $self = bless {}, $class; 18 19 $self->{templates} = []; 20 $self->{queryLength} = -1; 21 $self->{query} = ""; 22 $self->{neff} = -1; 23 24 if ($caller_is_obj) { 25 my $size = $caller->size(); 26 for (my $i=0; $i<$size; $i++) { 27 $self->{templates}->[$i] = $caller->{templates}->[$i]; 28 } 29 $self->{queryLength} = $caller->{queryLength}; 30 $self->{query} = $caller->{caller}; 31 $self->{neff} = $caller->{neff}; 32 } 33 34 if (defined($filename)) { 35 $self->hhr_to_TemplateList($filename); 36 } 37 return $self; 38} 39 40 41sub add_template { 42 my ($self, $template) = @_; 43 my $curSize = $self->size(); 44 $self->{templates}->[$curSize] = $template; 45} 46 47 48## before adding template, check whether it is already in list 49sub check_and_add { 50 my ($self, $template) = @_; 51 52 for (my $i=0; $i<$self->size(); $i++) { 53 if ($self->{templates}->[$i]->equals($template)) { 54 return; 55 } 56 } 57 $self->add_template($template); 58} 59 60 61sub clear { 62 my $self = shift; 63 %{$self} = (); 64 $self->{templates} = []; 65 $self->{query} = ""; 66 $self->{queryLength} = -1; 67 $self->{neff} = -1; 68} 69 70 71## delete hit with No "No" 72sub delete_No { 73 my $self = shift; 74 my $No = shift; 75 76 ## get idx for hit with No "No" 77 my $deleteIdx = -1; 78 for (my $i=0; $i<$self->size(); $i++) { 79 if ($self->{templates}->[$i]->get_No() == $No) { 80 $deleteIdx = $i; 81 last; 82 } 83 } 84 print "deleting No=$No, idx=$deleteIdx\n"; 85 if ($deleteIdx != -1) { 86 splice(@{$self->{templates}}, $deleteIdx, 1); 87 } 88} 89 90 91sub size { 92 my $self = shift; 93 return scalar(@{$self->{templates}}); 94} 95 96 97sub get { 98 my ($self, $i) = @_; 99 $self->{templates}->[$i]; 100} 101 102 103sub get_last { 104 my $self = shift; 105 $self->{templates}->[$self->size()-1]; 106} 107 108 109 110sub to_string { 111 my $self = shift; 112 my $spacer = shift; 113 my $out = ""; 114 for (my $i=0; $i<$self->size(); $i++) { 115 $out .= $self->{templates}->[$i]->to_string($spacer) . "\n"; 116 } 117 return $out; 118} 119 120 121sub print { 122 my $self = shift; 123 my $out = $self->to_string(); 124 print $out; 125} 126 127 128sub to_TemplateList_helper { 129 my $self = shift; 130 my $hhrFile = shift; 131 my @lines = @_; 132 133 my $matchC; 134 my $No; 135 my $filtnr = "start"; ## filter step (start means no filtering) 136 my $spaceLen = 12; 137 138 if ($hhrFile =~ /\.(\d+)\.hhr/) { 139 $filtnr = $1; 140 } 141 142 for (my $i=0; $i<@lines; $i++) { 143 my $line = $lines[$i]; 144 145 if ($line =~ /^Match_columns\s*(\S+)/) { 146 $matchC = $1; 147 $self->_set_queryLength($matchC); 148 } 149 if ($line =~ /^Query\s+(\S+)/) { 150 my $query = $1; 151 $self->_set_query($query); 152 } 153 if ($line =~ /^Neff\s+(\S+)/) { 154 my $neff = $1; 155 $self->_set_neff($neff); 156 } 157 ## No Hit Prob E-val P-val Score SS Cols Query(start end) Template(start end) HMM 158 elsif ($line=~/^\s*(\d+)\s+(\S+).+\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\d+)-(\d+)\s+(\d+)-(\d+)\s*\((\S+)\)$/) { 159 my $No = $1; 160 my $Hit = $2; 161 my $Prob = $3; 162 my $Eval = $4; 163 my $Pval = $5; 164 my $Score = $6; 165 my $SS = $7; 166 my $Cols = $8; 167 my $Qstart = $9; 168 my $Qend = $10; 169 my $Tstart = $11; 170 my $Tend = $12; 171 my $HMM = $13; 172 173 my $SSL = $SS/$matchC; 174 $SSL = sprintf("%.4f", $SSL); 175 176 my $template = TemplateStruct->new(Filt => $filtnr, 177 No => $No, 178 Hit => $Hit, 179 Prob => $Prob, 180 Eval => $Eval, 181 Pval => $Pval, 182 Score => $Score, 183 SS => $SS, 184 Cols => $Cols, 185 Qstart => $Qstart, 186 Qend => $Qend, 187 Tstart => $Tstart, 188 Tend => $Tend, 189 HMM => $HMM); 190 $self->add_template($template); 191 } 192 elsif($line =~ /^No\s+(\d+)/) { 193 $No = $1; 194 $line = $lines[++$i]; 195 196 if ($line !~ /^>(\S+)\s/) { 197 die("Error:: wrong format in \"$line\"\n"); 198 } 199 200 my $hit = $1; 201 $line = $lines[++$i]; 202 203 if ($line =~ /COMPACTNESS=(\S+)/i) { 204 $self->get($No-1)->set_Compactness($1); 205 } else { 206 #print "missing compactness in hhr-file!\n"; 207 } 208 209 if ($line =~ /CSS=(\S+)/i) { 210 $self->get($No-1)->set_Css($1); 211 } else { 212 #print "missing css in hhr-file!\n"; 213 } 214 215 if ($line =~ /CONTACT=(\S+)/i) { 216 $self->get($No-1)->set_Contact($1); 217 } else { 218 #print "missing contact in hhr-file!\n"; 219 } 220 221 if ($line =~ /CONTACT_REALIGN=(\S+)/i) { 222 $self->get($No-1)->set_ContactRealign($1); 223 } else { 224 #print "missing contact realign in hhr-file!\n"; 225 } 226 227 if ($line !~ /Similarity=(\S+)\s+Sum_probs=(\S+)\s*/) { 228 die("Error: wrong format in \"$line\"\n"); 229 } 230 231 my $Similarity = $1; 232 my $SumProbL = $2/$matchC; 233 $SumProbL = sprintf("%.4f" , $SumProbL); 234 235 if ($line =~ /Identities=(\S+)%\s/) { 236 $self->get($No-1)->set_Ident($1); 237 } 238 239 $self->get($No-1)->set_Sim($Similarity); 240 $self->get($No-1)->set_SumProbL($SumProbL); 241 } 242 elsif ($line =~ /^T\s+ss_dssp(\s+)(\S+)/) { 243 $spaceLen = length($1)-1; 244 my $ss_dssp = $self->get($No-1)->get_ss_dssp(); 245 $self->get($No-1)->set_ss_dssp("$ss_dssp" . $2); 246 } 247 ## Confidence line may contain spaces => read number of spaces from ss_dssp line 248 elsif ($line =~ /^Confidence\s{$spaceLen}(.*)\n/) { 249 my $conf = $self->get($No-1)->get_conf(); 250 $self->get($No-1)->set_conf("$conf" . $1); 251 } 252 } 253} 254 255 256sub str_to_TemplateList { 257 my $self = shift; 258 my $str = shift; 259 260 my @lines; 261 @lines = split(/\n/, $str); 262 263 $self->to_TemplateList_helper("dummy", @lines); 264} 265 266 267sub hhr_to_TemplateList { 268 my ($self, $hhrFile) = @_; 269 270 my @lines; 271 open(HHR,"< $hhrFile") or die("Cant open $hhrFile: $!\n"); 272 @lines = <HHR>; 273 close(HHR); 274 275 $self->to_TemplateList_helper($hhrFile, @lines); 276} 277 278 279sub write_to_file { 280 my ($self, $outfile) = @_; 281 282 open (OH, "> $outfile") or die("Cant write to $outfile: $!\n"); 283 my $out = $self->to_string('==='); 284 print(OH $out); 285 close(OH); 286} 287 288 289sub read_from_file { 290 my ($self, $infile) = @_; 291 my $append = 0; 292 ## append template(s) to already existing ones 293 $append = 1 if (scalar(@_) > 2 && $_[2] == 1); 294 295 $self->clear() if (! $append); 296 open(IH, "< $infile") or die("Cant open $infile: $!\n"); 297 while(<IH>) { 298 chomp; 299 if (/(\S+===)+/) { 300 my @entry = split(/===/, $_); 301 my $template = TemplateStruct->new(Filt => $entry[0], 302 No => $entry[1], 303 Hit => $entry[2], 304 Prob => $entry[3], 305 Eval => $entry[4], 306 Pval => $entry[5], 307 Score => $entry[6], 308 SS => $entry[7], 309 Cols => $entry[8], 310 Qstart => $entry[9], 311 Qend => $entry[10], 312 Tstart => $entry[11], 313 Tend => $entry[12], 314 HMM => $entry[13], 315 Ident => $entry[14], 316 Sim => $entry[15], 317 SumProbL => $entry[16], 318 predTM => $entry[17], 319 Compactness => $entry[18], 320 Css => $entry[19], 321 Contact => $entry[20], 322 ContactRealign => $entry[21]); 323 324 $self->add_template($template); 325 } 326 } 327 close(IH); 328} 329 330 331sub set_queryLength { 332 my ($self, $len) = @_; 333 $self->{queryLength} = $len; 334} 335 336sub get_queryLength { 337 my $self = shift; 338 $self->{queryLength}; 339} 340 341sub set_query { 342 my ($self, $query) = @_; 343 $self->{query} = $query; 344} 345 346sub get_query { 347 my $self = shift; 348 $self->{query}; 349} 350 351sub get_neff { 352 my $self = shift; 353 $self->{neff}; 354} 355 356sub set_neff { 357 my ($self, $neff) = @_; 358 $self->{neff} = $neff; 359} 360 361## for backward compatibility ## 362sub _set_queryLength { 363 my ($self, $len) = @_; 364 $self->{queryLength} = $len; 365} 366 367sub _get_queryLength { 368 my $self = shift; 369 $self->{queryLength}; 370} 371 372sub _set_query { 373 my ($self, $query) = @_; 374 $self->{query} = $query; 375} 376 377sub _get_query { 378 my $self = shift; 379 $self->{query}; 380} 381 382sub _get_neff { 383 my $self = shift; 384 $self->{neff}; 385} 386 387sub _set_neff { 388 my ($self, $neff) = @_; 389 $self->{neff} = $neff; 390} 391###### 392 393 394 395 396sub sort_by_sim { 397 my $self = shift; 398 @{$self->{templates}} = sort {$b->get_Sim() <=> $a->get_Sim()} @{$self->{templates}}; 399} 400 401 402sub sort_by_prob { 403 my $self = shift; 404 @{$self->{templates}} = sort {$b->get_Prob() <=> $a->get_Prob()} @{$self->{templates}}; 405} 406 407 408sub sort_by_sumProbL { 409 my $self = shift; 410 @{$self->{templates}} = sort {$b->get_SumProbL() <=> $a->get_SumProbL()} @{$self->{templates}}; 411} 412 413 414sub sort_by_predTM { 415 my $self = shift; 416 @{$self->{templates}} = sort {$b->get_predTM() <=> $a->get_predTM()} @{$self->{templates}}; 417} 418 419 420sub templateList_to_hhr { 421 my $self = shift; 422 my $outbase = shift; 423 424 my @hhrContent = (); 425 my $hh = $config->get_hh(); 426 427 open(HHR, "> $outbase.hhr") or die ("Error in templateList_to_hhr: Cant write $outbase.hhr: $!\n"); 428 429 for (my $i=0; $i<$self->size(); $i++) { 430 my $template = $self->get($i); 431 432 ## open apropriate hhr file (wrt filter step) 433 my $infile = "$outbase." . $template->get_Filt() . ".hhr"; 434 open (IN, "< $infile") or die ("Error: cannot open $infile!\n"); 435 436 my $checkedHeader = 0; 437 my $begin; 438 my $e = 0; 439 my $end; 440 my $line; 441 my $hitnr = $i+1; 442 443 while ($line = <IN>) { 444 ## copy first header lines: 445 if (($checkedHeader==0) && ($i==0) && ($line !~ /^\s*\d+\s+\S+.+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\d+-\d+\s+\S+\s*\(\S+\)$/)) { 446 if ($line=~ /^Command/) { 447 $line=~ s/(^Command\s*)(.*)$/$1$hh\/hhsearch artificial hhr file/; 448 } 449 450 ## replace P-value against TMscore 451 if ($line=~ /\s+No\s+Hit\s+Prob\s+E-value\s+P-value\s+Score\s+SS\s+Cols\s+Query\s+HMM\s+Template\s+HMM\s*/) { 452 $line =~ s/(\s*No\s+Hit\s+Prob\s+E-value\s+)(P-value)(\s+Score\s+SS\s+Cols\s+Query\s+HMM\s+Template\s+HMM\s+)/$1TMScore$3/; 453 } 454 print (HHR "$line"); 455 } 456 else { 457 $checkedHeader = 1; 458 } 459 460 ## get hit Info: 461 my $No = $template->get_No(); 462 if ($line =~ /^\s*$No(\s+\S+.+\s+\S+\s+\S+)\s+\S+(\s+\S+\s+\S+\s+\S+\s+\d+-\d+\s+\S+\s*\(\S+\)$)/) { 463 ## replace P-value by TMScore in hit info 464 $line = sprintf("%3s$1 %1.4f$2\n", $hitnr, $template->get_predTM()); 465 print (HHR "$line"); 466 last; 467 } 468 } 469 470 ## skip all lines up to alignment block 471 ## Find beginning of alignment and replace hit index by new one 472 while ($line = <IN>){ 473 my $No = $template->get_No(); 474 if ($line =~ /^No\s+$No/) { 475 last; 476 } 477 } 478 479 $line =~ s/^No\s+\d+/No $hitnr/; 480 push(@hhrContent, $line); 481 482 ## Push alignment block onto array 483 while ($line = <IN>) { 484 if(($line =~ /^No\s/)) { 485 last; 486 } 487 if ($line =~ /Done!/) {} 488 else { 489 push(@hhrContent, $line); 490 } 491 } 492 close (IN); 493 494 ## create associated tab file 495 &BuildSingleTabFile("$outbase." . $template->get_Filt() . ".tab", $template->get_No(), $outbase); 496 } 497 print(HHR "\n"); 498 print(HHR @hhrContent); 499 print(HHR "Done!\n"); 500 close (HHR); 501} 502 503 504## starting from current hhr file, extract some features and save them into resultfile 505## this is needed for benchmark set compilation 506sub createBenchmarkInfoFile { 507 my ($self, $resultFile, $pdbdir) = @_; 508 509 my $TMalign = $config->get_TMalign(); 510 511 my $query = $self->_get_query(); 512 my $queryPDB = "$pdbdir/$query.pdb"; 513 514 my $res = ""; 515 $res .= "queryName"."\t"."TMID"."\t"."coverage"."\t"."queryLen"."\t"."templateName"."\t"."TMscore\n"; 516 517 ## extract information from max first 50 templates 518 for (my $i=0; $i<50 && $i<$self->size(); $i++) { 519 my $template = $self->get($i); 520 521 my $TMscore = 0; 522 my $TMid = 0; 523 524 my $templatePDB = "$pdbdir/" . $template->get_Hit() . ".pdb"; 525 my $tmalignResult = `$TMalign $templatePDB $queryPDB`; 526 if ($tmalignResult =~ /TM-score\s*=\s*(\S+),\s+ID\s*=\s*(\S+)/) { 527 $TMscore = $1; 528 $TMid= int(($2*100)+0.5); 529 } 530 531 my $queryLen = $self->_get_queryLength(); 532 my $coverage = int(($template->get_Cols()*100/$queryLen)+0.5); 533 my $templateName = $template->get_Hit(); 534 535 $res .= "$query\t$TMid\t$coverage\t$queryLen\t$templateName\t$TMscore\n"; 536 } 537 538 open(OH, "> $resultFile") or die "Cant write $resultFile: $!\n"; 539 print (OH $res); 540 close(OH); 541} 542 5431; 544