1#! /usr/bin/perl -w
2
3# Do a piece of a profmark benchmark, for phmmer searches by FPS
4# (family-pairwise-search; best E-value of all individual queries).
5#
6# This script is normally called by pmark_master.pl; its command line
7# syntax is tied to pmark_master.pl.
8#
9# Usage:     x-phmmer-fps <top_builddir>                     <top_srcdir> <resultdir> <tblfile> <msafile> <fafile> <outfile>
10# Example: ./x-phmmer-fps ~/releases/hmmer-3.0/build-icc-mpi ~/src/hmmer  testdir     test.tbl  pmark.msa test.fa  test.out
11#
12# SRE, Sun Feb  7 08:54:28 2010 [Casa de Gatos]
13# SVN $Id$
14
15BEGIN {
16    $top_builddir  = shift;
17    $top_srcdir    = shift;
18    $wrkdir        = shift;
19    $tblfile       = shift;
20    $msafile       = shift;
21    $fafile        = shift;
22    $outfile       = shift;
23}
24
25$phmmer      = "$top_builddir/src/phmmer";
26$opts        = "";
27
28if (! -d $top_builddir)                                 { die "didn't find build directory $top_builddir"; }
29if (! -d $top_srcdir)                                   { die "didn't find source directory $top_srcdir"; }
30if (! -x $phmmer)                                       { die "didn't find executable $phmmer"; }
31if (! -e $wrkdir)                                       { die "$wrkdir doesn't exist"; }
32
33open(OUTFILE,">$outfile") || die "failed to open $outfile";
34open(TABLE, "$tblfile")   || die "failed to open $tblfile";
35MSA:
36while (<TABLE>)
37{
38    ($msaname) = split;
39
40    %seen          = ();
41    %best_pval     = ();
42    %best_bitscore = ();
43
44    `esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname`;
45    if ($?) { print "FAILED: esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname\n"; next MSA; }
46
47    # Extract a list of individual sequence names from the multiple alignment.
48    $output = `esl-seqstat -a $wrkdir/$msaname.sto | grep "^=" | awk '{print \$2}'`;
49    if ($?) { print "FAILED: esl-seqstat\n"; next MSA; }
50
51    @qnames = split(/^/,$output);
52    chop (@qnames);
53
54    # Loop over each query; phmmer; accumulate best pval for each target
55    foreach $qname (@qnames)
56    {
57	$output = `esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname`;
58	if ($?) { print "FAILED: esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname\n"; next MSA; }
59
60	`$phmmer --cpu 1 --tblout $wrkdir/$msaname.tmp $opts $wrkdir/$msaname.query.fa $fafile > /dev/null`;
61	if ($?) { print "FAILED: $phmmer --tblout $msaname.tmp $opts $wrkdir/$msaname.query.fa $fafile\n"; next MSA; }
62
63	open(OUTPUT, "$wrkdir/$msaname.tmp") || die "FAILED: to open $wrkdir/$msaname.tmp tabular output file";
64        while (<OUTPUT>)
65	{
66	    if (/^\#/) { next; }
67	    @fields   = split(' ', $_, 7);
68	    $target   = $fields[0];
69	    $pval     = $fields[4];
70	    $bitscore = $fields[5];
71	    if (! $seen{$target} || $pval < $best_pval{$target})
72	    {
73		$seen{$target}          = 1;
74		$best_pval{$target}     = $pval;
75		$best_bitscore{$target} = $bitscore;
76	    }
77	}
78	close OUTPUT;
79    }
80
81    # Append to the outfile.
82    foreach $target (keys(%seen))
83    {
84	printf OUTFILE "%g %.1f %s %s\n", $best_pval{$target}, $best_bitscore{$target}, $target, $msaname;
85    }
86
87    unlink "$wrkdir/$msaname.tmp";
88    unlink "$wrkdir/$msaname.query.fa";
89    unlink "$wrkdir/$msaname.sto";
90}
91close TABLE;
92close OUTFILE;
93