1#! /usr/bin/perl -w
2
3# Do a piece of a profmark benchmark, for NCBI BLASTP searches by FPS
4# (family-pairwise-search; best E-value of all individual queries).
5#
6# This script is normally called by pmark_master.pl; its command line
7# syntax is tied to pmark_master.pl.
8#
9# Usage:     x-ncbiblast-fps <top_builddir>          <top_srcdir>       <resultdir> <tblfile> <msafile> <fafile> <outfile>
10# Example: ./x-ncbiblast-fps /usr/local/blast-2.2.22 ~/src/hmmer/trunk  testdir     test.tbl  pmark.msa test.fa  test.out
11#
12# SRE, Tue Apr 20 08:29:03 2010 [Janelia]
13# SVN $Id$
14
15BEGIN {
16    $top_builddir  = shift;
17    $top_srcdir    = shift;
18    $wrkdir        = shift;
19    $tblfile       = shift;
20    $msafile       = shift;
21    $fafile        = shift;
22    $outfile       = shift;
23}
24use lib "${top_srcdir}/easel/demotic";
25use demotic_blast;
26
27$blastp      = "${top_builddir}/bin/blastall";
28$blastopts   = "-p blastp -a 1 -v 9999 -b 0";
29
30if (! -d $top_builddir)                                 { die "didn't find BLAST build directory $top_builddir"; }
31if (! -d $top_srcdir)                                   { die "didn't find H3 source directory $top_srcdir"; }
32if (! -x $blastp)                                       { die "didn't find executable $blastp"; }
33if (! -e $wrkdir)                                       { die "$wrkdir doesn't exist"; }
34
35open(OUTFILE,">$outfile") || die "failed to open $outfile";
36open(TABLE, "$tblfile")   || die "failed to open $tblfile";
37MSA:
38while (<TABLE>)
39{
40    ($msaname) = split;
41
42    %seen          = ();
43    %best_pval     = ();
44    %best_bitscore = ();
45
46    `esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname`;
47    if ($?) { print "FAILED: esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname\n"; next MSA; }
48
49    # Extract a list of individual sequence names from the multiple alignment.
50    $output = `esl-seqstat -a $wrkdir/$msaname.sto | grep "^=" | awk '{print \$2}'`;
51    if ($?) { print "FAILED: esl-seqstat -a $wrkdir/$msaname.sto\n"; next MSA; }
52
53    @qnames = split(/^/,$output);
54    chop (@qnames);
55
56    # Loop over each query; blast; accumulate best pval for each target
57    foreach $qname (@qnames)
58    {
59	$output = `esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname`;
60	if ($?) { print "FAILED: esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname\n"; next MSA; }
61
62	if (! open(BLASTP, "$blastp -d $fafile -i $wrkdir/$msaname.query.fa $blastopts |")) {
63	    print  "FAILED: $blastp -d $fafile -i $wrkdir/$msaname.query.fa $blastopts |\n"; next MSA;
64	}
65
66	if (! demotic_blast::parse(\*BLASTP)) {
67	    print "FAILED: demotic parser for blastp output\n"; next MSA;
68	}
69
70	for ($i = 0; $i < $demotic_blast::nhits; $i++)
71	{
72	    $target   = $demotic_blast::hit_target[$i];
73	    $pval     = $demotic_blast::hit_Eval[$i];
74	    $bitscore = $demotic_blast::hit_bitscore[$i];
75
76	    if (! $seen{$target} || $pval < $best_pval{$target})
77	    {
78		$seen{$target}          = 1;
79		$best_pval{$target}     = $pval;
80		$best_bitscore{$target} = $bitscore;
81	    }
82	}
83	close BLASTP;
84    }
85
86    # Append to the outfile.
87    foreach $target (keys(%seen))
88    {
89	printf OUTFILE "%g %.1f %s %s\n", $best_pval{$target}, $best_bitscore{$target}, $target, $msaname;
90    }
91
92    unlink "$wrkdir/$msaname.sto";
93    unlink "$wrkdir/$msaname.query.fa";
94}
95close TABLE;
96close OUTFILE;
97