1#! /usr/bin/perl -w 2 3# Do a piece of a profmark benchmark, for NCBI BLASTP searches by FPS 4# (family-pairwise-search; best E-value of all individual queries). 5# 6# This script is normally called by pmark_master.pl; its command line 7# syntax is tied to pmark_master.pl. 8# 9# Usage: x-ncbiblast-fps <top_builddir> <top_srcdir> <resultdir> <tblfile> <msafile> <fafile> <outfile> 10# Example: ./x-ncbiblast-fps /usr/local/blast-2.2.22 ~/src/hmmer/trunk testdir test.tbl pmark.msa test.fa test.out 11# 12# SRE, Tue Apr 20 08:29:03 2010 [Janelia] 13# SVN $Id$ 14 15BEGIN { 16 $top_builddir = shift; 17 $top_srcdir = shift; 18 $wrkdir = shift; 19 $tblfile = shift; 20 $msafile = shift; 21 $fafile = shift; 22 $outfile = shift; 23} 24use lib "${top_srcdir}/easel/demotic"; 25use demotic_blast; 26 27$blastp = "${top_builddir}/bin/blastall"; 28$blastopts = "-p blastp -a 1 -v 9999 -b 0"; 29 30if (! -d $top_builddir) { die "didn't find BLAST build directory $top_builddir"; } 31if (! -d $top_srcdir) { die "didn't find H3 source directory $top_srcdir"; } 32if (! -x $blastp) { die "didn't find executable $blastp"; } 33if (! -e $wrkdir) { die "$wrkdir doesn't exist"; } 34 35open(OUTFILE,">$outfile") || die "failed to open $outfile"; 36open(TABLE, "$tblfile") || die "failed to open $tblfile"; 37MSA: 38while (<TABLE>) 39{ 40 ($msaname) = split; 41 42 %seen = (); 43 %best_pval = (); 44 %best_bitscore = (); 45 46 `esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname`; 47 if ($?) { print "FAILED: esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname\n"; next MSA; } 48 49 # Extract a list of individual sequence names from the multiple alignment. 50 $output = `esl-seqstat -a $wrkdir/$msaname.sto | grep "^=" | awk '{print \$2}'`; 51 if ($?) { print "FAILED: esl-seqstat -a $wrkdir/$msaname.sto\n"; next MSA; } 52 53 @qnames = split(/^/,$output); 54 chop (@qnames); 55 56 # Loop over each query; blast; accumulate best pval for each target 57 foreach $qname (@qnames) 58 { 59 $output = `esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname`; 60 if ($?) { print "FAILED: esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname\n"; next MSA; } 61 62 if (! open(BLASTP, "$blastp -d $fafile -i $wrkdir/$msaname.query.fa $blastopts |")) { 63 print "FAILED: $blastp -d $fafile -i $wrkdir/$msaname.query.fa $blastopts |\n"; next MSA; 64 } 65 66 if (! demotic_blast::parse(\*BLASTP)) { 67 print "FAILED: demotic parser for blastp output\n"; next MSA; 68 } 69 70 for ($i = 0; $i < $demotic_blast::nhits; $i++) 71 { 72 $target = $demotic_blast::hit_target[$i]; 73 $pval = $demotic_blast::hit_Eval[$i]; 74 $bitscore = $demotic_blast::hit_bitscore[$i]; 75 76 if (! $seen{$target} || $pval < $best_pval{$target}) 77 { 78 $seen{$target} = 1; 79 $best_pval{$target} = $pval; 80 $best_bitscore{$target} = $bitscore; 81 } 82 } 83 close BLASTP; 84 } 85 86 # Append to the outfile. 87 foreach $target (keys(%seen)) 88 { 89 printf OUTFILE "%g %.1f %s %s\n", $best_pval{$target}, $best_bitscore{$target}, $target, $msaname; 90 } 91 92 unlink "$wrkdir/$msaname.sto"; 93 unlink "$wrkdir/$msaname.query.fa"; 94} 95close TABLE; 96close OUTFILE; 97