1#! /usr/bin/perl -w 2 3# Do a piece of a profmark benchmark, for PSI-BLAST 4# 5# This script is normally called by pmark_master.pl; its command line 6# syntax is tied to pmark_master.pl. 7# 8# Usage: x-psiblast <top_builddir> <top_srcdir> <resultdir> <tblfile> <msafile> <fafile> <outfile> 9# 10# Example: ./x-psiblast /usr/local/blast-2.2.22 ~/src/hmmer testdir test.tbl pmark.msa test.fa test.out 11# 12# For best results, the target database should be masked. For example: 13# seg pmark.fa -x > pmark-seg.fa 14# 15 16BEGIN { 17 $top_builddir = shift; 18 $top_srcdir = shift; 19 $wrkdir = shift; 20 $tblfile = shift; 21 $msafile = shift; 22 $fafile = shift; 23 $outfile = shift; 24} 25use lib "${top_srcdir}/easel/demotic"; 26use demotic_blast; 27 28$formatdb = "${top_builddir}/bin/formatdb"; 29$blastpgp = "${top_builddir}/bin/blastpgp"; 30$blastopts1 = "-a 1 -v 9999 -b 0 -F T -u 1 -j 5 -J TRUE"; # opts for generating checkpoint file 31$blastopts2 = "-a 1 -v 9999 -b 0 -F F -q 1 -t 1"; # opts for searching the benchmark 32 33if (! -d $top_builddir) { die "didn't find H2 build directory $top_builddir"; } 34if (! -d $top_srcdir) { die "didn't find H3 source directory $top_srcdir"; } 35if (! -x $formatdb) { die "didn't find executable $formatdb"; } 36if (! -x $blastpgp) { die "didn't find executable $blastpgp"; } 37if (! -e $wrkdir) { die "$wrkdir doesn't exist"; } 38 39open(OUTFILE,">$outfile") || die "failed to open $outfile"; 40open(TABLE, "$tblfile") || die "failed to open $tblfile"; 41MSA: 42while (<TABLE>) 43{ 44 ($msaname) = split; 45 46 # Fetch the query MSA from the benchmark (.sto file) 47 $output = `esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname`; 48 if ($? != 0) { print "FAILED: esl-afetch on $msaname\n"; next MSA; } 49 50 # Reformat to psiblast format (.pbl file) 51 $output = `esl-reformat -o $wrkdir/$msaname.pbl psiblast $wrkdir/$msaname.sto`; 52 if ($? != 0) { print "FAILED: esl-reformat psiblast on $msaname\n"; next MSA; } 53 54 # Select median length single sequence as the "query" (.query.fa file) 55 $output = `esl-seqstat -a $wrkdir/$msaname.sto | grep "^=" | sort -n -k3 | awk '{print \$2}'`; 56 if ($?) { print "FAILED: esl-seqstat on $msaname\n"; next MSA; } 57 @qnames = split(/^/,$output); 58 chop (@qnames); 59 $qname = $qnames[ int(($#qnames+1) / 2)]; 60 $output = `esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname`; 61 if ($?) { print "FAILED: esl-sfetch on $msaname\n"; next MSA; } 62 63 # Create a PSI-BLAST checkpoint file by iterative search of the query 64 # against the seed sequences. (Thus, we're not using the seed alignment, 65 # only the seed seqs) 66 $output = `esl-reformat -o $wrkdir/$msaname.fa fasta $wrkdir/$msaname.sto`; 67 if ($?) { print "FAILED: esl-reformat fasta on $msaname\n"; next MSA; } 68 69 $output = `seg $wrkdir/$msaname.fa -x > $wrkdir/$msaname.x.fa`; 70 if ($?) { print "FAILED: esl-reformat fasta on $msaname\n"; next MSA; } 71 72 $output = `$formatdb -i $wrkdir/$msaname.x.fa`; 73 if ($?) { print "FAILED: formatdb on $msaname\n"; next MSA; } 74 75 $output = `$blastpgp $blastopts1 -d $wrkdir/$msaname.x.fa -i $wrkdir/$msaname.query.fa -C $wrkdir/$msaname.asnt`; 76 if ($?) { print "FAILED: blastpgp checkpoint file on $msaname\n"; next MSA; } 77 78 # Run psi-blast against the benchmark 79 if (! open(PSIBLAST, "$blastpgp $blastopts2 -d $fafile -R $wrkdir/$msaname.asnt 2>/dev/null |")) { print "FAILED: $blastpgp on $msaname\n"; next MSA; } 80 if (! demotic_blast::parse(\*PSIBLAST) ) { print "FAILED: demotic psiblast parser on $msaname\n"; next MSA; } 81 82 for ($i = 0; $i < $demotic_blast::nhits; $i++) 83 { 84 printf OUTFILE ("%g\t%.1f\t%s\t%s\n", 85 $demotic_blast::hit_Eval[$i], 86 $demotic_blast::hit_bitscore[$i], 87 $demotic_blast::hit_target[$i], 88 $msaname); 89 } 90 close PSIBLAST; 91 92 unlink "$wrkdir/$msaname.query.fa"; 93 unlink "$wrkdir/$msaname.hmm"; 94 unlink "$wrkdir/$msaname.pbl"; 95 unlink "$wrkdir/$msaname.sto"; 96 unlink "$wrkdir/$msaname.asnt"; 97 unlink "$wrkdir/$msaname.fa"; 98 unlink "$wrkdir/$msaname.x.fa"; 99 unlink "$wrkdir/$msaname.x.fa.phr"; 100 unlink "$wrkdir/$msaname.x.fa.pin"; 101 unlink "$wrkdir/$msaname.x.fa.psq"; 102 unlink "$wrkdir/formatdb.log"; 103} 104close TABLE; 105close OUTFILE; 106