1#! /usr/bin/perl -w
2
3# Do a piece of a profmark benchmark, for PSI-BLAST
4#
5# This script is normally called by pmark_master.pl; its command line
6# syntax is tied to pmark_master.pl.
7#
8# Usage:     x-psiblast  <top_builddir>          <top_srcdir> <resultdir> <tblfile> <msafile> <fafile> <outfile>
9#
10# Example: ./x-psiblast  /usr/local/blast-2.2.22 ~/src/hmmer  testdir     test.tbl  pmark.msa test.fa  test.out
11#
12# For best results, the target database should be masked. For example:
13#   seg pmark.fa -x > pmark-seg.fa
14#
15
16BEGIN {
17    $top_builddir  = shift;
18    $top_srcdir    = shift;
19    $wrkdir        = shift;
20    $tblfile       = shift;
21    $msafile       = shift;
22    $fafile        = shift;
23    $outfile       = shift;
24}
25use lib "${top_srcdir}/easel/demotic";
26use demotic_blast;
27
28$formatdb   = "${top_builddir}/bin/formatdb";
29$blastpgp   = "${top_builddir}/bin/blastpgp";
30$blastopts1 = "-a 1 -v 9999 -b 0 -F T -u 1 -j 5 -J TRUE"; # opts for generating checkpoint file
31$blastopts2 = "-a 1 -v 9999 -b 0 -F F -q 1 -t 1";         # opts for searching the benchmark
32
33if (! -d $top_builddir)                                 { die "didn't find H2 build directory $top_builddir"; }
34if (! -d $top_srcdir)                                   { die "didn't find H3 source directory $top_srcdir"; }
35if (! -x $formatdb)                                     { die "didn't find executable $formatdb"; }
36if (! -x $blastpgp)                                     { die "didn't find executable $blastpgp"; }
37if (! -e $wrkdir)                                       { die "$wrkdir doesn't exist"; }
38
39open(OUTFILE,">$outfile")   || die "failed to open $outfile";
40open(TABLE, "$tblfile")     || die "failed to open $tblfile";
41MSA:
42while (<TABLE>)
43{
44    ($msaname) = split;
45
46    # Fetch the query MSA from the benchmark  (.sto file)
47    $output = `esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname`;
48    if ($? != 0) { print "FAILED: esl-afetch on $msaname\n"; next MSA; }
49
50    # Reformat to psiblast format (.pbl file)
51    $output = `esl-reformat -o $wrkdir/$msaname.pbl psiblast $wrkdir/$msaname.sto`;
52    if ($? != 0) { print "FAILED: esl-reformat psiblast on $msaname\n"; next MSA; }
53
54    # Select median length single sequence as the "query" (.query.fa file)
55    $output = `esl-seqstat -a $wrkdir/$msaname.sto | grep "^=" | sort -n -k3 | awk '{print \$2}'`;
56    if ($?) { print "FAILED: esl-seqstat on $msaname\n"; next MSA; }
57    @qnames = split(/^/,$output);
58    chop (@qnames);
59    $qname = $qnames[ int(($#qnames+1) / 2)];
60    $output = `esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname`;
61    if ($?) { print "FAILED: esl-sfetch on $msaname\n"; next MSA; }
62
63    # Create a PSI-BLAST checkpoint file by iterative search of the query
64    # against the seed sequences. (Thus, we're not using the seed alignment,
65    # only the seed seqs)
66    $output = `esl-reformat -o $wrkdir/$msaname.fa fasta $wrkdir/$msaname.sto`;
67    if ($?) { print "FAILED: esl-reformat fasta on $msaname\n"; next MSA; }
68
69    $output = `seg $wrkdir/$msaname.fa -x > $wrkdir/$msaname.x.fa`;
70    if ($?) { print "FAILED: esl-reformat fasta on $msaname\n"; next MSA; }
71
72    $output = `$formatdb -i $wrkdir/$msaname.x.fa`;
73    if ($?) { print "FAILED: formatdb on $msaname\n"; next MSA; }
74
75    $output = `$blastpgp $blastopts1 -d $wrkdir/$msaname.x.fa -i $wrkdir/$msaname.query.fa -C $wrkdir/$msaname.asnt`;
76    if ($?) { print "FAILED: blastpgp checkpoint file on $msaname\n"; next MSA; }
77
78    # Run psi-blast against the benchmark
79    if (! open(PSIBLAST, "$blastpgp $blastopts2 -d $fafile -R $wrkdir/$msaname.asnt 2>/dev/null |")) { print "FAILED: $blastpgp on $msaname\n";               next MSA; }
80    if (! demotic_blast::parse(\*PSIBLAST)                                                         ) { print "FAILED: demotic psiblast parser on $msaname\n"; next MSA; }
81
82    for ($i = 0; $i < $demotic_blast::nhits; $i++)
83    {
84        printf OUTFILE ("%g\t%.1f\t%s\t%s\n",
85			$demotic_blast::hit_Eval[$i],
86			$demotic_blast::hit_bitscore[$i],
87			$demotic_blast::hit_target[$i],
88			$msaname);
89    }
90    close PSIBLAST;
91
92    unlink "$wrkdir/$msaname.query.fa";
93    unlink "$wrkdir/$msaname.hmm";
94    unlink "$wrkdir/$msaname.pbl";
95    unlink "$wrkdir/$msaname.sto";
96    unlink "$wrkdir/$msaname.asnt";
97    unlink "$wrkdir/$msaname.fa";
98    unlink "$wrkdir/$msaname.x.fa";
99    unlink "$wrkdir/$msaname.x.fa.phr";
100    unlink "$wrkdir/$msaname.x.fa.pin";
101    unlink "$wrkdir/$msaname.x.fa.psq";
102    unlink "$wrkdir/formatdb.log";
103}
104close TABLE;
105close OUTFILE;
106