1 /* phmmer: search a protein sequence against a protein database
2  */
3 #include "p7_config.h"
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 
9 #include "easel.h"
10 #include "esl_alphabet.h"
11 #include "esl_dmatrix.h"
12 #include "esl_getopts.h"
13 #include "esl_msa.h"
14 #include "esl_msafile.h"
15 #include "esl_scorematrix.h"
16 #include "esl_sq.h"
17 #include "esl_sqio.h"
18 #include "esl_stopwatch.h"
19 
20 #ifdef HMMER_MPI
21 #include "mpi.h"
22 #include "esl_mpi.h"
23 #endif
24 
25 #ifdef HMMER_THREADS
26 #include <unistd.h>
27 #include "esl_threads.h"
28 #include "esl_workqueue.h"
29 #endif
30 
31 #include "hmmer.h"
32 
33 typedef struct {
34 #ifdef HMMER_THREADS
35   ESL_WORK_QUEUE   *queue;
36 #endif
37   P7_BG            *bg;
38   P7_PIPELINE      *pli;
39   P7_TOPHITS       *th;
40   P7_OPROFILE      *om;
41 } WORKER_INFO;
42 
43 #define REPOPTS     "-E,-T,--cut_ga,--cut_nc,--cut_tc"
44 #define DOMREPOPTS  "--domE,--domT,--cut_ga,--cut_nc,--cut_tc"
45 #define INCOPTS     "--incE,--incT,--cut_ga,--cut_nc,--cut_tc"
46 #define INCDOMOPTS  "--incdomE,--incdomT,--cut_ga,--cut_nc,--cut_tc"
47 #define THRESHOPTS  "-E,-T,--domE,--domT,--incE,--incT,--incdomE,--incdomT,--cut_ga,--cut_nc,--cut_tc"
48 
49 #if defined (HMMER_THREADS) && defined (HMMER_MPI)
50 #define CPUOPTS     "--mpi"
51 #define MPIOPTS     "--cpu"
52 #else
53 #define CPUOPTS     NULL
54 #define MPIOPTS     NULL
55 #endif
56 
57 static ESL_OPTIONS options[] = {
58   /* name           type              default   env  range   toggles   reqs   incomp                             help                                       docgroup*/
59   { "-h",           eslARG_NONE,        FALSE, NULL, NULL,      NULL,  NULL,  NULL,              "show brief help on version and usage",                         1 },
60 /* Control of output */
61   { "-o",           eslARG_OUTFILE,      NULL, NULL, NULL,      NULL,  NULL,  NULL,              "direct output to file <f>, not stdout",                        2 },
62   { "-A",           eslARG_OUTFILE,      NULL, NULL, NULL,      NULL,  NULL,  NULL,              "save multiple alignment of hits to file <f>",                  2 },
63   { "--tblout",     eslARG_OUTFILE,      NULL, NULL, NULL,      NULL,  NULL,  NULL,              "save parseable table of per-sequence hits to file <f>",        2 },
64   { "--domtblout",  eslARG_OUTFILE,      NULL, NULL, NULL,      NULL,  NULL,  NULL,              "save parseable table of per-domain hits to file <f>",          2 },
65   { "--pfamtblout", eslARG_OUTFILE,      NULL, NULL, NULL,      NULL,  NULL,  NULL,              "save table of hits and domains to file, in Pfam format <f>",   2 },
66   { "--acc",        eslARG_NONE,        FALSE, NULL, NULL,      NULL,  NULL,  NULL,              "prefer accessions over names in output",                       2 },
67   { "--noali",      eslARG_NONE,        FALSE, NULL, NULL,      NULL,  NULL,  NULL,              "don't output alignments, so output is smaller",                2 },
68   { "--notextw",    eslARG_NONE,         NULL, NULL, NULL,      NULL,  NULL, "--textw",          "unlimit ASCII text output line width",                         2 },
69   { "--textw",      eslARG_INT,         "120", NULL, "n>=120",  NULL,  NULL, "--notextw",        "set max width of ASCII text output lines",                     2 },
70 /* Control of scoring system */
71   { "--popen",      eslARG_REAL,       "0.02", NULL, "0<=x<0.5",NULL,  NULL,  NULL,              "gap open probability",                                         3 },
72   { "--pextend",    eslARG_REAL,        "0.4", NULL, "0<=x<1",  NULL,  NULL,  NULL,              "gap extend probability",                                       3 },
73   { "--mx",         eslARG_STRING, "BLOSUM62", NULL, NULL,      NULL,  NULL,  "--mxfile",        "substitution score matrix choice (of some built-in matrices)", 3 },
74   { "--mxfile",     eslARG_INFILE,       NULL, NULL, NULL,      NULL,  NULL,  "--mx",            "read substitution score matrix from file <f>",                 3 },
75 /* Control of reporting thresholds */
76   { "-E",           eslARG_REAL,       "10.0", NULL, "x>0",     NULL,  NULL,  REPOPTS,           "report sequences <= this E-value threshold in output",         4 },
77   { "-T",           eslARG_REAL,        FALSE, NULL,  NULL,     NULL,  NULL,  REPOPTS,           "report sequences >= this score threshold in output",           4 },
78   { "--domE",       eslARG_REAL,       "10.0", NULL, "x>0",     NULL,  NULL,  DOMREPOPTS,        "report domains <= this E-value threshold in output",           4 },
79   { "--domT",       eslARG_REAL,        FALSE, NULL,  NULL,     NULL,  NULL,  DOMREPOPTS,        "report domains >= this score cutoff in output",                4 },
80 /* Control of inclusion thresholds */
81   { "--incE",       eslARG_REAL,       "0.01", NULL, "x>0",     NULL,  NULL,  INCOPTS,           "consider sequences <= this E-value threshold as significant",  5 },
82   { "--incT",       eslARG_REAL,        FALSE, NULL,  NULL,     NULL,  NULL,  INCOPTS,           "consider sequences >= this score threshold as significant",    5 },
83   { "--incdomE",    eslARG_REAL,       "0.01", NULL, "x>0",     NULL,  NULL,  INCDOMOPTS,        "consider domains <= this E-value threshold as significant",    5 },
84   { "--incdomT",    eslARG_REAL,        FALSE, NULL,  NULL,     NULL,  NULL,  INCDOMOPTS,        "consider domains >= this score threshold as significant",      5 },
85 /* Model-specific thresholding for both reporting and inclusion (unused in phmmer)*/
86   { "--cut_ga",     eslARG_NONE,        FALSE, NULL, NULL,      NULL,  NULL,  THRESHOPTS,        "use profile's GA gathering cutoffs to set all thresholding",  99 },
87   { "--cut_nc",     eslARG_NONE,        FALSE, NULL, NULL,      NULL,  NULL,  THRESHOPTS,        "use profile's NC noise cutoffs to set all thresholding",      99 },
88   { "--cut_tc",     eslARG_NONE,        FALSE, NULL, NULL,      NULL,  NULL,  THRESHOPTS,        "use profile's TC trusted cutoffs to set all thresholding",    99 },
89 /* Control of filter pipeline */
90   { "--max",        eslARG_NONE,        FALSE, NULL, NULL,      NULL,  NULL, "--F1,--F2,--F3",   "Turn all heuristic filters off (less speed, more power)",      7 },
91   { "--F1",         eslARG_REAL,       "0.02", NULL, NULL,      NULL,  NULL, "--max",            "Stage 1 (MSV) threshold: promote hits w/ P <= F1",             7 },
92   { "--F2",         eslARG_REAL,       "1e-3", NULL, NULL,      NULL,  NULL, "--max",            "Stage 2 (Vit) threshold: promote hits w/ P <= F2",             7 },
93   { "--F3",         eslARG_REAL,       "1e-5", NULL, NULL,      NULL,  NULL, "--max",            "Stage 3 (Fwd) threshold: promote hits w/ P <= F3",             7 },
94   { "--nobias",     eslARG_NONE,        NULL,  NULL, NULL,      NULL,  NULL, "--max",            "turn off composition bias filter",                             7 },
95 /* Control of E-value calibration */
96   { "--EmL",        eslARG_INT,         "200", NULL,"n>0",      NULL,  NULL,  NULL,              "length of sequences for MSV Gumbel mu fit",                   11 },
97   { "--EmN",        eslARG_INT,         "200", NULL,"n>0",      NULL,  NULL,  NULL,              "number of sequences for MSV Gumbel mu fit",                   11 },
98   { "--EvL",        eslARG_INT,         "200", NULL,"n>0",      NULL,  NULL,  NULL,              "length of sequences for Viterbi Gumbel mu fit",               11 },
99   { "--EvN",        eslARG_INT,         "200", NULL,"n>0",      NULL,  NULL,  NULL,              "number of sequences for Viterbi Gumbel mu fit",               11 },
100   { "--EfL",        eslARG_INT,         "100", NULL,"n>0",      NULL,  NULL,  NULL,              "length of sequences for Forward exp tail tau fit",            11 },
101   { "--EfN",        eslARG_INT,         "200", NULL,"n>0",      NULL,  NULL,  NULL,              "number of sequences for Forward exp tail tau fit",            11 },
102   { "--Eft",        eslARG_REAL,       "0.04", NULL,"0<x<1",    NULL,  NULL,  NULL,              "tail mass for Forward exponential tail tau fit",              11 },
103 /* other options */
104   { "--nonull2",    eslARG_NONE,        NULL,  NULL, NULL,      NULL,  NULL,  NULL,              "turn off biased composition score corrections",               12 },
105   { "-Z",           eslARG_REAL,       FALSE, NULL, "x>0",     NULL,  NULL,  NULL,              "set # of comparisons done, for E-value calculation",          12 },
106   { "--domZ",       eslARG_REAL,       FALSE, NULL, "x>0",     NULL,  NULL,  NULL,              "set # of significant seqs, for domain E-value calculation",   12 },
107   { "--seed",       eslARG_INT,         "42",  NULL, "n>=0",    NULL,  NULL,  NULL,              "set RNG seed to <n> (if 0: one-time arbitrary seed)",         12 },
108   { "--qformat",    eslARG_STRING,      NULL, NULL, NULL,      NULL,  NULL,  NULL,              "assert query <seqfile> is in format <s>: no autodetection",   12 },
109   { "--tformat",    eslARG_STRING,      NULL, NULL, NULL,      NULL,  NULL,  NULL,              "assert target <seqdb> is in format <s>>: no autodetection",   12 },
110 #ifdef HMMER_THREADS
111   { "--cpu",        eslARG_INT,  p7_NCPU,"HMMER_NCPU", "n>=0",NULL,  NULL,  CPUOPTS,            "number of parallel CPU workers to use for multithreads",      12 },
112 #endif
113 #ifdef HMMER_MPI
114   { "--stall",      eslARG_NONE,   FALSE, NULL, NULL,      NULL,"--mpi", NULL,              "arrest after start: for debugging MPI under gdb",             12 },
115   { "--mpi",        eslARG_NONE,   FALSE, NULL, NULL,      NULL,  NULL,  MPIOPTS,           "run as an MPI parallel program",                              12 },
116 #endif
117 
118   /* Restrict search to subset of database - hidden because these flags are
119    *   (a) currently for internal use
120    *   (b) probably going to change
121    * Doesn't work with MPI
122    */
123   { "--restrictdb_stkey", eslARG_STRING, "0",  NULL, NULL,    NULL,  NULL,  NULL,       "Search starts at the sequence with name <s> (not with MPI)",     99 },
124   { "--restrictdb_n",eslARG_INT,        "-1",  NULL, NULL,    NULL,  NULL,  NULL,       "Search <j> target sequences (starting at --restrictdb_stkey)",   99 },
125   { "--ssifile",    eslARG_STRING,       NULL, NULL, NULL,    NULL,  NULL,  NULL,       "restrictdb_x values require ssi file. Override default to <s>",  99 },
126 
127  {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
128 };
129 static char usage[]  = "[-options] <seqfile> <seqdb>";
130 static char banner[] = "search a protein sequence against a protein database";
131 
132 /* struct cfg_s : "Global" application configuration shared by all threads/processes
133  *
134  * This structure is passed to routines within main.c, as a means of semi-encapsulation
135  * of shared data amongst different parallel processes (threads or MPI processes).
136  */
137 struct cfg_s {
138   char            *qfile;             /* query sequence file                             */
139   char            *dbfile;            /* database file                               */
140 
141   int              do_mpi;            /* TRUE if we're doing MPI parallelization         */
142   int              nproc;             /* how many MPI processes, total                   */
143   int              my_rank;           /* who am I, in 0..nproc-1                         */
144 
145   char             *firstseq_key;     /* name of the first sequence in the restricted db range */
146   int              n_targetseq;       /* number of sequences in the restricted range */
147 };
148 
149 static int  serial_master(ESL_GETOPTS *go, struct cfg_s *cfg);
150 static int  serial_loop  (WORKER_INFO *info, ESL_SQFILE *dbfp, int n_targetseqs);
151 
152 #ifdef HMMER_THREADS
153 #define BLOCK_SIZE 1000
154 
155 static int  thread_loop(ESL_THREADS *obj, ESL_WORK_QUEUE *queue, ESL_SQFILE *dbfp, int n_targetseqs);
156 static void pipeline_thread(void *arg);
157 #endif
158 
159 #ifdef HMMER_MPI
160 static int  mpi_master   (ESL_GETOPTS *go, struct cfg_s *cfg);
161 static int  mpi_worker   (ESL_GETOPTS *go, struct cfg_s *cfg);
162 #endif
163 
164 /* process_commandline()
165  * Take argc, argv, and options; parse the command line;
166  * display help/usage info.
167  */
168 static int
process_commandline(int argc,char ** argv,ESL_GETOPTS ** ret_go,char ** ret_qfile,char ** ret_dbfile)169 process_commandline(int argc, char **argv, ESL_GETOPTS **ret_go, char **ret_qfile, char **ret_dbfile)
170 {
171   ESL_GETOPTS *go = esl_getopts_Create(options);
172   int          status;
173 
174   if (esl_opt_ProcessEnvironment(go)         != eslOK)  { if (printf("Failed to process environment: %s\n", go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
175   if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK)  { if (printf("Failed to parse command line: %s\n",  go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
176   if (esl_opt_VerifyConfig(go)               != eslOK)  { if (printf("Failed to parse command line: %s\n",  go->errbuf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
177 
178   /* help format: */
179   if (esl_opt_GetBoolean(go, "-h") == TRUE)
180     {
181       p7_banner(stdout, argv[0], banner);
182       esl_usage(stdout, argv[0], usage);
183       if (puts("\nBasic options:")                                           < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
184       esl_opt_DisplayHelp(stdout, go, 1, 2, 80); /* 1= group; 2 = indentation; 120=textwidth*/
185 
186       if (puts("\nOptions directing output:")                                < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
187       esl_opt_DisplayHelp(stdout, go, 2, 2, 80);
188 
189       if (puts("\nOptions controlling scoring system:")                      < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
190       esl_opt_DisplayHelp(stdout, go, 3, 2, 80);
191 
192       if (puts("\nOptions controlling reporting thresholds:")                < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
193       esl_opt_DisplayHelp(stdout, go, 4, 2, 80);
194 
195       if (puts("\nOptions controlling inclusion (significance) thresholds:") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
196       esl_opt_DisplayHelp(stdout, go, 5, 2, 80);
197 
198       if (puts("\nOptions controlling acceleration heuristics:")             < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
199       esl_opt_DisplayHelp(stdout, go, 7, 2, 80);
200 
201       if (puts("\nOptions controlling E value calibration:")                 < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
202       esl_opt_DisplayHelp(stdout, go, 11, 2, 80);
203 
204       if (puts("\nOther expert options:")                                    < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
205       esl_opt_DisplayHelp(stdout, go, 12, 2, 80);
206       exit(0);
207     }
208 
209   if (esl_opt_ArgNumber(go)                 != 2)    { if (puts("Incorrect number of command line arguments.")      < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
210   if ((*ret_qfile  = esl_opt_GetArg(go, 1)) == NULL) { if (puts("Failed to get <seqfile> argument on command line") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
211   if ((*ret_dbfile = esl_opt_GetArg(go, 2)) == NULL) { if (puts("Failed to get <seqdb> argument on command line")   < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); goto FAILURE; }
212 
213   /* Validate any attempted use of stdin streams */
214   if (strcmp(*ret_qfile, "-") == 0 && strcmp(*ret_dbfile, "-") == 0)
215     { if (puts("Either <seqfile> or <seqdb> may be '-' (to read from stdin), but not both.") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");  goto FAILURE; }
216 
217   *ret_go = go;
218   return eslOK;
219 
220  FAILURE:  /* all errors handled here are user errors, so be polite.  */
221   esl_usage(stdout, argv[0], usage);
222   if (puts("\nwhere basic options are:")                                       < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
223   esl_opt_DisplayHelp(stdout, go, 1, 2, 120); /* 1= group; 2 = indentation; 120=textwidth*/
224   if (printf("\nTo see more help on available options, do %s -h\n\n", argv[0]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed");
225   esl_getopts_Destroy(go);
226   exit(1);
227 
228  ERROR:
229   if (go) esl_getopts_Destroy(go);
230   exit(status);
231 }
232 
233 
234 static int
output_header(FILE * ofp,ESL_GETOPTS * go,char * qfile,char * dbfile)235 output_header(FILE *ofp, ESL_GETOPTS *go, char *qfile, char *dbfile)
236 {
237   p7_banner(ofp, go->argv[0], banner);
238 
239   if (fprintf(ofp, "# query sequence file:             %s\n", qfile)                                                                                 < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
240   if (fprintf(ofp, "# target sequence database:        %s\n", dbfile)                                                                                < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
241   if (esl_opt_IsUsed(go, "-o")          && fprintf(ofp, "# output directed to file:         %s\n",             esl_opt_GetString(go, "-o"))          < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
242   if (esl_opt_IsUsed(go, "-A")          && fprintf(ofp, "# MSA of hits saved to file:       %s\n",             esl_opt_GetString(go, "-A"))          < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
243   if (esl_opt_IsUsed(go, "--tblout")    && fprintf(ofp, "# per-seq hits tabular output:     %s\n",             esl_opt_GetString(go, "--tblout"))    < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
244   if (esl_opt_IsUsed(go, "--domtblout") && fprintf(ofp, "# per-dom hits tabular output:     %s\n",             esl_opt_GetString(go, "--domtblout")) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
245   if (esl_opt_IsUsed(go, "--pfamtblout")&& fprintf(ofp, "# pfam-style tabular hit output:   %s\n",             esl_opt_GetString(go, "--pfamtblout")) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
246   if (esl_opt_IsUsed(go, "--acc")       && fprintf(ofp, "# prefer accessions over names:    yes\n")                                                  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
247   if (esl_opt_IsUsed(go, "--noali")     && fprintf(ofp, "# show alignments in output:       no\n")                                                   < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
248   if (esl_opt_IsUsed(go, "--notextw")   && fprintf(ofp, "# max ASCII text line length:      unlimited\n")                                            < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
249   if (esl_opt_IsUsed(go, "--textw")     && fprintf(ofp, "# max ASCII text line length:      %d\n",             esl_opt_GetInteger(go, "--textw"))    < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
250   if (esl_opt_IsUsed(go, "--popen")     && fprintf(ofp, "# gap open probability:            %f\n",             esl_opt_GetReal  (go, "--popen"))     < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
251   if (esl_opt_IsUsed(go, "--pextend")   && fprintf(ofp, "# gap extend probability:          %f\n",             esl_opt_GetReal  (go, "--pextend"))   < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
252   if (esl_opt_IsUsed(go, "--mx")        && fprintf(ofp, "# subst score matrix (built-in):   %s\n",             esl_opt_GetString(go, "--mx"))        < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
253   if (esl_opt_IsUsed(go, "--mxfile")    && fprintf(ofp, "# subst score matrix (file):       %s\n",             esl_opt_GetString(go, "--mxfile"))    < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
254   if (esl_opt_IsUsed(go, "-E")          && fprintf(ofp, "# sequence reporting threshold:    E-value <= %g\n",  esl_opt_GetReal(go, "-E"))            < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
255   if (esl_opt_IsUsed(go, "-T")          && fprintf(ofp, "# sequence reporting threshold:    score >= %g\n",    esl_opt_GetReal(go, "-T"))            < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
256   if (esl_opt_IsUsed(go, "--domE")      && fprintf(ofp, "# domain reporting threshold:      E-value <= %g\n",  esl_opt_GetReal(go, "--domE"))        < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
257   if (esl_opt_IsUsed(go, "--domT")      && fprintf(ofp, "# domain reporting threshold:      score >= %g\n",    esl_opt_GetReal(go, "--domT"))        < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
258   if (esl_opt_IsUsed(go, "--incE")      && fprintf(ofp, "# sequence inclusion threshold:    E-value <= %g\n",  esl_opt_GetReal(go, "--incE"))        < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
259   if (esl_opt_IsUsed(go, "--incT")      && fprintf(ofp, "# sequence inclusion threshold:    score >= %g\n",    esl_opt_GetReal(go, "--incT"))        < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
260   if (esl_opt_IsUsed(go, "--incdomE")   && fprintf(ofp, "# domain inclusion threshold:      E-value <= %g\n",  esl_opt_GetReal(go, "--incdomE"))     < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
261   if (esl_opt_IsUsed(go, "--incdomT")   && fprintf(ofp, "# domain inclusion threshold:      score >= %g\n",    esl_opt_GetReal(go, "--incdomT"))     < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
262 //if (esl_opt_IsUsed(go, "--cut_ga")    && fprintf(ofp, "# model-specific thresholding:     GA cutoffs\n")                                           < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
263 //if (esl_opt_IsUsed(go, "--cut_nc")    && fprintf(ofp, "# model-specific thresholding:     NC cutoffs\n")                                           < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
264 //if (esl_opt_IsUsed(go, "--cut_tc")    && fprintf(ofp, "# model-specific thresholding:     TC cutoffs\n")                                           < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
265   if (esl_opt_IsUsed(go, "--max")       && fprintf(ofp, "# Max sensitivity mode:            on [all heuristic filters off]\n")                       < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
266   if (esl_opt_IsUsed(go, "--F1")        && fprintf(ofp, "# MSV filter P threshold:       <= %g\n",             esl_opt_GetReal(go, "--F1"))          < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
267   if (esl_opt_IsUsed(go, "--F2")        && fprintf(ofp, "# Vit filter P threshold:       <= %g\n",             esl_opt_GetReal(go, "--F2"))          < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
268   if (esl_opt_IsUsed(go, "--F3")        && fprintf(ofp, "# Fwd filter P threshold:       <= %g\n",             esl_opt_GetReal(go, "--F3"))          < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
269   if (esl_opt_IsUsed(go, "--nobias")    && fprintf(ofp, "# biased composition HMM filter:   off\n")                                                  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
270   if (esl_opt_IsUsed(go, "--restrictdb_stkey") && fprintf(ofp, "# Restrict db to start at seq key: %s\n",            esl_opt_GetString(go, "--restrictdb_stkey"))  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
271   if (esl_opt_IsUsed(go, "--restrictdb_n")     && fprintf(ofp, "# Restrict db to # target seqs:    %d\n",            esl_opt_GetInteger(go, "--restrictdb_n")) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
272   if (esl_opt_IsUsed(go, "--ssifile")          && fprintf(ofp, "# Override ssi file to:            %s\n",            esl_opt_GetString(go, "--ssifile"))       < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
273 
274   if (esl_opt_IsUsed(go, "--nonull2")   && fprintf(ofp, "# null2 bias corrections:          off\n")                                                  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
275   if (esl_opt_IsUsed(go, "--EmL")       && fprintf(ofp, "# seq length, MSV Gumbel mu fit:   %d\n",             esl_opt_GetInteger(go, "--EmL"))      < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
276   if (esl_opt_IsUsed(go, "--EmN")       && fprintf(ofp, "# seq number, MSV Gumbel mu fit:   %d\n",             esl_opt_GetInteger(go, "--EmN"))      < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
277   if (esl_opt_IsUsed(go, "--EvL")       && fprintf(ofp, "# seq length, Vit Gumbel mu fit:   %d\n",             esl_opt_GetInteger(go, "--EvL"))      < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
278   if (esl_opt_IsUsed(go, "--EvN")       && fprintf(ofp, "# seq number, Vit Gumbel mu fit:   %d\n",             esl_opt_GetInteger(go, "--EvN"))      < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
279   if (esl_opt_IsUsed(go, "--EfL")       && fprintf(ofp, "# seq length, Fwd exp tau fit:     %d\n",             esl_opt_GetInteger(go, "--EfL"))      < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
280   if (esl_opt_IsUsed(go, "--EfN")       && fprintf(ofp, "# seq number, Fwd exp tau fit:     %d\n",             esl_opt_GetInteger(go, "--EfN"))      < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
281   if (esl_opt_IsUsed(go, "--Eft")       && fprintf(ofp, "# tail mass for Fwd exp tau fit:   %f\n",             esl_opt_GetReal   (go, "--Eft"))      < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
282   if (esl_opt_IsUsed(go, "-Z")          && fprintf(ofp, "# sequence search space set to:    %.0f\n",           esl_opt_GetReal(go, "-Z"))            < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
283   if (esl_opt_IsUsed(go, "--domZ")      && fprintf(ofp, "# domain search space set to:      %.0f\n",           esl_opt_GetReal(go, "--domZ"))        < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
284   if (esl_opt_IsUsed(go, "--seed"))  {
285     if (esl_opt_GetInteger(go, "--seed") == 0 && fprintf(ofp, "# random number seed:              one-time arbitrary\n")                             < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
286     else if (                                    fprintf(ofp, "# random number seed set to:       %d\n",      esl_opt_GetInteger(go, "--seed"))      < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
287   }
288   if (esl_opt_IsUsed(go, "--qformat")   && fprintf(ofp, "# query <seqfile> format asserted: %s\n",            esl_opt_GetString(go, "--qformat"))    < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
289   if (esl_opt_IsUsed(go, "--tformat")   && fprintf(ofp, "# target <seqdb> format asserted:  %s\n",            esl_opt_GetString(go, "--tformat"))    < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
290 #ifdef HMMER_THREADS
291   if (esl_opt_IsUsed(go, "--cpu")       && fprintf(ofp, "# number of worker threads:        %d\n",            esl_opt_GetInteger(go, "--cpu"))       < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
292 #endif
293 #ifdef HMMER_MPI
294   if (esl_opt_IsUsed(go, "--mpi")       && fprintf(ofp, "# MPI:                             on\n")                                                   < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
295 #endif
296   if (fprintf(ofp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n")                                                  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
297   return eslOK;
298 }
299 
300 
301 int
main(int argc,char ** argv)302 main(int argc, char **argv)
303 {
304   int              status   = eslOK;
305 
306   ESL_GETOPTS     *go  = NULL;	/* command line processing                 */
307   struct cfg_s     cfg;         /* configuration data                      */
308 
309   /* Set processor specific flags */
310   impl_Init();
311 
312   /* Initialize what we can in the config structure (without knowing the alphabet yet)
313    */
314   cfg.qfile      = NULL;
315   cfg.dbfile     = NULL;
316 
317   cfg.do_mpi     = FALSE;	           /* this gets reset below, if we init MPI */
318   cfg.nproc      = 0;		           /* this gets reset below, if we init MPI */
319   cfg.my_rank    = 0;		           /* this gets reset below, if we init MPI */
320   cfg.firstseq_key = NULL;
321   cfg.n_targetseq  = -1;
322 
323   /* Initializations */
324   p7_FLogsumInit();		/* we're going to use table-driven Logsum() approximations at times */
325   process_commandline(argc, argv, &go, &cfg.qfile, &cfg.dbfile);
326 
327 
328   /* is the range restricted? */
329   if (esl_opt_IsUsed(go, "--restrictdb_stkey") )
330     if ((cfg.firstseq_key = esl_opt_GetString(go, "--restrictdb_stkey")) == NULL)  p7_Fail("Failure capturing --restrictdb_stkey\n");
331 
332   if (esl_opt_IsUsed(go, "--restrictdb_n") )
333     cfg.n_targetseq = esl_opt_GetInteger(go, "--restrictdb_n");
334 
335   if ( cfg.n_targetseq != -1 && cfg.n_targetseq < 1 )
336     p7_Fail("--restrictdb_n must be >= 1\n");
337 
338   /* Figure out who we are, and send control there:
339    * we might be an MPI master, an MPI worker, or a serial program.
340    */
341 #ifdef HMMER_MPI
342   /* pause the execution of the programs execution until the user has a
343    * chance to attach with a debugger and send a signal to resume execution
344    * i.e. (gdb) signal SIGCONT
345    */
346   if (esl_opt_GetBoolean(go, "--stall")) pause();
347 
348   if (esl_opt_GetBoolean(go, "--mpi"))
349     {
350       cfg.do_mpi     = TRUE;
351       MPI_Init(&argc, &argv);
352       MPI_Comm_rank(MPI_COMM_WORLD, &(cfg.my_rank));
353       MPI_Comm_size(MPI_COMM_WORLD, &(cfg.nproc));
354 
355       if (cfg.my_rank > 0)  status = mpi_worker(go, &cfg);
356       else 		    status = mpi_master(go, &cfg);
357 
358       MPI_Finalize();
359     }
360   else
361 #endif /*HMMER_MPI*/
362     {
363       status = serial_master(go, &cfg);
364     }
365 
366   esl_getopts_Destroy(go);
367 
368   return status;
369 }
370 
371 /* serial_master()
372  * For each query sequence in <seqfile> search the database for hits.
373  *
374  * A master can only return if it's successful. All errors are handled
375  * immediately and fatally with p7_Fail(). Where we use the
376  * ESL_EXCEPTION mechanism and ERROR: block, it's for convenience; we
377  * know we're using a fatal exception handler.
378  */
379 static int
serial_master(ESL_GETOPTS * go,struct cfg_s * cfg)380 serial_master(ESL_GETOPTS *go, struct cfg_s *cfg)
381 {
382   FILE            *ofp      = stdout;             /* output file for results (default stdout)         */
383   FILE            *afp      = NULL;               /* alignment output file (-A option)                */
384   FILE            *tblfp    = NULL;		  /* output stream for tabular per-seq (--tblout)     */
385   FILE            *domtblfp = NULL;		  /* output stream for tabular per-seq (--domtblout)  */
386   FILE            *pfamtblfp= NULL;              /* output stream for pfam tabular output (--pfamtblout)    */
387   int              qformat  = eslSQFILE_UNKNOWN;  /* format of qfile                                  */
388   ESL_SQFILE      *qfp      = NULL;		  /* open qfile                                       */
389   ESL_SQ          *qsq      = NULL;               /* query sequence                                   */
390   int              dbformat = eslSQFILE_UNKNOWN;  /* format of dbfile                                 */
391   ESL_SQFILE      *dbfp     = NULL;               /* open dbfile                                      */
392   ESL_ALPHABET    *abc      = NULL;               /* sequence alphabet                                */
393   P7_BG           *bg       = NULL;		  /* null model (copies made of this into threads)    */
394   P7_BUILDER      *bld      = NULL;               /* HMM construction configuration                   */
395   ESL_STOPWATCH   *w        = NULL;               /* for timing                                       */
396   int              nquery   = 0;
397   int              seed;
398   int              textw;
399   int              status   = eslOK;
400   int              qstatus  = eslOK;
401   int              sstatus  = eslOK;
402   int              i;
403   int              ncpus    = 0;
404   int              infocnt  = 0;
405   WORKER_INFO     *info     = NULL;
406 #ifdef HMMER_THREADS
407   ESL_SQ_BLOCK    *block    = NULL;
408   ESL_THREADS     *threadObj= NULL;
409   ESL_WORK_QUEUE  *queue    = NULL;
410 #endif
411 
412   /* Initializations */
413   abc     = esl_alphabet_Create(eslAMINO);
414   w       = esl_stopwatch_Create();
415   textw   = (esl_opt_GetBoolean(go, "--notextw") ? 0 : esl_opt_GetInteger(go, "--textw"));
416   bg      = p7_bg_Create(abc);
417 
418   /* If caller declared input formats, decode them */
419   if (esl_opt_IsOn(go, "--qformat")) {
420     qformat = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--qformat"));
421     if (qformat == eslSQFILE_UNKNOWN) p7_Fail("%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--qformat"));
422   }
423   if (esl_opt_IsOn(go, "--tformat")) {
424     dbformat = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--tformat"));
425     if (dbformat == eslSQFILE_UNKNOWN) p7_Fail("%s is not a recognized sequence database file format\n", esl_opt_GetString(go, "--tformat"));
426   }
427 
428   /* Initialize a default builder configuration,
429    * then set only the options we need for single sequence search
430    */
431   bld = p7_builder_Create(NULL, abc);
432   if ((seed = esl_opt_GetInteger(go, "--seed")) > 0)
433     {				/* a little wasteful - we're blowing a couple of usec by reinitializing */
434       esl_randomness_Init(bld->r, seed);
435       bld->do_reseeding = TRUE;
436     }
437   bld->EmL = esl_opt_GetInteger(go, "--EmL");
438   bld->EmN = esl_opt_GetInteger(go, "--EmN");
439   bld->EvL = esl_opt_GetInteger(go, "--EvL");
440   bld->EvN = esl_opt_GetInteger(go, "--EvN");
441   bld->EfL = esl_opt_GetInteger(go, "--EfL");
442   bld->EfN = esl_opt_GetInteger(go, "--EfN");
443   bld->Eft = esl_opt_GetReal   (go, "--Eft");
444 
445   /* Default is stored in the --mx option, so it's always IsOn(). Check --mxfile first; then go to the --mx option and the default. */
446   if (esl_opt_IsOn(go, "--mxfile")) status = p7_builder_SetScoreSystem (bld, esl_opt_GetString(go, "--mxfile"), NULL, esl_opt_GetReal(go, "--popen"), esl_opt_GetReal(go, "--pextend"), bg);
447   else                              status = p7_builder_LoadScoreSystem(bld, esl_opt_GetString(go, "--mx"),           esl_opt_GetReal(go, "--popen"), esl_opt_GetReal(go, "--pextend"), bg);
448   if (status != eslOK) p7_Fail("Failed to set single query seq score system:\n%s\n", bld->errbuf);
449 
450   /* Open results output files */
451   if (esl_opt_IsOn(go, "-o"))          { if ((ofp      = fopen(esl_opt_GetString(go, "-o"),          "w")) == NULL)  p7_Fail("Failed to open output file %s for writing\n",                 esl_opt_GetString(go, "-o")); }
452   if (esl_opt_IsOn(go, "-A"))          { if ((afp      = fopen(esl_opt_GetString(go, "-A"),          "w")) == NULL)  p7_Fail("Failed to open alignment output file %s for writing\n",       esl_opt_GetString(go, "-A")); }
453   if (esl_opt_IsOn(go, "--tblout"))    { if ((tblfp    = fopen(esl_opt_GetString(go, "--tblout"),    "w")) == NULL)  p7_Fail("Failed to open tabular per-seq output file %s for writing\n", esl_opt_GetString(go, "--tblfp")); }
454   if (esl_opt_IsOn(go, "--domtblout")) { if ((domtblfp = fopen(esl_opt_GetString(go, "--domtblout"), "w")) == NULL)  p7_Fail("Failed to open tabular per-dom output file %s for writing\n", esl_opt_GetString(go, "--domtblfp")); }
455   if (esl_opt_IsOn(go, "--pfamtblout")){ if ((pfamtblfp = fopen(esl_opt_GetString(go, "--pfamtblout"), "w")) == NULL)  esl_fatal("Failed to open pfam-style tabular output file %s for writing\n", esl_opt_GetString(go, "--pfamtblout")); }
456 
457   /* Open the target sequence database for sequential access. */
458   status =  esl_sqfile_OpenDigital(abc, cfg->dbfile, dbformat, p7_SEQDBENV, &dbfp);
459   if      (status == eslENOTFOUND) p7_Fail("Failed to open target sequence database %s for reading\n",      cfg->dbfile);
460   else if (status == eslEFORMAT)   p7_Fail("Target sequence database file %s is empty or misformatted\n",   cfg->dbfile);
461   else if (status == eslEINVAL)    p7_Fail("Can't autodetect format of a stdin or .gz seqfile");
462   else if (status != eslOK)        p7_Fail("Unexpected error %d opening target sequence database file %s\n", status, cfg->dbfile);
463 
464 
465   if (esl_opt_IsUsed(go, "--restrictdb_stkey") || esl_opt_IsUsed(go, "--restrictdb_n")) {
466     if (esl_opt_IsUsed(go, "--ssifile"))
467       esl_sqfile_OpenSSI(dbfp, esl_opt_GetString(go, "--ssifile"));
468     else
469       esl_sqfile_OpenSSI(dbfp, NULL);
470   }
471 
472 
473   /* Open the query sequence file  */
474   status = esl_sqfile_OpenDigital(abc, cfg->qfile, qformat, NULL, &qfp);
475   if      (status == eslENOTFOUND) p7_Fail("Failed to open sequence file %s for reading\n",      cfg->qfile);
476   else if (status == eslEFORMAT)   p7_Fail("Sequence file %s is empty or misformatted\n",        cfg->qfile);
477   else if (status == eslEINVAL)    p7_Fail("Can't autodetect format of a stdin or .gz seqfile");
478   else if (status != eslOK)        p7_Fail ("Unexpected error %d opening sequence file %s\n", status, cfg->qfile);
479   qsq  = esl_sq_CreateDigital(abc);
480 
481 #ifdef HMMER_THREADS
482   /* initialize thread data */
483   ncpus = ESL_MIN( esl_opt_GetInteger(go, "--cpu"), esl_threads_GetCPUCount());
484   if (ncpus > 0)
485     {
486       threadObj = esl_threads_Create(&pipeline_thread);
487       queue = esl_workqueue_Create(ncpus * 2);
488     }
489 #endif
490 
491   infocnt = (ncpus == 0) ? 1 : ncpus;
492   ESL_ALLOC(info, sizeof(*info) * infocnt);
493 
494   /* Show header output */
495   output_header(ofp, go, cfg->qfile, cfg->dbfile);
496 
497   for (i = 0; i < infocnt; ++i)
498     {
499       info[i].pli   = NULL;
500       info[i].th    = NULL;
501       info[i].om    = NULL;
502       info[i].bg    = p7_bg_Clone(bg);
503 #ifdef HMMER_THREADS
504       info[i].queue = queue;
505 #endif
506     }
507 
508 #ifdef HMMER_THREADS
509   for (i = 0; i < ncpus * 2; ++i)
510     {
511       block = esl_sq_CreateDigitalBlock(BLOCK_SIZE, abc);
512       if (block == NULL)
513 	{
514 	  p7_Fail("Failed to allocate sequence block");
515 	}
516 
517       status = esl_workqueue_Init(queue, block);
518       if (status != eslOK)
519 	{
520 	  p7_Fail("Failed to add block to work queue");
521 	}
522     }
523 #endif
524 
525   /* Outer loop over sequence queries */
526   while ((qstatus = esl_sqio_Read(qfp, qsq)) == eslOK)
527     {
528       P7_OPROFILE     *om       = NULL;           /* optimized query profile                  */
529 
530       nquery++;
531       if (qsq->n == 0) continue; /* skip zero length seqs as if they aren't even present */
532 
533       esl_stopwatch_Start(w);
534 
535       /* seqfile may need to be rewound (multiquery mode) */
536       if (nquery > 1)
537       {
538         if (! esl_sqfile_IsRewindable(dbfp)) p7_Fail("Target sequence file %s isn't rewindable; can't search it with multiple queries", cfg->dbfile);
539 
540         if ( cfg->firstseq_key == NULL )
541           esl_sqfile_Position(dbfp, 0); //only re-set current position to 0 if we're not planning to set it in a moment
542       }
543 
544       if ( cfg->firstseq_key != NULL ) { //it's tempting to want to do this once and capture the offset position for future passes, but ncbi files make this non-trivial, so this keeps it general
545         sstatus = esl_sqfile_PositionByKey(dbfp, cfg->firstseq_key);
546         if (sstatus != eslOK)
547           p7_Fail("Failure setting restrictdb_stkey to %d\n", cfg->firstseq_key);
548       }
549 
550 
551       if (fprintf(ofp, "Query:       %s  [L=%ld]\n", qsq->name, (long) qsq->n) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
552       if (qsq->acc[0]  != '\0' && fprintf(ofp, "Accession:   %s\n", qsq->acc)  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
553       if (qsq->desc[0] != '\0' && fprintf(ofp, "Description: %s\n", qsq->desc) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
554 
555 
556       /* Build the model */
557       p7_SingleBuilder(bld, qsq, info[0].bg, NULL, NULL, NULL, &om); /* bypass HMM - only need model */
558 
559       for (i = 0; i < infocnt; ++i)
560       {
561         /* Create processing pipeline and hit list */
562         info[i].th  = p7_tophits_Create();
563         info[i].om  = p7_oprofile_Clone(om);
564         info[i].pli = p7_pipeline_Create(go, om->M, 100, FALSE, p7_SEARCH_SEQS); /* L_hint = 100 is just a dummy for now */
565         p7_pli_NewModel(info[i].pli, info[i].om, info[i].bg);
566 
567 #ifdef HMMER_THREADS
568         if (ncpus > 0) esl_threads_AddThread(threadObj, &info[i]);
569 #endif
570       }
571 
572 #ifdef HMMER_THREADS
573       if (ncpus > 0) sstatus = thread_loop(threadObj, queue, dbfp, cfg->n_targetseq);
574       else           sstatus = serial_loop(info, dbfp, cfg->n_targetseq);
575 #else
576       sstatus = serial_loop(info, dbfp, cfg->n_targetseq);
577 #endif
578       switch(sstatus)
579       {
580       case eslEFORMAT:
581         p7_Fail("Parse failed (sequence file %s):\n%s\n",
582             dbfp->filename, esl_sqfile_GetErrorBuf(dbfp));
583         break;
584       case eslEOF:
585         /* do nothing */
586         break;
587       default:
588         p7_Fail("Unexpected error %d reading sequence file %s",
589             sstatus, dbfp->filename);
590       }
591 
592 
593       /* merge the results of the search results */
594       for (i = 1; i < infocnt; ++i)
595       {
596         p7_tophits_Merge(info[0].th, info[i].th);
597         p7_pipeline_Merge(info[0].pli, info[i].pli);
598 
599         p7_pipeline_Destroy(info[i].pli);
600         p7_tophits_Destroy(info[i].th);
601         p7_oprofile_Destroy(info[i].om);
602       }
603 
604       /* Print the results.  */
605       p7_tophits_SortBySortkey(info->th);
606       p7_tophits_Threshold(info->th, info->pli);
607       p7_tophits_Targets(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
608       p7_tophits_Domains(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
609 
610       if (tblfp)     p7_tophits_TabularTargets(tblfp,    qsq->name, qsq->acc, info->th, info->pli, (nquery == 1));
611       if (domtblfp)  p7_tophits_TabularDomains(domtblfp, qsq->name, qsq->acc, info->th, info->pli, (nquery == 1));
612       if (pfamtblfp) p7_tophits_TabularXfam(pfamtblfp, qsq->name, qsq->acc, info->th, info->pli);
613 
614       esl_stopwatch_Stop(w);
615       p7_pli_Statistics(ofp, info->pli, w);
616       if (fprintf(ofp, "//\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
617       fflush(ofp);
618 
619       /* Output the results in an MSA (-A option) */
620       if (afp) {
621 	ESL_MSA *msa = NULL;
622 
623 	if ( p7_tophits_Alignment(info->th, abc, NULL, NULL, 0, p7_ALL_CONSENSUS_COLS, &msa) == eslOK)
624 	  {
625 	    esl_msa_SetName     (msa, om->name, -1);   // don't use qsq->name; it's optional in a ESL_SQ, and SingleBuilder took care of naming model.
626 	    if (qsq->acc[0]  != '\0') esl_msa_SetAccession(msa, qsq->acc,  -1);
627 	    if (qsq->desc[0] != '\0') esl_msa_SetDesc     (msa, qsq->desc, -1);
628 	    esl_msa_FormatAuthor(msa, "phmmer (HMMER %s)", HMMER_VERSION);
629 
630 	    if (textw > 0) esl_msafile_Write(afp, msa, eslMSAFILE_STOCKHOLM);
631 	    else           esl_msafile_Write(afp, msa, eslMSAFILE_PFAM);
632 
633 	    if (fprintf(ofp, "# Alignment of %d hits satisfying inclusion thresholds saved to: %s\n", msa->nseq, esl_opt_GetString(go, "-A")) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
634 	  }
635 	else if (fprintf(ofp, "# No hits satisfy inclusion thresholds; no alignment saved\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
636 
637 	esl_msa_Destroy(msa);
638       }
639 
640       p7_tophits_Destroy(info->th);
641       p7_pipeline_Destroy(info->pli);
642       p7_oprofile_Destroy(info->om);
643       p7_oprofile_Destroy(om);
644       esl_sq_Reuse(qsq);
645     } /* end outer loop over query sequences */
646   if      (qstatus == eslEFORMAT) p7_Fail("Parse failed (sequence file %s):\n%s\n",
647 					    qfp->filename, esl_sqfile_GetErrorBuf(qfp));
648   else if (qstatus != eslEOF)     p7_Fail("Unexpected error %d reading sequence file %s",
649 					    qstatus, qfp->filename);
650 
651 
652   /* Terminate outputs - any last words?
653    */
654   if (tblfp)     p7_tophits_TabularTail(tblfp,    "phmmer", p7_SEARCH_SEQS, cfg->qfile, cfg->dbfile, go);
655   if (domtblfp)  p7_tophits_TabularTail(domtblfp, "phmmer", p7_SEARCH_SEQS, cfg->qfile, cfg->dbfile, go);
656   if (pfamtblfp) p7_tophits_TabularTail(pfamtblfp,"phmmer", p7_SEARCH_SEQS, cfg->qfile, cfg->dbfile, go);
657   if (ofp)    { if (fprintf(ofp, "[ok]\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); }
658 
659   /* Cleanup - prepare for successful exit
660    */
661   for (i = 0; i < infocnt; ++i)
662     p7_bg_Destroy(info[i].bg);
663 
664 #ifdef HMMER_THREADS
665   if (ncpus > 0)
666     {
667       esl_workqueue_Reset(queue);
668       while (esl_workqueue_Remove(queue, (void **) &block) == eslOK)
669 	esl_sq_DestroyBlock(block);
670       esl_workqueue_Destroy(queue);
671       esl_threads_Destroy(threadObj);
672     }
673 #endif
674 
675   free(info);
676   esl_sqfile_Close(dbfp);
677   esl_sqfile_Close(qfp);
678   esl_stopwatch_Destroy(w);
679   esl_sq_Destroy(qsq);
680   p7_bg_Destroy(bg);
681   p7_builder_Destroy(bld);
682   esl_alphabet_Destroy(abc);
683 
684   if (ofp      != stdout) fclose(ofp);
685   if (afp      != NULL)   fclose(afp);
686   if (tblfp    != NULL)   fclose(tblfp);
687   if (domtblfp != NULL)   fclose(domtblfp);
688   if (pfamtblfp)     fclose(pfamtblfp);
689   return eslOK;
690 
691  ERROR:
692   return status;
693 }
694 
695 #ifdef HMMER_MPI
696 
697 /* Define common tags used by the MPI master/slave processes */
698 #define HMMER_ERROR_TAG          1
699 #define HMMER_HMM_TAG            2
700 #define HMMER_SEQUENCE_TAG       3
701 #define HMMER_BLOCK_TAG          4
702 #define HMMER_PIPELINE_TAG       5
703 #define HMMER_TOPHITS_TAG        6
704 #define HMMER_HIT_TAG            7
705 #define HMMER_TERMINATING_TAG    8
706 #define HMMER_READY_TAG          9
707 
708 /* mpi_failure()
709  * Generate an error message.  If the clients rank is not 0, a
710  * message is created with the error message and sent to the
711  * master process for handling.
712  */
713 static void
mpi_failure(char * format,...)714 mpi_failure(char *format, ...)
715 {
716   va_list  argp;
717   int      status = eslFAIL;
718   int      len;
719   int      rank;
720   char     str[512];
721 
722   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
723 
724   /* format the error mesg */
725   va_start(argp, format);
726   len = vsnprintf(str, sizeof(str), format, argp);
727   va_end(argp);
728 
729   /* make sure the error string is terminated */
730   str[sizeof(str)-1] = '\0';
731 
732   /* if the caller is the master, print the results and abort */
733   if (rank == 0)
734     {
735       if (fprintf(stderr, "\nError: ") < 0) exit(eslEWRITE);
736       if (fprintf(stderr, "%s", str)   < 0) exit(eslEWRITE);
737       if (fprintf(stderr, "\n")        < 0) exit(eslEWRITE);
738       fflush(stderr);
739 
740       MPI_Abort(MPI_COMM_WORLD, status);
741       exit(1);
742     }
743   else
744     {
745       MPI_Send(str, len, MPI_CHAR, 0, HMMER_ERROR_TAG, MPI_COMM_WORLD);
746       pause();
747     }
748 }
749 
750 #define MAX_BLOCK_SIZE (512*1024)
751 
752 typedef struct {
753   uint64_t  offset;
754   uint64_t  length;
755   uint64_t  count;
756 } SEQ_BLOCK;
757 
758 typedef struct {
759   int        complete;
760   int        size;
761   int        current;
762   int        last;
763   SEQ_BLOCK *blocks;
764 } BLOCK_LIST;
765 
766 /* this routine parses the database keeping track of the blocks
767  * offset within the file, number of sequences and the length
768  * of the block.  These blocks are passed as work units to the
769  * MPI workers.  If multiple hmm's are in the query file, the
770  * blocks are reused without parsing the database a second time.
771  */
next_block(ESL_SQFILE * sqfp,ESL_SQ * sq,BLOCK_LIST * list,SEQ_BLOCK * block,int n_targetseqs)772 int next_block(ESL_SQFILE *sqfp, ESL_SQ *sq, BLOCK_LIST *list, SEQ_BLOCK *block, int n_targetseqs)
773 {
774   int      status   = eslOK;
775 
776   /* if the list has been calculated, use it instead of parsing the database */
777   if (list->complete)
778   {
779       if (list->current == list->last)
780       {
781         block->offset = 0;
782         block->length = 0;
783         block->count  = 0;
784 
785         status = eslEOF;
786       }
787       else
788       {
789         int inx = list->current++;
790 
791         block->offset = list->blocks[inx].offset;
792         block->length = list->blocks[inx].length;
793         block->count  = list->blocks[inx].count;
794 
795         status = eslOK;
796       }
797 
798       return status;
799   }
800 
801   block->offset = 0;
802   block->length = 0;
803   block->count = 0;
804 
805   esl_sq_Reuse(sq);
806   if (n_targetseqs == 0) status = eslEOF; //this is to handle the end-case of a restrictdb scenario, where no more targets are required, and we want to mark the list as complete
807   while (block->length < MAX_BLOCK_SIZE && (n_targetseqs < 0 || block->count < n_targetseqs) && (status = esl_sqio_ReadInfo(sqfp, sq)) == eslOK)
808     {
809       if (block->count == 0) block->offset = sq->roff;
810       block->length = sq->eoff - block->offset + 1;
811       block->count++;
812       esl_sq_Reuse(sq);
813     }
814 
815   if (block->count > 0)
816     if (status == eslEOF || block->count == n_targetseqs)
817       status = eslOK;
818   if (status == eslEOF) list->complete = 1;
819 
820   /* add the block to the list of known blocks */
821   if (status == eslOK)
822     {
823       int inx;
824 
825       if (list->last >= list->size)
826 	{
827 	  void *tmp;
828 	  list->size += 500;
829 	  ESL_RALLOC(list->blocks, tmp, sizeof(SEQ_BLOCK) * list->size);
830 	}
831 
832       inx = list->last++;
833       list->blocks[inx].offset = block->offset;
834       list->blocks[inx].length = block->length;
835       list->blocks[inx].count  = block->count;
836     }
837 
838   return status;
839 
840  ERROR:
841   return eslEMEM;
842 }
843 
844 /* mpi_master()
845  * The MPI version of hmmbuild.
846  * Follows standard pattern for a master/worker load-balanced MPI program (J1/78-79).
847  *
848  * A master can only return if it's successful.
849  * Where we use EXCEPTION mechanism and ERROR block, it's for convenience;
850  * we know we're using a fatal exception handler.
851  *
852  * Errors in an MPI master come in two classes: recoverable and nonrecoverable.
853  *
854  * Recoverable errors include all worker-side errors, and any
855  * master-side error that do not affect MPI communication. Error
856  * messages from recoverable messages are delayed until we've cleanly
857  * shut down the workers.
858  *
859  * Unrecoverable errors are master-side errors that may affect MPI
860  * communication, meaning we cannot count on being able to reach the
861  * workers and shut them down. Unrecoverable errors result in immediate
862  * p7_Fail()'s, which will cause MPI to shut down the worker processes
863  * uncleanly.
864  */
865 static int
mpi_master(ESL_GETOPTS * go,struct cfg_s * cfg)866 mpi_master(ESL_GETOPTS *go, struct cfg_s *cfg)
867 {
868   FILE            *ofp      = stdout;             /* output file for results (default stdout)         */
869   FILE            *afp      = NULL;               /* alignment output file (-A option)                */
870   FILE            *tblfp    = NULL;		  /* output stream for tabular per-seq (--tblout)     */
871   FILE            *domtblfp = NULL;		  /* output stream for tabular per-seq (--domtblout)  */
872   FILE            *pfamtblfp= NULL;              /* output stream for pfam-style tabular output  (--pfamtblout) */
873   int              qformat  = eslSQFILE_UNKNOWN;  /* format of qfile                                  */
874   P7_BG           *bg       = NULL;	          /* null model                                      */
875   ESL_SQFILE      *qfp      = NULL;		  /* open qfile                                       */
876   ESL_SQ          *qsq      = NULL;               /* query sequence                                   */
877   int              dbformat = eslSQFILE_UNKNOWN;  /* format of dbfile                                 */
878   ESL_SQFILE      *dbfp     = NULL;               /* open dbfile                                      */
879   ESL_SQ          *dbsq     = NULL;               /* target sequence                                  */
880   ESL_ALPHABET    *abc      = NULL;               /* sequence alphabet                                */
881   P7_BUILDER      *bld      = NULL;               /* HMM construction configuration                   */
882   ESL_STOPWATCH   *w        = NULL;               /* for timing                                       */
883   int              nquery   = 0;
884   int              seed;
885   int              textw;
886   int              status   = eslOK;
887   int              qstatus  = eslOK;
888   int              sstatus  = eslOK;
889   int              dest;
890 
891   char            *mpi_buf  = NULL;               /* buffer used to pack/unpack structures            */
892   int              mpi_size = 0;                  /* size of the allocated buffer                     */
893   BLOCK_LIST      *list     = NULL;
894   SEQ_BLOCK        block;
895 
896   int              i;
897   int              size;
898   MPI_Status       mpistatus;
899 
900   int              n_targets;
901 
902   /* Initializations */
903   abc     = esl_alphabet_Create(eslAMINO);
904   w       = esl_stopwatch_Create();
905   if (esl_opt_GetBoolean(go, "--notextw")) textw = 0;
906   else                                     textw = esl_opt_GetInteger(go, "--textw");
907   esl_stopwatch_Start(w);
908 
909   /* If caller declared input formats, decode them */
910   if (esl_opt_IsOn(go, "--qformat")) {
911     qformat = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--qformat"));
912     if (qformat == eslSQFILE_UNKNOWN) p7_Fail("%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--qformat"));
913   }
914   if (esl_opt_IsOn(go, "--tformat")) {
915     dbformat = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--tformat"));
916     if (dbformat == eslSQFILE_UNKNOWN) p7_Fail("%s is not a recognized sequence database file format\n", esl_opt_GetString(go, "--tformat"));
917   }
918 
919   bg = p7_bg_Create(abc);
920 
921   /* Initialize a default builder configuration,
922    * then set only the options we need for single sequence search
923    */
924   bld = p7_builder_Create(NULL, abc);
925   if ((seed = esl_opt_GetInteger(go, "--seed")) > 0)
926     {				/* a little wasteful - we're blowing a couple of usec by reinitializing */
927       esl_randomness_Init(bld->r, seed);
928       bld->do_reseeding = TRUE;
929     }
930   bld->EmL = esl_opt_GetInteger(go, "--EmL");
931   bld->EmN = esl_opt_GetInteger(go, "--EmN");
932   bld->EvL = esl_opt_GetInteger(go, "--EvL");
933   bld->EvN = esl_opt_GetInteger(go, "--EvN");
934   bld->EfL = esl_opt_GetInteger(go, "--EfL");
935   bld->EfN = esl_opt_GetInteger(go, "--EfN");
936   bld->Eft = esl_opt_GetReal   (go, "--Eft");
937 
938   if (esl_opt_IsOn(go, "--mxfile")) status = p7_builder_SetScoreSystem (bld, esl_opt_GetString(go, "--mxfile"), NULL, esl_opt_GetReal(go, "--popen"), esl_opt_GetReal(go, "--pextend"), bg);
939   else                              status = p7_builder_LoadScoreSystem(bld, esl_opt_GetString(go, "--mx"),           esl_opt_GetReal(go, "--popen"), esl_opt_GetReal(go, "--pextend"), bg);
940   if (status != eslOK) mpi_failure("Failed to set single query seq score system:\n%s\n", bld->errbuf);
941 
942   /* Open results output files */
943   if (esl_opt_IsOn(go, "-o")          && (ofp      = fopen(esl_opt_GetString(go, "-o"),          "w")) == NULL)
944     mpi_failure("Failed to open output file %s for writing\n",                 esl_opt_GetString(go, "-o"));
945   if (esl_opt_IsOn(go, "-A")          && (afp      = fopen(esl_opt_GetString(go, "-A"),          "w")) == NULL)
946     mpi_failure("Failed to open alignment output file %s for writing\n",       esl_opt_GetString(go, "-A"));
947   if (esl_opt_IsOn(go, "--tblout")    && (tblfp    = fopen(esl_opt_GetString(go, "--tblout"),    "w")) == NULL)
948     mpi_failure("Failed to open tabular per-seq output file %s for writing\n", esl_opt_GetString(go, "--tblfp"));
949   if (esl_opt_IsOn(go, "--domtblout") && (domtblfp = fopen(esl_opt_GetString(go, "--domtblout"), "w")) == NULL)
950     mpi_failure("Failed to open tabular per-dom output file %s for writing\n", esl_opt_GetString(go, "--domtblfp"));
951   if (esl_opt_IsOn(go, "--pfamtblout") && (pfamtblfp = fopen(esl_opt_GetString(go, "--pfamtblout"), "w")) == NULL)
952     mpi_failure("Failed to open pfam-style tabular output file %s for writing\n", esl_opt_GetString(go, "--pfamtblout"));
953 
954   /* Open the target sequence database for sequential access. */
955   status =  esl_sqfile_OpenDigital(abc, cfg->dbfile, dbformat, p7_SEQDBENV, &dbfp);
956   if      (status == eslENOTFOUND) mpi_failure("Failed to open target sequence database %s for reading\n",      cfg->dbfile);
957   else if (status == eslEFORMAT)   mpi_failure("Target sequence database file %s is empty or misformatted\n",   cfg->dbfile);
958   else if (status == eslEINVAL)    mpi_failure("Can't autodetect format of a stdin or .gz seqfile");
959   else if (status != eslOK)        mpi_failure("Unexpected error %d opening target sequence database file %s\n", status, cfg->dbfile);
960   dbsq = esl_sq_CreateDigital(abc);
961 
962   if (esl_opt_IsUsed(go, "--restrictdb_stkey") || esl_opt_IsUsed(go, "--restrictdb_n")) {
963       if (esl_opt_IsUsed(go, "--ssifile"))
964         esl_sqfile_OpenSSI(dbfp, esl_opt_GetString(go, "--ssifile"));
965       else
966         esl_sqfile_OpenSSI(dbfp, NULL);
967   }
968 
969 
970 
971   /* Open the query sequence file  */
972   status = esl_sqfile_OpenDigital(abc, cfg->qfile, qformat, NULL, &qfp);
973   if      (status == eslENOTFOUND) mpi_failure("Failed to open sequence file %s for reading\n",      cfg->qfile);
974   else if (status == eslEFORMAT)   mpi_failure("Sequence file %s is empty or misformatted\n",        cfg->qfile);
975   else if (status == eslEINVAL)    mpi_failure("Can't autodetect format of a stdin or .gz seqfile");
976   else if (status != eslOK)        mpi_failure ("Unexpected error %d opening sequence file %s\n", status, cfg->qfile);
977   qsq  = esl_sq_CreateDigital(abc);
978 
979   ESL_ALLOC(list, sizeof(SEQ_BLOCK));
980   list->complete = 0;
981   list->size     = 0;
982   list->current  = 0;
983   list->last     = 0;
984   list->blocks   = NULL;
985 
986 
987   /* Show header output */
988   output_header(ofp, go, cfg->qfile, cfg->dbfile);
989 
990   if ( cfg->firstseq_key != NULL ) { //it's tempting to want to do this once and capture the offset position for future passes, but ncbi files make this non-trivial, so this keeps it general
991     sstatus = esl_sqfile_PositionByKey(dbfp, cfg->firstseq_key);
992     if (sstatus != eslOK)
993       p7_Fail("Failure setting restrictdb_stkey to %d\n", cfg->firstseq_key);
994   }
995 
996 
997   /* Outer loop over sequence queries */
998   while ((qstatus = esl_sqio_Read(qfp, qsq)) == eslOK)
999     {
1000       P7_PIPELINE     *pli      = NULL;		  /* processing pipeline                      */
1001       P7_TOPHITS      *th       = NULL;        	  /* top-scoring sequence hits                */
1002       P7_OPROFILE     *om       = NULL;           /* optimized query profile                  */
1003       int              seq_cnt  = 0;
1004 
1005       nquery++;
1006       if (qsq->n == 0) continue; /* skip zero length seqs as if they aren't even present */
1007 
1008       esl_stopwatch_Start(w);
1009 
1010       n_targets = cfg->n_targetseq;
1011 
1012       /* seqfile may need to be rewound (multiquery mode) */
1013       if (nquery > 1) list->current = 0;
1014 
1015       if (fprintf(ofp, "Query:       %s  [L=%ld]\n", qsq->name, (long) qsq->n) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
1016       if (qsq->acc[0]  != '\0' && fprintf(ofp, "Accession:   %s\n", qsq->acc)  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
1017       if (qsq->desc[0] != '\0' && fprintf(ofp, "Description: %s\n", qsq->desc) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
1018 
1019       /* Build the model */
1020       p7_SingleBuilder(bld, qsq, bg, NULL, NULL, NULL, &om); /* bypass HMM - only need model */
1021 
1022       /* Create processing pipeline and hit list */
1023       th  = p7_tophits_Create();
1024       pli = p7_pipeline_Create(go, om->M, 100, FALSE, p7_SEARCH_SEQS); /* L_hint = 100 is just a dummy for now */
1025       p7_pli_NewModel(pli, om, bg);
1026 
1027       /* Main loop: */
1028       while ((n_targets==-1 || seq_cnt<=n_targets) && (sstatus = next_block(dbfp, dbsq, list, &block, n_targets-seq_cnt)) == eslOK )
1029       {
1030         seq_cnt += block.count;
1031 
1032         if (MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &mpistatus) != 0)
1033           mpi_failure("MPI error %d receiving message from %d\n", mpistatus.MPI_SOURCE);
1034 
1035         MPI_Get_count(&mpistatus, MPI_PACKED, &size);
1036         if (mpi_buf == NULL || size > mpi_size) {
1037           void *tmp;
1038           ESL_RALLOC(mpi_buf, tmp, sizeof(char) * size);
1039           mpi_size = size;
1040         }
1041 
1042         dest = mpistatus.MPI_SOURCE;
1043         MPI_Recv(mpi_buf, size, MPI_PACKED, dest, mpistatus.MPI_TAG, MPI_COMM_WORLD, &mpistatus);
1044 
1045         if (mpistatus.MPI_TAG == HMMER_ERROR_TAG)
1046           mpi_failure("MPI client %d raised error:\n%s\n", dest, mpi_buf);
1047         if (mpistatus.MPI_TAG != HMMER_READY_TAG)
1048           mpi_failure("Unexpected tag %d from %d\n", mpistatus.MPI_TAG, dest);
1049 
1050         MPI_Send(&block, 3, MPI_LONG_LONG_INT, dest, HMMER_BLOCK_TAG, MPI_COMM_WORLD);
1051       }
1052 
1053       if (n_targets!=-1 && seq_cnt==n_targets)
1054         sstatus = eslEOF;
1055 
1056       switch(sstatus)
1057       {
1058       case eslEFORMAT:
1059         mpi_failure("Parse failed (sequence file %s):\n%s\n",
1060               dbfp->filename, esl_sqfile_GetErrorBuf(dbfp));
1061         break;
1062       case eslEOF:
1063         break;
1064       default:
1065         mpi_failure("Unexpected error %d reading sequence file %s", sstatus, dbfp->filename);
1066       }
1067 
1068       block.offset = 0;
1069       block.length = 0;
1070       block.count  = 0;
1071 
1072       /* wait for all workers to finish up their work blocks */
1073       for (i = 1; i < cfg->nproc; ++i)
1074 	{
1075 	  if (MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &mpistatus) != 0)
1076 	    mpi_failure("MPI error %d receiving message from %d\n", mpistatus.MPI_SOURCE);
1077 
1078 	  MPI_Get_count(&mpistatus, MPI_PACKED, &size);
1079 	  if (mpi_buf == NULL || size > mpi_size) {
1080 	    void *tmp;
1081 	    ESL_RALLOC(mpi_buf, tmp, sizeof(char) * size);
1082 	    mpi_size = size;
1083 	  }
1084 
1085 	  dest = mpistatus.MPI_SOURCE;
1086 	  MPI_Recv(mpi_buf, size, MPI_PACKED, dest, mpistatus.MPI_TAG, MPI_COMM_WORLD, &mpistatus);
1087 
1088 	  if (mpistatus.MPI_TAG == HMMER_ERROR_TAG)
1089 	    mpi_failure("MPI client %d raised error:\n%s\n", dest, mpi_buf);
1090 	  if (mpistatus.MPI_TAG != HMMER_READY_TAG)
1091 	    mpi_failure("Unexpected tag %d from %d\n", mpistatus.MPI_TAG, dest);
1092 	}
1093 
1094       /* merge the results of the search results */
1095       for (dest = 1; dest < cfg->nproc; ++dest)
1096 	{
1097 	  P7_PIPELINE     *mpi_pli   = NULL;
1098 	  P7_TOPHITS      *mpi_th    = NULL;
1099 
1100 	  /* send an empty block to signal the worker they are done */
1101 	  MPI_Send(&block, 3, MPI_LONG_LONG_INT, dest, HMMER_BLOCK_TAG, MPI_COMM_WORLD);
1102 
1103 	  /* wait for the results */
1104 	  if ((status = p7_tophits_MPIRecv(dest, HMMER_TOPHITS_TAG, MPI_COMM_WORLD, &mpi_buf, &mpi_size, &mpi_th)) != eslOK)
1105 	    mpi_failure("Unexpected error %d receiving tophits from %d", status, dest);
1106 
1107 	  if ((status = p7_pipeline_MPIRecv(dest, HMMER_PIPELINE_TAG, MPI_COMM_WORLD, &mpi_buf, &mpi_size, go, &mpi_pli)) != eslOK)
1108 	    mpi_failure("Unexpected error %d receiving pipeline from %d", status, dest);
1109 
1110 	  p7_tophits_Merge(th, mpi_th);
1111 	  p7_pipeline_Merge(pli, mpi_pli);
1112 
1113 	  p7_pipeline_Destroy(mpi_pli);
1114 	  p7_tophits_Destroy(mpi_th);
1115 	}
1116 
1117       /* Print the results.  */
1118       p7_tophits_SortBySortkey(th);
1119       p7_tophits_Threshold(th, pli);
1120       p7_tophits_Targets(ofp, th, pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
1121       p7_tophits_Domains(ofp, th, pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
1122 
1123       if (tblfp)     p7_tophits_TabularTargets(tblfp,    qsq->name, qsq->acc, th, pli, (nquery == 1));
1124       if (domtblfp)  p7_tophits_TabularDomains(domtblfp, qsq->name, qsq->acc, th, pli, (nquery == 1));
1125       if (pfamtblfp) p7_tophits_TabularXfam(pfamtblfp,  qsq->name, qsq->acc, th, pli);
1126 
1127       esl_stopwatch_Stop(w);
1128       p7_pli_Statistics(ofp, pli, w);
1129       if (fprintf(ofp, "//\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
1130 
1131       /* Output the results in an MSA (-A option) */
1132       if (afp) {
1133 	ESL_MSA *msa = NULL;
1134 
1135 	if ( p7_tophits_Alignment(th, abc, NULL, NULL, 0, p7_ALL_CONSENSUS_COLS, &msa) == eslOK)
1136 	  {
1137 	    esl_msa_SetName     (msa, om->name, -1);   // don't use qsq->name; it's optional in a ESL_SQ, and SingleBuilder took care of naming model.
1138 	    if (qsq->acc[0]  != '\0') esl_msa_SetAccession(msa, qsq->acc,  -1);
1139 	    if (qsq->desc[0] != '\0') esl_msa_SetDesc     (msa, qsq->desc, -1);
1140 	    esl_msa_FormatAuthor(msa, "phmmer (HMMER %s)", HMMER_VERSION);
1141 
1142 	    esl_msa_FormatAuthor(msa, "phmmer (HMMER %s)", HMMER_VERSION);
1143 
1144 	    if (textw > 0) esl_msafile_Write(afp, msa, eslMSAFILE_STOCKHOLM);
1145 	    else           esl_msafile_Write(afp, msa, eslMSAFILE_PFAM);
1146 
1147 	    if (fprintf(ofp, "# Alignment of %d hits satisfying inclusion thresholds saved to: %s\n", msa->nseq, esl_opt_GetString(go, "-A")) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed");
1148 	  }
1149 	else { if (fprintf(ofp, "# No hits satisfy inclusion thresholds; no alignment saved\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); }
1150 	esl_msa_Destroy(msa);
1151       }
1152 
1153       p7_tophits_Destroy(th);
1154       p7_pipeline_Destroy(pli);
1155       p7_oprofile_Destroy(om);
1156       esl_sq_Reuse(qsq);
1157     } /* end outer loop over query sequences */
1158   if      (qstatus == eslEFORMAT) mpi_failure("Parse failed (sequence file %s):\n%s\n",
1159 				 	      qfp->filename, esl_sqfile_GetErrorBuf(qfp));
1160   else if (qstatus != eslEOF)     mpi_failure("Unexpected error %d reading sequence file %s",
1161 					      qstatus, qfp->filename);
1162 
1163   /* monitor all the workers to make sure they have ended */
1164   for (i = 1; i < cfg->nproc; ++i)
1165     {
1166       if (MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &mpistatus) != 0)
1167 	mpi_failure("MPI error %d receiving message from %d\n", mpistatus.MPI_SOURCE);
1168 
1169       MPI_Get_count(&mpistatus, MPI_PACKED, &size);
1170       if (mpi_buf == NULL || size > mpi_size) {
1171 	void *tmp;
1172 	ESL_RALLOC(mpi_buf, tmp, sizeof(char) * size);
1173 	mpi_size = size;
1174       }
1175 
1176       dest = mpistatus.MPI_SOURCE;
1177       MPI_Recv(mpi_buf, size, MPI_PACKED, dest, mpistatus.MPI_TAG, MPI_COMM_WORLD, &mpistatus);
1178 
1179       if (mpistatus.MPI_TAG == HMMER_ERROR_TAG)
1180 	mpi_failure("MPI client %d raised error:\n%s\n", dest, mpi_buf);
1181       if (mpistatus.MPI_TAG != HMMER_TERMINATING_TAG)
1182 	mpi_failure("Unexpected tag %d from %d\n", mpistatus.MPI_TAG, dest);
1183     }
1184 
1185   /* Terminate outputs - any last words?
1186    */
1187   if (tblfp)    p7_tophits_TabularTail(tblfp,    "phmmer", p7_SEARCH_SEQS, cfg->qfile, cfg->dbfile, go);
1188   if (domtblfp) p7_tophits_TabularTail(domtblfp, "phmmer", p7_SEARCH_SEQS, cfg->qfile, cfg->dbfile, go);
1189   if (pfamtblfp)p7_tophits_TabularTail(pfamtblfp, "phmmer", p7_SEARCH_SEQS, cfg->qfile, cfg->dbfile, go);
1190   if (ofp)      { if (fprintf(ofp, "[ok]\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); }
1191 
1192   /* Cleanup - prepare for successful exit
1193    */
1194   free(list);
1195   if (mpi_buf != NULL) free(mpi_buf);
1196 
1197   p7_bg_Destroy(bg);
1198 
1199   esl_sqfile_Close(dbfp);
1200   esl_sqfile_Close(qfp);
1201   esl_stopwatch_Destroy(w);
1202   esl_sq_Destroy(dbsq);
1203   esl_sq_Destroy(qsq);
1204   p7_builder_Destroy(bld);
1205   esl_alphabet_Destroy(abc);
1206 
1207   if (ofp      != stdout) fclose(ofp);
1208   if (afp      != NULL)   fclose(afp);
1209   if (tblfp    != NULL)   fclose(tblfp);
1210   if (domtblfp != NULL)   fclose(domtblfp);
1211   if (pfamtblfp)     fclose(pfamtblfp);
1212   return eslOK;
1213 
1214  ERROR:
1215   return status;
1216 }
1217 
1218 
1219 static int
mpi_worker(ESL_GETOPTS * go,struct cfg_s * cfg)1220 mpi_worker(ESL_GETOPTS *go, struct cfg_s *cfg)
1221 {
1222   int              qformat  = eslSQFILE_UNKNOWN;  /* format of qfile                                  */
1223   P7_BG           *bg       = NULL;	          /* null model                                      */
1224   ESL_SQFILE      *qfp      = NULL;		  /* open qfile                                       */
1225   ESL_SQ          *qsq      = NULL;               /* query sequence                                   */
1226   int              dbformat = eslSQFILE_UNKNOWN;  /* format of dbfile                                 */
1227   ESL_SQFILE      *dbfp     = NULL;               /* open dbfile                                      */
1228   ESL_SQ          *dbsq     = NULL;               /* target sequence                                  */
1229   ESL_ALPHABET    *abc      = NULL;               /* sequence alphabet                                */
1230   P7_BUILDER      *bld      = NULL;               /* HMM construction configuration                   */
1231   ESL_STOPWATCH   *w        = NULL;               /* for timing                                       */
1232   int              seed;
1233   int              status   = eslOK;
1234   int              qstatus  = eslOK;
1235   int              sstatus  = eslOK;
1236 
1237   char            *mpi_buf  = NULL;               /* buffer used to pack/unpack structures            */
1238   int              mpi_size = 0;                  /* size of the allocated buffer                     */
1239 
1240   MPI_Status       mpistatus;
1241 
1242   /* Initializations */
1243   abc  = esl_alphabet_Create(eslAMINO);
1244   w    = esl_stopwatch_Create();
1245   bg   = p7_bg_Create(abc);
1246 
1247   /* If caller declared input formats, decode them */
1248   if (esl_opt_IsOn(go, "--qformat")) {
1249     qformat = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--qformat"));
1250     if (qformat == eslSQFILE_UNKNOWN) p7_Fail("%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--qformat"));
1251   }
1252   if (esl_opt_IsOn(go, "--tformat")) {
1253     dbformat = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--tformat"));
1254     if (dbformat == eslSQFILE_UNKNOWN) p7_Fail("%s is not a recognized sequence database file format\n", esl_opt_GetString(go, "--tformat"));
1255   }
1256 
1257   /* Initialize a default builder configuration,
1258    * then set only the options we need for single sequence search
1259    */
1260   bld = p7_builder_Create(NULL, abc);
1261   if ((seed = esl_opt_GetInteger(go, "--seed")) > 0)
1262     {				/* a little wasteful - we're blowing a couple of usec by reinitializing */
1263       esl_randomness_Init(bld->r, seed);
1264       bld->do_reseeding = TRUE;
1265     }
1266   bld->EmL = esl_opt_GetInteger(go, "--EmL");
1267   bld->EmN = esl_opt_GetInteger(go, "--EmN");
1268   bld->EvL = esl_opt_GetInteger(go, "--EvL");
1269   bld->EvN = esl_opt_GetInteger(go, "--EvN");
1270   bld->EfL = esl_opt_GetInteger(go, "--EfL");
1271   bld->EfN = esl_opt_GetInteger(go, "--EfN");
1272   bld->Eft = esl_opt_GetReal   (go, "--Eft");
1273 
1274   if (esl_opt_IsOn(go, "--mxfile")) status = p7_builder_SetScoreSystem (bld, esl_opt_GetString(go, "--mxfile"), NULL, esl_opt_GetReal(go, "--popen"), esl_opt_GetReal(go, "--pextend"), bg);
1275   else                              status = p7_builder_LoadScoreSystem(bld, esl_opt_GetString(go, "--mx"),           esl_opt_GetReal(go, "--popen"), esl_opt_GetReal(go, "--pextend"), bg);
1276   if (status != eslOK) mpi_failure("Failed to set single query seq score system:\n%s\n", bld->errbuf);
1277 
1278   /* Open the target sequence database for sequential access. */
1279   status =  esl_sqfile_OpenDigital(abc, cfg->dbfile, dbformat, p7_SEQDBENV, &dbfp);
1280   if      (status == eslENOTFOUND) mpi_failure("Failed to open target sequence database %s for reading\n",      cfg->dbfile);
1281   else if (status == eslEFORMAT)   mpi_failure("Target sequence database file %s is empty or misformatted\n",   cfg->dbfile);
1282   else if (status == eslEINVAL)    mpi_failure("Can't autodetect format of a stdin or .gz seqfile");
1283   else if (status != eslOK)        mpi_failure("Unexpected error %d opening target sequence database file %s\n", status, cfg->dbfile);
1284   dbsq = esl_sq_CreateDigital(abc);
1285 
1286   /* Open the query sequence file  */
1287   status = esl_sqfile_OpenDigital(abc, cfg->qfile, qformat, NULL, &qfp);
1288   if      (status == eslENOTFOUND) mpi_failure("Failed to open sequence file %s for reading\n",      cfg->qfile);
1289   else if (status == eslEFORMAT)   mpi_failure("Sequence file %s is empty or misformatted\n",        cfg->qfile);
1290   else if (status == eslEINVAL)    mpi_failure("Can't autodetect format of a stdin or .gz seqfile");
1291   else if (status != eslOK)        mpi_failure ("Unexpected error %d opening sequence file %s\n", status, cfg->qfile);
1292   qsq  = esl_sq_CreateDigital(abc);
1293 
1294 
1295 
1296   /* Outer loop over sequence queries */
1297   while ((qstatus = esl_sqio_Read(qfp, qsq)) == eslOK)
1298     {
1299       P7_PIPELINE     *pli      = NULL;		  /* processing pipeline                      */
1300       P7_TOPHITS      *th       = NULL;        	  /* top-scoring sequence hits                */
1301       P7_OPROFILE     *om       = NULL;           /* optimized query profile                  */
1302 
1303       SEQ_BLOCK        block;
1304 
1305       status = 0;
1306       MPI_Send(&status, 1, MPI_INT, 0, HMMER_READY_TAG, MPI_COMM_WORLD);
1307 
1308       if (qsq->n == 0) continue; /* skip zero length seqs as if they aren't even present */
1309 
1310       esl_stopwatch_Start(w);
1311 
1312       /* Build the model */
1313       p7_SingleBuilder(bld, qsq, bg, NULL, NULL, NULL, &om); /* bypass HMM - only need model */
1314 
1315       /* Create processing pipeline and hit list */
1316       th  = p7_tophits_Create();
1317       pli = p7_pipeline_Create(go, om->M, 100, FALSE, p7_SEARCH_SEQS); /* L_hint = 100 is just a dummy for now */
1318       p7_pli_NewModel(pli, om, bg);
1319 
1320       /* receive a sequence block from the master */
1321       MPI_Recv(&block, 3, MPI_LONG_LONG_INT, 0, HMMER_BLOCK_TAG, MPI_COMM_WORLD, &mpistatus);
1322       while (block.count > 0)
1323 	{
1324 	  uint64_t length = 0;
1325 	  uint64_t count  = block.count;
1326 
1327 	  status = esl_sqfile_Position(dbfp, block.offset);
1328 	  if (status != eslOK) mpi_failure("Cannot position sequence database to %ld\n", block.offset);
1329 
1330 	  while (count > 0 && (sstatus = esl_sqio_Read(dbfp, dbsq)) == eslOK)
1331 	    {
1332 	      length = dbsq->eoff - block.offset + 1;
1333 
1334 	      p7_pli_NewSeq(pli, dbsq);
1335 	      p7_bg_SetLength(bg, dbsq->n);
1336 	      p7_oprofile_ReconfigLength(om, dbsq->n);
1337 
1338 	      p7_Pipeline(pli, om, bg, dbsq, NULL, th);
1339 
1340 	      esl_sq_Reuse(dbsq);
1341 	      p7_pipeline_Reuse(pli);
1342 
1343 	      --count;
1344 	    }
1345 
1346 	  /* lets do a little bit of sanity checking here to make sure the blocks are the same */
1347 	  if (count > 0)              mpi_failure("Block count mismatch - expected %ld found %ld at offset %ld\n",  block.count,  block.count - count, block.offset);
1348 	  if (block.length != length) mpi_failure("Block length mismatch - expected %ld found %ld at offset %ld\n", block.length, length,              block.offset);
1349 
1350 	  /* inform the master we need another block of sequences */
1351 	  status = 0;
1352 	  MPI_Send(&status, 1, MPI_INT, 0, HMMER_READY_TAG, MPI_COMM_WORLD);
1353 
1354 	  /* wait for the next block of sequences */
1355 	  MPI_Recv(&block, 3, MPI_LONG_LONG_INT, 0, HMMER_BLOCK_TAG, MPI_COMM_WORLD, &mpistatus);
1356 	}
1357 
1358       esl_stopwatch_Stop(w);
1359 
1360       /* Send the top hits back to the master. */
1361       p7_tophits_MPISend(th, 0, HMMER_TOPHITS_TAG, MPI_COMM_WORLD,  &mpi_buf, &mpi_size);
1362       p7_pipeline_MPISend(pli, 0, HMMER_PIPELINE_TAG, MPI_COMM_WORLD,  &mpi_buf, &mpi_size);
1363 
1364       p7_tophits_Destroy(th);
1365       p7_pipeline_Destroy(pli);
1366       p7_oprofile_Destroy(om);
1367       esl_sq_Reuse(qsq);
1368     } /* end outer loop over query sequences */
1369   if      (qstatus == eslEFORMAT) mpi_failure("Parse failed (sequence file %s):\n%s\n",
1370 				 	      qfp->filename, esl_sqfile_GetErrorBuf(qfp));
1371   else if (qstatus != eslEOF)     mpi_failure("Unexpected error %d reading sequence file %s",
1372 					      qstatus, qfp->filename);
1373 
1374   status = 0;
1375   MPI_Send(&status, 1, MPI_INT, 0, HMMER_TERMINATING_TAG, MPI_COMM_WORLD);
1376 
1377   if (mpi_buf != NULL) free(mpi_buf);
1378 
1379   p7_bg_Destroy(bg);
1380 
1381   esl_sqfile_Close(dbfp);
1382   esl_sqfile_Close(qfp);
1383   esl_stopwatch_Destroy(w);
1384   esl_sq_Destroy(dbsq);
1385   esl_sq_Destroy(qsq);
1386   p7_builder_Destroy(bld);
1387   esl_alphabet_Destroy(abc);
1388   return eslOK;
1389 }
1390 #endif /*HMMER_MPI*/
1391 
1392 
1393 static int
serial_loop(WORKER_INFO * info,ESL_SQFILE * dbfp,int n_targetseqs)1394 serial_loop(WORKER_INFO *info, ESL_SQFILE *dbfp, int n_targetseqs)
1395 {
1396   int      sstatus   = eslOK;
1397   ESL_SQ   *dbsq     = NULL;   /* one target sequence (digital)  */
1398   int seq_cnt = 0;
1399 
1400   dbsq = esl_sq_CreateDigital(info->om->abc);
1401 
1402   /* Main loop: */
1403   while ((n_targetseqs==-1 || seq_cnt<n_targetseqs) && (sstatus = esl_sqio_Read(dbfp, dbsq)) == eslOK)
1404     {
1405       p7_pli_NewSeq(info->pli, dbsq);
1406       p7_bg_SetLength(info->bg, dbsq->n);
1407       p7_oprofile_ReconfigLength(info->om, dbsq->n);
1408 
1409       p7_Pipeline(info->pli, info->om, info->bg, dbsq, NULL, info->th);
1410 
1411       seq_cnt++;
1412       esl_sq_Reuse(dbsq);
1413       p7_pipeline_Reuse(info->pli);
1414     }
1415 
1416   if (n_targetseqs!=-1 && seq_cnt==n_targetseqs)
1417     sstatus = eslEOF;
1418 
1419   esl_sq_Destroy(dbsq);
1420 
1421   return sstatus;
1422 }
1423 
1424 #ifdef HMMER_THREADS
1425 static int
thread_loop(ESL_THREADS * obj,ESL_WORK_QUEUE * queue,ESL_SQFILE * dbfp,int n_targetseqs)1426 thread_loop(ESL_THREADS *obj, ESL_WORK_QUEUE *queue, ESL_SQFILE *dbfp, int n_targetseqs)
1427 {
1428   int  status  = eslOK;
1429   int  sstatus = eslOK;
1430   int  eofCount = 0;
1431   ESL_SQ_BLOCK *block;
1432   void         *newBlock;
1433 
1434   esl_workqueue_Reset(queue);
1435   esl_threads_WaitForStart(obj);
1436 
1437   status = esl_workqueue_ReaderUpdate(queue, NULL, &newBlock);
1438   if (status != eslOK) p7_Fail("Work queue reader failed");
1439 
1440   /* Main loop: */
1441   while (sstatus == eslOK)
1442     {
1443       block = (ESL_SQ_BLOCK *) newBlock;
1444 
1445       if (n_targetseqs == 0)
1446       {
1447         block->count = 0;
1448         sstatus = eslEOF;
1449       } else {
1450         sstatus = esl_sqio_ReadBlock(dbfp, block, -1, n_targetseqs, FALSE);
1451         n_targetseqs -= block->count;
1452       }
1453 
1454       if (sstatus == eslEOF)
1455       {
1456         if (eofCount < esl_threads_GetWorkerCount(obj)) sstatus = eslOK;
1457         ++eofCount;
1458       }
1459 
1460       if (sstatus == eslOK)
1461       {
1462         status = esl_workqueue_ReaderUpdate(queue, block, &newBlock);
1463         if (status != eslOK) p7_Fail("Work queue reader failed");
1464       }
1465     }
1466 
1467   status = esl_workqueue_ReaderUpdate(queue, block, NULL);
1468   if (status != eslOK) p7_Fail("Work queue reader failed");
1469 
1470   if (sstatus == eslEOF)
1471     {
1472       /* wait for all the threads to complete */
1473       esl_threads_WaitForFinish(obj);
1474       esl_workqueue_Complete(queue);
1475     }
1476 
1477   return sstatus;
1478 }
1479 
1480 static void
pipeline_thread(void * arg)1481 pipeline_thread(void *arg)
1482 {
1483   int i;
1484   int status;
1485   int workeridx;
1486   WORKER_INFO   *info;
1487   ESL_THREADS   *obj;
1488   ESL_SQ_BLOCK  *block = NULL;
1489   void          *newBlock;
1490 
1491   impl_Init();
1492 
1493   obj = (ESL_THREADS *) arg;
1494   esl_threads_Started(obj, &workeridx);
1495 
1496   info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx);
1497 
1498   status = esl_workqueue_WorkerUpdate(info->queue, NULL, &newBlock);
1499   if (status != eslOK) p7_Fail("Work queue worker failed");
1500 
1501   /* loop until all blocks have been processed */
1502   block = (ESL_SQ_BLOCK *) newBlock;
1503   while (block->count > 0)
1504     {
1505       /* Main loop: */
1506       for (i = 0; i < block->count; ++i)
1507 	{
1508 	  ESL_SQ *dbsq = block->list + i;
1509 
1510 	  p7_pli_NewSeq(info->pli, dbsq);
1511 	  p7_bg_SetLength(info->bg, dbsq->n);
1512 	  p7_oprofile_ReconfigLength(info->om, dbsq->n);
1513 
1514 	  p7_Pipeline(info->pli, info->om, info->bg, dbsq, NULL, info->th);
1515 
1516 	  esl_sq_Reuse(dbsq);
1517 	  p7_pipeline_Reuse(info->pli);
1518 	}
1519 
1520       status = esl_workqueue_WorkerUpdate(info->queue, block, &newBlock);
1521       if (status != eslOK) p7_Fail("Work queue worker failed");
1522 
1523       block = (ESL_SQ_BLOCK *) newBlock;
1524     }
1525 
1526   status = esl_workqueue_WorkerUpdate(info->queue, block, NULL);
1527   if (status != eslOK) p7_Fail("Work queue worker failed");
1528 
1529   esl_threads_Finished(obj, workeridx);
1530   return;
1531 }
1532 #endif   /* HMMER_THREADS */
1533 
1534 
1535