1 /* Input/output of HMMER3 HMMs in HMMER2 save file formats:
2  * for backwards compatibility.
3  *
4  * Contents:
5  *    1. Writing profiles in HMMER2 format.
6  */
7 #include "p7_config.h"
8 
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include <string.h>
12 
13 #include "hmmer.h"
14 #include "easel.h"
15 
16 static int h2_multiline(FILE *fp, const char *pfx, char *s);
17 static int printprob(FILE *fp, int fieldwidth, float p, float null);
18 
19 /*****************************************************************
20  *= 1. Writing profiles in HMMER2 format
21  *****************************************************************/
22 
23 /* Function:  p7_h2io_WriteASCII()
24  * Synopsis:  Write an H3 HMM in HMMER2 compatible format
25  *
26  * Purpose:   Write HMM <hmm> to stream <fp> in HMMER2 ASCII save
27  *            file format.
28  *
29  *            HMMER2 saved the null model and the search configuration
30  *            (local vs. glocal, for example) as part of its HMM file;
31  *            H3 only saves the core HMM. The HMMER2 file is created
32  *            for HMMER2's default ``ls mode'' (glocal) with default
33  *            null model transitions and default special state
34  *            transitions (NECJ).
35  *
36  *            Optional statistical calibration and alignment checksum
37  *            are not written, because for these H3 and H2 differ too
38  *            much.
39  *
40  * Args:      fp   - stream to write save file format to
41  *            hmm  - HMM to save
42  *
43  * Returns:   <eslOK> on success.
44  *
45  * Throws:    <eslEMEM> on allocation error.
46  *
47  *            <eslEINVAL> if <hmm> can't be converted; for example, if
48  *            it is not in a protein or nucleic acid alphabet (H2
49  *            requires biosequence in its save files).
50  *
51  *            <eslEWRITE> if any write fails; for example, if the
52  *            disk fills up.
53  */
54 int
p7_h2io_WriteASCII(FILE * fp,P7_HMM * hmm)55 p7_h2io_WriteASCII(FILE *fp, P7_HMM *hmm)
56 {
57   P7_BG *bg;			/* H2 saves null model in HMM file   */
58   int    k;                     /* counter for nodes                 */
59   int    x;                     /* counter for symbols               */
60   int    ts;			/* counter for state transitions     */
61   float  pmove,ploop;		/* default H2 null model transitions */
62   int    status;
63 
64   if ((bg = p7_bg_Create(hmm->abc)) == NULL) { status = eslEMEM; goto ERROR; }
65 
66   /* magic header */
67   if (fprintf(fp, "HMMER2.0  [converted from %s]\n", HMMER_VERSION) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
68   if (fprintf(fp, "NAME  %s\n", hmm->name)                          < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
69   if (hmm->acc  && fprintf(fp, "ACC   %s\n", hmm->acc)              < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
70   if (hmm->desc && fprintf(fp, "DESC  %s\n", hmm->desc)             < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
71   if (fprintf(fp, "LENG  %d\n", hmm->M)                             < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
72 
73   if      (hmm->abc->type == eslAMINO)   { if (fprintf(fp, "ALPH  Amino\n")   < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
74   else if (hmm->abc->type == eslDNA)     { if (fprintf(fp, "ALPH  Nucleic\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
75   else if (hmm->abc->type == eslRNA)     { if (fprintf(fp, "ALPH  Nucleic\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
76   else    ESL_XEXCEPTION(eslEINVAL, "Only protein, DNA, RNA HMMs can be saved in H2 format");
77 
78   if (fprintf(fp, "RF    %s\n", (hmm->flags & p7H_RF)  ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
79   if (fprintf(fp, "CS    %s\n", (hmm->flags & p7H_CS)  ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
80   if (fprintf(fp, "MAP   %s\n", (hmm->flags & p7H_MAP) ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
81   /* H3 consensus line has no counterpart in H2 */
82 
83   if (hmm->comlog != NULL)  { if ( (status = h2_multiline(fp, "COM   ",     hmm->comlog)) != eslOK) goto ERROR; }
84   if (hmm->nseq   != -1)    { if (           fprintf     (fp, "NSEQ  %d\n", hmm->nseq)     < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
85   if (hmm->ctime  != NULL)  { if (           fprintf     (fp, "DATE  %s\n", hmm->ctime)    < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
86 
87   /* Checksum is not written; H2 and H3 use different checksum algorithms */
88 
89   if (hmm->flags & p7H_GA) { if (fprintf(fp, "GA    %.1f %.1f\n", hmm->cutoff[p7_GA1], hmm->cutoff[p7_GA2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
90   if (hmm->flags & p7H_TC) { if (fprintf(fp, "TC    %.1f %.1f\n", hmm->cutoff[p7_TC1], hmm->cutoff[p7_TC2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
91   if (hmm->flags & p7H_NC) { if (fprintf(fp, "NC    %.1f %.1f\n", hmm->cutoff[p7_NC1], hmm->cutoff[p7_NC2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
92 
93   /* in H3, the HMM does not include NECJ; these are part of the profile.
94    * for emulating H2 output, assume default LS config */
95   if (fputs("XT     ", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
96   pmove = ( (hmm->abc->type == eslAMINO) ?   1./351. :    1./1001.);
97   ploop = ( (hmm->abc->type == eslAMINO) ? 350./351. : 1000./1001.);
98   if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR;	/* NB */
99   if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR;	/* NN */
100   if ( (status = printprob(fp, 6,   0.5, 1.0)) != eslOK) goto ERROR;	/* EC */
101   if ( (status = printprob(fp, 6,   0.5, 1.0)) != eslOK) goto ERROR;	/* EJ */
102   if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR;	/* CT */
103   if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR;	/* CC */
104   if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR;	/* JB */
105   if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR;	/* JJ */
106   if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
107 
108   /* Save the default H2 null model transitions, not H3's null model transitions  */
109   if (fprintf(fp, "NULT   ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
110   if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR;	/* 1-p1 */
111   if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR;	/* p1   */
112   if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
113 
114   /* but null emissions really are the H3 null model emissions */
115   if (fputs("NULE   ", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
116   for (x = 0; x < hmm->abc->K; x++)
117     { if ( (status = printprob(fp, 6, bg->f[x], 1./(float)hmm->abc->K)) != eslOK) goto ERROR; }
118   if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
119 
120   /* Don't save stats; H3 local alignment stats are different from H2 calibration */
121 
122   /* The main model section */
123   if (fprintf(fp, "HMM      ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
124   for (x = 0; x < hmm->abc->K; x++)
125     { if (fprintf(fp, "  %c    ", hmm->abc->sym[x]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
126   if (fprintf(fp, "\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
127   if (fprintf(fp, "       %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
128 	      "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d", "b->m", "m->e") < 0)
129     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
130 
131   /* Print HMM parameters (main section of the save file) */
132   if (fprintf(fp, "      ")                                    < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
133   if ( (status = printprob(fp, 6, 1.-hmm->t[0][p7H_MD], 1.0)) != eslOK) goto ERROR;
134   if (fprintf(fp, " %6s", "*")                                 < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
135   if ( (status = printprob(fp, 6, hmm->t[0][p7H_MD], 1.0))    != eslOK) goto ERROR;
136   if (fputc('\n', fp)                                          < 0)     ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
137 
138   for (k = 1; k <= hmm->M; k++)
139     {
140 				/* Line 1: k, match emissions, map */
141       if (fprintf(fp, " %5d ", k)              < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
142       for (x = 0; x < hmm->abc->K; x++)
143 	if ( (status = printprob(fp, 6, hmm->mat[k][x], bg->f[x])) != eslOK) goto ERROR;
144       if (hmm->flags & p7H_MAP)
145 	{ if (fprintf(fp, " %5d", hmm->map[k]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
146       if (fputc('\n', fp)                      < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
147 				/* Line 2: RF and insert emissions */
148       if (fprintf(fp, " %5c ", hmm->flags & p7H_RF ? hmm->rf[k] : '-') < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
149       for (x = 0; x < hmm->abc->K; x++)
150 	if ( (status = printprob(fp, 6, ((k < hmm->M) ? hmm->ins[k][x] : 0.0), bg->f[x])) != eslOK) goto ERROR;
151       if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
152 				/* Line 3: CS and transition probs */
153       if (fprintf(fp, " %5c ", hmm->flags & p7H_CS ? hmm->cs[k] : '-') < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
154       for (ts = 0; ts < 7; ts++)
155 	if ( (status = printprob(fp, 6, ((k < hmm->M) ? hmm->t[k][ts] : 0.0), 1.0)) != eslOK) goto ERROR;
156       if ( (status = printprob(fp, 6, ((k==1)     ? hmm->t[0][p7H_MM] : 0.0), 1.0)) != eslOK) goto ERROR;
157       if ( (status = printprob(fp, 6, ((k<hmm->M) ? 0.0: 1.0), 1.0))                != eslOK) goto ERROR;
158       if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
159 
160     }
161   if (fputs("//\n", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
162 
163   p7_bg_Destroy(bg);
164   return eslOK;
165 
166  ERROR:
167   p7_bg_Destroy(bg);
168   return status;
169 }
170 
171 /* h2_multiline()
172  *
173  * Used to print the command log to HMMER2 ASCII save files.
174  * H3 records command numbers in brackets, as in "COM [1] hmmbuild ..."
175  * H2 just records commands, as in "COM   hmmbuild ...".
176  * Compare p7_hmmfile.c::multiline().
177  *
178  * Given a record (like the comlog) that contains
179  * multiple lines, print it as multiple lines with
180  * a given prefix. e.g.:
181  *
182  * given:   "COM   ", "foo\nbar\nbaz"
183  * print:   COM   foo
184  *          COM   bar
185  *          COM   baz
186  *
187  * If <s> is NULL, no-op. Otherwise <s> must be a <NUL>-terminated
188  * string.  It does not matter if it ends in <\n> or not. <pfx>
189  * must be a valid <NUL>-terminated string; it may be empty.
190  *
191  * Args:     fp:   FILE to print to
192  *           pfx:  prefix for each line
193  *           s:    line to break up and print; tolerates a NULL
194  *
195  * Returns:  <eslOK> on success.
196  *
197  * Throws:   <eslEWRITE> on any write error.
198  */
199 static int
h2_multiline(FILE * fp,const char * pfx,char * s)200 h2_multiline(FILE *fp, const char *pfx, char *s)
201 {
202   char *sptr  = s;
203   char *end   = NULL;
204   int   n     = 0;
205 
206   do {
207     end = strchr(sptr, '\n');
208 
209     if (end != NULL) 		             /* if there's no \n left, end == NULL */
210       {
211 	n = end - sptr;	                     /* n chars exclusive of \n */
212 	if (fprintf(fp, "%s ", pfx)            < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
213 	if (fwrite(sptr, sizeof(char), n, fp) != n) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed");   /* using fwrite lets us write fixed # of chars   */
214 	if (fprintf(fp, "\n")                  < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed");   /* while writing \n w/ printf allows newline conversion */
215 	sptr += n + 1;	                     /* +1 to get past \n */
216       }
217     else
218       {
219 	if (fprintf(fp, "%s %s\n", pfx, sptr) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); /* last line */
220       }
221   } while (end != NULL  && *sptr != '\0');   /* *sptr == 0 if <s> terminates with a \n */
222   return eslOK;
223 }
224 
225 
226 /* printprob()
227  * Print a probability (with a leading space), formatted
228  * for an H2 ASCII save file.
229  *
230  * Returns: <eslOK> on success.
231  *
232  * Throws:  <eslEWRITE> on any write failure.
233  */
234 static int
printprob(FILE * fp,int fieldwidth,float p,float null)235 printprob(FILE *fp, int fieldwidth, float p, float null)
236 {
237   if      (p == 0.0)                { if (fprintf(fp, " %*s", fieldwidth, "*")                                       < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
238   else if (null == 1.0 && p == 1.0) { if (fprintf(fp, " %*d", fieldwidth, 0)                                         < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
239   else                              { if (fprintf(fp, " %*d", fieldwidth, (int) floor(0.5 + 1442.695 * log(p/null))) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
240   return eslOK;
241 }
242 
243 
244