1 /* Input/output of HMMER3 HMMs in HMMER2 save file formats:
2 * for backwards compatibility.
3 *
4 * Contents:
5 * 1. Writing profiles in HMMER2 format.
6 */
7 #include "p7_config.h"
8
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include <string.h>
12
13 #include "hmmer.h"
14 #include "easel.h"
15
16 static int h2_multiline(FILE *fp, const char *pfx, char *s);
17 static int printprob(FILE *fp, int fieldwidth, float p, float null);
18
19 /*****************************************************************
20 *= 1. Writing profiles in HMMER2 format
21 *****************************************************************/
22
23 /* Function: p7_h2io_WriteASCII()
24 * Synopsis: Write an H3 HMM in HMMER2 compatible format
25 *
26 * Purpose: Write HMM <hmm> to stream <fp> in HMMER2 ASCII save
27 * file format.
28 *
29 * HMMER2 saved the null model and the search configuration
30 * (local vs. glocal, for example) as part of its HMM file;
31 * H3 only saves the core HMM. The HMMER2 file is created
32 * for HMMER2's default ``ls mode'' (glocal) with default
33 * null model transitions and default special state
34 * transitions (NECJ).
35 *
36 * Optional statistical calibration and alignment checksum
37 * are not written, because for these H3 and H2 differ too
38 * much.
39 *
40 * Args: fp - stream to write save file format to
41 * hmm - HMM to save
42 *
43 * Returns: <eslOK> on success.
44 *
45 * Throws: <eslEMEM> on allocation error.
46 *
47 * <eslEINVAL> if <hmm> can't be converted; for example, if
48 * it is not in a protein or nucleic acid alphabet (H2
49 * requires biosequence in its save files).
50 *
51 * <eslEWRITE> if any write fails; for example, if the
52 * disk fills up.
53 */
54 int
p7_h2io_WriteASCII(FILE * fp,P7_HMM * hmm)55 p7_h2io_WriteASCII(FILE *fp, P7_HMM *hmm)
56 {
57 P7_BG *bg; /* H2 saves null model in HMM file */
58 int k; /* counter for nodes */
59 int x; /* counter for symbols */
60 int ts; /* counter for state transitions */
61 float pmove,ploop; /* default H2 null model transitions */
62 int status;
63
64 if ((bg = p7_bg_Create(hmm->abc)) == NULL) { status = eslEMEM; goto ERROR; }
65
66 /* magic header */
67 if (fprintf(fp, "HMMER2.0 [converted from %s]\n", HMMER_VERSION) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
68 if (fprintf(fp, "NAME %s\n", hmm->name) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
69 if (hmm->acc && fprintf(fp, "ACC %s\n", hmm->acc) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
70 if (hmm->desc && fprintf(fp, "DESC %s\n", hmm->desc) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
71 if (fprintf(fp, "LENG %d\n", hmm->M) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
72
73 if (hmm->abc->type == eslAMINO) { if (fprintf(fp, "ALPH Amino\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
74 else if (hmm->abc->type == eslDNA) { if (fprintf(fp, "ALPH Nucleic\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
75 else if (hmm->abc->type == eslRNA) { if (fprintf(fp, "ALPH Nucleic\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
76 else ESL_XEXCEPTION(eslEINVAL, "Only protein, DNA, RNA HMMs can be saved in H2 format");
77
78 if (fprintf(fp, "RF %s\n", (hmm->flags & p7H_RF) ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
79 if (fprintf(fp, "CS %s\n", (hmm->flags & p7H_CS) ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
80 if (fprintf(fp, "MAP %s\n", (hmm->flags & p7H_MAP) ? "yes" : "no") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
81 /* H3 consensus line has no counterpart in H2 */
82
83 if (hmm->comlog != NULL) { if ( (status = h2_multiline(fp, "COM ", hmm->comlog)) != eslOK) goto ERROR; }
84 if (hmm->nseq != -1) { if ( fprintf (fp, "NSEQ %d\n", hmm->nseq) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
85 if (hmm->ctime != NULL) { if ( fprintf (fp, "DATE %s\n", hmm->ctime) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
86
87 /* Checksum is not written; H2 and H3 use different checksum algorithms */
88
89 if (hmm->flags & p7H_GA) { if (fprintf(fp, "GA %.1f %.1f\n", hmm->cutoff[p7_GA1], hmm->cutoff[p7_GA2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
90 if (hmm->flags & p7H_TC) { if (fprintf(fp, "TC %.1f %.1f\n", hmm->cutoff[p7_TC1], hmm->cutoff[p7_TC2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
91 if (hmm->flags & p7H_NC) { if (fprintf(fp, "NC %.1f %.1f\n", hmm->cutoff[p7_NC1], hmm->cutoff[p7_NC2]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
92
93 /* in H3, the HMM does not include NECJ; these are part of the profile.
94 * for emulating H2 output, assume default LS config */
95 if (fputs("XT ", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
96 pmove = ( (hmm->abc->type == eslAMINO) ? 1./351. : 1./1001.);
97 ploop = ( (hmm->abc->type == eslAMINO) ? 350./351. : 1000./1001.);
98 if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR; /* NB */
99 if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR; /* NN */
100 if ( (status = printprob(fp, 6, 0.5, 1.0)) != eslOK) goto ERROR; /* EC */
101 if ( (status = printprob(fp, 6, 0.5, 1.0)) != eslOK) goto ERROR; /* EJ */
102 if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR; /* CT */
103 if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR; /* CC */
104 if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR; /* JB */
105 if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR; /* JJ */
106 if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
107
108 /* Save the default H2 null model transitions, not H3's null model transitions */
109 if (fprintf(fp, "NULT ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
110 if ( (status = printprob(fp, 6, ploop, 1.0)) != eslOK) goto ERROR; /* 1-p1 */
111 if ( (status = printprob(fp, 6, pmove, 1.0)) != eslOK) goto ERROR; /* p1 */
112 if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
113
114 /* but null emissions really are the H3 null model emissions */
115 if (fputs("NULE ", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
116 for (x = 0; x < hmm->abc->K; x++)
117 { if ( (status = printprob(fp, 6, bg->f[x], 1./(float)hmm->abc->K)) != eslOK) goto ERROR; }
118 if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
119
120 /* Don't save stats; H3 local alignment stats are different from H2 calibration */
121
122 /* The main model section */
123 if (fprintf(fp, "HMM ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
124 for (x = 0; x < hmm->abc->K; x++)
125 { if (fprintf(fp, " %c ", hmm->abc->sym[x]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
126 if (fprintf(fp, "\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
127 if (fprintf(fp, " %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
128 "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d", "b->m", "m->e") < 0)
129 ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
130
131 /* Print HMM parameters (main section of the save file) */
132 if (fprintf(fp, " ") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
133 if ( (status = printprob(fp, 6, 1.-hmm->t[0][p7H_MD], 1.0)) != eslOK) goto ERROR;
134 if (fprintf(fp, " %6s", "*") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
135 if ( (status = printprob(fp, 6, hmm->t[0][p7H_MD], 1.0)) != eslOK) goto ERROR;
136 if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
137
138 for (k = 1; k <= hmm->M; k++)
139 {
140 /* Line 1: k, match emissions, map */
141 if (fprintf(fp, " %5d ", k) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
142 for (x = 0; x < hmm->abc->K; x++)
143 if ( (status = printprob(fp, 6, hmm->mat[k][x], bg->f[x])) != eslOK) goto ERROR;
144 if (hmm->flags & p7H_MAP)
145 { if (fprintf(fp, " %5d", hmm->map[k]) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
146 if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
147 /* Line 2: RF and insert emissions */
148 if (fprintf(fp, " %5c ", hmm->flags & p7H_RF ? hmm->rf[k] : '-') < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
149 for (x = 0; x < hmm->abc->K; x++)
150 if ( (status = printprob(fp, 6, ((k < hmm->M) ? hmm->ins[k][x] : 0.0), bg->f[x])) != eslOK) goto ERROR;
151 if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
152 /* Line 3: CS and transition probs */
153 if (fprintf(fp, " %5c ", hmm->flags & p7H_CS ? hmm->cs[k] : '-') < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
154 for (ts = 0; ts < 7; ts++)
155 if ( (status = printprob(fp, 6, ((k < hmm->M) ? hmm->t[k][ts] : 0.0), 1.0)) != eslOK) goto ERROR;
156 if ( (status = printprob(fp, 6, ((k==1) ? hmm->t[0][p7H_MM] : 0.0), 1.0)) != eslOK) goto ERROR;
157 if ( (status = printprob(fp, 6, ((k<hmm->M) ? 0.0: 1.0), 1.0)) != eslOK) goto ERROR;
158 if (fputc('\n', fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
159
160 }
161 if (fputs("//\n", fp) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
162
163 p7_bg_Destroy(bg);
164 return eslOK;
165
166 ERROR:
167 p7_bg_Destroy(bg);
168 return status;
169 }
170
171 /* h2_multiline()
172 *
173 * Used to print the command log to HMMER2 ASCII save files.
174 * H3 records command numbers in brackets, as in "COM [1] hmmbuild ..."
175 * H2 just records commands, as in "COM hmmbuild ...".
176 * Compare p7_hmmfile.c::multiline().
177 *
178 * Given a record (like the comlog) that contains
179 * multiple lines, print it as multiple lines with
180 * a given prefix. e.g.:
181 *
182 * given: "COM ", "foo\nbar\nbaz"
183 * print: COM foo
184 * COM bar
185 * COM baz
186 *
187 * If <s> is NULL, no-op. Otherwise <s> must be a <NUL>-terminated
188 * string. It does not matter if it ends in <\n> or not. <pfx>
189 * must be a valid <NUL>-terminated string; it may be empty.
190 *
191 * Args: fp: FILE to print to
192 * pfx: prefix for each line
193 * s: line to break up and print; tolerates a NULL
194 *
195 * Returns: <eslOK> on success.
196 *
197 * Throws: <eslEWRITE> on any write error.
198 */
199 static int
h2_multiline(FILE * fp,const char * pfx,char * s)200 h2_multiline(FILE *fp, const char *pfx, char *s)
201 {
202 char *sptr = s;
203 char *end = NULL;
204 int n = 0;
205
206 do {
207 end = strchr(sptr, '\n');
208
209 if (end != NULL) /* if there's no \n left, end == NULL */
210 {
211 n = end - sptr; /* n chars exclusive of \n */
212 if (fprintf(fp, "%s ", pfx) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed");
213 if (fwrite(sptr, sizeof(char), n, fp) != n) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); /* using fwrite lets us write fixed # of chars */
214 if (fprintf(fp, "\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); /* while writing \n w/ printf allows newline conversion */
215 sptr += n + 1; /* +1 to get past \n */
216 }
217 else
218 {
219 if (fprintf(fp, "%s %s\n", pfx, sptr) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); /* last line */
220 }
221 } while (end != NULL && *sptr != '\0'); /* *sptr == 0 if <s> terminates with a \n */
222 return eslOK;
223 }
224
225
226 /* printprob()
227 * Print a probability (with a leading space), formatted
228 * for an H2 ASCII save file.
229 *
230 * Returns: <eslOK> on success.
231 *
232 * Throws: <eslEWRITE> on any write failure.
233 */
234 static int
printprob(FILE * fp,int fieldwidth,float p,float null)235 printprob(FILE *fp, int fieldwidth, float p, float null)
236 {
237 if (p == 0.0) { if (fprintf(fp, " %*s", fieldwidth, "*") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
238 else if (null == 1.0 && p == 1.0) { if (fprintf(fp, " %*d", fieldwidth, 0) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
239 else { if (fprintf(fp, " %*d", fieldwidth, (int) floor(0.5 + 1442.695 * log(p/null))) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "h2 profile write failed"); }
240 return eslOK;
241 }
242
243
244