1 /* cp9.c based on HMMER 2.x's plan7.c
2 * EPN 02.27.06
3 *
4 * Support for CM-Plan 9 HMM data structure, CP9_t.
5 *
6 * All of the CP9 code is based on analogous HMMER 2.x Plan 7 HMM
7 * code.
8 *
9 */
10
11 #include "esl_config.h"
12 #include "p7_config.h"
13 #include "config.h"
14
15 #include <stdio.h>
16 #include <string.h>
17 #include <ctype.h>
18 #include <time.h>
19 #include <math.h>
20
21 #include "easel.h"
22 #include "esl_msa.h"
23 #include "esl_vectorops.h"
24
25 #include "hmmer.h"
26
27 #include "infernal.h"
28
29 /* Functions: AllocCPlan9(), AllocCPlan9Shell(), AllocCPlan9Body(), FreeCPlan9()
30 *
31 * Purpose: Allocate or free a CPlan9 HMM structure.
32 * Can either allocate all at once (AllocCPlan9()) or
33 * in two steps (AllocCPlan9Shell(), AllocCPlan9Body()).
34 * The two step method is used in CP9_hmmio.c where we start
35 * parsing the header of an HMM file but don't
36 * see the size of the model 'til partway thru the header.
37 */
38 CP9_t *
AllocCPlan9(int M,const ESL_ALPHABET * abc)39 AllocCPlan9(int M, const ESL_ALPHABET *abc)
40 {
41 CP9_t *hmm;
42
43 hmm = AllocCPlan9Shell();
44 AllocCPlan9Body(hmm, M, abc);
45 return hmm;
46 }
47 CP9_t *
AllocCPlan9Shell(void)48 AllocCPlan9Shell(void)
49 {
50 int status;
51 CP9_t *hmm;
52
53 ESL_ALLOC(hmm, sizeof(CP9_t));
54 hmm->abc = NULL;
55
56 hmm->M = 0;
57
58 hmm->t = NULL;
59 hmm->mat = NULL;
60 hmm->ins = NULL;
61
62 hmm->tsc = hmm->msc = hmm->isc = NULL;
63 hmm->tsc_mem = hmm->msc_mem = hmm->isc_mem = NULL;
64
65 hmm->begin = NULL;
66 hmm->end = NULL;
67
68 hmm->bsc = hmm->bsc_mem = NULL;
69 hmm->esc = hmm->esc_mem = NULL;
70
71 hmm->otsc = NULL;
72
73 hmm->has_el = NULL;
74 hmm->el_from_ct = NULL;
75 hmm->el_from_idx = NULL;
76 hmm->el_from_cmnd= NULL;
77
78 hmm->flags = 0;
79 return hmm;
80
81 ERROR:
82 cm_Fail("Memory allocation error.\n");
83 return NULL; /* never reached */
84 }
85
86 void
AllocCPlan9Body(CP9_t * hmm,int M,const ESL_ALPHABET * abc)87 AllocCPlan9Body(CP9_t *hmm, int M, const ESL_ALPHABET *abc)
88 {
89 int status;
90 int k, x;
91
92 hmm->abc = abc;
93
94 hmm->M = M;
95
96 ESL_ALLOC(hmm->t, (M+1) * sizeof(float *));
97 ESL_ALLOC(hmm->mat, (M+1) * sizeof(float *));
98 ESL_ALLOC(hmm->ins, (M+1) * sizeof(float *));
99 ESL_ALLOC(hmm->t[0],(cp9_NTRANS*(M+1)) * sizeof(float));
100 ESL_ALLOC(hmm->mat[0],(abc->K*(M+1)) * sizeof(float));
101 ESL_ALLOC(hmm->ins[0],(abc->K*(M+1)) * sizeof(float));
102
103 ESL_ALLOC(hmm->tsc, cp9_NTRANS * sizeof(int *));
104 ESL_ALLOC(hmm->msc, hmm->abc->Kp * sizeof(int *));
105 ESL_ALLOC(hmm->isc, hmm->abc->Kp * sizeof(int *));
106 ESL_ALLOC(hmm->tsc_mem,(cp9_NTRANS*(M+1)) * sizeof(int));
107 ESL_ALLOC(hmm->msc_mem,(hmm->abc->Kp*(M+1)) * sizeof(int));
108 ESL_ALLOC(hmm->isc_mem,(hmm->abc->Kp*(M+1)) * sizeof(int));
109
110 hmm->tsc[0] = hmm->tsc_mem;
111 hmm->msc[0] = hmm->msc_mem;
112 hmm->isc[0] = hmm->isc_mem;
113
114 /* transition scores reordered */
115 ESL_ALLOC(hmm->otsc, sizeof(int) * (M+1) * cp9O_NTRANS);
116
117 /* note allocation strategy for important 2D arrays -- trying
118 * to keep locality as much as possible, cache efficiency etc.
119 */
120 for (k = 1; k <= M; k++) {
121 hmm->mat[k] = hmm->mat[0] + k * abc->K;
122 hmm->ins[k] = hmm->ins[0] + k * abc->K;
123 hmm->t[k] = hmm->t[0] + k * cp9_NTRANS;
124 }
125 for (x = 1; x < hmm->abc->Kp; x++) {
126 hmm->msc[x] = hmm->msc[0] + x * (M+1);
127 hmm->isc[x] = hmm->isc[0] + x * (M+1);
128 }
129 for (x = 0; x < cp9_NTRANS; x++)
130 hmm->tsc[x] = hmm->tsc[0] + x * (M+1);
131
132 /* tsc[x][0] is used as a boundary condition sometimes [Viterbi()],
133 * so set to -inf always.
134 */
135 for (x = 0; x < cp9_NTRANS; x++)
136 hmm->tsc[x][0] = -INFTY;
137
138 ESL_ALLOC(hmm->begin, (M+1) * sizeof(float));
139 ESL_ALLOC(hmm->end, (M+1) * sizeof(float));
140
141 ESL_ALLOC(hmm->bsc_mem, (M+1) * sizeof(int));
142 ESL_ALLOC(hmm->esc_mem, (M+1) * sizeof(int));
143
144 ESL_ALLOC(hmm->null, (abc->K) * sizeof(float));
145
146 hmm->bsc = hmm->bsc_mem;
147 hmm->esc = hmm->esc_mem;
148
149 /* end[0], begin[0], esc[0] and bsc[0] are never
150 * used, set them to 0. and -INFTY */
151 hmm->end[0] = hmm->begin[0] = -INFTY;
152 hmm->esc[0] = hmm->bsc[0] = -INFTY;
153
154 ESL_ALLOC(hmm->has_el, (M+1) * sizeof(int));
155 ESL_ALLOC(hmm->el_from_ct, (M+2) * sizeof(int));
156 ESL_ALLOC(hmm->el_from_idx,(M+2) * sizeof(int *));
157 ESL_ALLOC(hmm->el_from_cmnd,(M+2) * sizeof(int *));
158 esl_vec_ISet(hmm->has_el, M+1, FALSE);
159 esl_vec_ISet(hmm->el_from_ct, M+1, 0);
160 for(k = 0; k <= M+1; k++) {
161 hmm->el_from_idx[k] = NULL;
162 hmm->el_from_cmnd[k] = NULL;
163 }
164
165 return;
166 ERROR:
167 cm_Fail("Memory allocation error.");
168 }
169
170
171 void
FreeCPlan9(CP9_t * hmm)172 FreeCPlan9(CP9_t *hmm)
173 {
174 int k;
175 if (hmm->null != NULL) free(hmm->null);
176 if (hmm->bsc_mem != NULL) free(hmm->bsc_mem);
177 if (hmm->begin != NULL) free(hmm->begin);
178 if (hmm->esc_mem != NULL) free(hmm->esc_mem);
179 if (hmm->end != NULL) free(hmm->end);
180 if (hmm->msc_mem != NULL) free(hmm->msc_mem);
181 if (hmm->isc_mem != NULL) free(hmm->isc_mem);
182 if (hmm->tsc_mem != NULL) free(hmm->tsc_mem);
183 if (hmm->mat != NULL) free(hmm->mat[0]);
184 if (hmm->ins != NULL) free(hmm->ins[0]);
185 if (hmm->t != NULL) free(hmm->t[0]);
186 if (hmm->msc != NULL) free(hmm->msc);
187 if (hmm->isc != NULL) free(hmm->isc);
188 if (hmm->tsc != NULL) free(hmm->tsc);
189 if (hmm->otsc != NULL) free(hmm->otsc);
190 if (hmm->mat != NULL) free(hmm->mat);
191 if (hmm->ins != NULL) free(hmm->ins);
192 if (hmm->t != NULL) free(hmm->t);
193 if (hmm->has_el != NULL) free(hmm->has_el);
194 if (hmm->el_from_ct != NULL) free(hmm->el_from_ct);
195 if(hmm->el_from_idx != NULL)
196 {
197 for(k = 0; k <= hmm->M+1; k++)
198 if(hmm->el_from_idx[k] != NULL)
199 free(hmm->el_from_idx[k]);
200 free(hmm->el_from_idx);
201 }
202 if(hmm->el_from_cmnd != NULL)
203 {
204 for(k = 0; k <= hmm->M+1; k++)
205 if(hmm->el_from_cmnd[k] != NULL)
206 free(hmm->el_from_cmnd[k]);
207 free(hmm->el_from_cmnd);
208 }
209
210 free(hmm);
211 }
212
213 /* Function: ZeroCPlan9()
214 *
215 * Purpose: Zeros the counts/probabilities fields in a model.
216 * Leaves null model untouched.
217 */
218 void
ZeroCPlan9(CP9_t * hmm)219 ZeroCPlan9(CP9_t *hmm)
220 {
221 int k;
222 esl_vec_FSet(hmm->ins[0], hmm->abc->K, 0.);
223 esl_vec_FSet(hmm->t[0], cp9_NTRANS, 0.);
224 for (k = 1; k <= hmm->M; k++)
225 {
226 esl_vec_FSet(hmm->t[k], cp9_NTRANS, 0.);
227 esl_vec_FSet(hmm->mat[k], hmm->abc->K, 0.);
228 esl_vec_FSet(hmm->ins[k], hmm->abc->K, 0.);
229 }
230 esl_vec_FSet(hmm->begin+1, hmm->M, 0.);
231 esl_vec_FSet(hmm->end+1, hmm->M, 0.);
232
233 /* initialize the el_* data structures, these
234 * depend on the CM guide tree and will be set
235 * when the CP9 is constructed from the CM.
236 */
237 for (k = 0; k <= (hmm->M); k++)
238 {
239 hmm->has_el[k] = FALSE;
240 hmm->el_from_ct[k] = 0;
241 hmm->el_from_idx[k] = NULL;
242 hmm->el_from_cmnd[k] = NULL;
243 }
244 /* special case hmm->M+1 corresponds to the E state here */
245 hmm->el_from_ct[(hmm->M+1)] = 0;
246 hmm->el_from_idx[(hmm->M+1)] = NULL;
247 hmm->el_from_cmnd[(hmm->M+1)] = NULL;
248
249 hmm->flags &= ~CPLAN9_HASBITS; /* invalidates scores */
250 hmm->flags &= ~CPLAN9_HASPROB; /* invalidates probabilities */
251 hmm->el_self = 0.; /* EL self transition probability */
252 }
253
254
255 /* Function: CPlan9SetNullModel()
256 *
257 * Purpose: Set the null model section of an HMM.
258 * Convenience function.
259 *
260 * Assumes null* is allocated to hmm->abc->K
261 */
262 void
CPlan9SetNullModel(CP9_t * hmm,float * null,float p1)263 CPlan9SetNullModel(CP9_t *hmm, float *null, float p1)
264 {
265 int x;
266 for (x = 0; x < hmm->abc->K; x++)
267 hmm->null[x] = null[x];
268 hmm->p1 = p1;
269 }
270
271 /* Function: cp9_Clone()
272 * Date: EPN, Thu Dec 1 10:30:18 2011
273 * Purpose: Clone a CP9 HMM CP9_t object
274 */
275 CP9_t *
cp9_Clone(CP9_t * cp9)276 cp9_Clone(CP9_t *cp9)
277 {
278 CP9_t *cp9dup = NULL;
279 int status;
280
281 if ((cp9dup = AllocCPlan9(cp9->M, cp9->abc)) == NULL) return NULL;
282 if ((status = cp9_Copy(cp9, cp9dup)) != eslOK) goto ERROR;
283 return cp9dup;
284
285 ERROR:
286 FreeCPlan9(cp9dup);
287 return NULL;
288 }
289
290
291 /* Function: cp9_Copy()
292 * Synopsis: Copy a CM plan 9 HMM.
293 *
294 * Purpose: Copies cp9 hmm <src> to cp9 hmm <dst>, where <dst>
295 * has already been allocated to be of sufficient size.
296 *
297 * <src> should be properly normalized, no check is done to
298 * ensure that. If <src> is logoddsified (src->flags &
299 * CPLAN9_HASBITS) its bit scores will be copied to <dst>,
300 * otherwise they are invalid and won't be copied.
301 *
302 * Returns: <eslOK> on success.
303 *
304 * Throws: <eslEMEM> on allocation error; <eslEINVAL> if <dst> is too small
305 * to fit <src>.
306 */
307 int
cp9_Copy(const CP9_t * src,CP9_t * dst)308 cp9_Copy(const CP9_t *src, CP9_t *dst)
309 {
310 int status;
311 int k;
312 int src_has_bits = (src->flags & CPLAN9_HASBITS) ? TRUE : FALSE;
313
314 if (src->M != dst->M) return eslEINVAL;
315
316 dst->abc = src->abc;
317
318 for(k = 0; k <= src->M; k++) {
319 esl_vec_FCopy(src->t[k], cp9_NTRANS, dst->t[k]);
320 esl_vec_FCopy(src->mat[k], src->abc->K, dst->mat[k]);
321 esl_vec_FCopy(src->ins[k], src->abc->K, dst->ins[k]);
322 }
323 esl_vec_FCopy(src->begin, src->M+1, dst->begin);
324 esl_vec_FCopy(src->end, src->M+1, dst->end);
325 if(src_has_bits) {
326 esl_vec_ICopy(src->bsc_mem, src->M+1, dst->bsc_mem);
327 esl_vec_ICopy(src->esc_mem, src->M+1, dst->esc_mem);
328 }
329
330 /* exploit linear-memory of these 2d arrays */
331 if(src_has_bits) {
332 esl_vec_ICopy(src->tsc_mem, cp9_NTRANS * (src->M+1), dst->tsc_mem);
333 esl_vec_ICopy(src->msc_mem, src->abc->Kp * (src->M+1), dst->msc_mem);
334 esl_vec_ICopy(src->isc_mem, src->abc->Kp * (src->M+1), dst->isc_mem);
335 esl_vec_ICopy(src->otsc, cp9O_NTRANS * (src->M+1), dst->otsc);
336 }
337
338 /* EL info */
339 dst->el_self = src->el_self;
340 dst->el_selfsc = src->el_selfsc;
341 esl_vec_ICopy(src->has_el, src->M+1, dst->has_el);
342 esl_vec_ICopy(src->el_from_ct, src->M+2, dst->el_from_ct);
343 for(k = 0; k <= src->M+1; k++) {
344 if(src->el_from_ct[k] > 0) {
345 ESL_ALLOC(dst->el_from_idx[k], sizeof(int) * src->el_from_ct[k]);
346 ESL_ALLOC(dst->el_from_cmnd[k], sizeof(int) * src->el_from_ct[k]);
347 esl_vec_ICopy(src->el_from_idx[k], src->el_from_ct[k], dst->el_from_idx[k]);
348 esl_vec_ICopy(src->el_from_cmnd[k], src->el_from_ct[k], dst->el_from_cmnd[k]);
349 }
350 }
351
352 dst->null2_omega = src->null2_omega;
353 dst->null3_omega = src->null3_omega;
354 esl_vec_FCopy(src->null, src->abc->K, dst->null);
355
356 dst->p1 = src->p1;
357 dst->flags = src->flags;
358
359 return eslOK;
360
361 ERROR:
362 return status;
363 }
364
365
366 /* Function: cp9_Sizeof()
367 * Date: EPN, Wed Jan 18 05:46:02 2012
368 * Purpose: Return size of a CP9_t object, in Mb.
369 */
370 float
cp9_Sizeof(CP9_t * cp9)371 cp9_Sizeof(CP9_t *cp9)
372 {
373 float bytes = 0.;
374 int M = cp9->M; /* for convenience */
375
376 bytes += sizeof(CP9_t);
377 if(cp9->M > 0 && cp9->abc != NULL) {
378 /* from AllocCPlan9Body() */
379 bytes += sizeof(float *) * (M+1); /* hmm->t */
380 bytes += sizeof(float *) * (M+1); /* hmm->mat */
381 bytes += sizeof(float *) * (M+1); /* hmm->ins */
382 bytes += sizeof(float) * (cp9_NTRANS*(M+1)); /* hmm->t[0] */
383 bytes += sizeof(float) * (cp9->abc->K*(M+1)); /* hmm->mat[0] */
384 bytes += sizeof(float) * (cp9->abc->K*(M+1)); /* hmm->ins[0] */
385
386 bytes += sizeof(int *) * cp9_NTRANS; /* hmm->tsc */
387 bytes += sizeof(int *) * cp9->abc->Kp; /* hmm->msc */
388 bytes += sizeof(int *) * cp9->abc->Kp; /* hmm->isc */
389 bytes += sizeof(int) * (cp9_NTRANS*(M+1)); /* hmm->tsc_mem */
390 bytes += sizeof(int) * (cp9->abc->Kp*(M+1)); /* hmm->msc_mem */
391 bytes += sizeof(int) * (cp9->abc->Kp*(M+1)); /* hmm->isc_mem */
392
393 bytes += sizeof(int) * (M+1) * cp9O_NTRANS; /* hmm->otsc */
394
395 bytes += sizeof(float) * (M+1); /* hmm->begin */
396 bytes += sizeof(float) * (M+1); /* hmm->end */
397 bytes += sizeof(int) * (M+1); /* hmm->bsc_mem */
398 bytes += sizeof(int) * (M+1); /* hmm->esc_mem */
399 bytes += sizeof(float) * (cp9->abc->K); /* hmm->null */
400
401 bytes += sizeof(int) * (M+1); /* hmm->has_el */
402 bytes += sizeof(int) * (M+2); /* hmm->el_from_ct */
403 bytes += sizeof(int *) * (M+2); /* hmm->el_from_idx */
404 bytes += sizeof(int *) * (M+2); /* hmm->el_from_cmnd */
405
406 int k;
407 for(k = 0; k <= cp9->M+1; k++) {
408 if(cp9->el_from_ct[k] > 0) {
409 bytes += sizeof(int) * cp9->el_from_ct[k]; /* hmm->el_from_idx */
410 bytes += sizeof(int) * cp9->el_from_ct[k]; /* hmm->el_from_cmnd */
411 }
412 }
413 }
414
415 return (bytes / 1000000.);
416 }
417
418 /* Function: cp9_GetNCalcsPerResidue()
419 * Date: EPN, Thu Jan 17 06:12:37 2008
420 *
421 * Returns: eslOK on success, eslEINCOMPAT on contract violation.
422 * <ret_cp9_ncalcs_per_res> set as millions of DP calculations
423 * per residue for the CP9 HMM.
424 */
425 int
cp9_GetNCalcsPerResidue(CP9_t * cp9,char * errbuf,float * ret_cp9_ncalcs_per_res)426 cp9_GetNCalcsPerResidue(CP9_t *cp9, char *errbuf, float *ret_cp9_ncalcs_per_res)
427 {
428 int cp9_ntrans;
429 float cp9_ncalcs_per_res;
430
431 if(cp9 == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_GetNCalcsPerRes(), cp9 == NULL.");
432 if(ret_cp9_ncalcs_per_res == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_GetNCalcsPerRes(), ret_cp9_ncalcs_per_res == NULL.");
433
434 /* determine millions of CP9 DP calcs per residue */
435 cp9_ntrans = NHMMSTATETYPES * NHMMSTATETYPES; /* 3*3 = 9 transitions in global mode */
436 if(cp9->flags & CPLAN9_LOCAL_BEGIN) cp9_ntrans++;
437 if(cp9->flags & CPLAN9_LOCAL_END) cp9_ntrans++;
438 if(cp9->flags & CPLAN9_EL) cp9_ntrans++;
439 cp9_ncalcs_per_res = (cp9_ntrans * cp9->M) / 1000000.; /* convert to millions of calcs per residue */
440
441 *ret_cp9_ncalcs_per_res = cp9_ncalcs_per_res;
442 return eslOK;
443 }
444
445
446 /* Function: CP9Logoddsify()
447 *
448 * Purpose: Take an HMM with valid probabilities, and
449 * fill in the integer log-odds score section of the model.
450 *
451 * Notes on log-odds scores (simplified from plan7.c):
452 * type of parameter probability score
453 * ----------------- ----------- ------
454 * any emission p_x log_2 p_x/null_x
455 * any transition t_x log_2 t_x
456 *
457 * Args: hmm - the hmm to calculate scores in.
458 *
459 * Return: (void)
460 * hmm scores are filled in.
461 */
462 void
CP9Logoddsify(CP9_t * hmm)463 CP9Logoddsify(CP9_t *hmm)
464 {
465 /*printf("in CP9Logoddsify()\n");*/
466 int k; /* counter for model position */
467 int x; /* counter for symbols */
468 int *sc;
469 int status;
470
471 if (hmm->flags & CPLAN9_HASBITS) return;
472
473 ESL_ALLOC(sc, hmm->abc->Kp * sizeof(int));
474
475 /* Symbol emission scores
476 */
477
478 sc[hmm->abc->K] = -INFTY; /* gap character */
479 sc[hmm->abc->Kp-1] = -INFTY; /* missing data character */
480 sc[hmm->abc->Kp-2] = -INFTY; /* non-residue data character */
481
482 /* Insert emission scores, relies on sc[K, Kp-1] initialization to -inf above */
483 for (k = 0; k <= hmm->M; k++) {
484 for (x = 0; x < hmm->abc->K; x++)
485 sc[x] = Prob2Score(hmm->ins[k][x], hmm->null[x]);
486 esl_abc_IExpectScVec(hmm->abc, sc, hmm->null);
487 for (x = 0; x < hmm->abc->Kp; x++) {
488 hmm->isc[x][k] = sc[x];
489 }
490 }
491
492 /* Match emission scores, relies on sc[K, Kp-1] initialization to -inf above */
493 for (k = 1; k <= hmm->M; k++) {
494 for (x = 0; x < hmm->abc->K; x++)
495 sc[x] = Prob2Score(hmm->mat[k][x], hmm->null[x]);
496 esl_abc_IExpectScVec(hmm->abc, sc, hmm->null);
497 for (x = 0; x < hmm->abc->Kp; x++) {
498 hmm->msc[x][k] = sc[x];
499 }
500 }
501
502 for (k = 0; k <= hmm->M; k++)
503 {
504 hmm->tsc[CTMM][k] = Prob2Score(hmm->t[k][CTMM], 1.0);
505 hmm->tsc[CTMI][k] = Prob2Score(hmm->t[k][CTMI], 1.0);
506 hmm->tsc[CTMD][k] = Prob2Score(hmm->t[k][CTMD], 1.0);
507 hmm->tsc[CTMEL][k] = Prob2Score(hmm->t[k][CTMEL], 1.0);
508 hmm->tsc[CTIM][k] = Prob2Score(hmm->t[k][CTIM], 1.0);
509 hmm->tsc[CTII][k] = Prob2Score(hmm->t[k][CTII], 1.0);
510 hmm->tsc[CTID][k] = Prob2Score(hmm->t[k][CTID], 1.0);
511 if(k != 0)
512 {
513 hmm->tsc[CTDM][k] = Prob2Score(hmm->t[k][CTDM], 1.0);
514 hmm->tsc[CTDI][k] = Prob2Score(hmm->t[k][CTDI], 1.0);
515 hmm->tsc[CTDD][k] = Prob2Score(hmm->t[k][CTDD], 1.0);
516 }
517 else
518 {
519 hmm->tsc[CTDM][k] = -INFTY;
520 hmm->tsc[CTDD][k] = -INFTY; /*D_0 doesn't exist*/
521 hmm->tsc[CTDI][k] = -INFTY;
522 }
523 if(k != 0)
524 {
525 hmm->bsc[k] = Prob2Score(hmm->begin[k], 1.0);
526 hmm->esc[k] = Prob2Score(hmm->end[k], 1.0);
527 }
528 }
529 hmm->el_selfsc = Prob2Score(hmm->el_self, 1.0);
530
531 /* Finally, fill the efficiently reordered transition scores for this HMM. */
532 for (k = 0 ; k <= hmm->M; k++) {
533 int *otsc_k = hmm->otsc + k*cp9O_NTRANS;
534 otsc_k[cp9O_MM] = hmm->tsc[CTMM][k];
535 otsc_k[cp9O_MI] = hmm->tsc[CTMI][k];
536 otsc_k[cp9O_MD] = hmm->tsc[CTMD][k];
537 otsc_k[cp9O_IM] = hmm->tsc[CTIM][k];
538 otsc_k[cp9O_II] = hmm->tsc[CTII][k];
539 otsc_k[cp9O_DM] = hmm->tsc[CTDM][k];
540 otsc_k[cp9O_DD] = hmm->tsc[CTDD][k];
541 otsc_k[cp9O_ID] = hmm->tsc[CTID][k];
542 otsc_k[cp9O_DI] = hmm->tsc[CTDI][k];
543 otsc_k[cp9O_BM] = hmm->bsc[k];
544 otsc_k[cp9O_MEL]= hmm->tsc[CTMEL][k];
545 otsc_k[cp9O_ME] = hmm->esc[k];
546 }
547
548 hmm->flags |= CPLAN9_HASBITS; /* raise the log-odds ready flag */
549
550 free(sc);
551
552 return;
553
554 ERROR:
555 cm_Fail("Memory allocation error.\n");
556 return; /* never reached */
557 }
558
559 /* Function: CPlan9Renormalize()
560 *
561 * Purpose: Take an HMM in counts form, and renormalize
562 * all of its probability vectors. Also enforces
563 * CM Plan9 restrictions on nonexistent transitions.
564 *
565 * Args: hmm - the model to renormalize.
566 *
567 * Return: (void)
568 * hmm is changed.
569 */
570 void
CPlan9Renormalize(CP9_t * hmm)571 CPlan9Renormalize(CP9_t *hmm)
572 {
573 int k; /* counter for model position */
574 float d; /* denominator */
575
576 /* match emissions */
577 esl_vec_FSet(hmm->mat[0], hmm->abc->K, 0.); /*M_0 is B state, non-emitter*/
578 for (k = 1; k <= hmm->M; k++)
579 esl_vec_FNorm(hmm->mat[k], hmm->abc->K);
580 /* insert emissions */
581 for (k = 0; k <= hmm->M; k++)
582 esl_vec_FNorm(hmm->ins[k], hmm->abc->K);
583
584 /* begin transitions */
585 d = esl_vec_FSum(hmm->begin+1, hmm->M) + hmm->t[0][CTMI] + hmm->t[0][CTMD] + hmm->t[0][CTMEL];
586 /* hmm->t[0][CTMEL] should always be 0., can't local end from the M_0 == B state */
587 esl_vec_FScale(hmm->begin+1, hmm->M, 1./d);
588 hmm->t[0][CTMI] /= d;
589 hmm->t[0][CTMD] /= d;
590 hmm->t[0][CTMEL] /= d;
591
592 esl_vec_FNorm(hmm->t[0] + cp9_TRANS_INSERT_OFFSET, cp9_TRANS_NINSERT); /* transitions out of insert for node 0 (state N)*/
593 esl_vec_FSet (hmm->t[0] + cp9_TRANS_DELETE_OFFSET, cp9_TRANS_NDELETE, 0.);
594 /* main model transitions */
595 for (k = 1; k <= hmm->M; k++) /* safe for node M too, hmm->t[hmm->M][CTMM] should be 0.*/
596 {
597 d = esl_vec_FSum(hmm->t[k], cp9_TRANS_NMATCH) + hmm->end[k];
598 esl_vec_FScale(hmm->t[k], cp9_TRANS_NMATCH, 1./d);
599 hmm->end[k] /= d;
600
601 esl_vec_FNorm(hmm->t[k] + cp9_TRANS_INSERT_OFFSET, cp9_TRANS_NINSERT); /* insert */
602 esl_vec_FNorm(hmm->t[k] + cp9_TRANS_DELETE_OFFSET, cp9_TRANS_NDELETE); /* delete */
603 }
604 /* null model emissions */
605 esl_vec_FNorm(hmm->null, hmm->abc->K);
606
607 hmm->flags &= ~CPLAN9_HASBITS; /* clear the log-odds ready flag */
608 hmm->flags |= CPLAN9_HASPROB; /* set the probabilities OK flag */
609 }
610