1 /* cp9.c based on HMMER 2.x's plan7.c
2  * EPN 02.27.06
3  *
4  * Support for CM-Plan 9 HMM data structure, CP9_t.
5  *
6  * All of the CP9 code is based on analogous HMMER 2.x Plan 7 HMM
7  * code.
8  *
9  */
10 
11 #include "esl_config.h"
12 #include "p7_config.h"
13 #include "config.h"
14 
15 #include <stdio.h>
16 #include <string.h>
17 #include <ctype.h>
18 #include <time.h>
19 #include <math.h>
20 
21 #include "easel.h"
22 #include "esl_msa.h"
23 #include "esl_vectorops.h"
24 
25 #include "hmmer.h"
26 
27 #include "infernal.h"
28 
29 /* Functions: AllocCPlan9(), AllocCPlan9Shell(), AllocCPlan9Body(), FreeCPlan9()
30  *
31  * Purpose:   Allocate or free a CPlan9 HMM structure.
32  *            Can either allocate all at once (AllocCPlan9()) or
33  *            in two steps (AllocCPlan9Shell(), AllocCPlan9Body()).
34  *            The two step method is used in CP9_hmmio.c where we start
35  *            parsing the header of an HMM file but don't
36  *            see the size of the model 'til partway thru the header.
37  */
38 CP9_t *
AllocCPlan9(int M,const ESL_ALPHABET * abc)39 AllocCPlan9(int M, const ESL_ALPHABET *abc)
40 {
41   CP9_t *hmm;
42 
43   hmm = AllocCPlan9Shell();
44   AllocCPlan9Body(hmm, M, abc);
45   return hmm;
46 }
47 CP9_t *
AllocCPlan9Shell(void)48 AllocCPlan9Shell(void)
49 {
50   int    status;
51   CP9_t *hmm;
52 
53   ESL_ALLOC(hmm, sizeof(CP9_t));
54   hmm->abc = NULL;
55 
56   hmm->M = 0;
57 
58   hmm->t      = NULL;
59   hmm->mat    = NULL;
60   hmm->ins    = NULL;
61 
62   hmm->tsc     = hmm->msc     = hmm->isc     = NULL;
63   hmm->tsc_mem = hmm->msc_mem = hmm->isc_mem = NULL;
64 
65   hmm->begin  = NULL;
66   hmm->end    = NULL;
67 
68   hmm->bsc = hmm->bsc_mem = NULL;
69   hmm->esc = hmm->esc_mem = NULL;
70 
71   hmm->otsc = NULL;
72 
73   hmm->has_el      = NULL;
74   hmm->el_from_ct  = NULL;
75   hmm->el_from_idx = NULL;
76   hmm->el_from_cmnd= NULL;
77 
78   hmm->flags = 0;
79   return hmm;
80 
81  ERROR:
82   cm_Fail("Memory allocation error.\n");
83   return NULL; /* never reached */
84 }
85 
86 void
AllocCPlan9Body(CP9_t * hmm,int M,const ESL_ALPHABET * abc)87 AllocCPlan9Body(CP9_t *hmm, int M, const ESL_ALPHABET *abc)
88 {
89   int status;
90   int k, x;
91 
92   hmm->abc = abc;
93 
94   hmm->M = M;
95 
96   ESL_ALLOC(hmm->t,   (M+1) *           sizeof(float *));
97   ESL_ALLOC(hmm->mat, (M+1) *           sizeof(float *));
98   ESL_ALLOC(hmm->ins, (M+1) *           sizeof(float *));
99   ESL_ALLOC(hmm->t[0],(cp9_NTRANS*(M+1)) *  sizeof(float));
100   ESL_ALLOC(hmm->mat[0],(abc->K*(M+1))   * sizeof(float));
101   ESL_ALLOC(hmm->ins[0],(abc->K*(M+1))   * sizeof(float));
102 
103   ESL_ALLOC(hmm->tsc, cp9_NTRANS     *   sizeof(int *));
104   ESL_ALLOC(hmm->msc, hmm->abc->Kp   *   sizeof(int *));
105   ESL_ALLOC(hmm->isc, hmm->abc->Kp   *   sizeof(int *));
106   ESL_ALLOC(hmm->tsc_mem,(cp9_NTRANS*(M+1))     *       sizeof(int));
107   ESL_ALLOC(hmm->msc_mem,(hmm->abc->Kp*(M+1)) * sizeof(int));
108   ESL_ALLOC(hmm->isc_mem,(hmm->abc->Kp*(M+1)) * sizeof(int));
109 
110   hmm->tsc[0] = hmm->tsc_mem;
111   hmm->msc[0] = hmm->msc_mem;
112   hmm->isc[0] = hmm->isc_mem;
113 
114   /* transition scores reordered */
115   ESL_ALLOC(hmm->otsc, sizeof(int)   * (M+1)  * cp9O_NTRANS);
116 
117   /* note allocation strategy for important 2D arrays -- trying
118    * to keep locality as much as possible, cache efficiency etc.
119    */
120   for (k = 1; k <= M; k++) {
121     hmm->mat[k] = hmm->mat[0] + k * abc->K;
122     hmm->ins[k] = hmm->ins[0] + k * abc->K;
123     hmm->t[k]   = hmm->t[0]   + k * cp9_NTRANS;
124   }
125   for (x = 1; x < hmm->abc->Kp; x++) {
126     hmm->msc[x] = hmm->msc[0] + x * (M+1);
127     hmm->isc[x] = hmm->isc[0] + x * (M+1);
128   }
129   for (x = 0; x < cp9_NTRANS; x++)
130     hmm->tsc[x] = hmm->tsc[0] + x * (M+1);
131 
132   /* tsc[x][0] is used as a boundary condition sometimes [Viterbi()],
133    * so set to -inf always.
134    */
135   for (x = 0; x < cp9_NTRANS; x++)
136     hmm->tsc[x][0] = -INFTY;
137 
138   ESL_ALLOC(hmm->begin, (M+1) * sizeof(float));
139   ESL_ALLOC(hmm->end,   (M+1) * sizeof(float));
140 
141   ESL_ALLOC(hmm->bsc_mem, (M+1) * sizeof(int));
142   ESL_ALLOC(hmm->esc_mem, (M+1) * sizeof(int));
143 
144   ESL_ALLOC(hmm->null, (abc->K) * sizeof(float));
145 
146   hmm->bsc = hmm->bsc_mem;
147   hmm->esc = hmm->esc_mem;
148 
149   /* end[0], begin[0], esc[0] and bsc[0] are never
150    * used, set them to 0. and -INFTY */
151   hmm->end[0] = hmm->begin[0] = -INFTY;
152   hmm->esc[0] = hmm->bsc[0] = -INFTY;
153 
154   ESL_ALLOC(hmm->has_el,     (M+1) * sizeof(int));
155   ESL_ALLOC(hmm->el_from_ct, (M+2) * sizeof(int));
156   ESL_ALLOC(hmm->el_from_idx,(M+2) * sizeof(int *));
157   ESL_ALLOC(hmm->el_from_cmnd,(M+2) * sizeof(int *));
158   esl_vec_ISet(hmm->has_el,     M+1, FALSE);
159   esl_vec_ISet(hmm->el_from_ct, M+1, 0);
160   for(k = 0; k <= M+1; k++) {
161     hmm->el_from_idx[k] = NULL;
162     hmm->el_from_cmnd[k] = NULL;
163   }
164 
165   return;
166  ERROR:
167   cm_Fail("Memory allocation error.");
168 }
169 
170 
171 void
FreeCPlan9(CP9_t * hmm)172 FreeCPlan9(CP9_t *hmm)
173 {
174   int k;
175   if (hmm->null       != NULL) free(hmm->null);
176   if (hmm->bsc_mem    != NULL) free(hmm->bsc_mem);
177   if (hmm->begin      != NULL) free(hmm->begin);
178   if (hmm->esc_mem    != NULL) free(hmm->esc_mem);
179   if (hmm->end        != NULL) free(hmm->end);
180   if (hmm->msc_mem    != NULL) free(hmm->msc_mem);
181   if (hmm->isc_mem    != NULL) free(hmm->isc_mem);
182   if (hmm->tsc_mem    != NULL) free(hmm->tsc_mem);
183   if (hmm->mat        != NULL) free(hmm->mat[0]);
184   if (hmm->ins        != NULL) free(hmm->ins[0]);
185   if (hmm->t          != NULL) free(hmm->t[0]);
186   if (hmm->msc        != NULL) free(hmm->msc);
187   if (hmm->isc        != NULL) free(hmm->isc);
188   if (hmm->tsc        != NULL) free(hmm->tsc);
189   if (hmm->otsc       != NULL) free(hmm->otsc);
190   if (hmm->mat        != NULL) free(hmm->mat);
191   if (hmm->ins        != NULL) free(hmm->ins);
192   if (hmm->t          != NULL) free(hmm->t);
193   if (hmm->has_el     != NULL) free(hmm->has_el);
194   if (hmm->el_from_ct != NULL) free(hmm->el_from_ct);
195   if(hmm->el_from_idx != NULL)
196     {
197       for(k = 0; k <= hmm->M+1; k++)
198 	if(hmm->el_from_idx[k] != NULL)
199 	  free(hmm->el_from_idx[k]);
200       free(hmm->el_from_idx);
201     }
202   if(hmm->el_from_cmnd != NULL)
203     {
204       for(k = 0; k <= hmm->M+1; k++)
205 	if(hmm->el_from_cmnd[k] != NULL)
206 	  free(hmm->el_from_cmnd[k]);
207       free(hmm->el_from_cmnd);
208     }
209 
210   free(hmm);
211 }
212 
213 /* Function: ZeroCPlan9()
214  *
215  * Purpose:  Zeros the counts/probabilities fields in a model.
216  *           Leaves null model untouched.
217  */
218 void
ZeroCPlan9(CP9_t * hmm)219 ZeroCPlan9(CP9_t *hmm)
220 {
221   int k;
222   esl_vec_FSet(hmm->ins[0], hmm->abc->K, 0.);
223   esl_vec_FSet(hmm->t[0], cp9_NTRANS, 0.);
224   for (k = 1; k <= hmm->M; k++)
225     {
226       esl_vec_FSet(hmm->t[k], cp9_NTRANS, 0.);
227       esl_vec_FSet(hmm->mat[k], hmm->abc->K, 0.);
228       esl_vec_FSet(hmm->ins[k], hmm->abc->K, 0.);
229     }
230   esl_vec_FSet(hmm->begin+1, hmm->M, 0.);
231   esl_vec_FSet(hmm->end+1, hmm->M, 0.);
232 
233   /* initialize the el_* data structures, these
234    * depend on the CM guide tree and will be set
235    * when the CP9 is constructed from the CM.
236    */
237   for (k = 0; k <= (hmm->M); k++)
238     {
239       hmm->has_el[k]      = FALSE;
240       hmm->el_from_ct[k]  = 0;
241       hmm->el_from_idx[k] = NULL;
242       hmm->el_from_cmnd[k] = NULL;
243     }
244   /* special case hmm->M+1 corresponds to the E state here */
245   hmm->el_from_ct[(hmm->M+1)]  = 0;
246   hmm->el_from_idx[(hmm->M+1)] = NULL;
247   hmm->el_from_cmnd[(hmm->M+1)] = NULL;
248 
249   hmm->flags &= ~CPLAN9_HASBITS;	/* invalidates scores */
250   hmm->flags &= ~CPLAN9_HASPROB;	/* invalidates probabilities */
251   hmm->el_self = 0.; /* EL self transition probability */
252 }
253 
254 
255 /* Function: CPlan9SetNullModel()
256  *
257  * Purpose:  Set the null model section of an HMM.
258  *           Convenience function.
259  *
260  *            Assumes null* is allocated to hmm->abc->K
261  */
262 void
CPlan9SetNullModel(CP9_t * hmm,float * null,float p1)263 CPlan9SetNullModel(CP9_t *hmm, float *null, float p1)
264 {
265   int x;
266   for (x = 0; x < hmm->abc->K; x++)
267     hmm->null[x] = null[x];
268   hmm->p1 = p1;
269 }
270 
271 /* Function: cp9_Clone()
272  * Date:     EPN, Thu Dec  1 10:30:18 2011
273  * Purpose:  Clone a CP9 HMM CP9_t object
274  */
275 CP9_t *
cp9_Clone(CP9_t * cp9)276 cp9_Clone(CP9_t *cp9)
277 {
278   CP9_t *cp9dup = NULL;
279   int    status;
280 
281   if ((cp9dup = AllocCPlan9(cp9->M, cp9->abc)) == NULL) return NULL;
282   if ((status = cp9_Copy(cp9, cp9dup)) != eslOK) goto ERROR;
283   return cp9dup;
284 
285  ERROR:
286   FreeCPlan9(cp9dup);
287   return NULL;
288 }
289 
290 
291 /* Function:  cp9_Copy()
292  * Synopsis:  Copy a CM plan 9 HMM.
293  *
294  * Purpose:   Copies cp9 hmm <src> to cp9 hmm <dst>, where <dst>
295  *            has already been allocated to be of sufficient size.
296  *
297  *            <src> should be properly normalized, no check is done to
298  *            ensure that. If <src> is logoddsified (src->flags &
299  *            CPLAN9_HASBITS) its bit scores will be copied to <dst>,
300  *            otherwise they are invalid and won't be copied.
301  *
302  * Returns:   <eslOK> on success.
303  *
304  * Throws:    <eslEMEM> on allocation error; <eslEINVAL> if <dst> is too small
305  *            to fit <src>.
306  */
307 int
cp9_Copy(const CP9_t * src,CP9_t * dst)308 cp9_Copy(const CP9_t *src, CP9_t *dst)
309 {
310   int status;
311   int k;
312   int src_has_bits = (src->flags & CPLAN9_HASBITS) ? TRUE : FALSE;
313 
314   if (src->M != dst->M) return eslEINVAL;
315 
316   dst->abc = src->abc;
317 
318   for(k = 0; k <= src->M; k++) {
319     esl_vec_FCopy(src->t[k],   cp9_NTRANS,  dst->t[k]);
320     esl_vec_FCopy(src->mat[k], src->abc->K, dst->mat[k]);
321     esl_vec_FCopy(src->ins[k], src->abc->K, dst->ins[k]);
322   }
323   esl_vec_FCopy(src->begin, src->M+1, dst->begin);
324   esl_vec_FCopy(src->end,   src->M+1, dst->end);
325   if(src_has_bits) {
326     esl_vec_ICopy(src->bsc_mem, src->M+1, dst->bsc_mem);
327     esl_vec_ICopy(src->esc_mem, src->M+1, dst->esc_mem);
328   }
329 
330   /* exploit linear-memory of these 2d arrays */
331   if(src_has_bits) {
332     esl_vec_ICopy(src->tsc_mem, cp9_NTRANS   * (src->M+1), dst->tsc_mem);
333     esl_vec_ICopy(src->msc_mem, src->abc->Kp * (src->M+1), dst->msc_mem);
334     esl_vec_ICopy(src->isc_mem, src->abc->Kp * (src->M+1), dst->isc_mem);
335     esl_vec_ICopy(src->otsc,    cp9O_NTRANS  * (src->M+1), dst->otsc);
336   }
337 
338   /* EL info */
339   dst->el_self     = src->el_self;
340   dst->el_selfsc   = src->el_selfsc;
341   esl_vec_ICopy(src->has_el,     src->M+1,    dst->has_el);
342   esl_vec_ICopy(src->el_from_ct, src->M+2,    dst->el_from_ct);
343   for(k = 0; k <= src->M+1; k++) {
344     if(src->el_from_ct[k] > 0) {
345       ESL_ALLOC(dst->el_from_idx[k],  sizeof(int) * src->el_from_ct[k]);
346       ESL_ALLOC(dst->el_from_cmnd[k], sizeof(int) * src->el_from_ct[k]);
347       esl_vec_ICopy(src->el_from_idx[k],  src->el_from_ct[k], dst->el_from_idx[k]);
348       esl_vec_ICopy(src->el_from_cmnd[k], src->el_from_ct[k], dst->el_from_cmnd[k]);
349     }
350   }
351 
352   dst->null2_omega = src->null2_omega;
353   dst->null3_omega = src->null3_omega;
354   esl_vec_FCopy(src->null, src->abc->K, dst->null);
355 
356   dst->p1    = src->p1;
357   dst->flags = src->flags;
358 
359   return eslOK;
360 
361  ERROR:
362   return status;
363 }
364 
365 
366 /* Function: cp9_Sizeof()
367  * Date:     EPN, Wed Jan 18 05:46:02 2012
368  * Purpose:  Return size of a CP9_t object, in Mb.
369  */
370 float
cp9_Sizeof(CP9_t * cp9)371 cp9_Sizeof(CP9_t *cp9)
372 {
373   float bytes = 0.;
374   int   M = cp9->M; /* for convenience */
375 
376   bytes += sizeof(CP9_t);
377   if(cp9->M > 0 && cp9->abc != NULL) {
378     /* from AllocCPlan9Body() */
379     bytes += sizeof(float *) * (M+1); /* hmm->t */
380     bytes += sizeof(float *) * (M+1); /* hmm->mat */
381     bytes += sizeof(float *) * (M+1); /* hmm->ins */
382     bytes += sizeof(float)   * (cp9_NTRANS*(M+1)); /* hmm->t[0] */
383     bytes += sizeof(float)   * (cp9->abc->K*(M+1));     /* hmm->mat[0] */
384     bytes += sizeof(float)   * (cp9->abc->K*(M+1));     /* hmm->ins[0] */
385 
386     bytes += sizeof(int *) * cp9_NTRANS;   /* hmm->tsc */
387     bytes += sizeof(int *) * cp9->abc->Kp; /* hmm->msc */
388     bytes += sizeof(int *) * cp9->abc->Kp; /* hmm->isc */
389     bytes += sizeof(int)   * (cp9_NTRANS*(M+1)); /* hmm->tsc_mem */
390     bytes += sizeof(int)   * (cp9->abc->Kp*(M+1)); /* hmm->msc_mem */
391     bytes += sizeof(int)   * (cp9->abc->Kp*(M+1)); /* hmm->isc_mem */
392 
393     bytes += sizeof(int)   * (M+1) * cp9O_NTRANS; /* hmm->otsc */
394 
395     bytes += sizeof(float) * (M+1); /* hmm->begin */
396     bytes += sizeof(float) * (M+1); /* hmm->end   */
397     bytes += sizeof(int)   * (M+1); /* hmm->bsc_mem */
398     bytes += sizeof(int)   * (M+1); /* hmm->esc_mem */
399     bytes += sizeof(float) * (cp9->abc->K); /* hmm->null */
400 
401     bytes += sizeof(int)   * (M+1); /* hmm->has_el */
402     bytes += sizeof(int)   * (M+2); /* hmm->el_from_ct */
403     bytes += sizeof(int *) * (M+2); /* hmm->el_from_idx */
404     bytes += sizeof(int *) * (M+2); /* hmm->el_from_cmnd */
405 
406     int k;
407     for(k = 0; k <= cp9->M+1; k++) {
408       if(cp9->el_from_ct[k] > 0) {
409 	bytes += sizeof(int) * cp9->el_from_ct[k]; /* hmm->el_from_idx */
410 	bytes += sizeof(int) * cp9->el_from_ct[k]; /* hmm->el_from_cmnd */
411       }
412     }
413   }
414 
415   return (bytes / 1000000.);
416 }
417 
418 /* Function: cp9_GetNCalcsPerResidue()
419  * Date:     EPN, Thu Jan 17 06:12:37 2008
420  *
421  * Returns: eslOK on success, eslEINCOMPAT on contract violation.
422  *          <ret_cp9_ncalcs_per_res> set as millions of DP calculations
423  *          per residue for the CP9 HMM.
424  */
425 int
cp9_GetNCalcsPerResidue(CP9_t * cp9,char * errbuf,float * ret_cp9_ncalcs_per_res)426 cp9_GetNCalcsPerResidue(CP9_t *cp9, char *errbuf, float *ret_cp9_ncalcs_per_res)
427 {
428   int cp9_ntrans;
429   float cp9_ncalcs_per_res;
430 
431   if(cp9 == NULL)                    ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_GetNCalcsPerRes(), cp9 == NULL.");
432   if(ret_cp9_ncalcs_per_res == NULL) ESL_FAIL(eslEINCOMPAT, errbuf, "cp9_GetNCalcsPerRes(), ret_cp9_ncalcs_per_res == NULL.");
433 
434   /* determine millions of CP9 DP calcs per residue */
435   cp9_ntrans = NHMMSTATETYPES * NHMMSTATETYPES; /* 3*3 = 9 transitions in global mode */
436   if(cp9->flags & CPLAN9_LOCAL_BEGIN) cp9_ntrans++;
437   if(cp9->flags & CPLAN9_LOCAL_END)   cp9_ntrans++;
438   if(cp9->flags & CPLAN9_EL)          cp9_ntrans++;
439   cp9_ncalcs_per_res = (cp9_ntrans * cp9->M) / 1000000.; /* convert to millions of calcs per residue */
440 
441   *ret_cp9_ncalcs_per_res = cp9_ncalcs_per_res;
442   return eslOK;
443 }
444 
445 
446 /* Function: CP9Logoddsify()
447  *
448  * Purpose:  Take an HMM with valid probabilities, and
449  *           fill in the integer log-odds score section of the model.
450  *
451  *    Notes on log-odds scores (simplified from plan7.c):
452  *         type of parameter       probability        score
453  *         -----------------       -----------        ------
454  *         any emission             p_x           log_2 p_x/null_x
455  *         any transition           t_x           log_2 t_x
456  *
457  * Args:      hmm          - the hmm to calculate scores in.
458  *
459  * Return:    (void)
460  *            hmm scores are filled in.
461  */
462 void
CP9Logoddsify(CP9_t * hmm)463 CP9Logoddsify(CP9_t *hmm)
464 {
465   /*printf("in CP9Logoddsify()\n");*/
466   int k;			/* counter for model position */
467   int x;			/* counter for symbols        */
468   int *sc;
469   int status;
470 
471   if (hmm->flags & CPLAN9_HASBITS) return;
472 
473   ESL_ALLOC(sc, hmm->abc->Kp * sizeof(int));
474 
475   /* Symbol emission scores
476    */
477 
478   sc[hmm->abc->K]     = -INFTY; /* gap character */
479   sc[hmm->abc->Kp-1]  = -INFTY; /* missing data character */
480   sc[hmm->abc->Kp-2]  = -INFTY; /* non-residue data character */
481 
482   /* Insert emission scores, relies on sc[K, Kp-1] initialization to -inf above */
483   for (k = 0; k <= hmm->M; k++) {
484     for (x = 0; x < hmm->abc->K; x++)
485       sc[x] = Prob2Score(hmm->ins[k][x], hmm->null[x]);
486     esl_abc_IExpectScVec(hmm->abc, sc, hmm->null);
487     for (x = 0; x < hmm->abc->Kp; x++) {
488       hmm->isc[x][k] = sc[x];
489     }
490   }
491 
492   /* Match emission scores, relies on sc[K, Kp-1] initialization to -inf above */
493   for (k = 1; k <= hmm->M; k++) {
494     for (x = 0; x < hmm->abc->K; x++)
495       sc[x] = Prob2Score(hmm->mat[k][x], hmm->null[x]);
496     esl_abc_IExpectScVec(hmm->abc, sc, hmm->null);
497     for (x = 0; x < hmm->abc->Kp; x++) {
498       hmm->msc[x][k] = sc[x];
499     }
500   }
501 
502   for (k = 0; k <= hmm->M; k++)
503     {
504       hmm->tsc[CTMM][k] = Prob2Score(hmm->t[k][CTMM], 1.0);
505       hmm->tsc[CTMI][k] = Prob2Score(hmm->t[k][CTMI], 1.0);
506       hmm->tsc[CTMD][k] = Prob2Score(hmm->t[k][CTMD], 1.0);
507       hmm->tsc[CTMEL][k] = Prob2Score(hmm->t[k][CTMEL], 1.0);
508       hmm->tsc[CTIM][k] = Prob2Score(hmm->t[k][CTIM], 1.0);
509       hmm->tsc[CTII][k] = Prob2Score(hmm->t[k][CTII], 1.0);
510       hmm->tsc[CTID][k] = Prob2Score(hmm->t[k][CTID], 1.0);
511       if(k != 0)
512 	{
513 	  hmm->tsc[CTDM][k] = Prob2Score(hmm->t[k][CTDM], 1.0);
514 	  hmm->tsc[CTDI][k] = Prob2Score(hmm->t[k][CTDI], 1.0);
515 	  hmm->tsc[CTDD][k] = Prob2Score(hmm->t[k][CTDD], 1.0);
516 	}
517       else
518 	{
519 	  hmm->tsc[CTDM][k] = -INFTY;
520 	  hmm->tsc[CTDD][k] = -INFTY; /*D_0 doesn't exist*/
521 	  hmm->tsc[CTDI][k] = -INFTY;
522 	}
523       if(k != 0)
524 	{
525 	  hmm->bsc[k]   = Prob2Score(hmm->begin[k], 1.0);
526 	  hmm->esc[k] = Prob2Score(hmm->end[k], 1.0);
527 	}
528     }
529   hmm->el_selfsc = Prob2Score(hmm->el_self, 1.0);
530 
531   /* Finally, fill the efficiently reordered transition scores for this HMM. */
532   for (k = 0 ; k <= hmm->M; k++) {
533     int *otsc_k = hmm->otsc + k*cp9O_NTRANS;
534     otsc_k[cp9O_MM] = hmm->tsc[CTMM][k];
535     otsc_k[cp9O_MI] = hmm->tsc[CTMI][k];
536     otsc_k[cp9O_MD] = hmm->tsc[CTMD][k];
537     otsc_k[cp9O_IM] = hmm->tsc[CTIM][k];
538     otsc_k[cp9O_II] = hmm->tsc[CTII][k];
539     otsc_k[cp9O_DM] = hmm->tsc[CTDM][k];
540     otsc_k[cp9O_DD] = hmm->tsc[CTDD][k];
541     otsc_k[cp9O_ID] = hmm->tsc[CTID][k];
542     otsc_k[cp9O_DI] = hmm->tsc[CTDI][k];
543     otsc_k[cp9O_BM] = hmm->bsc[k];
544     otsc_k[cp9O_MEL]= hmm->tsc[CTMEL][k];
545     otsc_k[cp9O_ME] = hmm->esc[k];
546   }
547 
548   hmm->flags |= CPLAN9_HASBITS;	/* raise the log-odds ready flag */
549 
550   free(sc);
551 
552   return;
553 
554  ERROR:
555   cm_Fail("Memory allocation error.\n");
556   return; /* never reached */
557 }
558 
559 /* Function: CPlan9Renormalize()
560  *
561  * Purpose:  Take an HMM in counts form, and renormalize
562  *           all of its probability vectors. Also enforces
563  *           CM Plan9 restrictions on nonexistent transitions.
564  *
565  * Args:     hmm - the model to renormalize.
566  *
567  * Return:   (void)
568  *           hmm is changed.
569  */
570 void
CPlan9Renormalize(CP9_t * hmm)571 CPlan9Renormalize(CP9_t *hmm)
572 {
573   int   k;			/* counter for model position */
574   float d;			/* denominator */
575 
576 				/* match emissions */
577   esl_vec_FSet(hmm->mat[0], hmm->abc->K, 0.);   /*M_0 is B state, non-emitter*/
578   for (k = 1; k <= hmm->M; k++)
579     esl_vec_FNorm(hmm->mat[k], hmm->abc->K);
580 				/* insert emissions */
581   for (k = 0; k <= hmm->M; k++)
582     esl_vec_FNorm(hmm->ins[k], hmm->abc->K);
583 
584 				/* begin transitions */
585   d = esl_vec_FSum(hmm->begin+1, hmm->M) + hmm->t[0][CTMI] + hmm->t[0][CTMD] + hmm->t[0][CTMEL];
586   /* hmm->t[0][CTMEL] should always be 0., can't local end from the M_0 == B state */
587   esl_vec_FScale(hmm->begin+1, hmm->M, 1./d);
588   hmm->t[0][CTMI] /= d;
589   hmm->t[0][CTMD] /= d;
590   hmm->t[0][CTMEL] /= d;
591 
592   esl_vec_FNorm(hmm->t[0] + cp9_TRANS_INSERT_OFFSET, cp9_TRANS_NINSERT);	        /* transitions out of insert for node 0 (state N)*/
593   esl_vec_FSet (hmm->t[0] + cp9_TRANS_DELETE_OFFSET, cp9_TRANS_NDELETE, 0.);
594 				/* main model transitions */
595   for (k = 1; k <= hmm->M; k++) /* safe for node M too, hmm->t[hmm->M][CTMM] should be 0.*/
596     {
597       d = esl_vec_FSum(hmm->t[k], cp9_TRANS_NMATCH) + hmm->end[k];
598       esl_vec_FScale(hmm->t[k], cp9_TRANS_NMATCH, 1./d);
599       hmm->end[k] /= d;
600 
601       esl_vec_FNorm(hmm->t[k] + cp9_TRANS_INSERT_OFFSET, cp9_TRANS_NINSERT);	/* insert */
602       esl_vec_FNorm(hmm->t[k] + cp9_TRANS_DELETE_OFFSET, cp9_TRANS_NDELETE);	/* delete */
603     }
604                                  /* null model emissions */
605   esl_vec_FNorm(hmm->null, hmm->abc->K);
606 
607   hmm->flags &= ~CPLAN9_HASBITS;	/* clear the log-odds ready flag */
608   hmm->flags |= CPLAN9_HASPROB;	/* set the probabilities OK flag */
609 }
610