1 /* model.c
2  * Allocation, initialization, free'ing of models.
3  *
4  * Includes code supporting both original node-based CM structure, as well
5  * as the modified, state-based CM structure used by the newer alignment
6  * implementations.
7  *
8  * SRE, Tue Sep  7 09:22:03 1993
9  *
10  */
11 
12 
13 #include <stdlib.h>
14 #include <string.h>
15 #include "structs.h"
16 #include "funcs.h"
17 
18 #ifdef MEMDEBUG
19 #include "dbmalloc.h"
20 #endif
21 
22 static void fill_state(struct istate_s *st, int nodeidx, int statetype, int offset);
23 static void copy_singlet_emissions(struct istate_s *st, double *emvec, double *rfreq);
24 static void copy_pairwise_emissions(struct istate_s *st, double *em, double *rfreq);
25 static void copy_state_transitions(struct istate_s *st, double *tvec, int tflags);
26 static void copy_pstate_transitions(struct pstate_s *st, double *tvec, int tflags);
27 static void fill_pstate(struct pstate_s *st, int nodeidx, int statetype, int offset);
28 
29 /* Function: AllocCM()
30  *
31  * Purpose:  Allocate for a model containing some number of nodes,
32  *           inclusive of root but exclusive of ENDs. Blank the model.
33  *
34  * Args:     nodes - number of nodes to allocate for
35  *
36  * Return:   pointer to the new model. Caller must free, with FreeCM()
37  */
38 struct cm_s *
AllocCM(int nodes)39 AllocCM(int nodes)
40 {
41   struct cm_s *cm;
42   int          k;
43 
44   if ((cm = (struct cm_s *) malloc (sizeof(struct cm_s))) == NULL)
45     Die("malloc failed");
46 
47   cm->nodes  = nodes;
48 
49   if ((cm->nd = (struct node_s *) malloc (nodes * sizeof(struct node_s))) == NULL)
50     Die("malloc failed");
51 
52   /* fast way to wipe everything to zero  */
53   memset(cm->nd, 0, (nodes * sizeof(struct node_s)));
54 
55   /* set all the topology connections to -1 */
56   for (k = 0; k < nodes; k++)
57     cm->nd[k].nxt = cm->nd[k].nxt2 = -1;
58 
59   return cm;
60 }
61 
62 
63 /* Function: FreeCM()
64  *
65  * Purpose:  Free memory allocated to a covariance model.
66  *
67  * Return:   (void)
68  */
69 void
FreeCM(struct cm_s * cm)70 FreeCM(struct cm_s *cm)
71 {
72  free(cm->nd);
73  free(cm);
74 }
75 
76 
77 
78 /* Function: NormalizeCM()
79  *
80  * Purpose:  Normalize all the probability distributions in a model.
81  *           Only normalizes the meaningful ones: i.e., matr_emit
82  *           emission statistics are ignored for MATL_NODEs, etc.
83  *
84  * Return:  (void)
85  */
86 void
NormalizeCM(struct cm_s * cm)87 NormalizeCM(struct cm_s *cm)
88 {
89   int    k;			/* counter over nodes            */
90   int    fy;        		/* from statetype, to statetype  */
91 
92 
93   for (k = 0; k < cm->nodes; k++)
94     {
95       for (fy = 0; fy < STATETYPES; fy++)
96 	DNorm(cm->nd[k].tmx[fy], STATETYPES);       /* state transitions */
97 
98       DNorm((double *) cm->nd[k].mp_emit, ALPHASIZE * ALPHASIZE); /* MATP emissions */
99       DNorm(cm->nd[k].ml_emit, ALPHASIZE);             /* MATL emissions */
100       DNorm(cm->nd[k].mr_emit, ALPHASIZE);             /* MATR emissions */
101       DNorm(cm->nd[k].il_emit, ALPHASIZE);             /* INSL emissions */
102       DNorm(cm->nd[k].ir_emit, ALPHASIZE);             /* INSR emissions */
103     }
104 }
105 
106 
107 
108 /* Function: VerifyCM()
109  *
110  * Purpose:  Have a look at a CM and make sure nothing stupid
111  *           is wrong with it. Returns 0 if something's wrong
112  *           and writes diagnostics to stderr. Returns 1 if
113  *           everything looks OK.
114  */
115 int
VerifyCM(struct cm_s * cm)116 VerifyCM(struct cm_s *cm)
117 {
118   int status = 1;
119   int    k;			/* counter over nodes            */
120 
121   for (k = 0; k < cm->nodes; k++)
122     {
123       if (cm->nd[k].type < 0 || cm->nd[k].type >= NODETYPES)
124 	{
125 	  status = 0;
126 	  fprintf(stderr, "Node %d has invalid type %d\n", k, cm->nd[k].type);
127 	}
128 
129       if ((cm->nd[k].nxt <= k && cm->nd[k].nxt != -1) ||
130 	  (cm->nd[k].nxt >= cm->nodes))
131 	{
132 	  status = 0;
133 	  fprintf(stderr, "Node %d points to invalid left child %d\n", k, cm->nd[k].nxt);
134 	}
135 
136       if ((cm->nd[k].nxt2 <= k && cm->nd[k].nxt2 != -1) ||
137 	  (cm->nd[k].nxt2 >= cm->nodes))
138 	{
139 	  status = 0;
140 	  fprintf(stderr, "Node %d points to invalid right child %d\n", k, cm->nd[k].nxt2);
141 	}
142     }
143   return status;
144 }
145 
146 
147 /* Function: RearrangeCM()
148  *
149  * Purpose:  Convert a cm into an "integer cm", a specialized structure
150  *           used only in the alignment algorithms.
151  *
152  *           The integer CM is an array of istate_s structures; i.e., rather
153  *           than a node-oriented form, a state-oriented form. Rearrange
154  *           transition tables to optimize the recursion in recurse_mx().
155  *
156  *           The node is expanded into states in proper order (uDEL_ST,
157  *           uMATP_ST, uMATL_ST, uMATR_ST, uINSL_ST, uINSR_ST). However, the
158  *           state transition vectors are rearranged such that INSL, INSR
159  *           are the first elements.
160  *
161  *           Insert states are explicitly assumed to have a zero emission
162  *           score!
163  *
164  * Args:     cm -      a covariance model, probability form
165  *           rfreq  -  frequencies to use as a random model (expected background)
166  *           ret_icm - RETURN: array of istate_s structures for states
167  *                     in model. Contains a state 0 for the root;
168  *                     does not contain anything for the end
169  *           ret_statenum - number of states in ret_icm, 0..statenum-1
170  *
171  * Return:   1 on success, 0 on failure.
172  *           ret_icm is malloc'ed here and must be free'ed by caller;
173  *           use free(*ret_icm).
174  */
175 int
RearrangeCM(struct cm_s * cm,double * rfreq,struct istate_s ** ret_icm,int * ret_statenum)176 RearrangeCM(struct cm_s      *cm,
177 	    double           *rfreq,
178 	    struct istate_s **ret_icm,
179 	    int              *ret_statenum)
180 {
181   struct istate_s *icm;         /* new int-lod states-based model */
182   struct istate_s *smallicm;    /* streamlined (realloc'ed) icm   */
183   struct m2ali_s  *bifstack;    /* pda for deferring bifurc connection assignment */
184   int        bifidx;		/* state index of a BEGINR's parent bifurc        */
185   int        k;			/* counter for nodes              */
186   int        y;			/* counter for states             */
187   int        tflags;		/* flags for which to state transitions are used   */
188   int        fflags;		/* flags for which from state transitions are used */
189   int        offset;		/* offset to next connected state */
190 
191   /* We know we can fit the new model into cm->nodes * STATETYPES
192    * states. We'll give back the excess memory later.
193    */
194   if ((icm = (struct istate_s *) calloc ((cm->nodes * STATETYPES), sizeof(struct istate_s))) == NULL)
195     return 0;
196 
197   bifstack = Init_m2ali();
198 
199   y = 0;
200   for (k = 0; k < cm->nodes; k++)
201     {
202 				/* figure out what we're connected to */
203       if (cm->nd[k].nxt == -1)
204 	tflags = uEND_ST;
205       else
206 	{
207 	  switch (cm->nd[cm->nd[k].nxt].type) {
208 	  case BIFURC_NODE:
209 	    tflags = uBIFURC_ST;
210 	    break;
211 
212 	  case MATP_NODE:
213 	    tflags = uDEL_ST | uMATP_ST | uMATR_ST | uMATL_ST;
214 	    break;
215 
216 	  case MATL_NODE:
217 	    tflags = uDEL_ST | uMATL_ST;
218 	    break;
219 
220 	  case MATR_NODE:
221 	    tflags = uDEL_ST | uMATR_ST;
222 	    break;
223 
224 	  case BEGINL_NODE:
225 	  case BEGINR_NODE:
226 	    tflags = uBEGIN_ST;
227 	    break;
228 
229 	  default: Die("no such node type %d", cm->nd[cm->nd[k].nxt].type);
230 	  }
231 	}
232 
233 				/* figure out what we're coming from */
234       switch (cm->nd[k].type) {
235       case BIFURC_NODE:
236 	fflags = uBIFURC_ST;
237 	offset = 1;
238 	break;
239 
240       case MATP_NODE:
241 	fflags = uDEL_ST | uMATP_ST | uMATL_ST | uMATR_ST | uINSL_ST | uINSR_ST;
242 	tflags |= uINSL_ST | uINSR_ST;
243 	offset = 4;
244 	break;
245 
246       case MATL_NODE:
247 	fflags = uDEL_ST | uMATL_ST | uINSL_ST;
248 	tflags |= uINSL_ST;
249 	offset = 2;
250 	break;
251 
252       case MATR_NODE:
253 	fflags = uDEL_ST | uMATR_ST | uINSR_ST;
254 	tflags |=  uINSR_ST;
255 	offset = 2;
256 	break;
257 
258       case BEGINL_NODE:
259 	fflags = uBEGIN_ST;
260 	offset = 1;
261 	break;
262 
263       case BEGINR_NODE:
264 	fflags  = uBEGIN_ST | uINSL_ST;
265 	offset  = 1;
266 	tflags |= uINSL_ST;
267 	break;
268 
269       case ROOT_NODE:
270 	fflags = uBEGIN_ST | uINSL_ST | uINSR_ST;
271 	offset = 1;
272 	tflags |= uINSL_ST | uINSR_ST;
273 	break;
274 
275       default: Die("No such node type %d\n", cm->nd[k].type);
276 
277       }
278 
279       if (fflags & uDEL_ST)
280 	{
281 	  fill_state(&icm[y],   k, uDEL_ST,  offset);
282 	  copy_state_transitions(&icm[y], cm->nd[k].tmx[DEL_ST], tflags);
283 	  offset--;
284 	  y++;
285 	}
286       else if (fflags & uBIFURC_ST)
287 	{
288 	  fill_state(&icm[y],   k, uBIFURC_ST,  offset);
289 	  /* A hack. tmx[0] gets the state index of the left connected BEGIN
290 	   * child; tmx[1] gets the right connected BEGIN child. The left
291            * child is guaranteed to be the next state, but the assignment
292            * of the right state must be deferred: we push the bifurc state index
293 	   * into a pda
294 	   */
295 	  icm[y].tmx[0] = y+1;
296 	  Push_m2ali(bifstack, y, 0, NULL);
297 	  y++;
298 	}
299       else if (fflags & uBEGIN_ST)
300 	{
301 	  fill_state(&icm[y],   k, uBEGIN_ST,  offset);
302 	  copy_state_transitions(&icm[y], cm->nd[k].tmx[DEL_ST], tflags);
303 
304 	  /* continuation of the above commentary. If we're a right BEGIN_ST,
305 	   * then we pop the state index of our parent bifurc off the pda
306 	   */
307 	  if (cm->nd[k].type == BEGINR_NODE)
308 	    {
309 	      Pop_m2ali(bifstack, &bifidx, (int *) NULL, (struct align_s **) NULL);
310 	      icm[bifidx].tmx[1] = y;
311 	    }
312 
313 	  offset--;
314 	  y++;
315 	}
316 
317       if (fflags & uMATP_ST)
318 	{
319 	  fill_state(&icm[y], k, uMATP_ST, offset);
320 	  copy_pairwise_emissions(&icm[y], (double *) cm->nd[k].mp_emit, rfreq);
321 	  copy_state_transitions(&icm[y], cm->nd[k].tmx[MATP_ST], tflags);
322 	  offset--;
323 	  y++;
324 	}
325 
326       if (fflags & uMATL_ST)
327 	{
328 	  fill_state(&icm[y], k, uMATL_ST, offset);
329 	  copy_singlet_emissions(&icm[y], cm->nd[k].ml_emit, rfreq);
330 	  copy_state_transitions(&icm[y], cm->nd[k].tmx[MATL_ST], tflags);
331 	  offset--;
332 	  y++;
333 	}
334 
335       if (fflags & uMATR_ST)
336 	{
337 	  fill_state(&icm[y], k, uMATR_ST, offset);
338 	  copy_singlet_emissions(&icm[y], cm->nd[k].mr_emit, rfreq);
339 	  copy_state_transitions(&icm[y], cm->nd[k].tmx[MATR_ST], tflags);
340 	  offset--;
341 	  y++;
342 	}
343 
344       if (fflags & uINSL_ST)
345 	{
346 	  fill_state(&icm[y], k, uINSL_ST, 0);
347 	  copy_singlet_emissions(&icm[y], cm->nd[k].il_emit, rfreq);
348 	  copy_state_transitions(&icm[y], cm->nd[k].tmx[INSL_ST], tflags);
349 	  y++;
350 	}
351 
352       if (fflags & uINSR_ST)
353 	{
354 				/* beware an asymmetry: INSR->INSL transits are disallowed */
355 	  fill_state(&icm[y], k, uINSR_ST, 0);
356 	  copy_singlet_emissions(&icm[y], cm->nd[k].ir_emit, rfreq);
357 	  copy_state_transitions(&icm[y], cm->nd[k].tmx[INSR_ST], tflags & ~uINSL_ST);
358 	  y++;
359 	}
360 
361 
362       /* End states must be added
363        */
364       if  (cm->nd[k].nxt == -1)
365 	{
366 	  fill_state(&icm[y], -1, uEND_ST, 0);
367 	  y++;
368 	}
369     } /* end loop over nodes */
370 
371 
372   Free_m2ali(bifstack);
373 
374   /* Return some of the alloc'ed memory
375    */
376   smallicm = (struct istate_s *) realloc (icm, y * sizeof(struct istate_s));
377   *ret_icm = (smallicm != NULL) ? smallicm : icm;
378   *ret_statenum = y;
379   return 1;
380 }
381 
382 
383 
384 /* Function: fill_state()
385  *
386  * Purpose:  fill in values in a state: node index, state type unique
387  *           identifier, offset to first of the next states, and number
388  *           of ynext connections.
389  *
390  *           transition and emission probabilities are dealt with
391  *           elsewhere.
392  *
393  */
394 static void
fill_state(struct istate_s * st,int nodeidx,int statetype,int offset)395 fill_state(struct istate_s *st,
396 	   int              nodeidx,
397 	   int              statetype,
398 	   int              offset)
399 {
400   st->nodeidx    = nodeidx;
401   st->statetype  = statetype;
402   st->offset     = offset;
403 }
404 
405 
406 
407 /* Function: copy_singlet_emissions()
408  *
409  * Purpose:  Copy a singlet emission vector into a state structure,
410  *           converting the probabilities into integer log odds.
411  */
412 static void
copy_singlet_emissions(struct istate_s * st,double * emvec,double * rfreq)413 copy_singlet_emissions(struct istate_s *st, double *emvec, double *rfreq)
414 {
415   int x;
416 
417   for (x = 0; x < ALPHASIZE; x++)
418     st->emit[x] = ILOG2(emvec[x] / rfreq[x]);
419 }
420 
421 
422 
423 
424 /* Function: copy_pairwise_emissions()
425  *
426  * Purpose:  Copy a pairwise emission table into a state structure,
427  *           converting the probabilities into integer log odds.
428  *           Beware the funny business with the pairwise emission
429  *           array; it was mp_emit[4][4], now cast to a pointer,
430  *           and accessed like a vector.
431  */
432 static void
copy_pairwise_emissions(struct istate_s * st,double * em,double * rfreq)433 copy_pairwise_emissions(struct istate_s *st, double *em, double *rfreq)
434 {
435   int x;
436 
437   for (x = 0; x < ALPHASIZE * ALPHASIZE; x++)
438     st->emit[x] = ILOG2(em[x] / (rfreq[x % ALPHASIZE] * rfreq[x / ALPHASIZE]));
439 }
440 
441 
442 
443 
444 /* Function: copy_state_transitions()
445  *
446  * Purpose:  Copy a state transition vector from a CM into a state
447  *           structure, copying only the used state transitions
448  *           as given by tflags. The state transition vector is
449  *           rearranged for an optimization: transits to INSL, INSR
450  *           are placed first.
451  */
452 static void
copy_state_transitions(struct istate_s * st,double * tvec,int tflags)453 copy_state_transitions(struct istate_s *st,
454 		       double          *tvec,
455 		       int              tflags)
456 {
457   int stx;		/* counter for state vector */
458 
459   stx = 0;
460   if (tflags & uINSL_ST)
461     { st->tmx[stx] = ILOG2(tvec[INSL_ST]);  stx++; }
462 
463   if (tflags & uINSR_ST)
464     { st->tmx[stx] = ILOG2(tvec[INSR_ST]);  stx++; }
465 
466   if (tflags & uDEL_ST || tflags & uBIFURC_ST ||
467       tflags & uBEGIN_ST || tflags & uEND_ST)
468     { st->tmx[stx] = ILOG2(tvec[DEL_ST]);   stx++; }
469 
470   if (tflags & uMATP_ST)
471     { st->tmx[stx] = ILOG2(tvec[MATP_ST]);  stx++; }
472 
473   if (tflags & uMATL_ST)
474     { st->tmx[stx] = ILOG2(tvec[MATL_ST]);  stx++; }
475 
476   if (tflags & uMATR_ST)
477     { st->tmx[stx] = ILOG2(tvec[MATR_ST]);  stx++; }
478 
479   st->connectnum = stx;
480 }
481 
482 
483 /* Function: MakePCM()
484  *
485  * Purpose:  Like RearrangeCM(), but leaving the model
486  *           in floating-point probabilities in struct pstate_s
487  *           structures.
488  *
489  * Args:     cm -      a covariance model, probability form
490  *           ret_pcm - RETURN: array of pstate_s structures for states
491  *                     in model. Contains a state 0 for the root.
492  *                     end states are explicit.
493  *           ret_statenum - number of states in ret_pcm, 0..statenum-1
494  *
495  * Return:   1 on success, 0 on failure.
496  *           ret_pcm is malloc'ed here and must be free'ed by caller;
497  *           use free(*ret_pcm).
498  */
499 int
MakePCM(struct cm_s * cm,struct pstate_s ** ret_pcm,int * ret_statenum)500 MakePCM(struct cm_s      *cm,
501 	struct pstate_s **ret_pcm,
502 	int              *ret_statenum)
503 {
504   struct pstate_s *pcm;         /* new states-based model */
505   struct pstate_s *smallpcm;    /* streamlined (realloc'ed) pcm   */
506   struct intstack_s *bifstack;  /* pda for deferring bifurc connection assignment */
507   int        bifidx;		/* state index of a BEGINR's parent bifurc        */
508   int        k;			/* counter for nodes              */
509   int        y;			/* counter for states             */
510   int        tflags;		/* flags for which to state transitions are used   */
511   int        fflags;		/* flags for which from state transitions are used */
512   int        offset;		/* offset to next connected state */
513 
514   /* We know we can fit the new model into cm->nodes * STATETYPES
515    * states. We'll give back the excess memory later.
516    */
517   if ((pcm = (struct pstate_s *) calloc ((cm->nodes * STATETYPES), sizeof(struct pstate_s))) == NULL)
518     return 0;
519 
520   bifstack = InitIntStack();
521 
522   y = 0;
523   for (k = 0; k < cm->nodes; k++)
524     {
525 				/* figure out what we're connected to */
526       if (cm->nd[k].nxt == -1)
527 	tflags = uEND_ST;
528       else
529 	{
530 	  switch (cm->nd[cm->nd[k].nxt].type) {
531 	  case BIFURC_NODE: tflags = uBIFURC_ST; 	                       break;
532 	  case MATP_NODE:   tflags = uDEL_ST | uMATP_ST | uMATR_ST | uMATL_ST; break;
533 	  case MATL_NODE:   tflags = uDEL_ST | uMATL_ST;                       break;
534 	  case MATR_NODE:   tflags = uDEL_ST | uMATR_ST;                       break;
535 	  case BEGINL_NODE: tflags = uBEGIN_ST;                                break;
536 	  case BEGINR_NODE: tflags = uBEGIN_ST;                                break;
537 	  default: Die("no such node type %d", cm->nd[cm->nd[k].nxt].type);
538 	  }
539 	}
540 
541 				/* figure out what we're coming from */
542       switch (cm->nd[k].type) {
543       case BIFURC_NODE:
544 	fflags = uBIFURC_ST;
545 	offset = 1;
546 	break;
547 
548       case MATP_NODE:
549 	fflags = uDEL_ST | uMATP_ST | uMATL_ST | uMATR_ST | uINSL_ST | uINSR_ST;
550 	tflags |= uINSL_ST | uINSR_ST;
551 	offset = 4;
552 	break;
553 
554       case MATL_NODE:
555 	fflags = uDEL_ST | uMATL_ST | uINSL_ST;
556 	tflags |= uINSL_ST;
557 	offset = 2;
558 	break;
559 
560       case MATR_NODE:
561 	fflags = uDEL_ST | uMATR_ST | uINSR_ST;
562 	tflags |=  uINSR_ST;
563 	offset = 2;
564 	break;
565 
566       case BEGINL_NODE:
567 	fflags = uBEGIN_ST;
568 	offset = 1;
569 	break;
570 
571       case BEGINR_NODE:
572 	fflags  = uBEGIN_ST | uINSL_ST;
573 	offset  = 1;
574 	tflags |= uINSL_ST;
575 	break;
576 
577       case ROOT_NODE:
578 	fflags = uBEGIN_ST | uINSL_ST | uINSR_ST;
579 	offset = 1;
580 	tflags |= uINSL_ST | uINSR_ST;
581 	break;
582 
583       default: Die("No such node type %d\n", cm->nd[k].type);
584 
585       }
586 
587       if (fflags & uDEL_ST)
588 	{
589 	  fill_pstate(&pcm[y],   k, uDEL_ST,  offset);
590 	  copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[DEL_ST], tflags);
591 	  offset--;
592 	  y++;
593 	}
594       else if (fflags & uBIFURC_ST)
595 	{
596 	  fill_pstate(&pcm[y],   k, uBIFURC_ST,  offset);
597 				/* We defer the assignment of bifr */
598 	  PushIntStack(bifstack, y);
599 	  y++;
600 	}
601       else if (fflags & uBEGIN_ST)
602 	{
603 	  fill_pstate(&pcm[y],   k, uBEGIN_ST,  offset);
604 	  copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[DEL_ST], tflags);
605 
606 	  /* continuation of the above commentary. If we're a right BEGIN_ST,
607 	   * then we pop the state index of our parent bifurc off the pda
608 	   */
609 	  if (cm->nd[k].type == BEGINR_NODE)
610 	    {
611 	      PopIntStack(bifstack, &bifidx);
612 	      pcm[bifidx].bifr = y;
613 	    }
614 	  offset--;
615 	  y++;
616 	}
617 
618       if (fflags & uMATP_ST)
619 	{
620 	  fill_pstate(&pcm[y], k, uMATP_ST, offset);
621 	  memcpy(pcm[y].emit, cm->nd[k].mp_emit, sizeof(double) * ALPHASIZE * ALPHASIZE);
622 	  copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[MATP_ST], tflags);
623 	  offset--;
624 	  y++;
625 	}
626 
627       if (fflags & uMATL_ST)
628 	{
629 	  fill_pstate(&pcm[y], k, uMATL_ST, offset);
630 	  memcpy(pcm[y].emit, cm->nd[k].ml_emit, sizeof(double) * ALPHASIZE);
631 	  copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[MATL_ST], tflags);
632 	  offset--;
633 	  y++;
634 	}
635 
636       if (fflags & uMATR_ST)
637 	{
638 	  fill_pstate(&pcm[y], k, uMATR_ST, offset);
639 	  memcpy(pcm[y].emit, cm->nd[k].mr_emit, sizeof(double) * ALPHASIZE);
640 	  copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[MATR_ST], tflags);
641 	  offset--;
642 	  y++;
643 	}
644 
645       if (fflags & uINSL_ST)
646 	{
647 	  fill_pstate(&pcm[y], k, uINSL_ST, 0);
648 	  memcpy(pcm[y].emit, cm->nd[k].il_emit, sizeof(double) * ALPHASIZE);
649 	  copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[INSL_ST], tflags);
650 	  y++;
651 	}
652 
653       if (fflags & uINSR_ST)
654 	{
655 				/* beware an asymmetry: INSR->INSL transits are disallowed */
656 	  fill_pstate(&pcm[y], k, uINSR_ST, 0);
657 	  memcpy(pcm[y].emit, cm->nd[k].ir_emit, sizeof(double) * ALPHASIZE);
658 	  copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[INSR_ST], tflags & ~uINSL_ST);
659 	  y++;
660 	}
661 
662 
663       /* End states must be added
664        */
665       if  (cm->nd[k].nxt == -1)
666 	{
667 	  fill_pstate(&pcm[y], -1, uEND_ST, 0);
668 	  y++;
669 	}
670     } /* end loop over nodes */
671 
672 
673   FreeIntStack(bifstack);
674 
675   /* Return some of the alloc'ed memory
676    */
677   smallpcm = (struct pstate_s *) realloc (pcm, y * sizeof(struct pstate_s));
678   *ret_pcm = (smallpcm != NULL) ? smallpcm : pcm;
679   *ret_statenum = y;
680   return 1;
681 }
682 
683 
684 
685 
686 /* Function: copy_pstate_transitions()
687  *
688  * Purpose:  Copy a state transition vector from a CM into a state
689  *           structure, copying only the used state transitions
690  *           as given by tflags. The state transition vector is
691  *           rearranged for an optimization: transits to INSL, INSR
692  *           are placed first.
693  */
694 static void
copy_pstate_transitions(struct pstate_s * st,double * tvec,int tflags)695 copy_pstate_transitions(struct pstate_s *st,
696 		       double          *tvec,
697 		       int              tflags)
698 {
699   int stx;		/* counter for state vector */
700 
701   stx = 0;
702   if (tflags & uINSL_ST)
703     { st->tmx[stx] = tvec[INSL_ST];  stx++; }
704 
705   if (tflags & uINSR_ST)
706     { st->tmx[stx] = tvec[INSR_ST];  stx++; }
707 
708   if (tflags & uDEL_ST || tflags & uBIFURC_ST ||
709       tflags & uBEGIN_ST || tflags & uEND_ST)
710     { st->tmx[stx] = tvec[DEL_ST];   stx++; }
711 
712   if (tflags & uMATP_ST)
713     { st->tmx[stx] = tvec[MATP_ST];  stx++; }
714 
715   if (tflags & uMATL_ST)
716     { st->tmx[stx] = tvec[MATL_ST];  stx++; }
717 
718   if (tflags & uMATR_ST)
719     { st->tmx[stx] = tvec[MATR_ST];  stx++; }
720 
721   st->connectnum = stx;
722 }
723 
724 
725 /* Function: fill_pstate()
726  *
727  * Purpose:  fill in values in a state: node index, state type unique
728  *           identifier, offset to first of the next states.
729  *
730  *           transition and emission probabilities are dealt with
731  *           elsewhere.
732  *
733  */
734 static void
fill_pstate(struct pstate_s * st,int nodeidx,int statetype,int offset)735 fill_pstate(struct pstate_s *st,
736 	    int              nodeidx,
737 	    int              statetype,
738 	    int              offset)
739 {
740   st->nodeidx    = nodeidx;
741   st->statetype  = statetype;
742   st->offset     = offset;
743 }
744 
745 
746 /* Function: NormalizePCM()
747  *
748  * Purpose:  Make damn sure a probability-form, states-based CM is
749  *           properly normalized. Workaround for a bug!
750  */
751 void
NormalizePCM(struct pstate_s * pcm,int M)752 NormalizePCM(struct pstate_s *pcm, int M)
753 {
754   int    y;
755 
756   for (y = 0; y < M; y++)
757     {
758 				/* emission distributions */
759       switch (pcm[y].statetype) {
760       case uMATP_ST: DNorm(pcm[y].emit, ALPHASIZE * ALPHASIZE); break;
761       case uMATL_ST:
762       case uMATR_ST:
763       case uINSL_ST:
764       case uINSR_ST: DNorm(pcm[y].emit, ALPHASIZE); break;
765       }
766 
767 				/* transition distributions */
768       if (pcm[y].statetype != uBIFURC_ST)
769 	DNorm(pcm[y].tmx, pcm[y].connectnum);
770     }
771 }
772