1 /* model.c
2 * Allocation, initialization, free'ing of models.
3 *
4 * Includes code supporting both original node-based CM structure, as well
5 * as the modified, state-based CM structure used by the newer alignment
6 * implementations.
7 *
8 * SRE, Tue Sep 7 09:22:03 1993
9 *
10 */
11
12
13 #include <stdlib.h>
14 #include <string.h>
15 #include "structs.h"
16 #include "funcs.h"
17
18 #ifdef MEMDEBUG
19 #include "dbmalloc.h"
20 #endif
21
22 static void fill_state(struct istate_s *st, int nodeidx, int statetype, int offset);
23 static void copy_singlet_emissions(struct istate_s *st, double *emvec, double *rfreq);
24 static void copy_pairwise_emissions(struct istate_s *st, double *em, double *rfreq);
25 static void copy_state_transitions(struct istate_s *st, double *tvec, int tflags);
26 static void copy_pstate_transitions(struct pstate_s *st, double *tvec, int tflags);
27 static void fill_pstate(struct pstate_s *st, int nodeidx, int statetype, int offset);
28
29 /* Function: AllocCM()
30 *
31 * Purpose: Allocate for a model containing some number of nodes,
32 * inclusive of root but exclusive of ENDs. Blank the model.
33 *
34 * Args: nodes - number of nodes to allocate for
35 *
36 * Return: pointer to the new model. Caller must free, with FreeCM()
37 */
38 struct cm_s *
AllocCM(int nodes)39 AllocCM(int nodes)
40 {
41 struct cm_s *cm;
42 int k;
43
44 if ((cm = (struct cm_s *) malloc (sizeof(struct cm_s))) == NULL)
45 Die("malloc failed");
46
47 cm->nodes = nodes;
48
49 if ((cm->nd = (struct node_s *) malloc (nodes * sizeof(struct node_s))) == NULL)
50 Die("malloc failed");
51
52 /* fast way to wipe everything to zero */
53 memset(cm->nd, 0, (nodes * sizeof(struct node_s)));
54
55 /* set all the topology connections to -1 */
56 for (k = 0; k < nodes; k++)
57 cm->nd[k].nxt = cm->nd[k].nxt2 = -1;
58
59 return cm;
60 }
61
62
63 /* Function: FreeCM()
64 *
65 * Purpose: Free memory allocated to a covariance model.
66 *
67 * Return: (void)
68 */
69 void
FreeCM(struct cm_s * cm)70 FreeCM(struct cm_s *cm)
71 {
72 free(cm->nd);
73 free(cm);
74 }
75
76
77
78 /* Function: NormalizeCM()
79 *
80 * Purpose: Normalize all the probability distributions in a model.
81 * Only normalizes the meaningful ones: i.e., matr_emit
82 * emission statistics are ignored for MATL_NODEs, etc.
83 *
84 * Return: (void)
85 */
86 void
NormalizeCM(struct cm_s * cm)87 NormalizeCM(struct cm_s *cm)
88 {
89 int k; /* counter over nodes */
90 int fy; /* from statetype, to statetype */
91
92
93 for (k = 0; k < cm->nodes; k++)
94 {
95 for (fy = 0; fy < STATETYPES; fy++)
96 DNorm(cm->nd[k].tmx[fy], STATETYPES); /* state transitions */
97
98 DNorm((double *) cm->nd[k].mp_emit, ALPHASIZE * ALPHASIZE); /* MATP emissions */
99 DNorm(cm->nd[k].ml_emit, ALPHASIZE); /* MATL emissions */
100 DNorm(cm->nd[k].mr_emit, ALPHASIZE); /* MATR emissions */
101 DNorm(cm->nd[k].il_emit, ALPHASIZE); /* INSL emissions */
102 DNorm(cm->nd[k].ir_emit, ALPHASIZE); /* INSR emissions */
103 }
104 }
105
106
107
108 /* Function: VerifyCM()
109 *
110 * Purpose: Have a look at a CM and make sure nothing stupid
111 * is wrong with it. Returns 0 if something's wrong
112 * and writes diagnostics to stderr. Returns 1 if
113 * everything looks OK.
114 */
115 int
VerifyCM(struct cm_s * cm)116 VerifyCM(struct cm_s *cm)
117 {
118 int status = 1;
119 int k; /* counter over nodes */
120
121 for (k = 0; k < cm->nodes; k++)
122 {
123 if (cm->nd[k].type < 0 || cm->nd[k].type >= NODETYPES)
124 {
125 status = 0;
126 fprintf(stderr, "Node %d has invalid type %d\n", k, cm->nd[k].type);
127 }
128
129 if ((cm->nd[k].nxt <= k && cm->nd[k].nxt != -1) ||
130 (cm->nd[k].nxt >= cm->nodes))
131 {
132 status = 0;
133 fprintf(stderr, "Node %d points to invalid left child %d\n", k, cm->nd[k].nxt);
134 }
135
136 if ((cm->nd[k].nxt2 <= k && cm->nd[k].nxt2 != -1) ||
137 (cm->nd[k].nxt2 >= cm->nodes))
138 {
139 status = 0;
140 fprintf(stderr, "Node %d points to invalid right child %d\n", k, cm->nd[k].nxt2);
141 }
142 }
143 return status;
144 }
145
146
147 /* Function: RearrangeCM()
148 *
149 * Purpose: Convert a cm into an "integer cm", a specialized structure
150 * used only in the alignment algorithms.
151 *
152 * The integer CM is an array of istate_s structures; i.e., rather
153 * than a node-oriented form, a state-oriented form. Rearrange
154 * transition tables to optimize the recursion in recurse_mx().
155 *
156 * The node is expanded into states in proper order (uDEL_ST,
157 * uMATP_ST, uMATL_ST, uMATR_ST, uINSL_ST, uINSR_ST). However, the
158 * state transition vectors are rearranged such that INSL, INSR
159 * are the first elements.
160 *
161 * Insert states are explicitly assumed to have a zero emission
162 * score!
163 *
164 * Args: cm - a covariance model, probability form
165 * rfreq - frequencies to use as a random model (expected background)
166 * ret_icm - RETURN: array of istate_s structures for states
167 * in model. Contains a state 0 for the root;
168 * does not contain anything for the end
169 * ret_statenum - number of states in ret_icm, 0..statenum-1
170 *
171 * Return: 1 on success, 0 on failure.
172 * ret_icm is malloc'ed here and must be free'ed by caller;
173 * use free(*ret_icm).
174 */
175 int
RearrangeCM(struct cm_s * cm,double * rfreq,struct istate_s ** ret_icm,int * ret_statenum)176 RearrangeCM(struct cm_s *cm,
177 double *rfreq,
178 struct istate_s **ret_icm,
179 int *ret_statenum)
180 {
181 struct istate_s *icm; /* new int-lod states-based model */
182 struct istate_s *smallicm; /* streamlined (realloc'ed) icm */
183 struct m2ali_s *bifstack; /* pda for deferring bifurc connection assignment */
184 int bifidx; /* state index of a BEGINR's parent bifurc */
185 int k; /* counter for nodes */
186 int y; /* counter for states */
187 int tflags; /* flags for which to state transitions are used */
188 int fflags; /* flags for which from state transitions are used */
189 int offset; /* offset to next connected state */
190
191 /* We know we can fit the new model into cm->nodes * STATETYPES
192 * states. We'll give back the excess memory later.
193 */
194 if ((icm = (struct istate_s *) calloc ((cm->nodes * STATETYPES), sizeof(struct istate_s))) == NULL)
195 return 0;
196
197 bifstack = Init_m2ali();
198
199 y = 0;
200 for (k = 0; k < cm->nodes; k++)
201 {
202 /* figure out what we're connected to */
203 if (cm->nd[k].nxt == -1)
204 tflags = uEND_ST;
205 else
206 {
207 switch (cm->nd[cm->nd[k].nxt].type) {
208 case BIFURC_NODE:
209 tflags = uBIFURC_ST;
210 break;
211
212 case MATP_NODE:
213 tflags = uDEL_ST | uMATP_ST | uMATR_ST | uMATL_ST;
214 break;
215
216 case MATL_NODE:
217 tflags = uDEL_ST | uMATL_ST;
218 break;
219
220 case MATR_NODE:
221 tflags = uDEL_ST | uMATR_ST;
222 break;
223
224 case BEGINL_NODE:
225 case BEGINR_NODE:
226 tflags = uBEGIN_ST;
227 break;
228
229 default: Die("no such node type %d", cm->nd[cm->nd[k].nxt].type);
230 }
231 }
232
233 /* figure out what we're coming from */
234 switch (cm->nd[k].type) {
235 case BIFURC_NODE:
236 fflags = uBIFURC_ST;
237 offset = 1;
238 break;
239
240 case MATP_NODE:
241 fflags = uDEL_ST | uMATP_ST | uMATL_ST | uMATR_ST | uINSL_ST | uINSR_ST;
242 tflags |= uINSL_ST | uINSR_ST;
243 offset = 4;
244 break;
245
246 case MATL_NODE:
247 fflags = uDEL_ST | uMATL_ST | uINSL_ST;
248 tflags |= uINSL_ST;
249 offset = 2;
250 break;
251
252 case MATR_NODE:
253 fflags = uDEL_ST | uMATR_ST | uINSR_ST;
254 tflags |= uINSR_ST;
255 offset = 2;
256 break;
257
258 case BEGINL_NODE:
259 fflags = uBEGIN_ST;
260 offset = 1;
261 break;
262
263 case BEGINR_NODE:
264 fflags = uBEGIN_ST | uINSL_ST;
265 offset = 1;
266 tflags |= uINSL_ST;
267 break;
268
269 case ROOT_NODE:
270 fflags = uBEGIN_ST | uINSL_ST | uINSR_ST;
271 offset = 1;
272 tflags |= uINSL_ST | uINSR_ST;
273 break;
274
275 default: Die("No such node type %d\n", cm->nd[k].type);
276
277 }
278
279 if (fflags & uDEL_ST)
280 {
281 fill_state(&icm[y], k, uDEL_ST, offset);
282 copy_state_transitions(&icm[y], cm->nd[k].tmx[DEL_ST], tflags);
283 offset--;
284 y++;
285 }
286 else if (fflags & uBIFURC_ST)
287 {
288 fill_state(&icm[y], k, uBIFURC_ST, offset);
289 /* A hack. tmx[0] gets the state index of the left connected BEGIN
290 * child; tmx[1] gets the right connected BEGIN child. The left
291 * child is guaranteed to be the next state, but the assignment
292 * of the right state must be deferred: we push the bifurc state index
293 * into a pda
294 */
295 icm[y].tmx[0] = y+1;
296 Push_m2ali(bifstack, y, 0, NULL);
297 y++;
298 }
299 else if (fflags & uBEGIN_ST)
300 {
301 fill_state(&icm[y], k, uBEGIN_ST, offset);
302 copy_state_transitions(&icm[y], cm->nd[k].tmx[DEL_ST], tflags);
303
304 /* continuation of the above commentary. If we're a right BEGIN_ST,
305 * then we pop the state index of our parent bifurc off the pda
306 */
307 if (cm->nd[k].type == BEGINR_NODE)
308 {
309 Pop_m2ali(bifstack, &bifidx, (int *) NULL, (struct align_s **) NULL);
310 icm[bifidx].tmx[1] = y;
311 }
312
313 offset--;
314 y++;
315 }
316
317 if (fflags & uMATP_ST)
318 {
319 fill_state(&icm[y], k, uMATP_ST, offset);
320 copy_pairwise_emissions(&icm[y], (double *) cm->nd[k].mp_emit, rfreq);
321 copy_state_transitions(&icm[y], cm->nd[k].tmx[MATP_ST], tflags);
322 offset--;
323 y++;
324 }
325
326 if (fflags & uMATL_ST)
327 {
328 fill_state(&icm[y], k, uMATL_ST, offset);
329 copy_singlet_emissions(&icm[y], cm->nd[k].ml_emit, rfreq);
330 copy_state_transitions(&icm[y], cm->nd[k].tmx[MATL_ST], tflags);
331 offset--;
332 y++;
333 }
334
335 if (fflags & uMATR_ST)
336 {
337 fill_state(&icm[y], k, uMATR_ST, offset);
338 copy_singlet_emissions(&icm[y], cm->nd[k].mr_emit, rfreq);
339 copy_state_transitions(&icm[y], cm->nd[k].tmx[MATR_ST], tflags);
340 offset--;
341 y++;
342 }
343
344 if (fflags & uINSL_ST)
345 {
346 fill_state(&icm[y], k, uINSL_ST, 0);
347 copy_singlet_emissions(&icm[y], cm->nd[k].il_emit, rfreq);
348 copy_state_transitions(&icm[y], cm->nd[k].tmx[INSL_ST], tflags);
349 y++;
350 }
351
352 if (fflags & uINSR_ST)
353 {
354 /* beware an asymmetry: INSR->INSL transits are disallowed */
355 fill_state(&icm[y], k, uINSR_ST, 0);
356 copy_singlet_emissions(&icm[y], cm->nd[k].ir_emit, rfreq);
357 copy_state_transitions(&icm[y], cm->nd[k].tmx[INSR_ST], tflags & ~uINSL_ST);
358 y++;
359 }
360
361
362 /* End states must be added
363 */
364 if (cm->nd[k].nxt == -1)
365 {
366 fill_state(&icm[y], -1, uEND_ST, 0);
367 y++;
368 }
369 } /* end loop over nodes */
370
371
372 Free_m2ali(bifstack);
373
374 /* Return some of the alloc'ed memory
375 */
376 smallicm = (struct istate_s *) realloc (icm, y * sizeof(struct istate_s));
377 *ret_icm = (smallicm != NULL) ? smallicm : icm;
378 *ret_statenum = y;
379 return 1;
380 }
381
382
383
384 /* Function: fill_state()
385 *
386 * Purpose: fill in values in a state: node index, state type unique
387 * identifier, offset to first of the next states, and number
388 * of ynext connections.
389 *
390 * transition and emission probabilities are dealt with
391 * elsewhere.
392 *
393 */
394 static void
fill_state(struct istate_s * st,int nodeidx,int statetype,int offset)395 fill_state(struct istate_s *st,
396 int nodeidx,
397 int statetype,
398 int offset)
399 {
400 st->nodeidx = nodeidx;
401 st->statetype = statetype;
402 st->offset = offset;
403 }
404
405
406
407 /* Function: copy_singlet_emissions()
408 *
409 * Purpose: Copy a singlet emission vector into a state structure,
410 * converting the probabilities into integer log odds.
411 */
412 static void
copy_singlet_emissions(struct istate_s * st,double * emvec,double * rfreq)413 copy_singlet_emissions(struct istate_s *st, double *emvec, double *rfreq)
414 {
415 int x;
416
417 for (x = 0; x < ALPHASIZE; x++)
418 st->emit[x] = ILOG2(emvec[x] / rfreq[x]);
419 }
420
421
422
423
424 /* Function: copy_pairwise_emissions()
425 *
426 * Purpose: Copy a pairwise emission table into a state structure,
427 * converting the probabilities into integer log odds.
428 * Beware the funny business with the pairwise emission
429 * array; it was mp_emit[4][4], now cast to a pointer,
430 * and accessed like a vector.
431 */
432 static void
copy_pairwise_emissions(struct istate_s * st,double * em,double * rfreq)433 copy_pairwise_emissions(struct istate_s *st, double *em, double *rfreq)
434 {
435 int x;
436
437 for (x = 0; x < ALPHASIZE * ALPHASIZE; x++)
438 st->emit[x] = ILOG2(em[x] / (rfreq[x % ALPHASIZE] * rfreq[x / ALPHASIZE]));
439 }
440
441
442
443
444 /* Function: copy_state_transitions()
445 *
446 * Purpose: Copy a state transition vector from a CM into a state
447 * structure, copying only the used state transitions
448 * as given by tflags. The state transition vector is
449 * rearranged for an optimization: transits to INSL, INSR
450 * are placed first.
451 */
452 static void
copy_state_transitions(struct istate_s * st,double * tvec,int tflags)453 copy_state_transitions(struct istate_s *st,
454 double *tvec,
455 int tflags)
456 {
457 int stx; /* counter for state vector */
458
459 stx = 0;
460 if (tflags & uINSL_ST)
461 { st->tmx[stx] = ILOG2(tvec[INSL_ST]); stx++; }
462
463 if (tflags & uINSR_ST)
464 { st->tmx[stx] = ILOG2(tvec[INSR_ST]); stx++; }
465
466 if (tflags & uDEL_ST || tflags & uBIFURC_ST ||
467 tflags & uBEGIN_ST || tflags & uEND_ST)
468 { st->tmx[stx] = ILOG2(tvec[DEL_ST]); stx++; }
469
470 if (tflags & uMATP_ST)
471 { st->tmx[stx] = ILOG2(tvec[MATP_ST]); stx++; }
472
473 if (tflags & uMATL_ST)
474 { st->tmx[stx] = ILOG2(tvec[MATL_ST]); stx++; }
475
476 if (tflags & uMATR_ST)
477 { st->tmx[stx] = ILOG2(tvec[MATR_ST]); stx++; }
478
479 st->connectnum = stx;
480 }
481
482
483 /* Function: MakePCM()
484 *
485 * Purpose: Like RearrangeCM(), but leaving the model
486 * in floating-point probabilities in struct pstate_s
487 * structures.
488 *
489 * Args: cm - a covariance model, probability form
490 * ret_pcm - RETURN: array of pstate_s structures for states
491 * in model. Contains a state 0 for the root.
492 * end states are explicit.
493 * ret_statenum - number of states in ret_pcm, 0..statenum-1
494 *
495 * Return: 1 on success, 0 on failure.
496 * ret_pcm is malloc'ed here and must be free'ed by caller;
497 * use free(*ret_pcm).
498 */
499 int
MakePCM(struct cm_s * cm,struct pstate_s ** ret_pcm,int * ret_statenum)500 MakePCM(struct cm_s *cm,
501 struct pstate_s **ret_pcm,
502 int *ret_statenum)
503 {
504 struct pstate_s *pcm; /* new states-based model */
505 struct pstate_s *smallpcm; /* streamlined (realloc'ed) pcm */
506 struct intstack_s *bifstack; /* pda for deferring bifurc connection assignment */
507 int bifidx; /* state index of a BEGINR's parent bifurc */
508 int k; /* counter for nodes */
509 int y; /* counter for states */
510 int tflags; /* flags for which to state transitions are used */
511 int fflags; /* flags for which from state transitions are used */
512 int offset; /* offset to next connected state */
513
514 /* We know we can fit the new model into cm->nodes * STATETYPES
515 * states. We'll give back the excess memory later.
516 */
517 if ((pcm = (struct pstate_s *) calloc ((cm->nodes * STATETYPES), sizeof(struct pstate_s))) == NULL)
518 return 0;
519
520 bifstack = InitIntStack();
521
522 y = 0;
523 for (k = 0; k < cm->nodes; k++)
524 {
525 /* figure out what we're connected to */
526 if (cm->nd[k].nxt == -1)
527 tflags = uEND_ST;
528 else
529 {
530 switch (cm->nd[cm->nd[k].nxt].type) {
531 case BIFURC_NODE: tflags = uBIFURC_ST; break;
532 case MATP_NODE: tflags = uDEL_ST | uMATP_ST | uMATR_ST | uMATL_ST; break;
533 case MATL_NODE: tflags = uDEL_ST | uMATL_ST; break;
534 case MATR_NODE: tflags = uDEL_ST | uMATR_ST; break;
535 case BEGINL_NODE: tflags = uBEGIN_ST; break;
536 case BEGINR_NODE: tflags = uBEGIN_ST; break;
537 default: Die("no such node type %d", cm->nd[cm->nd[k].nxt].type);
538 }
539 }
540
541 /* figure out what we're coming from */
542 switch (cm->nd[k].type) {
543 case BIFURC_NODE:
544 fflags = uBIFURC_ST;
545 offset = 1;
546 break;
547
548 case MATP_NODE:
549 fflags = uDEL_ST | uMATP_ST | uMATL_ST | uMATR_ST | uINSL_ST | uINSR_ST;
550 tflags |= uINSL_ST | uINSR_ST;
551 offset = 4;
552 break;
553
554 case MATL_NODE:
555 fflags = uDEL_ST | uMATL_ST | uINSL_ST;
556 tflags |= uINSL_ST;
557 offset = 2;
558 break;
559
560 case MATR_NODE:
561 fflags = uDEL_ST | uMATR_ST | uINSR_ST;
562 tflags |= uINSR_ST;
563 offset = 2;
564 break;
565
566 case BEGINL_NODE:
567 fflags = uBEGIN_ST;
568 offset = 1;
569 break;
570
571 case BEGINR_NODE:
572 fflags = uBEGIN_ST | uINSL_ST;
573 offset = 1;
574 tflags |= uINSL_ST;
575 break;
576
577 case ROOT_NODE:
578 fflags = uBEGIN_ST | uINSL_ST | uINSR_ST;
579 offset = 1;
580 tflags |= uINSL_ST | uINSR_ST;
581 break;
582
583 default: Die("No such node type %d\n", cm->nd[k].type);
584
585 }
586
587 if (fflags & uDEL_ST)
588 {
589 fill_pstate(&pcm[y], k, uDEL_ST, offset);
590 copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[DEL_ST], tflags);
591 offset--;
592 y++;
593 }
594 else if (fflags & uBIFURC_ST)
595 {
596 fill_pstate(&pcm[y], k, uBIFURC_ST, offset);
597 /* We defer the assignment of bifr */
598 PushIntStack(bifstack, y);
599 y++;
600 }
601 else if (fflags & uBEGIN_ST)
602 {
603 fill_pstate(&pcm[y], k, uBEGIN_ST, offset);
604 copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[DEL_ST], tflags);
605
606 /* continuation of the above commentary. If we're a right BEGIN_ST,
607 * then we pop the state index of our parent bifurc off the pda
608 */
609 if (cm->nd[k].type == BEGINR_NODE)
610 {
611 PopIntStack(bifstack, &bifidx);
612 pcm[bifidx].bifr = y;
613 }
614 offset--;
615 y++;
616 }
617
618 if (fflags & uMATP_ST)
619 {
620 fill_pstate(&pcm[y], k, uMATP_ST, offset);
621 memcpy(pcm[y].emit, cm->nd[k].mp_emit, sizeof(double) * ALPHASIZE * ALPHASIZE);
622 copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[MATP_ST], tflags);
623 offset--;
624 y++;
625 }
626
627 if (fflags & uMATL_ST)
628 {
629 fill_pstate(&pcm[y], k, uMATL_ST, offset);
630 memcpy(pcm[y].emit, cm->nd[k].ml_emit, sizeof(double) * ALPHASIZE);
631 copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[MATL_ST], tflags);
632 offset--;
633 y++;
634 }
635
636 if (fflags & uMATR_ST)
637 {
638 fill_pstate(&pcm[y], k, uMATR_ST, offset);
639 memcpy(pcm[y].emit, cm->nd[k].mr_emit, sizeof(double) * ALPHASIZE);
640 copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[MATR_ST], tflags);
641 offset--;
642 y++;
643 }
644
645 if (fflags & uINSL_ST)
646 {
647 fill_pstate(&pcm[y], k, uINSL_ST, 0);
648 memcpy(pcm[y].emit, cm->nd[k].il_emit, sizeof(double) * ALPHASIZE);
649 copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[INSL_ST], tflags);
650 y++;
651 }
652
653 if (fflags & uINSR_ST)
654 {
655 /* beware an asymmetry: INSR->INSL transits are disallowed */
656 fill_pstate(&pcm[y], k, uINSR_ST, 0);
657 memcpy(pcm[y].emit, cm->nd[k].ir_emit, sizeof(double) * ALPHASIZE);
658 copy_pstate_transitions(&pcm[y], cm->nd[k].tmx[INSR_ST], tflags & ~uINSL_ST);
659 y++;
660 }
661
662
663 /* End states must be added
664 */
665 if (cm->nd[k].nxt == -1)
666 {
667 fill_pstate(&pcm[y], -1, uEND_ST, 0);
668 y++;
669 }
670 } /* end loop over nodes */
671
672
673 FreeIntStack(bifstack);
674
675 /* Return some of the alloc'ed memory
676 */
677 smallpcm = (struct pstate_s *) realloc (pcm, y * sizeof(struct pstate_s));
678 *ret_pcm = (smallpcm != NULL) ? smallpcm : pcm;
679 *ret_statenum = y;
680 return 1;
681 }
682
683
684
685
686 /* Function: copy_pstate_transitions()
687 *
688 * Purpose: Copy a state transition vector from a CM into a state
689 * structure, copying only the used state transitions
690 * as given by tflags. The state transition vector is
691 * rearranged for an optimization: transits to INSL, INSR
692 * are placed first.
693 */
694 static void
copy_pstate_transitions(struct pstate_s * st,double * tvec,int tflags)695 copy_pstate_transitions(struct pstate_s *st,
696 double *tvec,
697 int tflags)
698 {
699 int stx; /* counter for state vector */
700
701 stx = 0;
702 if (tflags & uINSL_ST)
703 { st->tmx[stx] = tvec[INSL_ST]; stx++; }
704
705 if (tflags & uINSR_ST)
706 { st->tmx[stx] = tvec[INSR_ST]; stx++; }
707
708 if (tflags & uDEL_ST || tflags & uBIFURC_ST ||
709 tflags & uBEGIN_ST || tflags & uEND_ST)
710 { st->tmx[stx] = tvec[DEL_ST]; stx++; }
711
712 if (tflags & uMATP_ST)
713 { st->tmx[stx] = tvec[MATP_ST]; stx++; }
714
715 if (tflags & uMATL_ST)
716 { st->tmx[stx] = tvec[MATL_ST]; stx++; }
717
718 if (tflags & uMATR_ST)
719 { st->tmx[stx] = tvec[MATR_ST]; stx++; }
720
721 st->connectnum = stx;
722 }
723
724
725 /* Function: fill_pstate()
726 *
727 * Purpose: fill in values in a state: node index, state type unique
728 * identifier, offset to first of the next states.
729 *
730 * transition and emission probabilities are dealt with
731 * elsewhere.
732 *
733 */
734 static void
fill_pstate(struct pstate_s * st,int nodeidx,int statetype,int offset)735 fill_pstate(struct pstate_s *st,
736 int nodeidx,
737 int statetype,
738 int offset)
739 {
740 st->nodeidx = nodeidx;
741 st->statetype = statetype;
742 st->offset = offset;
743 }
744
745
746 /* Function: NormalizePCM()
747 *
748 * Purpose: Make damn sure a probability-form, states-based CM is
749 * properly normalized. Workaround for a bug!
750 */
751 void
NormalizePCM(struct pstate_s * pcm,int M)752 NormalizePCM(struct pstate_s *pcm, int M)
753 {
754 int y;
755
756 for (y = 0; y < M; y++)
757 {
758 /* emission distributions */
759 switch (pcm[y].statetype) {
760 case uMATP_ST: DNorm(pcm[y].emit, ALPHASIZE * ALPHASIZE); break;
761 case uMATL_ST:
762 case uMATR_ST:
763 case uINSL_ST:
764 case uINSR_ST: DNorm(pcm[y].emit, ALPHASIZE); break;
765 }
766
767 /* transition distributions */
768 if (pcm[y].statetype != uBIFURC_ST)
769 DNorm(pcm[y].tmx, pcm[y].connectnum);
770 }
771 }
772