/******************************************************************************* * * This file is part of the General Hidden Markov Model Library, * GHMM version __VERSION__, see http://ghmm.org * * Filename: ghmm/ghmm/viterbi.c * Authors: Wasinee Rungsarityotin, Benjamin Georgi * * Copyright (C) 1998-2004 Alexander Schliep * Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln * Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik, * Berlin * * Contact: schliep@ghmm.org * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * * This file is version $Revision: 1713 $ * from $Date: 2006-10-16 10:06:28 -0400 (Mon, 16 Oct 2006) $ * last change by $Author: grunau $. * *******************************************************************************/ /* Possible sources of errors: initialisation, pushback (the loop after) */ #ifdef HAVE_CONFIG_H # include "../config.h" #endif #include #include #include #include #include "ghmm.h" #include "mprintf.h" #include "mes.h" #include "matrix.h" #include "pmodel.h" #include "psequence.h" #include "pviterbi.h" #include "ghmm_internals.h" typedef struct plocal_store_t { /** precomputed log probabilities for transitions into the states for each transition class **/ double *** log_in_a; /** precomputed log probabilities for each state for the emissions **/ double ** log_b; /** lookback matrix for the last offset steps **/ double *** phi; /** log probabilities for the current u, v and every state **/ double *phi_new; /** traceback matrix **/ int ***psi; /** for convinience store a pointer to the model **/ ghmm_dpmodel * mo; /** for the debug mode store information of matrix sizes **/ /** length of sequence X determines size of psi **/ int len_x; /** length of sequence Y determines size of phi and psi **/ int len_y; /** non functional stuff **/ int *topo_order; int topo_order_length; } plocal_store_t; static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv); static plocal_store_t *pviterbi_alloc(ghmm_dpmodel *mo, int len_x, int len_y); static int pviterbi_free(plocal_store_t **v, int n, int len_x, int len_y, int max_offset_x, int max_offset_y); static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y); static double get_phi(plocal_store_t * pv, int x, int y, int offset_x, int offset_y, int state); static void set_phi(plocal_store_t * pv, int x, int y, int ghmm_dstate, double prob); static void push_back_phi(plocal_store_t * pv, int length_y); static void set_psi(plocal_store_t * pv, int x, int y, int ghmm_dstate, int from_state); /*============================================================================*/ static plocal_store_t *pviterbi_alloc(ghmm_dpmodel *mo, int len_x, int len_y) { #define CUR_PROC "pviterbi_alloc" plocal_store_t* v = NULL; int i, j; ARRAY_CALLOC (v, 1); v->mo = mo; v->len_y = len_y; v->len_x = len_x; /* Allocate the log_in_a's -> individal lenghts */ ARRAY_CALLOC (v->log_in_a, mo->N); /* first index of log_in_a: target state */ for (j = 0; j < mo->N; j++){ /* second index: source state */ ARRAY_CALLOC (v->log_in_a[j], mo->s[j].in_states); for (i=0; is[j].in_states; i++) { /* third index: transition classes of source state */ ARRAY_CALLOC (v->log_in_a[j][i], mo->s[mo->s[j].in_id[i]].kclasses); } } ARRAY_CALLOC (v->log_b, mo->N); for (j=0; jN; j++) { ARRAY_CALLOC (v->log_b[j], ghmm_dpmodel_emission_table_size(mo, j) + 1); } if (!(v->log_b)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} v->phi = ighmm_cmatrix_3d_alloc(mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N); if (!(v->phi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} ARRAY_CALLOC (v->phi_new, mo->N); v->psi = ighmm_dmatrix_3d_alloc(len_x + mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N); if (!(v->psi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} v->topo_order_length = 0; ARRAY_CALLOC (v->topo_order, mo->N); return(v); STOP: /* Label STOP from ARRAY_[CM]ALLOC */ pviterbi_free((&v), mo->N, len_x, len_y, mo->max_offset_x, mo->max_offset_y); return(NULL); #undef CUR_PROC } /* viterbi_alloc */ /*============================================================================*/ static int pviterbi_free(plocal_store_t **v, int n, int len_x, int len_y, int max_offset_x, int max_offset_y) { #define CUR_PROC "pviterbi_free" int i, j; mes_check_ptr(v, return(-1)); if( !*v ) return(0); for (j = 0; j < n; j++) { for (i=0; i < (*v)->mo->s[j].in_states; i++) m_free((*v)->log_in_a[j][i]); m_free((*v)->log_in_a[j]); } m_free((*v)->log_in_a); for (j=0; jlog_b[j]); m_free((*v)->log_b); ighmm_cmatrix_3d_free( &((*v)->phi), max_offset_x + 1, len_y + max_offset_y + 1); m_free((*v)->phi_new); ighmm_dmatrix_3d_free( &((*v)->psi), len_x + max_offset_x + 1, len_y + max_offset_y + 1); m_free((*v)->topo_order); (*v)->mo = NULL; m_free(*v); return(0); #undef CUR_PROC } /* viterbi_free */ /*============================================================================*/ static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv) { int j, k; ghmm_dpmodel * mo; printf("Local store for pair HMM viterbi algorithm\n"); printf("Log in a:\n"); mo = pv->mo; for (j = 0; j < mo->N; j++){ printf("state %i in states %i\n", j, mo->s[j].in_states); for (k=0; ks[j].in_states; k++) printf("FIXME: log_in_a has three dimensions!"/*From: %i %f\n", mo->s[j].in_id[k], pv->log_in_a[j][k]*/); } printf("Log b:\n"); for (j = 0; j < mo->N; j++){ printf("state %i #chars: %i\n", j, ghmm_dpmodel_emission_table_size(mo, j)); for (k=0; klog_b[j][k]); } } /*============================================================================*/ static void pviterbi_precompute( ghmm_dpmodel *mo, plocal_store_t *v) { #define CUR_PROC "pviterbi_precompute" int i, j, emission, t_class; /* Precomputing the log(a_ij) */ for (j = 0; j < mo->N; j++) for (i = 0; i < mo->s[j].in_states; i++) for (t_class = 0; t_class < mo->s[mo->s[j].in_id[i]].kclasses; t_class++){ if ( mo->s[j].in_a[i][t_class] == 0.0 ) /* DBL_EPSILON ? */ v->log_in_a[j][i][t_class] = +1; /*Not used any further in the calculations */ else v->log_in_a[j][i][t_class] = log( mo->s[j].in_a[i][t_class] ); } /* Precomputing the log emission probabilities for each state*/ for (j = 0; j < mo->N; j++) { for (emission = 0; emission < ghmm_dpmodel_emission_table_size(mo,j); emission++) { if (1) { if ( mo->s[j].b[emission] == 0.0 ) /* DBL_EPSILON ? */ v->log_b[j][emission] = +1; else v->log_b[j][emission] = log( mo->s[j].b[emission] ); } else{ v->log_b[j][emission] = 0.0; } } v->log_b[j][emission] = 1; /* the last field is for invalid emissions */ } #undef CUR_PROC }/* viterbi_precompute */ /*============================================================================*/ /** */ /* static void p__viterbi_silent( ghmm_dmodel *mo, int t, plocal_store_t *v ) */ /* { */ /* int topocount; */ /* int i, k; */ /* double max_value, value; */ /* for ( topocount = 0; topocount < mo->topo_order_length; topocount++) { */ /* k = mo->topo_order[topocount]; */ /* if ( mo->silent[k] ) { /\* Silent states *\/ */ /* /\* Determine the maximum *\/ */ /* /\* max_phi = phi[i] + log_in_a[j][i] ... *\/ */ /* max_value = -DBL_MAX; */ /* v->psi[t][k] = -1; */ /* for (i = 0; i < mo->s[k].in_states; i++) { */ /* if ( v->phi[ mo->s[k].in_id[i] ] != +1 && */ /* v->log_in_a[k][i] != +1) { */ /* value = v->phi[ mo->s[k].in_id[i] ] + v->log_in_a[k][i]; */ /* if (value > max_value) { */ /* max_value = value; */ /* v->psi[t][k] = mo->s[k].in_id[i]; */ /* } */ /* } */ /* } */ /* /\* No maximum found (that is, state never reached) */ /* or the output O[t] = 0.0: *\/ */ /* if (max_value == -DBL_MAX) { */ /* v->phi[k] = +1; */ /* } else { */ /* v->phi[k] = max_value; */ /* } */ /* } */ /* } */ /* } */ /*============================================================================*/ static double sget_log_in_a(plocal_store_t * pv, int i, int j, ghmm_dpseq * X, ghmm_dpseq * Y, int index_x, int index_y) { /* determine the transition class for the source ghmm_dstate */ int id = pv->mo->s[i].in_id[j]; int cl = pv->mo->s[id].class_change->get_class(pv->mo, X, Y, index_x,index_y, pv->mo->s[id].class_change->user_data); return pv->log_in_a[i][j][cl]; } /*============================================================================*/ static double log_b(plocal_store_t * pv, int state, int emission) { #ifdef DEBUG if (ghmm_dstate > pv->mo->N) fprintf(stderr, "log_b: State index out of bounds %i > %i\n", ghmm_dstate, pv->mo->N); if (emission > ghmm_dpmodel_emission_table_size(pv->mo, state)) fprintf(stderr, "log_b: Emission index out of bounds %i > %i for ghmm_dstate %i\n", emission, ghmm_dpmodel_emission_table_size(pv->mo, state), state); #endif return pv->log_b[state][emission]; } /*============================================================================*/ static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y) { #ifdef DEBUG int emission; #endif int u, v, j, i, off_x, y; double log_in_a_ij; double value, max_value, previous_prob, log_b_i; /* printf("ghmm_dpmodel_viterbi init\n"); */ ghmm_dpmodel * mo = pv->mo; double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int); log_in_a = &sget_log_in_a; /* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]*/ for (off_x=0; off_xmax_offset_x + 1; off_x++) for (y=0; ylength + mo->max_offset_y + 1; y++) for (j=0; jN; j++) { pv->phi[off_x][y][j] = +1; } if ( mo->model_type & GHMM_kSilentStates ) { /* could go into silent state at t=0 */ /*p__viterbi_silent( mo, t=0, v);*/ } /*for (j = 0; j < mo->N; j++) { printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]); } for( i = 0; i < mo->N; i++){ printf("%d\t", former_matchcount[i]); } for (i = 0; i < mo->N; i++){ printf("%d\t", recent_matchcount[i]); }*/ /* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */ /* this is in principle the same as the main recurrence but adds initial probabilities to states that cannot be inhabitated at u=0, v=0 because of greater offsets than one iteration start is u=-1 v=-1 to allow states with offset_x == 0 which corresponds to a series of gap states before reading the first character of x at position x=0, y=v or equally for offset_y == 0 */ /* u, v <= max offsets */ for (u = -1; u <= mo->max_offset_x; u++) { for (v = -mo->max_offset_y; v < Y->length; v++) { for (j = 0; j < mo->N; j++) { /** initialization of phi (lookback matrix), psi (traceback) **/ set_phi(pv, u, v, j, +1); set_psi(pv, u, v, j, -1); } /* for each state i */ for (i = 0; i < mo->N; i++) { /* Determine the maximum */ /* max_phi = phi[i] + log_in_a[j][i] ... */ if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) { max_value = -DBL_MAX; set_psi(pv, u, v, i, -1); for (j = 0; j < mo->s[i].in_states; j++) { /* look back in the phi matrix at the offsets */ previous_prob = get_phi(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]); log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v); if ( previous_prob != +1 && log_in_a_ij != +1) { value = previous_prob + log_in_a_ij; if (value > max_value) { max_value = value; set_psi(pv, u, v, i, mo->s[i].in_id[j]); } } else {;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */ } #ifdef DEBUG emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y); if (emission > ghmm_dpmodel_emission_table_size(mo, i)){ printf("State %i\n", i); ghmm_dpmodel_state_print(&(mo->s[i])); printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], ghmm_dpmodel_emission_table_size(mo, i), emission); } #endif log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y)); /* this is the difference from the main loop: check whether this state could be an initial state and add the initial probability */ if (log_b_i == +1 ) { set_phi(pv, u, v, i, +1); } else { if (max_value == -DBL_MAX) set_phi(pv, u, v, i, +1); else set_phi(pv, u, v, i, max_value); /* if (mo->s[i].pi != 0 && mo->s[i].offset_x - 1 == u && mo->s[i].offset_y - 1 == v) { */ if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u && mo->s[i].offset_y - 1 == v) { set_phi(pv, u, v, i, mo->s[i].log_pi); #ifdef DEBUG printf("Initial log prob state %i at (%i, %i) = %f\n", i, u, v, get_phi(pv, u, v, 0, 0, i)); printf("Characters emitted X: %i, Y: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v)); #endif } if (get_phi(pv, u, v, 0, 0, i) != 1) set_phi(pv, u, v, i, get_phi(pv, u, v, 0, 0, i) + log_b_i); } } /* if (v == 0) { printf"(%i, %i, %i) preceding %i\n", u, v, i, pv->psi[u][v][i]); } */ } /* complete time step for emitting states */ /* last_osc = osc; */ /* save last transition class */ /*if (mo->model_type & kSilentStates) { p__viterbi_silent( mo, t, v ); }*/ /* complete time step for silent states */ /************** for (j = 0; j < mo->N; j++) { printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]); } for (i = 0; i < mo->N; i++){ printf("%d\t", former_matchcount[i]); } for (i = 0; i < mo->N; i++){ printf("%d\t", recent_matchcount[i]); } ****************/ } /* End for v in Y */ /* Next character in X */ /* push back the old phi values */ push_back_phi(pv, Y->length); } /* End for u in X */ } /*============================================================================*/ /* call this function when you are at position (x, y) and want to get the probability of state 'state' which is offset_x and offset_y away from x,y */ static double get_phi(plocal_store_t * pv, int x, int y, int offset_x, int offset_y, int state){ #ifdef DEBUG if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y) fprintf(stderr, "get_phi: out of bounds %i %i %i\n", x + pv->mo->max_offset_x, y + pv->mo->max_offset_y, state); #endif if (y - offset_y + pv->mo->max_offset_y >= 0) return pv->phi[offset_x][y - offset_y + pv->mo->max_offset_y][state]; else return 1; } /*============================================================================*/ /* set the value of this matrix cell */ static void set_phi(plocal_store_t * pv, int x, int y, int state, double prob){ #ifdef DEBUG if (y > pv->len_y || state > pv->mo->N || y < - pv->mo->max_offset_y) fprintf(stderr, "set_phi: out of bounds %i %i %i %f\n", x + pv->mo->max_offset_x, y + pv->mo->max_offset_y, state, prob); #endif pv->phi[0][y + pv->mo->max_offset_y][state] = prob; } /*============================================================================*/ /* since we only keep the frontier we have to push back when ever a row is complete */ static void push_back_phi(plocal_store_t * pv, int length_y){ int off_x, y, j; /* push back the old phi values */ for (off_x=pv->mo->max_offset_x; off_x>0; off_x--) for (y=0; ymo->max_offset_y + 1; y++) for (j=0; jmo->N; j++) pv->phi[off_x][y][j] = pv->phi[off_x-1][y][j]; } /*============================================================================*/ static void set_psi(plocal_store_t * pv, int x, int y, int state, int from_state){ /* shift by max_offsets for negative indices */ #ifdef DEBUG if (x > pv->len_x || y > pv->len_y || state > pv->mo->N || x < - pv->mo->max_offset_x || y < - pv->mo->max_offset_y) fprintf(stderr, "set_psi: out of bounds %i %i %i %i\n", x + pv->mo->max_offset_x, y + pv->mo->max_offset_y, state, from_state); #endif pv->psi[x + pv->mo->max_offset_x][y + pv->mo->max_offset_y][state] = from_state; } /*============================================================================*/ static int get_psi(plocal_store_t * pv, int x, int y, int state) { /* shift by max_offsets for negative indices*/ #ifdef DEBUG if (x > pv->len_x || y > pv->len_y || state > pv->mo->N || x < - pv->mo->max_offset_x || y < - pv->mo->max_offset_y) fprintf(stderr, "get_psi: out of bounds %i %i %i\n", x + pv->mo->max_offset_x, y + pv->mo->max_offset_y, state); #endif return pv->psi[x + pv->mo->max_offset_x][y + pv->mo->max_offset_y][state]; } /*============================================================================*/ int *ghmm_dpmodel_viterbi_test(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, double *log_p, int *path_length) { plocal_store_t *pv; printf("---- viterbi test -----\n"); /*ghmm_dpmodel_print(mo);*/ pv = pviterbi_alloc(mo, X->length, Y->length); printf("try free within pviterbi_test\n"); pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x , mo->max_offset_y); printf("OK\n"); return NULL; } /*============================================================================*/ int *ghmm_dpmodel_viterbi(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, double *log_p, int *path_length) { return ghmm_dpmodel_viterbi_variable_tb(mo, X, Y, log_p, path_length, -1); } /*============================================================================*/ int *ghmm_dpmodel_viterbi_variable_tb(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, double *log_p, int *path_length, int start_traceback_with) { #define CUR_PROC "ghmm_dpmodel_viterbi" int u, v, j, i, off_x, off_y, current_state_index; double value, max_value, previous_prob; plocal_store_t *pv; int *state_seq = NULL; int emission; double log_b_i, log_in_a_ij; double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int); /* printf("---- viterbi -----\n"); */ i_list * state_list; state_list = ighmm_list_init_list(); log_in_a = &sget_log_in_a; /* int len_path = mo->N*len; the length of the path is not known apriori */ /* if (mo->model_type & kSilentStates && */ /* mo->silent != NULL && */ /* mo->topo_order == NULL) { */ /* ghmm_dmodel_topo_order( mo ); */ /* } */ /* Allocate the matrices log_in_a, log_b,Vektor phi, phi_new, Matrix psi */ pv = pviterbi_alloc(mo, X->length, Y->length); if (!pv) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* Precomputing the log(a_ij) and log(bj(ot)) */ pviterbi_precompute(mo, pv); /* Initialize the lookback matrix (for positions [-offsetX,0], [-1, len_y]*/ init_phi(pv, X, Y); /* u > max_offset_x , v starts -1 to allow states with offset_x == 0 which corresponds to a series of gap states before reading the first character of x at position x=0, y=v */ /** THIS IS THE MAIN RECURRENCE **/ for (u = mo->max_offset_x + 1; u < X->length; u++) { for (v = -mo->max_offset_y; v < Y->length; v++) { for (j = 0; j < mo->N; j++) { /** initialization of phi (lookback matrix), psi (traceback) **/ set_phi(pv, u, v, j, +1); set_psi(pv, u, v, j, -1); } for (i = 0; i < mo->N; i++) { /* Determine the maximum */ /* max_phi = phi[i] + log_in_a[j][i] ... */ if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) { max_value = -DBL_MAX; set_psi(pv, u, v, i, -1); for (j = 0; j < mo->s[i].in_states; j++) { /* look back in the phi matrix at the offsets */ previous_prob = get_phi(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]); log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v); if ( previous_prob != +1 && log_in_a_ij != +1) { value = previous_prob + log_in_a_ij; if (value > max_value) { max_value = value; set_psi(pv, u, v, i, mo->s[i].in_id[j]); } } else {;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */ } emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y); #ifdef DEBUG if (emission > ghmm_dpmodel_emission_table_size(mo, i)){ printf("State %i\n", i); ghmm_dpmodel_state_print(&(mo->s[i])); printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], ghmm_dpmodel_emission_table_size(mo, i), emission); } #endif log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y)); /* No maximum found (that is, state never reached) or the output O[t] = 0.0: */ if (max_value == -DBL_MAX ||/* and then also: (v->psi[t][j] == -1) */ log_b_i == +1 ) { set_phi(pv, u, v, i, +1); } else set_phi(pv, u, v, i, max_value + log_b_i); } } /* complete time step for emitting states */ /* last_osc = osc; */ /* save last transition class */ /*if (mo->model_type & kSilentStates) { p__viterbi_silent( mo, t, v ); }*/ /* complete time step for silent states */ /************** for (j = 0; j < mo->N; j++) { printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]); } for (i = 0; i < mo->N; i++){ printf("%d\t", former_matchcount[i]); } for (i = 0; i < mo->N; i++){ printf("%d\t", recent_matchcount[i]); } ****************/ } /* End for v in Y */ /* Next character in X */ push_back_phi(pv, Y->length); } /* End for u in X */ /* Termination */ max_value = -DBL_MAX; ighmm_list_append(state_list, -1); /* if start_traceback_with is -1 (it is by default) search for the most likely state at the end of both sequences */ if (start_traceback_with == -1) { for (j = 0; j < mo->N; j++){ #ifdef DEBUG printf("phi(len_x)(len_y)(%i)=%f\n", j, get_phi(pv, u, Y->length-1, 0, 0, j)); #endif if ( get_phi(pv, u, Y->length-1, 0, 0, j) != +1 && get_phi(pv, u, Y->length-1, 0, 0, j) > max_value) { max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0, j); state_list->last->val = j; } } } /* this is the special traceback mode for the d & c algorithm that also connects the traceback to the first state of the rest of the path */ else { #ifdef DEBUG printf("D & C traceback from state %i!\n", start_traceback_with); printf("Last characters emitted X: %i, Y: %i\n", ghmm_dpseq_get_char(X, mo->s[start_traceback_with].alphabet, X->length-1), ghmm_dpseq_get_char(Y, mo->s[start_traceback_with].alphabet, Y->length-1)); for (j = 0; j < mo->N; j++){ printf("phi(len_x)(len_y)(%i)=%f\n", j, get_phi(pv, X->length-1, Y->length-1, 0, 0, j)); } #endif max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0, start_traceback_with); if (max_value != 1 && max_value > -DBL_MAX) state_list->last->val = start_traceback_with; } if (max_value == -DBL_MAX) { /* Sequence can't be generated from the model! */ *log_p = +1; /* Backtracing doesn't work, because state_seq[*] allocated with -1 */ /* for (t = len - 2; t >= 0; t--) state_list->last->val = -1; */ } else { /* Backtracing, should put DEL path nicely */ *log_p = max_value; /* removed the handling of silent states here */ /* start trace back at the end of both sequences */ u = X->length - 1; v = Y->length - 1; current_state_index = state_list->first->val; off_x = mo->s[current_state_index].offset_x; off_y = mo->s[current_state_index].offset_y; while (u - off_x >= -1 && v - off_y >= -1 && current_state_index != -1) { /* while (u > 0 && v > 0) { */ /* look up the preceding state and save it in the first position of the state list */ /* printf("Current state %i at (%i,%i) -> preceding state %i\n", current_state_index, u, v, get_psi(pv, u, v, current_state_index)); */ /* update the current state */ current_state_index = get_psi(pv, u, v, current_state_index); if (current_state_index != -1) ighmm_list_insert(state_list, current_state_index); /* move in the alignment matrix */ u -= off_x; v -= off_y; /* get the next offsets */ off_x = mo->s[current_state_index].offset_x; off_y = mo->s[current_state_index].offset_y; } } /* Free the memory space */ pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x , mo->max_offset_y); /* printf("After traceback: last state = %i\n", state_list->last->val); */ state_seq = ighmm_list_to_array(state_list); *path_length = state_list->length; /* PRINT PATH */ /* fprintf(stderr, "Viterbi path: " ); */ /* int t; */ /* for(t=0; t < *path_length; t++) */ /* if (state_seq[t] >= 0) fprintf(stderr, " %d ", state_seq[t]); */ /* fprintf(stderr, "\n Freeing ... \n"); */ return (state_seq); STOP: /* Label STOP from ARRAY_[CM]ALLOC */ /* Free the memory space */ pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x, mo->max_offset_y); m_free(state_seq); ighmm_list_free(state_list); return NULL; #undef CUR_PROC } /* viterbi */ /*============================================================================*/ double ghmm_dpmodel_viterbi_logp(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, int *state_seq, int state_seq_len) { #define CUR_PROC "ghmm_dpmodel_viterbi_logp" int s, t, i, j, u, v; double log_p = 0.0; double log_b_i = 1.0; double log_in_a = 1.0; plocal_store_t *pv; /* Allocate the matrices log_in_a, log_b,Vektor phi, phi_new, Matrix psi */ pv = pviterbi_alloc(mo, 0, 0); pviterbi_precompute(mo, pv); /* initial state */ t = 0; u = -1; v = -1; if (state_seq_len > t) { i = state_seq[t]; /* initial state probability */ /* log_p += log(mo->s[i].pi); */ log_p += mo->s[i].log_pi; if (log_p == 1.0) { pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y); fprintf(stderr, "the initial probability of state %i is zero\n", i); return 1.0;/* the initial prob is zero */ } /* consume the first characters of the sequences */ u += mo->s[i].offset_x; v += mo->s[i].offset_y; /* get the emission probability */ log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y)); if (log_b_i == 1.0) { /* chars cant be emitted */ pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y); fprintf(stderr, "characters (%i, %i) at position (%i, %i) cannot be emitted by state %i (t=%i)\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), u, v, i, t); return 1.0; } log_p += log_b_i; } else { /* there is no path.. */ pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y); fprintf(stderr, "No path given!\n"); return 1.0; } /* rest of the sequence */ for (t=1; ts[i].offset_x; v += mo->s[i].offset_y; if (u >= X->length || v >= Y->length) { /* path consumes too many chars */ pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y); fprintf(stderr, "path consumes too many chars\n"); return 1.0; } /* transition prob state j -> i*/ log_in_a = 1.0; for (s=0; ss[i].in_states; s++) { if (mo->s[i].in_id[s] == j) { log_in_a = sget_log_in_a(pv, i, s, X, Y, u, v); break; } } if (log_in_a == 1.0) { pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y); fprintf(stderr, "transition (%i -> %i) at t=%i not possible\n", j, i,t); return 1.0; /* transition not possible */ } /* emission probability */ log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y)); if (log_b_i == 1.0) { pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y); fprintf(stderr, "characters (%i, %i) at position (%i, %i) cannot be emitted by state %i (t=%i)\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), u, v, i, t); return 1.0; /* characters cant be emitted */ } log_p += log_in_a + log_b_i; } pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y); /* check if all of the sequences has been consumed */ if (u != X->length - 1 && v != Y->length - 1) { fprintf(stderr, "path consumes not all characters (%i of %i, %i of %i)\n", u + 1, X->length, v + 1, Y->length); return 1.0; } return log_p; #undef CUR_PROC } /* ghmm_dpmodel_viterbi_logp */