1 /******************************************************************************* 2 * 3 * This file is part of the General Hidden Markov Model Library, 4 * GHMM version __VERSION__, see http://ghmm.org 5 * 6 * Filename: ghmm/ghmm/smodel.h 7 * Authors: Bernhard Knab, Benjamin Georgi 8 * 9 * Copyright (C) 1998-2004 Alexander Schliep 10 * Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln 11 * Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik, 12 * Berlin 13 * 14 * Contact: schliep@ghmm.org 15 * 16 * This library is free software; you can redistribute it and/or 17 * modify it under the terms of the GNU Library General Public 18 * License as published by the Free Software Foundation; either 19 * version 2 of the License, or (at your option) any later version. 20 * 21 * This library is distributed in the hope that it will be useful, 22 * but WITHOUT ANY WARRANTY; without even the implied warranty of 23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 24 * Library General Public License for more details. 25 * 26 * You should have received a copy of the GNU Library General Public 27 * License along with this library; if not, write to the Free 28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 29 * 30 * 31 * This file is version $Revision: 2277 $ 32 * from $Date: 2009-04-28 08:44:31 -0400 (Tue, 28 Apr 2009) $ 33 * last change by $Author: grunau $. 34 * 35 *******************************************************************************/ 36 #ifndef GHMM_SMODEL_H 37 #define GHMM_SMODEL_H 38 39 #ifdef __cplusplus 40 extern "C" { 41 #endif 42 43 #include <stdio.h> 44 45 #include "ghmm.h" 46 #include "scanner.h" 47 48 49 /**@name SHMM-Modell */ 50 /*@{ (Doc++-Group: smodel) */ 51 52 /** Continuous HMM. Structures and function. 53 ghmm_cmodel includes continuous ghmm_dmodel with one transition matrix 54 (COS is set to 1) and an extension for 55 models with several matrices 56 (COS is set to a value greater than 1). In the latter case 57 a suitable (depending on the spezific application) function 58 has to be set in the get_class function pointer of 59 ghmm_cmodel_class_change_context */ 60 61 /** 62 density types for ghmm_cmodel. 63 */ 64 typedef enum { 65 normal, /**< gaussian */ 66 normal_right, /**< right tail */ 67 normal_approx, /**< approximated gaussian */ 68 normal_left, /**< left tail */ 69 uniform, 70 binormal, /**< two dimensional gaussian */ 71 multinormal, /**< multivariate gaussian */ 72 density_number /**< number of density types, has to stay last */ 73 } ghmm_density_t; 74 75 /** 76 ghmm_c_emission bundles all emission parameters 77 */ 78 typedef struct ghmm_c_emission { 79 /** specify the type of the density */ 80 ghmm_density_t type; 81 /** dimension > 1 for multivariate normals */ 82 int dimension; 83 /** mean for output functions (pointer to mean vector 84 for multivariate) */ 85 union { 86 double val; 87 double *vec; 88 } mean; 89 /** variance or pointer to a covariance matrix 90 for multivariate normals */ 91 union { 92 double val; 93 double *mat; 94 } variance; 95 /** pointer to inverse of covariance matrix if multivariate normal 96 else NULL */ 97 double *sigmainv; 98 /** determinant of covariance matrix for multivariate normal */ 99 double det; 100 /** Cholesky decomposition of covariance matrix A, 101 if A = GG' sigmacd only holds G */ 102 double *sigmacd; 103 /** minimum of uniform distribution 104 or left boundary for rigth-tail gaussians */ 105 double min; 106 /** maximum of uniform distribution 107 or right boundary for left-tail gaussians */ 108 double max; 109 /** if fixed != 0 the parameters of the density are fixed */ 110 int fixed; 111 } ghmm_c_emission; 112 113 /** 114 State struct for continuous emission HMMs. 115 */ 116 typedef struct ghmm_cstate { 117 /** Number of output densities per state */ 118 int M; 119 /** initial prob. */ 120 double pi; 121 /** IDs of successor states */ 122 int *out_id; 123 /** IDs of predecessor states */ 124 int *in_id; 125 /** transition probs to successor states. It is a 126 matrix in case of mult. transition matrices (COS > 1)*/ 127 double **out_a; 128 /** transition probs from predecessor states. It is a 129 matrix in case of mult. transition matrices (COS > 1) */ 130 double **in_a; 131 /** number of successor states */ 132 int out_states; 133 /** number of predecessor states */ 134 int in_states; 135 /** weight vector for output function components */ 136 double *c; 137 /** flag for fixation of parameter. If fix = 1 do not change parameters of 138 output functions, if fix = 0 do normal training. Default is 0. */ 139 int fix; 140 /** vector of ghmm_c_emission 141 (type and parameters of output function components) */ 142 ghmm_c_emission *e; 143 /** contains a description of the state (null terminated utf-8)*/ 144 char *desc; 145 /** x coordinate position for graph representation plotting **/ 146 int xPosition; 147 /** y coordinate position for graph representation plotting **/ 148 int yPosition; 149 } ghmm_cstate; 150 151 struct ghmm_cmodel; 152 153 /** Class change context for continuous emission HMMs. 154 */ 155 typedef struct ghmm_cmodel_class_change_context { 156 157 /* Names of class change module/function (for python callback) */ 158 char *python_module; 159 char *python_function; 160 161 /* index of current sequence */ 162 int k; 163 164 /** pointer to class function */ 165 int (*get_class) (struct ghmm_cmodel *, const double *, int, int); 166 167 168 /* space for any data necessary for class switch, USER is RESPONSIBLE */ 169 void *user_data; 170 171 } ghmm_cmodel_class_change_context; 172 173 /** Model struct for continuous emission HMMs. 174 */ 175 typedef struct ghmm_cmodel { 176 /** Number of states */ 177 int N; 178 /** Maximun number of components in the states */ 179 int M; 180 /** Number of dimensions of the emission components. 181 All emissions must have the same number of dimensions */ 182 int dim; 183 /** ghmm_cmodel includes continuous model with one transition matrix 184 (cos is set to 1) and an extension for models with several matrices 185 (cos is set to a positive integer value > 1).*/ 186 int cos; 187 /** prior for a priori prob. of the model. -1 means no prior specified (all 188 models have equal prob. a priori. */ 189 double prior; 190 191 /* contains a arbitrary name for the model (null terminated utf-8) */ 192 char *name; 193 194 /** Contains bit flags for varios model extensions such as 195 kSilentStates (see ghmm.h for a complete list) 196 */ 197 int model_type; 198 199 /** All states of the model. Transition probs are part of the states. */ 200 ghmm_cstate *s; 201 202 /* pointer to a ghmm_cmodel_class_change_context struct necessary for multiple transition 203 classes */ 204 ghmm_cmodel_class_change_context *class_change; 205 206 } ghmm_cmodel; 207 208 209 210 /* don't include this earlier: in sequence.h ghmm_cmodel has to be known */ 211 #include "sequence.h" 212 213 int ghmm_cmodel_class_change_alloc (ghmm_cmodel * smo); 214 215 /** Allocates the multivariate arrays of a ghmm_c_emmission struct 216 @return 0: success, -1: error 217 @param emissions address of ghmm_c_emission 218 @param dim dimension for multivariate emissions 219 */ 220 int ghmm_c_emission_alloc(ghmm_c_emission *emissions, int dim); 221 222 /** Allocates a cstate 223 @return 0: success, -1: error 224 @param s pointer to the allocated states 225 @param M maximal number of densities of the state 226 @param in_states number of incoming transitions 227 @param out_states number of outgoing transitions 228 @param cos number of transition classes 229 */ 230 int ghmm_cstate_alloc (ghmm_cstate * s, int M, 231 int in_states, int out_states, int cos); 232 233 /** Alloc model 234 @return allocated cmodel, -1: error 235 @param N number of states in the model 236 @param modeltype type of the model 237 */ 238 ghmm_cmodel * ghmm_cmodel_calloc( int N, int modeltype); 239 240 /** Free memory of multivariate ghmm_c_emission arrays 241 @param emission pointer of ghmm_c_emission to free */ 242 void ghmm_c_emission_free(ghmm_c_emission *emission); 243 244 /** Free memory ghmm_cmodel 245 @return 0: success, -1: error 246 @param smo pointer pointer of ghmm_cmodel */ 247 int ghmm_cmodel_free (ghmm_cmodel ** smo); 248 249 /** 250 Copies one smodel. Memory alloc is here. 251 @return pointer to ghmm_cmodel copy 252 @param smo ghmm_cmodel to be copied */ 253 ghmm_cmodel *ghmm_cmodel_copy (const ghmm_cmodel * smo); 254 255 /** 256 Checks if ghmm_cmodel is well definded. E.g. sum pi = 1, only positive values 257 etc. 258 @return 0 if ghmm_cmodel is ok, -1 for error 259 @param smo ghmm_cmodel for checking 260 */ 261 int ghmm_cmodel_check (const ghmm_cmodel * smo); 262 263 /** 264 For a vector of smodels: check that the number of states and the number 265 of output function components are the same in each smodel. 266 @return 0 if smodels are ok, -1 for error 267 @param smo: vector of smodels for checking 268 @param smodel_number: number of smodels 269 */ 270 int ghmm_cmodel_check_compatibility (ghmm_cmodel ** smo, int smodel_number); 271 272 /** 273 Generates random symbol. 274 Generates random number(s) for a specified state and specified 275 output component of the given smodel. 276 @return 0 on success 277 @param smo: ghmm_cmodel 278 @param state: state 279 @param m: index of output component 280 @param x: pointer to double to store result 281 */ 282 int ghmm_cmodel_get_random_var(ghmm_cmodel *smo, int state, int m, double *x); 283 284 285 /** 286 Produces sequences to a given model. All memory that is needed for the 287 sequences is allocated inside the function. It is possible to define 288 the length of the sequences globally (global_len > 0) or it can be set 289 inside the function, when a final state in the model is reached (a state 290 with no output). If the model has no final state, the sequences will 291 have length MAX_SEQ_LEN. 292 @return pointer to an array of sequences 293 @param smo: model 294 @param seed: initial parameter for the random value generator 295 (an integer). If seed == 0, then the random value 296 generator is not initialized. 297 @param global_len: length of sequences (=0: automatically via final states) 298 @param seq_number: number of sequences 299 @param Tmax: maximal sequence length, set to MAX_SEQ_LEN if -1 300 */ 301 302 ghmm_cseq *ghmm_cmodel_generate_sequences (ghmm_cmodel * smo, int seed, 303 int global_len, long seq_number, 304 int Tmax); 305 306 /** 307 Computes sum over all sequence of 308 seq_w * log( P ( O|lambda ) ). If a sequence can't be generated by smo 309 error cost of seq_w * PRENALTY_LOGP are imposed. 310 @return n: number of evaluated sequences, -1: error 311 @param smo ghmm_cmodel 312 @param sqd sequence struct 313 @param log_p evaluated log likelihood 314 */ 315 int ghmm_cmodel_likelihood (ghmm_cmodel * smo, ghmm_cseq * sqd, double *log_p); 316 317 /** 318 Computes log likelihood for all sequence of 319 seq_w * log( P ( O|lambda ) ). If a sequence can't be generated by smo 320 error cost of seq_w * PRENALTY_LOGP are imposed. 321 @return n: number of evaluated sequences, -1: error 322 @param smo ghmm_cmodel 323 @param sqd sequence struct 324 @param log_ps array of evaluated likelihoods 325 */ 326 int ghmm_cmodel_individual_likelihoods (ghmm_cmodel * smo, ghmm_cseq * sqd, 327 double *log_ps); 328 329 330 /** Reads an XML file with specifications for one or more smodels. 331 All parameters in matrix or vector form. 332 @return vector of read smodels 333 @param filename input xml file 334 @param smo_number number of smodels to read*/ 335 ghmm_cmodel **ghmm_cmodel_xml_read (const char *filename, int *smo_number); 336 337 /** Writes an XML file with specifications for one or more smodels. 338 @return 0:sucess, -1:error 339 @param file output xml filename 340 @param smo ghmm_cmodel(s) 341 @param smo_number number of smodels to write*/ 342 int ghmm_cmodel_xml_write(ghmm_cmodel** smo, const char *file, int smo_number); 343 344 /** 345 Prints one ghmm_cmodel in matrix form. 346 @param file output file 347 @param smo ghmm_cmodel 348 */ 349 void ghmm_cmodel_print (FILE * file, ghmm_cmodel * smo); 350 351 /** 352 Prints one ghmm_cmodel with only one transition Matrix A (=Ak_0). 353 @param file output file 354 @param smo ghmm_cmodel 355 */ 356 void ghmm_cmodel_print_oneA (FILE * file, ghmm_cmodel * smo); 357 358 /** 359 Prints transition matrix of specified class. 360 @param file output file 361 @param smo ghmm_cmodel 362 @param k transition class 363 @param tab format: leading tab 364 @param separator format: seperator 365 @param ending format: end of data in line 366 */ 367 void ghmm_cmodel_Ak_print (FILE * file, ghmm_cmodel * smo, int k, char *tab, 368 char *separator, char *ending); 369 370 /** 371 Prints weight matrix of output functions of an smodel. 372 @param file output file 373 @param smo ghmm_cmodel 374 @param tab format: leading tab 375 @param separator format: seperator 376 @param ending format: end of data in line 377 */ 378 void ghmm_cmodel_C_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator, 379 char *ending); 380 381 /** 382 Prints mean matrix of output functions of an smodel. 383 @param file output file 384 @param smo ghmm_cmodel 385 @param tab format: leading tab 386 @param separator format: seperator 387 @param ending format: end of data in line 388 */ 389 void ghmm_cmodel_Mue_print (FILE * file, ghmm_cmodel * smo, char *tab, 390 char *separator, char *ending); 391 /** 392 Prints variance matrix of output functions of an smodel. 393 @param file output file 394 @param smo ghmm_cmodel 395 @param tab format: leading tab 396 @param separator format: seperator 397 @param ending format: end of data in line 398 */ 399 void ghmm_cmodel_U_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator, 400 char *ending); 401 /** 402 Prints initial prob vector of an smodel. 403 @param file output file 404 @param smo ghmm_cmodel 405 @param tab format: leading tab 406 @param separator format: seperator 407 @param ending format: end of data in line 408 */ 409 void ghmm_cmodel_Pi_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator, 410 char *ending); 411 /** 412 Prints vector of fix_states. 413 @param file output file 414 @param smo ghmm_cmodel 415 @param tab format: leading tab 416 @param separator format: seperator 417 @param ending format: end of data in line 418 */ 419 void ghmm_cmodel_fix_print (FILE * file, ghmm_cmodel * smo, char *tab, 420 char *separator, char *ending); 421 422 /** Computes the density of one symbol (omega) in a given state and a 423 given output component 424 @return calculated density 425 @param state ghmm_cstate 426 @param m output component 427 @param omega given symbol 428 */ 429 double ghmm_cmodel_calc_cmbm(const ghmm_cstate *state, int m, double *omega); 430 431 /** Computes the density of one symbol (omega) in a given state (sums over 432 all output components 433 @return calculated density 434 @param state state 435 @param omega given symbol 436 */ 437 double ghmm_cmodel_calc_b(ghmm_cstate *state, const double *omega); 438 439 /** Computes probabilistic distance of two models 440 @return the distance 441 @param cm0 ghmm_cmodel used for generating random output 442 @param cm ghmm_cmodel to compare with 443 @param maxT maximum output length (for HMMs with absorbing states multiple 444 sequences with a toal length of at least maxT will be 445 generated) 446 @param symmetric flag, whether to symmetrize distance (not implemented yet) 447 @param verbose flag, whether to monitor distance in 40 steps. 448 Prints to stdout (yuk!) 449 */ 450 double ghmm_cmodel_prob_distance (ghmm_cmodel * cm0, ghmm_cmodel * cm, int maxT, 451 int symmetric, int verbose); 452 453 /** 454 Computes value of distribution function for a given symbol omega, a given 455 ghmm_cstate and a given output component. 456 @return value of distribution function 457 @param state ghmm_cstate 458 @param m component 459 @param omega symbol 460 */ 461 double ghmm_cmodel_calc_cmBm(const ghmm_cstate *state, int m, const double *omega); 462 463 /** 464 Computes value of distribution function for a given symbol omega and 465 a given state. Sums over all components. 466 @return value of distribution function 467 @param state ghmm_cstate 468 @param omega symbol 469 */ 470 double ghmm_cmodel_calc_B(ghmm_cstate *state, const double *omega); 471 472 /** Computes the number of free parameters in an array of 473 smodels. E.g. if the number of parameter from pi is N - 1. 474 Counts only those parameters, that can be changed during 475 training. If pi[i] = 0 it is not counted, since it can't be changed. 476 @return number of free parameters 477 @param smo ghmm_cmodel 478 @param smo_number number of smodels 479 */ 480 int ghmm_cmodel_count_free_parameter (ghmm_cmodel ** smo, int smo_number); 481 482 483 /*============================================================================*/ 484 485 /* keep the following functions for first distribution??? 486 --> BK ? 487 */ 488 489 490 /** Generates interval(a,b) with B(a) < 0.01, B(b) > 0.99 491 @param smo continous HMM 492 @param state given state 493 @param a return-value: left side 494 @param b return-value: right side 495 */ 496 void ghmm_cmodel_get_interval_B (ghmm_cmodel * smo, int state, double *a, double *b); 497 498 /** Normalizes initial and transisition probablilities and mixture weights 499 @return 0 on success / -1 on error 500 @param smo model to normalize 501 */ 502 503 int ghmm_cmodel_normalize(ghmm_cmodel *smo); 504 505 /** 506 Get transition probabality from state 'i' to state 'j' to value 'prob'. 507 NOTE: No internal checks 508 @return 1 if there is a transition, 0 otherwise 509 @param mo model 510 @param i state index (source) 511 @param j state index (target) 512 @param c transition class 513 */ 514 double ghmm_cmodel_get_transition(ghmm_cmodel* mo, int i, int j, int c); 515 516 /** 517 Checks if a non zero transition exists between state 'i' to state 'j'. 518 NOTE: No internal checks 519 @return 0 if there is no non zero transition from state i to state j, 1 else 520 @param mo model 521 @param i state index (source) 522 @param j state index (target) 523 @param c transition class 524 */ 525 int ghmm_cmodel_check_transition(ghmm_cmodel* mo, int i, int j, int c); 526 527 /** 528 Set transition from state 'i' to state 'j' to value 'prob'. 529 NOTE: No internal checks - model might get broken if used without care. 530 @param mo model 531 @param i state index (source) 532 @param j state index (target) 533 @param prob probability 534 @param c transition class 535 */ 536 void ghmm_cmodel_set_transition (ghmm_cmodel* mo, int i, int j, int c, double prob); 537 538 #ifdef __cplusplus 539 } 540 #endif 541 #endif 542 /*@} (Doc++-Group: smodel) */ 543