1 /*******************************************************************************
2 *
3 *       This file is part of the General Hidden Markov Model Library,
4 *       GHMM version __VERSION__, see http://ghmm.org
5 *
6 *       Filename: ghmm/ghmm/smodel.h
7 *       Authors:  Bernhard Knab, Benjamin Georgi
8 *
9 *       Copyright (C) 1998-2004 Alexander Schliep
10 *       Copyright (C) 1998-2001 ZAIK/ZPR, Universitaet zu Koeln
11 *	Copyright (C) 2002-2004 Max-Planck-Institut fuer Molekulare Genetik,
12 *                               Berlin
13 *
14 *       Contact: schliep@ghmm.org
15 *
16 *       This library is free software; you can redistribute it and/or
17 *       modify it under the terms of the GNU Library General Public
18 *       License as published by the Free Software Foundation; either
19 *       version 2 of the License, or (at your option) any later version.
20 *
21 *       This library is distributed in the hope that it will be useful,
22 *       but WITHOUT ANY WARRANTY; without even the implied warranty of
23 *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24 *       Library General Public License for more details.
25 *
26 *       You should have received a copy of the GNU Library General Public
27 *       License along with this library; if not, write to the Free
28 *       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 *
30 *
31 *       This file is version $Revision: 2277 $
32 *                       from $Date: 2009-04-28 08:44:31 -0400 (Tue, 28 Apr 2009) $
33 *             last change by $Author: grunau $.
34 *
35 *******************************************************************************/
36 #ifndef GHMM_SMODEL_H
37 #define GHMM_SMODEL_H
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
43 #include <stdio.h>
44 
45 #include "ghmm.h"
46 #include "scanner.h"
47 
48 
49 /**@name SHMM-Modell */
50 /*@{ (Doc++-Group: smodel) */
51 
52 /** Continuous HMM. Structures and function.
53     ghmm_cmodel includes continuous ghmm_dmodel with one transition matrix
54     (COS  is set to 1) and an extension for
55     models with several matrices
56     (COS is set to a value greater than 1). In the latter case
57     a suitable (depending on the spezific application) function
58     has to be set in the get_class function pointer of
59     ghmm_cmodel_class_change_context */
60 
61 /**
62    density types for ghmm_cmodel.
63 */
64   typedef enum {
65     normal,        /**< gaussian */
66     normal_right,  /**< right tail */
67     normal_approx, /**< approximated gaussian */
68     normal_left,   /**< left tail */
69     uniform,
70     binormal,      /**< two dimensional gaussian */
71     multinormal,   /**< multivariate gaussian */
72     density_number /**< number of density types, has to stay last */
73   } ghmm_density_t;
74 
75   /**
76       ghmm_c_emission bundles all emission parameters
77   */
78   typedef struct ghmm_c_emission {
79     /** specify the type of the density */
80     ghmm_density_t type;
81     /** dimension > 1 for multivariate normals */
82     int dimension;
83     /** mean for output functions (pointer to mean vector
84         for multivariate) */
85     union {
86       double val;
87       double *vec;
88     } mean;
89     /** variance or pointer to a covariance matrix
90         for multivariate normals */
91     union {
92       double val;
93       double *mat;
94     } variance;
95     /** pointer to inverse of covariance matrix if multivariate normal
96         else NULL */
97     double *sigmainv;
98     /** determinant of covariance matrix for multivariate normal */
99     double det;
100     /** Cholesky decomposition of covariance matrix A,
101         if A = GG' sigmacd only holds G */
102     double *sigmacd;
103     /** minimum of uniform distribution
104        or left boundary for rigth-tail gaussians */
105     double min;
106     /** maximum of uniform distribution
107        or right boundary for left-tail gaussians */
108     double max;
109     /** if fixed != 0 the parameters of the density are fixed */
110     int fixed;
111   } ghmm_c_emission;
112 
113 /**
114     State struct for continuous emission HMMs.
115 */
116   typedef struct ghmm_cstate {
117   /** Number of output densities per state */
118     int M;
119   /** initial prob. */
120     double pi;
121   /** IDs of successor states */
122     int *out_id;
123   /** IDs of predecessor states */
124     int *in_id;
125   /** transition probs to successor states. It is a
126    matrix in case of mult. transition matrices (COS > 1)*/
127     double **out_a;
128   /** transition probs from predecessor states. It is a
129    matrix in case of mult. transition matrices (COS > 1) */
130     double **in_a;
131   /** number of  successor states */
132     int out_states;
133   /** number of  predecessor states */
134     int in_states;
135   /** weight vector for output function components */
136     double *c;
137   /** flag for fixation of parameter. If fix = 1 do not change parameters of
138       output functions, if fix = 0 do normal training. Default is 0. */
139     int fix;
140   /** vector of ghmm_c_emission
141       (type and parameters of output function components) */
142     ghmm_c_emission *e;
143   /** contains a description of the state (null terminated utf-8)*/
144   char *desc;
145   /** x coordinate position for graph representation plotting **/
146   int xPosition;
147   /** y coordinate position for graph representation plotting **/
148   int yPosition;
149   } ghmm_cstate;
150 
151   struct ghmm_cmodel;
152 
153   /** Class change context for continuous emission HMMs.
154    */
155   typedef struct ghmm_cmodel_class_change_context {
156 
157     /* Names of class change module/function (for python callback) */
158     char *python_module;
159     char *python_function;
160 
161     /* index of current sequence */
162     int k;
163 
164     /** pointer to class function */
165     int (*get_class) (struct ghmm_cmodel *, const double *, int, int);
166 
167 
168     /* space for any data necessary for class switch, USER is RESPONSIBLE */
169     void *user_data;
170 
171   } ghmm_cmodel_class_change_context;
172 
173 /** Model struct for continuous emission HMMs.
174  */
175   typedef struct ghmm_cmodel {
176   /** Number of states */
177     int N;
178   /** Maximun number of components in the states */
179     int M;
180   /** Number of dimensions of the emission components.
181       All emissions must have the same number of dimensions */
182     int dim;
183   /** ghmm_cmodel includes continuous model with one transition matrix
184       (cos  is set to 1) and an extension for models with several matrices
185       (cos is set to a positive integer value > 1).*/
186     int cos;
187   /** prior for a priori prob. of the model. -1 means no prior specified (all
188       models have equal prob. a priori. */
189     double prior;
190 
191   /* contains a arbitrary name for the model (null terminated utf-8) */
192   char *name;
193 
194   /** Contains bit flags for varios model extensions such as
195       kSilentStates (see ghmm.h for a complete list)
196   */
197   int model_type;
198 
199   /** All states of the model. Transition probs are part of the states. */
200     ghmm_cstate *s;
201 
202     /* pointer to a ghmm_cmodel_class_change_context struct necessary for multiple transition
203        classes */
204     ghmm_cmodel_class_change_context *class_change;
205 
206   } ghmm_cmodel;
207 
208 
209 
210 /* don't include this earlier: in sequence.h ghmm_cmodel has to be known */
211 #include "sequence.h"
212 
213   int ghmm_cmodel_class_change_alloc (ghmm_cmodel * smo);
214 
215 /** Allocates the multivariate arrays of a ghmm_c_emmission struct
216     @return 0: success, -1: error
217     @param emissions   address of ghmm_c_emission
218     @param dim         dimension for multivariate emissions
219 */
220   int ghmm_c_emission_alloc(ghmm_c_emission *emissions, int dim);
221 
222 /** Allocates a cstate
223     @return 0: success, -1: error
224     @param s           pointer to the allocated states
225     @param M           maximal number of densities of the state
226     @param in_states   number of incoming transitions
227     @param out_states  number of outgoing transitions
228     @param cos         number of transition classes
229 */
230   int ghmm_cstate_alloc (ghmm_cstate * s, int M,
231 			  int in_states, int out_states, int cos);
232 
233 /** Alloc model
234     @return allocated cmodel, -1: error
235     @param N number of states in the model
236     @param modeltype type of the model
237 */
238   ghmm_cmodel * ghmm_cmodel_calloc( int N, int modeltype);
239 
240 /** Free memory of multivariate ghmm_c_emission arrays
241     @param emission pointer of ghmm_c_emission to free */
242   void ghmm_c_emission_free(ghmm_c_emission *emission);
243 
244 /** Free memory ghmm_cmodel
245     @return 0: success, -1: error
246     @param smo  pointer pointer of ghmm_cmodel */
247   int ghmm_cmodel_free (ghmm_cmodel ** smo);
248 
249 /**
250    Copies one smodel. Memory alloc is here.
251    @return pointer to ghmm_cmodel copy
252    @param smo   ghmm_cmodel to be copied  */
253   ghmm_cmodel *ghmm_cmodel_copy (const ghmm_cmodel * smo);
254 
255 /**
256    Checks if ghmm_cmodel is well definded. E.g. sum pi = 1, only positive values
257    etc.
258    @return 0 if ghmm_cmodel is ok, -1 for error
259    @param smo   ghmm_cmodel for  checking
260 */
261   int ghmm_cmodel_check (const ghmm_cmodel * smo);
262 
263 /**
264    For a vector of smodels: check that the number of states and the number
265    of output function components are the same in each smodel.
266    @return 0 if smodels are  ok, -1 for error
267    @param smo:            vector of smodels for checking
268    @param smodel_number:  number of smodels
269  */
270   int ghmm_cmodel_check_compatibility (ghmm_cmodel ** smo, int smodel_number);
271 
272 /**
273    Generates random symbol.
274    Generates random number(s) for a specified state and specified
275    output component of the given smodel.
276    @return           0 on success
277    @param smo:       ghmm_cmodel
278    @param state:     state
279    @param m:         index of output component
280    @param x:         pointer to double to store result
281 */
282   int ghmm_cmodel_get_random_var(ghmm_cmodel *smo, int state, int m, double *x);
283 
284 
285 /**
286     Produces sequences to a given model. All memory that is needed for the
287     sequences is allocated inside the function. It is possible to define
288     the length of the sequences globally (global_len > 0) or it can be set
289     inside the function, when a final state in the model is reached (a state
290     with no output). If the model has no final state, the sequences will
291     have length MAX_SEQ_LEN.
292     @return             pointer to an array of sequences
293     @param smo:         model
294     @param seed:        initial parameter for the random value generator
295                         (an integer). If seed == 0, then the random value
296 			generator is not initialized.
297     @param global_len:  length of sequences (=0: automatically via final states)
298     @param seq_number:  number of sequences
299     @param Tmax:        maximal sequence length, set to MAX_SEQ_LEN if -1
300 */
301 
302   ghmm_cseq *ghmm_cmodel_generate_sequences (ghmm_cmodel * smo, int seed,
303                                            int global_len, long seq_number,
304                                            int Tmax);
305 
306 /**
307     Computes sum over all sequence of
308     seq_w * log( P ( O|lambda ) ). If a sequence can't be generated by smo
309     error cost of seq_w * PRENALTY_LOGP are imposed.
310    @return       n: number of evaluated sequences, -1: error
311    @param smo   ghmm_cmodel
312    @param sqd    sequence struct
313    @param log_p  evaluated log likelihood
314 */
315   int ghmm_cmodel_likelihood (ghmm_cmodel * smo, ghmm_cseq * sqd, double *log_p);
316 
317 /**
318     Computes log likelihood for all sequence of
319     seq_w * log( P ( O|lambda ) ). If a sequence can't be generated by smo
320     error cost of seq_w * PRENALTY_LOGP are imposed.
321    @return      n: number of evaluated sequences, -1: error
322    @param smo   ghmm_cmodel
323    @param sqd   sequence struct
324    @param log_ps array of evaluated likelihoods
325 */
326   int ghmm_cmodel_individual_likelihoods (ghmm_cmodel * smo, ghmm_cseq * sqd,
327                                      double *log_ps);
328 
329 
330 /** Reads an XML file with specifications for one or more smodels.
331     All parameters in matrix or vector form.
332    @return vector of read smodels
333    @param filename   input xml file
334    @param smo_number  number of smodels to read*/
335   ghmm_cmodel **ghmm_cmodel_xml_read (const char *filename, int *smo_number);
336 
337 /** Writes an XML file with specifications for one or more smodels.
338    @return           0:sucess, -1:error
339    @param file       output xml filename
340    @param smo        ghmm_cmodel(s)
341    @param smo_number number of smodels to write*/
342   int ghmm_cmodel_xml_write(ghmm_cmodel** smo, const char *file, int smo_number);
343 
344 /**
345    Prints one ghmm_cmodel in matrix form.
346    @param file     output file
347    @param smo   ghmm_cmodel
348 */
349   void ghmm_cmodel_print (FILE * file, ghmm_cmodel * smo);
350 
351 /**
352    Prints one ghmm_cmodel with only one transition Matrix A (=Ak_0).
353    @param file     output file
354    @param smo   ghmm_cmodel
355 */
356   void ghmm_cmodel_print_oneA (FILE * file, ghmm_cmodel * smo);
357 
358 /**
359    Prints transition matrix of specified class.
360    @param file       output file
361    @param smo     ghmm_cmodel
362    @param k          transition class
363    @param tab      format: leading tab
364    @param separator  format: seperator
365    @param ending     format: end of data in line
366 */
367   void ghmm_cmodel_Ak_print (FILE * file, ghmm_cmodel * smo, int k, char *tab,
368                         char *separator, char *ending);
369 
370 /**
371    Prints weight matrix of output functions of an smodel.
372    @param file       output file
373    @param smo     ghmm_cmodel
374    @param tab      format: leading tab
375    @param separator  format: seperator
376    @param ending     format: end of data in line
377 */
378   void ghmm_cmodel_C_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,
379                        char *ending);
380 
381 /**
382    Prints mean matrix of output functions of an smodel.
383    @param file       output file
384    @param smo     ghmm_cmodel
385    @param tab      format: leading tab
386    @param separator  format: seperator
387    @param ending     format: end of data in line
388 */
389   void ghmm_cmodel_Mue_print (FILE * file, ghmm_cmodel * smo, char *tab,
390                          char *separator, char *ending);
391 /**
392    Prints variance matrix of output functions of an smodel.
393    @param file       output file
394    @param smo     ghmm_cmodel
395    @param tab      format: leading tab
396    @param separator  format: seperator
397    @param ending     format: end of data in line
398 */
399   void ghmm_cmodel_U_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,
400                        char *ending);
401 /**
402    Prints initial prob vector of an smodel.
403    @param file       output file
404    @param smo     ghmm_cmodel
405    @param tab      format: leading tab
406    @param separator  format: seperator
407    @param ending     format: end of data in line
408 */
409   void ghmm_cmodel_Pi_print (FILE * file, ghmm_cmodel * smo, char *tab, char *separator,
410                         char *ending);
411 /**
412    Prints vector of fix_states.
413    @param file       output file
414    @param smo     ghmm_cmodel
415    @param tab      format: leading tab
416    @param separator  format: seperator
417    @param ending     format: end of data in line
418 */
419   void ghmm_cmodel_fix_print (FILE * file, ghmm_cmodel * smo, char *tab,
420                          char *separator, char *ending);
421 
422 /** Computes the density of one symbol (omega) in a given state and a
423     given output component
424     @return       calculated density
425     @param state  ghmm_cstate
426     @param m      output component
427     @param omega  given symbol
428 */
429   double ghmm_cmodel_calc_cmbm(const ghmm_cstate *state, int m, double *omega);
430 
431 /** Computes the density of one symbol (omega) in a given state (sums over
432     all output components
433     @return calculated density
434     @param state state
435     @param omega given symbol
436 */
437   double ghmm_cmodel_calc_b(ghmm_cstate *state, const double *omega);
438 
439 /** Computes probabilistic distance of two models
440     @return the distance
441     @param cm0  ghmm_cmodel used for generating random output
442     @param cm   ghmm_cmodel to compare with
443     @param maxT  maximum output length (for HMMs with absorbing states multiple
444                  sequences with a toal length of at least maxT will be
445 		 generated)
446     @param symmetric  flag, whether to symmetrize distance (not implemented yet)
447     @param verbose  flag, whether to monitor distance in 40 steps.
448                     Prints to stdout (yuk!)
449 */
450   double ghmm_cmodel_prob_distance (ghmm_cmodel * cm0, ghmm_cmodel * cm, int maxT,
451                                int symmetric, int verbose);
452 
453 /**
454     Computes value of distribution function for a given symbol omega, a given
455     ghmm_cstate and a given output component.
456     @return       value of distribution function
457     @param state  ghmm_cstate
458     @param m      component
459     @param omega  symbol
460 */
461   double ghmm_cmodel_calc_cmBm(const ghmm_cstate *state, int m, const double *omega);
462 
463 /**
464     Computes value of distribution function for a given symbol omega and
465     a given  state. Sums over all components.
466     @return       value of distribution function
467     @param state  ghmm_cstate
468     @param omega  symbol
469 */
470   double ghmm_cmodel_calc_B(ghmm_cstate *state, const double *omega);
471 
472 /** Computes the number of free parameters in an array of
473    smodels. E.g. if the number of parameter from pi is N - 1.
474    Counts only those parameters, that can be changed during
475    training. If pi[i] = 0 it is not counted, since it can't be changed.
476    @return number of free parameters
477    @param smo ghmm_cmodel
478    @param smo_number number of smodels
479 */
480   int ghmm_cmodel_count_free_parameter (ghmm_cmodel ** smo, int smo_number);
481 
482 
483 /*============================================================================*/
484 
485 /* keep the following functions for first distribution???
486    --> BK ?
487 */
488 
489 
490 /** Generates interval(a,b) with  B(a) < 0.01, B(b) > 0.99
491     @param smo    continous HMM
492     @param state  given state
493     @param a      return-value: left side
494     @param b      return-value: right side
495 */
496   void ghmm_cmodel_get_interval_B (ghmm_cmodel * smo, int state, double *a, double *b);
497 
498 /** Normalizes initial and transisition probablilities and mixture weights
499     @return     0 on success / -1 on error
500     @param smo  model to normalize
501 */
502 
503   int ghmm_cmodel_normalize(ghmm_cmodel *smo);
504 
505 /**
506     Get transition probabality from state 'i' to state 'j' to value 'prob'.
507     NOTE: No internal checks
508     @return   1 if there is a transition, 0 otherwise
509     @param mo model
510     @param i  state index (source)
511     @param j  state index (target)
512     @param c  transition class
513 */
514   double ghmm_cmodel_get_transition(ghmm_cmodel* mo, int i, int j, int c);
515 
516 /**
517     Checks if a non zero transition exists between state 'i' to state 'j'.
518     NOTE: No internal checks
519     @return  0 if there is no non zero transition from state i to state j, 1 else
520     @param mo model
521     @param i  state index (source)
522     @param j  state index (target)
523     @param c  transition class
524 */
525   int ghmm_cmodel_check_transition(ghmm_cmodel* mo, int i, int j, int c);
526 
527 /**
528     Set transition from state 'i' to state 'j' to value 'prob'.
529     NOTE: No internal checks - model might get broken if used without care.
530     @param mo model
531     @param i  state index (source)
532     @param j  state index (target)
533     @param prob probability
534     @param c  transition class
535 */
536   void ghmm_cmodel_set_transition (ghmm_cmodel* mo, int i, int j, int c, double prob);
537 
538 #ifdef __cplusplus
539 }
540 #endif
541 #endif
542 /*@} (Doc++-Group: smodel) */
543