1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 #ifndef SEQAN_HEADER_GRAPH_ALGORITHM_HMM_H
34 #define SEQAN_HEADER_GRAPH_ALGORITHM_HMM_H
35 
36 namespace seqan
37 {
38 
39 // ============================================================================
40 // Forwards
41 // ============================================================================
42 
43 // ============================================================================
44 // Tags, Classes, Enums
45 // ============================================================================
46 
47 // ============================================================================
48 // Metafunctions
49 // ============================================================================
50 
51 // ============================================================================
52 // Functions
53 // ============================================================================
54 
55 // --------------------------------------------------------------------------
56 // Function viterbiAlgorithm()
57 // --------------------------------------------------------------------------
58 
59 /*!
60  * @defgroup HmmAlgorithms HMM Algorithms
61  * @brief Algorithms on @link HmmGraph @endlink objects.
62  */
63 
64 /*!
65  * @fn HmmAlgorithms#viterbiAlgorithm
66  * @headerfile <seqan/graph_algorithms.h>
67  * @brief Implements the Viterbi algorithm for Hidden Markov Models.
68  *
69  * @signature TCargo viterbiAlgorithm(path, hmm, seq);
70  *
71  * @param[out] path The state path;  String of vertex descriptors.
72  * @param[in]  hmm  The @link HmmGraph @endlink to use.
73  * @param[in]  seq  Input sequence.
74  *
75  * @return TCargo Probability of the path, the type parameter <tt>TCargo</tt> from type of <tt>hmm</tt>.
76  *
77  * The Viterbi algorithm computes the most likely sequence of hidden states of the Hidden Markov Model <tt>hmm</tt>
78  * given the sequence <tt>seq</tt> using dynamic programming.  The result is the most likely sequence of hidden states
79  * and returned in <tt>path</tt>.
80  *
81  * @section Remarks
82  *
83  * See the <a href="http://wikipedia.org/wiki/Viterbi_algorithm"> Wikipedia article on the Viterbi algorithm</a> for an
84  * introduction to the algorithm itself.
85  *
86  * @see HmmAlgorithms#forwardAlgorithm
87  * @see HmmAlgorithms#backwardAlgorithm
88  */
89 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence, typename TPath>
90 inline TProbability
viterbiAlgorithm(TPath & path,Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq)91 viterbiAlgorithm(TPath & path,
92                  Graph<Hmm<TAlphabet, TProbability, TSpec> > const & hmm,
93                  TSequence const & seq)
94 {
95     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
96     typedef typename Size<TGraph>::Type TSize;
97     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
98     typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
99 
100     // Initialization
101     String<TProbability> vMat;
102     String<TSize> traceback;
103     TSize numCols = length(seq) + 2;
104     TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
105     resize(vMat, numCols * numRows, 0.0);
106     resize(traceback, numCols * numRows);
107     value(vMat, getBeginState(hmm)) = 1.0;
108     TVertexDescriptor bState = getBeginState(hmm);
109     TVertexDescriptor eState = getEndState(hmm);
110     TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
111 
112     // Distinct between silent states and real states
113     typedef String<TVertexDescriptor> TStateSet;
114     typedef typename Iterator<TStateSet>::Type TStateIter;
115     TStateSet silentStates;
116     TStateSet realStates;
117     TVertexIterator itVertex(hmm);
118     for(;!atEnd(itVertex);goNext(itVertex)) {
119         if (isSilent(hmm, value(itVertex))) {
120             appendValue(silentStates, value(itVertex));
121         } else {
122             appendValue(realStates, value(itVertex));
123         }
124     }
125 
126     // Initialization for silent states connected to the begin state
127     TStateIter itSilentStateEnd = end(silentStates);
128     for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
129         if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
130         TProbability maxValue = 0.0;
131         TVertexDescriptor maxVertex = nilVertex;
132 
133         for(TStateIter itBelow = begin(silentStates); itBelow != itSilentState; goNext(itBelow)) {
134             TProbability local = value(vMat, value(itBelow)) * getTransitionProbability(hmm, value(itBelow), value(itSilentState));
135             if (local > maxValue) {
136                 maxValue = local;
137                 maxVertex = value(itBelow);
138             }
139         }
140 
141         // Set traceback vertex
142         if (maxVertex != nilVertex) {
143             value(vMat, value(itSilentState)) = maxValue;
144             value(traceback, value(itSilentState)) = maxVertex;
145         }
146     }
147 
148     // Recurrence
149     TSize len = length(seq);
150     for(TSize i=1; i<=len; ++i) {
151         // Iterate over real states
152         TStateIter itRealStateEnd = end(realStates);
153         for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
154             // Find maximum
155             TProbability maxValue = 0.0;
156             TVertexDescriptor maxVertex = nilVertex;
157             TVertexIterator itMax(hmm);
158             for(;!atEnd(itMax);++itMax) {
159                 TProbability local = value(vMat, (i-1) * numRows + value(itMax)) * getTransitionProbability(hmm, value(itMax), value(itRealState));
160                 if (local > maxValue) {
161                     maxValue = local;
162                     maxVertex = *itMax;
163                 }
164             }
165             // Set traceback vertex
166             if (maxVertex != nilVertex) {
167                 value(vMat, i * numRows + value(itRealState)) = maxValue * getEmissionProbability(hmm, value(itRealState), value(seq, i-1));
168                 value(traceback, i * numRows + value(itRealState)) = maxVertex;
169             }
170         }
171 
172         // Iterate over silent states
173         for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
174             if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
175             // Find maximum
176             TProbability maxValue = 0.0;
177             TVertexDescriptor maxVertex = nilVertex;
178 
179             // Iterate over real states
180             for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
181                 TProbability local = value(vMat, i * numRows + value(itRealState)) * getTransitionProbability(hmm, value(itRealState), value(itSilentState));
182                 if (local > maxValue) {
183                     maxValue = local;
184                     maxVertex = value(itRealState);
185                 }
186             }
187             // Iterate over silent states in increasing order
188             for(TStateIter itBelow = begin(silentStates); itBelow != itSilentState; goNext(itBelow)) {
189                 TProbability local = value(vMat, i * numRows + value(itBelow)) * getTransitionProbability(hmm, value(itBelow), value(itSilentState));
190                 if (local > maxValue) {
191                     maxValue = local;
192                     maxVertex = value(itBelow);
193                 }
194             }
195             // Set traceback vertex
196             if (maxVertex != nilVertex) {
197                 value(traceback, i * numRows + value(itSilentState)) = maxVertex;
198                 value(vMat, i * numRows + value(itSilentState)) = maxValue;
199             }
200         }
201     }
202 
203     // Termination
204     TProbability maxValue = 0.0;
205     TVertexDescriptor maxVertex = 0;
206     TVertexIterator itMax(hmm);
207     for(;!atEnd(itMax);++itMax) {
208         TProbability local = value(vMat, len * numRows + *itMax) * getTransitionProbability(hmm, value(itMax), eState);
209         if (local > maxValue) {
210             maxValue = local;
211             maxVertex = value(itMax);
212         }
213     }
214     value(traceback, (len + 1) * numRows + eState) = maxVertex;
215     if (maxVertex != nilVertex) value(vMat, (len+1) * numRows + eState) = maxValue;
216 
217     // Traceback
218     if (maxValue > 0.0) {
219         clear(path);
220         TVertexDescriptor oldState = eState;
221         appendValue(path, oldState);
222         for(TSize i = len + 1; i>=1; --i) {
223             do {
224                 if ((!isSilent(hmm, oldState)) || (oldState == eState)) oldState = value(traceback, i * numRows + oldState);
225                 else oldState = value(traceback, (i - 1) * numRows + oldState);
226                 appendValue(path, oldState);
227             } while ((isSilent(hmm, oldState)) && (oldState != bState));
228         }
229         std::reverse(begin(path), end(path));
230     }
231 
232     //// Debug code
233     //for(TSize i = 0; i<numRows; ++i) {
234     //  for(TSize j=0; j<numCols; ++j) {
235     //      std::cout << value(vMat, j*numRows + i) << ',';
236     //      //std::cout << value(traceback, j*numRows + i) << ',';
237     //  }
238     //  std::cout << std::endl;
239     //}
240     //for(TSize i = 0; i<length(path); ++i) {
241     //  std::cout << path[i] << ',';
242     //}
243     //std::cout << std::endl;
244 
245     return value(vMat, (len+1) * numRows + eState);
246 }
247 
248 // --------------------------------------------------------------------------
249 // Function forwardAlgorithm()
250 // --------------------------------------------------------------------------
251 
252 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence, typename TForwardMatrix>
253 inline TProbability
_forwardAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq,TForwardMatrix & fMat)254 _forwardAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
255                    TSequence const& seq,
256                    TForwardMatrix& fMat)
257 {
258     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
259     typedef typename Size<TGraph>::Type TSize;
260     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
261     typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
262 
263     // Initialization
264     TSize numCols = length(seq) + 2;
265     TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
266     resize(fMat, numCols * numRows, 0.0);
267     value(fMat, getBeginState(hmm)) = 1.0;
268     TVertexDescriptor bState = getBeginState(hmm);
269     TVertexDescriptor eState = getEndState(hmm);
270 
271     // Distinct between silent states and real states
272     typedef String<TVertexDescriptor> TStateSet;
273     typedef typename Iterator<TStateSet>::Type TStateIter;
274     TStateSet silentStates;
275     TStateSet realStates;
276     TVertexIterator itVertex(hmm);
277     for(;!atEnd(itVertex);goNext(itVertex)) {
278         if (isSilent(hmm, value(itVertex))) {
279             appendValue(silentStates, value(itVertex));
280         } else {
281             appendValue(realStates, value(itVertex));
282         }
283     }
284 
285     // Initialization for silent states connected to the begin state
286     TStateIter itSilentStateEnd = end(silentStates);
287     for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
288         if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
289         TProbability sumValue = 0.0;
290         for(TStateIter itBelow = begin(silentStates); itBelow != itSilentState; goNext(itBelow)) {
291             sumValue += value(fMat, value(itBelow)) * getTransitionProbability(hmm, value(itBelow), value(itSilentState));
292         }
293         value(fMat, value(itSilentState)) = sumValue;
294     }
295 
296     // Recurrence
297     TSize len = length(seq);
298     for(TSize i=1; i<=len; ++i) {
299         // Iterate over real states
300         TStateIter itRealStateEnd = end(realStates);
301         for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
302             TProbability sum = 0.0;
303             TVertexIterator itAll(hmm);
304             for(;!atEnd(itAll);++itAll) sum += value(fMat, (i-1) * numRows + value(itAll)) * getTransitionProbability(hmm, value(itAll), value(itRealState));
305             value(fMat, i * numRows + value(itRealState)) = getEmissionProbability(hmm, value(itRealState), seq[i-1]) * sum;
306         }
307 
308         // Iterate over silent states
309         for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
310             if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
311             TProbability sumValue = 0.0;
312             // Iterate over real states
313             for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
314                 sumValue += value(fMat, i * numRows + value(itRealState)) * getTransitionProbability(hmm, value(itRealState), value(itSilentState));
315             }
316             // Iterate over silent states in increasing order
317             for(TStateIter itBelow = begin(silentStates); itBelow != itSilentState; goNext(itBelow)) {
318                 sumValue += value(fMat, i * numRows + value(itBelow)) * getTransitionProbability(hmm, value(itBelow), value(itSilentState));
319             }
320             value(fMat, i * numRows + value(itSilentState)) = sumValue;
321         }
322     }
323 
324     // Termination
325     TProbability sum = 0.0;
326     TVertexIterator itAll(hmm);
327     for(;!atEnd(itAll);++itAll) {
328         sum += value(fMat, len * numRows + value(itAll)) * getTransitionProbability(hmm, value(itAll), eState);
329     }
330     value(fMat, (len+1) * numRows + eState) = sum;
331 
332     //// Debug code
333     //for(TSize i = 0; i<numRows; ++i) {
334     //  for(TSize j=0; j<numCols; ++j) {
335     //      std::cout << value(fMat, j*numRows + i) << ',';
336     //  }
337     //  std::cout << std::endl;
338     //}
339 
340     return value(fMat, (len+1) * numRows + eState);
341 }
342 
343 /*!
344  * @fn HmmAlgorithms#forwardAlgorithm
345  * @headerfile <seqan/graph_algorithms.h>
346  * @brief Given a Hidden Markov Model <tt>hmm</tt>, the forward algorithm computes the probability of the sequence
347  *        <tt>seq</tt>.
348  *
349  * @signature TCargo forwardAlgorithm(hmm, seq);
350  *
351  * @param[in] hmm The @link HmmGraph @endlink with the HMM to use.
352  * @param[in] seq Input sequence to use in the forward algorithm.
353  *
354  * @return TProbability The probability of the sequence <tt>seq</tt>.  <tt>TProbability</tt> is the type parameter
355  *                      <tt>TCargo</tt> of the type of <tt>hmm</tt>.
356  *
357  * @section Remarks
358  *
359  * See the <a href="http://en.wikipedia.org/wiki/Forward_algorithm">Wikipedia article on the Forward algorithm</a> for
360  * an introduction to the algorithm itself.
361  *
362  * @see HmmAlgorithms#viterbiAlgorithm
363  * @see HmmAlgorithms#backwardAlgorithm
364  */
365 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence>
366 TProbability
forwardAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq)367 forwardAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec> > const & hmm,
368                  TSequence const & seq)
369 {
370     String<TProbability> fMat;
371     return _forwardAlgorithm(hmm, seq, fMat);
372 }
373 
374 // --------------------------------------------------------------------------
375 // Function backwardAlgorithm()
376 // --------------------------------------------------------------------------
377 
378 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence, typename TBackwardMatrix>
379 inline TProbability
_backwardAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq,TBackwardMatrix & bMat)380 _backwardAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
381                     TSequence const& seq,
382                     TBackwardMatrix& bMat)
383 {
384     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
385     typedef typename Size<TGraph>::Type TSize;
386     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
387     typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
388 
389     // Initialization
390     TSize numCols = length(seq) + 2;
391     TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
392     resize(bMat, numCols * numRows, 0.0);
393     TVertexDescriptor bState = getBeginState(hmm);
394     TVertexDescriptor eState = getEndState(hmm);
395     TSize len = length(seq);
396     value(bMat, (len + 1) * numRows + eState) = 1.0;
397 
398     // Distinct between silent states and real states
399     typedef String<TVertexDescriptor> TStateSet;
400     typedef typename Iterator<TStateSet>::Type TStateIter;
401     TStateSet silentStates;
402     TStateSet realStates;
403     TVertexIterator itVertex(hmm);
404     for(;!atEnd(itVertex);goNext(itVertex)) {
405         if (isSilent(hmm, value(itVertex))) {
406             appendValue(silentStates, value(itVertex));
407         } else {
408             appendValue(realStates, value(itVertex));
409         }
410     }
411     // Reverse the silent states order
412     std::reverse(begin(silentStates), end(silentStates));
413 
414     // Initialization for silent states connected to the end state
415     TStateIter itSilentStateEnd = end(silentStates);
416     for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
417         if (value(itSilentState) == eState) continue;
418         TProbability sumValue = getTransitionProbability(hmm, value(itSilentState), eState);
419         for(TStateIter itAbove = begin(silentStates); itAbove != itSilentState; goNext(itAbove)) {
420             sumValue += value(bMat, len * numRows + value(itAbove)) * getTransitionProbability(hmm, value(itSilentState), value(itAbove));
421         }
422         value(bMat, len * numRows + value(itSilentState)) = sumValue;
423     }
424 
425     // Initialization for real states
426     TStateIter itRealStateEnd = end(realStates);
427     for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
428         TProbability sumValue = getTransitionProbability(hmm, value(itRealState), eState);
429         for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
430             sumValue += value(bMat, len * numRows + value(itSilentState)) * getTransitionProbability(hmm, value(itRealState), value(itSilentState));
431         }
432         value(bMat, len * numRows + value(itRealState)) = sumValue;
433     }
434 
435 
436     // Recurrence
437     if (len > 0) {
438         for(TSize i=len - 1; i>0; --i) {
439             // Iterate over silent states
440             for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
441                 if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
442                 TProbability sumValue = 0.0;
443                 // Iterate over real states
444                 for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
445                     sumValue += value(bMat, (i+1) * numRows + value(itRealState)) * getTransitionProbability(hmm, value(itSilentState), value(itRealState))* getEmissionProbability(hmm, value(itRealState), value(seq, i));
446                 }
447                 // Iterate over silent states in decreasing order
448                 for(TStateIter itAbove = begin(silentStates); itAbove != itSilentState; goNext(itAbove)) {
449                     if ((value(itAbove) == bState) || (value(itAbove) == eState)) continue;
450                     sumValue += value(bMat, i * numRows + value(itAbove)) * getTransitionProbability(hmm, value(itSilentState), value(itAbove));
451                 }
452                 value(bMat, i * numRows + value(itSilentState)) = sumValue;
453             }
454 
455             // Iteration over real states
456             for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
457                 TProbability sumValue = 0.0;
458                 // Iterate over silent states
459                 for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
460                     if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
461                     sumValue += value(bMat, i * numRows + value(itSilentState)) * getTransitionProbability(hmm, value(itRealState), value(itSilentState));
462                 }
463                 // Iterate over real states
464                 for(TStateIter itR = begin(realStates); itR != itRealStateEnd; goNext(itR)) {
465                     sumValue += value(bMat, (i+1) * numRows + value(itR)) * getTransitionProbability(hmm, value(itRealState), value(itR)) * getEmissionProbability(hmm, value(itR), value(seq, i));
466                 }
467                 value(bMat, i * numRows + value(itRealState)) =  sumValue;
468             }
469         }
470 
471         // Termination
472         // Iterate over silent states
473         for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
474             if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
475             TProbability sumValue = 0.0;
476             // Iterate over real states
477             for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
478                 sumValue += value(bMat, 1 * numRows + value(itRealState)) * getTransitionProbability(hmm, value(itSilentState), value(itRealState))* getEmissionProbability(hmm, value(itRealState), value(seq, 0));
479             }
480             // Iterate over silent states in decreasing order
481             for(TStateIter itAbove = begin(silentStates); itAbove != itSilentState; goNext(itAbove)) {
482                 if ((value(itAbove) == bState) || (value(itAbove) == eState)) continue;
483                 sumValue += value(bMat, value(itAbove)) * getTransitionProbability(hmm, value(itSilentState), value(itAbove));
484             }
485             value(bMat, value(itSilentState)) = sumValue;
486         }
487         // Sum up all values
488         TProbability sumValue = 0.0;
489         for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
490             if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
491             sumValue += value(bMat, value(itSilentState)) * getTransitionProbability(hmm, bState, value(itSilentState));
492         }
493         for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
494             sumValue += value(bMat, 1 * numRows + value(itRealState)) * getTransitionProbability(hmm, bState, value(itRealState)) * getEmissionProbability(hmm, value(itRealState), value(seq, 0));
495         }
496         value(bMat, bState) = sumValue;
497     }
498 
499     //// Debug code
500     //for(TSize i = 0; i<numRows; ++i) {
501     //  for(TSize j=0; j<numCols; ++j) {
502     //      std::cout << value(bMat, j*numRows + i) << ',';
503     //  }
504     //  std::cout << std::endl;
505     //}
506 
507     return value(bMat, bState);
508 }
509 
510 /*!
511  * @fn HmmAlgorithms#backwardAlgorithm
512  * @headerfile <seqan/graph_algorithms.h>
513  * @brief Given a Hidden Markov Model <tt>hmm</tt>, the backward algorithm computes the probability of the sequence
514  *        <tt>seq</tt>.
515  *
516  * @signature TCargo backwardAlgorithm(hmm, seq);
517  *
518  * @param[in] hmm The @link HmmGraph @endlink with the HMM to use.
519  * @param[in] seq Input sequence to use in the backward algorithm.
520  *
521  * @return TCargo The probability of the sequence <tt>seq</tt>.  <tt>TProbability</tt> is the type parameter
522  *                <tt>TCargo</tt> of the type of <tt>hmm</tt>.
523  *
524  * See the <a href="http://en.wikipedia.org/wiki/Forward-backward_algorithm">Wikipedia article on the Forward-backward
525  * algorithm</a> for an introduction to the algorithm itself.
526  *
527  * @see HmmAlgorithms#viterbiAlgorithm
528  * @see HmmAlgorithms#forwardAlgorithm
529  */
530 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence>
531 TProbability
backwardAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq)532 backwardAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec> > const & hmm,
533                   TSequence const & seq)
534 {
535     String<TProbability> bMat;
536     return _backwardAlgorithm(hmm, seq, bMat);
537 }
538 
539 // --------------------------------------------------------------------------
540 // Function generateSequence()
541 // --------------------------------------------------------------------------
542 
543 /*!
544  * @fn HmmAlgorithms#generateSequence
545  * @headerfile <seqan/graph_algorithms.h>
546  * @brief Generates random state and alphabet sequence of a given HMM.
547  *
548  * @signature void generateSequence(hmm, seq, states, numSeq, maxLen);
549  *
550  * @param[out] seq    A @link StringSet @endlink of alphabet sequences.
551  * @param[out] states A @link ContainerConcept @endlink object of state sequences.
552  * @param[in]  hmm    The @link HmmGraph @endlink to use.
553  * @param[in]  numSeq The number of sequences to generate.
554  * @param[in]  maxLen The maximum length of the sequences.  The sequences might be shorter if the ends tate is reached
555  *                    before maxLen.
556  *
557  * @section Remarks
558  *
559  * Because of silent states, generated alphabet and state sequences might have different lengths.
560  */
561 template<typename TAlphabet, typename TProbability, typename TSpec,typename TSequenceSet, typename TStateSeqSet, typename TSize>
generateSequence(TSequenceSet & sequences,TStateSeqSet & states,Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSize numSeq,TSize maxLength)562 void generateSequence(TSequenceSet & sequences,
563                       TStateSeqSet & states,
564                       Graph<Hmm<TAlphabet, TProbability, TSpec> > const & hmm,
565                       TSize numSeq,
566                       TSize maxLength)
567 {
568     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
569     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
570     typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
571     typedef typename Value<TSequenceSet>::Type TSequence;
572     typedef typename Value<TStateSeqSet>::Type TStateSeq;
573 
574     // TODO(holtgrew): It should be possible to use a per-call RNG.
575     typedef typename GetDefaultRng<TGraph>::Type TRng;
576 
577     std::uniform_real_distribution<double> pdf(0.0, 1.0);
578     TRng & rng = defaultRng(TGraph());
579 
580     // Initialization
581     clear(sequences);
582     clear(states);
583     TSize alphSize = ValueSize<TAlphabet>::VALUE;
584 
585     // Simulate sequences
586     TVertexDescriptor currentState;
587     TVertexDescriptor endState = getEndState(hmm);
588     for(TSize i=0;i<numSeq;++i){
589         currentState = getBeginState(hmm);
590         TSequence seq;
591         TStateSeq stat;
592         appendValue(stat, getBeginState(hmm));
593         bool stop = false;
594         TSize pos = 0;
595         while (pos < maxLength) {
596             TProbability prob = pdf(rng);
597             TProbability compareProb = 0.0;
598             TOutEdgeIterator itState(hmm, currentState);
599             // Determine the next state
600             for(;!atEnd(itState);++itState) {
601                 // Probability of the next transition
602                 compareProb += getTransitionProbability(hmm, value(itState));
603                 // Compare with random probability
604                 if (prob <= compareProb){
605                     TVertexDescriptor nextState = targetVertex(hmm, value(itState));
606                     if (nextState == endState) {
607                         stop = true;
608                         break;
609                     }
610                     appendValue(stat, nextState);
611                     if (!isSilent(hmm, nextState)) {
612                         compareProb =0.0;
613                         prob = pdf(rng);
614                         for (TSize c=0;c<alphSize;++c){
615                             compareProb += getEmissionProbability(hmm,targetVertex(hmm, value(itState)), TAlphabet(c));
616                             if (prob <= compareProb) {
617                                 appendValue(seq, TAlphabet(c));
618                                 ++pos;
619                                 break;
620                             }
621                         }
622                     }
623                     currentState = nextState;
624                     break;
625                 }
626             }
627             if (stop==true) break;
628         }
629         appendValue(stat, getEndState(hmm));
630         appendValue(sequences, seq);
631         appendValue(states, stat);
632     }
633 }
634 
635 template<typename TAlphabet, typename TProbability, typename TSpec,typename TSequenceSet, typename TSize>
636 inline void
generateSequence(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequenceSet & sequences,TSize numSeq,TSize maxLength)637 generateSequence(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
638                  TSequenceSet& sequences,
639                  TSize numSeq,
640                  TSize maxLength)
641 {
642     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
643     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
644     StringSet<String<TVertexDescriptor> > states;
645     generateSequence(hmm, sequences, states, numSeq, maxLength);
646 }
647 
648 // --------------------------------------------------------------------------
649 // Function estimationWithStates()
650 // --------------------------------------------------------------------------
651 
652 template<typename TAlphabet, typename TProbability, typename TSpec,typename TEmissionCounter, typename TTransitionCounter>
653 inline void
_parameterEstimator(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,TEmissionCounter const & emission,TTransitionCounter const & transition)654 _parameterEstimator(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm,
655                      TEmissionCounter const& emission,
656                      TTransitionCounter const& transition)
657 {
658     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
659     typedef typename Size<TGraph>::Type TSize;
660     typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
661     typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
662     typedef typename Value<TTransitionCounter>::Type TCounterValue;
663 
664     // Initialization
665     TSize alphSize = ValueSize<TAlphabet>::VALUE;
666     TCounterValue pseudoCount = (TCounterValue) 1.0;
667 
668     // Estimate the parameters from the counter values
669     TVertexIterator itAll(hmm);
670     for(;!atEnd(itAll);++itAll){
671         if (!isSilent(hmm, value(itAll))) {
672             TCounterValue summedCount = (TCounterValue) (0.0);
673             for(TSize i=0; i<alphSize;++i) summedCount += (pseudoCount + value(value(emission, value(itAll)),i));
674             for(TSize i=0; i<alphSize;++i) emissionProbability(hmm,value(itAll),TAlphabet(i)) = (pseudoCount + value(value(emission, value(itAll)),i)) / summedCount;
675         }
676         TCounterValue summedCount = (TCounterValue) (0.0);
677         TOutEdgeIterator itOutSum(hmm,value(itAll));
678         for(;!atEnd(itOutSum);++itOutSum) summedCount += (pseudoCount + getProperty(transition, value(itOutSum)));
679         TOutEdgeIterator itOut(hmm, value(itAll));
680         for(;!atEnd(itOut);++itOut) transitionProbability(hmm, value(itOut)) = (pseudoCount + getProperty(transition, value(itOut))) / summedCount;
681     }
682 }
683 
684 ///////////////////////////////////////////////////////////////////////////////////////
685 
686 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequenceSet, typename TStateSeqSet>
687 inline void
estimationWithStates(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,TSequenceSet & sequences,TStateSeqSet & states)688 estimationWithStates(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm,
689                      TSequenceSet& sequences,
690                      TStateSeqSet& states)
691 {
692     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
693     typedef typename Size<TGraph>::Type TSize;
694     typedef typename Value<TStateSeqSet>::Type TStateSeq;
695     typedef typename Value<TStateSeq>::Type TState;
696     typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
697 
698     // Initialization
699     TSize alphSize = ValueSize<TAlphabet>::VALUE;
700     typedef String<TSize> TCountString;
701     TCountString transitionCounter;
702     resize(transitionCounter, getIdUpperBound(_getEdgeIdManager(hmm)), 0);
703     StringSet<TCountString> emissionCounter;
704     TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
705     for(TSize i =0; i<numRows;++i) {
706         TCountString emisCount;
707         resize(emisCount,alphSize,0);
708         appendValue(emissionCounter,emisCount);
709     }
710 
711     // Iterate over all sequences
712     for (TSize j=0;j<length(states);++j) {
713         TSize posSeq = 0;
714         for (TSize i = 0;i<length(value(states,j));++i) {
715             TState s = value(value(states,j),i);
716             if (!isSilent(hmm, s)) {
717                 TAlphabet c = value(value(sequences,j),posSeq);
718                 value(value(emissionCounter, s), ordValue(c)) += 1;
719                 ++posSeq;
720             }
721             if (i<(length(value(states,j))-1)) {
722                 TEdgeDescriptor currEdge = findEdge(hmm, s, value(value(states,j), (i+1)));
723                 property(transitionCounter, currEdge) += 1;
724             }
725         }
726     }
727 
728     //// Debug Code
729     //typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
730     //TEdgeIterator itE(hmm);
731     //for(;!atEnd(itE);goNext(itE)) std::cout << sourceVertex(itE) << ',' << targetVertex(itE) << ':' << property(transitionCounter, value(itE)) << std::endl;
732     //for(TSize j=0; j<length(emissionCounter); ++j) {
733     //  for(TSize i=0;i<length(value(emissionCounter, j));++i) {
734     //      std::cout << j << ":" << TAlphabet(i) << '=' << value(value(emissionCounter, j), i) << std::endl;
735     //  }
736     //}
737 
738     // Estimate Parameters
739     _parameterEstimator(hmm,emissionCounter, transitionCounter);
740 }
741 
742 // --------------------------------------------------------------------------
743 // Function baumWelchAlgorithm()
744 // --------------------------------------------------------------------------
745 
746 template<typename TAlphabet, typename TProbability, typename TSpec>
747 inline void
_fillHmmUniform(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm)748 _fillHmmUniform(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm)
749 {
750     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
751     typedef typename Size<TGraph>::Type TSize;
752     typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
753     typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
754 
755     // Initialization
756     TSize alphSize = ValueSize<TAlphabet>::VALUE;
757 
758     // Iterate over all states
759     TVertexIterator itState(hmm);
760     for(;!atEnd(itState);goNext(itState)) {     //pass through the states of the hmm
761         TSize oD = outDegree(hmm, value(itState));
762         TOutEdgeIterator itOut(hmm, value(itState));
763         for (;!atEnd(itOut);goNext(itOut)) transitionProbability(hmm, value(itOut)) = (TProbability) (1.0 / (double) (oD));
764         if (!isSilent(hmm, value(itState))) {
765             for(TSize i=0;i<alphSize;++i){
766                 emissionProbability(hmm,value(itState),TAlphabet(i)) = (TProbability) (1.0 / (double) (alphSize));
767             }
768         }
769     }
770 }
771 
772 template<typename TAlphabet, typename TProbability, typename TSpec, typename TRNG>
773 inline void
_fillHmmRandom(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,TRNG & rng)774 _fillHmmRandom(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm,
775                 TRNG & rng)
776 {
777     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
778     typedef typename Size<TGraph>::Type TSize;
779     typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
780     typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
781 
782     // Initialization
783     std::uniform_int_distribution<TSize> pdf(20, 99);
784     TSize alphSize = ValueSize<TAlphabet>::VALUE;
785 
786     // Iterate over all states
787     TVertexIterator itState(hmm);
788     for(;!atEnd(itState);goNext(itState)) {     //pass through the states of the hmm
789         TSize oD = outDegree(hmm, value(itState));
790         if (oD > 0) {
791             String<TSize> counts;
792             TSize sum = 0;
793             for(TSize i = 0;i<oD;++i) {
794                 TSize rd = pdf(rng);
795                 sum += rd;
796                 appendValue(counts, rd);
797             }
798             TSize pos = 0;
799             TOutEdgeIterator itOut(hmm, value(itState));
800             for (;!atEnd(itOut);goNext(itOut), ++pos) transitionProbability(hmm, value(itOut)) = (TProbability) ((double) (value(counts, pos)) / (double) (sum));
801         }
802         if (!isSilent(hmm, value(itState))) {
803             String<TSize> counts;
804             TSize sum = 0;
805             for(TSize i = 0;i<alphSize;++i) {
806                 TSize rd = pdf(rng);
807                 sum += rd;
808                 appendValue(counts, rd);
809             }
810             for(TSize i=0;i<alphSize;++i) emissionProbability(hmm,value(itState),TAlphabet(i)) = (TProbability) ((double) (value(counts, i)) / (double) (sum));
811         }
812     }
813 }
814 
815 template<typename TAlphabet, typename TProbability, typename TSpec, typename TRNG>
816 inline void
randomizeHmm(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,TRNG &)817 randomizeHmm(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm,
818              TRNG & /*rng*/)
819 {
820     // TODO(holtgrew): Why uniform and not random?
821     //_fillHmmRandom(hmm, rng);
822     _fillHmmUniform(hmm);
823 }
824 
825 template <typename TAlphabet, typename TProbability, typename TSpec, typename TSequence, typename TSize>
826 inline TProbability
_baumWelchAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,StringSet<TSequence> const & seqSet,TSize maxIter,TProbability epsilon)827 _baumWelchAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec > >& hmm,
828                      StringSet<TSequence> const& seqSet,
829                      TSize maxIter,
830                      TProbability epsilon)
831 {
832     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
833     typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
834     typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
835     typedef String<TProbability> TCountString;
836 
837     // Initialization
838     TSize alphSize = ValueSize<TAlphabet>::VALUE;
839     TProbability lastTotalModelProb = 0.0;
840 
841     // Randomize current HMM so only topology is preserved
842     randomizeHmm(hmm);
843 
844     // Iterative Optimization
845     for(TSize iter=0; iter<maxIter; ++iter){
846         std::cout << "Iteration: "<< iter << std::endl;
847         std::cout << hmm << std::endl;
848         TCountString transitionCounter;
849         resize(transitionCounter, getIdUpperBound(_getEdgeIdManager(hmm)), 0.0);
850         StringSet<TCountString> emissionCounter;
851         TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
852         for(TSize i =0; i<numRows;++i) {
853             TCountString emisCount;
854             resize(emisCount,alphSize, 0.0);
855             appendValue(emissionCounter,emisCount);
856         }
857 
858         // MaximationStep
859         // Determine new contributions
860         TProbability totalModelLogProb = 0.0;
861         for (TSize i=0; i <length(seqSet);++i){     //sequences
862             TSize len = length(value(seqSet,i));
863 
864             // Forward algorithm
865             String<TProbability> fMat;
866             TProbability modelLogProb = _forwardAlgorithm(hmm, value(seqSet,i), fMat);
867             totalModelLogProb += modelLogProb;
868 
869             // Backward algorithm
870             String<TProbability> bMat;
871             _backwardAlgorithm(hmm, value(seqSet,i), bMat);
872 
873             // Use the posterior probabilities to estimate counter values
874             for (TSize j=0;j<len;++j){
875                 TAlphabet c = value(value(seqSet,i),j);
876                 TAlphabet nextC = c;
877                 if (j < (len - 1))  nextC = value(value(seqSet,i),(j+1));
878 
879                 // Iterate over all states
880                 TVertexIterator itAll(hmm);
881                 for(;!atEnd(itAll);++itAll) {
882                     // Handle begin state
883                     if (value(itAll) == beginState(hmm)) {
884                         if (j == 0) {
885                             TOutEdgeIterator itOut(hmm, value(itAll));
886                             for(;!atEnd(itOut); goNext(itOut)) {
887                                 if (!isSilent(hmm, targetVertex(itOut))) {
888                                     property(transitionCounter, value(itOut)) += (value(fMat, beginState(hmm)) * getTransitionProbability(hmm, value(itOut)) * getEmissionProbability(hmm, targetVertex(itOut), c) * value(bMat, numRows + targetVertex(itOut)) / modelLogProb);
889                                 } else {
890                                     property(transitionCounter, value(itOut)) += (value(fMat, beginState(hmm)) * getTransitionProbability(hmm, value(itOut)) * value(bMat, targetVertex(itOut)) / modelLogProb);
891                                 }
892                             }
893 
894                         }
895                         continue;
896                     }
897                     // Ignore the end state
898                     if (value(itAll) == endState(hmm)) continue;
899 
900                     // Determine emission expectation values
901                     if (!isSilent(hmm, value(itAll))) value(value(emissionCounter, value(itAll)),ordValue(c)) += ((value(fMat, (j+1) * numRows + value(itAll)) * value(bMat, (j+1) * numRows + value(itAll))) / modelLogProb);
902 
903                     // Determine transition expectation values
904                     TOutEdgeIterator itOut(hmm, value(itAll));
905                     for(;!atEnd(itOut); goNext(itOut)) {
906                         // Handle the end state
907                         if ((j == (len - 1)) && (targetVertex(itOut)==endState(hmm))) {
908                             property(transitionCounter, value(itOut)) += (value(fMat, (j+1) * numRows + value(itAll)) * getTransitionProbability(hmm, value(itAll),endState(hmm)) / modelLogProb);
909                         } else {
910                             if (!isSilent(hmm, targetVertex(itOut))) {
911                                 if (j < (len - 1)) property(transitionCounter, value(itOut)) += (value(fMat, (j+1) * numRows + value(itAll)) * getTransitionProbability(hmm, value(itOut)) * getEmissionProbability(hmm, targetVertex(itOut), nextC) * value(bMat, (j+2) * numRows + targetVertex(itOut)) / modelLogProb);
912                             } else {
913                                 property(transitionCounter, value(itOut)) += (value(fMat, (j+1) * numRows + value(itAll)) * getTransitionProbability(hmm, value(itOut)) * value(bMat, (j+1) * numRows + targetVertex(itOut)) / modelLogProb);
914                             }
915                         }
916                     }
917                 }
918             }
919         }
920         // Expectation step
921         _parameterEstimator(hmm,emissionCounter, transitionCounter);
922 
923 
924         // Termination?
925         if ((iter > 5) && ((totalModelLogProb - lastTotalModelProb) < epsilon)) {
926             lastTotalModelProb = totalModelLogProb;
927             break;
928         } else {
929             lastTotalModelProb = totalModelLogProb;
930         }
931     }
932     return lastTotalModelProb;
933 }
934 
935 template <typename TAlphabet, typename TProbability, typename TSpec, typename TSequence>
936 inline TProbability
baumWelchAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,StringSet<TSequence> const & seqSet)937 baumWelchAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec > >& hmm,
938                    StringSet<TSequence> const& seqSet)
939 {
940     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
941     typedef typename Size<TGraph>::Type TSize;
942     TSize maxIter = 100;
943     TProbability epsilon = 0.00001;
944     return _baumWelchAlgorithm(hmm, seqSet, maxIter, epsilon);
945 }
946 
947 /*
948 //////////////////////////////////////////////////////////////////////////////
949 // Profile HMMs
950 //////////////////////////////////////////////////////////////////////////////
951 
952 //////////////////////////////////////////////////////////////////////////////
953 
954 
955 template < typename TAlphabet, typename TProbability, typename TSpec, typename TMat, typename TConsensus, typename TSize>
956 inline void
957 _profileHmmCounter(Graph<Hmm<TAlphabet, TProbability, TSpec> >& pHmm,
958                     TMat const& matr,
959                     TConsensus const& consensus,
960                     TSize const& numRows,
961                     TSize const& numCols)
962 {
963     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
964     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
965     typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
966     typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
967     typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
968     typedef typename Value<TConsensus>::Type TValue;
969 
970     // Initialization
971     TSize alphSize = ValueSize<TAlphabet>::VALUE;
972     TValue gapChar = gapValue<TValue>();
973     TVertexDescriptor begState = beginState(pHmm);
974     TVertexDescriptor eState = endState(pHmm);
975     TVertexDescriptor matchState = 1;
976     TVertexDescriptor insertState = 2;
977     TVertexDescriptor deleteState = 3;
978 
979     typedef String<TProbability> TCountString;
980     TCountString transitionCounter;
981     resize(transitionCounter, getIdUpperBound(_getEdgeIdManager(pHmm)), 0.0);
982     StringSet<TCountString> emissionCounter;
983     TSize nR = getIdUpperBound(_getVertexIdManager(pHmm));
984     for(TSize i =0; i<nR;++i) {
985         TCountString emisCount;
986         resize(emisCount,alphSize, 0.0);
987         appendValue(emissionCounter,emisCount);
988     }
989 
990 
991     String<TVertexDescriptor> oldCol;
992     String<TVertexDescriptor> currCol;
993     resize(oldCol, numRows, begState);
994     resize(currCol, numRows, 0);
995     TEdgeDescriptor currEdge;
996 
997     for(TSize i = 0; i<length(consensus); ++i) {
998         //transitionvalues
999 
1000         //being in insertState
1001         if (value(consensus, i)==gapChar){
1002             for(TSize j = 0; j<numRows; ++j){
1003                 if ((value(matr, j * numCols + i)!=gapChar)){
1004                     value(currCol,j) = insertState;
1005                     currEdge = findEdge(pHmm, value(oldCol,j), value(currCol,j));
1006                     property(transitionCounter, currEdge) += 1;
1007                     value(oldCol,j) = value(currCol,j);
1008                 }
1009                 else if ((value(matr, j * numCols + i)==gapChar) ){
1010                     value(currCol,j) = value(oldCol,j);
1011                 }
1012             }
1013         }
1014         //being in normal State
1015         else{
1016             for(TSize j = 0; j<numRows; ++j){
1017                 if(value(matr, j * numCols + i)!=gapChar) value(currCol,j) = matchState;
1018                 else value(currCol,j) = deleteState;
1019                 currEdge = findEdge(pHmm, value(oldCol,j), value(currCol,j));
1020                 property(transitionCounter, currEdge) += 1;
1021                 value(oldCol,j)=value(currCol,j);
1022             }
1023         }
1024         //emissionvalues
1025         if (value(consensus, i)==gapChar) {
1026             for(TSize j = 0; j<numRows; ++j)
1027                 if(value(matr, j * numCols + i)!=gapChar)
1028                     value(value(emissionCounter, insertState), ordValue( (TAlphabet)  value(matr, j * numCols + i) ) ) += 1;
1029 
1030             continue;
1031         }
1032         else
1033             for(TSize j = 0; j<numRows; ++j)
1034                 if(value(matr, j * numCols + i)!=gapChar)
1035                     value(value(emissionCounter, matchState), ordValue( (TAlphabet)  value(matr, j * numCols + i) ) ) += 1;
1036 
1037         matchState+=3;
1038         if ((insertState+3)==eState) insertState+=2;
1039         else insertState+=3;
1040         deleteState+=3;
1041     }
1042 
1043     //transition in endState
1044     for(TSize j = 0; j<numRows; ++j){
1045         currEdge = findEdge(pHmm,value(currCol,j),eState);
1046         property(transitionCounter, currEdge) += 1;
1047     }
1048 
1049     _parameterEstimator(pHmm, emissionCounter, transitionCounter);
1050 }
1051 
1052 
1053 //////////////////////////////////////////////////////////////////////////////
1054 
1055 template<typename TAlphabet, typename TCargo, typename TSpec, typename TConsensus>
1056 inline void
1057 _createProfileHmm(Graph<Hmm<TAlphabet, TCargo, TSpec> >& pHmm,
1058                    TConsensus const& consensus)
1059 {
1060     typedef Graph<Hmm<TAlphabet, TCargo, TSpec> > TGraph;
1061     typedef typename Size<TGraph>::Type TSize;
1062     typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1063     typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
1064     typedef typename Value<TConsensus>::Type TValue;
1065 
1066     // Initialization
1067     TSize alphSize = ValueSize<TAlphabet>::VALUE;
1068     TValue gapChar = gapValue<TValue>();
1069     clear(pHmm);
1070 
1071     // Add begin state
1072     TVertexDescriptor begState = addVertex(pHmm);
1073     assignBeginState(pHmm, begState);
1074 
1075     // Add for each consensus letter 3 states
1076     for (TSize i=0;i<length(consensus);++i){
1077         if (value(consensus,i) == gapChar) continue;
1078         addVertex(pHmm); addVertex(pHmm); addVertex(pHmm, true);
1079     }
1080 
1081     // Add last insertion state
1082     TVertexDescriptor lastIState = addVertex(pHmm);
1083 
1084     // Add end state
1085     TVertexDescriptor endState = addVertex(pHmm);
1086     assignEndState(pHmm, endState);
1087 
1088     // Is there no consensus letter?
1089     if (lastIState == 1) {
1090         clear(pHmm);
1091         return;
1092     }
1093 
1094     // Remember the kind of state
1095     TVertexDescriptor mState = 1;
1096     TVertexDescriptor iState = 2;
1097     TVertexDescriptor dState = 3;
1098 
1099     // Add tranistions from begin state
1100     addEdge(pHmm, begState, mState);
1101     addEdge(pHmm, begState, iState);
1102     addEdge(pHmm, begState, dState);
1103 
1104     // Add all remaining transitions
1105     for (TSize i=0;i<length(consensus);++i){
1106         if (value(consensus,i) == gapChar) continue;
1107         else if ((mState + 3) == lastIState) {
1108             addEdge(pHmm, iState, mState);
1109             addEdge(pHmm, iState, iState);
1110             addEdge(pHmm, iState, dState);
1111             break;
1112         }
1113         else{
1114             addEdge(pHmm, mState, (mState+3));
1115             addEdge(pHmm, iState, mState);
1116             addEdge(pHmm, dState, (mState+3));
1117             addEdge(pHmm, mState, (iState+3));
1118             addEdge(pHmm, iState, iState);
1119             addEdge(pHmm, dState, (iState+3));
1120             addEdge(pHmm, mState, (dState+3));
1121             addEdge(pHmm, iState, dState);
1122             addEdge(pHmm, dState, (dState+3));
1123             mState+=3;
1124             iState+=3;
1125             dState+=3;
1126         }
1127     }
1128 
1129     // Transitions to the endState and the last I-state
1130     addEdge(pHmm, mState, endState);
1131     addEdge(pHmm, mState, lastIState);
1132     addEdge(pHmm, lastIState, endState);
1133     addEdge(pHmm, lastIState, lastIState);
1134     addEdge(pHmm, dState, endState);
1135     addEdge(pHmm, dState, lastIState);
1136 }
1137 
1138 //////////////////////////////////////////////////////////////////////////////
1139 
1140 //e.g.
1141 //String<char> matr = "-AT---GAG-G-AG-CT-C--A--GT-G-CT---G";
1142 //msaToProfileHmm(matr, hmm, 5);
1143 
1144 template<typename TAlignmentChar, typename TAlphabet, typename TProbability, typename TSpec, typename TSize>
1145 inline void
1146 msaToProfileHmm(String<TAlignmentChar> const& matr,
1147                 Graph<Hmm<TAlphabet, TProbability, TSpec> >& pHmm,
1148                 TSize nSeq)
1149 {
1150     typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > THmm;
1151 
1152     // Consensus
1153     String<unsigned int> coverage;
1154     String<char> gappedConsensus;
1155     String<Dna> consensusSequence;
1156     consensusCalling(matr, consensusSequence, gappedConsensus, coverage, nSeq, MajorityVote() );
1157 
1158     // Build the HMM topology
1159     _createProfileHmm(pHmm,gappedConsensus);
1160 
1161     // Parameterize the pHmm
1162     TSize numCols = length(matr) / nSeq;
1163     _profileHmmCounter(pHmm, matr, gappedConsensus, nSeq, numCols);
1164 }
1165 
1166 */
1167 
1168 
1169 }  // namespace seqan
1170 
1171 #endif //#ifndef SEQAN_HEADER_...
1172