1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 #ifndef SEQAN_HEADER_GRAPH_ALGORITHM_HMM_H
34 #define SEQAN_HEADER_GRAPH_ALGORITHM_HMM_H
35 
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38 
39 //////////////////////////////////////////////////////////////////////////////
40 // Basic HMM algorithms
41 //////////////////////////////////////////////////////////////////////////////
42 
43 
44 //////////////////////////////////////////////////////////////////////////////
45 
46 /**
47 .Function.viterbiAlgorithm:
48 ..cat:Graph
49 ..summary:Implements the viterbi algorithm.
50 ..signature:viterbiAlgorithm(hmm, seq, path)
51 ..param.hmm:In-parameter:Input HMM.
52 ...type:Spec.Hmm
53 ..param.seq:In-parameter:Input sequence.
54 ..param.path:Out-parameter:State path.
55 ..returns:TCargo
56 ...remarks:Probability of the path.
57 ..see:Function.forwardAlgorithm
58 ..see:Function.backwardAlgorithm
59 ..include:seqan/graph_algorithms.h
60 */
61 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence, typename TPath>
62 inline TProbability
viterbiAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq,TPath & path)63 viterbiAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
64 				 TSequence const& seq,
65 				 TPath& path)
66 {
67 	SEQAN_CHECKPOINT
68 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
69 	typedef typename Size<TGraph>::Type TSize;
70 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
71 	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
72 
73 	// Initialization
74 	String<TProbability> vMat;
75 	String<TSize> traceback;
76 	TSize numCols = length(seq) + 2;
77 	TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
78 	resize(vMat, numCols * numRows, 0.0);
79 	resize(traceback, numCols * numRows);
80 	value(vMat, getBeginState(hmm)) = 1.0;
81 	TVertexDescriptor bState = getBeginState(hmm);
82 	TVertexDescriptor eState = getEndState(hmm);
83 	TVertexDescriptor nilVertex = getNil<TVertexDescriptor>();
84 
85 	// Distinct between silent states and real states
86 	typedef String<TVertexDescriptor> TStateSet;
87 	typedef typename Iterator<TStateSet>::Type TStateIter;
88 	TStateSet silentStates;
89 	TStateSet realStates;
90 	TVertexIterator itVertex(hmm);
91 	for(;!atEnd(itVertex);goNext(itVertex)) {
92 		if (isSilent(hmm, value(itVertex))) {
93 			appendValue(silentStates, value(itVertex));
94 		} else {
95 			appendValue(realStates, value(itVertex));
96 		}
97 	}
98 
99 	// Initialization for silent states connected to the begin state
100 	TStateIter itSilentStateEnd = end(silentStates);
101 	for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
102 		if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
103 		TProbability maxValue = 0.0;
104 		TVertexDescriptor maxVertex = nilVertex;
105 
106 		for(TStateIter itBelow = begin(silentStates); itBelow != itSilentState; goNext(itBelow)) {
107 			TProbability local = value(vMat, value(itBelow)) * getTransitionProbability(hmm, value(itBelow), value(itSilentState));
108 			if (local > maxValue) {
109 				maxValue = local;
110 				maxVertex = value(itBelow);
111 			}
112 		}
113 
114 		// Set traceback vertex
115 		if (maxVertex != nilVertex) {
116 			value(vMat, value(itSilentState)) = maxValue;
117 			value(traceback, value(itSilentState)) = maxVertex;
118 		}
119 	}
120 
121 	// Recurrence
122 	TSize len = length(seq);
123 	for(TSize i=1; i<=len; ++i) {
124 		// Iterate over real states
125 		TStateIter itRealStateEnd = end(realStates);
126 		for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
127 			// Find maximum
128 			TProbability maxValue = 0.0;
129 			TVertexDescriptor maxVertex = nilVertex;
130 			TVertexIterator itMax(hmm);
131 			for(;!atEnd(itMax);++itMax) {
132 				TProbability local = value(vMat, (i-1) * numRows + value(itMax)) * getTransitionProbability(hmm, value(itMax), value(itRealState));
133 				if (local > maxValue) {
134 					maxValue = local;
135 					maxVertex = *itMax;
136 				}
137 			}
138 			// Set traceback vertex
139 			if (maxVertex != nilVertex) {
140 				value(vMat, i * numRows + value(itRealState)) = maxValue * getEmissionProbability(hmm, value(itRealState), value(seq, i-1));
141 				value(traceback, i * numRows + value(itRealState)) = maxVertex;
142 			}
143 		}
144 
145 		// Iterate over silent states
146 		for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
147 			if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
148 			// Find maximum
149 			TProbability maxValue = 0.0;
150 			TVertexDescriptor maxVertex = nilVertex;
151 
152 			// Iterate over real states
153 			for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
154 				TProbability local = value(vMat, i * numRows + value(itRealState)) * getTransitionProbability(hmm, value(itRealState), value(itSilentState));
155 				if (local > maxValue) {
156 					maxValue = local;
157 					maxVertex = value(itRealState);
158 				}
159 			}
160 			// Iterate over silent states in increasing order
161 			for(TStateIter itBelow = begin(silentStates); itBelow != itSilentState; goNext(itBelow)) {
162 				TProbability local = value(vMat, i * numRows + value(itBelow)) * getTransitionProbability(hmm, value(itBelow), value(itSilentState));
163 				if (local > maxValue) {
164 					maxValue = local;
165 					maxVertex = value(itBelow);
166 				}
167 			}
168 			// Set traceback vertex
169 			if (maxVertex != nilVertex) {
170 				value(traceback, i * numRows + value(itSilentState)) = maxVertex;
171 				value(vMat, i * numRows + value(itSilentState)) = maxValue;
172 			}
173 		}
174 	}
175 
176 	// Termination
177 	TProbability maxValue = 0.0;
178 	TVertexDescriptor maxVertex = 0;
179 	TVertexIterator itMax(hmm);
180 	for(;!atEnd(itMax);++itMax) {
181 		TProbability local = value(vMat, len * numRows + *itMax) * getTransitionProbability(hmm, value(itMax), eState);
182 		if (local > maxValue) {
183 			maxValue = local;
184 			maxVertex = value(itMax);
185 		}
186 	}
187 	value(traceback, (len + 1) * numRows + eState) = maxVertex;
188 	if (maxVertex != nilVertex) value(vMat, (len+1) * numRows + eState) = maxValue;
189 
190 	// Traceback
191 	if (maxValue > 0.0) {
192 		clear(path);
193 		TVertexDescriptor oldState = eState;
194 		appendValue(path, oldState);
195 		for(TSize i = len + 1; i>=1; --i) {
196 			do {
197 				if ((!isSilent(hmm, oldState)) || (oldState == eState)) oldState = value(traceback, i * numRows + oldState);
198 				else oldState = value(traceback, (i - 1) * numRows + oldState);
199 				appendValue(path, oldState);
200 			} while ((isSilent(hmm, oldState)) && (oldState != bState));
201 		}
202 		std::reverse(begin(path), end(path));
203 	}
204 
205 	//// Debug code
206 	//for(TSize i = 0; i<numRows; ++i) {
207 	//	for(TSize j=0; j<numCols; ++j) {
208 	//		std::cout << value(vMat, j*numRows + i) << ',';
209 	//		//std::cout << value(traceback, j*numRows + i) << ',';
210 	//	}
211 	//	std::cout << std::endl;
212 	//}
213 	//for(TSize i = 0; i<length(path); ++i) {
214 	//	std::cout << path[i] << ',';
215 	//}
216 	//std::cout << std::endl;
217 
218 	return value(vMat, (len+1) * numRows + eState);
219 }
220 
221 
222 //////////////////////////////////////////////////////////////////////////////
223 
224 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence, typename TForwardMatrix>
225 inline TProbability
_forwardAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq,TForwardMatrix & fMat)226 _forwardAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
227 				   TSequence const& seq,
228 				   TForwardMatrix& fMat)
229 {
230 	SEQAN_CHECKPOINT
231 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
232 	typedef typename Size<TGraph>::Type TSize;
233 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
234 	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
235 
236 	// Initialization
237 	TSize numCols = length(seq) + 2;
238 	TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
239 	resize(fMat, numCols * numRows, 0.0);
240 	value(fMat, getBeginState(hmm)) = 1.0;
241 	TVertexDescriptor bState = getBeginState(hmm);
242 	TVertexDescriptor eState = getEndState(hmm);
243 
244 	// Distinct between silent states and real states
245 	typedef String<TVertexDescriptor> TStateSet;
246 	typedef typename Iterator<TStateSet>::Type TStateIter;
247 	TStateSet silentStates;
248 	TStateSet realStates;
249 	TVertexIterator itVertex(hmm);
250 	for(;!atEnd(itVertex);goNext(itVertex)) {
251 		if (isSilent(hmm, value(itVertex))) {
252 			appendValue(silentStates, value(itVertex));
253 		} else {
254 			appendValue(realStates, value(itVertex));
255 		}
256 	}
257 
258 	// Initialization for silent states connected to the begin state
259 	TStateIter itSilentStateEnd = end(silentStates);
260 	for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
261 		if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
262 		TProbability sumValue = 0.0;
263 		for(TStateIter itBelow = begin(silentStates); itBelow != itSilentState; goNext(itBelow)) {
264 			sumValue += value(fMat, value(itBelow)) * getTransitionProbability(hmm, value(itBelow), value(itSilentState));
265 		}
266 		value(fMat, value(itSilentState)) = sumValue;
267 	}
268 
269 	// Recurrence
270 	TSize len = length(seq);
271 	for(TSize i=1; i<=len; ++i) {
272 		// Iterate over real states
273 		TStateIter itRealStateEnd = end(realStates);
274 		for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
275 			TProbability sum = 0.0;
276 			TVertexIterator itAll(hmm);
277 			for(;!atEnd(itAll);++itAll) sum += value(fMat, (i-1) * numRows + value(itAll)) * getTransitionProbability(hmm, value(itAll), value(itRealState));
278 			value(fMat, i * numRows + value(itRealState)) = getEmissionProbability(hmm, value(itRealState), seq[i-1]) * sum;
279 		}
280 
281 		// Iterate over silent states
282 		for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
283 			if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
284 			TProbability sumValue = 0.0;
285 			// Iterate over real states
286 			for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
287 				sumValue += value(fMat, i * numRows + value(itRealState)) * getTransitionProbability(hmm, value(itRealState), value(itSilentState));
288 			}
289 			// Iterate over silent states in increasing order
290 			for(TStateIter itBelow = begin(silentStates); itBelow != itSilentState; goNext(itBelow)) {
291 				sumValue += value(fMat, i * numRows + value(itBelow)) * getTransitionProbability(hmm, value(itBelow), value(itSilentState));
292 			}
293 			value(fMat, i * numRows + value(itSilentState)) = sumValue;
294 		}
295 	}
296 
297 	// Termination
298 	TProbability sum = 0.0;
299 	TVertexIterator itAll(hmm);
300 	for(;!atEnd(itAll);++itAll) {
301 		sum += value(fMat, len * numRows + value(itAll)) * getTransitionProbability(hmm, value(itAll), eState);
302 	}
303 	value(fMat, (len+1) * numRows + eState) = sum;
304 
305 	//// Debug code
306 	//for(TSize i = 0; i<numRows; ++i) {
307 	//	for(TSize j=0; j<numCols; ++j) {
308 	//		std::cout << value(fMat, j*numRows + i) << ',';
309 	//	}
310 	//	std::cout << std::endl;
311 	//}
312 
313 	return value(fMat, (len+1) * numRows + eState);
314 }
315 
316 //////////////////////////////////////////////////////////////////////////////
317 
318 /**
319 .Function.forwardAlgorithm:
320 ..cat:Graph
321 ..summary:Implements the forward algorithm.
322 ..signature:forwardAlgorithm(hmm, seq)
323 ..param.hmm:In-parameter:Input HMM.
324 ...type:Spec.Hmm
325 ..param.seq:In-parameter:Input sequence.
326 ..returns:TProbability
327 ...remarks:Probability of the sequence.
328 ..see:Function.viterbiAlgorithm
329 ..see:Function.backwardAlgorithm
330 ..include:seqan/graph_algorithms.h
331 */
332 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence>
333 inline TProbability
forwardAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq)334 forwardAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
335 				 TSequence const& seq)
336 {
337 	SEQAN_CHECKPOINT
338 	String<TProbability> fMat;
339 	return _forwardAlgorithm(hmm, seq, fMat);
340 }
341 
342 //////////////////////////////////////////////////////////////////////////////
343 
344 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence, typename TBackwardMatrix>
345 inline TProbability
_backwardAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq,TBackwardMatrix & bMat)346 _backwardAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
347 					TSequence const& seq,
348 					TBackwardMatrix& bMat)
349 {
350 	SEQAN_CHECKPOINT
351 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
352 	typedef typename Size<TGraph>::Type TSize;
353 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
354 	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
355 
356 	// Initialization
357 	TSize numCols = length(seq) + 2;
358 	TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
359 	resize(bMat, numCols * numRows, 0.0);
360 	TVertexDescriptor bState = getBeginState(hmm);
361 	TVertexDescriptor eState = getEndState(hmm);
362 	TSize len = length(seq);
363 	value(bMat, (len + 1) * numRows + eState) = 1.0;
364 
365 	// Distinct between silent states and real states
366 	typedef String<TVertexDescriptor> TStateSet;
367 	typedef typename Iterator<TStateSet>::Type TStateIter;
368 	TStateSet silentStates;
369 	TStateSet realStates;
370 	TVertexIterator itVertex(hmm);
371 	for(;!atEnd(itVertex);goNext(itVertex)) {
372 		if (isSilent(hmm, value(itVertex))) {
373 			appendValue(silentStates, value(itVertex));
374 		} else {
375 			appendValue(realStates, value(itVertex));
376 		}
377 	}
378 	// Reverse the silent states order
379 	std::reverse(begin(silentStates), end(silentStates));
380 
381 	// Initialization for silent states connected to the end state
382 	TStateIter itSilentStateEnd = end(silentStates);
383 	for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
384 		if (value(itSilentState) == eState) continue;
385 		TProbability sumValue = getTransitionProbability(hmm, value(itSilentState), eState);
386 		for(TStateIter itAbove = begin(silentStates); itAbove != itSilentState; goNext(itAbove)) {
387 			sumValue += value(bMat, len * numRows + value(itAbove)) * getTransitionProbability(hmm, value(itSilentState), value(itAbove));
388 		}
389 		value(bMat, len * numRows + value(itSilentState)) = sumValue;
390 	}
391 
392 	// Initialization for real states
393 	TStateIter itRealStateEnd = end(realStates);
394 	for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
395 		TProbability sumValue = getTransitionProbability(hmm, value(itRealState), eState);
396 		for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
397 			sumValue += value(bMat, len * numRows + value(itSilentState)) * getTransitionProbability(hmm, value(itRealState), value(itSilentState));
398 		}
399 		value(bMat, len * numRows + value(itRealState)) = sumValue;
400 	}
401 
402 
403 	// Recurrence
404 	if (len > 0) {
405 		for(TSize i=len - 1; i>0; --i) {
406 			// Iterate over silent states
407 			for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
408 				if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
409 				TProbability sumValue = 0.0;
410 				// Iterate over real states
411 				for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
412 					sumValue += value(bMat, (i+1) * numRows + value(itRealState)) * getTransitionProbability(hmm, value(itSilentState), value(itRealState))* getEmissionProbability(hmm, value(itRealState), value(seq, i));
413 				}
414 				// Iterate over silent states in decreasing order
415 				for(TStateIter itAbove = begin(silentStates); itAbove != itSilentState; goNext(itAbove)) {
416 					if ((value(itAbove) == bState) || (value(itAbove) == eState)) continue;
417 					sumValue += value(bMat, i * numRows + value(itAbove)) * getTransitionProbability(hmm, value(itSilentState), value(itAbove));
418 				}
419 				value(bMat, i * numRows + value(itSilentState)) = sumValue;
420 			}
421 
422 			// Iteration over real states
423 			for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
424 				TProbability sumValue = 0.0;
425 				// Iterate over silent states
426 				for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
427 					if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
428 					sumValue += value(bMat, i * numRows + value(itSilentState)) * getTransitionProbability(hmm, value(itRealState), value(itSilentState));
429 				}
430 				// Iterate over real states
431 				for(TStateIter itR = begin(realStates); itR != itRealStateEnd; goNext(itR)) {
432 					sumValue += value(bMat, (i+1) * numRows + value(itR)) * getTransitionProbability(hmm, value(itRealState), value(itR)) * getEmissionProbability(hmm, value(itR), value(seq, i));
433 				}
434 				value(bMat, i * numRows + value(itRealState)) =  sumValue;
435 			}
436 		}
437 
438 		// Termination
439 		// Iterate over silent states
440 		for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
441 			if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
442 			TProbability sumValue = 0.0;
443 			// Iterate over real states
444 			for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
445 				sumValue += value(bMat, 1 * numRows + value(itRealState)) * getTransitionProbability(hmm, value(itSilentState), value(itRealState))* getEmissionProbability(hmm, value(itRealState), value(seq, 0));
446 			}
447 			// Iterate over silent states in decreasing order
448 			for(TStateIter itAbove = begin(silentStates); itAbove != itSilentState; goNext(itAbove)) {
449 				if ((value(itAbove) == bState) || (value(itAbove) == eState)) continue;
450 				sumValue += value(bMat, value(itAbove)) * getTransitionProbability(hmm, value(itSilentState), value(itAbove));
451 			}
452 			value(bMat, value(itSilentState)) = sumValue;
453 		}
454 		// Sum up all values
455 		TProbability sumValue = 0.0;
456 		for(TStateIter itSilentState = begin(silentStates); itSilentState != itSilentStateEnd; goNext(itSilentState)) {
457 			if ((value(itSilentState) == bState) || (value(itSilentState) == eState)) continue;
458 			sumValue += value(bMat, value(itSilentState)) * getTransitionProbability(hmm, bState, value(itSilentState));
459 		}
460 		for(TStateIter itRealState = begin(realStates); itRealState != itRealStateEnd; goNext(itRealState)) {
461 			sumValue += value(bMat, 1 * numRows + value(itRealState)) * getTransitionProbability(hmm, bState, value(itRealState)) * getEmissionProbability(hmm, value(itRealState), value(seq, 0));
462 		}
463 		value(bMat, bState) = sumValue;
464 	}
465 
466 	//// Debug code
467 	//for(TSize i = 0; i<numRows; ++i) {
468 	//	for(TSize j=0; j<numCols; ++j) {
469 	//		std::cout << value(bMat, j*numRows + i) << ',';
470 	//	}
471 	//	std::cout << std::endl;
472 	//}
473 
474 	return value(bMat, bState);
475 }
476 
477 
478 
479 //////////////////////////////////////////////////////////////////////////////
480 
481 /**
482 .Function.backwardAlgorithm:
483 ..cat:Graph
484 ..summary:Implements the backward algorithm.
485 ..signature:backwardAlgorithm(hmm, seq)
486 ..param.hmm:In-parameter:Input HMM.
487 ...type:Spec.Hmm
488 ..param.seq:In-parameter:Input sequence.
489 ..returns:TProbability
490 ...remarks:Probability of the sequence.
491 ..see:Function.viterbiAlgorithm
492 ..see:Function.forwardAlgorithm
493 ..include:seqan/graph_algorithms.h
494 */
495 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequence>
496 inline TProbability
backwardAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequence const & seq)497 backwardAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
498 				  TSequence const& seq)
499 {
500 	SEQAN_CHECKPOINT
501 	String<TProbability> bMat;
502 	return _backwardAlgorithm(hmm, seq, bMat);
503 }
504 
505 ///////////////////////////////////////////////////////////////////////////////////////
506 
507 /**
508 .Function.generateSequence:
509 ..cat:Graph
510 ..summary:Generates random state and alphabet sequences of a given HMM.
511 ..signature:generateSequence(hmm, sequences, states, numSeq, maxLength)
512 ..param.hmm:In-parameter:Input HMM.
513 ...type:Spec.Hmm
514 ..param.sequences:The StringSet of alphabet sequences.
515 ...type:Class.StringSet
516 ..param.sequences:The StringSet of state sequences.
517 ...type:Class.StringSet
518 ..param.numSeq:The number of sequences to generate.
519 ..param.maxLength:The maximum length of the sequences.
520 ...remarks:Sequences might be shorter if the end state is reached prior to maxLength.
521 ..remarks: Because of silent states, generated alphabet and state sequences might have different length.
522 ..returns:void
523 ..include:seqan/graph_algorithms.h
524 */
525 template<typename TAlphabet, typename TProbability, typename TSpec,typename TSequenceSet, typename TStateSeqSet, typename TSize>
526 inline void
generateSequence(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequenceSet & sequences,TStateSeqSet & states,TSize numSeq,TSize maxLength)527 generateSequence(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
528 				 TSequenceSet& sequences,
529 				 TStateSeqSet& states,
530 				 TSize numSeq,
531 				 TSize maxLength)
532 {
533 	SEQAN_CHECKPOINT
534 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
535 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
536 	typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
537 	typedef typename Value<TSequenceSet>::Type TSequence;
538 	typedef typename Value<TStateSeqSet>::Type TStateSeq;
539 
540 	// Initialization
541 	mtRandInit();
542 	clear(sequences);
543 	clear(states);
544 	TSize alphSize = ValueSize<TAlphabet>::VALUE;
545 
546 	// Simulate sequences
547 	TVertexDescriptor currentState;
548 	TVertexDescriptor endState = getEndState(hmm);
549 	for(TSize i=0;i<numSeq;++i){
550 		currentState = getBeginState(hmm);
551 		TSequence seq;
552 		TStateSeq stat;
553 		appendValue(stat, getBeginState(hmm));
554 		bool stop = false;
555 		TSize pos = 0;
556 		while (pos < maxLength) {
557 			TProbability prob = (double) (mtRand() % 100000) / (double) (100000);
558 			TProbability compareProb = 0.0;
559 			TOutEdgeIterator itState(hmm, currentState);
560 			// Determine the next state
561 			for(;!atEnd(itState);++itState) {
562 				// Probability of the next transition
563 				compareProb += getTransitionProbability(hmm, value(itState));
564 				// Compare with random probability
565 				if (prob <= compareProb){
566 					TVertexDescriptor nextState = targetVertex(hmm, value(itState));
567 					if (nextState == endState) {
568 						stop = true;
569 						break;
570 					}
571 					appendValue(stat, nextState);
572 					if (!isSilent(hmm, nextState)) {
573 						compareProb =0.0;
574 						prob = (double) (mtRand() % 100000) / (double) (100000);
575 						for (TSize c=0;c<alphSize;++c){
576 							compareProb += getEmissionProbability(hmm,targetVertex(hmm, value(itState)), TAlphabet(c));
577 							if (prob <= compareProb) {
578 								appendValue(seq, TAlphabet(c));
579 								++pos;
580 								break;
581 							}
582 						}
583 					}
584 					currentState = nextState;
585 					break;
586 				}
587 			}
588 			if (stop==true) break;
589 		}
590 		appendValue(stat, getEndState(hmm));
591 		appendValue(sequences, seq);
592 		appendValue(states, stat);
593 	}
594 }
595 
596 ///////////////////////////////////////////////////////////////////////////////////////
597 
598 template<typename TAlphabet, typename TProbability, typename TSpec,typename TSequenceSet, typename TSize>
599 inline void
generateSequence(Graph<Hmm<TAlphabet,TProbability,TSpec>> const & hmm,TSequenceSet & sequences,TSize numSeq,TSize maxLength)600 generateSequence(Graph<Hmm<TAlphabet, TProbability, TSpec> > const& hmm,
601 				 TSequenceSet& sequences,
602 				 TSize numSeq,
603 				 TSize maxLength)
604 {
605 	SEQAN_CHECKPOINT
606 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
607 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
608 	StringSet<String<TVertexDescriptor> > states;
609 	generateSequence(hmm, sequences, states, numSeq, maxLength);
610 }
611 
612 
613 
614 //////////////////////////////////////////////////////////////////////////////
615 // Training algorithms
616 //////////////////////////////////////////////////////////////////////////////
617 
618 
619 //////////////////////////////////////////////////////////////////////////////
620 
621 template<typename TAlphabet, typename TProbability, typename TSpec,typename TEmissionCounter, typename TTransitionCounter>
622 inline void
_parameterEstimator(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,TEmissionCounter const & emission,TTransitionCounter const & transition)623 _parameterEstimator(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm,
624 					 TEmissionCounter const& emission,
625 					 TTransitionCounter const& transition)
626 {
627 	SEQAN_CHECKPOINT
628 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
629 	typedef typename Size<TGraph>::Type TSize;
630 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
631 	typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
632 	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
633 	typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
634 	typedef typename Value<TTransitionCounter>::Type TCounterValue;
635 
636 	// Initialization
637 	TSize alphSize = ValueSize<TAlphabet>::VALUE;
638 	TCounterValue pseudoCount = (TCounterValue) 1.0;
639 
640 	// Estimate the parameters from the counter values
641 	TVertexIterator itAll(hmm);
642 	for(;!atEnd(itAll);++itAll){
643 		if (!isSilent(hmm, value(itAll))) {
644 			TCounterValue summedCount = (TCounterValue) (0.0);
645 			for(TSize i=0; i<alphSize;++i) summedCount += (pseudoCount + value(value(emission, value(itAll)),i));
646 			for(TSize i=0; i<alphSize;++i) emissionProbability(hmm,value(itAll),TAlphabet(i)) = (pseudoCount + value(value(emission, value(itAll)),i)) / summedCount;
647 		}
648 		TCounterValue summedCount = (TCounterValue) (0.0);
649 		TOutEdgeIterator itOutSum(hmm,value(itAll));
650 		for(;!atEnd(itOutSum);++itOutSum) summedCount += (pseudoCount + getProperty(transition, value(itOutSum)));
651 		TOutEdgeIterator itOut(hmm, value(itAll));
652 		for(;!atEnd(itOut);++itOut) transitionProbability(hmm, value(itOut)) = (pseudoCount + getProperty(transition, value(itOut))) / summedCount;
653 	}
654 }
655 
656 ///////////////////////////////////////////////////////////////////////////////////////
657 
658 template<typename TAlphabet, typename TProbability, typename TSpec, typename TSequenceSet, typename TStateSeqSet>
659 inline void
estimationWithStates(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,TSequenceSet & sequences,TStateSeqSet & states)660 estimationWithStates(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm,
661 					 TSequenceSet& sequences,
662 					 TStateSeqSet& states)
663 {
664 	SEQAN_CHECKPOINT
665 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
666 	typedef typename Size<TGraph>::Type TSize;
667 	typedef typename Value<TStateSeqSet>::Type TStateSeq;
668 	typedef typename Value<TStateSeq>::Type TState;
669 	typedef typename Value<TSequenceSet>::Type TSequence;
670 	typedef typename Value<TSequence>::Type TChar;
671 	typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
672 
673 	// Initialization
674 	TSize alphSize = ValueSize<TAlphabet>::VALUE;
675 	typedef String<TSize> TCountString;
676 	TCountString transitionCounter;
677 	resize(transitionCounter, getIdUpperBound(_getEdgeIdManager(hmm)), 0);
678 	StringSet<TCountString> emissionCounter;
679 	TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
680 	for(TSize i =0; i<numRows;++i) {
681 		TCountString emisCount;
682 		resize(emisCount,alphSize,0);
683 		appendValue(emissionCounter,emisCount);
684 	}
685 
686 	// Iterate over all sequences
687 	for (TSize j=0;j<length(states);++j) {
688 		TSize posSeq = 0;
689 		for (TSize i = 0;i<length(value(states,j));++i) {
690 			TState s = value(value(states,j),i);
691 			if (!isSilent(hmm, s)) {
692 				TAlphabet c = value(value(sequences,j),posSeq);
693 				value(value(emissionCounter, s), ordValue(c)) += 1;
694 				++posSeq;
695 			}
696 			if (i<(length(value(states,j))-1)) {
697 				TEdgeDescriptor currEdge = findEdge(hmm, s, value(value(states,j), (i+1)));
698 				property(transitionCounter, currEdge) += 1;
699 			}
700 		}
701 	}
702 
703 	//// Debug Code
704 	//typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
705 	//TEdgeIterator itE(hmm);
706 	//for(;!atEnd(itE);goNext(itE)) std::cout << sourceVertex(itE) << ',' << targetVertex(itE) << ':' << property(transitionCounter, value(itE)) << std::endl;
707 	//for(TSize j=0; j<length(emissionCounter); ++j) {
708 	//	for(TSize i=0;i<length(value(emissionCounter, j));++i) {
709 	//		std::cout << j << ":" << TAlphabet(i) << '=' << value(value(emissionCounter, j), i) << std::endl;
710 	//	}
711 	//}
712 
713 	// Estimate Parameters
714 	_parameterEstimator(hmm,emissionCounter, transitionCounter);
715 }
716 
717 ///////////////////////////////////////////////////////////////////////////////////////
718 
719 template<typename TAlphabet, typename TProbability, typename TSpec>
720 inline void
_fillHmmUniform(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm)721 _fillHmmUniform(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm)
722 {
723 	SEQAN_CHECKPOINT
724 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
725 	typedef typename Size<TGraph>::Type TSize;
726 	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
727 	typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
728 
729 	// Initialization
730 	TSize alphSize = ValueSize<TAlphabet>::VALUE;
731 
732 	// Iterate over all states
733 	TVertexIterator itState(hmm);
734 	for(;!atEnd(itState);goNext(itState)) {		//pass through the states of the hmm
735 		TSize oD = outDegree(hmm, value(itState));
736 		TOutEdgeIterator itOut(hmm, value(itState));
737 		for (;!atEnd(itOut);goNext(itOut)) transitionProbability(hmm, value(itOut)) = (TProbability) (1.0 / (double) (oD));
738 		if (!isSilent(hmm, value(itState))) {
739 			for(TSize i=0;i<alphSize;++i){
740 				emissionProbability(hmm,value(itState),TAlphabet(i)) = (TProbability) (1.0 / (double) (alphSize));
741 			}
742 		}
743 	}
744 }
745 
746 ///////////////////////////////////////////////////////////////////////////////////////
747 
748 template<typename TAlphabet, typename TProbability, typename TSpec, typename TRNG>
749 inline void
_fillHmmRandom(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,TRNG & rng)750 _fillHmmRandom(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm,
751                 TRNG & rng)
752 {
753 	SEQAN_CHECKPOINT
754 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
755 	typedef typename Size<TGraph>::Type TSize;
756 	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
757 	typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
758 
759 	// Initialization
760     Pdf<Uniform<TSize> > pdf(20, 99);
761 	TSize alphSize = ValueSize<TAlphabet>::VALUE;
762 
763 	// Iterate over all states
764 	TVertexIterator itState(hmm);
765 	for(;!atEnd(itState);goNext(itState)) {		//pass through the states of the hmm
766 		TSize oD = outDegree(hmm, value(itState));
767 		if (oD > 0) {
768 			String<TSize> counts;
769 			TSize sum = 0;
770 			for(TSize i = 0;i<oD;++i) {
771 				TSize rd = pickRandomNumber(rng, pdf);
772 				sum += rd;
773 				appendValue(counts, rd);
774 			}
775 			TSize pos = 0;
776 			TOutEdgeIterator itOut(hmm, value(itState));
777 			for (;!atEnd(itOut);goNext(itOut), ++pos) transitionProbability(hmm, value(itOut)) = (TProbability) ((double) (value(counts, pos)) / (double) (sum));
778 		}
779 		if (!isSilent(hmm, value(itState))) {
780 			String<TSize> counts;
781 			TSize sum = 0;
782 			for(TSize i = 0;i<alphSize;++i) {
783 				TSize rd = pickRandomNumber(rng, pdf);
784 				sum += rd;
785 				appendValue(counts, rd);
786 			}
787 			for(TSize i=0;i<alphSize;++i) emissionProbability(hmm,value(itState),TAlphabet(i)) = (TProbability) ((double) (value(counts, i)) / (double) (sum));
788 		}
789 	}
790 }
791 
792 ///////////////////////////////////////////////////////////////////////////////////////
793 
794 template<typename TAlphabet, typename TProbability, typename TSpec, typename TRNG>
795 inline void
randomizeHmm(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,TRNG &)796 randomizeHmm(Graph<Hmm<TAlphabet, TProbability, TSpec> >& hmm,
797              TRNG & /*rng*/)
798 {
799 	//_fillHmmRandom(hmm, rng);
800 	_fillHmmUniform(hmm);
801 }
802 
803 ///////////////////////////////////////////////////////////////////////////////////////
804 
805 template <typename TAlphabet, typename TProbability, typename TSpec, typename TSequence, typename TSize>
806 inline TProbability
_baumWelchAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,StringSet<TSequence> const & seqSet,TSize maxIter,TProbability epsilon)807 _baumWelchAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec > >& hmm,
808 					 StringSet<TSequence> const& seqSet,
809 					 TSize maxIter,
810 					 TProbability epsilon)
811 {
812 
813 	SEQAN_CHECKPOINT
814 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
815 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
816 	typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
817 	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
818 	typedef typename Iterator<TGraph, EdgeIterator>::Type TEdgeIterator;
819 	typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
820 	typedef String<TProbability> TCountString;
821 
822 	// Initialization
823 	TSize alphSize = ValueSize<TAlphabet>::VALUE;
824 	TProbability lastTotalModelProb = 0.0;
825 
826 	// Randomize current HMM so only topology is preserved
827 	randomizeHmm(hmm);
828 
829 	// Iterative Optimization
830 	for(TSize iter=0; iter<maxIter; ++iter){
831 		std::cout << "Iteration: "<< iter << std::endl;
832 		std::cout << hmm << std::endl;
833 		TCountString transitionCounter;
834 		resize(transitionCounter, getIdUpperBound(_getEdgeIdManager(hmm)), 0.0);
835 		StringSet<TCountString> emissionCounter;
836 		TSize numRows = getIdUpperBound(_getVertexIdManager(hmm));
837 		for(TSize i =0; i<numRows;++i) {
838 			TCountString emisCount;
839 			resize(emisCount,alphSize, 0.0);
840 			appendValue(emissionCounter,emisCount);
841 		}
842 
843 		// MaximationStep
844 		// Determine new contributions
845 		TProbability totalModelLogProb = 0.0;
846 		for (TSize i=0; i <length(seqSet);++i){		//sequences
847 			TSize len = length(value(seqSet,i));
848 
849 			// Forward algorithm
850 			String<TProbability> fMat;
851 			TProbability modelLogProb = _forwardAlgorithm(hmm, value(seqSet,i), fMat);
852 			totalModelLogProb += modelLogProb;
853 
854 			// Backward algorithm
855 			String<TProbability> bMat;
856 			_backwardAlgorithm(hmm, value(seqSet,i), bMat);
857 
858 			// Use the posterior probabilities to estimate counter values
859 			for (TSize j=0;j<len;++j){
860 				TAlphabet c = value(value(seqSet,i),j);
861 				TAlphabet nextC = c;
862 				if (j < (len - 1))  nextC = value(value(seqSet,i),(j+1));
863 
864 				// Iterate over all states
865 				TVertexIterator itAll(hmm);
866 				for(;!atEnd(itAll);++itAll) {
867 					// Handle begin state
868 					if (value(itAll) == beginState(hmm)) {
869 						if (j == 0) {
870 							TOutEdgeIterator itOut(hmm, value(itAll));
871 							for(;!atEnd(itOut); goNext(itOut)) {
872 								if (!isSilent(hmm, targetVertex(itOut))) {
873 									property(transitionCounter, value(itOut)) += (value(fMat, beginState(hmm)) * getTransitionProbability(hmm, value(itOut)) * getEmissionProbability(hmm, targetVertex(itOut), c) * value(bMat, numRows + targetVertex(itOut)) / modelLogProb);
874 								} else {
875 									property(transitionCounter, value(itOut)) += (value(fMat, beginState(hmm)) * getTransitionProbability(hmm, value(itOut)) * value(bMat, targetVertex(itOut)) / modelLogProb);
876 								}
877 							}
878 
879 						}
880 						continue;
881 					}
882 					// Ignore the end state
883 					if (value(itAll) == endState(hmm)) continue;
884 
885 					// Determine emission expectation values
886 					if (!isSilent(hmm, value(itAll))) value(value(emissionCounter, value(itAll)),ordValue(c)) += ((value(fMat, (j+1) * numRows + value(itAll)) * value(bMat, (j+1) * numRows + value(itAll))) / modelLogProb);
887 
888 					// Determine transition expectation values
889 					TOutEdgeIterator itOut(hmm, value(itAll));
890 					for(;!atEnd(itOut); goNext(itOut)) {
891 						// Handle the end state
892 						if ((j == (len - 1)) && (targetVertex(itOut)==endState(hmm))) {
893 							property(transitionCounter, value(itOut)) += (value(fMat, (j+1) * numRows + value(itAll)) * getTransitionProbability(hmm, value(itAll),endState(hmm)) / modelLogProb);
894 						} else {
895 							if (!isSilent(hmm, targetVertex(itOut))) {
896 								if (j < (len - 1)) property(transitionCounter, value(itOut)) += (value(fMat, (j+1) * numRows + value(itAll)) * getTransitionProbability(hmm, value(itOut)) * getEmissionProbability(hmm, targetVertex(itOut), nextC) * value(bMat, (j+2) * numRows + targetVertex(itOut)) / modelLogProb);
897 							} else {
898 								property(transitionCounter, value(itOut)) += (value(fMat, (j+1) * numRows + value(itAll)) * getTransitionProbability(hmm, value(itOut)) * value(bMat, (j+1) * numRows + targetVertex(itOut)) / modelLogProb);
899 							}
900 						}
901 					}
902 				}
903 			}
904 		}
905 		// Expectation step
906 		_parameterEstimator(hmm,emissionCounter, transitionCounter);
907 
908 
909 		// Termination?
910 		if ((iter > 5) && ((totalModelLogProb - lastTotalModelProb) < epsilon)) {
911 			lastTotalModelProb = totalModelLogProb;
912 			break;
913 		} else {
914 			lastTotalModelProb = totalModelLogProb;
915 		}
916 	}
917 	return lastTotalModelProb;
918 }
919 
920 ///////////////////////////////////////////////////////////////////////////////////////
921 
922 template <typename TAlphabet, typename TProbability, typename TSpec, typename TSequence>
923 inline TProbability
baumWelchAlgorithm(Graph<Hmm<TAlphabet,TProbability,TSpec>> & hmm,StringSet<TSequence> const & seqSet)924 baumWelchAlgorithm(Graph<Hmm<TAlphabet, TProbability, TSpec > >& hmm,
925 				   StringSet<TSequence> const& seqSet)
926 {
927 	SEQAN_CHECKPOINT
928 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
929 	typedef typename Size<TGraph>::Type TSize;
930 	TSize maxIter = 100;
931 	TProbability epsilon = 0.00001;
932 	return _baumWelchAlgorithm(hmm, seqSet, maxIter, epsilon);
933 }
934 
935 /*
936 //////////////////////////////////////////////////////////////////////////////
937 // Profile HMMs
938 //////////////////////////////////////////////////////////////////////////////
939 
940 //////////////////////////////////////////////////////////////////////////////
941 
942 
943 template < typename TAlphabet, typename TProbability, typename TSpec, typename TMat, typename TConsensus, typename TSize>
944 inline void
945 _profileHmmCounter(Graph<Hmm<TAlphabet, TProbability, TSpec> >& pHmm,
946 					TMat const& matr,
947 					TConsensus const& consensus,
948 					TSize const& numRows,
949 					TSize const& numCols)
950 {
951 	SEQAN_CHECKPOINT
952 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > TGraph;
953 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
954 	typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
955 	typedef typename Iterator<TGraph, VertexIterator>::Type TVertexIterator;
956 	typedef typename Iterator<TGraph, OutEdgeIterator>::Type TOutEdgeIterator;
957 	typedef typename Value<TConsensus>::Type TValue;
958 
959 	// Initialization
960 	TSize alphSize = ValueSize<TAlphabet>::VALUE;
961 	TValue gapChar = gapValue<TValue>();
962 	TVertexDescriptor begState = beginState(pHmm);
963 	TVertexDescriptor eState = endState(pHmm);
964 	TVertexDescriptor matchState = 1;
965 	TVertexDescriptor insertState = 2;
966 	TVertexDescriptor deleteState = 3;
967 
968 	typedef String<TProbability> TCountString;
969 	TCountString transitionCounter;
970 	resize(transitionCounter, getIdUpperBound(_getEdgeIdManager(pHmm)), 0.0);
971 	StringSet<TCountString> emissionCounter;
972 	TSize nR = getIdUpperBound(_getVertexIdManager(pHmm));
973 	for(TSize i =0; i<nR;++i) {
974 		TCountString emisCount;
975 		resize(emisCount,alphSize, 0.0);
976 		appendValue(emissionCounter,emisCount);
977 	}
978 
979 
980 	String<TVertexDescriptor> oldCol;
981 	String<TVertexDescriptor> currCol;
982 	resize(oldCol, numRows, begState);
983 	resize(currCol, numRows, 0);
984 	TEdgeDescriptor currEdge;
985 
986 	for(TSize i = 0; i<length(consensus); ++i) {
987 		//transitionvalues
988 
989 		//being in insertState
990 		if (value(consensus, i)==gapChar){
991 			for(TSize j = 0; j<numRows; ++j){
992 				if ((value(matr, j * numCols + i)!=gapChar)){
993 					value(currCol,j) = insertState;
994 					currEdge = findEdge(pHmm, value(oldCol,j), value(currCol,j));
995 					property(transitionCounter, currEdge) += 1;
996 					value(oldCol,j) = value(currCol,j);
997 				}
998 				else if ((value(matr, j * numCols + i)==gapChar) ){
999 					value(currCol,j) = value(oldCol,j);
1000 				}
1001 			}
1002 		}
1003 		//being in normal State
1004 		else{
1005 			for(TSize j = 0; j<numRows; ++j){
1006 				if(value(matr, j * numCols + i)!=gapChar) value(currCol,j) = matchState;
1007 				else value(currCol,j) = deleteState;
1008 				currEdge = findEdge(pHmm, value(oldCol,j), value(currCol,j));
1009 				property(transitionCounter, currEdge) += 1;
1010 				value(oldCol,j)=value(currCol,j);
1011 			}
1012 		}
1013 		//emissionvalues
1014 		if (value(consensus, i)==gapChar) {
1015 			for(TSize j = 0; j<numRows; ++j)
1016 				if(value(matr, j * numCols + i)!=gapChar)
1017 					value(value(emissionCounter, insertState), ordValue( (TAlphabet)  value(matr, j * numCols + i) ) ) += 1;
1018 
1019 			continue;
1020 		}
1021 		else
1022 			for(TSize j = 0; j<numRows; ++j)
1023 				if(value(matr, j * numCols + i)!=gapChar)
1024 					value(value(emissionCounter, matchState), ordValue( (TAlphabet)  value(matr, j * numCols + i) ) ) += 1;
1025 
1026 		matchState+=3;
1027 		if ((insertState+3)==eState) insertState+=2;
1028 		else insertState+=3;
1029 		deleteState+=3;
1030 	}
1031 
1032 	//transition in endState
1033 	for(TSize j = 0; j<numRows; ++j){
1034 		currEdge = findEdge(pHmm,value(currCol,j),eState);
1035 		property(transitionCounter, currEdge) += 1;
1036 	}
1037 
1038 	_parameterEstimator(pHmm, emissionCounter, transitionCounter);
1039 }
1040 
1041 
1042 //////////////////////////////////////////////////////////////////////////////
1043 
1044 template<typename TAlphabet, typename TCargo, typename TSpec, typename TConsensus>
1045 inline void
1046 _createProfileHmm(Graph<Hmm<TAlphabet, TCargo, TSpec> >& pHmm,
1047 				   TConsensus const& consensus)
1048 {
1049 	SEQAN_CHECKPOINT
1050 	typedef Graph<Hmm<TAlphabet, TCargo, TSpec> > TGraph;
1051 	typedef typename Size<TGraph>::Type TSize;
1052 	typedef typename VertexDescriptor<TGraph>::Type TVertexDescriptor;
1053 	typedef typename EdgeDescriptor<TGraph>::Type TEdgeDescriptor;
1054 	typedef typename Value<TConsensus>::Type TValue;
1055 
1056 	// Initialization
1057 	TSize alphSize = ValueSize<TAlphabet>::VALUE;
1058 	TValue gapChar = gapValue<TValue>();
1059 	clear(pHmm);
1060 
1061 	// Add begin state
1062 	TVertexDescriptor begState = addVertex(pHmm);
1063 	assignBeginState(pHmm, begState);
1064 
1065 	// Add for each consensus letter 3 states
1066 	for (TSize i=0;i<length(consensus);++i){
1067 		if (value(consensus,i) == gapChar) continue;
1068 		addVertex(pHmm); addVertex(pHmm); addVertex(pHmm, true);
1069 	}
1070 
1071 	// Add last insertion state
1072 	TVertexDescriptor lastIState = addVertex(pHmm);
1073 
1074 	// Add end state
1075 	TVertexDescriptor endState = addVertex(pHmm);
1076 	assignEndState(pHmm, endState);
1077 
1078 	// Is there no consensus letter?
1079 	if (lastIState == 1) {
1080 		clear(pHmm);
1081 		return;
1082 	}
1083 
1084 	// Remember the kind of state
1085 	TVertexDescriptor mState = 1;
1086 	TVertexDescriptor iState = 2;
1087 	TVertexDescriptor dState = 3;
1088 
1089 	// Add tranistions from begin state
1090 	addEdge(pHmm, begState, mState);
1091 	addEdge(pHmm, begState, iState);
1092 	addEdge(pHmm, begState, dState);
1093 
1094 	// Add all remaining transitions
1095 	for (TSize i=0;i<length(consensus);++i){
1096 		if (value(consensus,i) == gapChar) continue;
1097 		else if ((mState + 3) == lastIState) {
1098 			addEdge(pHmm, iState, mState);
1099 			addEdge(pHmm, iState, iState);
1100 			addEdge(pHmm, iState, dState);
1101 			break;
1102 		}
1103 		else{
1104 			addEdge(pHmm, mState, (mState+3));
1105 			addEdge(pHmm, iState, mState);
1106 			addEdge(pHmm, dState, (mState+3));
1107 			addEdge(pHmm, mState, (iState+3));
1108 			addEdge(pHmm, iState, iState);
1109 			addEdge(pHmm, dState, (iState+3));
1110 			addEdge(pHmm, mState, (dState+3));
1111 			addEdge(pHmm, iState, dState);
1112 			addEdge(pHmm, dState, (dState+3));
1113 			mState+=3;
1114 			iState+=3;
1115 			dState+=3;
1116 		}
1117 	}
1118 
1119 	// Transitions to the endState and the last I-state
1120 	addEdge(pHmm, mState, endState);
1121 	addEdge(pHmm, mState, lastIState);
1122 	addEdge(pHmm, lastIState, endState);
1123 	addEdge(pHmm, lastIState, lastIState);
1124 	addEdge(pHmm, dState, endState);
1125 	addEdge(pHmm, dState, lastIState);
1126 }
1127 
1128 //////////////////////////////////////////////////////////////////////////////
1129 
1130 //e.g.
1131 //String<char> matr = "-AT---GAG-G-AG-CT-C--A--GT-G-CT---G";
1132 //msaToProfileHmm(matr, hmm, 5);
1133 
1134 template<typename TAlignmentChar, typename TAlphabet, typename TProbability, typename TSpec, typename TSize>
1135 inline void
1136 msaToProfileHmm(String<TAlignmentChar> const& matr,
1137 				Graph<Hmm<TAlphabet, TProbability, TSpec> >& pHmm,
1138 				TSize nSeq)
1139 {
1140 	SEQAN_CHECKPOINT
1141 	typedef Graph<Hmm<TAlphabet, TProbability, TSpec> > THmm;
1142 
1143 	// Consensus
1144 	String<unsigned int> coverage;
1145 	String<char> gappedConsensus;
1146 	String<Dna> consensusSequence;
1147 	consensusCalling(matr, consensusSequence, gappedConsensus, coverage, nSeq, MajorityVote() );
1148 
1149 	// Build the HMM topology
1150 	_createProfileHmm(pHmm,gappedConsensus);
1151 
1152 	// Parameterize the pHmm
1153 	TSize numCols = length(matr) / nSeq;
1154 	_profileHmmCounter(pHmm, matr, gappedConsensus, nSeq, numCols);
1155 }
1156 
1157 */
1158 
1159 
1160 }// namespace SEQAN_NAMESPACE_MAIN
1161 
1162 #endif //#ifndef SEQAN_HEADER_...
1163