1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 #ifndef SEQAN_HEADER_FIND_MOTIF_PROJECTION_H
34 #define SEQAN_HEADER_FIND_MOTIF_PROJECTION_H
35 
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38 
39 //////////////////////////////////////////////////////////////////////////////
40 // Projection
41 //////////////////////////////////////////////////////////////////////////////
42 
43 /**
44 .Spec.Projection:
45 ..summary: Represents the PROJECTION algorithm of Buhler and Tompa.
46 ..general:Class.MotifFinder
47 ..cat:Motif Search
48 ..signature:MotifFinder<TValue, Projection>
49 ..param.TValue:The type of sequences to be analyzed.
50 ...type:Spec.Dna
51 ...type:Spec.AminoAcid
52 ..remarks:The @Spec.Projection|Projection algorithm@ is a heuristic algorithm that does not guarantee
53           that the unknown motif will be found every time. We can increase the chance of success
54 		  by performing a large number of independent trials to generate multiple guesses.
55 		  In each trial, @Spec.Projection@ makes a preselection of sets of length-l patterns called l-mers
56 		  which are likely to be a collection of motif instances (filtering step) and
57 		  refines them by some local searching techniques, e.g. @Function.em|EM algorithm@, Gibbs Sampling etc (refinement step).
58 ..include:seqan/find_motif.h
59 */
60 
61 struct Projection_;
62 typedef Tag<Projection_> const Projection;
63 
64 //////////////////////////////////////////////////////////////////////////////
65 // MotifFinder - Projection Spec
66 //
67 // t:=dataset size (number of sequences)
68 // l:=motif size
69 // m:=number of possible l-mers
70 // d:=number of substitutions
71 // k:=projection size
72 // s:=bucket size
73 // tr:=number of independent trials
74 //////////////////////////////////////////////////////////////////////////////
75 
76 template <typename TValue>
77 class MotifFinder<TValue, Projection>
78 {
79 	enum { ALPHABET_SIZE = ValueSize<TValue>::VALUE };
80 //____________________________________________________________________________________________
81 	/*
82 	enum { ALPHABET_SIZE = ValueSize<TValue>::VALUE };
83 	typedef String<TValue> TString;
84 	typedef String<TString> TStrings;
85 	typedef typename Size<TStrings>::Type TSize1;
86 	typedef typename Size<TString>::Type TSize2;*/
87 
88 //____________________________________________________________________________________________
89 
90 public:
91 	typedef unsigned int TSize;
92 	typedef String<TValue> TString;
93 	typedef String<TString> TStrings;
94 
95 
96 	TSize dataset_size;
97 	TSize motif_size;
98 	TSize total_num_of_l_mers;
99 	TSize num_of_substitutions;
100 	bool has_exact_substitutions;
101 	TSize projection_size;
102 	TSize bucket_threshold;
103 	TSize num_of_trials;
104 	TStrings set_of_motifs;
105 	int score;
106 
107 //____________________________________________________________________________________________
108 
MotifFinder()109 	MotifFinder():
110 		dataset_size(0),
111 		motif_size(0),
112 		total_num_of_l_mers(0),
113 		num_of_substitutions(0),
114 		has_exact_substitutions(false),
115 		projection_size(0),
116 		bucket_threshold(0),
117 		num_of_trials(0)
118 	{
119     SEQAN_CHECKPOINT;
120 	}
MotifFinder(TSize const & t_,TSize const & l_,TSize const & m_total_,TSize const & d_,bool const & is_exact_,TSize const & k_,TSize const & s_,TSize const & tr_)121 	MotifFinder(TSize const & t_,
122 				TSize const & l_,
123 				TSize const & m_total_,
124 				TSize const & d_,
125 				bool const & is_exact_,
126 				TSize const & k_,
127 				TSize const & s_,
128 				TSize const & tr_):
129 		dataset_size(t_),
130 		motif_size(l_),
131         total_num_of_l_mers(m_total_),
132 		num_of_substitutions(d_),
133 		has_exact_substitutions(is_exact_),
134 		projection_size(k_),
135 		bucket_threshold(s_),
136 		num_of_trials(tr_)
137 	{
138     SEQAN_CHECKPOINT;
139 	}
MotifFinder(TSize const & t_,TSize const & l_,TSize const & m_total_,TSize const & d_,bool const & is_exact_)140 	MotifFinder(TSize const & t_,
141 				TSize const & l_,
142 				TSize const & m_total_,
143 				TSize const & d_,
144 				bool const & is_exact_):
145 		dataset_size(t_),
146 		motif_size(l_),
147 		total_num_of_l_mers(m_total_),
148 		num_of_substitutions(d_),
149 		has_exact_substitutions(is_exact_),
150 		projection_size(0),
151 		bucket_threshold(0),
152 		num_of_trials(0)
153 	{
154     SEQAN_CHECKPOINT;
155 
156 		projection_size =
157 			_computeProjectionSize<TSize>(ALPHABET_SIZE,
158 			                                     motif_size,
159 								                 num_of_substitutions,
160 								                 total_num_of_l_mers);
161 
162 		bucket_threshold =
163 			_computeBucketThreshold<TSize>(ALPHABET_SIZE,
164 			                                      motif_size,
165 								                  num_of_substitutions,
166 								                  total_num_of_l_mers,
167 												  projection_size);
168 
169 		double prob_q = static_cast<double>(0.95);
170 		num_of_trials = _computeNumOfTrials(dataset_size,
171 								            motif_size,
172 								            num_of_substitutions,
173 								            projection_size,
174 								            bucket_threshold,
175 								            prob_q);
176 	}
MotifFinder(MotifFinder const & other_)177 	MotifFinder(MotifFinder const & other_):
178 		dataset_size(other_.dataset_size),
179 		motif_size(other_.motif_size),
180 		total_num_of_l_mers(other_.total_num_of_l_mers),
181 		num_of_substitutions(other_.num_of_substitutions),
182 		has_exact_substitutions(other_.has_exact_substitutions),
183 		projection_size(other_.projection_size),
184 		bucket_threshold(other_.bucket_threshold),
185 		num_of_trials(other_.num_of_trials)
186 	{
187     SEQAN_CHECKPOINT;
188 	}
~MotifFinder()189 	~MotifFinder()
190 	{
191     SEQAN_CHECKPOINT;
192 	}
193 
194 	MotifFinder const &
195 	operator = (MotifFinder const & other_)
196 	{
197     SEQAN_CHECKPOINT;
198 		if(this!=&other_)
199 		{
200 			this->dataset_size = other_.dataset_size;
201 			this->motif_size = other_.motif_size;
202 			this->total_num_of_l_mers = other_.total_num_of_l_mers;
203 			this->num_of_substitutions = other_.number_of_substitutions;
204 			this->has_exact_substitutions = other_.has_exact_substitutions;
205 			this->projection_size = other_.projection_size;
206 			this->bucket_threshold = other_.bucket_threshold;
207 			this->num_of_trials = other_.num_of_trials;
208 		}
209 
210 		return *this;
211 	}
212 
213 //____________________________________________________________________________________________
214 
215 }; // class MotifFinder<TValue, Projection>
216 
217 
218 //////////////////////////////////////////////////////////////////////////////
219 // Functions
220 //////////////////////////////////////////////////////////////////////////////
221 
222 /*
223 .Function._computeProjectionSize:
224 ..summary:Computes the projection size (k).
225 ..cat:Motif Search
226 ..signature:_computeProjectionSize(alp_size,l,d,m)
227 ..param.alp_size:The size of the sequence alphabet.
228 ...remarks:The alp_size object is four for nucleotide sequences and twenty for amino acid sequences.
229 ..param.l:The size of the motif.
230 ..param.d:The number of substitutions.
231 ..param.m:The total number of possible l-mers of a given dataset.
232 ..include:seqan/find_motif.h
233 */
234 
235 template<typename TType>
236 TType
_computeProjectionSize(TType const & alp_size,TType const & l,TType const & d,TType const & m_total)237 _computeProjectionSize(TType const & alp_size,
238 					   TType const & l,
239 					   TType const & d,
240 					   TType const & m_total)
241 {
242     SEQAN_CHECKPOINT;
243 
244 	TType result = static_cast<TType>(0);
245 	double numerator = log(static_cast<double>(m_total));
246 	double denominator = log(static_cast<double>(alp_size));
247 	double fraction = (numerator/denominator);
248 
249 	if( fraction<static_cast<double>(l-d-1) )
250 	{
251 		result = static_cast<TType>(floor(fraction)+1);
252 	}
253 	else
254 	{
255 		result = l-d-1;
256 	}
257 
258 	return result;
259 }
260 
261 //////////////////////////////////////////////////////////////////////////////
262 
263 /*
264 .Function._computeBucketThreshold:
265 ..summary:Computes the bucket threshold size (s).
266 ..cat:Motif Search
267 ..signature:_computeBucketThreshold(alp_size,l,d,m,k)
268 ..param.alp_size:The size of the sequence alphabet.
269 ...remarks:The alp_size object is four for nucleotide sequences and twenty for amino acid sequences.
270 ..param.l:The size of the motif.
271 ..param.d:The number of substitutions.
272 ..param.m:The total number of possible l-mers of a given dataset.
273 ..param.k:The projection size.
274 ..include:seqan/find_motif.h
275 */
276 
277 template<typename TType>
278 TType
_computeBucketThreshold(TType const & alp_size,TType const & l,TType const & d,TType const & m_total,TType const & k)279 _computeBucketThreshold(TType const & alp_size,
280 					    TType const & l,
281 					    TType const & d,
282 					    TType const & m_total,
283 						TType const & k)
284 {
285     SEQAN_CHECKPOINT;
286 
287 	TType result = 3; // or 4
288 	double numerator = log(static_cast<double>(m_total));
289 	double denominator = log(static_cast<double>(alp_size));
290 	double fraction = (numerator/denominator);
291 
292 	if( fraction>=static_cast<double>(l-d-1) )
293 	{
294 		result =
295 			static_cast<TType>(floor(static_cast<double>(m_total/
296 			                          pow(static_cast<double>(alp_size), static_cast<double>(k))*2)));
297 		if(result<1)
298 		{
299 			result = 1;
300 		}
301 	}
302 
303 	return result;
304 }
305 
306 //////////////////////////////////////////////////////////////////////////////
307 
308 /*
309 .Function._computeNumOfTrials:
310 ..summary:Computes the number of independent trials (tr).
311 ..cat:Motif Search
312 ..signature:_computeNumOfTrials(t,l,d,k,s,prob_q)
313 ..param.t:The number of input sequences.
314 ..param.l:The size of the motif.
315 ..param.d:The number of substitutions.
316 ..param.k:The projection size.
317 ..param.s:The bucket threshold size.
318 ..param.prob_q:
319 ...remarks:The prob_q object represents the probability that the planted bucket contains "s" or
320            more planted motif instances in at least one of the "tr" trials. Normally, we use
321 		   prob_q=0.95.
322 ...type:$double$
323 ..remarks:tr>= log(1-q)/log(B), where p is the probability that each motif occurence hashes
324           to the planted bucket and B is the probability that fewer than s planted occurences hash
325           to the planted buckes in a given trial
326 ..include:seqan/find_motif.h
327 */
328 
329 template<typename TType>
330 TType
_computeNumOfTrials(TType const & t,TType const & l,TType const & d,TType const & k,TType const & s,double const & prob_q)331 _computeNumOfTrials(TType const & t,
332 					TType const & l,
333 					TType const & d,
334 					TType const & k,
335 					TType const & s,
336 					double const & prob_q)
337 {
338     SEQAN_CHECKPOINT;
339 
340 	double prob_p =
341 		static_cast<double>(binomialCoefficient( (l-d), k ))
342 	   /static_cast<double>(binomialCoefficient(l,k));
343 
344 	double prob_B = static_cast<double>(0);
345 	for(unsigned int i=0; i<s; ++i)
346 	{
347 		prob_B+=
348 			static_cast<double>(binomialCoefficient(t,i))
349 		   *pow(prob_p, static_cast<double>(i))
350 		   *pow(static_cast<double>(1)-prob_p, static_cast<double>(t-i));
351 	}
352 
353 	double numerator = log(static_cast<double>(1)-prob_q);
354 	double denominator = log(static_cast<double>(prob_B));
355 	TType result =
356 		static_cast<TType>(ceil(static_cast<double>(numerator/denominator)-static_cast<double>(0.5)));
357 
358 	if(result<1)
359 	{
360 		result = 1;
361 	}
362 
363 	return result;
364 }
365 
366 //////////////////////////////////////////////////////////////////////////////
367 
368 /*
369 .Function.findMotif (for PROJECTION)
370 ..summary:Represents the PROJECTION algorithm.
371 ..cat:Motif Search
372 ..signature:findMotif(finder,dataset,seq_model)
373 ..param.finder:The @Class.MotifFinder@ object.
374 ...type:Class.MotifFinder
375 ..param.dataset:The dataset object representing the input sequences.
376 ...type:Class.StringSet
377 ..param.seq_model:The seq_model object.
378 ...type:Tag.Oops
379 ...type:Tag.Zoops
380 ...type:Tag.Tcm
381 ...remarks:The sequence models rely on different assumptions about the distribution of motif occurrences
382            across the sample sequences.
383 ..remarks:The PROJECTION algorithm which consists of two steps, the filtering and the refinement step,
384           is able to run in Oops, Zoops and Tcm mode.
385 ..remarks:The algorithm uses the EM procedure during the refinement phase which was introduced by Bailey
386           and Elkan.
387 ..include:seqan/find_motif.h
388 */
389 
390 template<typename TSeqType, typename TStrings, typename TModel>
391 void
findMotif(MotifFinder<TSeqType,Projection> & finder,TStrings & dataset,TModel seq_model)392 findMotif(MotifFinder<TSeqType, Projection> & finder,
393 		  TStrings & dataset,
394 		  TModel seq_model)
395 {
396     SEQAN_CHECKPOINT;
397 
398 	typedef typename Value<TStrings>::Type TString;
399 	typedef typename Position<TString>::Type TPos;
400 	typedef String<int> TArray;
401 	typedef std::vector<int> TBucket;
402 	typedef typename Size<TArray>::Type TArSize;
403 	typedef String<TBucket> TBuckets;
404 
405 	// dataset information
406 	typedef typename Size<TStrings>::Type TStringsSize;
407 	TStringsSize t = length(dataset);
408 
409 	// count_ar:=array of votes for each h(k-mer)
410 	TArSize ar_size =
411 		static_cast<TArSize>(pow(static_cast<double>(ValueSize<TSeqType>::VALUE), static_cast<int>(finder.projection_size)));
412 	finder.score = -1;
413 	//std::set< String<int> > occurred_positions;
414 	for(unsigned int trial=0; trial<finder.num_of_trials; ++trial)
415 	{
416 		//
417 		//std::cout << " . ";
418 		//
419 
420 		TArray count_ar;
421 		resize(count_ar, ar_size);
422 
423 		// ----------------------------------------------------------------------------
424 		// STEP 1:
425 		// filtering phase (:=key random projection phase)
426 		// ----------------------------------------------------------------------------
427 		// choose randomly k different positions
428 		std::set<int> positions;
429 		choosePositions(positions,finder.motif_size,finder.projection_size);
430 
431 		// array of collection of l-mers
432 		TBuckets bucket_ar;
433 		unsigned int num_of_relevant_buckets = 0;
434 
435 		_filteringStep(bucket_ar,
436 					   count_ar,
437 					   num_of_relevant_buckets,
438 					   dataset,
439 					   positions,
440 					   finder.motif_size,
441 					   finder.bucket_threshold);
442 
443 		// ----------------------------------------------------------------------------
444 		// STEP 2:
445 		// checking phase (:= local search-based refinement procedure)
446 		// ----------------------------------------------------------------------------
447 		TPos i = 0;
448 		TPos j = 0;
449 		while( (j<num_of_relevant_buckets) & (i<ar_size) )
450 		{
451 			unsigned int bucket_size = (bucket_ar[i]).size();
452 			if(bucket_size>=finder.bucket_threshold)
453 			{
454 				++j;
455 
456 				TStrings l_mers;
457 				resize(l_mers, bucket_size);
458 				TBucket::iterator bucket_iter, bucket_end;
459 				bucket_iter = (bucket_ar[i]).begin();
460 				bucket_end = (bucket_ar[i]).end();
461 				int bucket_element = 0;
462 				while(bucket_iter!=bucket_end)
463 				{
464 					TString l_mer =
465 						inverseHash<TSeqType>(*bucket_iter, ValueSize<TSeqType>::VALUE, finder.motif_size);
466 					l_mers[bucket_element] = l_mer;
467 					++bucket_element;
468 					++bucket_iter;
469 				}
470 
471 				TString consensus_pat;
472 				TStrings motif_instances;
473 				int score =
474 					_refinementStep(consensus_pat,
475 									l_mers,dataset,
476 									finder.motif_size,
477 									finder.num_of_substitutions,
478 									finder.has_exact_substitutions,
479 									seq_model);
480 
481 				if(score>finder.score)
482 				{
483 					finder.score = score;
484 					if (!length(finder.set_of_motifs)) appendValue(finder.set_of_motifs, consensus_pat);
485 					else finder.set_of_motifs[0] = consensus_pat;
486 
487 					if((TStringsSize) finder.score==t)
488 					{
489 						trial = finder.num_of_trials;
490 						j = num_of_relevant_buckets;
491 					}
492 				}
493 			}
494 			++i;
495 		}// end while( (j<num_of_relevant_buckets) & (i<ar_size) )
496 	}// end for(unsigned int trial=0; trial<finder.num_of_trials; ++trial)
497 
498 	//
499 	//std::cout << "\n";
500 	//
501 }
502 
503 //////////////////////////////////////////////////////////////////////////////
504 
505 /*
506 .Function._filteringStep:
507 ..summary:Given a position set with k different positions we compute a projection value
508           for each l-mer in the input sequences and store the specific l-mer in the appropriate
509 		  bucket which is labeled with the specific projection value.
510 ..cat:Motif Search
511 ..signature:_filteringStep(buckets,count_ar,num_of_relevant_buckets,dataset,shape,l,s)
512 ..include:seqan/find_motif.h
513 */
514 
515 template<typename TBucketAr, typename TArray, typename TType, typename TStrings, typename TPositions>
516 void
_filteringStep(TBucketAr & buckets,TArray & count_ar,TType & num_of_relevant_buckets,TStrings & dataset,TPositions & positions,TType const & l,TType const & s)517 _filteringStep(TBucketAr & buckets,
518 			   TArray & count_ar,
519 			   TType & num_of_relevant_buckets,
520 			   TStrings & dataset,
521 			   TPositions & positions,
522 			   TType const & l,
523 			   TType const & s)
524 {
525     SEQAN_CHECKPOINT;
526 
527 	typedef typename Value<TStrings>::Type TString;
528 	typedef typename Value<TString>::Type TValue;
529 	typedef typename Value<TBucketAr>::Type TBucket;
530 	typename Iterator<TStrings>::Type ds_iter = begin(dataset);
531 	typename Size<TArray>::Type ar_size = length(count_ar);
532 	Shape<TValue> shape(l); //to compute hash value of l-mer x
533 
534 	// initialize pointer by setting it to null
535 	// (=std::fill(begin(count_ar),end(count_ar),0))
536 	typename Iterator<TArray>::Type count_ar_iter = begin(count_ar) ;
537 	typename Iterator<TArray>::Type count_ar_end = end(count_ar);
538 	while(count_ar_iter!=count_ar_end)
539 	{
540 		*count_ar_iter = 0;
541 		++count_ar_iter;
542 	}
543 
544 	// go over input sequences & increment corresponding counter in count_ar
545 	// fill l_mer_index with entries
546 	resize(buckets, ar_size);
547 	int y = 0; //hash-value of created k-mer
548 	int x = 0; //hash-value of l-mer
549 	for(; !atEnd(ds_iter, dataset); goNext(ds_iter))
550 	{
551 		typename Size<TString>::Type seq_length = length(*ds_iter);
552 		typename Iterator<TString>::Type seq_iter = begin(*ds_iter);
553 		typename Iterator<TString>::Type seq_end = begin(*ds_iter)+(seq_length-l+1);
554 		while( seq_iter!=seq_end )
555 		{
556 			x = hash(shape, seq_iter);
557 			y = projectLMer<TValue>(positions, seq_iter);
558 	    	++count_ar[y];
559 			(buckets[y]).push_back(x);
560 			++seq_iter;
561 		}
562 	}
563 
564 	num_of_relevant_buckets =
565 		std::count_if(begin(count_ar),
566 		              end(count_ar),
567 					  bind2nd(std::greater_equal<int>(),static_cast<int>(s)));
568 }
569 
570 //////////////////////////////////////////////////////////////////////////////
571 
572 /*
573 .Function._refinementStep:
574 ..summary:Refines the collection of l-mers in each relevant bucket which contains at least s l-mers.
575 ..cat:Motif Search
576 ..signature:_refinementStep(consensus_seq,positions,l_mers,dataset,t,l,d,is_exact,model_type)
577 ..include:seqan/find_motif.h
578 */
579 
580 //////////////////////////////////////////////////////////////////////////////
581 //	Oops model
582 //////////////////////////////////////////////////////////////////////////////
583 
584 template<typename TString, typename TType>
585 int
_refinementStep(TString & consensus_seq,String<TString> const & l_mers,String<TString> & dataset,TType const & l,TType const & d,bool const & is_exact,Oops const & oops)586 _refinementStep(TString & consensus_seq,
587 			    String<TString> const & l_mers,
588 			    String<TString> & dataset,
589 				TType const & l,
590 				TType const & d,
591 				bool const & is_exact,
592 				Oops const & oops)
593 {
594     SEQAN_CHECKPOINT;
595 
596 	typedef String<TString> TStrings;
597 	typedef typename Value<TString>::Type TValue;
598 	typedef typename Position<TString>::Type TPos;
599 	typedef FrequencyDistribution<TValue> TFrequencyDistribution;
600 	TType t = length(dataset);
601 	int score = 0;
602 
603 	// compute background frequency
604 	TFrequencyDistribution background;
605 	backgroundFrequency(background, begin(dataset), end(dataset));
606 
607 	// step1: initial guess (profile) from bucket
608 	double epsilon = 0.1;
609 	Pseudocount<TValue, CMode> pseudocount_mode_c(epsilon);
610 	String<TFrequencyDistribution> profile;
611 	convertSetOfPatternsToProfile(profile, l_mers, pseudocount_mode_c);
612 	completeProfile(profile, background);
613 
614 	// step2: refinement of initial profile with em: 5 trials
615 	double likelihood_score = 0;
616 	int iterations = 3; //5
617 
618 	while(iterations>0)
619 	{
620 		likelihood_score = em(profile, begin(dataset), t, l, oops);
621 		--iterations;
622 	}
623 
624 	determineConsensusSeq(consensus_seq, profile, l);
625 	typename Iterator<TStrings>::Type ds_iter, ds_end;
626 	ds_iter = begin(dataset);
627 	ds_end = end(dataset);
628 	typename Position<TStrings>::Type seq_nr;
629 	do
630 	{
631 		seq_nr = t-(ds_end-ds_iter);
632 		TPos m = (TPos)(length(*ds_iter)-l+1);
633 		int * hd_ar = new int[m];
634 		typename Iterator<TString>::Type seq_iter, seq_end, consensus_begin;
635 		seq_iter = begin(*ds_iter);
636 		seq_end = seq_iter+m;
637 		while(seq_iter!=seq_end)
638 		{
639 			consensus_begin = begin(consensus_seq);
640 			hd_ar[m-(seq_end-seq_iter)] = hammingDistance<int>(seq_iter, seq_iter+l, consensus_begin);
641 			++seq_iter;
642 		}
643 
644 		if( (!is_exact) &
645 			(count_if(hd_ar,hd_ar+m,bind2nd(std::less_equal<TType>(), d))==1) )
646 		{
647 			++score;
648 		}
649 		else if( is_exact &
650 			     (count_if(hd_ar,hd_ar+m,bind2nd(std::equal_to<TType>(), d))==1) )
651 		{
652 			++score;
653 		}
654 
655 		delete[] hd_ar;
656 		++ds_iter;
657 	}
658 	while( (ds_iter!=ds_end) & (score==(int) seq_nr+1) );
659 
660 	return score;
661 }
662 
663 //////////////////////////////////////////////////////////////////////////////
664 //	Omops model
665 //////////////////////////////////////////////////////////////////////////////
666 
667 template<typename TString, typename TType>
668 int
_refinementStep(TString & consensus_seq,String<TString> const & l_mers,String<TString> & dataset,TType const & l,TType const & d,bool const & is_exact,Omops const &)669 _refinementStep(TString & consensus_seq,
670 			    String<TString> const & l_mers,
671 			    String<TString> & dataset,
672 				TType const & l,
673 				TType const & d,
674 				bool const & is_exact,
675 				Omops const & /*omops*/)
676 {
677 	typedef String<TString> TStrings;
678 	typedef typename Value<TString>::Type TValue;
679 	typedef typename Position<TString>::Type TPos;
680 	typedef FrequencyDistribution<TValue> TFrequencyDistribution;
681 	TType t = length(dataset);
682 	int score = 0;
683 
684 	// compute background frequency
685 	TFrequencyDistribution background;
686 	backgroundFrequency(background, begin(dataset), end(dataset));
687 
688 	// step1: initial guess (profile) from bucket
689 	double epsilon = 0.1;
690 	Pseudocount<TValue, CMode> pseudocount_mode_c(epsilon);
691 	String<TFrequencyDistribution> profile;
692 	convertSetOfPatternsToProfile(profile, l_mers, pseudocount_mode_c);
693 	completeProfile(profile, background);
694 
695 	// step2: refinement of initial profile with em: 5 trials
696 	double likelihood_score = 0;
697 	int iterations = 3; //5
698 
699 	while(iterations>0)
700 	{
701 		likelihood_score = em(profile, begin(dataset), t, l, Oops());
702 		--iterations;
703 	}
704 
705 	determineConsensusSeq(consensus_seq, profile, l);
706 	typename Iterator<TStrings>::Type ds_iter, ds_end;
707 	ds_iter = begin(dataset);
708 	ds_end = end(dataset);
709 	typename Position<TStrings>::Type seq_nr;
710 	do
711 	{
712 		seq_nr = t-(ds_end-ds_iter);
713 		TPos m = (TPos)(length(*ds_iter)-l+1);
714 		typename Iterator<TString>::Type seq_iter, seq_end, consensus_begin;
715 		seq_iter = begin(*ds_iter);
716 		seq_end = seq_iter+m;
717 		while(seq_iter!=seq_end)
718 		{
719 			consensus_begin = begin(consensus_seq);
720 			TType hd = hammingDistance<TType>(seq_iter, seq_iter+l, consensus_begin);
721 
722 			if( (!is_exact) & (hd<=d) )
723 			{
724 				++score;
725 				seq_iter = seq_end-1;
726 			}
727 			else if( is_exact & (hd==d) )
728 			{
729 				++score;
730 				seq_iter = seq_end-1;
731 			}
732 			++seq_iter;
733 		}
734 		++ds_iter;
735 	}
736 	while( (ds_iter!=ds_end) & (score== (int) seq_nr+1) );
737 
738 	return score;
739 }
740 
741 //////////////////////////////////////////////////////////////////////////////
742 //	Zoops model
743 //////////////////////////////////////////////////////////////////////////////
744 
745 template<typename TString, typename TType>
746 int
_refinementStep(TString & consensus_seq,String<TString> const & l_mers,String<TString> & dataset,TType const & l,TType const & d,bool const & is_exact,Zoops const & zoops)747 _refinementStep(TString & consensus_seq,
748 			    String<TString> const & l_mers,
749 			    String<TString> & dataset,
750 				TType const & l,
751 				TType const & d,
752 				bool const & is_exact,
753 				Zoops const & zoops)
754 {
755     SEQAN_CHECKPOINT;
756 
757 	typedef String<TString> TStrings;
758 	typedef typename Value<TString>::Type TValue;
759 	typedef typename Position<TString>::Type TPos;
760 	typedef FrequencyDistribution<TValue> TFrequencyDistribution;
761 	TType t = length(dataset);
762 	int score = 0;
763 	int lower_limit = (int) floor(t*(zoops.threshold)+0.5);
764 
765 	// compute background frequency
766 	TFrequencyDistribution background;
767 	backgroundFrequency(background, begin(dataset), end(dataset));
768 
769 	// step1: initial guess (profile) from bucket
770 	double epsilon = 0.1;
771 	Pseudocount<TValue, CMode> pseudocount_mode_c(epsilon);
772 	String<TFrequencyDistribution> profile;
773 	convertSetOfPatternsToProfile(profile, l_mers, pseudocount_mode_c);
774 	completeProfile(profile, background);
775 
776 	// step2: refinement of initial profile with em: 5 trials
777 	double likelihood_score = 0;
778 	double gamma = static_cast<double>(1)/sqrt(static_cast<double>(t));
779 	int iterations = 3; //5
780 	while(iterations>0)
781 	{
782 		likelihood_score = em(profile, begin(dataset), t, l, gamma, zoops);
783 		--iterations;
784 	}
785 
786 	determineConsensusSeq(consensus_seq, profile, l);
787 	typename Iterator<TStrings>::Type ds_iter, ds_end;
788 	ds_iter = begin(dataset);
789 	ds_end = end(dataset);
790 	typename Position<TStrings>::Type seq_nr;
791 	do
792 	{
793 		seq_nr = t-(ds_end-ds_iter);
794 		TPos m = (TPos)(length(*ds_iter)-l+1);
795 		int * hd_ar = new int[m];
796 		typename Iterator<TString>::Type seq_iter, seq_end, consensus_begin;
797 		seq_iter = begin(*ds_iter);
798 		seq_end = seq_iter+m;
799 		while(seq_iter!=seq_end)
800 		{
801 			consensus_begin = begin(consensus_seq);
802 			hd_ar[m-(seq_end-seq_iter)] = hammingDistance<int>(seq_iter, seq_iter+l, consensus_begin);
803 			++seq_iter;
804 		}
805 
806 		if(!is_exact)
807 		{
808 			TType num = count_if(hd_ar,hd_ar+m,bind2nd(std::less_equal<TType>(), d));
809 			if(num>1)
810 			{
811 				ds_iter = ds_end-1;
812 			}
813 			else if(num==1)
814 			{
815 				++score;
816 			}
817 		}
818 		else
819 		{
820 			TType num = count_if(hd_ar,hd_ar+m,bind2nd(std::equal_to<TType>(), d));
821 			if(num>1)
822 			{
823 				ds_iter = ds_end-1;
824 			}
825 			else if(num==1)
826 			{
827 				++score;
828 			}
829 		}
830 		delete[] hd_ar;
831 		++ds_iter;
832 	}
833 	while(ds_iter!=ds_end);
834 
835 	if(score<lower_limit)
836 	{
837 		score = 0;
838 	}
839 
840 	return score;
841 }
842 
843 //////////////////////////////////////////////////////////////////////////////
844 //	Tcm model
845 //////////////////////////////////////////////////////////////////////////////
846 
847 template<typename TString, typename TType>
848 int
_refinementStep(TString & consensus_seq,String<TString> const & l_mers,String<TString> & dataset,TType const & l,TType const & d,bool const & is_exact,Tcm const & tcm)849 _refinementStep(TString & consensus_seq,
850 			    String<TString> const & l_mers,
851 			    String<TString> & dataset,
852 				TType const & l,
853 				TType const & d,
854 				bool const & is_exact,
855 				Tcm const & tcm)
856 {
857     SEQAN_CHECKPOINT;
858 
859 	typedef String<TString> TStrings;
860 	typedef typename Value<TString>::Type TValue;
861 	typedef typename Position<TString>::Type TPos;
862 	typedef FrequencyDistribution<TValue> TFrequencyDistribution;
863 	TType t = length(dataset);
864 	int score = 0;
865 	int lower_limit = (int) floor(t*(tcm.threshold)+0.5);
866 
867 	// compute background frequency
868 	TFrequencyDistribution background;
869 	backgroundFrequency(background, begin(dataset), end(dataset));
870 
871 	// step1: initial guess (profile) from bucket
872 	double epsilon = 0.1;
873 	Pseudocount<TValue, CMode> pseudocount_mode_c(epsilon);
874 	String<TFrequencyDistribution> profile;
875 	convertSetOfPatternsToProfile(profile, l_mers, pseudocount_mode_c);
876 	completeProfile(profile, background);
877 
878 	// step2: refinement of initial profile with em: 5 trials
879 	double gamma = static_cast<double>(1)/sqrt(static_cast<double>(t));
880 	double lambda = 0;
881 	typename Iterator<TStrings>::Type ds_iter, ds_end;
882 	ds_iter = begin(dataset);
883 	ds_end = end(dataset);
884 	while(ds_iter!=ds_end)
885 	{
886 		TPos m = (TPos)(length(*ds_iter)-l+1);
887 		lambda += (gamma/((double)m));
888 		++ds_iter;
889 	}
890 	lambda = lambda/((double)t);
891 
892 	double likelihood_score = 0;
893 	int iterations = 3; //3
894 	while(iterations>0)
895 	{
896 		likelihood_score = em(profile, begin(dataset), t, l, lambda, tcm);
897 		--iterations;
898 	}
899 
900 	determineConsensusSeq(consensus_seq, profile, l);
901 	ds_iter = begin(dataset);
902 	typename Position<TStrings>::Type seq_nr;
903 	do
904 	{
905 		seq_nr = t-(ds_end-ds_iter);
906 		TPos m = (TPos)(length(*ds_iter)-l+1);
907 		typename Iterator<TString>::Type seq_iter, seq_end, consensus_begin;
908 		seq_iter = begin(*ds_iter);
909 		seq_end = seq_iter+m;
910 		while(seq_iter!=seq_end)
911 		{
912 			consensus_begin = begin(consensus_seq);
913 			TType hd = hammingDistance<TType>(seq_iter, seq_iter+l, consensus_begin);
914 
915 			if( (!is_exact) & (hd<=d) )
916 			{
917 				++score;
918 				seq_iter = seq_end-1;
919 			}
920 			else if( is_exact & (hd==d) )
921 			{
922 				++score;
923 				seq_iter = seq_end-1;
924 			}
925 			++seq_iter;
926 		}
927 		++ds_iter;
928 	}
929 	while(ds_iter!=ds_end);
930 
931 	if(score<lower_limit)
932 	{
933 		score = 0;
934 	}
935 
936 	return score;
937 }
938 
939 //////////////////////////////////////////////////////////////////////////////
940 //Subfunctions
941 //////////////////////////////////////////////////////////////////////////////
942 
943 /*
944 .Function.choosePositions:
945 ..summary:Chooses randomly k different positions from {0,1,...,(l-1)}
946 ..cat:Motif Search
947 ..signature:choosePositions(positions,l,k)
948 ..param.positions:The set of k chosen positions.
949 ...type:$set<int>$
950 ..param.l:The size of the motif.
951 ..param.k:The projection size.
952 ..include:seqan/find_motif.h
953 */
954 
955 template<typename TAssociativeContainer, typename TType>
956 void
choosePositions(TAssociativeContainer & positions,TType const & l,TType const & k)957 choosePositions(TAssociativeContainer & positions, TType const & l, TType const & k)
958 {
959     SEQAN_CHECKPOINT;
960 	while(positions.size()<k)
961 	{
962 		int position = rand() % l;
963 		positions.insert(position);
964 	}
965 }
966 
967 //////////////////////////////////////////////////////////////////////////////
968 
969 /*
970 .Function.projectLMer:
971 ..summary:Based on set "positions" the function uses the letters of a given l-mer at
972           chosen k positions to compute an appropriate hash value of the new k-mer.
973 ..cat:Motif Search
974 ..signature:projectLMer(positions,l,k)
975 ..param.positions:The set of k chosen positions.
976 ...remarks:$positions$ is of type $set<int>$
977 ..param.k:An iterator pointing to the first positions of a given sequence.
978 ..include:seqan/find_motif.h
979 */
980 
981 template<typename TValue, typename TIter>
982 inline std::set<int>::value_type
projectLMer(std::set<int> & positions,TIter it)983 projectLMer(std::set<int> & positions, TIter it)
984 {
985     SEQAN_CHECKPOINT;
986 
987 	typedef std::set<int>::value_type THValue;
988 	THValue prev_pos; //, cur_pos;
989 
990 	std::set<int>::iterator positions_iter, positions_end;
991 	positions_iter = positions.begin();
992 	positions_end = positions.end();
993 	prev_pos = *positions_iter;
994 	it += prev_pos;
995 	THValue hValue = ordValue(*it);
996 	++positions_iter;
997 	while(positions_iter!=positions_end)
998 	{
999 		THValue cur_pos = *positions_iter;
1000 		goFurther(it, (cur_pos-prev_pos));
1001 		hValue = hValue * ValueSize<TValue>::VALUE + ordValue(*it);
1002 		prev_pos = cur_pos;
1003 		++positions_iter;
1004 	}
1005 
1006 	return hValue;
1007 }
1008 
1009 
1010 //////////////////////////////////////////////////////////////////////////////
1011 
1012 /*
1013 .Function._getLMersWithTheLargestLikelihoodRatio:
1014 ..summary:Forms a guess for the planted motif by selecting from each input sequence
1015 		  the l-mer x with the largest likelihood ratio.
1016 ..signature:_getLMersWithTheLargestLikelihoodRatio(l_mers,positions,dataset_start,dataset_end,profile,l)
1017 ..param.l_mers:The collection of t l-mers.
1018 ..param.dataset_start:An iterator pointing to the first input sequence of a given dataset.
1019 ..param.dataset_end:An iterator pointing to the last input sequence of a given dataset.
1020 ..param.t:The number of input sequences.
1021 ..param.profile:The profile object which is a set of frequency distributions.
1022 ...type:Class.String
1023 ....signature:String<TFrequencyDistribution>
1024 ..param.l:The size of the motif.
1025 ..include:seqan/find_motif.h
1026 */
1027 
1028 template<typename TStrings, typename TIter, typename TType, typename TProfile>
1029 void
_getLMersWithTheLargestLikelihoodRatio(TStrings & l_mers,TIter dataset_start,TIter dataset_end,TProfile const & profile,TType const & l)1030 _getLMersWithTheLargestLikelihoodRatio(TStrings & l_mers,
1031 									   TIter dataset_start,
1032 									   TIter dataset_end,
1033 									   TProfile const & profile,
1034 								       TType const & l)
1035 {
1036     SEQAN_CHECKPOINT;
1037 
1038 	typedef typename Value<TStrings>::Type TString;
1039 	typedef typename Position<TStrings>::Type TPos1;
1040 	typedef typename Position<TString>::Type TPos2;
1041 	typename Size<TStrings>::Type t = (dataset_end-dataset_start);
1042 	resize(l_mers, t);
1043 	while(dataset_start!=dataset_end)
1044 	{
1045 		typename Position<TStrings>::Type seq_nr = t-(dataset_end-dataset_start);
1046 		TPos2 m = (TPos2)(length(*dataset_start)-l+1);
1047 		double * likelihood_ratio_mat = new double[m];
1048 		typename Iterator<TString>::Type seq_iter, seq_end;
1049 		seq_iter = begin(*dataset_start);
1050 		seq_end = seq_iter+m;
1051 		while(seq_iter!=seq_end)
1052 		{
1053 			likelihood_ratio_mat[m-(seq_end-seq_iter)] =
1054 				_computeLikelihoodRatioOfLMer(seq_iter, seq_iter+l, profile);
1055 			++seq_iter;
1056 		}
1057 
1058 		TPos2 max_pos =
1059 			(std::max_element(likelihood_ratio_mat, likelihood_ratio_mat+m)-likelihood_ratio_mat);
1060 		l_mers[seq_nr] = infix(*dataset_start, max_pos, max_pos+l);
1061 
1062 		delete[] likelihood_ratio_mat;
1063 		++dataset_start;
1064 	}
1065 }
1066 
1067 //////////////////////////////////////////////////////////////////////////////
1068 
1069 /*
1070 .Function._computeLikelihoodRatioOfLMer:
1071 ..summary:Computes the likelihood ratio of a given l-mer.
1072 ..signature:_computeLikelihoodRatioOfLMer(l_mer_begin,l_mer_end,profile)
1073 ..param.l_mer_begin:An iterator pointing to the beginning of a given l-mer pattern.
1074 ...type:Concept.Iterator Iterator
1075 ....remarks:Standard conform iterator
1076 ...type:Shortcut.DnaIterator
1077 ....remarks:Iterator for @Shortcut.DnaString@ (a string of @Spec.Dna@).
1078 ....see:Shortcut.DnaIterator
1079 ...type:Shortcut.PeptideIterator
1080 ....remarks:Iterator for @Shortcut.Peptide@ (a string of @Spec.AminoAcid@).
1081 ....see:Shortcut.PeptideIterator
1082 ..param.l_mer_end:An iterator pointing to the end of a given l-mer pattern.
1083 ...type:Concept.Iterator Iterator
1084 ....remarks:Standard conform iterator
1085 ...type:Shortcut.DnaIterator
1086 ....remarks:Iterator for @Shortcut.DnaString@ (a string of @Spec.Dna@).
1087 ....see:Shortcut.DnaIterator
1088 ...type:Shortcut.PeptideIterator
1089 ....remarks:Iterator for @Shortcut.Peptide@ (a string of @Spec.AminoAcid@).
1090 ....see:Shortcut.PeptideIterator
1091 ..param.profile:The profile object which is a set of frequency distributions.
1092 ...type:Class.String
1093 ....signature:String<TFrequencyDistribution>
1094 ..remarks:Computes the sum of log probabilites instead of the product of probabilites
1095 ..include:seqan/find_motif.h
1096 */
1097 
1098 template<typename TStrIter, typename TProfile>
1099 double
_computeLikelihoodRatioOfLMer(TStrIter l_mer_begin,TStrIter l_mer_end,TProfile const & profile)1100 _computeLikelihoodRatioOfLMer(TStrIter l_mer_begin,
1101 							  TStrIter l_mer_end,
1102 							  TProfile const & profile)
1103 {
1104     SEQAN_CHECKPOINT;
1105 
1106 	double result = 0;
1107 	unsigned int l = (unsigned int)(l_mer_end-l_mer_begin);
1108 	typedef typename Position<TProfile>::Type TPos;
1109 	TProfile log_profile = profile;
1110 	for(TPos i=0; i<length(log_profile); ++i)
1111 	{
1112 		logarithmize(log_profile[i]);
1113 	}
1114 
1115 	double motif_component = 0;
1116 	double backgr_component = 0;
1117 	while(l_mer_begin!=l_mer_end)
1118 	{
1119 		motif_component += log_profile[l-(l_mer_end-l_mer_begin)+1][(int)*l_mer_begin];
1120 		backgr_component += log_profile[0][(int)*l_mer_begin];
1121 		++l_mer_begin;
1122 	}
1123 	result = motif_component-backgr_component;
1124 
1125 	return exp(result);
1126 }
1127 
1128 //////////////////////////////////////////////////////////////////////////////
1129 
1130 /*
1131 .Function._computeLikelihoodRatioOfLMers:
1132 ..summary:Computes the likelihood ratio of a given set of l-mers.
1133 ..signature:_computeLikelihoodRatioOfLMers(l_mers,profile)
1134 ..param.l_mers:The collection of l-mers.
1135 ..param.profile:The profile object which is a set of frequency distributions.
1136 ...type:Class.String
1137 ....signature:String<TFrequencyDistribution>
1138 ..include:seqan/find_motif.h
1139 */
1140 
1141 template<typename TStrings, typename TProfile>
1142 double
_computeLikelihoodRatioOfLMers(TStrings const & l_mers,TProfile const & profile)1143 _computeLikelihoodRatioOfLMers(TStrings const & l_mers,
1144 							   TProfile const & profile)
1145 {
1146     SEQAN_CHECKPOINT;
1147 
1148 	typedef typename Position<TStrings>::Type TPos;
1149 	double score = 1;
1150 	typename Size<TStrings>::Type num_of_l_mers = length(l_mers);
1151 	for(TPos i=0; i<num_of_l_mers; ++i)
1152 	{
1153 		score *= _computeLikelihoodRatioOfLMer(begin(l_mers[i]), end(l_mers[i]), profile);
1154 	}
1155 
1156 	return score;
1157 }
1158 
1159 //////////////////////////////////////////////////////////////////////////////
1160 
1161 /**
1162 .Function.determineConsensusSeq:
1163 ..summary:Determines the consensus pattern of a given profile.
1164 ..cat:Motif Search
1165 ..signature:determineConsensusSeq(consensus_seq,profile,l)
1166 ..param.consensus_seq:The consensus pattern.
1167 ...type:Class.String
1168 ...type:Shortcut.DnaString
1169 ...type:Shortcut.Peptide
1170 ..param.profile:A StringSet of @Class.FrequencyDistribution|frequency distributions@.
1171 ...type:Class.StringSet
1172 ..param.l:The size of the motif.
1173 ..include:seqan/find_motif.h
1174 */
1175 
1176 template<typename TString, typename TProfile>
1177 void
determineConsensusSeq(TString & consensus_seq,TProfile & profile,typename Size<TString>::Type const & l)1178 determineConsensusSeq(TString & consensus_seq,
1179 					  TProfile & profile,
1180 					  typename Size<TString>::Type const & l)
1181 {
1182     SEQAN_CHECKPOINT;
1183 
1184 	typedef typename Value<TString>::Type TValue;
1185 	typename Position<TString>::Type i;
1186 
1187 	resize(consensus_seq, l);
1188 	if(length(profile)==l) //profile only consists of the motif component
1189 	{
1190 		for(i=0; i<l; ++i)
1191 		{
1192 			consensus_seq[i] =
1193 				static_cast<TValue>(posOfMax(profile[i]));
1194 		}
1195 	}
1196 	else
1197 	{
1198 		for(i=1; i<=l; ++i)
1199 		{
1200 			consensus_seq[i-1] =
1201 				static_cast<TValue>(posOfMax(profile[i]));
1202 		}
1203 	}
1204 }
1205 
1206 //////////////////////////////////////////////////////////////////////////////
1207 
1208 /*
1209 .Function.displayResult:
1210 ..summary:Displays the consensus pattern of the found motif candidate.
1211 ..cat:Motif Search
1212 ..signature:displayResult(motif_finder)
1213 ..param.motif_finder:The @Class.MotifFinder@ object.
1214 ...type:Class.MotifFinder
1215 ..param.dataset:The dataset object representing the input sequences.
1216 ...type:Class.String
1217 ...signature:String<TString>
1218 ...param.TString:A @Class.String@ type
1219 ....type:Class.String
1220 ..include:seqan/find_motif.h
1221 */
1222 
1223 template<typename TValue>
1224 void
displayResult(MotifFinder<TValue,Projection> & projection)1225 displayResult(MotifFinder<TValue, Projection> & projection)
1226 {
1227     SEQAN_CHECKPOINT;
1228 
1229 	if(length(projection.set_of_motifs)!=0)
1230 	{
1231 		std::cout << projection.set_of_motifs[0] << "\n";
1232 	}
1233 	else
1234 	{
1235 		std::cout << "NO MOTIF HAS BEEN FOUND!!!\n";
1236 	}
1237 }
1238 
1239 //////////////////////////////////////////////////////////////////////////////////////////////
1240 // Access Functions
1241 //////////////////////////////////////////////////////////////////////////////////////////////
1242 
1243 template <typename TValue>
1244 inline int
getScore(MotifFinder<TValue,Projection> & me)1245 getScore(MotifFinder<TValue, Projection> & me)
1246 {
1247     SEQAN_CHECKPOINT;
1248 	return me.score;
1249 }
1250 
1251 
1252 //////////////////////////////////////////////////////////////////////////////////////////////
1253 
1254 }// namespace SEQAN_NAMESPACE_MAIN
1255 
1256 #endif //#ifndef SEQAN_HEADER_...
1257