1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 #ifndef SEQAN_HEADER_CHAIN_BASE_H
34 #define SEQAN_HEADER_CHAIN_BASE_H
35 
36 /*
37  *  chain_base.h
38  *  chaining
39  *
40  *  Created by Hendrik Woehrle
41  *
42  *	Basis declarations & definitions for chaining algorithms
43  *
44  */
45 
46 namespace seqan
47 {
48 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
49 //
50 //			class forward declarations
51 //
52 ////////////////////////////////////////////////////////////////////////////////////////////////////////
53 
54 		// basic data structure for fragments (see seed_base.h)
55 	//template< typename TBorder, typename TSpec = Default >
56 	//struct Seed;
57 
58 		// wrapper data structure for point in the RMT
59 	template< typename T, typename TSpec >
60 	struct ChainPoint_;
61 
62 		// wrapper for points for the line sweep paradigma
63 	template< typename T >
64 	struct WrapperPoint_;
65 
66 		// structure for metainformation about fragments in chains
67 	template< typename T >
68 	struct MetaFragment_;
69 
70 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
71 //
72 //			tag classes
73 //
74 ////////////////////////////////////////////////////////////////////////////////////////////////////////
75 
76 	struct GenericChaining_;
77 	typedef Tag<GenericChaining_> const GenericChaining;
78 
79 	struct RangeTreeChaining_;
80 	typedef Tag<RangeTreeChaining_> const RangetreeChaining;
81 
82 
83 	struct GZeroCost
84 	{};
85 	//typedef Tag<ZeroCost_> const GZeroCost;
86 
87 	struct GOneCost
88 	{};
89 	//typedef Tag<G_1_Cost_> const GOneCost;
90 
91 	struct GInftyCost
92 	{};
93 	//typedef Tag<G_Inf_Cost_> const GInftyCost;
94 
95 	struct GSumOfPairCost
96 	{};
97 	//typedef Tag<F_SoP_Cost_> const GInftyCost;
98 
99 	template< typename Spec >
100 	struct ChainSpecType_
101 	{
102 		typedef Spec Type;
103 	};
104 
105 
106 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
107 //
108 //			special metatype declarations
109 //
110 ///////////////////////////////////////////////////////////////////////////////////////////////////////
111 // sizes
112 
113 	//template< typename TBorder, typename TSpec >
114 	//struct Size< Seed< TBorder, TSpec > >
115 	//{
116 	//	typedef size_t Type;
117 	//};
118 
119 	template< typename T, typename TSpec >
120 	struct Size< ChainPoint_< T, TSpec > >
121 	{
122 		typedef size_t Type;
123 	};
124 
125 	template< typename T >
126 	struct Size< WrapperPoint_< T > >
127 	{
128 		typedef size_t Type;
129 	};
130 
131 	template< typename T >
132 	struct Size< MetaFragment_< T > >
133 	{
134 		typedef size_t Type;
135 	};
136 
137 
138 //////////////////////////////////////////////////////////////////////////
139 // key types
140 
141 	//template< typename TBorder, typename TSpec >
142 	//struct Key< Seed< TBorder, TSpec > >
143 	//{
144 	//	typedef TBorder Type;
145 	//};
146 
147 	template< typename T, typename TSpec >
148 	struct Key< ChainPoint_< T, TSpec > >
149 	{
150 		typedef typename Key< T >::Type Type;
151 	};
152 
153 	template< typename T >
154 	struct Key< WrapperPoint_< T > >
155 	{
156 		typedef typename Key< T >::Type Type;
157 	};
158 
159 	template< typename T >
160 	struct Key< MetaFragment_< T > >
161 	{
162 		typedef typename Key< T >::Type Type;
163 	};
164 
165 
166 
167 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
168 //
169 //			functions
170 //
171 ///////////////////////////////////////////////////////////////////////////////////////////////////////
172 
173 
174 /*
175 	template< typename TSource, typename TDest, typename TScoreValue, typename TScoreType, typename TStructuring, typename TAlgorithm >
176 	TScoreValue
177 	computeChain( TSource & source, TDest & dest, Score< TScoreValue, TScoreType > const & score_, TAlgorithm tag, TStructuring structuring );
178 
179 	template< typename TSource, typename TDest, typename TCostModel, typename TScoreValue, typename TScoreType, typename TStructuring, typename TAlgorithm, typename TSpec >
180 	TScoreValue
181 	_computeChain( TSource & source, TDest & dest, TCostModel type, Score< TScoreValue, TScoreType > const & score_, TAlgorithm tag, TStructuring structuring, TSpec spec );
182 
183 */
184 	template< typename TSource, typename TDest, typename TScoreValue, typename TScoreType, typename TStructuring >
185 	TScoreValue
186 	computeChain( TSource & source, TDest & dest, Score< TScoreValue, TScoreType > const & score_, TStructuring structuring )
187 	{
188 
189 //		SEQAN_CHECK2( scoreGapOpen( score_ ) == scoreGapExtend( score_ ), "Chaining only supports linear gap costs" )
190 //		SEQAN_CHECK2( scoreGapOpen( score_ ) >= 0 && scoreGapExtend( score_ ) >= 0 && scoreMismatch( score_ ) >= 0, "Scores should be positive" )
191 		switch( dimension( value( begin( source ) ) ) )
192 		{
193 			case 1: SEQAN_REPORT("One dimensional chaining not supported")
194 					return 0;
195 			case 2: if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 )
196 					{
197 						return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Array< 1 > >() );
198 					}
199 					else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 )
200 					{
201 						if( scoreMismatch( score_ ) == scoreGap( score_ ) )
202 							return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Array< 1 > >() );
203 						else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) )
204 							return _computeChain( source, dest, GSumOfPairCost(), score_, structuring,ChainSpecType_< Array< 2 > >() );
205 					}
206 					SEQAN_ASSERT2( false, "Gap/mismatch model not supported" );
207 					break;
208 			case 3: if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 )
209 					return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Array< 2 > >() );
210 					else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 )
211 					{
212 						if( scoreMismatch( score_ ) == scoreGap( score_ ) )
213 							return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Array< 2 > >() );
214 						else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) )
215 							return _computeChain( source, dest, GSumOfPairCost(), score_, structuring,ChainSpecType_< Array< 3 > >() );
216 					}
217 					SEQAN_ASSERT2( false, "Gap/mismatch model not supported" );
218 					break;
219 			case 4:	if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 )
220 					return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Array< 3 > >() );
221 					else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 )
222 					{
223 						if( scoreMismatch( score_ ) == scoreGap( score_ ) )
224 							return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Array< 3 > >() );
225 						else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) )
226 							return _computeChain( source, dest, GSumOfPairCost(), score_, structuring,ChainSpecType_< Array< 4 > >() );
227 					}
228 					SEQAN_ASSERT2( false, "Gap/mismatch model not supported" );
229 					break;
230 			case 5:	if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 )
231 					return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Array< 4 > >() );
232 					else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 )
233 					{
234 						if( scoreMismatch( score_ ) == scoreGap( score_ ) )
235 							return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Array< 4 > >() );
236 						else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) )
237 							return _computeChain( source, dest, GSumOfPairCost(), score_, structuring,ChainSpecType_< Array< 5 > >() );
238 					}
239 					SEQAN_ASSERT2( false, "Gap/mismatch model not supported" );
240 					break;
241 			default:if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 )
242 					return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Default >() );
243 					else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 )
244 					{
245 						if( scoreMismatch( score_ ) == scoreGap( score_ ) )
246 							return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Default >() );
247 						else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) )
248 							return _computeChain( source, dest, GSumOfPairCost(), score_, structuring, ChainSpecType_< Default >() );
249 					}
250 					SEQAN_ASSERT2( false, "Gap/mismatch model not supported" );
251 
252 		}
253 		return minValue< TScoreValue >();
254 	}
255 
256 ///////////////////////////////////////////////////////////////////////////////////////////////////////
257 
258 template< typename TSource, typename TDest, typename TScoring>
259 inline typename Value<TScoring>::Type
260 globalChaining(TSource & source,
261 	  TDest & dest,
262 	  TScoring const & scoring,
263 	  RangetreeChaining)
264 {
265 	typedef typename Iterator<TSource, Standard>::Type TSourceIterator;
266 	typedef typename Iterator<TDest, Standard>::Type TDestIterator;
267 
268 	SEQAN_ASSERT(length(source))
269 
270 	//transform coordinates to old style ("end is last item")
271 	unsigned int dim = dimension(source[0]);
272 	for (TSourceIterator it = begin(source, Standard()); it < end(source, Standard()); ++it)
273 	{
274 		for (unsigned int i = 0; i < dim; ++i)
275 		{
276 			_setLeftPosition(*it, i, leftPosition(*it, i) + 1);
277 		}
278 	}
279 
280 	//compute chain
281 	typename Value<TScoring>::Type ret_value = computeChain(source, dest, scoring, SemiDeferred()); //Deferred(), Complete()
282 
283 	//retransform coordinates to new style ("end is behind last item")
284 	for (TSourceIterator it = begin(source, Standard()); it < end(source, Standard()); ++it)
285 	{
286 		for (unsigned int i = 0; i < dim; ++i)
287 		{
288 			_setLeftPosition(*it, i, leftPosition(*it, i) - 1);
289 		}
290 	}
291 	for (TDestIterator it = begin(dest, Standard()) + 1; it < end(dest, Standard()); ++it)
292 	{
293 		for (unsigned int i = 0; i < dim; ++i)
294 		{
295 			_setLeftPosition(*it, i, leftPosition(*it, i) - 1);
296 		}
297 	}
298 
299 	//adjust right positions of the end fragment
300 	TDestIterator it2 = end(dest, Standard());
301 	--it2;
302 	for (unsigned int i = 0; i < dim; ++i)
303 	{
304 		_setRightPosition(*it2, i, rightPosition(*it2, i) - 1);
305 	}
306 
307 	//return score
308 	return ret_value;
309 }
310 
311 ///////////////////////////////////////////////////////////////////////////////////////////////////////
312 
313 
314 
315 
316 
317 //implementation for Default
318 template< typename TSource, typename TDest, typename TScoring>
319 inline typename Value<TScoring>::Type
320 globalChaining(TSource & source,
321 	  TDest & dest,
322 	  TScoring const & scoring,
323 	  Default)
324 {
325 	return globalChaining(source, dest, scoring, GenericChaining()); //default is GenericChaining
326 }
327 template< typename TSource, typename TDest, typename TValue>
328 inline TValue
329 globalChaining(TSource & source,
330 	  TDest & dest,
331 	  Score<TValue, Zero> const & scoring,
332 	  Default)
333 {
334 	return globalChaining(source, dest, scoring, RangetreeChaining());
335 }
336 template< typename TSource, typename TDest, typename TValue>
337 inline TValue
338 globalChaining(TSource & source,
339 	  TDest & dest,
340 	  Score<TValue, Manhattan> const & scoring,
341 	  Default)
342 {
343 	return globalChaining(source, dest, scoring, RangetreeChaining());
344 }
345 template< typename TSource, typename TDest, typename TValue>
346 inline TValue
347 globalChaining(TSource & source,
348 	  TDest & dest,
349 	  Score<TValue, ChainSoP> const & scoring,
350 	  Default)
351 {
352 	return globalChaining(source, dest, scoring, RangetreeChaining());
353 }
354 
355 
356 //chain(3) => chain(4)
357 template< typename TSource, typename TDest, typename TScoring>
358 inline typename Value<TScoring>::Type
359 globalChaining(TSource & source,
360 	  TDest & dest,
361 	  TScoring const & scoring)
362 {
363 	return globalChaining(source, dest, scoring, Default());
364 }
365 
366 
367 ///////////////////////////////////////////////////////////////////////////////////////////////////////
368 
369 }
370 
371 #endif	// SEQAN_HEADER_CHAIN_BASE_H
372