1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 30 // 31 // ========================================================================== 32 33 #ifndef SEQAN_HEADER_CHAIN_BASE_H 34 #define SEQAN_HEADER_CHAIN_BASE_H 35 36 /* 37 * chain_base.h 38 * chaining 39 * 40 * Created by Hendrik Woehrle 41 * 42 * Basis declarations & definitions for chaining algorithms 43 * 44 */ 45 46 namespace seqan 47 { 48 /////////////////////////////////////////////////////////////////////////////////////////////////////////////// 49 // 50 // class forward declarations 51 // 52 //////////////////////////////////////////////////////////////////////////////////////////////////////// 53 54 // basic data structure for fragments (see seed_base.h) 55 //template< typename TBorder, typename TSpec = Default > 56 //struct Seed; 57 58 // wrapper data structure for point in the RMT 59 template< typename T, typename TSpec > 60 struct ChainPoint_; 61 62 // wrapper for points for the line sweep paradigma 63 template< typename T > 64 struct WrapperPoint_; 65 66 // structure for metainformation about fragments in chains 67 template< typename T > 68 struct MetaFragment_; 69 70 /////////////////////////////////////////////////////////////////////////////////////////////////////////////// 71 // 72 // tag classes 73 // 74 //////////////////////////////////////////////////////////////////////////////////////////////////////// 75 76 struct GenericChaining_; 77 typedef Tag<GenericChaining_> const GenericChaining; 78 79 struct RangeTreeChaining_; 80 typedef Tag<RangeTreeChaining_> const RangetreeChaining; 81 82 83 struct GZeroCost 84 {}; 85 //typedef Tag<ZeroCost_> const GZeroCost; 86 87 struct GOneCost 88 {}; 89 //typedef Tag<G_1_Cost_> const GOneCost; 90 91 struct GInftyCost 92 {}; 93 //typedef Tag<G_Inf_Cost_> const GInftyCost; 94 95 struct GSumOfPairCost 96 {}; 97 //typedef Tag<F_SoP_Cost_> const GInftyCost; 98 99 template< typename Spec > 100 struct ChainSpecType_ 101 { 102 typedef Spec Type; 103 }; 104 105 106 /////////////////////////////////////////////////////////////////////////////////////////////////////////////// 107 // 108 // special metatype declarations 109 // 110 /////////////////////////////////////////////////////////////////////////////////////////////////////// 111 // sizes 112 113 //template< typename TBorder, typename TSpec > 114 //struct Size< Seed< TBorder, TSpec > > 115 //{ 116 // typedef size_t Type; 117 //}; 118 119 template< typename T, typename TSpec > 120 struct Size< ChainPoint_< T, TSpec > > 121 { 122 typedef size_t Type; 123 }; 124 125 template< typename T > 126 struct Size< WrapperPoint_< T > > 127 { 128 typedef size_t Type; 129 }; 130 131 template< typename T > 132 struct Size< MetaFragment_< T > > 133 { 134 typedef size_t Type; 135 }; 136 137 138 ////////////////////////////////////////////////////////////////////////// 139 // key types 140 141 //template< typename TBorder, typename TSpec > 142 //struct Key< Seed< TBorder, TSpec > > 143 //{ 144 // typedef TBorder Type; 145 //}; 146 147 template< typename T, typename TSpec > 148 struct Key< ChainPoint_< T, TSpec > > 149 { 150 typedef typename Key< T >::Type Type; 151 }; 152 153 template< typename T > 154 struct Key< WrapperPoint_< T > > 155 { 156 typedef typename Key< T >::Type Type; 157 }; 158 159 template< typename T > 160 struct Key< MetaFragment_< T > > 161 { 162 typedef typename Key< T >::Type Type; 163 }; 164 165 166 167 /////////////////////////////////////////////////////////////////////////////////////////////////////////////// 168 // 169 // functions 170 // 171 /////////////////////////////////////////////////////////////////////////////////////////////////////// 172 173 174 /* 175 template< typename TSource, typename TDest, typename TScoreValue, typename TScoreType, typename TStructuring, typename TAlgorithm > 176 TScoreValue 177 computeChain( TSource & source, TDest & dest, Score< TScoreValue, TScoreType > const & score_, TAlgorithm tag, TStructuring structuring ); 178 179 template< typename TSource, typename TDest, typename TCostModel, typename TScoreValue, typename TScoreType, typename TStructuring, typename TAlgorithm, typename TSpec > 180 TScoreValue 181 _computeChain( TSource & source, TDest & dest, TCostModel type, Score< TScoreValue, TScoreType > const & score_, TAlgorithm tag, TStructuring structuring, TSpec spec ); 182 183 */ 184 template< typename TSource, typename TDest, typename TScoreValue, typename TScoreType, typename TStructuring > 185 TScoreValue 186 computeChain( TSource & source, TDest & dest, Score< TScoreValue, TScoreType > const & score_, TStructuring structuring ) 187 { 188 189 // SEQAN_CHECK2( scoreGapOpen( score_ ) == scoreGapExtend( score_ ), "Chaining only supports linear gap costs" ) 190 // SEQAN_CHECK2( scoreGapOpen( score_ ) >= 0 && scoreGapExtend( score_ ) >= 0 && scoreMismatch( score_ ) >= 0, "Scores should be positive" ) 191 switch( dimension( value( begin( source ) ) ) ) 192 { 193 case 1: SEQAN_REPORT("One dimensional chaining not supported") 194 return 0; 195 case 2: if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 ) 196 { 197 return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Array< 1 > >() ); 198 } 199 else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 ) 200 { 201 if( scoreMismatch( score_ ) == scoreGap( score_ ) ) 202 return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Array< 1 > >() ); 203 else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) ) 204 return _computeChain( source, dest, GSumOfPairCost(), score_, structuring,ChainSpecType_< Array< 2 > >() ); 205 } 206 SEQAN_ASSERT2( false, "Gap/mismatch model not supported" ); 207 break; 208 case 3: if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 ) 209 return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Array< 2 > >() ); 210 else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 ) 211 { 212 if( scoreMismatch( score_ ) == scoreGap( score_ ) ) 213 return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Array< 2 > >() ); 214 else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) ) 215 return _computeChain( source, dest, GSumOfPairCost(), score_, structuring,ChainSpecType_< Array< 3 > >() ); 216 } 217 SEQAN_ASSERT2( false, "Gap/mismatch model not supported" ); 218 break; 219 case 4: if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 ) 220 return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Array< 3 > >() ); 221 else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 ) 222 { 223 if( scoreMismatch( score_ ) == scoreGap( score_ ) ) 224 return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Array< 3 > >() ); 225 else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) ) 226 return _computeChain( source, dest, GSumOfPairCost(), score_, structuring,ChainSpecType_< Array< 4 > >() ); 227 } 228 SEQAN_ASSERT2( false, "Gap/mismatch model not supported" ); 229 break; 230 case 5: if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 ) 231 return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Array< 4 > >() ); 232 else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 ) 233 { 234 if( scoreMismatch( score_ ) == scoreGap( score_ ) ) 235 return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Array< 4 > >() ); 236 else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) ) 237 return _computeChain( source, dest, GSumOfPairCost(), score_, structuring,ChainSpecType_< Array< 5 > >() ); 238 } 239 SEQAN_ASSERT2( false, "Gap/mismatch model not supported" ); 240 break; 241 default:if( scoreMismatch( score_ ) == 0 && scoreGap( score_ ) == 0 ) 242 return _computeChain( source, dest, GZeroCost(), score_, structuring, ChainSpecType_< Default >() ); 243 else if( scoreMismatch( score_ ) > 0 && scoreGap( score_ ) > 0 ) 244 { 245 if( scoreMismatch( score_ ) == scoreGap( score_ ) ) 246 return _computeChain( source, dest, GOneCost(), score_, structuring, ChainSpecType_< Default >() ); 247 else //if( scoreMismatch( score_ ) > 2 * scoreGapExtend( score_ ) ) 248 return _computeChain( source, dest, GSumOfPairCost(), score_, structuring, ChainSpecType_< Default >() ); 249 } 250 SEQAN_ASSERT2( false, "Gap/mismatch model not supported" ); 251 252 } 253 return minValue< TScoreValue >(); 254 } 255 256 /////////////////////////////////////////////////////////////////////////////////////////////////////// 257 258 template< typename TSource, typename TDest, typename TScoring> 259 inline typename Value<TScoring>::Type 260 globalChaining(TSource & source, 261 TDest & dest, 262 TScoring const & scoring, 263 RangetreeChaining) 264 { 265 typedef typename Iterator<TSource, Standard>::Type TSourceIterator; 266 typedef typename Iterator<TDest, Standard>::Type TDestIterator; 267 268 SEQAN_ASSERT(length(source)) 269 270 //transform coordinates to old style ("end is last item") 271 unsigned int dim = dimension(source[0]); 272 for (TSourceIterator it = begin(source, Standard()); it < end(source, Standard()); ++it) 273 { 274 for (unsigned int i = 0; i < dim; ++i) 275 { 276 _setLeftPosition(*it, i, leftPosition(*it, i) + 1); 277 } 278 } 279 280 //compute chain 281 typename Value<TScoring>::Type ret_value = computeChain(source, dest, scoring, SemiDeferred()); //Deferred(), Complete() 282 283 //retransform coordinates to new style ("end is behind last item") 284 for (TSourceIterator it = begin(source, Standard()); it < end(source, Standard()); ++it) 285 { 286 for (unsigned int i = 0; i < dim; ++i) 287 { 288 _setLeftPosition(*it, i, leftPosition(*it, i) - 1); 289 } 290 } 291 for (TDestIterator it = begin(dest, Standard()) + 1; it < end(dest, Standard()); ++it) 292 { 293 for (unsigned int i = 0; i < dim; ++i) 294 { 295 _setLeftPosition(*it, i, leftPosition(*it, i) - 1); 296 } 297 } 298 299 //adjust right positions of the end fragment 300 TDestIterator it2 = end(dest, Standard()); 301 --it2; 302 for (unsigned int i = 0; i < dim; ++i) 303 { 304 _setRightPosition(*it2, i, rightPosition(*it2, i) - 1); 305 } 306 307 //return score 308 return ret_value; 309 } 310 311 /////////////////////////////////////////////////////////////////////////////////////////////////////// 312 313 314 315 316 317 //implementation for Default 318 template< typename TSource, typename TDest, typename TScoring> 319 inline typename Value<TScoring>::Type 320 globalChaining(TSource & source, 321 TDest & dest, 322 TScoring const & scoring, 323 Default) 324 { 325 return globalChaining(source, dest, scoring, GenericChaining()); //default is GenericChaining 326 } 327 template< typename TSource, typename TDest, typename TValue> 328 inline TValue 329 globalChaining(TSource & source, 330 TDest & dest, 331 Score<TValue, Zero> const & scoring, 332 Default) 333 { 334 return globalChaining(source, dest, scoring, RangetreeChaining()); 335 } 336 template< typename TSource, typename TDest, typename TValue> 337 inline TValue 338 globalChaining(TSource & source, 339 TDest & dest, 340 Score<TValue, Manhattan> const & scoring, 341 Default) 342 { 343 return globalChaining(source, dest, scoring, RangetreeChaining()); 344 } 345 template< typename TSource, typename TDest, typename TValue> 346 inline TValue 347 globalChaining(TSource & source, 348 TDest & dest, 349 Score<TValue, ChainSoP> const & scoring, 350 Default) 351 { 352 return globalChaining(source, dest, scoring, RangetreeChaining()); 353 } 354 355 356 //chain(3) => chain(4) 357 template< typename TSource, typename TDest, typename TScoring> 358 inline typename Value<TScoring>::Type 359 globalChaining(TSource & source, 360 TDest & dest, 361 TScoring const & scoring) 362 { 363 return globalChaining(source, dest, scoring, Default()); 364 } 365 366 367 /////////////////////////////////////////////////////////////////////////////////////////////////////// 368 369 } 370 371 #endif // SEQAN_HEADER_CHAIN_BASE_H 372