1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 30 // 31 // ========================================================================== 32 // Author: Rene Rahn <rene.rahn@fu-berlin.de> 33 // ========================================================================== 34 // Implements the tracback algorithm for the banded chain alignment. 35 // First the section where both dp matrices intersect is computed to fing 36 // the glue poit between the traces of the current matrix and the next one. 37 // Afterwards the final traceback beginnging in this glue point is computed 38 // fir the current dp matrix. 39 // ========================================================================== 40 41 #ifndef INCLUDE_SEQAN_SEEDS_BANDED_CHAIN_ALIGNMENT_TRACEBACK_H_ 42 #define INCLUDE_SEQAN_SEEDS_BANDED_CHAIN_ALIGNMENT_TRACEBACK_H_ 43 44 namespace seqan { 45 46 // ============================================================================ 47 // Forwards 48 // ============================================================================ 49 50 // ============================================================================ 51 // Tags, Classes, Enums 52 // ============================================================================ 53 54 // ============================================================================ 55 // Metafunctions 56 // ============================================================================ 57 58 // ---------------------------------------------------------------------------- 59 // Metafunction PreferGapsAtEnd_ 60 // ---------------------------------------------------------------------------- 61 62 template <typename TFreeEndGaps, typename TMatrixSpec, typename TTracebackSpec> 63 struct PreferGapsAtEnd_<DPProfile_<BandedChainAlignment_<TFreeEndGaps, TMatrixSpec>, 64 AffineGaps, TTracebackSpec> > : False{}; 65 66 template <typename TFreeEndGaps, typename TTracebackSpec> 67 struct PreferGapsAtEnd_<DPProfile_<BandedChainAlignment_<TFreeEndGaps, BandedChainFinalDPMatrix>, 68 AffineGaps, TTracebackSpec> > : True{}; 69 70 // ============================================================================ 71 // Functions 72 // ============================================================================ 73 74 // ---------------------------------------------------------------------------- 75 // Function _adaptLocalTracesToGlobalGrid() 76 // ---------------------------------------------------------------------------- 77 78 template <typename TTraceSet, typename TGridPoint> 79 inline void 80 _adaptLocalTracesToGlobalGrid(TTraceSet & traceSet, TGridPoint const & gridBegin) 81 { 82 typedef typename Value<TTraceSet>::Type TTraceString; 83 typedef typename Iterator<TTraceString>::Type TTraceStringIterator; 84 85 for (unsigned i = 0; i < length(traceSet); ++i) 86 { 87 for (TTraceStringIterator it = begin(traceSet[i]); it != end(traceSet[i]); ++it) 88 { 89 value(it)._horizontalBeginPos += gridBegin.i1; 90 value(it)._verticalBeginPos += gridBegin.i2; 91 } 92 } 93 } 94 95 // ---------------------------------------------------------------------------- 96 // Function _smoothGluePoint() 97 // ---------------------------------------------------------------------------- 98 99 template <typename TTraceSegment, typename TStringSpec, typename TSize> 100 inline void 101 _smoothGluePoint(String<TTraceSegment, TStringSpec> & tracePath, TSize referenceSize) 102 { 103 typedef typename Iterator<String<TTraceSegment, TStringSpec> >::Type TTraceSegmentsIterator; 104 105 TTraceSegmentsIterator itEndOldTrace = end(tracePath) - referenceSize; 106 TTraceSegmentsIterator itBeginNewTrace = itEndOldTrace - 1; 107 if (_getTraceValue(value(itEndOldTrace)) == _getTraceValue(value(itBeginNewTrace))) 108 { 109 _setLength(value(itEndOldTrace), length(value(itEndOldTrace)) + length(value(itBeginNewTrace))); 110 erase(tracePath, position(itBeginNewTrace, tracePath)); 111 } 112 } 113 114 // ---------------------------------------------------------------------------- 115 // Function _glueTracebacks() 116 // ---------------------------------------------------------------------------- 117 118 // TODO(rmaerker): What about using a set to find matching parts more efficiently. Optimize this code here! 119 template <typename TTraceSet> 120 inline void _glueTracebacks(TTraceSet & globalTraces, TTraceSet & localTraces) 121 { 122 typedef typename Value<TTraceSet>::Type TTraceSegments; 123 typedef typename Value<TTraceSegments>::Type TTraceSegment; 124 typedef typename Size<TTraceSegments>::Type TSize; 125 126 bool isGlued = false; 127 128 if (empty(globalTraces)) 129 { 130 globalTraces = localTraces; 131 return; 132 } 133 134 TSize lengthGlobalTraces = length(globalTraces); 135 TSize oldNumOfGlobalTraces = lengthGlobalTraces; 136 String<unsigned> elementsToErase; 137 138 for (unsigned j = 0; j < lengthGlobalTraces; ++j) 139 { 140 // traceback from back to front -> first trace segment is at end of sequences 141 TTraceSegment globalTraceEndPoint = value(begin(globalTraces[j])); 142 TSize numOfCurrElements = length(globalTraces[j]); 143 TSize numOfAddedTraces = 0; 144 // check for all existing paths if traces can be glued together. 145 bool isConnected = false; 146 for (unsigned i = 0; i < length(localTraces); ++i) 147 { 148 // traceback from back to front -> last trace segment is at beginning of sequences. 149 TTraceSegment localTraceBeginPoint = value(end(localTraces[i]) - 1); 150 // trace segments match in horizontal position 151 if (_getEndHorizontal(globalTraceEndPoint) == _getBeginHorizontal(localTraceBeginPoint)) 152 { 153 // trace segments match in vertical position 154 if (_getEndVertical(globalTraceEndPoint) == _getBeginVertical(localTraceBeginPoint)) 155 { // found a glue point between local and global trace 156 TSize numOfElementsToAdd = length(localTraces[i]); 157 158 if (isConnected) 159 { 160 // create a new traceback track. 161 TTraceSegments newTraceTrack; 162 resize(newTraceTrack, numOfCurrElements + numOfElementsToAdd, Generous()); 163 arrayMoveForward(begin(localTraces[i]), end(localTraces[i]), begin(newTraceTrack)); 164 arrayCopyForward(end(globalTraces[j]) - numOfCurrElements, end(globalTraces[j]), begin(newTraceTrack) + numOfElementsToAdd); 165 appendValue(globalTraces, newTraceTrack); 166 ++numOfAddedTraces; 167 continue; 168 } 169 // resize global traces such that new elements fit into it 170 resize(globalTraces[j], numOfCurrElements + numOfElementsToAdd, Generous()); 171 // shift old values to correct position in array after new elements will be added. 172 // use backward move in order to avoid overwriting when ranges overlap 173 arrayMoveBackward(begin(globalTraces[j]), begin(globalTraces[j]) + numOfCurrElements, 174 begin(globalTraces[j]) + numOfElementsToAdd); 175 // actually move new elements to global traces at it's begin 176 arrayMoveForward(begin(localTraces[i]), end(localTraces[i]), begin(globalTraces[j])); 177 isGlued = true; 178 isConnected = true; 179 } 180 } 181 } 182 if (!isConnected) 183 { 184 appendValue(elementsToErase, j); 185 } 186 else 187 { 188 // First, smooth the actual path we are currently gluing. 189 _smoothGluePoint(globalTraces[j], numOfCurrElements); 190 // Second, smooth all paths that have been added to the global traceback. 191 for (unsigned traceId = oldNumOfGlobalTraces; traceId < oldNumOfGlobalTraces + numOfAddedTraces; ++traceId) 192 _smoothGluePoint(globalTraces[traceId], numOfCurrElements); 193 oldNumOfGlobalTraces += numOfAddedTraces; 194 } 195 } 196 197 for (unsigned i = length(elementsToErase); i > 0; --i) 198 { 199 erase(globalTraces, elementsToErase[i-1]); // erase from behind to avoid accessing an element beyond the scope 200 } 201 SEQAN_ASSERT_EQ_MSG(isGlued, true, "Fatal error while trying to connect trace backs: No glue point available!"); 202 (void)isGlued; 203 } 204 205 // ---------------------------------------------------------------------------- 206 // Function _correctDPCellForAffineGaps() 207 // ---------------------------------------------------------------------------- 208 209 template <typename TScoreValue, typename TTraceValue> 210 inline void 211 _correctDPCellForAffineGaps(DPCell_<TScoreValue, LinearGaps> const &, TTraceValue /*lastTraceValue*/) 212 { 213 //no-op 214 } 215 216 template <typename TScoreValue, typename TTraceValue> 217 inline void 218 _correctDPCellForAffineGaps(DPCell_<TScoreValue, AffineGaps> & dpCell, TTraceValue lastTraceValue) 219 { 220 typedef DPCell_<TScoreValue, AffineGaps> TDPCell; 221 if (lastTraceValue & TraceBitMap_::DIAGONAL) 222 { 223 dpCell._verticalScore = DPCellDefaultInfinity<TDPCell>::VALUE; 224 dpCell._horizontalScore = DPCellDefaultInfinity<TDPCell>::VALUE; 225 } 226 else if (lastTraceValue & TraceBitMap_::VERTICAL) 227 dpCell._horizontalScore = DPCellDefaultInfinity<TDPCell>::VALUE; 228 else 229 dpCell._verticalScore = DPCellDefaultInfinity<TDPCell>::VALUE; 230 } 231 232 // ---------------------------------------------------------------------------- 233 // Function _computeTraceback() [BandedChainAlignment] 234 // ---------------------------------------------------------------------------- 235 236 template<typename TTarget, typename TDPTraceMatrixNavigator, typename TDPCell, typename TScoutSpec, 237 typename TSequenceH, typename TSequenceV, typename TBandFlag, typename TFreeEndGaps, typename TDPMatrixLocation, 238 typename TGapCosts, typename TTracebackSpec> 239 void _computeTraceback(TTarget & target, 240 TDPTraceMatrixNavigator & matrixNavigator, 241 unsigned maxHostPosition, 242 DPScout_<TDPCell, TScoutSpec> & dpScout, 243 TSequenceH const & seqH, 244 TSequenceV const & seqV, 245 DPBandConfig<TBandFlag> const & band, 246 DPProfile_<BandedChainAlignment_<TFreeEndGaps, TDPMatrixLocation>, TGapCosts, TTracebackSpec> const & dpProfile) 247 { 248 typedef DPScout_<TDPCell, TScoutSpec> TDPScout_; 249 typedef typename TDPScout_::TScoutState TScoutState_; 250 typedef typename TScoutState_::TInitCell TInitCell; 251 typedef typename Container<TDPTraceMatrixNavigator>::Type TContainer; 252 typedef typename Size<TContainer>::Type TSize; 253 //typedef typename MakeSigned<TSize>::Type TSignedSize; 254 typedef typename Position<TContainer>::Type TPosition; 255 typedef typename MakeSigned<TPosition>::Type TSignedPosition; 256 typedef typename Size<TSequenceH>::Type TSizeSeqH; 257 typedef typename Size<TSequenceV>::Type TSizeSeqV; 258 typedef typename TraceBitMap_::TTraceValue TTraceValue; 259 260 if (IsSameType<TTracebackSpec, TracebackOff>::VALUE) 261 return; 262 263 TSizeSeqH seqHSize = length(seqH); 264 TSizeSeqV seqVSize = length(seqV); 265 266 // Determine whether or not we place gaps to the left. 267 typedef typename IsGapsLeft_<TTracebackSpec>::Type TIsGapsLeft; 268 269 // TODO(rmaerker): Define separate function for this. 270 // Set to the correct position within the trace matrix. 271 _setToPosition(matrixNavigator, maxHostPosition); 272 273 SEQAN_ASSERT_LEQ(coordinate(matrixNavigator, +DPMatrixDimension_::HORIZONTAL), seqHSize); 274 SEQAN_ASSERT_LEQ(coordinate(matrixNavigator, +DPMatrixDimension_::VERTICAL), seqVSize); 275 276 TTraceValue traceValue = value(matrixNavigator); 277 TTraceValue lastTraceValue = _retrieveInitialTraceDirection(traceValue, dpProfile); 278 279 TracebackCoordinator_<TPosition> tracebackCoordinator(coordinate(matrixNavigator, +DPMatrixDimension_::HORIZONTAL), 280 coordinate(matrixNavigator, +DPMatrixDimension_::VERTICAL), 281 dpScout._dpScoutStatePtr->_horizontalNextGridOrigin, 282 dpScout._dpScoutStatePtr->_verticalNextGridOrigin, 283 band, seqHSize, seqVSize); 284 285 // Record trailing gaps if any. 286 if (IsSameType<TDPMatrixLocation, BandedChainFinalDPMatrix>::VALUE) 287 { 288 if (tracebackCoordinator._currRow != seqVSize) 289 _recordSegment(target, seqHSize, tracebackCoordinator._currRow, seqVSize - tracebackCoordinator._currRow, 290 +TraceBitMap_::VERTICAL); 291 if (tracebackCoordinator._currColumn != seqHSize) 292 _recordSegment(target, tracebackCoordinator._currColumn, tracebackCoordinator._currRow, seqHSize - 293 tracebackCoordinator._currColumn, +TraceBitMap_::HORIZONTAL); 294 295 _computeTraceback(target, matrixNavigator, position(matrixNavigator), seqH, seqV, band, dpProfile); 296 return; 297 } 298 299 TSize fragmentLength = 0; 300 TTarget tmp; 301 while(!_hasReachedEnd(tracebackCoordinator) && traceValue != +TraceBitMap_::NONE) 302 _doTraceback(tmp, matrixNavigator, traceValue, lastTraceValue, fragmentLength, tracebackCoordinator, TGapCosts(), TIsGapsLeft()); 303 304 TSignedPosition horizontalInitPos = static_cast<TSignedPosition>(tracebackCoordinator._currColumn) - 305 static_cast<TSignedPosition>(tracebackCoordinator._endColumn); 306 TSignedPosition verticalInitPos = static_cast<TSignedPosition>(tracebackCoordinator._currRow) - 307 static_cast<TSignedPosition>(tracebackCoordinator._endRow); 308 309 bool insertResult; 310 if (verticalInitPos <= 0) 311 { 312 _correctDPCellForAffineGaps(dpScout._dpScoutStatePtr->_horizontalInitNextMatrix[horizontalInitPos], lastTraceValue); 313 insertResult = dpScout._dpScoutStatePtr->_nextInitializationCells.insert(TInitCell(horizontalInitPos, 0, 314 dpScout._dpScoutStatePtr->_horizontalInitNextMatrix[horizontalInitPos])).second; 315 } 316 else 317 { 318 _correctDPCellForAffineGaps(dpScout._dpScoutStatePtr->_verticalInitNextMatrix[verticalInitPos], lastTraceValue); 319 insertResult = dpScout._dpScoutStatePtr->_nextInitializationCells.insert(TInitCell(0, verticalInitPos, 320 dpScout._dpScoutStatePtr->_verticalInitNextMatrix[verticalInitPos])).second; 321 } 322 323 // Now before we can continue at the current position, we might want to track a vertical/horizontal gap up to this position. 324 if (insertResult) 325 { 326 if (verticalInitPos < 0) // Here we are in a vertical gap. 327 _recordSegment(target, tracebackCoordinator._currColumn, tracebackCoordinator._currRow, -verticalInitPos, 328 lastTraceValue); 329 else if (horizontalInitPos < 0) // Here we are in a horizontal gap. 330 _recordSegment(target, tracebackCoordinator._currColumn, tracebackCoordinator._currRow, -horizontalInitPos, 331 lastTraceValue); 332 _computeTraceback(target, matrixNavigator, position(matrixNavigator), seqH, seqV, band, dpProfile); 333 } 334 335 if (IsSameType<TDPMatrixLocation, BandedChainInitialDPMatrix>::VALUE) 336 { 337 TPosition currCol = coordinate(matrixNavigator, +DPMatrixDimension_::HORIZONTAL); 338 TPosition currRow = coordinate(matrixNavigator, +DPMatrixDimension_::VERTICAL); 339 340 // Correct the row position. 341 if (IsSameType<TBandFlag, BandOn>::VALUE) 342 if (upperDiagonal(band) > 0) 343 if (currCol < tracebackCoordinator._breakpoint1) 344 if (currCol < tracebackCoordinator._breakpoint2) 345 currRow -= length(container(matrixNavigator), +DPMatrixDimension_::VERTICAL) - 1 + lowerDiagonal(band) - currCol; 346 // Record leading gaps if any. 347 if (currRow != 0u) 348 _recordSegment(target, 0, 0, currRow, +TraceBitMap_::VERTICAL); 349 if (currCol != 0u) 350 _recordSegment(target, 0, 0, currCol, +TraceBitMap_::HORIZONTAL); 351 } 352 } 353 354 // ---------------------------------------------------------------------------- 355 // Function _computeTraceback() [BandedChainAlignment, global interface] 356 // ---------------------------------------------------------------------------- 357 358 template <typename TTarget, typename TDPTraceMatrixNavigator, typename TDPScout, typename TSequenceH, typename TSequenceV, 359 typename TBandFlag, typename TFreeEndGaps, typename TDPMatrixLocation, typename TGapCosts, typename TTracebackSpec> 360 void _computeTraceback(StringSet<TTarget> & targetSet, 361 TDPTraceMatrixNavigator & matrixNavigator, 362 TDPScout & dpScout, 363 TSequenceH const & seqH, 364 TSequenceV const & seqV, 365 DPBandConfig<TBandFlag> const & band, 366 DPProfile_<BandedChainAlignment_<TFreeEndGaps, TDPMatrixLocation>, TGapCosts, TTracebackSpec> const & dpProfile) 367 { 368 typedef typename TDPScout::TMaxHostPositionString TMaxHostPositions; 369 typedef typename Iterator<TMaxHostPositions, Standard>::Type TMaxHostPositionsIterator; 370 371 // We have to clear the nextInitalization cells here. 372 dpScout._dpScoutStatePtr->_nextInitializationCells.clear(); 373 TMaxHostPositions & tracebackCandidates = maxHostPositions(dpScout); 374 375 TMaxHostPositionsIterator itTraceCandidates = begin(tracebackCandidates, Standard()); 376 377 for (;itTraceCandidates != end(tracebackCandidates, Standard()); ++itTraceCandidates) 378 { 379 TTarget tmpTarget; 380 _computeTraceback(tmpTarget, matrixNavigator, *itTraceCandidates, dpScout, seqH, seqV, band, dpProfile); 381 if (!empty(tmpTarget)) 382 appendValue(targetSet, tmpTarget); // We only need to add the tmpTarget if it gives us a new alignment. 383 } 384 } 385 386 } // namespace seqan 387 388 #endif // #ifndef INCLUDE_SEQAN_SEEDS_BANDED_CHAIN_ALIGNMENT_TRACEBACK_H_ 389