1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 30 // 31 // ========================================================================== 32 // Author: David Weese <david.weese@fu-berlin.de> 33 // ========================================================================== 34 35 #ifndef SEQAN_HEADER_REPEAT_BASE_H 36 #define SEQAN_HEADER_REPEAT_BASE_H 37 38 #if SEQAN_ENABLE_PARALLELISM 39 #include <seqan/parallel.h> 40 #endif // #if SEQAN_ENABLE_PARALLELISM 41 42 namespace seqan { 43 44 /*! 45 * @class Repeat 46 * @headerfile <seqan/index.h> 47 * @brief Store information about a repeat. 48 * 49 * @signature template <typename TPos, typename TPeriod> 50 * struct Repeat; 51 * 52 * @tparam TPeriod Type to use for storing the repeat period. Default: 1 53 * @tparam TPos Type to use for storing positions. 54 * 55 * @see findRepeats 56 * 57 * @var TPos Repeat::endPosition; 58 * @brief The end position of the repeat of type <tt>TPos</tt>. 59 * 60 * @var TPos Repeat::beginPosition; 61 * @brief The begin position of the repeat of type <tt>TPos</tt>. 62 * 63 * @var TPeriod Repeat::period; 64 * @brief The period of the repeat of type <tt>TPeriod</tt>. 65 */ 66 67 template <typename TPos, typename TPeriod> 68 struct Repeat { 69 TPos beginPosition; 70 TPos endPosition; 71 TPeriod period; 72 }; 73 74 template <typename TPos, typename TPeriod> 75 struct Value< Repeat<TPos, TPeriod> > { 76 typedef TPos Type; 77 }; 78 79 template <typename TPos, typename TPeriod> 80 struct Size< Repeat<TPos, TPeriod> > { 81 typedef TPeriod Type; 82 }; 83 84 85 template <typename TSize> 86 struct RepeatFinderParams { 87 TSize minRepeatLen; 88 TSize maxPeriod; 89 }; 90 91 // custom TSpec for our customized wotd-Index 92 struct TRepeatFinder; 93 94 template <typename TText> 95 struct Cargo<Index<TText, IndexWotd<TRepeatFinder> > > 96 { 97 typedef Index<TText, IndexWotd<TRepeatFinder> > TIndex; 98 typedef typename Size<TIndex>::Type TSize; 99 typedef RepeatFinderParams<TSize> Type; 100 }; 101 102 103 // node predicate 104 template <typename TText, typename TSpec> 105 bool nodePredicate(Iter<Index<TText, IndexWotd<TRepeatFinder> >, TSpec> &it) 106 { 107 // return countOccurrences(it) * nodeDepth(it) >= cargo(container(it)).minRepeatLen; 108 return countOccurrences(it) * repLength(it) >= cargo(container(it)).minRepeatLen; 109 } 110 111 // monotonic hull 112 template <typename TText, typename TSpec> 113 bool nodeHullPredicate(Iter<Index<TText, IndexWotd<TRepeatFinder> >, TSpec> &it) 114 { 115 // return nodeDepth(it) <= cargo(container(it)).maxPeriod; 116 return repLength(it) <= cargo(container(it)).maxPeriod; 117 } 118 119 template <typename TPos> 120 struct RepeatLess_ : public std::binary_function<TPos, TPos, bool> 121 { 122 // key less 123 inline bool operator() (TPos const &a, TPos const &b) const { 124 return posLess(a, b); 125 } 126 }; 127 128 template <typename TValue> 129 inline bool _repeatMaskValue(TValue const &) 130 { 131 // TODO(holtgrew): Maybe use unknownValue<TValue>() instead of specializing for all alphabets, especially since we have Rna5 now and might want Rna5Q later. 132 return false; 133 } 134 135 template <> 136 inline bool _repeatMaskValue(Dna5 const &val) 137 { 138 return val == unknownValue<Dna5>(); // 'N' 139 } 140 141 template <> 142 inline bool _repeatMaskValue(Dna5Q const &val) 143 { 144 return val == unknownValue<Dna5Q>(); // 'N' 145 } 146 147 template <> 148 inline bool _repeatMaskValue(Iupac const &val) 149 { 150 return val == unknownValue<Iupac>(); // 'N' 151 } 152 /* 153 template <> 154 inline bool _repeatMaskValue(AminoAcid val) 155 { 156 return val == 'X'; 157 } 158 */ 159 /*! 160 * @fn findRepeats 161 * @headerfile <seqan/index.h> 162 * @brief Search for repeats in a text. 163 * 164 * @signature void findRepeats(repeatString, text, minRepeatLength[, maxPeriod]); 165 * 166 * @param[out] repeatString A @link String @endlink of @link Repeat @endlink objects. 167 * @param[in] text The text to search repeats in. Types: @link ContainerConcept @endlink 168 * @param[in] minRepeatLength The minimum length each reported repeat must have. 169 * @param[in] maxPeriod Optionally, the maximal period that reported repeats can have. Default: 1 170 * 171 * Subsequences of undefined values/<tt>N</tt>s will always be reported. 172 * 173 * @section Examples 174 * 175 * The following demonstrates finding repeats of period 3. 176 * 177 * @include demos/index/find_repeats.cpp 178 * 179 * @code{.console} 180 * # of repeats: 15 181 * i == 0, beginPosition = 3, endPosition = 7, period = 1 182 * i == 1, beginPosition = 46, endPosition = 53, period = 1 183 * i == 2, beginPosition = 101, endPosition = 105, period = 1 184 * i == 3, beginPosition = 105, endPosition = 109, period = 1 185 * i == 4, beginPosition = 164, endPosition = 169, period = 1 186 * i == 5, beginPosition = 291, endPosition = 297, period = 1 187 * i == 6, beginPosition = 319, endPosition = 327, period = 1 188 * i == 7, beginPosition = 400, endPosition = 404, period = 1 189 * i == 8, beginPosition = 442, endPosition = 446, period = 1 190 * i == 9, beginPosition = 468, endPosition = 473, period = 1 191 * i == 10, beginPosition = 476, endPosition = 480, period = 1 192 * i == 11, beginPosition = 507, endPosition = 513, period = 1 193 * i == 12, beginPosition = 561, endPosition = 566, period = 1 194 * i == 13, beginPosition = 623, endPosition = 627, period = 1 195 * i == 14, beginPosition = 655, endPosition = 659, period = 1 196 * @endcode 197 * 198 * @see AlphabetWithUnknownValueConcept#unknownValue 199 * @see Repeat 200 */ 201 // TODO(holtgrew): minRepeatLength is 1-off. 202 203 // period-1 optimization 204 template <typename TRepeatStore, typename TString, typename TRepeatSize> 205 inline void findRepeats(TRepeatStore &repString, TString const &text, TRepeatSize minRepeatLen) 206 { 207 typedef typename Value<TRepeatStore>::Type TRepeat; 208 typedef typename Iterator<TString const>::Type TIterator; 209 typedef typename Size<TString>::Type TSize; 210 211 #if SEQAN_ENABLE_PARALLELISM 212 typedef typename Value<TString>::Type TValue; 213 214 if (length(text) > (TSize)(omp_get_max_threads() * 2 * minRepeatLen)) { 215 // std::cerr << ">>> PARALLEL WABOOGIE!" << std::endl; 216 // std::cerr << "omp_get_max_threads() == " << omp_get_max_threads() << std::endl; 217 // Parallel case. 218 219 // NOTE(holtgrew): The minimum text length check above makes it impossible that more than two chunks are 220 // required to form an otherwise too short repeat. 221 222 // TODO(holtgrew): Load balancing? Probably not worth it. 223 String<TSize> splitters; 224 String<TRepeatStore> threadLocalStores; 225 226 // Each threads finds repeats on its chunk in parallel. 227 #pragma omp parallel 228 { 229 // We have to determine the number of available threads at this point. We will use the number of thread 230 // local stores to determin the number of available threads later on. 231 #pragma omp master 232 { 233 // std::cerr << "omp_get_num_threads() == " << omp_get_num_threads() << std::endl; 234 computeSplitters(splitters, length(text), omp_get_num_threads()); 235 resize(threadLocalStores, omp_get_num_threads()); 236 } // end of #pragma omp master 237 #pragma omp barrier 238 239 int const t = omp_get_thread_num(); 240 TRepeatStore & store = threadLocalStores[t]; 241 242 TRepeat rep; 243 rep.beginPosition = 0; 244 rep.endPosition = 0; 245 rep.period = 1; 246 247 // Flags used for force-adding repeats for the chunks that have a left/right neighbour. 248 bool forceFirst = t > 0; 249 bool forceLast = (t + 1) < omp_get_num_threads(); 250 // #pragma omp critical 251 // std::cerr << "omp_get_num_threads() == " << omp_get_num_threads() << std::endl; 252 253 TIterator it = iter(text, splitters[t], Standard()); 254 TIterator itEnd = iter(text, splitters[t + 1], Standard()); 255 if (it != itEnd) 256 { 257 TValue last = *it; 258 TSize repLeft = 0; 259 TSize repRight = 1; 260 261 for (++it; it != itEnd; ++it, ++repRight) 262 { 263 if (*it != last) 264 { 265 // #pragma omp critical 266 // std::cerr << "t == " << t << ", last == " << last << ", repRight = " << repRight << ", repLeft == " << repLeft << ", minRepeatLen = " << minRepeatLen << ", forceFirst = " << forceFirst << std::endl; 267 if (_repeatMaskValue(last) || (TRepeatSize)(repRight - repLeft) > minRepeatLen || forceFirst) 268 { 269 forceFirst = false; 270 // insert repeat 271 rep.beginPosition = splitters[t] + repLeft; 272 rep.endPosition = splitters[t] + repRight; 273 // #pragma omp critical 274 // std::cerr << " t == " << t << ", append" << std::endl; 275 appendValue(store, rep); 276 } 277 repLeft = repRight; 278 last = *it; 279 } 280 } 281 // #pragma omp critical 282 // std::cerr << "t == " << t << ", last == " << last << ", repRight = " << repRight << ", repLeft == " << repLeft << ", minRepeatLen = " << minRepeatLen << ", forceLast = " << forceLast << std::endl; 283 if (_repeatMaskValue(last) || (TRepeatSize)(repRight - repLeft) > minRepeatLen || forceLast) 284 { 285 // Insert repeat but only if it is not already in there. 286 if (empty(store) || (back(store).beginPosition != repLeft && back(store).endPosition != repRight)) 287 { 288 rep.beginPosition = splitters[t] + repLeft; 289 rep.endPosition = splitters[t] + repRight; 290 // #pragma omp critical 291 // std::cerr << " t == " << t << ", append" << std::endl; 292 appendValue(store, rep); 293 } 294 } 295 } 296 } // end of #pragma omp parallel 297 298 // std::cerr << ",-- REPEATS BEFORE MENDING\n"; 299 // for (unsigned i = 0; i < length(threadLocalStores); ++i) 300 // { 301 // std::cerr << "| i = " << i << std::endl; 302 // for (unsigned j = 0; j < length(threadLocalStores[i]); ++j) 303 // std::cerr << "| threadLocalStores[" << i << "][" << j << "] == {" << threadLocalStores[i][j].beginPosition << ", " << threadLocalStores[i][j].endPosition << "}" << std::endl; 304 // } 305 // std::cerr << "`--" << std::endl; 306 307 // Mend the splice points. 308 // 309 // We will copy out infixes described by fromPositions. 310 String<Pair<TSize> > fromPositions; 311 resize(fromPositions, length(threadLocalStores)); 312 for (unsigned i = 0; i < length(fromPositions); ++i) 313 { 314 fromPositions[i].i1 = 0; 315 fromPositions[i].i2 = length(threadLocalStores[i]); 316 } 317 // First, merge repeats spanning blocks. Do this iteratively until all has been merged. 318 bool anyChange; 319 do 320 { 321 anyChange = false; 322 int lastNonEmpty = -1; 323 for (unsigned i = 0; i < length(threadLocalStores); ++i) 324 { 325 if (fromPositions[i].i1 == fromPositions[i].i2) 326 continue; // Skip empty buckets. 327 328 if (lastNonEmpty != -1) 329 { 330 bool const adjacent = back(threadLocalStores[lastNonEmpty]).endPosition == front(threadLocalStores[i]).beginPosition; 331 bool const charsEqual = text[back(threadLocalStores[lastNonEmpty]).beginPosition] == text[front(threadLocalStores[i]).beginPosition]; 332 if (adjacent && charsEqual) 333 { 334 anyChange = true; 335 back(threadLocalStores[lastNonEmpty]).endPosition = front(threadLocalStores[i]).endPosition; 336 fromPositions[i].i1 += 1; 337 } 338 } 339 340 if (fromPositions[i].i1 != fromPositions[i].i2) 341 lastNonEmpty = i; 342 } 343 } 344 while (anyChange); 345 // Then, remove any repeats in the beginning and end of blocks that are too short. 346 for (unsigned i = 0; i < length(threadLocalStores); ++i) 347 { 348 if (fromPositions[i].i1 == fromPositions[i].i2) 349 continue; 350 unsigned j = fromPositions[i].i1; 351 TRepeatSize len = threadLocalStores[i][j].endPosition - threadLocalStores[i][j].beginPosition; 352 if (!_repeatMaskValue(text[threadLocalStores[i][j].beginPosition]) && // Never remove mask value. 353 len <= minRepeatLen) 354 fromPositions[i].i1 += 1; 355 if (fromPositions[i].i1 == fromPositions[i].i2) 356 continue; 357 j = fromPositions[i].i2 - 1; 358 len = threadLocalStores[i][j].endPosition - threadLocalStores[i][j].beginPosition; 359 if (!_repeatMaskValue(text[threadLocalStores[i][j].beginPosition]) && // Never remove mask value. 360 len <= minRepeatLen) 361 fromPositions[i].i2 -= 1; 362 } 363 // Last, build splitters for output in parallel. 364 String<unsigned> outSplitters; 365 appendValue(outSplitters, 0); 366 for (unsigned i = 0; i < length(threadLocalStores); ++i) 367 appendValue(outSplitters, back(outSplitters) + fromPositions[i].i2 - fromPositions[i].i1); 368 369 // std::cerr << ",-- REPEATS AFTER MENDING\n"; 370 // for (unsigned i = 0; i < length(threadLocalStores); ++i) 371 // { 372 // std::cerr << "| i = " << i << std::endl; 373 // std::cerr << "`--, fromPositions[" << i << "] = (" << fromPositions[i].i1 << ", " << fromPositions[i].i2 << std::endl; 374 // for (unsigned j = 0; j < length(threadLocalStores[i]); ++j) 375 // std::cerr << " | threadLocalStores[" << i << "][" << j << "] == {" << threadLocalStores[i][j].beginPosition << ", " << threadLocalStores[i][j].endPosition << "}" << std::endl; 376 // } 377 // std::cerr << " `--" << std::endl; 378 379 // Allocate memory. 380 clear(repString); 381 resize(repString, back(outSplitters)); 382 383 // Copy back the repeats in parallel. 384 unsigned nt = length(threadLocalStores); 385 (void) nt; // Otherwise, GCC 4.6 warns, does not see it used in pragma clause below. 386 #pragma omp parallel num_threads(nt) 387 { 388 int const t = omp_get_thread_num(); 389 arrayCopy(iter(threadLocalStores[t], fromPositions[t].i1, Standard()), 390 iter(threadLocalStores[t], fromPositions[t].i2, Standard()), 391 iter(repString, outSplitters[t], Standard())); 392 } // end of #pragma omp parallel 393 } else { 394 #endif // #if SEQAN_ENABLE_PARALLELISM 395 // Sequential case. 396 TRepeat rep; 397 rep.period = 1; 398 clear(repString); 399 400 TIterator it = begin(text, Standard()); 401 TIterator itEnd = end(text, Standard()); 402 if (it == itEnd) return; 403 404 TSize repLen = 1; 405 for (++it; it != itEnd; ++it) 406 { 407 if (*it != *(it-1)) 408 { 409 if (_repeatMaskValue(*(it-1)) || repLen > (TSize)minRepeatLen) 410 { 411 // insert repeat 412 rep.endPosition = it - begin(text, Standard()); 413 rep.beginPosition = rep.endPosition - repLen; 414 // std::cerr<<"left:"<<rep.beginPosition<<" right:"<<rep.endPosition<<" length:"<<posSub(rep.endPosition,rep.beginPosition)<<" period:"<<rep.period<<std::endl; 415 appendValue(repString, rep); 416 } 417 repLen = 1; 418 } else 419 ++repLen; 420 } 421 if (_repeatMaskValue(*(it-1)) || repLen > (TSize)minRepeatLen) 422 { 423 // insert repeat 424 rep.endPosition = length(text); 425 rep.beginPosition = rep.endPosition - repLen; 426 // std::cerr<<"left:"<<rep.beginPosition<<" right:"<<rep.endPosition<<" length:"<<posSub(rep.endPosition,rep.beginPosition)<<" period:"<<rep.period<<std::endl; 427 appendValue(repString, rep); 428 } 429 #if SEQAN_ENABLE_PARALLELISM 430 } 431 #endif // #if SEQAN_ENABLE_PARALLELISM 432 // #pragma omp critical 433 // { 434 // std::cerr << "thread #" << omp_get_thread_num() << " REPEATS:"; 435 // for (unsigned i = 0; i < length(repString); ++i) { 436 // std::cerr << " (" << repString[i].beginPosition << ", " << repString[i].endPosition << ", " << repString[i].period << ")"; 437 // } 438 // std::cerr << std::endl; 439 // } 440 } 441 442 // TODO(holtgrew): Why for TString const and StringSet<> const? 443 template <typename TRepeatStore, typename TString, typename TSpec, typename TRepeatSize> 444 inline void findRepeats(TRepeatStore &repString, StringSet<TString, TSpec> const &text, TRepeatSize minRepeatLen) 445 { 446 typedef typename Value<TRepeatStore>::Type TRepeat; 447 typedef typename Iterator<TString>::Type TIterator; 448 typedef typename Value<TString>::Type TValue; 449 typedef typename Size<TString>::Type TSize; 450 451 TRepeat rep; 452 rep.period = 1; 453 clear(repString); 454 455 for (unsigned i = 0; i < length(text); ++i) 456 { 457 TIterator it = begin(text[i], Standard()); 458 TIterator itEnd = end(text[i], Standard()); 459 if (it == itEnd) continue; 460 461 TValue last = *it; 462 TSize repLeft = 0; 463 TSize repRight = 1; 464 rep.beginPosition.i1 = i; 465 rep.endPosition.i1 = i; 466 467 for (++it; it != itEnd; ++it, ++repRight) 468 { 469 if (last != *it) 470 { 471 if (_repeatMaskValue(last) || (TRepeatSize)(repRight - repLeft) > minRepeatLen) 472 { 473 // insert repeat 474 rep.beginPosition.i2 = repLeft; 475 rep.endPosition.i2 = repRight; 476 // std::cerr<<"left:"<<rep.beginPosition<<" right:"<<rep.endPosition<<" length:"<<posSub(rep.endPosition,rep.beginPosition)<<" period:"<<rep.period<<std::endl; 477 appendValue(repString, rep); 478 } 479 repLeft = repRight; 480 last = *it; 481 } 482 } 483 if (_repeatMaskValue(last) || (TRepeatSize)(repRight - repLeft) > minRepeatLen) 484 { 485 // insert repeat 486 rep.beginPosition.i2 = repLeft; 487 rep.endPosition.i2 = repRight; 488 // std::cerr<<"left:"<<rep.beginPosition<<" right:"<<rep.endPosition<<" length:"<<posSub(rep.endPosition,rep.beginPosition)<<" period:"<<rep.period<<std::endl; 489 appendValue(repString, rep); 490 } 491 } 492 } 493 494 // main function 495 template <typename TRepeatStore, typename TText, typename TRepeatSize, typename TPeriodSize> 496 void findRepeats(TRepeatStore &repString, TText const &text, TRepeatSize minRepeatLen, TPeriodSize maxPeriod) 497 { 498 typedef Index<TText, IndexWotd<TRepeatFinder> > TIndex; 499 typedef typename Size<TIndex>::Type TSize; 500 typedef typename Iterator<TIndex, TopDown<ParentLinks<> > >::Type TNodeIterator; 501 typedef typename Fibre<TIndex, FibreSA>::Type const TSA; 502 typedef typename Infix<TSA>::Type TOccString; 503 typedef typename Iterator<TOccString>::Type TOccIterator; 504 505 typedef typename Value<TRepeatStore>::Type TRepeat; 506 typedef typename Value<TOccString>::Type TOcc; 507 508 typedef std::map<TOcc,TRepeat,RepeatLess_<TOcc> > TRepeatList; 509 510 if (maxPeriod < 1) return; 511 if (maxPeriod == 1) 512 { 513 findRepeats(repString, text, minRepeatLen); 514 return; 515 } 516 517 TIndex index(text); 518 TRepeatList list; 519 520 // set repeat finder parameters 521 cargo(index).minRepeatLen = minRepeatLen; 522 cargo(index).maxPeriod = maxPeriod; 523 524 TNodeIterator nodeIt(index); 525 TOccIterator itA, itB, itRepBegin, itEnd; 526 TRepeat rep; 527 for (; !atEnd(nodeIt); goNext(nodeIt)) 528 { 529 if (isRoot(nodeIt)) continue; 530 531 // get occurrences 532 TOccString occ = getOccurrences(nodeIt); 533 itA = begin(occ, Standard()); 534 itEnd = end(occ, Standard()); 535 itRepBegin = itB = itA; 536 537 TSize repLen = repLength(nodeIt); // representative length 538 if ((TSize)minRepeatLen <= repLen) continue; 539 540 TSize diff, period = 0; // period of current repeat 541 TSize repeatLen = 0; // overall length of current repeat 542 TSize minLen = minRepeatLen - repLen; // minimum repeat length minus length of representative 543 544 for (++itB; itB != itEnd; ++itB) 545 { 546 diff = posSub(*itB, *itA); 547 if (diff != period || getSeqNo(*itA) != getSeqNo(*itB)) 548 { 549 // is the repeat long enough? 550 if (repeatLen >= minLen) 551 // is the repeat self overlapping or connected? 552 if (parentRepLength(nodeIt) < period && period <= repLen) 553 { 554 // insert repeat 555 rep.beginPosition = *itRepBegin; 556 rep.endPosition = posAdd(*itA, period); 557 rep.period = period; 558 // std::cerr<<"left:"<<rep.beginPosition<<" right:"<<rep.endPosition<<" length:"<<posSub(rep.endPosition,rep.beginPosition)<<" period:"<<rep.period<<std::endl; 559 list.insert(std::pair<TOcc,TRepeat>(rep.beginPosition, rep)); 560 } 561 itRepBegin = itA; 562 period = diff; 563 repeatLen = 0; 564 } 565 repeatLen += period; 566 itA = itB; 567 } 568 569 // is the last repeat long enough? 570 if (repeatLen >= minLen) 571 // is the repeat self overlapping or connected? 572 if (parentRepLength(nodeIt) < period && period <= repLen) 573 { 574 // insert repeat 575 rep.beginPosition = *itRepBegin; 576 rep.endPosition = posAdd(*itA, period); 577 rep.period = period; 578 // std::cerr<<"left:"<<rep.beginPosition<<" right:"<<rep.endPosition<<" length:"<<posSub(rep.endPosition,rep.beginPosition)<<" period:"<<rep.period<<std::endl; 579 list.insert(std::pair<TOcc,TRepeat>(rep.beginPosition, rep)); 580 } 581 } 582 583 // copy low-complex regions to result string 584 clear(repString); 585 reserve(repString, list.size(), Exact()); 586 typename TRepeatList::const_iterator lit = list.begin(); 587 typename TRepeatList::const_iterator litEnd = list.end(); 588 for (TSize i = 0; lit != litEnd; ++lit, ++i) 589 appendValue(repString, (*lit).second); 590 } 591 592 } // namespace seqan 593 594 #endif 595