1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef SEQAN_HEADER_REPEAT_BASE_H
36 #define SEQAN_HEADER_REPEAT_BASE_H
37 
38 #if SEQAN_ENABLE_PARALLELISM
39 #include <seqan/parallel.h>
40 #endif  // #if SEQAN_ENABLE_PARALLELISM
41 
42 namespace seqan {
43 
44 /*!
45  * @class Repeat
46  * @headerfile <seqan/index.h>
47  * @brief Store information about a repeat.
48  *
49  * @signature template <typename TPos, typename TPeriod>
50  *            struct Repeat;
51  *
52  * @tparam TPeriod Type to use for storing the repeat period. Default: 1
53  * @tparam TPos Type to use for storing positions.
54  *
55  * @see findRepeats
56  *
57  * @var TPos Repeat::endPosition;
58  * @brief The end position of the repeat of type <tt>TPos</tt>.
59  *
60  * @var TPos Repeat::beginPosition;
61  * @brief The begin position of the repeat of type <tt>TPos</tt>.
62  *
63  * @var TPeriod Repeat::period;
64  * @brief The period of the repeat of type <tt>TPeriod</tt>.
65  */
66 
67     template <typename TPos, typename TPeriod>
68     struct Repeat {
69         TPos        beginPosition;
70         TPos        endPosition;
71         TPeriod        period;
72     };
73 
74     template <typename TPos, typename TPeriod>
75     struct Value< Repeat<TPos, TPeriod> > {
76         typedef TPos Type;
77     };
78 
79     template <typename TPos, typename TPeriod>
80     struct Size< Repeat<TPos, TPeriod> > {
81         typedef TPeriod Type;
82     };
83 
84 
85     template <typename TSize>
86     struct RepeatFinderParams {
87         TSize minRepeatLen;
88         TSize maxPeriod;
89     };
90 
91     // custom TSpec for our customized wotd-Index
92     struct TRepeatFinder;
93 
94     template <typename TText>
95     struct Cargo<Index<TText, IndexWotd<TRepeatFinder> > >
96     {
97         typedef Index<TText, IndexWotd<TRepeatFinder> >    TIndex;
98         typedef typename Size<TIndex>::Type                    TSize;
99         typedef RepeatFinderParams<TSize>                    Type;
100     };
101 
102 
103     // node predicate
104     template <typename TText, typename TSpec>
105     bool nodePredicate(Iter<Index<TText, IndexWotd<TRepeatFinder> >, TSpec> &it)
106     {
107 //        return countOccurrences(it) * nodeDepth(it) >= cargo(container(it)).minRepeatLen;
108         return countOccurrences(it) * repLength(it) >= cargo(container(it)).minRepeatLen;
109     }
110 
111     // monotonic hull
112     template <typename TText, typename TSpec>
113     bool nodeHullPredicate(Iter<Index<TText, IndexWotd<TRepeatFinder> >, TSpec> &it)
114     {
115 //        return nodeDepth(it) <= cargo(container(it)).maxPeriod;
116         return repLength(it) <= cargo(container(it)).maxPeriod;
117     }
118 
119     template <typename TPos>
120     struct RepeatLess_ : public std::binary_function<TPos, TPos, bool>
121     {
122         // key less
123         inline bool operator() (TPos const &a, TPos const &b) const {
124             return posLess(a, b);
125         }
126     };
127 
128     template <typename TValue>
129     inline bool _repeatMaskValue(TValue const &)
130     {
131         // TODO(holtgrew): Maybe use unknownValue<TValue>() instead of specializing for all alphabets, especially since we have Rna5 now and might want Rna5Q later.
132         return false;
133     }
134 
135     template <>
136     inline bool _repeatMaskValue(Dna5 const &val)
137     {
138         return val == unknownValue<Dna5>(); // 'N'
139     }
140 
141     template <>
142     inline bool _repeatMaskValue(Dna5Q const &val)
143     {
144         return val == unknownValue<Dna5Q>(); // 'N'
145     }
146 
147     template <>
148     inline bool _repeatMaskValue(Iupac const &val)
149     {
150         return val == unknownValue<Iupac>(); // 'N'
151     }
152 /*
153     template <>
154     inline bool _repeatMaskValue(AminoAcid val)
155     {
156         return val == 'X';
157     }
158 */
159 /*!
160  * @fn findRepeats
161  * @headerfile <seqan/index.h>
162  * @brief Search for repeats in a text.
163  *
164  * @signature void findRepeats(repeatString, text, minRepeatLength[, maxPeriod]);
165  *
166  * @param[out] repeatString    A @link String @endlink of @link Repeat @endlink objects.
167  * @param[in]  text            The text to search repeats in.  Types: @link ContainerConcept @endlink
168  * @param[in]  minRepeatLength The minimum length each reported repeat must have.
169  * @param[in]  maxPeriod       Optionally, the maximal period that reported repeats can have. Default: 1
170  *
171  * Subsequences of undefined values/<tt>N</tt>s will always be reported.
172  *
173  * @section Examples
174  *
175  * The following demonstrates finding repeats of period 3.
176  *
177  * @include demos/dox/index/find_repeats.cpp
178  *
179  * @code{.console}
180  * # of repeats: 15
181  * i == 0, beginPosition = 3, endPosition = 7, period = 1
182  * i == 1, beginPosition = 46, endPosition = 53, period = 1
183  * i == 2, beginPosition = 101, endPosition = 105, period = 1
184  * i == 3, beginPosition = 105, endPosition = 109, period = 1
185  * i == 4, beginPosition = 164, endPosition = 169, period = 1
186  * i == 5, beginPosition = 291, endPosition = 297, period = 1
187  * i == 6, beginPosition = 319, endPosition = 327, period = 1
188  * i == 7, beginPosition = 400, endPosition = 404, period = 1
189  * i == 8, beginPosition = 442, endPosition = 446, period = 1
190  * i == 9, beginPosition = 468, endPosition = 473, period = 1
191  * i == 10, beginPosition = 476, endPosition = 480, period = 1
192  * i == 11, beginPosition = 507, endPosition = 513, period = 1
193  * i == 12, beginPosition = 561, endPosition = 566, period = 1
194  * i == 13, beginPosition = 623, endPosition = 627, period = 1
195  * i == 14, beginPosition = 655, endPosition = 659, period = 1
196  * @endcode
197  *
198  * @see AlphabetWithUnknownValueConcept#unknownValue
199  * @see Repeat
200  */
201 // TODO(holtgrew): minRepeatLength is 1-off.
202 
203     // period-1 optimization
204     template <typename TRepeatStore, typename TString, typename TRepeatSize>
205     inline void findRepeats(TRepeatStore &repString, TString const &text, TRepeatSize minRepeatLen)
206     {
207         typedef typename Value<TRepeatStore>::Type    TRepeat;
208         typedef typename Iterator<TString const>::Type    TIterator;
209         typedef typename Size<TString>::Type        TSize;
210 
211 #if SEQAN_ENABLE_PARALLELISM
212         typedef typename Value<TString>::Type        TValue;
213 
214         if (length(text) > (TSize)(omp_get_max_threads() * 2 * minRepeatLen)) {
215             // std::cerr << ">>> PARALLEL WABOOGIE!" << std::endl;
216             // std::cerr << "omp_get_max_threads() == " << omp_get_max_threads() << std::endl;
217             // Parallel case.
218 
219             // NOTE(holtgrew): The minimum text length check above makes it impossible that more than two chunks are
220             // required to form an otherwise too short repeat.
221 
222             // TODO(holtgrew): Load balancing? Probably not worth it.
223             String<TSize> splitters;
224             String<TRepeatStore> threadLocalStores;
225 
226             // Each threads finds repeats on its chunk in parallel.
227             #pragma omp parallel
228             {
229                 // We have to determine the number of available threads at this point.  We will use the number of thread
230                 // local stores to determin the number of available threads later on.
231                 #pragma omp master
232                 {
233                     // std::cerr << "omp_get_num_threads() == " << omp_get_num_threads() << std::endl;
234                     computeSplitters(splitters, length(text), omp_get_num_threads());
235                     resize(threadLocalStores, omp_get_num_threads());
236                 }  // end of #pragma omp master
237                 #pragma omp barrier
238 
239                 int const t = omp_get_thread_num();
240                 TRepeatStore & store = threadLocalStores[t];
241 
242                 TRepeat rep;
243                 rep.beginPosition = 0;
244                 rep.endPosition = 0;
245                 rep.period = 1;
246 
247                 // Flags used for force-adding repeats for the chunks that have a left/right neighbour.
248                 bool forceFirst = t > 0;
249                 bool forceLast = (t + 1) < omp_get_num_threads();
250                 // #pragma omp critical
251                 // std::cerr << "omp_get_num_threads() == " << omp_get_num_threads() << std::endl;
252 
253                 TIterator it = iter(text, splitters[t], Standard());
254                 TIterator itEnd = iter(text, splitters[t + 1], Standard());
255                 if (it != itEnd)
256                 {
257                     TValue last = *it;
258                     TSize repLeft = 0;
259                     TSize repRight = 1;
260 
261                     for (++it; it != itEnd; ++it, ++repRight)
262                     {
263                         if (*it != last)
264                         {
265                             // #pragma omp critical
266                             // std::cerr << "t == " << t << ", last == " << last << ", repRight = " << repRight << ", repLeft == " << repLeft << ", minRepeatLen = " << minRepeatLen << ", forceFirst = " << forceFirst << std::endl;
267                             if (_repeatMaskValue(last) || (TRepeatSize)(repRight - repLeft) > minRepeatLen || forceFirst)
268                             {
269                                 forceFirst = false;
270                                 // insert repeat
271                                 rep.beginPosition = splitters[t] + repLeft;
272                                 rep.endPosition = splitters[t] + repRight;
273                                 // #pragma omp critical
274                                 // std::cerr << "  t == " << t << ", append" << std::endl;
275                                 appendValue(store, rep);
276                             }
277                             repLeft = repRight;
278                             last = *it;
279                         }
280                     }
281                     // #pragma omp critical
282                     // std::cerr << "t == " << t << ", last == " << last << ", repRight = " << repRight << ", repLeft == " << repLeft << ", minRepeatLen = " << minRepeatLen << ", forceLast = " << forceLast << std::endl;
283                     if (_repeatMaskValue(last) || (TRepeatSize)(repRight - repLeft) > minRepeatLen || forceLast)
284                     {
285                         // Insert repeat but only if it is not already in there.
286                         if (empty(store) || (back(store).beginPosition != repLeft && back(store).endPosition != repRight))
287                         {
288                             rep.beginPosition = splitters[t] + repLeft;
289                             rep.endPosition = splitters[t] + repRight;
290                             // #pragma omp critical
291                             // std::cerr << "  t == " << t << ", append" << std::endl;
292                             appendValue(store, rep);
293                         }
294                     }
295                 }
296             }  // end of #pragma omp parallel
297 
298             // std::cerr << ",-- REPEATS BEFORE MENDING\n";
299             // for (unsigned i = 0; i < length(threadLocalStores); ++i)
300             // {
301             //     std::cerr << "| i = " << i << std::endl;
302             //     for (unsigned j = 0; j < length(threadLocalStores[i]); ++j)
303             //         std::cerr << "| threadLocalStores[" << i << "][" << j << "] == {" << threadLocalStores[i][j].beginPosition << ", " << threadLocalStores[i][j].endPosition << "}" << std::endl;
304             // }
305             // std::cerr << "`--" << std::endl;
306 
307             // Mend the splice points.
308             //
309             // We will copy out infixes described by fromPositions.
310             String<Pair<TSize> > fromPositions;
311             resize(fromPositions, length(threadLocalStores));
312             for (unsigned i = 0; i < length(fromPositions); ++i)
313             {
314                 fromPositions[i].i1 = 0;
315                 fromPositions[i].i2 = length(threadLocalStores[i]);
316             }
317             // First, merge repeats spanning blocks.  Do this iteratively until all has been merged.
318             bool anyChange;
319             do
320             {
321                 anyChange = false;
322                 int lastNonEmpty = -1;
323                 for (unsigned i = 0; i < length(threadLocalStores); ++i)
324                 {
325                     if (fromPositions[i].i1 == fromPositions[i].i2)
326                         continue;  // Skip empty buckets.
327 
328                     if (lastNonEmpty != -1)
329                     {
330                         bool const adjacent = back(threadLocalStores[lastNonEmpty]).endPosition == front(threadLocalStores[i]).beginPosition;
331                         bool const charsEqual = text[back(threadLocalStores[lastNonEmpty]).beginPosition] == text[front(threadLocalStores[i]).beginPosition];
332                         if (adjacent && charsEqual)
333                         {
334                             anyChange = true;
335                             back(threadLocalStores[lastNonEmpty]).endPosition = front(threadLocalStores[i]).endPosition;
336                             fromPositions[i].i1 += 1;
337                         }
338                     }
339 
340                     if (fromPositions[i].i1 != fromPositions[i].i2)
341                         lastNonEmpty = i;
342                 }
343             }
344             while (anyChange);
345             // Then, remove any repeats in the beginning and end of blocks that are too short.
346             for (unsigned i = 0; i < length(threadLocalStores); ++i)
347             {
348                 if (fromPositions[i].i1 == fromPositions[i].i2)
349                     continue;
350                 unsigned j = fromPositions[i].i1;
351                 TRepeatSize len = threadLocalStores[i][j].endPosition - threadLocalStores[i][j].beginPosition;
352                 if (!_repeatMaskValue(text[threadLocalStores[i][j].beginPosition]) &&  // Never remove mask value.
353                     len <= minRepeatLen)
354                     fromPositions[i].i1 += 1;
355                 if (fromPositions[i].i1 == fromPositions[i].i2)
356                     continue;
357                 j = fromPositions[i].i2 - 1;
358                 len = threadLocalStores[i][j].endPosition - threadLocalStores[i][j].beginPosition;
359                 if (!_repeatMaskValue(text[threadLocalStores[i][j].beginPosition]) &&  // Never remove mask value.
360                     len <= minRepeatLen)
361                     fromPositions[i].i2 -= 1;
362             }
363             // Last, build splitters for output in parallel.
364             String<unsigned> outSplitters;
365             appendValue(outSplitters, 0);
366             for (unsigned i = 0; i < length(threadLocalStores); ++i)
367                 appendValue(outSplitters, back(outSplitters) + fromPositions[i].i2 - fromPositions[i].i1);
368 
369             // std::cerr << ",-- REPEATS AFTER MENDING\n";
370             // for (unsigned i = 0; i < length(threadLocalStores); ++i)
371             // {
372             //     std::cerr << "| i = " << i << std::endl;
373             //     std::cerr << "`--, fromPositions[" << i << "] = (" << fromPositions[i].i1 << ", " << fromPositions[i].i2 << std::endl;
374             //     for (unsigned j = 0; j < length(threadLocalStores[i]); ++j)
375             //         std::cerr << "   | threadLocalStores[" << i << "][" << j << "] == {" << threadLocalStores[i][j].beginPosition << ", " << threadLocalStores[i][j].endPosition << "}" << std::endl;
376             // }
377             // std::cerr << "    `--" << std::endl;
378 
379             // Allocate memory.
380             clear(repString);
381             resize(repString, back(outSplitters));
382 
383             // Copy back the repeats in parallel.
384             unsigned nt = length(threadLocalStores);
385             (void) nt;  // Otherwise, GCC 4.6 warns, does not see it used in pragma clause below.
386             #pragma omp parallel num_threads(nt)
387             {
388                 int const t = omp_get_thread_num();
389                 arrayCopy(iter(threadLocalStores[t], fromPositions[t].i1, Standard()),
390                           iter(threadLocalStores[t], fromPositions[t].i2, Standard()),
391                           iter(repString, outSplitters[t], Standard()));
392             }  // end of #pragma omp parallel
393         } else {
394 #endif  // #if SEQAN_ENABLE_PARALLELISM
395             // Sequential case.
396             TRepeat rep;
397             rep.period = 1;
398             clear(repString);
399 
400             TIterator it = begin(text, Standard());
401             TIterator itEnd = end(text, Standard());
402             if (it == itEnd) return;
403 
404             TSize repLen = 1;
405             for (++it; it != itEnd; ++it)
406             {
407                 if (*it != *(it-1))
408                 {
409                     if (_repeatMaskValue(*(it-1)) || repLen > (TSize)minRepeatLen)
410                     {
411                         // insert repeat
412                         rep.endPosition = it - begin(text, Standard());
413                         rep.beginPosition = rep.endPosition - repLen;
414                         //                    std::cerr<<"left:"<<rep.beginPosition<<"  right:"<<rep.endPosition<<"  length:"<<posSub(rep.endPosition,rep.beginPosition)<<"  period:"<<rep.period<<std::endl;
415                         appendValue(repString, rep);
416                     }
417                     repLen = 1;
418                 } else
419                     ++repLen;
420             }
421             if (_repeatMaskValue(*(it-1)) || repLen > (TSize)minRepeatLen)
422             {
423                 // insert repeat
424                 rep.endPosition = length(text);
425                 rep.beginPosition = rep.endPosition - repLen;
426                 //            std::cerr<<"left:"<<rep.beginPosition<<"  right:"<<rep.endPosition<<"  length:"<<posSub(rep.endPosition,rep.beginPosition)<<"  period:"<<rep.period<<std::endl;
427                 appendValue(repString, rep);
428             }
429 #if SEQAN_ENABLE_PARALLELISM
430         }
431 #endif  // #if SEQAN_ENABLE_PARALLELISM
432         // #pragma omp critical
433         // {
434         //     std::cerr << "thread #" << omp_get_thread_num() << " REPEATS:";
435         //     for (unsigned i = 0; i < length(repString); ++i) {
436         //         std::cerr << " (" << repString[i].beginPosition << ", " << repString[i].endPosition << ", " << repString[i].period << ")";
437         //     }
438         //     std::cerr << std::endl;
439         // }
440     }
441 
442     // TODO(holtgrew): Why for TString const and StringSet<> const?
443     template <typename TRepeatStore, typename TString, typename TSpec, typename TRepeatSize>
444     inline void findRepeats(TRepeatStore &repString, StringSet<TString, TSpec> const &text, TRepeatSize minRepeatLen)
445     {
446         typedef typename Value<TRepeatStore>::Type    TRepeat;
447         typedef typename Iterator<TString>::Type    TIterator;
448         typedef typename Value<TString>::Type        TValue;
449         typedef typename Size<TString>::Type        TSize;
450 
451         TRepeat rep;
452         rep.period = 1;
453         clear(repString);
454 
455         for (unsigned i = 0; i < length(text); ++i)
456         {
457             TIterator it = begin(text[i], Standard());
458             TIterator itEnd = end(text[i], Standard());
459             if (it == itEnd) continue;
460 
461             TValue last = *it;
462             TSize repLeft = 0;
463             TSize repRight = 1;
464             rep.beginPosition.i1 = i;
465             rep.endPosition.i1 = i;
466 
467             for (++it; it != itEnd; ++it, ++repRight)
468             {
469                 if (last != *it)
470                 {
471                     if (_repeatMaskValue(last) || (TRepeatSize)(repRight - repLeft) > minRepeatLen)
472                     {
473                         // insert repeat
474                         rep.beginPosition.i2 = repLeft;
475                         rep.endPosition.i2 = repRight;
476 //                        std::cerr<<"left:"<<rep.beginPosition<<"  right:"<<rep.endPosition<<"  length:"<<posSub(rep.endPosition,rep.beginPosition)<<"  period:"<<rep.period<<std::endl;
477                         appendValue(repString, rep);
478                     }
479                     repLeft = repRight;
480                     last = *it;
481                 }
482             }
483             if (_repeatMaskValue(last) || (TRepeatSize)(repRight - repLeft) > minRepeatLen)
484             {
485                 // insert repeat
486                 rep.beginPosition.i2 = repLeft;
487                 rep.endPosition.i2 = repRight;
488 //                std::cerr<<"left:"<<rep.beginPosition<<"  right:"<<rep.endPosition<<"  length:"<<posSub(rep.endPosition,rep.beginPosition)<<"  period:"<<rep.period<<std::endl;
489                 appendValue(repString, rep);
490             }
491         }
492     }
493 
494     // main function
495     template <typename TRepeatStore, typename TText, typename TRepeatSize, typename TPeriodSize>
496     void findRepeats(TRepeatStore &repString, TText const &text, TRepeatSize minRepeatLen, TPeriodSize maxPeriod)
497     {
498         typedef Index<TText, IndexWotd<TRepeatFinder> >                    TIndex;
499         typedef typename Size<TIndex>::Type                                    TSize;
500         typedef typename Iterator<TIndex, TopDown<ParentLinks<> > >::Type    TNodeIterator;
501         typedef typename Fibre<TIndex, FibreSA>::Type const                TSA;
502         typedef typename Infix<TSA>::Type                                    TOccString;
503         typedef typename Iterator<TOccString>::Type                            TOccIterator;
504 
505         typedef typename Value<TRepeatStore>::Type                            TRepeat;
506         typedef typename Value<TOccString>::Type                            TOcc;
507 
508         typedef std::map<TOcc,TRepeat,RepeatLess_<TOcc> >                    TRepeatList;
509 
510         if (maxPeriod < 1) return;
511         if (maxPeriod == 1)
512         {
513             findRepeats(repString, text, minRepeatLen);
514             return;
515         }
516 
517         TIndex        index(text);
518         TRepeatList list;
519 
520         // set repeat finder parameters
521         cargo(index).minRepeatLen = minRepeatLen;
522         cargo(index).maxPeriod = maxPeriod;
523 
524         TNodeIterator nodeIt(index);
525         TOccIterator itA, itB, itRepBegin, itEnd;
526         TRepeat rep;
527         for (; !atEnd(nodeIt); goNext(nodeIt))
528         {
529             if (isRoot(nodeIt)) continue;
530 
531             // get occurrences
532             TOccString occ = getOccurrences(nodeIt);
533             itA = begin(occ, Standard());
534             itEnd = end(occ, Standard());
535             itRepBegin = itB = itA;
536 
537             TSize repLen = repLength(nodeIt);        // representative length
538             if ((TSize)minRepeatLen <= repLen) continue;
539 
540             TSize diff, period = 0;                    // period of current repeat
541             TSize repeatLen = 0;                    // overall length of current repeat
542             TSize minLen = minRepeatLen - repLen;    // minimum repeat length minus length of representative
543 
544             for (++itB; itB != itEnd; ++itB)
545             {
546                 diff = posSub(*itB, *itA);
547                 if (diff != period || getSeqNo(*itA) != getSeqNo(*itB))
548                 {
549                     // is the repeat long enough?
550                     if (repeatLen >= minLen)
551                         // is the repeat self overlapping or connected?
552                         if (parentRepLength(nodeIt) < period && period <= repLen)
553                         {
554                             // insert repeat
555                             rep.beginPosition = *itRepBegin;
556                             rep.endPosition = posAdd(*itA, period);
557                             rep.period = period;
558 //                            std::cerr<<"left:"<<rep.beginPosition<<"  right:"<<rep.endPosition<<"  length:"<<posSub(rep.endPosition,rep.beginPosition)<<"  period:"<<rep.period<<std::endl;
559                             list.insert(std::pair<TOcc,TRepeat>(rep.beginPosition, rep));
560                         }
561                     itRepBegin = itA;
562                     period = diff;
563                     repeatLen = 0;
564                 }
565                 repeatLen += period;
566                 itA = itB;
567             }
568 
569             // is the last repeat long enough?
570             if (repeatLen >= minLen)
571                 // is the repeat self overlapping or connected?
572                 if (parentRepLength(nodeIt) < period && period <= repLen)
573                 {
574                     // insert repeat
575                     rep.beginPosition = *itRepBegin;
576                     rep.endPosition = posAdd(*itA, period);
577                     rep.period = period;
578 //                    std::cerr<<"left:"<<rep.beginPosition<<"  right:"<<rep.endPosition<<"  length:"<<posSub(rep.endPosition,rep.beginPosition)<<"  period:"<<rep.period<<std::endl;
579                     list.insert(std::pair<TOcc,TRepeat>(rep.beginPosition, rep));
580                 }
581         }
582 
583         // copy low-complex regions to result string
584         clear(repString);
585         reserve(repString, list.size(), Exact());
586         typename TRepeatList::const_iterator lit = list.begin();
587         typename TRepeatList::const_iterator litEnd = list.end();
588         for (TSize i = 0; lit != litEnd; ++lit, ++i)
589             appendValue(repString, (*lit).second);
590     }
591 
592 }    // namespace seqan
593 
594 #endif
595