1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 // Utility code for enumerating all strings in edit or Hamming distance
35 // around a given string.
36 // ==========================================================================
37 
38 // TODO(holtgrew): It would be nice if the maximal distance would be given as a run time parameter.
39 // TODO(holtgrew): Document iterator?
40 
41 #ifndef SEQAN_INCLUDE_MISC_EDIT_ENVIRONMENT_H_
42 #define SEQAN_INCLUDE_MISC_EDIT_ENVIRONMENT_H_
43 
44 namespace seqan {
45 
46 // ==========================================================================
47 // Forwards
48 // ==========================================================================
49 
50 template <typename TDistanceSpec, unsigned DISTANCE /*= 1*/>
51 struct EditEnvironment;
52 
53 // ==========================================================================
54 // Tags, Classes, Enums
55 // ==========================================================================
56 
57 // --------------------------------------------------------------------------
58 // Class StringEnumerator
59 // --------------------------------------------------------------------------
60 
61 /*!
62  * @class StringEnumerator
63  * @headerfile <seqan/misc/edit_environment.h>
64  * @brief Class to enumerate all strings within a given edit/Hamming distance.
65  *
66  * @signature template <typename TString, typename TSpec>
67  *            class StringEnumerator<TString, TSpec>;
68  *
69  * @tparam TString Type of the string to enumerate the environment of.
70  * @tparam TSpec   Specialization tag.
71  *
72  *
73  * @fn StringEnumerator::StringEnumerator
74  * @brief Constructor
75  *
76  * @signature StringEnumerator::StringEnumerator(string[, minDist]);
77  *
78  * @param[in] string  The string to use as the center. Types: <tt>TString</tt>.
79  * @param[in] minDist The smallest distance to generate strings with.  Type: <tt>unsigned</tt>.   Default: 0
80  *
81  *
82  * @var bool StringEnumerator::trim
83  * @brief Indicate whether to ignore substitutions in first or last character of string in Levenshtein mode
84  *        (optimization for approximate search).
85  *
86  * This is useful when searching for such enumerated strings in large texts.  Patterns with substitutions in the first
87  * base would also be found.
88  *
89  * @section Examples
90  *
91  * @include demos/dox/misc/enumerate_strings.cpp
92  *
93  * @include demos/dox/misc/enumerate_strings.cpp.stdout
94  */
95 
96 /*!
97  * @class HammingStringEnumerator
98  * @extends StringEnumerator
99  * @headerfile <seqan/misc/edit_environment.h>
100  * @brief Enumerate all strings within a given edit distance of a "center string".
101  *
102  * @signature template <typename TString, unsigned DISTANCE>
103  *            class StringEnumerator<TString, EditEnvironment<HammingDistance, DISTANCE> >;
104  *
105  * @tparam TString  Type of the string to enumerate the environment of.
106  * @tparam DISTANCE The maximal distance to generate strings with.
107  *
108  * See @link StringEnumerator @endlink for examples.
109  */
110 
111 /*!
112  * @class LevenshteinStringEnumerator
113  * @extends StringEnumerator
114  * @headerfile <seqan/misc/edit_environment.h>
115  * @brief Enumerate all strings within a given edit distance of a "center string" (of edit distance &lt; 3).
116  *
117  * @signature template <typename TString, unsigned DISTANCE>
118  *            class StringEnumerator<TString, EditEnvironment<LevenshteinDistance, DISTANCE> >;
119  *
120  * @tparam TString  Type of the string to enumerate the environment of.
121  * @tparam DISTANCE The maximal distance to generate strings with.
122  *
123  * See @link StringEnumerator @endlink for examples.
124  *
125  * @note The @link StringEnumerator#length LevenshteinStringEnumerator#length @endlink function does not work for
126  *       <tt>DISTANCE &gt; 2</tt>.
127  */
128 
129 template <typename TObject, typename TSpec>
130 class StringEnumerator
131 {
132 public:
133     Holder<TObject> data_host;
134     unsigned        minDist;
135     bool            trim;
136 
StringEnumerator(TObject & _original)137     StringEnumerator(TObject & _original) : data_host(_original), minDist(0), trim(true)
138     {}
139 
StringEnumerator(TObject & _original,unsigned _minDist)140     StringEnumerator(TObject & _original, unsigned _minDist) : data_host(_original), minDist(_minDist), trim(true)
141     {}
142 
StringEnumerator(TObject const & _original)143     StringEnumerator(TObject const & _original) : data_host(_original), minDist(0), trim(true)
144     {}
145 
StringEnumerator(TObject const & _original,unsigned _minDist)146     StringEnumerator(TObject const & _original, unsigned _minDist) : data_host(_original), minDist(_minDist), trim(true)
147     {}
148 };
149 
150 // --------------------------------------------------------------------------
151 // Spec EditEnvironment Iter for Hamming Distance
152 // --------------------------------------------------------------------------
153 
154 template <typename TSize>
155 struct StringEnumeratorHammingModifier_
156 {
157     TSize       errorPos;           // position of substitution
158     unsigned    character;          // replacement character
159     unsigned    skipChar;           // skip the original character
160 
StringEnumeratorHammingModifier_StringEnumeratorHammingModifier_161     StringEnumeratorHammingModifier_() : errorPos(0), character(0), skipChar(0)
162     {}
163 };
164 
165 template <typename TObject, unsigned DISTANCE>
166 class Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard>
167 {
168 public:
169     typedef typename Value<TObject>::Type           TValue;
170     typedef typename Size<TObject>::Type            TSize;
171     typedef typename MakeSigned_<TSize>::Type       TSignedSize;
172     typedef StringEnumeratorHammingModifier_<TSignedSize> TModifier;
173 
174     TObject & orig;
175 //        typename RemoveConst_<TObject>::Type    tmp;
176     String<TValue>                          tmp;
177 
178     TModifier   mod[DISTANCE];
179     unsigned    minDist;
180     bool        trim;
181 
Iter(TObject & _original)182     Iter(TObject & _original) :
183         orig(_original),
184         minDist(0),
185         trim(true)
186     {
187         goBegin(*this);
188     }
189 
Iter(TObject & _original,unsigned _minDist,bool _trim)190     Iter(TObject & _original, unsigned _minDist, bool _trim) :
191         orig(_original),
192         minDist(_minDist),
193         trim(_trim)
194     {
195         goBegin(*this);
196     }
197 
Iter(TObject & _original,MinimalCtor)198     Iter(TObject & _original, MinimalCtor) :
199         orig(_original),
200         minDist(0),
201         trim(true) {}
202 
Iter(TObject & _original,unsigned _minDist,bool _trim,MinimalCtor)203     Iter(TObject & _original, unsigned _minDist, bool _trim, MinimalCtor) :
204         orig(_original),
205         minDist(_minDist),
206         trim(_trim) {}
207 };
208 
209 // --------------------------------------------------------------------------
210 // Spec EditEnvironment Iter for Edit Distance
211 // --------------------------------------------------------------------------
212 
213 template <typename TSize>
214 struct StringEnumeratorLevenshteinModifier_
215 {
216     enum TState {DISABLED_, SUBST_, DELETE_, INSERT_, Eof_};
217     TSize       errorPosOrig;       // position of edit operation in original string
218     TSize       errorPos;           // position of edit operation in modified string
219     TSize       errorPosEnd;        // errorPos < errorPosEnd must be fulfilled
220     unsigned    character;          // replacement character
221     unsigned    skipChar;           // skip the original character
222     TState      state;              // current state subst/insert before/delete
223 
StringEnumeratorLevenshteinModifier_StringEnumeratorLevenshteinModifier_224     StringEnumeratorLevenshteinModifier_() :
225             errorPosOrig(0), errorPos(0), errorPosEnd(0), character(0), skipChar(0), state(DISABLED_)
226     {}
227 };
228 
229 template <typename TObject, unsigned DISTANCE>
230 class Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard>
231 {
232 public:
233     typedef typename Value<TObject>::Type               TValue;
234     typedef typename Size<TObject>::Type                TSize;
235     typedef typename MakeSigned_<TSize>::Type           TSignedSize;
236     typedef StringEnumeratorLevenshteinModifier_<TSignedSize> TModifier;
237 
238     TObject & orig;
239 //        typename RemoveConst_<TObject>::Type    tmp;
240     String<TValue>                          tmp;
241 
242     TModifier   mod[DISTANCE + 1];
243     unsigned    minDist;
244     unsigned    currentDistance;            // upper bound for dist(original, *this)
245     bool        trim;
246 
Iter(TObject & _original)247     Iter(TObject & _original) :
248         orig(_original),
249         minDist(0),
250         currentDistance(0),
251         trim(true)
252     {
253         goBegin(*this);
254     }
255 
Iter(TObject & _original,unsigned _minDist,bool _trim)256     Iter(TObject & _original, unsigned _minDist, bool _trim) :
257         orig(_original),
258         minDist(_minDist),
259         currentDistance(0),
260         trim(_trim)
261     {
262         goBegin(*this);
263     }
264 
Iter(TObject & _original,MinimalCtor)265     Iter(TObject & _original, MinimalCtor) :
266         orig(_original),
267         minDist(0),
268         currentDistance(0),
269         trim(true) {}
270 
Iter(TObject & _original,unsigned _minDist,bool _trim,MinimalCtor)271     Iter(TObject & _original, unsigned _minDist, bool _trim, MinimalCtor) :
272         orig(_original),
273         minDist(_minDist),
274         currentDistance(0),
275         trim(_trim) {}
276 
_reinit(int pos,int posOrig)277     inline bool _reinit(int pos, int posOrig)
278     {
279         tmp = orig;
280         int posOrigEnd = length(tmp);
281         typename TModifier::TState lastState = TModifier::DISABLED_;
282         // i from high to low  =  modifier from left to right
283         for (int i = currentDistance - 1; i >= 0; --i)
284         {
285             TModifier & _mod = mod[i];
286             switch (_mod.state)
287             {
288             case TModifier::SUBST_:
289                 // INSERT+SUBST is SUBST+INSERT (already enumerated)
290                 // DELETE+SUBST is SUBST+DELETE (already enumerated)
291                 // eventually trim front SUBSTs
292                 if (lastState == TModifier::INSERT_ || lastState == TModifier::DELETE_ ||
293                     (trim && posOrig == 0))
294                 {
295                     ++posOrig;
296                     ++pos;
297                 }
298                 if (posOrig >= posOrigEnd)
299                     return false;
300 
301                 _mod.errorPosOrig = posOrig;
302                 _mod.errorPos = pos;
303                 _mod.skipChar = ordValue(orig[posOrig]);
304                 _mod.character = (0 == _mod.skipChar) ? 1 : 0;
305                 assignValue(tmp, pos, (TValue) _mod.character);
306                 ++pos;
307                 ++posOrig;
308                 break;
309 
310             case TModifier::DELETE_:
311                 // INSERT after DELETE is one SUBST (already enumerated)
312                 if (lastState == TModifier::INSERT_)
313                 {
314                     ++posOrig;
315                     ++pos;
316                 }
317                 if (posOrig >= posOrigEnd)
318                     return false;
319 
320                 _mod.errorPosOrig = posOrig;
321                 _mod.errorPos = pos;
322                 _mod.character = ValueSize<TValue>::VALUE - 1;
323                 _mod.skipChar = -1;
324                 ++posOrig;
325                 erase(tmp, pos);
326                 break;
327 
328             case TModifier::INSERT_:
329             default:
330                 // DELETE after INSERT is one SUBST (already enumerated)
331                 // eventually trim front SUBSTs
332                 if (lastState == TModifier::DELETE_ || (trim && posOrig == 0))
333                 {
334                     ++posOrig;
335                     ++pos;
336                 }
337                 if (posOrig > posOrigEnd)
338                     return false;
339 
340                 _mod.errorPosOrig = posOrig;
341                 _mod.errorPos = pos;
342                 _mod.character = 0;
343                 _mod.skipChar = -1;
344                 insertValue(tmp, pos, (TValue)0);
345                 ++pos;
346                 break;
347             }
348             lastState = _mod.state;
349         }
350 
351         pos = length(tmp);
352         bool cut = trim;
353         for (unsigned i = 0; i < currentDistance; ++i)
354         {
355             TModifier & _mod = mod[i];
356             if (_mod.state != TModifier::DELETE_)
357             {
358                 if (cut)
359                 {
360                     if (_mod.errorPos >= (TSignedSize)(pos - 1))
361                         return false;
362 
363                     _mod.errorPosEnd = pos - 1;
364                 }
365                 else
366                 {
367                     if (_mod.errorPos >= (TSignedSize)pos)
368                         return false;
369 
370                     _mod.errorPosEnd = pos;
371                 }
372                 --pos;
373             }
374             else
375             {
376                 cut = false;
377                 if (_mod.errorPos > (TSignedSize)pos)
378                     return false;
379 
380                 _mod.errorPosEnd = pos + 1;
381             }
382         }
383         return true;
384     }
385 
386 };
387 
388 // ==========================================================================
389 // Metafunctions
390 // ==========================================================================
391 
392 // --------------------------------------------------------------------------
393 // Metafunction Value                                        StringEnumerator
394 // --------------------------------------------------------------------------
395 
396 /*!
397  * @mfn StringEnumerator#Value
398  * @brief Return value type of the string to enumerate.
399  *
400  * @signature Value<TStringEnumerator>::Type;
401  */
402 
403 template <typename TObject, typename TSpec>
404 struct Value<StringEnumerator<TObject, TSpec> > : Value<TObject>
405 {};
406 
407 // --------------------------------------------------------------------------
408 // Metafunction Reference                                    StringEnumerator
409 // --------------------------------------------------------------------------
410 
411 /*!
412  * @mfn StringEnumerator#Reference
413  * @brief Returns reference type of the enumerated strings.
414  *
415  * @signature Reference<TStringEnumerator>::Type;
416  */
417 
418 template <typename TObject, typename TSpec>
419 struct Reference<StringEnumerator<TObject, TSpec> >
420 {
421     typedef TObject & Type;
422 };
423 
424 template <typename TObject, typename TSpec>
425 struct Reference<StringEnumerator<TObject, TSpec> const>
426 {
427     typedef TObject const & Type;
428 };
429 
430 // --------------------------------------------------------------------------
431 // Metafunction Size                                         StringEnumerator
432 // --------------------------------------------------------------------------
433 
434 /*!
435  * @mfn StringEnumerator#Size
436  * @brief Returns size type.
437  *
438  * @signature Size<TStringEnumerator>::Type;
439  */
440 
441 template <typename TObject, typename TSpec>
442 struct Size<StringEnumerator<TObject, TSpec> > : Size<TObject>
443 {};
444 
445 // --------------------------------------------------------------------------
446 // Metafunction Difference                                   StringEnumerator
447 // --------------------------------------------------------------------------
448 
449 /*!
450  * @mfn StringEnumerator#Difference
451  * @brief Returns difference type.
452  *
453  * @signature Difference<TStringEnumerator>::Type;
454  */
455 
456 template <typename TObject, typename TSpec>
457 struct Difference<StringEnumerator<TObject, TSpec> > : Difference<TObject>
458 {};
459 
460 // --------------------------------------------------------------------------
461 // Metafunction Position                                     StringEnumerator
462 // --------------------------------------------------------------------------
463 
464 /*!
465  * @mfn StringEnumerator#Position
466  * @brief Returns position type.
467  *
468  * @signature Position<TStringEnumerator>::Type;
469  */
470 
471 template <typename TObject, typename TSpec>
472 struct Position<StringEnumerator<TObject, TSpec> > : Position<TObject>
473 {};
474 
475 // --------------------------------------------------------------------------
476 // Metafunction Iterator                                     StringEnumerator
477 // --------------------------------------------------------------------------
478 
479 /*!
480  * @mfn StringEnumerator#Iterator
481  * @brief Returns iterator type.
482  *
483  * @signature Position<TStringEnumerator, TSpec>::Type;
484  */
485 
486 template <typename TObject, typename TSpec>
487 struct Iterator<StringEnumerator<TObject, TSpec>, Standard>
488 {
489     typedef Iter<StringEnumerator<TObject, TSpec>, Standard> Type;
490 };
491 
492 template <typename TObject, typename TSpec>
493 struct Iterator<StringEnumerator<TObject, TSpec> const, Standard>
494 {
495     typedef Iter<StringEnumerator<TObject, TSpec>, Standard> Type;
496 };
497 
498 // --------------------------------------------------------------------------
499 // Metafunction Host                                         StringEnumerator
500 // --------------------------------------------------------------------------
501 
502 /*!
503  * @mfn StringEnumerator#Host
504  * @brief Returns host type.
505  *
506  * @signature Host<TStringEnumerator>::Type;
507  */
508 
509 template <typename TObject, typename TSpec>
510 struct Host<StringEnumerator<TObject, TSpec> >
511 {
512     typedef TObject Type;
513 };
514 
515 template <typename TObject, typename TSpec>
516 struct Host<StringEnumerator<TObject, TSpec> const>
517 {
518     typedef TObject const Type;
519 };
520 
521 // ==========================================================================
522 // Functions
523 // ==========================================================================
524 
525 // --------------------------------------------------------------------------
526 // Function _dataHost()                                      StringEnumerator
527 // --------------------------------------------------------------------------
528 
529 template <typename TText, typename TSpec>
530 inline Holder<TText> &
531 _dataHost(StringEnumerator<TText, TSpec> & enumerator)
532 {
533     return enumerator.data_host;
534 }
535 
536 template <typename TText, typename TSpec>
537 inline Holder<TText> const &
538 _dataHost(StringEnumerator<TText, TSpec> const & enumerator)
539 {
540     return enumerator.data_host;
541 }
542 
543 // --------------------------------------------------------------------------
544 // Function begin()                                          StringEnumerator
545 // --------------------------------------------------------------------------
546 
547 /*!
548  * @fn StringEnumerator#begin
549  * @brief Return begin iterator.
550  *
551  * @signature TIter begin(stringEnum[, tag]);
552  *
553  * @param[in] stringEnum StringEnumerator to query.
554  * @param[in] tag        Iterator tag to use.
555  *
556  * @return TIter Iterator to the first string in the enumerator.
557  */
558 
559 template <typename TObject, typename TSpec>
560 inline Iter<StringEnumerator<TObject, TSpec>, Standard>
561 begin(StringEnumerator<TObject, TSpec> & enumerator, Standard)
562 {
563     return Iter<StringEnumerator<TObject, TSpec>, Standard>(host(enumerator), enumerator.minDist, enumerator.trim);
564 }
565 
566 template <typename TObject, typename TSpec>
567 inline Iter<StringEnumerator<TObject, TSpec>, Standard>
568 begin(StringEnumerator<TObject, TSpec> const & enumerator, Standard)
569 {
570     return Iter<StringEnumerator<TObject, TSpec>, Standard>(host(enumerator), enumerator.minDist, enumerator.trim);
571 }
572 
573 // --------------------------------------------------------------------------
574 // Function end()                                            StringEnumerator
575 // --------------------------------------------------------------------------
576 
577 /*!
578  * @fn StringEnumerator#end
579  * @brief Return end iterator.
580  *
581  * @signature TIter end(stringEnum[, tag]);
582  *
583  * @param[in] stringEnum StringEnumerator to query.
584  * @param[in] tag        Iterator tag to use.
585  *
586  * @return TIter End iterator for the string enumerator.
587  */
588 
589 template <typename TObject, typename TSpec>
590 inline Iter<StringEnumerator<TObject, TSpec>, Standard>
591 end(StringEnumerator<TObject, TSpec> & enumerator, Standard)
592 {
593     Iter<StringEnumerator<TObject, TSpec>, Standard> iter(host(enumerator), enumerator.minDist, enumerator.trim, MinimalCtor());
594     goEnd(iter);
595     return iter;
596 }
597 
598 template <typename TObject, typename TSpec>
599 inline Iter<StringEnumerator<TObject, TSpec>, Standard>
600 end(StringEnumerator<TObject, TSpec> const & enumerator, Standard)
601 {
602     Iter<StringEnumerator<TObject, TSpec>, Standard> iter(host(enumerator), enumerator.minDist, enumerator.trim, MinimalCtor());
603     goEnd(iter);
604     return iter;
605 }
606 
607 // --------------------------------------------------------------------------
608 // Function length()                                 Hamming StringEnumerator
609 // --------------------------------------------------------------------------
610 
611 /*!
612  * @fn StringEnumerator#length
613  * @brief Return number of strings that will be enumerated.
614  *
615  * @signature TSize length(stringEnum);
616  *
617  * @param[in] stringEnum StringEnumerator to query.
618  *
619  * @return TSize The number of elements in the enumerator  (Metafunction: @link StringEnumerator#Size @endlink).
620  */
621 
622 template <typename TObject, unsigned DISTANCE>
623 inline typename Size<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> > >::Type
624 length(StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> > const & me)
625 {
626     typedef typename Value<TObject>::Type TValue;
627     typedef typename Size<TObject>::Type  TSize;
628 
629     static const unsigned alphabetSize = ValueSize<TValue>::VALUE;
630     TSize sum = 0;
631     TSize numerator = 1;
632     TSize alpha = 1;
633     TSize divisor = 1;
634 
635     for (unsigned i = 0; i <= DISTANCE; ++i)
636     {
637         if (i >= me.minDist)
638             sum += alpha * (numerator / divisor);
639 
640         divisor     *= i + 1;
641         numerator   *= length(host(me)) - i;
642         alpha       *= alphabetSize - 1;
643     }
644 
645     return sum;
646 }
647 
648 // --------------------------------------------------------------------------
649 // Function operator*()                                 StringEnumerator Iter
650 // --------------------------------------------------------------------------
651 
652 template <typename TObject, typename TSpec>
653 inline TObject const &
654 operator*(Iter<StringEnumerator<TObject, TSpec>, Standard> & it)
655 {
656     return it.tmp;
657 }
658 
659 template <typename TObject, typename TSpec>
660 inline TObject const &
661 operator*(Iter<StringEnumerator<TObject, TSpec>, Standard> const & it)
662 {
663     return it.tmp;
664 }
665 
666 // --------------------------------------------------------------------------
667 // Function goBegin()                           Hamming StringEnumerator Iter
668 // --------------------------------------------------------------------------
669 
670 template <typename TObject, unsigned DISTANCE>
671 inline void
672 goBegin(Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> & it)
673 {
674     typedef typename Value<TObject>::Type                   TValue;
675     typedef typename Size<TObject>::Type                    TSize;
676     typedef typename MakeSigned_<TSize>::Type               TSignedSize;
677     typedef StringEnumeratorHammingModifier_<TSignedSize>   TModifier;
678 
679     if (empty(it.orig) || it.minDist > DISTANCE || it.minDist > length(it.orig))
680     {
681         goEnd(it);
682         return;
683     }
684 
685     it.tmp = it.orig;
686 
687     unsigned i = 0;
688     unsigned mDist = it.minDist;
689 
690     if (mDist > length(it.orig))
691         mDist = length(it.orig);
692 
693     if (mDist == 0)
694     {
695         it.mod[0].errorPos = 0;
696         it.mod[0].skipChar = -1;
697         it.mod[0].character = 0;
698         assignValue(it.tmp, 0, (TValue) 0);
699         i = 1;
700     }
701     else
702         for (; i < mDist; ++i)
703         {
704             TModifier & mod = it.mod[i];
705             mod.errorPos = (mDist - 1) - i;
706             mod.skipChar = ordValue(it.orig[mod.errorPos]);
707             mod.character = (0 == mod.skipChar) ? 1 : 0;
708             assignValue(it.tmp, mod.errorPos, (TValue) mod.character);
709         }
710     for (; i < DISTANCE; ++i)
711     {
712         TModifier & mod = it.mod[i];
713         mod.errorPos = -1;
714         mod.character = 0;
715         mod.skipChar = -1;
716     }
717 }
718 
719 // --------------------------------------------------------------------------
720 // Function atBegin()                           Hamming StringEnumerator Iter
721 // --------------------------------------------------------------------------
722 
723 template <typename TObject, unsigned DISTANCE>
724 inline void
725 atBegin(Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> const & it)
726 {
727     Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> tmp(it.orig, it.minDist, it.trim);
728     return tmp == it;
729 }
730 
731 template <typename TObject, unsigned DISTANCE>
732 inline bool
733 atBegin(Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> & it)
734 {
735     return atBegin(const_cast<Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> const &>(it));
736 }
737 
738 // --------------------------------------------------------------------------
739 // Function goEnd()                             StringEnumerator Iter Hamming
740 // --------------------------------------------------------------------------
741 
742 template <typename TObject, unsigned DISTANCE>
743 inline void
744 goEnd(Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> & it)
745 {
746     typedef typename Size<TObject>::Type                    TSize;
747     typedef typename MakeSigned_<TSize>::Type               TSignedSize;
748     typedef StringEnumeratorHammingModifier_<TSignedSize>   TModifier;
749 
750     for (unsigned i = 0; i < DISTANCE; ++i)
751     {
752         TModifier & mod = it.mod[i];
753         mod.errorPos = -1;
754         mod.character = 0;
755     }
756 }
757 
758 // --------------------------------------------------------------------------
759 // Function atEnd()                             Hamming StringEnumerator Iter
760 // --------------------------------------------------------------------------
761 
762 template <typename TObject, unsigned DISTANCE>
763 inline bool
764 atEnd(Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> const & it)
765 {
766     typedef typename Size<TObject>::Type                    TSize;
767     typedef typename MakeSigned_<TSize>::Type               TSignedSize;
768     typedef StringEnumeratorHammingModifier_<TSignedSize>   TModifier;
769 
770     for (unsigned i = 0; i < DISTANCE; ++i)
771     {
772         TModifier const & mod = it.mod[i];
773         if (mod.errorPos != (TSignedSize) - 1 || mod.character != 0u)
774             return false;
775     }
776     return true;
777 }
778 
779 template <typename TObject, unsigned DISTANCE>
780 inline bool
781 atEnd(Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> & it)
782 {
783     return atEnd(const_cast<Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> const &>(it));
784 }
785 
786 // --------------------------------------------------------------------------
787 // Function operator++()                        Hamming StringEnumerator Iter
788 // --------------------------------------------------------------------------
789 
790 template <typename TObject, unsigned DISTANCE>
791 inline Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> &
792 operator++(Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> & it)
793 {
794     typedef typename Value<TObject>::Type                   TValue;
795     typedef typename Size<TObject>::Type                    TSize;
796     typedef typename MakeSigned_<TSize>::Type               TSignedSize;
797     typedef StringEnumeratorHammingModifier_<TSignedSize>   TModifier;
798 
799     for (unsigned i = 0; true; )
800     {
801         TModifier * mod = &it.mod[i];
802 
803         // next replacement value
804         if (++mod->character < ValueSize<TValue>::VALUE)
805         {
806             // output the original tuple only once
807             if (mod->character == mod->skipChar)
808                 continue;
809             assignValue(it.tmp, mod->errorPos, (TValue) mod->character);
810             break;
811         }
812         mod->character = (0 == mod->skipChar) ? 1 : 0;
813         assignValue(it.tmp, mod->errorPos, (TValue) mod->character);
814 
815         if (++i == DISTANCE || (mod + 1)->errorPos == (TSignedSize) - 1)
816         {
817             for (i = 0; i < DISTANCE; ++i)
818             {
819                 mod = &it.mod[i];
820 
821                 // restore char at old position
822                 if (mod->errorPos >= 0)
823                 {
824                     // std::cout << "org" << it.orig << "  tmp" << it.tmp << " ___  ";
825                     assignValue(it.tmp, mod->errorPos, it.orig[mod->errorPos]);
826                     // std::cout << "org" << it.orig << "  tmp" << it.tmp << std::endl;
827                 }
828 
829                 // next error position
830                 if (++(mod->errorPos) < (TSignedSize)(length(it.tmp) - i))
831                 {
832                     mod->skipChar = ordValue(it.orig[mod->errorPos]);
833                     mod->character = (0 == mod->skipChar) ? 1 : 0;
834                     assignValue(it.tmp, mod->errorPos, (TValue) mod->character);
835 
836                     for (; i > 0; )
837                     {
838                         it.mod[i - 1].errorPos = mod->errorPos + 1;
839                         mod = &it.mod[--i];
840                         mod->skipChar = ordValue(it.orig[mod->errorPos]);
841                         mod->character = (0 == mod->skipChar) ? 1 : 0;
842                         assignValue(it.tmp, mod->errorPos, (TValue) mod->character);
843                     }
844                     return it;
845                 }
846             }
847             // end
848             for (i = 0; i < DISTANCE; ++i)
849             {
850                 mod = &it.mod[i];
851                 mod->errorPos = -1;
852                 mod->character = 0;
853             }
854             return it;
855         }
856     }
857     return it;
858 }
859 
860 // --------------------------------------------------------------------------
861 // Function goBegin()                       Levensthein StringEnumerator Iter
862 // --------------------------------------------------------------------------
863 
864 template <typename TObject, unsigned DISTANCE>
865 inline void
866 goBegin(Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> & it)
867 {
868     typedef typename Value<TObject>::Type                       TValue;
869     typedef typename Size<TObject>::Type                        TSize;
870     typedef typename MakeSigned_<TSize>::Type                   TSignedSize;
871     typedef StringEnumeratorLevenshteinModifier_<TSignedSize>   TModifier;
872 
873     if (empty(it.orig) || it.minDist > DISTANCE || it.minDist > length(it.orig))
874     {
875         goEnd(it);
876         return;
877     }
878 
879     it.tmp = it.orig;
880     for (unsigned i = 0; i <= DISTANCE; ++i)
881     {
882         it.mod[i].errorPosOrig = -1;
883         it.mod[i].errorPos = -1;
884         it.mod[i].errorPosEnd = -1;
885         it.mod[i].character = ValueSize<TValue>::VALUE - 1;
886         it.mod[i].state = TModifier::DISABLED_;
887     }
888     it.currentDistance = it.minDist;
889     if (!it._reinit(0, 0))
890         goEnd(it);
891 }
892 
893 // --------------------------------------------------------------------------
894 // Function goEnd()                         Levensthein StringEnumerator Iter
895 // --------------------------------------------------------------------------
896 
897 template <typename TObject, unsigned DISTANCE>
898 inline void
899 goEnd(Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> & it)
900 {
901     typedef typename Size<TObject>::Type                        TSize;
902     typedef typename MakeSigned_<TSize>::Type                   TSignedSize;
903     typedef StringEnumeratorLevenshteinModifier_<TSignedSize>   TModifier;
904 
905     for (unsigned i = 0; i < DISTANCE; ++i)
906     {
907         TModifier & mod = it.mod[i];
908         mod.errorPos = -1;
909         mod.character = 0;
910         mod.state = TModifier::SUBST_;
911     }
912 }
913 
914 // --------------------------------------------------------------------------
915 // Function atEnd()                                Edit StringEnumerator Iter
916 // --------------------------------------------------------------------------
917 
918 template <typename TObject, unsigned DISTANCE>
919 inline bool
920 atEnd(Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> const & it)
921 {
922     typedef typename Size<TObject>::Type                        TSize;
923     typedef typename MakeSigned_<TSize>::Type                   TSignedSize;
924     typedef StringEnumeratorLevenshteinModifier_<TSignedSize>   TModifier;
925 
926     for (unsigned i = 0; i < DISTANCE; ++i)
927     {
928         TModifier const & mod = it.mod[i];
929         if (mod.errorPos != -1 || mod.character != 0u || mod.state != TModifier::SUBST_)
930             return false;
931     }
932     return true;
933 }
934 
935 template <typename TObject, unsigned DISTANCE>
936 inline bool
937 atEnd(Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> & it)
938 {
939     return atEnd(const_cast<Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> const &>(it));
940 }
941 
942 // --------------------------------------------------------------------------
943 // Function operator++()                    Levensthein StringEnumerator Iter
944 // --------------------------------------------------------------------------
945 
946 template <typename TObject, unsigned DISTANCE>
947 inline Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> &
948 operator++(Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> & it)
949 {
950     typedef typename Value<TObject>::Type                       TValue;
951     typedef typename Size<TObject>::Type                        TSize;
952     typedef typename MakeSigned_<TSize>::Type                   TSignedSize;
953     typedef StringEnumeratorLevenshteinModifier_<TSignedSize>   TModifier;
954     typedef typename TModifier::TState                          TState;
955 
956     // increment characters
957     TModifier * mod = it.mod;
958     do
959     {
960         // next replacement/insert value (loop core)
961         if (++(mod->character) < ValueSize<TValue>::VALUE)
962         {
963             // output the original tuple only once
964             if (mod->character == mod->skipChar)
965                 continue;
966             if (mod->errorPos >= 0)
967                 assignValue(it.tmp, mod->errorPos, (TValue) mod->character);
968             return it;
969         }
970 
971         // reset counter
972         if (mod->state != mod->DELETE_)
973         {
974             mod->character = (0 == mod->skipChar) ? 1 : 0;
975             if (mod->errorPos >= 0)
976                 assignValue(it.tmp, mod->errorPos, (TValue) mod->character);
977         }
978 
979         // next modifier
980         ++mod;
981     }
982     while (mod->state != TModifier::DISABLED_);
983 
984     // increment positions
985     mod = it.mod;
986     do
987     {
988         // restore char at old position
989         if (mod->errorPos >= 0 && static_cast<unsigned>(mod->errorPos) < length(it.tmp))
990             assignValue(it.tmp, mod->errorPos, it.orig[mod->errorPosOrig]);
991 
992 //                    int iMax = (TSignedSize)(length(it.tmp) - i);
993 //                    if (mod->state == mod->INSERT_) ++iMax;
994 
995         // next error position
996         if (++(mod->errorPos) < mod->errorPosEnd)
997         {
998             ++(mod->errorPosOrig);
999 
1000             // set next char
1001             if (mod == it.mod) // for the first modifier we use an optimization
1002             {
1003                 if (mod->state != mod->DELETE_)
1004                 {
1005                     if (mod->state == mod->SUBST_)
1006                         mod->skipChar = ordValue(it.orig[mod->errorPosOrig]);
1007                     else
1008                         mod->skipChar = -1;
1009                     mod->character = (0 == mod->skipChar) ? 1 : 0;
1010                     assignValue(it.tmp, mod->errorPos, (TValue) mod->character);
1011                 }
1012             }
1013             else if (!it._reinit(mod->errorPos, mod->errorPosOrig))
1014                 break;
1015 
1016             return it;
1017         }
1018         ++mod;
1019     }
1020     while (mod->state != TModifier::DISABLED_);
1021 
1022     // increment states
1023     mod = it.mod;
1024     TModifier * modEnd = mod + DISTANCE;
1025     do
1026     {
1027         // next edit state (subst->delete->insert)
1028         if (mod->state != mod->INSERT_)
1029         {
1030             mod->state = (TState)(mod->state + 1);
1031             if (mod->state == TModifier::SUBST_)
1032                 ++it.currentDistance;
1033 
1034             if (!it._reinit(0, 0))
1035             {
1036                 mod = it.mod;
1037                 continue;
1038             }
1039             return it;
1040         }
1041         else
1042             mod->state = mod->SUBST_;
1043         ++mod;
1044     }
1045     while (mod != modEnd);
1046 
1047     // end
1048     goEnd(it);
1049     return it;
1050 }
1051 
1052 // --------------------------------------------------------------------------
1053 // Function length()                        Levensthein StringEnumerator Iter
1054 // --------------------------------------------------------------------------
1055 
1056 template <typename TObject, unsigned DISTANCE>
1057 inline typename Size<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> > >::Type
1058 length(StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> > const & me)
1059 {
1060     typedef typename Value<TObject>::Type TValue;
1061     typedef typename Size<TObject>::Type TSize;
1062 
1063     static const unsigned alpha = ValueSize<TValue>::VALUE;
1064     TSize sum = 0;
1065     // TSize numerator = 1;
1066     // TSize divisor = 1;
1067     TSize len = length(host(me));
1068 
1069     if (me.minDist == 0 && len > 0)
1070         ++sum;
1071 
1072     if (me.minDist <= 1 && DISTANCE >= 1)
1073     {
1074         if (me.trim)
1075         {
1076             if (len > 2)
1077                 sum += (alpha - 1) * (len - 2);                 // substitutions
1078             sum += len;                                         // deletions
1079             if (len > 1)
1080                 sum += alpha * (len - 1);                       // inserts
1081         }
1082         else
1083         {
1084             sum += (alpha - 1) * len;                           // substitutions
1085             sum += len;                                         // deletions
1086             sum += alpha * (len + 1);                           // inserts
1087         }
1088     }
1089 
1090     if (me.minDist <= 2 && DISTANCE >= 2)
1091     {
1092         if (me.trim)
1093         {
1094             sum += (alpha  - 1) * (alpha - 1) *  len      * (len - 1) / 2;      // subst^2
1095             sum += (alpha  - 1)               * (len - 1) * (len - 1);          // subst*del
1096             sum += (alpha  - 1) *  alpha      *  len      * (len + 1);          // subst*ins
1097             sum +=                               len      * (len - 1) / 2;      // del^2
1098             sum +=  alpha       *  alpha      * (len + 1) * (len + 1) / 2;      // ins^2
1099         }
1100         else
1101         {
1102             sum += (alpha  - 1) * (alpha - 1) *  len      * (len - 1) / 2;      // subst^2
1103             sum += (alpha  - 1)               * (len - 1) * (len - 1);          // subst*del
1104             sum += (alpha  - 1) *  alpha      *  len      * (len + 1);          // subst*ins
1105             sum +=                               len      * (len - 1) / 2;      // del^2
1106             sum +=  alpha       *  alpha      * (len + 1) * (len + 1) / 2;      // ins^2
1107         }
1108     }
1109 
1110     // TODO: length function for DISTANCE >= 3 (if anyone needs should this)
1111     if (DISTANCE > 2u)
1112         SEQAN_FAIL("length() not implemented for DISTANCE >= 3!");
1113 
1114     return sum;
1115 }
1116 
1117 // --------------------------------------------------------------------------
1118 // Function operator==()                        Hamming StringEnumerator Iter
1119 // --------------------------------------------------------------------------
1120 
1121 template <typename TObject, unsigned DISTANCE>
1122 inline bool
1123 operator==(
1124     Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> const & a,
1125     Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> const & b)
1126 {
1127     typedef typename Size<TObject>::Type                    TSize;
1128     typedef typename MakeSigned_<TSize>::Type               TSignedSize;
1129     typedef StringEnumeratorHammingModifier_<TSignedSize>   TModifier;
1130 
1131     if (&a.orig != &b.orig)
1132         return false;
1133 
1134     for (unsigned i = 0; i < DISTANCE; ++i)
1135     {
1136         TModifier const & modA = a.mod[i];
1137         TModifier const & modB = b.mod[i];
1138         if (modA.errorPos != modB.errorPos || modA.character != modB.character)
1139             return false;
1140     }
1141 
1142     return true;
1143 }
1144 
1145 // --------------------------------------------------------------------------
1146 // Function operator!=()                        Hamming StringEnumerator Iter
1147 // --------------------------------------------------------------------------
1148 
1149 template <typename TObject, unsigned DISTANCE>
1150 inline bool
1151 operator!=(
1152     Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> const & a,
1153     Iter<StringEnumerator<TObject, EditEnvironment<HammingDistance, DISTANCE> >, Standard> const & b)
1154 {
1155     typedef typename Size<TObject>::Type                    TSize;
1156     typedef typename MakeSigned_<TSize>::Type               TSignedSize;
1157     typedef StringEnumeratorHammingModifier_<TSignedSize>   TModifier;
1158 
1159     if (&a.orig != &b.orig)
1160         return true;
1161 
1162     for (unsigned i = 0; i < DISTANCE; ++i)
1163     {
1164         TModifier const & modA = a.mod[i];
1165         TModifier const & modB = b.mod[i];
1166         if (modA.errorPos != modB.errorPos || modA.character != modB.character)
1167             return true;
1168     }
1169 
1170     return false;
1171 }
1172 
1173 // --------------------------------------------------------------------------
1174 // Function operator==()                    Levensthein StringEnumerator Iter
1175 // --------------------------------------------------------------------------
1176 
1177 template <typename TObject, unsigned DISTANCE>
1178 inline bool
1179 operator==(
1180     Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> const & a,
1181     Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> const & b)
1182 {
1183     typedef typename Size<TObject>::Type                        TSize;
1184     typedef typename MakeSigned_<TSize>::Type                   TSignedSize;
1185     typedef StringEnumeratorLevenshteinModifier_<TSignedSize>   TModifier;
1186 
1187     if (&a.orig != &b.orig)
1188         return false;
1189 
1190     for (unsigned i = 0; i < DISTANCE; ++i)
1191     {
1192         TModifier const & modA = a.mod[i];
1193         TModifier const & modB = b.mod[i];
1194         if (modA.errorPos != modB.errorPos ||
1195             modA.character != modB.character ||
1196             modA.state != modB.state)
1197             return false;
1198     }
1199 
1200     return true;
1201 }
1202 
1203 // --------------------------------------------------------------------------
1204 // Function operator!=()                    Levenshtein StringEnumerator Iter
1205 // --------------------------------------------------------------------------
1206 
1207 template <typename TObject, unsigned DISTANCE>
1208 inline bool
1209 operator!=(
1210     Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> const & a,
1211     Iter<StringEnumerator<TObject, EditEnvironment<LevenshteinDistance, DISTANCE> >, Standard> const & b)
1212 {
1213     typedef typename Size<TObject>::Type                        TSize;
1214     typedef typename MakeSigned_<TSize>::Type                   TSignedSize;
1215     typedef StringEnumeratorLevenshteinModifier_<TSignedSize>   TModifier;
1216 
1217     if (&a.orig != &b.orig)
1218         return true;
1219 
1220     for (unsigned i = 0; i < DISTANCE; ++i)
1221     {
1222         TModifier const & modA = a.mod[i];
1223         TModifier const & modB = b.mod[i];
1224         if (modA.errorPos != modB.errorPos ||
1225             modA.character != modB.character ||
1226             modA.state != modB.state)
1227             return true;
1228     }
1229 
1230     return false;
1231 }
1232 
1233 }  // namespace seqan
1234 
1235 #endif  // #ifndef SEQAN_INCLUDE_MISC_EDIT_ENVIRONMENT_H_
1236 
1237 //  LocalWords:  StringEnumerator
1238