1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef SEQAN_HEADER_INDEX_SKEW7_H
36 #define SEQAN_HEADER_INDEX_SKEW7_H
37 
38 namespace SEQAN_NAMESPACE_MAIN
39 {
40 
41 //namespace SEQAN_NAMESPACE_PIPELINING
42 //{
43 
44     struct Skew7 {};
45 
46 
47     //////////////////////////////////////////////////////////////////////////////
48     // external Skew7 algorithm
49     //////////////////////////////////////////////////////////////////////////////
50 
51     template <typename T>
52     struct SkewDC_<7, T>
53     {
54         static const unsigned VALUE[];
55     };
56 
57     template <typename T>
58     const unsigned SkewDC_<7, T>::VALUE[] = { 3,   1, 2, 4 };
59 
60 
61     // *** COMPARATORS & MAPS ***
62 
63     template <typename TValue, typename TResult = int>
64     struct _skew7NComp :
65         public std::binary_function<TValue, TValue, TResult>
66     {
67         inline TResult operator() (const TValue &a, const TValue &b) const
68         {
69             typedef typename Value<TValue, 1>::Type                 TSize;
70             typedef typename Value<TValue, 2>::Type                 TSeptet;
71             typedef typename Value<TSeptet>::Type                   TSeptetValue;
72             typedef typename StoredTupleValue_<TSeptetValue>::Type  TStoredValue;
73 
74             const TStoredValue *sa = a.i2.i;
75             const TStoredValue *sb = b.i2.i;
76 
77             TSize n = LENGTH<TSeptet>::VALUE;
78             if (a.i1 < n) n = a.i1;
79             if (b.i1 < n) n = b.i1;
80 
81             // compare the overlap of septets (the first n bases)
82             for (TSize i = 0; i < n; i++, ++sa, ++sb)
83             {
84                 if (*sa == *sb) continue;
85                 return (*sa < *sb)? -1 : 1;
86             }
87 
88             // overlap is equal, now suffix lengths decide
89             if (n < LENGTH<TSeptet>::VALUE)
90                 return (a.i1 < b.i1)? -1 : 1;
91             else
92                 return 0;
93         }
94     };
95 
96     // optimized for bitvectors
97     template <typename T1, typename TValue, typename TResult>
98     struct _skew7NComp< Pair<T1, Tuple<TValue, 7, BitPacked<> >, Pack >, TResult > :
99         public std::binary_function<
100             Pair<T1, Tuple<TValue, 7, BitPacked<> >, Pack >,
101             Pair<T1, Tuple<TValue, 7, BitPacked<> >, Pack >,
102             TResult>
103     {
104         inline TResult operator() (
105             const Pair<T1, Tuple<TValue, 7, BitPacked<> >, Pack > &a,
106             const Pair<T1, Tuple<TValue, 7, BitPacked<> >, Pack > &b) const
107         {
108             if (a.i2 < b.i2) return -1;
109             if (a.i2 > b.i2) return 1;
110             if (a.i1 < 7 || b.i1 < 7)
111                 return (a.i1 < b.i1)? -1 : 1;
112             return 0;
113         }
114     };
115 
116     template <typename TValue, typename TResult = typename Value<TValue, 1>::Type>
117     struct _skew7NMapLinear :
118         public std::unary_function<TValue, TResult>
119     {
120         TResult BN4, BN;
121 
122         _skew7NMapLinear(TResult BN_): BN4(BN_+1), BN(BN_) {}
123 
124         inline TResult operator() (const TValue& x) const
125         {
126             TResult i = x.i1; return (i%7 == 4)? BN4-(i-(i/7)*4): BN-(i-(i/7)*4);
127         }
128     };
129 
130     template <typename TValue, typename TResult = typename Value<TValue, 1>::Type>
131     struct _skew7NMapSliced :
132         public std::unary_function<TValue, TResult>
133     {
134         TResult off[5];
135 
136         _skew7NMapSliced(TResult BN_)
137         {
138             off[0] = 0;
139             off[1] = BN_ - 1;
140             off[2] = (2*BN_)/3 - 1;
141             off[3] = 0;
142             off[4] = BN_/3 - 1;
143         }
144 
145         inline TResult operator() (const TValue& x) const
146         {
147             return off[x.i1 % 7] - x.i1/7;
148         }
149     };
150 
151 
152     template <typename TValue, typename TResult = TValue>
153     struct _skew7UnslicerFunc :
154         public std::unary_function<TValue, TResult>
155     {
156         TResult o1, o2, o4, n4, n24;
157 
158         _skew7UnslicerFunc(TResult N) :
159             o1(N - (N + 6) % 7),
160             o2(N - (N + 5) % 7),
161             o4(N - (N + 3) % 7),
162             n4((N + 3) / 7),
163             n24((N + 5) / 7 + n4) {}
164 
165         inline TResult operator() (const TValue& x) const
166         {
167             return (x < n4)  ? o4 -  x        * 7:
168                    (x < n24) ? o2 - (x - n4)  * 7:
169                                o1 - (x - n24) * 7;
170         }
171     };
172 
173     template <typename TValue, typename TResult = typename Value<typename Value<TValue, 2>::Type>::Type>
174     struct _skew7NMapExtended :
175         public std::unary_function<TValue, TResult>
176     {
177         inline TResult operator() (const TValue& x) const
178         {
179             return x.i2[0];
180         }
181     };
182 
183     template <typename TValue, unsigned EXT_LENGTH, typename TResult = int>
184     struct _skew7ExtendComp :
185         public std::binary_function<TValue, TValue, TResult>
186     {
187         inline TResult operator() (const TValue &a, const TValue &b) const
188         {
189             for (unsigned i = 0; i < EXT_LENGTH; i++)
190             {
191                 if (a.i3[i] == b.i3[i]) continue;
192                 return (a.i3[i] <  b.i3[i])? -1 : 1;
193             }
194             return (a.i2[0] < b.i2[0])? -1 : 1;
195         }
196     };
197 
198     // optimized for bitvectors
199     /*
200     template <typename T1, typename T2, typename T, const int _size, const int EXT_LENGTH, typename TResult>
201     struct _skew7ExtendComp< Triple<T1,T2,Tuple<T,_size, BitPacked<> >, Pack>, EXT_LENGTH, TResult> :
202         public std::binary_function<
203             Triple<T1,T2,Tuple<T,_size, BitPacked<> >, Pack>,
204             Triple<T1,T2,Tuple<T,_size, BitPacked<> >, Pack>,
205             TResult>
206     {
207         inline TResult operator() (
208             const Triple<T1,T2,Tuple<T,_size, BitPacked<> >, Pack> &a,
209             const Triple<T1,T2,Tuple<T,_size, BitPacked<> >, Pack> &b) const
210         {
211             if (a.i3 < b.i3) return -1;
212             if (a.i3 > b.i3) return 1;
213             return (a.i2[0] < b.i2[0])? -1 : 1;
214         }
215     };
216     */
217 
218     template < typename TInput >
219     struct Value< Pipe< TInput, Skew7 > > :
220         public Size<TInput> {};
221 
222     template <typename T>
223     struct Skew7StringSpec_:
224         public Spec<T> {};
225 
226     template <typename T, typename TStringSpec>
227     struct Skew7StringSpec_<String<T, TStringSpec> >
228     {
229         typedef TStringSpec Type;
230     };
231 
232     template <typename T, typename TSegmentSpec>
233     struct Skew7StringSpec_<Segment<T, TSegmentSpec> >:
234         public Skew7StringSpec_<T> {};
235 
236     template <typename T>
237     struct Skew7StringSpec_<T const>:
238         public Skew7StringSpec_<T> {};
239 
240     //////////////////////////////////////////////////////////////////////////////
241     // skew7 class
242     template < typename TInput >
243     struct Pipe< TInput, Skew7 >
244     {
245 
246         // *** SPECIALIZATION ***
247 
248         // use packing if lessorequal 16 different values per char
249         typedef typename IfC<
250             (BitsPerValue<TypeOf_(TInput)>::VALUE > 0) &&
251             (BitsPerValue<TypeOf_(TInput)>::VALUE <= 4),
252             BitPacked<>,
253             Pack>::Type TPack;
254 
255         // step 1
256         typedef Pipe< TInput, Sampler<7, TPack> >  TSamplerDC7;
257                                         typedef _skew7NComp<TypeOf_(TSamplerDC7)> ncomp_t;
258         typedef Pool< TypeOf_(TSamplerDC7), SorterSpec< SorterConfigSize<ncomp_t, TSizeOf_(TSamplerDC7) > > > TSortTuples;
259         typedef Pipe< TSortTuples, Namer<ncomp_t> > TNamer;
260                                         typedef _skew7NMapSliced<TypeOf_(TNamer)> nmap_sliced_t;
261                                         typedef _skew7NMapLinear<TypeOf_(TNamer)> nmap_linear_t;
262         typedef Pool< TypeOf_(TNamer), MapperSpec< MapperConfigSize< nmap_sliced_t, TSizeOf_(TNamer) > > > TNames_Sliced;
263 
264         // unique names - shortcut
265         typedef Pool< TypeOf_(TNames_Sliced), MapperSpec< MapperConfigSize< nmap_linear_t, TSizeOf_(TNames_Sliced) > > > TNames_Linear_Unique;
266 
267         // non-unique names
268         typedef Pipe< TNames_Sliced, Filter< filterI2<TypeOf_(TNames_Sliced)> > > TFilter;
269 
270             // recursion
271             typedef Pipe< TFilter, Skew3 > TRecurse;
272                                         typedef _skew7UnslicerFunc<TypeOf_(TRecurse)> unslicer_func_t;
273             typedef Pipe< TRecurse, Filter<unslicer_func_t> > TUnslicer;
274             typedef Pipe< TUnslicer, Counter > TRenamer;
275 
276             // no recursion inMemory shortcut
277             typedef Pipe< TFilter, LarssonSadakane > TInMem;
278             typedef Pipe< TInMem, Filter<unslicer_func_t> > TUnslicerInMem;
279             typedef Pipe< TUnslicerInMem, Counter > TRenamerInMem;
280 
281         typedef Pool< TypeOf_(TRenamer), MapperSpec< MapperConfigSize< nmap_linear_t, TSizeOf_(TRenamer) > > > TNames_Linear;
282 
283         // step 2
284         typedef Pipe< Bundle2< TInput, TNames_Linear >, Extender7<TPack> > TExtender;
285                                         typedef _skew7ExtendComp<TypeOf_(typename TExtender::Out0),3> extend0_comp_t;
286                                         typedef _skew7ExtendComp<TypeOf_(typename TExtender::Out6),2> extend6_comp_t;
287                                         typedef _skew7ExtendComp<TypeOf_(typename TExtender::Out5),1> extend5_comp_t;
288                                         typedef _skew7ExtendComp<TypeOf_(typename TExtender::Out3),1> extend3_comp_t;
289         typedef Pool< TypeOf_(typename TExtender::Out0), SorterSpec< SorterConfigSize< extend0_comp_t, TSizeOf_(typename TExtender::Out0) > > > TSorterS0;
290         typedef Pool< TypeOf_(typename TExtender::Out6), SorterSpec< SorterConfigSize< extend6_comp_t, TSizeOf_(typename TExtender::Out6) > > > TSorterS6;
291         typedef Pool< TypeOf_(typename TExtender::Out5), SorterSpec< SorterConfigSize< extend5_comp_t, TSizeOf_(typename TExtender::Out5) > > > TSorterS5;
292         typedef Pool< TypeOf_(typename TExtender::Out3), SorterSpec< SorterConfigSize< extend3_comp_t, TSizeOf_(typename TExtender::Out3) > > > TSorterS3;
293 
294         // step 3
295                                         typedef _skew7NMapExtended<TypeOf_(typename TExtender::Out124)> nmap_extended_t;
296         typedef Pool< TypeOf_(typename TExtender::Out124), MapperSpec< MapperConfigSize< nmap_extended_t, TSizeOf_(typename TExtender::Out124) > > > TSorterS124;
297         typedef Pipe< Bundle5< TSorterS0, TSorterS3, TSorterS5, TSorterS6, TSorterS124 >, Merger7 > TMerger;
298 
299         TSorterS0   sortedS0;
300         TSorterS3   sortedS3;
301         TSorterS5   sortedS5;
302         TSorterS6   sortedS6;
303         TSorterS124 sortedS124;
304         TMerger     in;
305 
306         Pipe():
307             in(bundle5(sortedS0, sortedS3, sortedS5, sortedS6, sortedS124)) {}
308 
309         Pipe(TInput& _textIn):
310             in(bundle5(sortedS0, sortedS3, sortedS5, sortedS6, sortedS124))
311         {
312             process(_textIn);
313         }
314 
315         template < typename TInput_ >
316         bool process(TInput_ &textIn) {
317 
318             SEQAN_PROADD(SEQAN_PRODEPTH, 1);
319             SEQAN_PROMARK("Enter recursion");
320             #ifdef SEQAN_DEBUG_INDEX
321                 std::cerr << "enter level " << SEQAN_PROVAL(SEQAN_PRODEPTH) << " bit-packing: ";
322                 std::cerr << IsSameType<TPack, BitPacked<> >::VALUE << " " << BitsPerValue<TypeOf_(TInput)>::VALUE << std::endl;
323             #endif
324             {
325 
326 
327             // *** INSTANTIATION ***
328 
329             // step 1
330             TSamplerDC7                 sampler(textIn);
331             TSortTuples                 sorter;
332             #ifdef SEQAN_DEBUG_INDEX
333                 std::cerr << "  sort names (" << length(sampler)<< ")" << std::endl;
334             #endif
335             sorter << sampler;
336             SEQAN_PROMARK("Sorter (2) - sort 7-mers");
337 
338             TNamer                      namer(sorter);
339             nmap_sliced_t               map_sliced(length(namer));
340             nmap_linear_t               map_linear(length(namer));
341             TNames_Sliced               names_sliced(map_sliced);
342             #ifdef SEQAN_DEBUG_INDEX
343                 std::cerr << "  slice names" << std::endl;
344             #endif
345             names_sliced << namer;
346 
347             if (namer.unique() || empty(names_sliced)) {
348                 // unique names
349 
350                 clear(sorter);
351                 SEQAN_PROMARK("Mapper (4) - construct s124");
352                 TNames_Linear_Unique        names_linear(map_linear);
353 
354                 #ifdef SEQAN_DEBUG_INDEX
355                     std::cerr << "  make names linear" << std::endl;
356                 #endif
357                 names_linear << names_sliced;
358                 clear(names_sliced);
359                 SEQAN_PROMARK("Mapper (10) - construct ISA124");
360 
361                 // step 2
362                 _skew7Extend(textIn, names_linear, sortedS0, sortedS3, sortedS5, sortedS6, sortedS124);
363 
364             } else {
365                 // non-unique names
366 
367                 clear(sorter);
368                 SEQAN_PROMARK("Mapper (4) - construct s124");
369 
370                 TFilter                     filter(names_sliced);
371                 TNames_Linear               names_linear(map_linear);
372 
373                 if (length(filter) > 128*1024*1024)
374                 {
375                     // recursion
376                     TRecurse                    recurse(filter);
377 
378                     #ifdef SEQAN_TEST_SKEW7
379                     {
380                         String<typename Value<TFilter>::Type, Alloc<> > _text;
381                         _text << filter;
382                         SEQAN_ASSERT(isSuffixArray(recurse, _text));
383                     }
384                     #endif
385 
386                     clear(filter);
387                     unslicer_func_t             func(length(textIn));
388                     TUnslicer                   unslicer(recurse, func);
389                     TRenamer                    renamer(unslicer);
390 
391                     #ifdef SEQAN_DEBUG_INDEX
392                         std::cerr << "  rename names" << std::endl;
393                     #endif
394 
395                     names_linear << renamer;
396                 }
397                 else
398                 {
399                     TInMem                        inMem(filter);
400 
401                     clear(filter);
402                     unslicer_func_t                func(length(textIn));
403                     TUnslicerInMem              unslicer(inMem, func);
404                     TRenamerInMem               renamer(unslicer);
405 
406                     #ifdef SEQAN_DEBUG_INDEX
407                         std::cerr << "  rename names" << std::endl;
408                     #endif
409 
410                     names_linear << renamer;
411                 }
412 
413                 SEQAN_PROMARK("Mapper (10) - ISA124 konstruieren");
414 
415                 // step 2
416                 #ifdef SEQAN_DEBUG_INDEX
417                     std::cerr << "  prepare merge" << std::endl;
418                 #endif
419                 _skew7Extend(textIn, names_linear, sortedS0, sortedS3, sortedS5, sortedS6, sortedS124);
420                 SEQAN_PROMARK("Mapper (12), Sorter (13-16) - merge SA124, SA3, SA5, SA6, SA0");
421             }
422 
423             // step 3
424             // ... is done on-demand by merger
425             }
426             #ifdef SEQAN_DEBUG_INDEX
427                 std::cerr << "left level " << SEQAN_PROVAL(SEQAN_PRODEPTH) << std::endl;
428             #endif
429             SEQAN_PROMARK("Left recursion");
430             SEQAN_PROSUB(SEQAN_PRODEPTH, 1);
431 
432             return true;
433         }
434 
435         inline typename Value<Pipe>::Type const operator*() {
436             return *in;
437         }
438 
439         inline Pipe& operator++() {
440             ++in;
441             return *this;
442         }
443     };
444 
445     // not sure which interface is more intuitive, we support both
446     // you can call "skew << pipe" or "skew_t skew(pipe); skew.process()"
447     // for the first we would need no _in member
448     template < typename TInput, typename TObject >
449     inline bool operator<<(Pipe< TInput, Skew7 > &me, TObject &textIn) {
450         return me.process(textIn);
451     }
452 
453 
454     //////////////////////////////////////////////////////////////////////////////
455     // internal Skew7 algorithm
456     //////////////////////////////////////////////////////////////////////////////
457 
458     //////////////////////////////////////////////////////////////////////////////
459     // typedefs and helpers
460 
461     // compares n characters and in case of equality the names a2 and b2 (no clipping)
462     template <typename TTextIter, typename TSize> inline
463     bool _leqSkew7(TTextIter a1, TSize a2,   TTextIter b1, TSize b2,   TSize n)
464     {
465         // lexic. order for n-tupels
466         for (; n != 0; --n, ++a1, ++b1) {
467             if (ordLess(*a1, *b1)) return true;
468             if (ordLess(*b1, *a1)) return false;
469         }
470         return (a2 <= b2);
471     }
472 
473     // compares at most the last n characters (a) with b (clipping)
474     template <typename TTextIter, typename TSize> inline
475     bool _leqSkew7(TTextIter a,   TTextIter b,   TSize n)
476     { // lexic. order for n-tupels
477         for (; n != 0; --n, ++a, ++b) {
478             if (ordLess(*a, *b)) return true;
479             if (ordLess(*b, *a)) return false;
480         }
481         return true;    // a is shorter than b
482     }
483 
484     // compares two suffixes of residue classes a and b
485     template <typename TTextIter, typename TSize, typename TString> inline
486     bool _leqSkew7(unsigned a, unsigned b,   TTextIter spos[], const TSize tpos[], const bool islast[], const TString &s124, const TSize adjust[7][7])
487     {
488         TTextIter sa = spos[a];
489         TTextIter sb = spos[b];
490         TSize shft = SkewShift_<7>::VALUE[a][b];
491         if (sa > sb) {
492             if ((a != 0) && (a < shft) && islast[a]) // do we need to clip?
493                 return _leqSkew7 (sa,   sb,   a);
494         } else {
495             if ((b != 0) && (b < shft) && islast[b]) // do we need to clip?
496                 return !_leqSkew7 (sb,   sa,   b);
497         }
498         return _leqSkew7 (sa, s124[tpos[a] + adjust[a][b]],   sb, s124[tpos[b] + adjust[b][a]],   shft);
499     }
500 
501     template <typename TSAOut, typename TText, typename TSAIn, typename TEnable>
502     inline bool _fastTupleSortSkew7(TSAOut &, TText const &, TSAIn const &, TEnable)
503     {
504         return false;
505     }
506 
507     template <typename TSAOut, typename TText, typename TSAIn>
508     inline bool _fastTupleSortSkew7(TSAOut &SA124, TText const &s, TSAIn const &s124, True)
509     {
510         typedef typename Value<TText>::Type TValue;
511         typedef typename Value<TSAOut>::Type TSize;
512         typedef typename Iterator<TText const, Standard>::Type TValueIter;
513         typedef typename Iterator<TSAIn const, Standard>::Type TSAIter;
514 
515         // optimized tuple sort for short alphabets
516         Shape<TValue, UngappedShape<7> > shape;
517         String<TSize, Alloc<> > cnt;
518         resize(cnt, _fullDir2Length(shape), 0, Exact());
519 
520         TValueIter textBegin = begin(s, Standard());
521         TSAIter it = begin(s124, Standard());
522         TSAIter itEnd = end(s124, Standard());
523 
524         TSize n = length(s);
525         for(; it != itEnd; ++it)
526             ++cnt[hash2(shape, textBegin + *it, n - *it)];
527 
528         _qgramCummulativeSum(cnt, False());
529 
530         it = begin(s124, Standard());
531         for(; it != itEnd; ++it)
532             SA124[cnt[hash2(shape, textBegin + *it, n - *it) + 1]++] = *it;
533 
534         return true;
535     }
536 
537 
538     //////////////////////////////////////////////////////////////////////////////
539     // Skew algorithm with difference cover of Z_7
540     //
541     // Construct the suffix array SA of s[0..n-1], where s is a string over
542     // the alphabet {0..K}.
543     //
544     // The following algorithm divides the suffixes in seven residue classes
545     // according to their lengths and uses a difference cover {1,2,4}.
546     // That approach results in an algorithm that is more space and time efficient
547     // than the original skew algorithm with a difference cover {1,2} of Z_3.
548     //
549     // * no trailing 0's required
550     // * no dummy triples in special cases
551 
552     template < typename TSA,
553                typename TText >
554     void createSuffixArray(
555         TSA &SA,
556         TText &s,
557         Skew7 const &,
558         unsigned K,
559         unsigned maxdepth,
560         unsigned depth)
561     {
562         typedef typename Value<TSA>::Type TSize;
563         typedef typename Value<TText>::Type TValue;
564         typedef String<TSize, Alloc<> > TBuffer;
565         typedef typename Iterator<TSA, Standard>::Type TSAIter;
566         //typedef typename Iterator<TBuffer, Standard>::Type TBufferIter;
567         typedef typename Iterator<TText const, Standard>::Type TValueIter;
568 
569         SEQAN_ASSERT(AllowsFastRandomAccess<TText>::VALUE == true);
570         SEQAN_ASSERT(AllowsFastRandomAccess<TSA>::VALUE == true);
571 
572         #ifdef SEQAN_DEBUG_INDEX
573             std::cerr << "--- CREATE SUFFIX ARRAY ---" << std::endl;
574             std::cerr << "Skew7 [random access]" << std::endl;
575         #endif
576 
577         TSize n = length(s);
578         if (n < 1) return;
579 
580         TSize _n[7];
581         TSize _o[7];
582 
583         _n[0] = n/7;
584         _o[0] = n%7;
585         TSize j = n + 6;
586         for(int i = 1; i < 7; ++i, --j) {
587             _n[i] = j/7;
588             _o[i] = j%7;
589         }
590 
591         TSize _n24  = _n[2]+_n[4];
592         TSize _n124 = _n[1]+_n24;
593 
594         SEQAN_PROSET(SEQAN_PRODEPTH, depth);
595         SEQAN_PROSET(SEQAN_PROEXTRA1, K);
596         SEQAN_PROMARK("Rekursionsabstieg");
597         #ifdef SEQAN_DEBUG_INDEX
598             std::cerr << "enter level " << depth << " (" << n << ")" << std::endl;
599         #endif
600 
601         TBuffer s124;
602         resize(s124, _n124, Exact());
603         // we use SA[n-n124..n-1] as a temporary buffer instead of allocating one
604         typename Suffix<TSA>::Type SA124 = suffix(SA, n - _n124);
605 
606 
607         // generate positions of mod 3, mod 5 and mod 6 suffixes
608         {
609             TSize j = 0;
610             if (_n[2] > _n[4]) s124[j++] = _o[2];
611             if (_n[1] > _n[4]) s124[j++] = _o[1];
612 
613             for (TSize i=_o[4];  i < n;  i+=7) {
614                 s124[j++] = i;
615                 s124[j++] = i + 2;
616                 s124[j++] = i + 3;
617             }
618         }
619 
620 
621         // lsb radix sort the mod 3, mod 5 and mod 6 7-tupels
622         if (!_fastTupleSortSkew7(SA124, s, s124, typename Eval<BitsPerValue<TValue>::VALUE < 4>::Type()))
623         {
624             String<TSize, Alloc<> > cnt;
625             resize(cnt, K, Exact());    // counter array
626 
627             radixPass(SA124, s124,  s, cnt, K, 6);
628             radixPass(s124,  SA124, s, cnt, K, 5);
629             radixPass(SA124, s124,  s, cnt, K, 4);
630             radixPass(s124,  SA124, s, cnt, K, 3);
631             radixPass(SA124, s124,  s, cnt, K, 2);
632             radixPass(s124,  SA124, s, cnt, K, 1);
633             radixPass(SA124, s124,  s, cnt, K);
634         }
635         SEQAN_PROMARK("7-lets sortiert");
636 
637         // find lexicographic names of 7-tupel
638         TSize name = 0;
639         {
640             TSize ofs[7] = {0, _n24, _n[4], 0, 0, 0, 0};
641             bool differ = true;
642             TValue c0 = TValue(0), c1 = TValue(0), c2 = TValue(0), c3 = TValue(0), c4 = TValue(0), c5 = TValue(0), c6 = TValue(0);
643             for (TSize i = 0, clip = _max(n, (TSize)6) - 6, l;  i < _n124;  i++) {
644                 if ((l = SA124[i]) < clip) {
645                     if (differ || s[l] != c0 || s[l+1] != c1 || s[l+2] != c2 || s[l+3] != c3 ||
646                                                 s[l+4] != c4 || s[l+5] != c5 || s[l+6] != c6) {
647                         name++;  c0 = s[l];  c1 = s[l+1];  c2 = s[l+2];  c3 = s[l+3];
648                                              c4 = s[l+4];  c5 = s[l+5];  c6 = s[l+6];
649                         differ = false;
650                     }
651                 } else {
652                     name++;
653                     differ = true;  // the last 6 7-tupels always differ from the rest
654                 }
655                 s124[(l/7) + ofs[(n-l) % 7]] = name - 1;   // select a third
656             }
657         }
658         SEQAN_PROMARK("s12 konstruiert");
659 
660         // recurse if names are not yet unique
661         if (name < _n124) {
662             if (depth != maxdepth)
663             {
664                 createSuffixArray(SA124, s124, Skew7(), name, maxdepth, depth + 1);
665                 #ifdef SEQAN_TEST_SKEW7
666                     SEQAN_ASSERT(isSuffixArray(SA124, s124));
667                 #endif
668             }
669             // store unique names in s124 using the suffix array
670             for (TSize i = 0;  i < _n124;  i++) s124[SA124[i]] = i;
671         } else // generate the suffix array of s124 directly
672             for (TSize i = 0;  i < _n124;  i++) SA124[s124[i]] = i;
673 
674 
675         // use SA[0...n3-1] and SA[n3...n3+n5-1] as a temporary buffers instead of allocating some
676         // and allocate SA0, SA3, SA5 and SA6
677 
678         {
679             typename Infix<TSA>::Type s3 = infix(SA, 0, _n[3]), s5 = infix(SA, _n[3], _n[3] + _n[5]);
680             String<TSize, typename Skew7StringSpec_<TSA>::Type> SA0, SA3, SA5, SA6;
681 
682             resize(SA0, _n[0], Exact());
683             resize(SA3, _n[3], Exact());
684             resize(SA5, _n[5], Exact());
685             resize(SA6, _n[6], Exact());
686 
687             // stably sort the mod 5 and mod 3 suffixes from SA124 by their first character
688             {
689                 for (TSize i=0, j3=0, j5=0, l;  i < _n124;  i++) {
690                     l = SA124[i];
691                     if (l < _n[4]) {
692                         if ((l = _o[4] + (7 * l)) > 0)
693                             s5[j5++] = l - 1;
694                     } else if (l < _n24) {
695                         if ((l = _o[2] + (7 * (l - _n[4]))) > 0)
696                             s3[j3++] = l - 1;
697                     }
698                 }
699 
700                 {
701                     String<TSize, Alloc<> > cnt;
702                     resize(cnt, K, Exact());    // counter array
703 
704                     radixPass(SA3, s3, s, cnt, K);
705                     SEQAN_PROMARK("SA3 konstruiert");
706 
707                     radixPass(SA5, s5, s, cnt, K);
708                     SEQAN_PROMARK("SA5 konstruiert");
709 
710                     // stably sort the mod 6 suffixes from SA5 by their first character
711 
712                     if (_n[5] == _n[6]) radixExtend    (SA6, SA5, s, cnt, K);
713                     else                radixExtendClip(SA6, SA5, s, cnt, K);
714                     SEQAN_PROMARK("SA6 konstruiert");
715 
716                     // stably sort the mod 0 suffixes from SA6 by their first character
717 
718                     if (_n[6] == _n[0]) radixExtend    (SA0, SA6, s, cnt, K);
719                     else                radixExtendClip(SA0, SA6, s, cnt, K);
720                     SEQAN_PROMARK("SA0 konstruiert");
721                 }
722             }
723 
724             // MULTIWAY MERGE all SA_ streams
725             {
726                 // a helper matrix to lex-name-compare every combination of suffixes
727                 TSize adjust[7][7] =
728                     //      0               1              2             3             4              5               6
729                    {{0             , _n124-_n[0]   , _n24-_n[0]  , _n124-_n[0] , _n[4]-_n[0] , _n[4]-_n[0]   , _n24-_n[0]    },  // 0
730                     {1-_n[1]       , 0             , 0           , 1-_n[1]     , 0           , 1-_n[1]-_n[2] , 1-_n[1]-_n[2] },  // 1*
731                     {1-_n[2]       , 0             , 0           , _n[1]       , 0           , _n[1]         , 1-_n[2]       },  // 2*
732                     {1+_n[4]-_n[3] , 1+_n[4]-_n[3] , _n24-_n[3]  , 0           , _n124-_n[3] , _n24-_n[3]    , _n124-_n[3]   },  // 3
733                     {_n[1]+_n[2]   , 0             , 0           ,_n[2]        , 0           , _n[1]+_n[2]   , _n[2]         },  // 4*
734                     {_n24-_n[5]    , _n124-_n[5]   , _n[4]-_n[5] , _n[4]-_n[5] , _n24-_n[5]  , 0             , _n124-_n[5]   },  // 5
735                     {_n124-_n[6]   , _n24-_n[6]    , _n124-_n[6] , _n[4]-_n[6] , _n[4]-_n[6] , _n24-_n[6]    , 0             }}; // 6
736 
737                 TSAIter pos[7] = {begin(SA0, Standard())      , begin(SA124, Standard())      , begin(SA124, Standard())      ,
738                                   begin(SA3, Standard())      , begin(SA124, Standard())      , begin(SA5, Standard())        , begin(SA6, Standard())      };
739                 TSAIter max[7] = {begin(SA0, Standard())+_n[0], begin(SA124, Standard())+_n124, begin(SA124, Standard())+_n124,
740                                   begin(SA3, Standard())+_n[3], begin(SA124, Standard())+_n124, begin(SA5, Standard())+_n[5]  , begin(SA6, Standard())+_n[6]};
741                 TValueIter spos[7];
742                 TSize tpos[7];
743                 bool islast[7];
744 
745                 int a, b, rank[5];
746                 int fill = 0;
747                 TSize k = 0;
748 
749                 #define SEQAN_GET_ISKEW7(ii) (ii < _n[4] ? (ii * 7) + _o[4] : (ii < _n24 ? ((ii - _n[4]) * 7) + _o[2] : ((ii - _n24) * 7) + _o[1]))
750                 #define SEQAN_GET_ASKEW7(ii) (ii < _n[4] ? 4 : (ii < _n24 ? 2 : 1))
751 
752                 // fill the stream ranking list
753                 for(int i = 0; i < 7; ++i) {
754                     if (!_n[i]) continue;
755                     if (i == 2 || i == 4) continue; // insert only the least suffix of SA124
756 
757                     if (i == 1) {
758 
759                         TSize ii  = *(pos[1]);
760                         a         = SEQAN_GET_ASKEW7(ii);
761                         TSize j      = SEQAN_GET_ISKEW7(ii);
762 
763                         tpos[a]   = ii;
764                         spos[a]   = begin(s, Standard()) + j;
765                         islast[a] = (j + 7 >= n);
766 
767                     } else {
768 
769                         a = i;
770                         TSize j   = *(pos[a]);
771 
772                         tpos[a]   = j / 7;
773                         spos[a]   = begin(s, Standard()) + j;
774                         islast[a] = (j + 7 >= n);
775 
776                     }
777 
778                     // get the rank of stream a's suffix
779                     int j;
780                     for(j = 0;  j < fill;  ++j) {
781                         b = rank[j];
782                         if (_leqSkew7 (a,  b,   spos, tpos, islast, s124, adjust)) break;
783                     }
784 
785                     // insert the suffix
786                     for(int i = fill; i > j; --i)
787                         rank[i] = rank[i-1];
788                     rank[j] = a;
789                     fill++;
790                 }
791 
792                 // main merge loop
793                 // in order to find the least suffix in every step we use a stream ranking list
794                 // so we only need to keep up the ordering and thus rank[0] is always the least suffix
795                 while (fill > 1) {
796 
797                     // add the least suffix to SA and get the next of the corresponding stream
798                     a = rank[0];
799                     SA[k++] = spos[a] - begin(s, Standard());
800                     if (a == 1 || a == 2 || a == 4)
801                         pos[4] = pos[2] = ++pos[1];
802                     else
803                         ++pos[a];
804 
805                     if (pos[a] < max[a]) {
806 
807                         // set corresponding spos, tpos, islast values and adapt a if necessary
808                         if (a == 1 || a == 2 || a == 4) {
809 
810                             TSize ii  = *(pos[1]);
811                             a          = SEQAN_GET_ASKEW7(ii);
812                             TSize j   = SEQAN_GET_ISKEW7(ii);
813 
814                             tpos[a]   = ii;
815                             spos[a]   = begin(s, Standard()) + j;
816                             islast[a] = (j + 7 >= n);
817 
818                         } else {
819 
820                             TSize j   = *(pos[a]);
821 
822                             tpos[a]   = j / 7;
823                             spos[a]   = begin(s, Standard()) + j;
824                             islast[a] = (j + 7 >= n);
825 
826                         }
827 
828                         // get the rank of stream a's suffix
829 
830                         // linear search
831                         int right;
832                         for(right = 1;  right < fill;  right++) {
833                             b = rank[right];
834                             if (_leqSkew7 (a,  b,   spos, tpos, islast, s124, adjust)) break;
835                         }
836 /*
837                         // binary search
838                         int left = 0;
839                         int right = fill;
840                         while (left + 1 != right) {
841                             int middle = (left + right) >> 2;
842                             if (leq<TValue, TSize> (a,  rank[middle],   spos, tpos, islast, s124, adjust))
843                                 right = middle;
844                             else
845                                 left = middle;
846                         }*/
847 
848                         // remove the least suffix ...
849                         for(int i = 1; i < right; ++i)
850                             rank[i-1] = rank[i];
851 
852                         // ... and insert the new one
853                         rank[right-1] = a;
854 
855                     } else {
856                         // only remove the least suffix
857                         fill--;
858                         for(int i = 0; i < fill; ++i)
859                             rank[i] = rank[i+1];
860                     }
861                 }
862 
863                 // only one (or less) stream left to fill SA with
864                 a = rank[0];
865                 if (a == 1 || a == 2 || a == 4)
866                     for (;  k < n;  ++k) { TSize ii = *(pos[1]++); SA[k] = SEQAN_GET_ISKEW7(ii); }
867                 else
868                     for (;  k < n;  ++k) SA[k] = *(pos[a]++);
869             }
870         }
871         SEQAN_PROMARK("SA124, SA3, SA5, SA6, SA0 verschmolzen");
872 
873         #ifdef SEQAN_DEBUG_INDEX
874             std::cerr << "left level " << depth << std::endl;
875         #endif
876         SEQAN_PROMARK("Rekursionsaufstieg");
877         SEQAN_PROSUB(SEQAN_PRODEPTH, 1);
878     }
879 
880     template < typename TSA,
881                typename TText >
882     inline void createSuffixArray(
883         TSA &SA,
884         TText &s,
885         Skew7 const &alg,
886         unsigned K,
887         unsigned maxdepth)
888     {
889         createSuffixArray(SA, s, alg, K, maxdepth, 1);
890     }
891 
892     // creates suffix array sorted by the first maxLCP chars of suffixes
893     template < typename TSA,
894                typename TText,
895                typename TSize >
896     inline void createSuffixArrayPart(
897         TSA &SA,
898         TText &s,
899         Skew7 const &_dummy,
900         TSize maxLCP,
901         unsigned K)
902     {
903         unsigned depth = 0;
904         for(TSize i = 1; i < maxLCP; i*=7) ++depth;
905         createSuffixArray(SA, s, _dummy, K, depth);
906     }
907 
908 
909     // creates suffix array sorted by the first maxLCP chars of suffixes
910     template < typename TSA,
911                typename TText,
912                typename TSize >
913     inline void createSuffixArrayPart(
914         TSA &SA,
915         TText &s,
916         Skew7 const &_dummy,
917         TSize maxLCP)
918     {
919         SEQAN_CHECKPOINT;
920         createSuffixArrayPart(SA, s, _dummy, maxLCP, ValueSize< typename Value<TText>::Type >::VALUE);
921     }
922 //}
923 
924 }
925 
926 #endif
927