1 // ==========================================================================
2 //                           index_qgram_parallel.h
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef INCLUDE_SEQAN_INDEX_INDEX_QGRAM_PARALLEL_H_
36 #define INCLUDE_SEQAN_INDEX_INDEX_QGRAM_PARALLEL_H_
37 
38 namespace seqan {
39 
40 // ============================================================================
41 // Forwards
42 // ============================================================================
43 
44 // ============================================================================
45 // Tags, Classes, Enums
46 // ============================================================================
47 
48 // TODO(weese): should go to basic/metaprogramming...
49 template <bool VALUE_>
50 struct Bool: public Eval<VALUE_> {};
51 
52 template <unsigned VALUE_>
53 struct Unsigned
54 {
55     enum { VALUE = VALUE_ };
56 };
57 
58 template <int VALUE_>
59 struct Integer
60 {
61     enum { VALUE = VALUE_ };
62 };
63 
64 
65 // ============================================================================
66 // Metafunctions
67 // ============================================================================
68 
69 // ============================================================================
70 // Functions
71 // ============================================================================
72 
73 template < typename TIndex, typename TParallelTag >
_qgramDisableBuckets(TIndex & index,Tag<TParallelTag>)74 inline bool _qgramDisableBuckets(TIndex &index, Tag<TParallelTag>)
75 {
76 	// use the serial version (if no parallel overload is available)
77     return _qgramDisableBuckets(index);
78 }
79 
80 //////////////////////////////////////////////////////////////////////////////
81 // Counting sort - Step 3: Cumulative sum
82 //
83 // a disabled bucket 3,2,x,4   (x = disabled)               y_i=sum_{j<=i} x_j
84 // results in        0,0,3,x,5 (_qgramCummulativeSum)       z_i=y_{i-2}
85 // or in             0,3,5,5,9 (_qgramCummulativeSumAlt)    z_i=y_{i-1}
86 
87 //    3 2 1 | x x 5 |
88 // 0 0 3 | 5 x x | 6 11
89 //   0 3 5 6 6 6 11
90 
91 // 3 2 1 | 3*  2  5 |
92 // 3 5 6 | 9 11 16 |
93 // 0 3 5 | 6  9 11 | 16     SHIFT 0
94 // 0 0 3 | 5  6*  9 | 11 16  SHIFT 1
95 
96 
97 // First two entries are 0.
98 // Step 4 increments the entries hash(qgram)+1 on-the-fly while filling the SA table.
99 // After step 4 each entry (0..n-1) is the beginning of a qgram bucket.
100 
101 template < typename TSequence, typename TParallelTag >
102 inline typename Value<TSequence>::Type
_sumIgnoreDisabled(TSequence const & seq,Tag<TParallelTag>)103 _sumIgnoreDisabled(TSequence const &seq, Tag<TParallelTag>)
104 {
105     typedef typename Value<TSequence>::Type TValue;
106     typename Iterator<TSequence const>::Type it = begin(seq, Standard());
107     typename Iterator<TSequence const>::Type itEnd = end(seq, Standard());
108     TValue sum = 0;
109     for (; it != itEnd; ++it)
110         if (*it != (TValue)-1)
111             sum += *it;
112     return sum;
113 }
114 
115 //////////////////////////////////////////////////////////////////////////////
116 // Counting sort - Step 2: Count q-grams
117 template < typename TDir, typename TBucketMap, typename TText, typename TShape, typename TStepSize, typename TParallelTag >
118 inline void
_qgramCountQGrams(TDir & dir,TBucketMap & bucketMap,TText const & text,TShape shape,TStepSize stepSize,Tag<TParallelTag> parallelTag)119 _qgramCountQGrams(TDir &dir, TBucketMap &bucketMap, TText const &text, TShape shape, TStepSize stepSize, Tag<TParallelTag> parallelTag)
120 {
121     typedef typename Iterator<TText const, Standard>::Type  TIterator;
122     typedef typename Iterator<TDir, Standard>::Type         TDirIterator;
123     typedef typename Value<TDir>::Type                      TSize;
124 
125     if (empty(shape) || length(text) < length(shape))
126         return;
127 
128     TSize num_qgrams = (length(text) - length(shape)) / stepSize + 1;
129     TDirIterator dirBegin = begin(dir, Standard());
130     Splitter<TSize> splitter(0, num_qgrams, parallelTag);
131 
132     if (stepSize == 1)
133     {
134         SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
135         for (int job = 0; job < (int)length(splitter); ++job)
136         {
137             TIterator itText = begin(text, Standard()) + splitter[job];
138             TIterator itTextEnd = begin(text, Standard()) + splitter[job + 1];
139 
140             atomicInc(*(dirBegin + requestBucket(bucketMap, hash(shape, itText), parallelTag)), parallelTag);
141             for (++itText; itText != itTextEnd; ++itText)
142                 atomicInc(*(dirBegin + requestBucket(bucketMap, hashNext(shape, itText), parallelTag)), parallelTag);
143         }
144     }
145     else
146     {
147         SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
148         for (int job = 0; job < (int)length(splitter); ++job)
149         {
150             TIterator itText = begin(text, Standard()) + (splitter[job] * stepSize);
151             TIterator itTextEnd = begin(text, Standard()) + (splitter[job + 1] * stepSize);
152 
153             for (; itText != itTextEnd; itText += stepSize)
154                 atomicInc(*(dirBegin + requestBucket(bucketMap, hash(shape, itText), parallelTag)), parallelTag);
155         }
156     }
157 }
158 
159 template < typename TDir, typename TBucketMap, typename TString, typename TSpec, typename TShape, typename TStepSize, typename TParallelTag >
160 inline void
_qgramCountQGrams(TDir & dir,TBucketMap & bucketMap,StringSet<TString,TSpec> const & stringSet,TShape shape,TStepSize stepSize,Tag<TParallelTag> parallelTag)161 _qgramCountQGrams(TDir &dir, TBucketMap &bucketMap, StringSet<TString, TSpec> const &stringSet, TShape shape, TStepSize stepSize, Tag<TParallelTag> parallelTag)
162 {
163     typedef typename Iterator<TString const, Standard>::Type    TIterator;
164     typedef typename Iterator<TDir, Standard>::Type             TDirIterator;
165     typedef typename Value<TDir>::Type                          TSize;
166 
167     if (empty(shape) || empty(stringSet))
168         return;
169 
170     TDirIterator dirBegin = begin(dir, Standard());
171     Splitter<TSize> seqSplitter(0, countSequences(stringSet), parallelTag);
172 
173     if (stepSize == 1)
174     {
175         SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
176         for (int job = 0; job < (int)length(seqSplitter); ++job)
177         {
178 			for(unsigned seqNo = seqSplitter[job]; seqNo < seqSplitter[job + 1]; ++seqNo)
179 			{
180 				TString const &sequence = value(stringSet, seqNo);
181 				if (length(sequence) < length(shape)) continue;
182 
183                 TIterator itText = begin(stringSet[seqNo], Standard());
184                 TIterator itTextEnd = itText + (length(sequence) - length(shape) + 1);
185                 atomicInc(*(dirBegin + requestBucket(bucketMap, hash(shape, itText), parallelTag)), parallelTag);
186                 for (++itText; itText != itTextEnd; ++itText)
187                     atomicInc(*(dirBegin + requestBucket(bucketMap, hashNext(shape, itText), parallelTag)), parallelTag);
188             }
189         }
190     }
191     else
192     {
193         SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
194         for (int job = 0; job < (int)length(seqSplitter); ++job)
195         {
196 			for(unsigned seqNo = seqSplitter[job]; seqNo < seqSplitter[job + 1]; ++seqNo)
197 			{
198 				TString const &sequence = value(stringSet, seqNo);
199 				if (length(sequence) < length(shape)) continue;
200 
201                 TIterator itText = begin(stringSet[seqNo], Standard());
202                 TIterator itTextEnd = itText + ((length(sequence) - length(shape)) / stepSize + 1) * stepSize;
203                 for (; itText != itTextEnd; itText += stepSize)
204                     atomicInc(*(dirBegin + requestBucket(bucketMap, hash(shape, itText), parallelTag)), parallelTag);
205             }
206         }
207     }
208 }
209 
210 template < typename TDir, typename TWithConstraints, typename TKeepDisabledBuckets, unsigned SHIFT, typename TParallelTag >
211 inline typename Value<TDir>::Type
_qgramCummulativeSum(TDir & dir,TWithConstraints,TKeepDisabledBuckets,Unsigned<SHIFT>,Tag<TParallelTag> parallelTag)212 _qgramCummulativeSum(TDir &dir, TWithConstraints, TKeepDisabledBuckets, Unsigned<SHIFT>, Tag<TParallelTag> parallelTag)
213 {
214     typedef typename Value<TDir>::Type TValue;
215     typedef typename Size<TDir>::Type TSize;
216     typedef String<TValue> TBuffer;
217     typedef typename Iterator<TDir const, Standard>::Type TConstIterator;
218     typedef typename Iterator<TDir, Standard>::Type TIterator;
219     typedef typename Iterator<TBuffer, Standard>::Type TConstBufferIterator;
220 
221     if (empty(dir))
222         return 0;
223 
224     Splitter<TSize> splitter(0, length(dir), parallelTag);
225     String<TValue> localSums;
226     TBuffer prevCounts;
227     resize(localSums, length(splitter), Exact());
228     resize(prevCounts, length(splitter) * SHIFT, 0, Exact());
229     localSums[0] = 0;
230 
231     // STEP 1: compute sums of all subintervals (in parallel)
232     //
233     SEQAN_OMP_PRAGMA(parallel for)
234     for (int job = 0; job < (int)length(splitter); ++job)
235     {
236         unsigned len = _min(SHIFT, splitter[job]);
237         replace(prevCounts, (job + 1) * SHIFT - len, (job + 1) * SHIFT, infix(dir, splitter[job] - len, splitter[job]));
238 
239         typename Infix<TDir>::Type dirInfix = infix(
240             dir,
241             _max((int64_t)0, (int64_t)splitter[job] - (int64_t)SHIFT),
242             _max((int64_t)0, (int64_t)splitter[job + 1] - (int64_t)SHIFT));
243 
244         if (TWithConstraints::VALUE)
245             localSums[job] = _sumIgnoreDisabled(dirInfix, Serial());
246         else
247             localSums[job] = sum(dirInfix, Serial());
248     }
249 
250     // STEP 2: compute partial sums (of subinterval sums) from position 0 to the end of each subinterval
251     //
252     for (int job = 1; job < (int)length(splitter); ++job)
253         localSums[job] += localSums[job - 1];
254 
255     // STEP 3: compute partial sums of each subinterval starting from offset (in parallel)
256     //
257     SEQAN_OMP_PRAGMA(parallel for)
258     for (int job = 0; job < (int)length(splitter); ++job)
259     {
260         TConstIterator itBegin = begin(dir, Standard()) + splitter[job];
261         TIterator dstIt = begin(dir, Standard()) + splitter[job + 1];
262         TValue sum = localSums[job];
263 
264         // read over our subinterval
265         {
266             TConstIterator it = begin(dir, Standard()) + splitter[job + 1] - SHIFT;
267             while (it != itBegin)
268             {
269                 TValue counter = *(--it);
270                 if (!TWithConstraints::VALUE || counter != (TValue)-1)
271                     sum -= counter;
272                 else
273                     if (TKeepDisabledBuckets::VALUE)
274                     {
275                         *(--dstIt) = (TValue)-1;
276                         continue;
277                     }
278                 *(--dstIt) = sum;
279             }
280         }
281 
282         // read suffix of the previous subinterval
283         if (SHIFT != 0u)
284         {
285             TConstBufferIterator it = begin(prevCounts, Standard()) + (job + 1) * SHIFT;
286             while (dstIt != itBegin)
287             {
288                 TValue counter = *(--it);
289                 if (!TWithConstraints::VALUE || counter != (TValue)-1)
290                     sum -= counter;
291                 else
292                     if (TKeepDisabledBuckets::VALUE)
293                     {
294                         *(--dstIt) = (TValue)-1;
295                         continue;
296                     }
297                 *(--dstIt) = sum;
298             }
299         }
300     }
301 
302     return back(localSums);
303 }
304 
305 //////////////////////////////////////////////////////////////////////////////
306 // Counting sort - Step 4: Fill suffix array
307 // w/o constraints
308 template <
309     typename TSA,
310     typename TText,
311     typename TShape,
312     typename TDir,
313     typename TBucketMap,
314     typename TWithConstraints,
315     typename TStepSize,
316     typename TParallelTag >
317 inline void
_qgramFillSuffixArray(TSA & sa,TText const & text,TShape shape,TDir & dir,TBucketMap & bucketMap,TStepSize stepSize,TWithConstraints const,Tag<TParallelTag> parallelTag)318 _qgramFillSuffixArray(
319     TSA &sa,
320     TText const &text,
321     TShape shape,
322     TDir &dir,
323     TBucketMap &bucketMap,
324     TStepSize stepSize,
325     TWithConstraints const,
326     Tag<TParallelTag> parallelTag)
327 {
328     typedef typename Iterator<TText const, Standard>::Type  TIterator;
329     typedef typename Iterator<TDir, Standard>::Type         TDirIterator;
330     typedef typename Value<TDir>::Type                      TSize;
331 
332     if (empty(shape) || length(text) < length(shape))
333         return;
334 
335     TSize num_qgrams = (length(text) - length(shape)) / stepSize + 1;
336     TDirIterator dirBegin = begin(dir, Standard());
337     TDirIterator dirBegin1 = dirBegin + 1;
338     Splitter<TSize> splitter(0, num_qgrams, parallelTag);
339 
340     if (stepSize == 1)
341     {
342         SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
343         for (int job = 0; job < (int)length(splitter); ++job)
344         {
345             TSize pos = splitter[job];
346             TSize posEnd = splitter[job + 1];
347             if (pos == posEnd) continue;
348             TIterator itText = begin(text, Standard()) + pos;
349 
350             // first hash
351             TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hash(shape, itText));
352             if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1)       // ignore disabled buckets
353                 sa[atomicPostInc(*bktPtr, parallelTag)] = pos;
354 
355             for (++pos; pos != posEnd; ++pos)
356             {
357                 TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hashNext(shape, ++itText));
358                 if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1)   // ignore disabled buckets
359                     sa[atomicPostInc(*bktPtr, parallelTag)] = pos;
360             }
361         }
362     }
363     else
364     {
365         SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
366         for (int job = 0; job < (int)length(splitter); ++job)
367         {
368             TSize pos = splitter[job] * stepSize;
369             TSize posEnd = splitter[job + 1] * stepSize;
370             TIterator itText = begin(text, Standard()) + pos;
371 
372             for (; pos != posEnd; pos += stepSize, itText += stepSize)
373             {
374                 TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hash(shape, itText));
375                 if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1)   // ignore disabled buckets
376                     sa[atomicPostInc(*bktPtr, parallelTag)] = pos;
377             }
378         }
379     }
380 }
381 
382 template <
383     typename TSA,
384     typename TString,
385     typename TSpec,
386     typename TShape,
387     typename TDir,
388     typename TBucketMap,
389     typename TWithConstraints,
390     typename TStepSize,
391     typename TParallelTag >
392 inline void
_qgramFillSuffixArray(TSA & sa,StringSet<TString,TSpec> const & stringSet,TShape shape,TDir & dir,TBucketMap & bucketMap,TStepSize stepSize,TWithConstraints const,Tag<TParallelTag> parallelTag)393 _qgramFillSuffixArray(
394     TSA &sa,
395     StringSet<TString, TSpec> const &stringSet,
396     TShape shape,
397     TDir &dir,
398     TBucketMap &bucketMap,
399     TStepSize stepSize,
400     TWithConstraints const,
401     Tag<TParallelTag> parallelTag)
402 {
403     typedef typename Iterator<TString const, Standard>::Type  TIterator;
404     typedef typename Iterator<TDir, Standard>::Type         TDirIterator;
405     typedef typename Value<TDir>::Type                      TSize;
406 
407     if (empty(shape) || empty(stringSet))
408         return;
409 
410     TDirIterator dirBegin = begin(dir, Standard());
411     TDirIterator dirBegin1 = dirBegin + 1;
412     Splitter<TSize> seqSplitter(0, countSequences(stringSet), parallelTag);
413 
414     if (stepSize == 1)
415     {
416         SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
417         for (int job = 0; job < (int)length(seqSplitter); ++job)
418         {
419 			for(unsigned seqNo = seqSplitter[job]; seqNo < seqSplitter[job + 1]; ++seqNo)
420 			{
421 				TString const &sequence = value(stringSet, seqNo);
422 				if (length(sequence) < length(shape)) continue;
423 
424                 TIterator itText = begin(stringSet[seqNo], Standard());
425                 TIterator itTextEnd = itText + (length(sequence) - length(shape) + 1);
426 
427 				typename Value<TSA>::Type localPos;
428 				assignValueI1(localPos, seqNo);
429 				assignValueI2(localPos, 0);
430 
431                 // first hash
432                 TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hash(shape, itText));
433                 if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1)       // ignore disabled buckets
434                     sa[atomicPostInc(*bktPtr, parallelTag)] = localPos;
435 
436                 for (++itText; itText != itTextEnd; ++itText)
437                 {
438                     posInc(localPos);
439                     TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hashNext(shape, itText));
440                     if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1)   // ignore disabled buckets
441                         sa[atomicPostInc(*bktPtr, parallelTag)] = localPos;
442                 }
443             }
444         }
445     }
446     else
447     {
448         SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
449         for (int job = 0; job < (int)length(seqSplitter); ++job)
450         {
451 			for(unsigned seqNo = seqSplitter[job]; seqNo < seqSplitter[job + 1]; ++seqNo)
452 			{
453 				TString const &sequence = value(stringSet, seqNo);
454 				if (length(sequence) < length(shape)) continue;
455 
456                 TIterator itText = begin(stringSet[seqNo], Standard());
457                 TIterator itTextEnd = itText + ((length(sequence) - length(shape)) / stepSize + 1) * stepSize;
458 
459 				typename Value<TSA>::Type localPos;
460 				assignValueI1(localPos, seqNo);
461 				assignValueI2(localPos, 0);
462 
463                 for (; itText != itTextEnd; ++itText)
464                 {
465                     posInc(localPos, stepSize);
466                     TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hash(shape, itText));
467                     if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1)   // ignore disabled buckets
468                         sa[atomicPostInc(*bktPtr, parallelTag)] = localPos;
469                 }
470             }
471         }
472     }
473 }
474 
475 //////////////////////////////////////////////////////////////////////////////
476 // Step 5: Correct disabled buckets
477 template < typename TDir, typename TParallelTag >
478 inline void
_qgramPostprocessBuckets(TDir & dir,Tag<TParallelTag> parallelTag)479 _qgramPostprocessBuckets(TDir &dir, Tag<TParallelTag> parallelTag)
480 {
481     typedef typename Iterator<TDir, Standard>::Type TDirIterator;
482     typedef typename Size<TDir>::Type               TSize;
483     typedef typename Value<TDir>::Type              TValue;
484 
485     Splitter<TDirIterator> splitter(begin(dir, Standard()), end(dir, Standard()), parallelTag);
486     String<TValue> last;
487     resize(last, length(splitter), Exact());
488 
489     SEQAN_OMP_PRAGMA(parallel for)
490     for (int job = 0; job < (int)length(splitter); ++job)
491     {
492         TDirIterator it = splitter[job];
493         TDirIterator itEnd = splitter[job + 1];
494         TSize prev = (job == 0)? 0: (TSize)-1;
495         for (; it != itEnd; ++it)
496             if (*it == (TSize)-1)
497                 *it = prev;
498             else
499                 prev = *it;
500         last[job] = prev;
501     }
502 
503     for (int job = 1; job < (int)length(splitter); ++job)
504         if (last[job] == (TSize)-1)
505             last[job] = last[job - 1];
506 
507     SEQAN_OMP_PRAGMA(parallel for)
508     for (int job = 1; job < (int)length(splitter); ++job)
509     {
510         TDirIterator it = splitter[job];
511         TDirIterator itEnd = splitter[job + 1];
512         TSize prev = last[job - 1];
513         for (; it != itEnd && *it == (TSize)-1; ++it)
514             *it = prev;
515     }
516 }
517 
518 template < typename TIndex, typename TParallelTag >
createQGramIndex(TIndex & index,Tag<TParallelTag> parallelTag)519 void createQGramIndex(TIndex &index, Tag<TParallelTag> parallelTag)
520 {
521     typename Fibre<TIndex, QGramText>::Type const &text      = indexText(index);
522     typename Fibre<TIndex, QGramSA>::Type         &sa        = indexSA(index);
523     typename Fibre<TIndex, QGramDir>::Type        &dir       = indexDir(index);
524     typename Fibre<TIndex, QGramShape>::Type      &shape     = indexShape(index);
525     typename Fibre<TIndex, QGramBucketMap>::Type  &bucketMap = index.bucketMap;
526 
527     // 1. clear counters
528     _qgramClearDir(dir, bucketMap, parallelTag);
529 
530     // 2. count q-grams
531     _qgramCountQGrams(dir, bucketMap, text, shape, getStepSize(index), parallelTag);
532 
533     if (_qgramDisableBuckets(index, parallelTag))
534     {
535         // 3. cumulative sum
536 
537         // with disabled buckets
538         // disabled buckets should still be marked in the partial sum
539         // shift all entries by one towards the end (will be corrected by _qgramFillSuffixArray)
540         _qgramCummulativeSum(dir, True(), True(), Unsigned<1>(), parallelTag);
541 
542         // 4. fill suffix array
543         _qgramFillSuffixArray(sa, text, shape, dir, bucketMap, getStepSize(index), True(), parallelTag);
544 
545         // 5. correct disabled buckets
546         _qgramPostprocessBuckets(dir, parallelTag);
547     }
548     else
549     {
550         // 3. cumulative sum
551 
552         // without disabled buckets
553         // shift all entries by one towards the end (will be corrected by _qgramFillSuffixArray)
554         _qgramCummulativeSum(dir, False(), False(), Unsigned<1>(), parallelTag);
555 
556         // 4. fill suffix array
557         _qgramFillSuffixArray(sa, text, shape, dir, bucketMap, getStepSize(index), False(), parallelTag);
558     }
559 }
560 
561 template < typename TIndex, typename TParallelTag >
createQGramIndexDirOnly(TIndex & index,Tag<TParallelTag> parallelTag)562 void createQGramIndexDirOnly(TIndex &index, Tag<TParallelTag> parallelTag)
563 {
564     typename Fibre<TIndex, QGramText>::Type const &text      = indexText(index);
565     typename Fibre<TIndex, QGramSA>::Type         &sa        = indexSA(index);
566     typename Fibre<TIndex, QGramDir>::Type        &dir       = indexDir(index);
567     typename Fibre<TIndex, QGramShape>::Type      &shape     = indexShape(index);
568     typename Fibre<TIndex, QGramBucketMap>::Type  &bucketMap = index.bucketMap;
569 
570     // 1. clear counters
571     _qgramClearDir(dir, bucketMap, parallelTag);
572 
573     // 2. count q-grams
574     _qgramCountQGrams(dir, bucketMap, text, shape, getStepSize(index), parallelTag);
575 
576     // 3. cumulative sum (Step 4 is ommited)
577     if (_qgramDisableBuckets(index, parallelTag))
578     {
579         // with disabled buckets
580         // disabled buckets should not be marked in the partial sum
581         // don't shift entries as there is no _qgramFillSuffixArray call
582         _qgramCummulativeSum(dir, True(), False(), Unsigned<0>(), parallelTag); // no shift
583     }
584     else
585     {
586         // without disabled buckets
587         // don't shift entries as there is no _qgramFillSuffixArray call
588         _qgramCummulativeSum(dir, False(), False(), Unsigned<0>(), parallelTag);
589     }
590 }
591 
592 }
593 
594 #endif  // #ifndef INCLUDE_SEQAN_INDEX_INDEX_QGRAM_PARALLEL_H_
595