1 // ==========================================================================
2 // index_qgram_parallel.h
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34
35 #ifndef INCLUDE_SEQAN_INDEX_INDEX_QGRAM_PARALLEL_H_
36 #define INCLUDE_SEQAN_INDEX_INDEX_QGRAM_PARALLEL_H_
37
38 namespace seqan {
39
40 // ============================================================================
41 // Forwards
42 // ============================================================================
43
44 // ============================================================================
45 // Tags, Classes, Enums
46 // ============================================================================
47
48 // TODO(weese): should go to basic/metaprogramming...
49 template <bool VALUE_>
50 struct Bool: public Eval<VALUE_> {};
51
52 template <unsigned VALUE_>
53 struct Unsigned
54 {
55 enum { VALUE = VALUE_ };
56 };
57
58 template <int VALUE_>
59 struct Integer
60 {
61 enum { VALUE = VALUE_ };
62 };
63
64
65 // ============================================================================
66 // Metafunctions
67 // ============================================================================
68
69 // ============================================================================
70 // Functions
71 // ============================================================================
72
73 template < typename TIndex, typename TParallelTag >
_qgramDisableBuckets(TIndex & index,Tag<TParallelTag>)74 inline bool _qgramDisableBuckets(TIndex &index, Tag<TParallelTag>)
75 {
76 // use the serial version (if no parallel overload is available)
77 return _qgramDisableBuckets(index);
78 }
79
80 //////////////////////////////////////////////////////////////////////////////
81 // Counting sort - Step 3: Cumulative sum
82 //
83 // a disabled bucket 3,2,x,4 (x = disabled) y_i=sum_{j<=i} x_j
84 // results in 0,0,3,x,5 (_qgramCummulativeSum) z_i=y_{i-2}
85 // or in 0,3,5,5,9 (_qgramCummulativeSumAlt) z_i=y_{i-1}
86
87 // 3 2 1 | x x 5 |
88 // 0 0 3 | 5 x x | 6 11
89 // 0 3 5 6 6 6 11
90
91 // 3 2 1 | 3* 2 5 |
92 // 3 5 6 | 9 11 16 |
93 // 0 3 5 | 6 9 11 | 16 SHIFT 0
94 // 0 0 3 | 5 6* 9 | 11 16 SHIFT 1
95
96
97 // First two entries are 0.
98 // Step 4 increments the entries hash(qgram)+1 on-the-fly while filling the SA table.
99 // After step 4 each entry (0..n-1) is the beginning of a qgram bucket.
100
101 template < typename TSequence, typename TParallelTag >
102 inline typename Value<TSequence>::Type
_sumIgnoreDisabled(TSequence const & seq,Tag<TParallelTag>)103 _sumIgnoreDisabled(TSequence const &seq, Tag<TParallelTag>)
104 {
105 typedef typename Value<TSequence>::Type TValue;
106 typename Iterator<TSequence const>::Type it = begin(seq, Standard());
107 typename Iterator<TSequence const>::Type itEnd = end(seq, Standard());
108 TValue sum = 0;
109 for (; it != itEnd; ++it)
110 if (*it != (TValue)-1)
111 sum += *it;
112 return sum;
113 }
114
115 //////////////////////////////////////////////////////////////////////////////
116 // Counting sort - Step 2: Count q-grams
117 template < typename TDir, typename TBucketMap, typename TText, typename TShape, typename TStepSize, typename TParallelTag >
118 inline void
_qgramCountQGrams(TDir & dir,TBucketMap & bucketMap,TText const & text,TShape shape,TStepSize stepSize,Tag<TParallelTag> parallelTag)119 _qgramCountQGrams(TDir &dir, TBucketMap &bucketMap, TText const &text, TShape shape, TStepSize stepSize, Tag<TParallelTag> parallelTag)
120 {
121 typedef typename Iterator<TText const, Standard>::Type TIterator;
122 typedef typename Iterator<TDir, Standard>::Type TDirIterator;
123 typedef typename Value<TDir>::Type TSize;
124
125 if (empty(shape) || length(text) < length(shape))
126 return;
127
128 TSize num_qgrams = (length(text) - length(shape)) / stepSize + 1;
129 TDirIterator dirBegin = begin(dir, Standard());
130 Splitter<TSize> splitter(0, num_qgrams, parallelTag);
131
132 if (stepSize == 1)
133 {
134 SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
135 for (int job = 0; job < (int)length(splitter); ++job)
136 {
137 TIterator itText = begin(text, Standard()) + splitter[job];
138 TIterator itTextEnd = begin(text, Standard()) + splitter[job + 1];
139
140 atomicInc(*(dirBegin + requestBucket(bucketMap, hash(shape, itText), parallelTag)), parallelTag);
141 for (++itText; itText != itTextEnd; ++itText)
142 atomicInc(*(dirBegin + requestBucket(bucketMap, hashNext(shape, itText), parallelTag)), parallelTag);
143 }
144 }
145 else
146 {
147 SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
148 for (int job = 0; job < (int)length(splitter); ++job)
149 {
150 TIterator itText = begin(text, Standard()) + (splitter[job] * stepSize);
151 TIterator itTextEnd = begin(text, Standard()) + (splitter[job + 1] * stepSize);
152
153 for (; itText != itTextEnd; itText += stepSize)
154 atomicInc(*(dirBegin + requestBucket(bucketMap, hash(shape, itText), parallelTag)), parallelTag);
155 }
156 }
157 }
158
159 template < typename TDir, typename TBucketMap, typename TString, typename TSpec, typename TShape, typename TStepSize, typename TParallelTag >
160 inline void
_qgramCountQGrams(TDir & dir,TBucketMap & bucketMap,StringSet<TString,TSpec> const & stringSet,TShape shape,TStepSize stepSize,Tag<TParallelTag> parallelTag)161 _qgramCountQGrams(TDir &dir, TBucketMap &bucketMap, StringSet<TString, TSpec> const &stringSet, TShape shape, TStepSize stepSize, Tag<TParallelTag> parallelTag)
162 {
163 typedef typename Iterator<TString const, Standard>::Type TIterator;
164 typedef typename Iterator<TDir, Standard>::Type TDirIterator;
165 typedef typename Value<TDir>::Type TSize;
166
167 if (empty(shape) || empty(stringSet))
168 return;
169
170 TDirIterator dirBegin = begin(dir, Standard());
171 Splitter<TSize> seqSplitter(0, countSequences(stringSet), parallelTag);
172
173 if (stepSize == 1)
174 {
175 SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
176 for (int job = 0; job < (int)length(seqSplitter); ++job)
177 {
178 for(unsigned seqNo = seqSplitter[job]; seqNo < seqSplitter[job + 1]; ++seqNo)
179 {
180 TString const &sequence = value(stringSet, seqNo);
181 if (length(sequence) < length(shape)) continue;
182
183 TIterator itText = begin(stringSet[seqNo], Standard());
184 TIterator itTextEnd = itText + (length(sequence) - length(shape) + 1);
185 atomicInc(*(dirBegin + requestBucket(bucketMap, hash(shape, itText), parallelTag)), parallelTag);
186 for (++itText; itText != itTextEnd; ++itText)
187 atomicInc(*(dirBegin + requestBucket(bucketMap, hashNext(shape, itText), parallelTag)), parallelTag);
188 }
189 }
190 }
191 else
192 {
193 SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
194 for (int job = 0; job < (int)length(seqSplitter); ++job)
195 {
196 for(unsigned seqNo = seqSplitter[job]; seqNo < seqSplitter[job + 1]; ++seqNo)
197 {
198 TString const &sequence = value(stringSet, seqNo);
199 if (length(sequence) < length(shape)) continue;
200
201 TIterator itText = begin(stringSet[seqNo], Standard());
202 TIterator itTextEnd = itText + ((length(sequence) - length(shape)) / stepSize + 1) * stepSize;
203 for (; itText != itTextEnd; itText += stepSize)
204 atomicInc(*(dirBegin + requestBucket(bucketMap, hash(shape, itText), parallelTag)), parallelTag);
205 }
206 }
207 }
208 }
209
210 template < typename TDir, typename TWithConstraints, typename TKeepDisabledBuckets, unsigned SHIFT, typename TParallelTag >
211 inline typename Value<TDir>::Type
_qgramCummulativeSum(TDir & dir,TWithConstraints,TKeepDisabledBuckets,Unsigned<SHIFT>,Tag<TParallelTag> parallelTag)212 _qgramCummulativeSum(TDir &dir, TWithConstraints, TKeepDisabledBuckets, Unsigned<SHIFT>, Tag<TParallelTag> parallelTag)
213 {
214 typedef typename Value<TDir>::Type TValue;
215 typedef typename Size<TDir>::Type TSize;
216 typedef String<TValue> TBuffer;
217 typedef typename Iterator<TDir const, Standard>::Type TConstIterator;
218 typedef typename Iterator<TDir, Standard>::Type TIterator;
219 typedef typename Iterator<TBuffer, Standard>::Type TConstBufferIterator;
220
221 if (empty(dir))
222 return 0;
223
224 Splitter<TSize> splitter(0, length(dir), parallelTag);
225 String<TValue> localSums;
226 TBuffer prevCounts;
227 resize(localSums, length(splitter), Exact());
228 resize(prevCounts, length(splitter) * SHIFT, 0, Exact());
229 localSums[0] = 0;
230
231 // STEP 1: compute sums of all subintervals (in parallel)
232 //
233 SEQAN_OMP_PRAGMA(parallel for)
234 for (int job = 0; job < (int)length(splitter); ++job)
235 {
236 unsigned len = _min(SHIFT, splitter[job]);
237 replace(prevCounts, (job + 1) * SHIFT - len, (job + 1) * SHIFT, infix(dir, splitter[job] - len, splitter[job]));
238
239 typename Infix<TDir>::Type dirInfix = infix(
240 dir,
241 _max((int64_t)0, (int64_t)splitter[job] - (int64_t)SHIFT),
242 _max((int64_t)0, (int64_t)splitter[job + 1] - (int64_t)SHIFT));
243
244 if (TWithConstraints::VALUE)
245 localSums[job] = _sumIgnoreDisabled(dirInfix, Serial());
246 else
247 localSums[job] = sum(dirInfix, Serial());
248 }
249
250 // STEP 2: compute partial sums (of subinterval sums) from position 0 to the end of each subinterval
251 //
252 for (int job = 1; job < (int)length(splitter); ++job)
253 localSums[job] += localSums[job - 1];
254
255 // STEP 3: compute partial sums of each subinterval starting from offset (in parallel)
256 //
257 SEQAN_OMP_PRAGMA(parallel for)
258 for (int job = 0; job < (int)length(splitter); ++job)
259 {
260 TConstIterator itBegin = begin(dir, Standard()) + splitter[job];
261 TIterator dstIt = begin(dir, Standard()) + splitter[job + 1];
262 TValue sum = localSums[job];
263
264 // read over our subinterval
265 {
266 TConstIterator it = begin(dir, Standard()) + splitter[job + 1] - SHIFT;
267 while (it != itBegin)
268 {
269 TValue counter = *(--it);
270 if (!TWithConstraints::VALUE || counter != (TValue)-1)
271 sum -= counter;
272 else
273 if (TKeepDisabledBuckets::VALUE)
274 {
275 *(--dstIt) = (TValue)-1;
276 continue;
277 }
278 *(--dstIt) = sum;
279 }
280 }
281
282 // read suffix of the previous subinterval
283 if (SHIFT != 0u)
284 {
285 TConstBufferIterator it = begin(prevCounts, Standard()) + (job + 1) * SHIFT;
286 while (dstIt != itBegin)
287 {
288 TValue counter = *(--it);
289 if (!TWithConstraints::VALUE || counter != (TValue)-1)
290 sum -= counter;
291 else
292 if (TKeepDisabledBuckets::VALUE)
293 {
294 *(--dstIt) = (TValue)-1;
295 continue;
296 }
297 *(--dstIt) = sum;
298 }
299 }
300 }
301
302 return back(localSums);
303 }
304
305 //////////////////////////////////////////////////////////////////////////////
306 // Counting sort - Step 4: Fill suffix array
307 // w/o constraints
308 template <
309 typename TSA,
310 typename TText,
311 typename TShape,
312 typename TDir,
313 typename TBucketMap,
314 typename TWithConstraints,
315 typename TStepSize,
316 typename TParallelTag >
317 inline void
_qgramFillSuffixArray(TSA & sa,TText const & text,TShape shape,TDir & dir,TBucketMap & bucketMap,TStepSize stepSize,TWithConstraints const,Tag<TParallelTag> parallelTag)318 _qgramFillSuffixArray(
319 TSA &sa,
320 TText const &text,
321 TShape shape,
322 TDir &dir,
323 TBucketMap &bucketMap,
324 TStepSize stepSize,
325 TWithConstraints const,
326 Tag<TParallelTag> parallelTag)
327 {
328 typedef typename Iterator<TText const, Standard>::Type TIterator;
329 typedef typename Iterator<TDir, Standard>::Type TDirIterator;
330 typedef typename Value<TDir>::Type TSize;
331
332 if (empty(shape) || length(text) < length(shape))
333 return;
334
335 TSize num_qgrams = (length(text) - length(shape)) / stepSize + 1;
336 TDirIterator dirBegin = begin(dir, Standard());
337 TDirIterator dirBegin1 = dirBegin + 1;
338 Splitter<TSize> splitter(0, num_qgrams, parallelTag);
339
340 if (stepSize == 1)
341 {
342 SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
343 for (int job = 0; job < (int)length(splitter); ++job)
344 {
345 TSize pos = splitter[job];
346 TSize posEnd = splitter[job + 1];
347 if (pos == posEnd) continue;
348 TIterator itText = begin(text, Standard()) + pos;
349
350 // first hash
351 TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hash(shape, itText));
352 if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1) // ignore disabled buckets
353 sa[atomicPostInc(*bktPtr, parallelTag)] = pos;
354
355 for (++pos; pos != posEnd; ++pos)
356 {
357 TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hashNext(shape, ++itText));
358 if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1) // ignore disabled buckets
359 sa[atomicPostInc(*bktPtr, parallelTag)] = pos;
360 }
361 }
362 }
363 else
364 {
365 SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
366 for (int job = 0; job < (int)length(splitter); ++job)
367 {
368 TSize pos = splitter[job] * stepSize;
369 TSize posEnd = splitter[job + 1] * stepSize;
370 TIterator itText = begin(text, Standard()) + pos;
371
372 for (; pos != posEnd; pos += stepSize, itText += stepSize)
373 {
374 TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hash(shape, itText));
375 if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1) // ignore disabled buckets
376 sa[atomicPostInc(*bktPtr, parallelTag)] = pos;
377 }
378 }
379 }
380 }
381
382 template <
383 typename TSA,
384 typename TString,
385 typename TSpec,
386 typename TShape,
387 typename TDir,
388 typename TBucketMap,
389 typename TWithConstraints,
390 typename TStepSize,
391 typename TParallelTag >
392 inline void
_qgramFillSuffixArray(TSA & sa,StringSet<TString,TSpec> const & stringSet,TShape shape,TDir & dir,TBucketMap & bucketMap,TStepSize stepSize,TWithConstraints const,Tag<TParallelTag> parallelTag)393 _qgramFillSuffixArray(
394 TSA &sa,
395 StringSet<TString, TSpec> const &stringSet,
396 TShape shape,
397 TDir &dir,
398 TBucketMap &bucketMap,
399 TStepSize stepSize,
400 TWithConstraints const,
401 Tag<TParallelTag> parallelTag)
402 {
403 typedef typename Iterator<TString const, Standard>::Type TIterator;
404 typedef typename Iterator<TDir, Standard>::Type TDirIterator;
405 typedef typename Value<TDir>::Type TSize;
406
407 if (empty(shape) || empty(stringSet))
408 return;
409
410 TDirIterator dirBegin = begin(dir, Standard());
411 TDirIterator dirBegin1 = dirBegin + 1;
412 Splitter<TSize> seqSplitter(0, countSequences(stringSet), parallelTag);
413
414 if (stepSize == 1)
415 {
416 SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
417 for (int job = 0; job < (int)length(seqSplitter); ++job)
418 {
419 for(unsigned seqNo = seqSplitter[job]; seqNo < seqSplitter[job + 1]; ++seqNo)
420 {
421 TString const &sequence = value(stringSet, seqNo);
422 if (length(sequence) < length(shape)) continue;
423
424 TIterator itText = begin(stringSet[seqNo], Standard());
425 TIterator itTextEnd = itText + (length(sequence) - length(shape) + 1);
426
427 typename Value<TSA>::Type localPos;
428 assignValueI1(localPos, seqNo);
429 assignValueI2(localPos, 0);
430
431 // first hash
432 TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hash(shape, itText));
433 if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1) // ignore disabled buckets
434 sa[atomicPostInc(*bktPtr, parallelTag)] = localPos;
435
436 for (++itText; itText != itTextEnd; ++itText)
437 {
438 posInc(localPos);
439 TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hashNext(shape, itText));
440 if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1) // ignore disabled buckets
441 sa[atomicPostInc(*bktPtr, parallelTag)] = localPos;
442 }
443 }
444 }
445 }
446 else
447 {
448 SEQAN_OMP_PRAGMA(parallel for firstprivate(shape))
449 for (int job = 0; job < (int)length(seqSplitter); ++job)
450 {
451 for(unsigned seqNo = seqSplitter[job]; seqNo < seqSplitter[job + 1]; ++seqNo)
452 {
453 TString const &sequence = value(stringSet, seqNo);
454 if (length(sequence) < length(shape)) continue;
455
456 TIterator itText = begin(stringSet[seqNo], Standard());
457 TIterator itTextEnd = itText + ((length(sequence) - length(shape)) / stepSize + 1) * stepSize;
458
459 typename Value<TSA>::Type localPos;
460 assignValueI1(localPos, seqNo);
461 assignValueI2(localPos, 0);
462
463 for (; itText != itTextEnd; ++itText)
464 {
465 posInc(localPos, stepSize);
466 TDirIterator const bktPtr = dirBegin1 + getBucket(bucketMap, hash(shape, itText));
467 if (!TWithConstraints::VALUE || *bktPtr != (TSize)-1) // ignore disabled buckets
468 sa[atomicPostInc(*bktPtr, parallelTag)] = localPos;
469 }
470 }
471 }
472 }
473 }
474
475 //////////////////////////////////////////////////////////////////////////////
476 // Step 5: Correct disabled buckets
477 template < typename TDir, typename TParallelTag >
478 inline void
_qgramPostprocessBuckets(TDir & dir,Tag<TParallelTag> parallelTag)479 _qgramPostprocessBuckets(TDir &dir, Tag<TParallelTag> parallelTag)
480 {
481 typedef typename Iterator<TDir, Standard>::Type TDirIterator;
482 typedef typename Size<TDir>::Type TSize;
483 typedef typename Value<TDir>::Type TValue;
484
485 Splitter<TDirIterator> splitter(begin(dir, Standard()), end(dir, Standard()), parallelTag);
486 String<TValue> last;
487 resize(last, length(splitter), Exact());
488
489 SEQAN_OMP_PRAGMA(parallel for)
490 for (int job = 0; job < (int)length(splitter); ++job)
491 {
492 TDirIterator it = splitter[job];
493 TDirIterator itEnd = splitter[job + 1];
494 TSize prev = (job == 0)? 0: (TSize)-1;
495 for (; it != itEnd; ++it)
496 if (*it == (TSize)-1)
497 *it = prev;
498 else
499 prev = *it;
500 last[job] = prev;
501 }
502
503 for (int job = 1; job < (int)length(splitter); ++job)
504 if (last[job] == (TSize)-1)
505 last[job] = last[job - 1];
506
507 SEQAN_OMP_PRAGMA(parallel for)
508 for (int job = 1; job < (int)length(splitter); ++job)
509 {
510 TDirIterator it = splitter[job];
511 TDirIterator itEnd = splitter[job + 1];
512 TSize prev = last[job - 1];
513 for (; it != itEnd && *it == (TSize)-1; ++it)
514 *it = prev;
515 }
516 }
517
518 template < typename TIndex, typename TParallelTag >
createQGramIndex(TIndex & index,Tag<TParallelTag> parallelTag)519 void createQGramIndex(TIndex &index, Tag<TParallelTag> parallelTag)
520 {
521 typename Fibre<TIndex, QGramText>::Type const &text = indexText(index);
522 typename Fibre<TIndex, QGramSA>::Type &sa = indexSA(index);
523 typename Fibre<TIndex, QGramDir>::Type &dir = indexDir(index);
524 typename Fibre<TIndex, QGramShape>::Type &shape = indexShape(index);
525 typename Fibre<TIndex, QGramBucketMap>::Type &bucketMap = index.bucketMap;
526
527 // 1. clear counters
528 _qgramClearDir(dir, bucketMap, parallelTag);
529
530 // 2. count q-grams
531 _qgramCountQGrams(dir, bucketMap, text, shape, getStepSize(index), parallelTag);
532
533 if (_qgramDisableBuckets(index, parallelTag))
534 {
535 // 3. cumulative sum
536
537 // with disabled buckets
538 // disabled buckets should still be marked in the partial sum
539 // shift all entries by one towards the end (will be corrected by _qgramFillSuffixArray)
540 _qgramCummulativeSum(dir, True(), True(), Unsigned<1>(), parallelTag);
541
542 // 4. fill suffix array
543 _qgramFillSuffixArray(sa, text, shape, dir, bucketMap, getStepSize(index), True(), parallelTag);
544
545 // 5. correct disabled buckets
546 _qgramPostprocessBuckets(dir, parallelTag);
547 }
548 else
549 {
550 // 3. cumulative sum
551
552 // without disabled buckets
553 // shift all entries by one towards the end (will be corrected by _qgramFillSuffixArray)
554 _qgramCummulativeSum(dir, False(), False(), Unsigned<1>(), parallelTag);
555
556 // 4. fill suffix array
557 _qgramFillSuffixArray(sa, text, shape, dir, bucketMap, getStepSize(index), False(), parallelTag);
558 }
559 }
560
561 template < typename TIndex, typename TParallelTag >
createQGramIndexDirOnly(TIndex & index,Tag<TParallelTag> parallelTag)562 void createQGramIndexDirOnly(TIndex &index, Tag<TParallelTag> parallelTag)
563 {
564 typename Fibre<TIndex, QGramText>::Type const &text = indexText(index);
565 typename Fibre<TIndex, QGramSA>::Type &sa = indexSA(index);
566 typename Fibre<TIndex, QGramDir>::Type &dir = indexDir(index);
567 typename Fibre<TIndex, QGramShape>::Type &shape = indexShape(index);
568 typename Fibre<TIndex, QGramBucketMap>::Type &bucketMap = index.bucketMap;
569
570 // 1. clear counters
571 _qgramClearDir(dir, bucketMap, parallelTag);
572
573 // 2. count q-grams
574 _qgramCountQGrams(dir, bucketMap, text, shape, getStepSize(index), parallelTag);
575
576 // 3. cumulative sum (Step 4 is ommited)
577 if (_qgramDisableBuckets(index, parallelTag))
578 {
579 // with disabled buckets
580 // disabled buckets should not be marked in the partial sum
581 // don't shift entries as there is no _qgramFillSuffixArray call
582 _qgramCummulativeSum(dir, True(), False(), Unsigned<0>(), parallelTag); // no shift
583 }
584 else
585 {
586 // without disabled buckets
587 // don't shift entries as there is no _qgramFillSuffixArray call
588 _qgramCummulativeSum(dir, False(), False(), Unsigned<0>(), parallelTag);
589 }
590 }
591
592 }
593
594 #endif // #ifndef INCLUDE_SEQAN_INDEX_INDEX_QGRAM_PARALLEL_H_
595