1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Rene Rahn <rene.rahn@fu-berlin.de>
33 // ==========================================================================
34
35 #ifndef INCLUDE_SEQAN_ALIGN_DP_ALIGN_SIMD_HELPER_H_
36 #define INCLUDE_SEQAN_ALIGN_DP_ALIGN_SIMD_HELPER_H_
37
38 namespace seqan
39 {
40
41 #if SEQAN_ALIGN_SIMD_PROFILE
42 struct AlignSimdProfile_
43 {
44 double preprTimer = 0.0;
45 double alignTimer = 0.0;
46 double traceTimer = 0.0;
47
clearAlignSimdProfile_48 void clear()
49 {
50 preprTimer = 0.0;
51 alignTimer = 0.0;
52 traceTimer = 0.0;
53 }
54 };
55
56 double timer = 0.0;
57
58 AlignSimdProfile_ profile;
59 #endif
60
61 // ============================================================================
62 // Forwards
63 // ============================================================================
64
65 template <unsigned LENGTH>
66 struct VectorLength_;
67
68 // ============================================================================
69 // Tags, Classes, Enums
70 // ============================================================================
71
72 template <typename TSimdVector_, typename TSeqH_, typename TSeqV_, typename TDPProfile_>
73 struct SimdAlignVariableLengthTraits
74 {
75 using TSimdVector = TSimdVector_;
76 using TSeqH = TSeqH_;
77 using TSeqV = TSeqV_;
78 using TDPProfile = TDPProfile_;
79 };
80
81 // ============================================================================
82 // Metafunctions
83 // ============================================================================
84
85 // ============================================================================
86 // Functions
87 // ============================================================================
88
89 // ----------------------------------------------------------------------------
90 // Function _createSimdRepImpl()
91 // ----------------------------------------------------------------------------
92
93 template <typename TSimdVecs,
94 typename TStrings,
95 size_t ...I>
96 inline void
_createSimdRepImpl(TSimdVecs & vecs,TStrings const & strs,std::index_sequence<I...> const &)97 _createSimdRepImpl(TSimdVecs & vecs,
98 TStrings const & strs,
99 std::index_sequence<I...> const & /*unsued*/)
100 {
101 for (size_t pos = 0; pos < length(vecs); ++pos)
102 fillVector(vecs[pos], strs[I][pos]...);
103 }
104
105 template <typename TSimdVecs,
106 typename TStrings>
107 inline void
_createSimdRepImpl(TSimdVecs & simdStr,TStrings const & strings)108 _createSimdRepImpl(TSimdVecs & simdStr,
109 TStrings const & strings)
110 {
111 using TSimdVec = typename Value<TSimdVecs>::Type;
112 constexpr auto length = LENGTH<TSimdVec>::VALUE;
113 _createSimdRepImpl(simdStr, strings, std::make_index_sequence<length>());
114 }
115
116 // Actually precompute value if scoring scheme is score matrix and simd version.
117 template <typename TSeqValue,
118 typename TScoreValue, typename TScore>
SEQAN_FUNC_ENABLE_IF(And<Is<SimdVectorConcept<TSeqValue>>,IsScoreMatrix_<TScore>>,TSeqValue)119 inline SEQAN_FUNC_ENABLE_IF(And<Is<SimdVectorConcept<TSeqValue> >, IsScoreMatrix_<TScore> >, TSeqValue)
120 _precomputeScoreMatrixOffset(TSeqValue const & seqVal,
121 Score<TScoreValue, ScoreSimdWrapper<TScore> > const & /*score*/)
122 {
123 return createVector<TSeqValue>(TScore::VALUE_SIZE) * seqVal;
124 }
125
126 // ----------------------------------------------------------------------------
127 // Function _prepareAndRunSimdAlignment()
128 // ----------------------------------------------------------------------------
129
130 template <typename TStringSimdH,
131 typename TStringSimdV,
132 typename TSequencesH,
133 typename TSequencesV>
134 inline void
_prepareSimdAlignment(TStringSimdH & stringSimdH,TStringSimdV & stringSimdV,TSequencesH const & seqH,TSequencesV const & seqV,DPScoutState_<SimdAlignEqualLength> const &)135 _prepareSimdAlignment(TStringSimdH & stringSimdH,
136 TStringSimdV & stringSimdV,
137 TSequencesH const & seqH,
138 TSequencesV const & seqV,
139 DPScoutState_<SimdAlignEqualLength> const & /*unused*/)
140 {
141 resize(stringSimdH, length(seqH[0]));
142 resize(stringSimdV, length(seqV[0]));
143 _createSimdRepImpl(stringSimdH, seqH);
144 _createSimdRepImpl(stringSimdV, seqV);
145 }
146
147 template <typename TStringSimdH,
148 typename TStringSimdV,
149 typename TSequencesH,
150 typename TSequencesV,
151 typename TTraits>
152 inline void
_prepareSimdAlignment(TStringSimdH & stringSimdH,TStringSimdV & stringSimdV,TSequencesH const & seqH,TSequencesV const & seqV,String<size_t> & lengthsH,String<size_t> & lengthsV,DPScoutState_<SimdAlignVariableLength<TTraits>> & state)153 _prepareSimdAlignment(TStringSimdH & stringSimdH,
154 TStringSimdV & stringSimdV,
155 TSequencesH const & seqH,
156 TSequencesV const & seqV,
157 String<size_t> & lengthsH,
158 String<size_t> & lengthsV,
159 DPScoutState_<SimdAlignVariableLength<TTraits> > & state)
160 {
161 SEQAN_ASSERT_EQ(length(seqH), length(seqV));
162 SEQAN_ASSERT_EQ(static_cast<decltype(length(seqH))>(LENGTH<typename Value<TStringSimdH>::Type>::VALUE), length(seqH));
163
164 using TSimdVector = typename TTraits::TSimdVector;
165 using TSimdValueType = typename Value<TSimdVector>::Type;
166
167 using TPadStringH = ModifiedString<typename Value<TSequencesH const>::Type, ModPadding>;
168 using TPadStringV = ModifiedString<typename Value<TSequencesV const>::Type, ModPadding>;
169
170 resize(lengthsH, length(seqH), Exact{});
171 resize(lengthsV, length(seqV), Exact{});
172
173 for (unsigned i = 0; i < length(seqH); ++i)
174 {
175 lengthsH[i] = length(seqH[i]);
176 lengthsV[i] = length(seqV[i]);
177 }
178
179 // Sort and remove unique elements from length vectors.
180 auto maxLengthLambda = [](auto& lengthLhs, auto& lengthRhs) { return lengthLhs < lengthRhs; };
181 std::sort(begin(lengthsH, Standard{}), end(lengthsH, Standard{}), maxLengthLambda);
182 std::sort(begin(lengthsV, Standard{}), end(lengthsV, Standard{}), maxLengthLambda);
183
184 erase(lengthsH,
185 std::unique(begin(lengthsH, Standard{}), end(lengthsH, Standard{})) - begin(lengthsH, Standard{}),
186 length(lengthsH));
187 erase(lengthsV,
188 std::unique(begin(lengthsV, Standard{}), end(lengthsV, Standard{})) - begin(lengthsV, Standard{}),
189 length(lengthsV));
190
191 // Initialize iterator to the lengths vectors.
192 state.nextEndsH = begin(lengthsH, Rooted{});
193 state.nextEndsV = begin(lengthsV, Rooted{});
194
195 size_t maxH = back(lengthsH);
196 size_t maxV = back(lengthsV);
197
198 // Create Stringset with padded strings.
199 StringSet<TPadStringH> paddedH;
200 StringSet<TPadStringV> paddedV;
201 resize(paddedH, length(seqH));
202 resize(paddedV, length(seqV));
203
204 for(unsigned i = 0; i < length(seqH); ++i)
205 {
206 setHost(paddedH[i], seqH[i]);
207 setHost(paddedV[i], seqV[i]);
208 expand(paddedH[i], maxH);
209 expand(paddedV[i], maxV);
210
211 // Store the end points as vector in both dimensions.
212 assignValue(state.endPosVecH, i, static_cast<TSimdValueType>(length(seqH[i])));
213 assignValue(state.endPosVecV, i, static_cast<TSimdValueType>(length(seqV[i])));
214 }
215
216 // now create SIMD representation
217 resize(stringSimdH, maxH);
218 resize(stringSimdV, maxV);
219 _createSimdRepImpl(stringSimdH, paddedH);
220 _createSimdRepImpl(stringSimdV, paddedV);
221 }
222
223 template <typename TResult,
224 typename TTraces,
225 typename TSequencesH,
226 typename TSequencesV,
227 typename TScore,
228 typename TAlgo, typename TBand, typename TFreeEndGaps, typename TTraceback,
229 typename TGapModel>
230 inline void
_prepareAndRunSimdAlignment(TResult & results,TTraces & traces,TSequencesH const & seqH,TSequencesV const & seqV,TScore const & scoringScheme,AlignConfig2<TAlgo,TBand,TFreeEndGaps,TTraceback> const & alignConfig,TGapModel const &)231 _prepareAndRunSimdAlignment(TResult & results,
232 TTraces & traces,
233 TSequencesH const & seqH,
234 TSequencesV const & seqV,
235 TScore const & scoringScheme,
236 AlignConfig2<TAlgo, TBand, TFreeEndGaps, TTraceback> const & alignConfig,
237 TGapModel const & /*gapModel*/)
238 {
239 auto seqLengthH = length(seqH[0]);
240 auto seqLengthV = length(seqV[0]);
241 auto zipView = makeZipView(seqH, seqV);
242 bool allSameLength = std::all_of(begin(zipView, Standard()), end(zipView, Standard()),
243 [seqLengthH, seqLengthV](auto param)
244 {
245 return (length(std::get<0>(param)) == seqLengthH) &&
246 (length(std::get<1>(param)) == seqLengthV);
247 });
248
249 String<TResult, Alloc<OverAligned> > stringSimdH;
250 String<TResult, Alloc<OverAligned> > stringSimdV;
251 if(allSameLength)
252 {
253 DPScoutState_<SimdAlignEqualLength> state;
254 _prepareSimdAlignment(stringSimdH, stringSimdV, seqH, seqV, state);
255 results = _setUpAndRunAlignment(traces, state, stringSimdH, stringSimdV, scoringScheme, alignConfig, TGapModel());
256 }
257 else
258 {
259 using TDPProfile = typename SetupAlignmentProfile_<TAlgo, TFreeEndGaps, TGapModel, TTraceback>::Type;
260
261 DPScoutState_<SimdAlignVariableLength<SimdAlignVariableLengthTraits<TResult,
262 TSequencesH,
263 TSequencesV,
264 TDPProfile>>> state;
265 String<size_t> lengthsH;
266 String<size_t> lengthsV;
267 _prepareSimdAlignment(stringSimdH, stringSimdV, seqH, seqV, lengthsH, lengthsV, state);
268
269 results = _setUpAndRunAlignment(traces, state, stringSimdH, stringSimdV, scoringScheme, alignConfig, TGapModel());
270 }
271 }
272
273 // ----------------------------------------------------------------------------
274 // Function _alignWrapperSimd(); Score; StringSet vs. StringSet
275 // ----------------------------------------------------------------------------
276
277 template <typename TSetH,
278 typename TSetV,
279 typename TScoreValue, typename TScoreSpec,
280 typename TAlignConfig,
281 typename TGapModel,
282 std::enable_if_t<And<And<Is<ContainerConcept<TSetH>>,
283 Is<ContainerConcept<typename Value<TSetH>::Type>>>,
284 And<Is<ContainerConcept<TSetV>>,
285 Is<ContainerConcept<typename Value<TSetV>::Type>>>
286 >::VALUE,
287 int> = 0>
288 inline auto
_alignWrapperSimd(TSetH const & stringsH,TSetV const & stringsV,Score<TScoreValue,TScoreSpec> const & scoringScheme,TAlignConfig const & config,TGapModel const &)289 _alignWrapperSimd(TSetH const & stringsH,
290 TSetV const & stringsV,
291 Score<TScoreValue, TScoreSpec> const & scoringScheme,
292 TAlignConfig const & config,
293 TGapModel const & /*gaps*/)
294 {
295 typedef typename SimdVector<TScoreValue>::Type TSimdAlign;
296
297 unsigned const numAlignments = length(stringsV);
298 unsigned const sizeBatch = LENGTH<TSimdAlign>::VALUE;
299 unsigned const fullSize = sizeBatch * ((numAlignments + sizeBatch - 1) / sizeBatch);
300
301 String<TScoreValue> results;
302 resize(results, numAlignments);
303
304 StringSet<String<Nothing> > trace; // We need to declare it, but it will not be used.
305
306 // Create a SIMD scoring scheme.
307 Score<TSimdAlign, ScoreSimdWrapper<Score<TScoreValue, TScoreSpec> > > simdScoringScheme(scoringScheme);
308
309 for (auto pos = 0u; pos < fullSize; pos += sizeBatch)
310 {
311 TSimdAlign resultsBatch;
312 if (SEQAN_UNLIKELY(numAlignments < pos + sizeBatch))
313 {
314 StringSet<std::remove_const_t<typename Value<TSetH>::Type>, Dependent<> > depSetH;
315 StringSet<std::remove_const_t<typename Value<TSetV>::Type>, Dependent<> > depSetV;
316 for (unsigned i = pos; i < fullSize; ++i)
317 {
318 if (i >= numAlignments)
319 {
320 appendValue(depSetH, back(stringsH));
321 appendValue(depSetV, back(stringsV));
322 }
323 else
324 {
325 appendValue(depSetH, stringsH[i]);
326 appendValue(depSetV, stringsV[i]);
327 }
328 }
329 SEQAN_ASSERT_EQ(length(depSetH), sizeBatch);
330 SEQAN_ASSERT_EQ(length(depSetV), sizeBatch);
331
332 _prepareAndRunSimdAlignment(resultsBatch, trace, depSetH, depSetV, simdScoringScheme, config, TGapModel());
333 }
334 else
335 {
336 auto infSetH = infixWithLength(stringsH, pos, sizeBatch);
337 auto infSetV = infixWithLength(stringsV, pos, sizeBatch);
338
339 _prepareAndRunSimdAlignment(resultsBatch, trace, infSetH, infSetV, simdScoringScheme, config, TGapModel());
340 }
341
342 // TODO(rrahn): Could be parallelized!
343 for(auto x = pos; x < pos + sizeBatch && x < numAlignments; ++x)
344 results[x] = resultsBatch[x - pos];
345 }
346 return results;
347 }
348
349 // ----------------------------------------------------------------------------
350 // Function _alignWrapperSimd(); Score; String vs. StringSet
351 // ----------------------------------------------------------------------------
352
353 template <typename TSeqH,
354 typename TSetV,
355 typename TScoreValue, typename TScoreSpec,
356 typename TAlignConfig,
357 typename TGapModel,
358 std::enable_if_t<And<And<Is<ContainerConcept<TSeqH>>,
359 Not<Is<ContainerConcept<typename Value<TSeqH>::Type>>>>,
360 And<Is<ContainerConcept<TSetV>>,
361 Is<ContainerConcept<typename Value<TSetV>::Type>>>
362 >::VALUE,
363 int> = 0>
364 inline auto
_alignWrapperSimd(TSeqH const & stringH,TSetV const & stringsV,Score<TScoreValue,TScoreSpec> const & scoringScheme,TAlignConfig const & config,TGapModel const &)365 _alignWrapperSimd(TSeqH const & stringH,
366 TSetV const & stringsV,
367 Score<TScoreValue, TScoreSpec> const & scoringScheme,
368 TAlignConfig const & config,
369 TGapModel const & /*gaps*/)
370 {
371 typedef typename SimdVector<TScoreValue>::Type TSimdAlign;
372
373 unsigned const numAlignments = length(stringsV);
374 unsigned const sizeBatch = LENGTH<TSimdAlign>::VALUE;
375 unsigned const fullSize = sizeBatch * ((numAlignments + sizeBatch - 1) / sizeBatch);
376
377 String<TScoreValue> results;
378 resize(results, numAlignments);
379
380 // Prepare strings.
381 StringSet<TSeqH, Dependent<> > setH;
382 for (auto i = 0u; i < sizeBatch; ++i)
383 appendValue(setH, stringH);
384
385 StringSet<String<Nothing> > trace; // We need to declare it, but it will not be used.
386
387 // Create a SIMD scoring scheme.
388 Score<TSimdAlign, ScoreSimdWrapper<Score<TScoreValue, TScoreSpec> > > simdScoringScheme(scoringScheme);
389
390 for (auto pos = 0u; pos < fullSize; pos += sizeBatch)
391 {
392 TSimdAlign resultsBatch;
393 if (SEQAN_UNLIKELY(numAlignments < pos + sizeBatch))
394 {
395 StringSet<std::remove_const_t<typename Value<TSetV>::Type>, Dependent<> > depSetV;
396 for (unsigned i = pos; i < fullSize; ++i)
397 {
398 if (i >= numAlignments)
399 appendValue(depSetV, back(stringsV));
400 else
401 appendValue(depSetV, stringsV[i]);
402 }
403 SEQAN_ASSERT_EQ(length(depSetV), sizeBatch);
404
405 _prepareAndRunSimdAlignment(resultsBatch, trace, setH, depSetV, simdScoringScheme, config, TGapModel());
406 }
407 else
408 {
409 auto infSetV = infixWithLength(stringsV, pos, sizeBatch);
410 _prepareAndRunSimdAlignment(resultsBatch, trace, setH, infSetV, simdScoringScheme, config, TGapModel());
411 }
412 // TODO(rrahn): Could be parallelized!
413 for(auto x = pos; x < pos + sizeBatch && x < numAlignments; ++x)
414 results[x] = resultsBatch[x - pos];
415 }
416 return results;
417 }
418
419 // ----------------------------------------------------------------------------
420 // Function _alignWrapperSimd(); Gaps
421 // ----------------------------------------------------------------------------
422
423 template <typename TSetH,
424 typename TSetV,
425 typename TScoreValue, typename TScoreSpec,
426 typename TAlignConfig,
427 typename TGapModel,
428 std::enable_if_t<And<And<Is<ContainerConcept<TSetH>>,
429 Is<AlignedSequenceConcept<typename Value<TSetH>::Type>>>,
430 And<Is<ContainerConcept<TSetV>>,
431 Is<AlignedSequenceConcept<typename Value<TSetV>::Type>>>
432 >::VALUE,
433 int> = 0>
434 inline auto
_alignWrapperSimd(TSetH & gapSeqSetH,TSetV & gapSeqSetV,Score<TScoreValue,TScoreSpec> const & scoringScheme,TAlignConfig const & config,TGapModel const &)435 _alignWrapperSimd(TSetH & gapSeqSetH,
436 TSetV & gapSeqSetV,
437 Score<TScoreValue, TScoreSpec> const & scoringScheme,
438 TAlignConfig const & config,
439 TGapModel const & /*gaps*/)
440 {
441 typedef typename Value<TSetH>::Type TGapSequenceH;
442 typedef typename Value<TSetV>::Type TGapSequenceV;
443 typedef typename Size<TGapSequenceH>::Type TSize;
444 typedef typename Position<TGapSequenceH>::Type TPosition;
445 typedef TraceSegment_<TPosition, TSize> TTraceSegment;
446
447 typedef typename SimdVector<TScoreValue>::Type TSimdAlign;
448
449 #if SEQAN_ALIGN_SIMD_PROFILE
450 timer = sysTime();
451 #endif
452
453 unsigned const numAlignments = length(gapSeqSetH);
454 unsigned const sizeBatch = LENGTH<TSimdAlign>::VALUE;
455 unsigned const fullSize = sizeBatch * ((numAlignments + sizeBatch - 1) / sizeBatch);
456
457 String<TScoreValue> results;
458 resize(results, numAlignments);
459
460 // Create a SIMD scoring scheme.
461 Score<TSimdAlign, ScoreSimdWrapper<Score<TScoreValue, TScoreSpec> > > simdScoringScheme(scoringScheme);
462
463 // Prepare string sets with sequences.
464 StringSet<std::remove_const_t<typename Source<TGapSequenceH>::Type>, Dependent<> > depSetH;
465 StringSet<std::remove_const_t<typename Source<TGapSequenceV>::Type>, Dependent<> > depSetV;
466 reserve(depSetH, fullSize);
467 reserve(depSetV, fullSize);
468 for (unsigned i = 0; i < fullSize; ++i)
469 {
470 if (i >= numAlignments)
471 {
472 appendValue(depSetH, source(back(gapSeqSetH)));
473 appendValue(depSetV, source(back(gapSeqSetV)));
474 }
475 else
476 {
477 appendValue(depSetH, source(gapSeqSetH[i]));
478 appendValue(depSetV, source(gapSeqSetV[i]));
479 }
480 }
481
482 // Run alignments in batches.
483 for (auto pos = 0u; pos < fullSize; pos += sizeBatch)
484 {
485 auto infSetH = infixWithLength(depSetH, pos, sizeBatch);
486 auto infSetV = infixWithLength(depSetV, pos, sizeBatch);
487
488 TSimdAlign resultsBatch;
489
490 StringSet<String<TTraceSegment> > trace;
491 resize(trace, sizeBatch, Exact());
492
493 _prepareAndRunSimdAlignment(resultsBatch, trace, infSetH, infSetV, simdScoringScheme, config, TGapModel());
494
495 // copy results and finish traceback
496 // TODO(rrahn): Could be parallelized!
497 // to for_each call
498 for(auto x = pos; x < pos + sizeBatch && x < numAlignments; ++x)
499 {
500 results[x] = resultsBatch[x - pos];
501 _adaptTraceSegmentsTo(gapSeqSetH[x], gapSeqSetV[x], trace[x - pos]);
502 }
503 #if SEQAN_ALIGN_SIMD_PROFILE
504 profile.traceTimer += sysTime() - timer;
505 timer = sysTime();
506 #endif
507 }
508 return results;
509 }
510
511 } // namespace seqan
512 #endif // #ifndef INCLUDE_SEQAN_ALIGN_DP_ALIGN_SIMD_HELPER_H_
513