1 #include <algorithm>
2 #include <cassert>
3 #include <climits>
4 #include <cstdio>
5 #include <limits>
6 #include <vector>
7 
8 #define SIMDE_ENABLE_NATIVE_ALIASES
9 #include <simde_avx2.h> // AVX2 and lower
10 
11 #include "opal.h"
12 
13 
14 // I define aliases for SSE intrinsics, so they can be used in code not depending on SSE generation.
15 // If available, AVX2 is used because it has two times bigger register, thus everything is two times faster.
16 // #ifdef __AVX2__
17 #if 1     //A.D.: now SIMDe will be taking care of the SIMD
18 
19 const int SIMD_REG_SIZE = 256; //!< number of bits in register
20 typedef __m256i __mxxxi; //!< represents register containing integers
21 #define _mmxxx_load_si  _mm256_load_si256
22 #define _mmxxx_store_si _mm256_store_si256
23 #define _mmxxx_and_si   _mm256_and_si256
24 #define _mmxxx_testz_si _mm256_testz_si256
25 
26 #define _mmxxx_adds_epi8  _mm256_adds_epi8
27 #define _mmxxx_subs_epi8  _mm256_subs_epi8
28 #define _mmxxx_min_epu8   _mm256_min_epu8
29 #define _mmxxx_min_epi8   _mm256_min_epi8
30 #define _mmxxx_max_epu8   _mm256_max_epu8
31 #define _mmxxx_max_epi8   _mm256_max_epi8
32 #define _mmxxx_set1_epi8  _mm256_set1_epi8
33 #define _mmxxx_cmpgt_epi8 _mm256_cmpgt_epi8
34 
35 #define _mmxxx_adds_epi16  _mm256_adds_epi16
36 #define _mmxxx_subs_epi16  _mm256_subs_epi16
37 #define _mmxxx_min_epi16   _mm256_min_epi16
38 #define _mmxxx_max_epi16   _mm256_max_epi16
39 #define _mmxxx_set1_epi16  _mm256_set1_epi16
40 #define _mmxxx_cmpgt_epi16 _mm256_cmpgt_epi16
41 
42 #define _mmxxx_add_epi32   _mm256_add_epi32
43 #define _mmxxx_sub_epi32   _mm256_sub_epi32
44 #define _mmxxx_min_epi32   _mm256_min_epi32
45 #define _mmxxx_max_epi32   _mm256_max_epi32
46 #define _mmxxx_set1_epi32  _mm256_set1_epi32
47 #define _mmxxx_cmpgt_epi32 _mm256_cmpgt_epi32
48 
49 #else // SSE4.1
50 
51 const int SIMD_REG_SIZE = 128;
52 typedef __m128i __mxxxi;
53 #define _mmxxx_load_si  _mm_load_si128
54 #define _mmxxx_store_si _mm_store_si128
55 #define _mmxxx_and_si   _mm_and_si128
56 #define _mmxxx_testz_si _mm_testz_si128
57 
58 #define _mmxxx_adds_epi8  _mm_adds_epi8
59 #define _mmxxx_subs_epi8  _mm_subs_epi8
60 #define _mmxxx_min_epu8   _mm_min_epu8
61 #define _mmxxx_min_epi8   _mm_min_epi8
62 #define _mmxxx_max_epu8   _mm_max_epu8
63 #define _mmxxx_max_epi8   _mm_max_epi8
64 #define _mmxxx_set1_epi8  _mm_set1_epi8
65 #define _mmxxx_cmpgt_epi8 _mm_cmpgt_epi8
66 
67 #define _mmxxx_adds_epi16  _mm_adds_epi16
68 #define _mmxxx_subs_epi16  _mm_subs_epi16
69 #define _mmxxx_min_epi16   _mm_min_epi16
70 #define _mmxxx_max_epi16   _mm_max_epi16
71 #define _mmxxx_set1_epi16  _mm_set1_epi16
72 #define _mmxxx_cmpgt_epi16 _mm_cmpgt_epi16
73 
74 #define _mmxxx_add_epi32   _mm_add_epi32
75 #define _mmxxx_sub_epi32   _mm_sub_epi32
76 #define _mmxxx_min_epi32   _mm_min_epi32
77 #define _mmxxx_max_epi32   _mm_max_epi32
78 #define _mmxxx_set1_epi32  _mm_set1_epi32
79 #define _mmxxx_cmpgt_epi32 _mm_cmpgt_epi32
80 
81 #endif
82 
83 
84 //------------------------------------ SIMD PARAMETERS ---------------------------------//
simdIsAllZeroes(const __mxxxi & a)85 static inline int simdIsAllZeroes(const __mxxxi& a) {
86     return _mmxxx_testz_si(a, a);
87 }
88 
89 /**
90  * Contains parameters and SIMD instructions specific for certain score type.
91  */
92 
93 template<typename T> struct SimdSW {};
94 
95 template<>
96 struct SimdSW<char> {
97     typedef char type; //!< Type that will be used for score
98     static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(char)); //!< Number of sequences that can be done in parallel.
99     static const bool satArthm = true; //!< True if saturation arithmetic is used, false otherwise.
100     static const bool negRange = true; //!< True if it uses negative range for score representation, goes with saturation
addSimdSW101     static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_adds_epi8(a, b); }
subSimdSW102     static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_subs_epi8(a, b); }
minSimdSW103     static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epu8(a, b); }
maxSimdSW104     static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epu8(a, b); }
cmpgtSimdSW105     static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi8(a, b); }
set1SimdSW106     static inline __mxxxi set1(int a) { return _mmxxx_set1_epi8(a); }
107 };
108 
109 template<>
110 struct SimdSW<short> {
111     typedef short type;
112     static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(short));
113     static const bool satArthm = true;
114     static const bool negRange = false;
addSimdSW115     static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_adds_epi16(a, b); }
subSimdSW116     static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_subs_epi16(a, b); }
minSimdSW117     static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi16(a, b); }
maxSimdSW118     static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi16(a, b); }
cmpgtSimdSW119     static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi16(a, b); }
set1SimdSW120     static inline __mxxxi set1(int a) { return _mmxxx_set1_epi16(a); }
121 };
122 
123 template<>
124 struct SimdSW<int> {
125     typedef int type;
126     static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(int));
127     static const bool satArthm = false;
128     static const bool negRange = false;
addSimdSW129     static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_add_epi32(a, b); }
subSimdSW130     static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_sub_epi32(a, b); }
minSimdSW131     static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi32(a, b); }
maxSimdSW132     static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi32(a, b); }
cmpgtSimdSW133     static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi32(a, b); }
set1SimdSW134     static inline __mxxxi set1(int a) { return _mmxxx_set1_epi32(a); }
135 };
136 //--------------------------------------------------------------------------------------//
137 
138 
139 static bool loadNextSequence(int &nextDbSeqIdx, int dbLength, int &currDbSeqIdx, unsigned char * &currDbSeqPos,
140                              int &currDbSeqLength, unsigned char ** db, int dbSeqLengths[], bool calculated[],
141                              int &numEndedDbSeqs);
142 
143 
144 // For debugging
145 template<class SIMD>
print_mmxxxi(__mxxxi mm)146 void print_mmxxxi(__mxxxi mm) {
147     typename SIMD::type unpacked[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
148     _mmxxx_store_si((__mxxxi*)unpacked, mm);
149     for (int i = 0; i < SIMD::numSeqs; i++)
150         printf("%d ", unpacked[i]);
151 }
152 
153 // This structure represents cell in dynamic programming matrix, but only with E and H values.
154 struct CellEH {
155     __mxxxi H;
156     __mxxxi E;
157 };
158 
159 /**
160  * @param stopOnOverflow  If true, function will stop when first overflow happens.
161  *            If false, function will not stop but continue with next sequence.
162  *
163  */
164 template<class SIMD>
searchDatabaseSW_(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,bool calculated[],int overflowMethod)165 static int searchDatabaseSW_(unsigned char query[], int queryLength,
166                              unsigned char** db, int dbLength, int dbSeqLengths[],
167                              int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
168                              OpalSearchResult* results[], const int searchType, bool calculated[],
169                              int overflowMethod) {
170 
171     const typename SIMD::type LOWER_BOUND = std::numeric_limits<typename SIMD::type>::min();
172     const typename SIMD::type UPPER_BOUND = std::numeric_limits<typename SIMD::type>::max();
173 
174     bool overflowOccured = false;  // True if overflow was detected at least once.
175 
176     // ----------------------- CHECK ARGUMENTS -------------------------- //
177     // Check if Q, R or scoreMatrix have values too big for used score type
178     if (gapOpen < LOWER_BOUND || UPPER_BOUND < gapOpen || gapExt < LOWER_BOUND || UPPER_BOUND < gapExt) {
179         return 1;
180     }
181     if (!SIMD::satArthm) {
182         // These extra limits are enforced so overflow could be detected more efficiently
183         if (gapOpen <= LOWER_BOUND/2 || UPPER_BOUND/2 <= gapOpen || gapExt <= LOWER_BOUND/2 || UPPER_BOUND/2 <= gapExt) {
184             return OPAL_ERR_OVERFLOW;
185         }
186     }
187 
188     for (int r = 0; r < alphabetLength; r++)
189         for (int c = 0; c < alphabetLength; c++) {
190             int score = scoreMatrix[r * alphabetLength + c];
191             if (score < LOWER_BOUND || UPPER_BOUND < score) {
192                 return OPAL_ERR_OVERFLOW;
193             }
194             if (!SIMD::satArthm) {
195                 if (score <= LOWER_BOUND/2 || UPPER_BOUND/2 <= score)
196                     return OPAL_ERR_OVERFLOW;
197             }
198         }
199     // ------------------------------------------------------------------ //
200 
201 
202     // ------------------------ INITIALIZATION -------------------------- //
203     __mxxxi zeroes = SIMD::set1(0);
204     __mxxxi scoreZeroes; // 0 normally, but lower bound if using negative range
205     if (SIMD::negRange) {
206         scoreZeroes = SIMD::set1(LOWER_BOUND);
207     } else {
208         scoreZeroes = zeroes;
209     }
210 
211     int numEndedDbSeqs = 0; // Number of sequences that ended
212     int nextDbSeqIdx = 0;  // Index in db of next sequence that we will try to load.
213     // Index in db. -1 for null sequence (null sequence == no sequence).
214     int currDbSeqsIdxs[SIMD::numSeqs];
215     // Pointer to current element for each current database sequence. 0 for null sequence.
216     unsigned char* currDbSeqsPos[SIMD::numSeqs];
217     // Lengths of remaining parts of sequences. -1 for null sequence.
218     int currDbSeqsLengths[SIMD::numSeqs];
219 
220     // Needed in order to find the most early result, which is a nice condition to have.
221     int currDbSeqsBestScore[SIMD::numSeqs];
222     // Row index of best score for each current database sequence.
223     int currDbSeqsBestScoreRow[SIMD::numSeqs];
224     // Column index of best score for each current database sequence.
225     int currDbSeqsBestScoreColumn[SIMD::numSeqs];
226 
227     // Profile query -> here we store preprocessed score data needed in core loop.
228     // It is recalculated for each column.
229     __mxxxi P[alphabetLength];
230 
231     // Load initial sequences
232     for (int i = 0; i < SIMD::numSeqs; i++) {
233         currDbSeqsBestScore[i] = LOWER_BOUND;
234         loadNextSequence(nextDbSeqIdx, dbLength, currDbSeqsIdxs[i], currDbSeqsPos[i],
235                          currDbSeqsLengths[i], db, dbSeqLengths, calculated, numEndedDbSeqs);
236     }
237 
238     // Q is gap open penalty, R is gap ext penalty.
239     __mxxxi Q = SIMD::set1(gapOpen);
240     __mxxxi R = SIMD::set1(gapExt);
241 
242     int rowsWithImprovement[queryLength];  // Indexes of rows where one of sequences improved score.
243 
244     // Previous Hs, previous Es, previous F, all signed short.
245     CellEH prevColumn[queryLength];  // Stores results of previous column in matrix.
246     // Initialize all values to 0
247     for (int i = 0; i < queryLength; i++) {
248         prevColumn[i].H = prevColumn[i].E = scoreZeroes;
249     }
250 
251     __mxxxi maxH = scoreZeroes;  // Best score in sequence
252     // ------------------------------------------------------------------ //
253 
254 
255     // For each column
256     while (numEndedDbSeqs < dbLength) {
257         // -------------------- CALCULATE QUERY PROFILE ------------------------- //
258         // TODO: Rognes uses pshufb here, I don't know how/why?
259         typename SIMD::type profileRow[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
260         for (unsigned char letter = 0; letter < alphabetLength; letter++) {
261             int* scoreMatrixRow = scoreMatrix + letter*alphabetLength;
262             for (int i = 0; i < SIMD::numSeqs; i++) {
263                 unsigned char* dbSeqPos = currDbSeqsPos[i];
264                 if (dbSeqPos != 0)
265                     profileRow[i] = (typename SIMD::type)scoreMatrixRow[*dbSeqPos];
266             }
267             P[letter] = _mmxxx_load_si((__mxxxi const*)profileRow);
268         }
269         // ---------------------------------------------------------------------- //
270 
271         // Previous cells: u - up, l - left, ul - up left
272         __mxxxi uF, uH, ulH;
273         uF = uH = ulH = scoreZeroes; // F[-1, c] = H[-1, c] = H[-1, c-1] = 0
274 
275         __mxxxi ofTest = scoreZeroes; // Used for detecting the overflow when not using saturated ar
276 
277         int rowsWithImprovementLength = 0;
278 
279         // ----------------------- CORE LOOP (ONE COLUMN) ----------------------- //
280         for (int r = 0; r < queryLength; r++) { // For each cell in column
281             // Calculate E = max(lH-Q, lE-R)
282             __mxxxi E = SIMD::max(SIMD::sub(prevColumn[r].H, Q), SIMD::sub(prevColumn[r].E, R));
283 
284             // Calculate F = max(uH-Q, uF-R)
285             __mxxxi F = SIMD::max(SIMD::sub(uH, Q), SIMD::sub(uF, R));
286 
287             // Calculate H
288             __mxxxi H = SIMD::max(F, E);
289             if (!SIMD::negRange) {
290                 // If not using negative range, then H could be negative at this moment so we need this
291                 H = SIMD::max(H, zeroes);
292             }
293             __mxxxi ulH_P = SIMD::add(ulH, P[query[r]]);
294             // If using negative range: if ulH_P >= 0 then we have overflow
295 
296             H = SIMD::max(H, ulH_P);
297             // If using negative range: H will always be negative, even if ulH_P overflowed
298 
299             // Save data needed for overflow detection. Not more then one condition will fire
300             if (SIMD::negRange)
301                 ofTest = _mmxxx_and_si(ofTest, ulH_P);
302             if (!SIMD::satArthm)
303                 ofTest = SIMD::min(ofTest, ulH_P);
304 
305             // If we need end location, remember row with best score.
306             if (searchType != OPAL_SEARCH_SCORE) {
307                 // We remember rows that had max scores, in order to find out the row of best score.
308                 rowsWithImprovement[rowsWithImprovementLength] = r;
309                 // TODO(martin): simdIsAllZeroes seems to bring significant slowdown, but I could
310                 // not find way to avoid it.
311                 rowsWithImprovementLength += 1 - simdIsAllZeroes(SIMD::cmpgt(H, maxH));
312             }
313 
314             maxH = SIMD::max(maxH, H); // update best score
315 
316             // Set uF, uH, ulH
317             uF = F;
318             uH = H;
319             ulH = prevColumn[r].H;
320 
321             // Remeber values so they can be used in next column.
322             prevColumn[r].E = E;
323             prevColumn[r].H = H;
324 
325             // For saturated: score is biased everywhere, but just score: E, F, H
326             // Also, all scores except ulH_P certainly have value < 0
327         }
328         // ---------------------------------------------------------------------- //
329 
330         for (int seqIdx = 0; seqIdx < SIMD::numSeqs; seqIdx++) {
331             currDbSeqsLengths[seqIdx] -= currDbSeqsLengths[seqIdx] > 0;
332         }
333 
334         typename SIMD::type unpackedMaxH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
335         _mmxxx_store_si((__mxxxi*)unpackedMaxH, maxH);
336 
337         // ------------------------ OVERFLOW DETECTION -------------------------- //
338         bool overflowed[SIMD::numSeqs];
339         if (!SIMD::satArthm) {
340             // This check is based on following assumptions:
341             //  - overflow wraps
342             //  - Q, R and all scores from scoreMatrix are between LOWER_BOUND/2 and UPPER_BOUND/2 exclusive
343             typename SIMD::type unpackedOfTest[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
344             _mmxxx_store_si((__mxxxi*)unpackedOfTest, ofTest);
345             for (int i = 0; i < SIMD::numSeqs; i++) {
346                 overflowed[i] = currDbSeqsPos[i] != 0 && unpackedOfTest[i] <= LOWER_BOUND / 2;
347                 if (overflowMethod == OPAL_OVERFLOW_BUCKETS && overflowed[i]) {
348                     // In buckets method, we stop calculation when overflow is detected.
349                     return OPAL_ERR_OVERFLOW;
350                 }
351             }
352         } else {
353             if (SIMD::negRange) {
354                 // Since I use saturation, I check if minUlH_P was non negative
355                 typename SIMD::type unpackedOfTest[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
356                 _mmxxx_store_si((__mxxxi*)unpackedOfTest, ofTest);
357                 for (int i = 0; i < SIMD::numSeqs; i++) {
358                     overflowed[i] = currDbSeqsPos[i] != 0 && unpackedOfTest[i] >= 0;
359                     if (overflowMethod == OPAL_OVERFLOW_BUCKETS && overflowed[i]) {
360                         return OPAL_ERR_OVERFLOW;
361                     }
362                 }
363             } else {
364                 // I check if upper bound is reached
365                 for (int i = 0; i < SIMD::numSeqs; i++) {
366                     overflowed[i] = currDbSeqsPos[i] != 0 && unpackedMaxH[i] == UPPER_BOUND;
367                     if (overflowMethod == OPAL_OVERFLOW_BUCKETS && overflowed[i]) {
368                         return OPAL_ERR_OVERFLOW;
369                     }
370                 }
371             }
372         }
373         // ---------------------------------------------------------------------- //
374 
375         bool overflowDetected = false;  // True if overflow was detected in last column.
376         bool sequenceEnded = false;  // True if a sequence ended in last column.
377         for (int seqIdx = 0; seqIdx < SIMD::numSeqs; seqIdx++) {
378             overflowDetected = overflowDetected || overflowed[seqIdx];
379             sequenceEnded = sequenceEnded || (currDbSeqsLengths[seqIdx] == 0);
380         }
381         overflowOccured = overflowOccured || overflowDetected;
382 
383 
384         // --------- Update end location of alignment ----------- //
385         if (searchType != OPAL_SEARCH_SCORE) {
386             for (int i = 0; i < rowsWithImprovementLength; i++) {
387                 int r = rowsWithImprovement[i];
388                 typename SIMD::type unpackedH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
389                 _mmxxx_store_si((__mxxxi*)unpackedH, prevColumn[r].H);
390                 for (int j = 0; j < SIMD::numSeqs; j++) {
391                     if (currDbSeqsPos[j] != 0 && !overflowed[j]) {  // If not null sequence or overflowedresult->endLocationQuery =
392                         if (unpackedH[j] > currDbSeqsBestScore[j]) {
393                             currDbSeqsBestScore[j] = unpackedH[j];
394                             currDbSeqsBestScoreRow[j] = r;
395                             currDbSeqsBestScoreColumn[j] = dbSeqLengths[currDbSeqsIdxs[j]]
396                                 - currDbSeqsLengths[j] - 1;
397                         }
398                     }
399                 }
400             }
401         }
402         // ------------------------------------------------------ //
403 
404 
405         // --------------------- CHECK AND HANDLE SEQUENCE END ------------------ //
406         if (overflowDetected || sequenceEnded) { // If at least one sequence ended
407             typename SIMD::type resetMask[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
408 
409             for (int i = 0; i < SIMD::numSeqs; i++) {
410                 if (currDbSeqsPos[i] != 0) { // If not null sequence
411                     if (overflowed[i] || currDbSeqsLengths[i] == 0) { // If sequence ended
412                         numEndedDbSeqs++;
413                         if (!overflowed[i]) {
414                             // Save score and mark as calculated
415                             calculated[currDbSeqsIdxs[i]] = true;
416                             opalSearchResultSetScore(results[currDbSeqsIdxs[i]], unpackedMaxH[i]);
417                             if (SIMD::negRange) {
418                                 results[currDbSeqsIdxs[i]]->score -= LOWER_BOUND;
419                             }
420                             if (searchType != OPAL_SEARCH_SCORE) {
421                                 results[currDbSeqsIdxs[i]]->endLocationQuery = currDbSeqsBestScoreRow[i];
422                                 results[currDbSeqsIdxs[i]]->endLocationTarget = currDbSeqsBestScoreColumn[i];
423                             } else {
424                                 results[currDbSeqsIdxs[i]]->endLocationQuery = -1;
425                                 results[currDbSeqsIdxs[i]]->endLocationTarget = -1;
426                             }
427                         }
428                         currDbSeqsBestScore[i] = LOWER_BOUND;
429                         // Load next sequence
430                         loadNextSequence(nextDbSeqIdx, dbLength, currDbSeqsIdxs[i], currDbSeqsPos[i],
431                                          currDbSeqsLengths[i], db, dbSeqLengths, calculated, numEndedDbSeqs);
432                         // If negative range, sets to LOWER_BOUND when used with saturated add and value < 0,
433                         // otherwise sets to zero when used with and.
434                         resetMask[i] = SIMD::negRange ? LOWER_BOUND : 0;
435                     } else {
436                         // Does not change anything when used with saturated add / and.
437                         resetMask[i] = SIMD::negRange ? 0 : -1;
438                         if (currDbSeqsPos[i] != 0)
439                             currDbSeqsPos[i]++; // If not new and not null, move for one element
440                     }
441                 }
442             }
443             // Reset previous columns (Es and Hs) and maxH
444             __mxxxi resetMaskPacked = _mmxxx_load_si((__mxxxi const*)resetMask);
445             if (SIMD::negRange) {
446                 for (int i = 0; i < queryLength; i++) {
447                     prevColumn[i].H = SIMD::add(prevColumn[i].H, resetMaskPacked);
448                     prevColumn[i].E = SIMD::add(prevColumn[i].E, resetMaskPacked);
449                 }
450                 maxH = SIMD::add(maxH, resetMaskPacked);
451             } else {
452                 for (int i = 0; i < queryLength; i++) {
453                     prevColumn[i].H = _mmxxx_and_si(prevColumn[i].H, resetMaskPacked);
454                     prevColumn[i].E = _mmxxx_and_si(prevColumn[i].E, resetMaskPacked);
455                 }
456                 maxH = _mmxxx_and_si(maxH, resetMaskPacked);
457             }
458         } else { // If no sequences ended
459             // Move for one element in all sequences
460             for (int i = 0; i < SIMD::numSeqs; i++)
461                 currDbSeqsPos[i] += (currDbSeqsPos[i] != 0);
462         }
463         // ---------------------------------------------------------------------- //
464     }
465 
466     if (overflowOccured) {
467         return OPAL_ERR_OVERFLOW;
468     }
469     return 0;
470 }
471 
loadNextSequence(int & nextDbSeqIdx,int dbLength,int & currDbSeqIdx,unsigned char * & currDbSeqPos,int & currDbSeqLength,unsigned char ** db,int dbSeqLengths[],bool calculated[],int & numEndedDbSeqs)472 static inline bool loadNextSequence(int &nextDbSeqIdx, int dbLength, int &currDbSeqIdx, unsigned char* &currDbSeqPos,
473                                     int &currDbSeqLength, unsigned char** db, int dbSeqLengths[], bool calculated[],
474                                     int &numEndedDbSeqs) {
475     while (nextDbSeqIdx < dbLength && calculated[nextDbSeqIdx]) {
476         nextDbSeqIdx++;
477         numEndedDbSeqs++;
478     }
479     if (nextDbSeqIdx < dbLength) { // If there is sequence to load
480         currDbSeqIdx = nextDbSeqIdx;
481         currDbSeqPos = db[nextDbSeqIdx];
482         currDbSeqLength = dbSeqLengths[nextDbSeqIdx];
483         nextDbSeqIdx++;
484         return true;
485     } else { // If there are no more sequences to load, load "null" sequence
486         currDbSeqIdx = currDbSeqLength = -1; // Set to -1 if there are no more sequences
487         currDbSeqPos = 0;
488         return false;
489     }
490 }
491 
492 /**
493  * @param If skip[i] is true, result for sequence #i will not be calculated.
494  *     If skip is NULL, all results will be calculated.
495  */
searchDatabaseSW(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,bool skip[],int overflowMethod)496 static int searchDatabaseSW(unsigned char query[], int queryLength,
497                             unsigned char** db, int dbLength, int dbSeqLengths[],
498                             int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
499                             OpalSearchResult* results[], const int searchType, bool skip[], int overflowMethod) {
500     int resultCode = 0;
501     // Do buckets only if using buckets overflow method.
502     const int chunkSize = overflowMethod == OPAL_OVERFLOW_BUCKETS ? 1024 : dbLength;
503     bool* calculated = new bool[chunkSize];
504     for (int startIdx = 0; startIdx < dbLength; startIdx += chunkSize) {
505         unsigned char** db_ = db + startIdx;
506         int* dbSeqLengths_ = dbSeqLengths + startIdx;
507         OpalSearchResult** results_ = results + startIdx;
508         int dbLength_ = startIdx + chunkSize >= dbLength ? dbLength - startIdx : chunkSize;
509         for (int i = 0; i < dbLength_; i++) {
510             calculated[i] = skip ? skip[i] : false;
511         }
512         resultCode = searchDatabaseSW_< SimdSW<char> >(
513             query, queryLength, db_, dbLength_, dbSeqLengths_,
514             gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
515             searchType, calculated, overflowMethod);
516         if (resultCode == OPAL_ERR_OVERFLOW) {
517             resultCode = searchDatabaseSW_< SimdSW<short> >(
518                 query, queryLength, db_, dbLength_, dbSeqLengths_,
519                 gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
520                 searchType, calculated, overflowMethod);
521             if (resultCode == OPAL_ERR_OVERFLOW) {
522                 resultCode = searchDatabaseSW_< SimdSW<int> >(
523                     query, queryLength,
524                     db_, dbLength_, dbSeqLengths_,
525                     gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
526                     searchType, calculated, overflowMethod);
527                 if (resultCode != 0)
528                     break;
529             }
530         }
531     }
532 
533     delete[] calculated;
534     return resultCode;
535 }
536 
537 
538 
539 
540 
541 
542 
543 
544 
545 //------------------------------------ SIMD PARAMETERS ---------------------------------//
546 /**
547  * Contains parameters and SIMD instructions specific for certain score type.
548  */
549 template<typename T> class Simd {};
550 
551 template<>
552 struct Simd<char> {
553     typedef char type; //!< Type that will be used for score
554     static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(char)); //!< Number of sequences that can be done in parallel.
555     static const bool satArthm = true; //!< True if saturation arithmetic is used, false otherwise.
addSimd556     static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_adds_epi8(a, b); }
subSimd557     static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_subs_epi8(a, b); }
minSimd558     static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi8(a, b); }
maxSimd559     static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi8(a, b); }
cmpgtSimd560     static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi8(a, b); }
set1Simd561     static inline __mxxxi set1(int a) { return _mmxxx_set1_epi8(a); }
562 };
563 
564 template<>
565 struct Simd<short> {
566     typedef short type;
567     static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(short));
568     static const bool satArthm = true;
addSimd569     static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_adds_epi16(a, b); }
subSimd570     static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_subs_epi16(a, b); }
minSimd571     static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi16(a, b); }
maxSimd572     static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi16(a, b); }
cmpgtSimd573     static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi16(a, b); }
set1Simd574     static inline __mxxxi set1(int a) { return _mmxxx_set1_epi16(a); }
575 };
576 
577 template<>
578 struct Simd<int> {
579     typedef int type;
580     static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(int));
581     static const bool satArthm = false;
addSimd582     static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_add_epi32(a, b); }
subSimd583     static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_sub_epi32(a, b); }
minSimd584     static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi32(a, b); }
maxSimd585     static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi32(a, b); }
cmpgtSimd586     static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi32(a, b); }
set1Simd587     static inline __mxxxi set1(int a) { return _mmxxx_set1_epi32(a); }
588 };
589 //--------------------------------------------------------------------------------------//
590 
591 
592 
593 
594 template<class SIMD, int MODE>
searchDatabase_(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,bool calculated[],int overflowMethod)595 static int searchDatabase_(unsigned char query[], int queryLength,
596                            unsigned char** db, int dbLength, int dbSeqLengths[],
597                            int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
598                            OpalSearchResult* results[], const int searchType, bool calculated[], int overflowMethod) {
599 
600     static const typename SIMD::type LOWER_BOUND = std::numeric_limits<typename SIMD::type>::min();
601     static const typename SIMD::type UPPER_BOUND = std::numeric_limits<typename SIMD::type>::max();
602     // Used to represent -inf. Must be larger then lower bound to avoid overflow.
603     // TOOD(martin): Is it enough to use gapExt here? Should I use gapOpen instead?
604     static const typename SIMD::type LOWER_SCORE_BOUND = LOWER_BOUND + gapExt;
605 
606     bool overflowOccured = false;  // True if oveflow was detected at least once.
607 
608     // ----------------------- CHECK ARGUMENTS -------------------------- //
609     // Check if Q, R or scoreMatrix have values too big for used score type
610     if (gapOpen < LOWER_BOUND || UPPER_BOUND < gapOpen || gapExt < LOWER_BOUND || UPPER_BOUND < gapExt) {
611         return 1;
612     }
613     if (!SIMD::satArthm) {
614         // These extra limits are enforced so overflow could be detected more efficiently
615         if (gapOpen <= LOWER_BOUND/2 || UPPER_BOUND/2 <= gapOpen || gapExt <= LOWER_BOUND/2 || UPPER_BOUND/2 <= gapExt) {
616             return 1;
617         }
618     }
619 
620     for (int r = 0; r < alphabetLength; r++)
621         for (int c = 0; c < alphabetLength; c++) {
622             int score = scoreMatrix[r * alphabetLength + c];
623             if (score < LOWER_BOUND || UPPER_BOUND < score) {
624                 return 1;
625             }
626             if (!SIMD::satArthm) {
627                 if (score <= LOWER_BOUND/2 || UPPER_BOUND/2 <= score)
628                     return 1;
629             }
630         }
631 
632     // TODO: If not saturated arithmetic check if inital values of H (-gapOpen - i * gapExt) cause overflow
633     // ------------------------------------------------------------------ //
634 
635 
636     // ------------------------ INITIALIZATION -------------------------- //
637     const __mxxxi ZERO_SIMD = SIMD::set1(0);
638     const __mxxxi LOWER_BOUND_SIMD = SIMD::set1(LOWER_BOUND);
639     const __mxxxi LOWER_SCORE_BOUND_SIMD = SIMD::set1(LOWER_SCORE_BOUND);
640 
641     int nextDbSeqIdx = 0; // index in db
642     int currDbSeqsIdxs[SIMD::numSeqs]; // index in db
643     unsigned char* currDbSeqsPos[SIMD::numSeqs]; // current element for each current database sequence
644     int currDbSeqsLengths[SIMD::numSeqs];
645     bool justLoaded[SIMD::numSeqs] = {0}; // True if sequence was just loaded into channel
646     bool seqJustLoaded = false; // True if at least one sequence was just loaded into channel
647     int shortestDbSeqLength = -1;  // length of shortest sequence among current database sequences
648     int numEndedDbSeqs = 0; // Number of sequences that ended
649 
650     // Row index of best score for each current database sequence. Used for HW and OV.
651     int currDbSeqsBestScoreColumn[SIMD::numSeqs];
652 
653     // Load initial sequences
654     for (int i = 0; i < SIMD::numSeqs; i++)
655         if (loadNextSequence(nextDbSeqIdx, dbLength, currDbSeqsIdxs[i], currDbSeqsPos[i],
656                              currDbSeqsLengths[i], db, dbSeqLengths, calculated, numEndedDbSeqs)) {
657             justLoaded[i] = seqJustLoaded = true;
658             // Update shortest sequence length if new sequence was loaded
659             if (shortestDbSeqLength == -1 || currDbSeqsLengths[i] < shortestDbSeqLength)
660                 shortestDbSeqLength = currDbSeqsLengths[i];
661         }
662 
663     // Q is gap open penalty, R is gap ext penalty.
664     const __mxxxi Q = SIMD::set1(gapOpen);
665     const __mxxxi R = SIMD::set1(gapExt);
666 
667     // Previous H column (array), previous E column (array), previous F, all signed short
668     __mxxxi prevHs[queryLength];
669     __mxxxi prevEs[queryLength];
670     // Initialize all values
671     for (int r = 0; r < queryLength; r++) {
672         if (MODE == OPAL_MODE_OV)
673             prevHs[r] = ZERO_SIMD;
674         else { // - Q - r * R
675             if (r == 0)
676                 prevHs[0] = SIMD::sub(ZERO_SIMD, Q);
677             else
678                 prevHs[r] = SIMD::sub(prevHs[r-1], R);
679         }
680 
681         prevEs[r] = LOWER_SCORE_BOUND_SIMD;
682     }
683 
684     // u - up, ul - up left
685     __mxxxi uH, ulH;
686     if (MODE == OPAL_MODE_NW) {
687         ulH = ZERO_SIMD;
688         uH = SIMD::sub(R, Q); // -Q + R
689     }
690 
691     __mxxxi maxLastRowH = LOWER_BOUND_SIMD; // Keeps track of maximum H in last row
692     // ------------------------------------------------------------------ //
693 
694 
695     int columnsSinceLastSeqEnd = 0;
696     // For each column
697     while (numEndedDbSeqs < dbLength) {
698         // -------------------- CALCULATE QUERY PROFILE ------------------------- //
699         // TODO: Rognes uses pshufb here, I don't know how/why?
700         __mxxxi P[alphabetLength];
701         typename SIMD::type profileRow[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
702         for (unsigned char letter = 0; letter < alphabetLength; letter++) {
703             int* scoreMatrixRow = scoreMatrix + letter*alphabetLength;
704             for (int i = 0; i < SIMD::numSeqs; i++) {
705                 unsigned char* dbSeqPos = currDbSeqsPos[i];
706                 if (dbSeqPos != 0)
707                     profileRow[i] = (typename SIMD::type)scoreMatrixRow[*dbSeqPos];
708             }
709             P[letter] = _mmxxx_load_si((__mxxxi const*)profileRow);
710         }
711         // ---------------------------------------------------------------------- //
712 
713         // u - up
714         __mxxxi uF = LOWER_SCORE_BOUND_SIMD;
715 
716         // Database sequence has fixed start and end only in NW
717         if (MODE == OPAL_MODE_NW) {
718             if (seqJustLoaded) {
719                 typename SIMD::type resetMask[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
720                 for (int i = 0; i < SIMD::numSeqs; i++)
721                     resetMask[i] = justLoaded[i] ?  0 : -1;
722                 const __mxxxi resetMaskPacked = _mmxxx_load_si((__mxxxi const*)resetMask);
723                 ulH = _mmxxx_and_si(uH, resetMaskPacked);
724             } else {
725                 ulH = uH;
726             }
727 
728             uH = SIMD::sub(uH, R); // uH is -Q - c*R
729             // NOTE: Setup of ulH and uH for first column is done when sequence is loaded.
730         } else {
731             uH = ulH = ZERO_SIMD;
732         }
733 
734         __mxxxi minE, minF;
735         minE = minF = SIMD::set1(UPPER_BOUND);
736         __mxxxi maxH = LOWER_BOUND_SIMD; // Max H in this column
737         __mxxxi H;
738 
739         __mxxxi firstRow_uH, firstRow_ulH; // Values of uH and ulH from first row of column
740 
741         if (MODE == OPAL_MODE_NW) {
742             firstRow_uH = uH;
743             firstRow_ulH = ulH;
744         }
745 
746         __mxxxi prevMaxLastRowH = maxLastRowH;
747         // ----------------------- CORE LOOP (ONE COLUMN) ----------------------- //
748         for (int r = 0; r < queryLength; r++) { // For each cell in column
749             // Calculate E = max(lH-Q, lE-R)
750             __mxxxi E = SIMD::max(SIMD::sub(prevHs[r], Q), SIMD::sub(prevEs[r], R)); // E could overflow
751 
752             // Calculate F = max(uH-Q, uF-R)
753             __mxxxi F = SIMD::max(SIMD::sub(uH, Q), SIMD::sub(uF, R)); // F could overflow
754             minF = SIMD::min(minF, F); // For overflow detection
755 
756             // Calculate H
757             H = SIMD::max(F, E);
758             __mxxxi ulH_P = SIMD::add(ulH, P[query[r]]);
759             H = SIMD::max(H, ulH_P); // H could overflow
760 
761             maxH = SIMD::max(maxH, H); // update best score in column
762 
763             // Set uF, uH, ulH
764             uF = F;
765             uH = H;
766             ulH = prevHs[r];
767 
768             // Update prevHs, prevEs in advance for next column
769             prevEs[r] = E;
770             prevHs[r] = H;
771         }
772         // ---------------------------------------------------------------------- //
773 
774         maxLastRowH = SIMD::max(maxLastRowH, H);
775 
776         if (MODE == OPAL_MODE_NW) {
777             uH = firstRow_uH;
778             ulH = firstRow_ulH;
779         }
780 
781         // Find minE, which should be checked with minE == LOWER_BOUND for overflow
782         for (int r = 0; r < queryLength; r++)
783             minE = SIMD::min(minE, prevEs[r]);
784 
785         columnsSinceLastSeqEnd++;
786 
787         typename SIMD::type unpackedMaxH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
788         _mmxxx_store_si((__mxxxi*)unpackedMaxH, maxH);
789 
790         // ------------------------ OVERFLOW DETECTION -------------------------- //
791         bool overflowDetected = false;  // True if overflow was detected for this column.
792         bool overflowed[SIMD::numSeqs];
793         if (!SIMD::satArthm) {
794             /*           // This check is based on following assumptions:
795             //  - overflow wraps
796             //  - Q, R and all scores from scoreMatrix are between LOWER_BOUND/2 and UPPER_BOUND/2 exclusive
797             typename SIMD::type* unpackedOfTest = (typename SIMD::type *)&ofTest;
798             for (int i = 0; i < SIMD::numSeqs; i++)
799                 if (currDbSeqsPos[i] != 0 && unpackedOfTest[i] <= LOWER_BOUND/2)
800                 return 1;*/
801         } else {
802             // There is overflow if minE == LOWER_BOUND or minF == LOWER_BOUND or maxH == UPPER_BOUND
803             __mxxxi minEF = SIMD::min(minE, minF);
804             typename SIMD::type unpackedMinEF[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
805             _mmxxx_store_si((__mxxxi*)unpackedMinEF, minEF);
806             for (int i = 0; i < SIMD::numSeqs; i++) {
807                 overflowed[i] = currDbSeqsPos[i] != 0 && (unpackedMinEF[i] == LOWER_BOUND
808                                                           || unpackedMaxH[i] == UPPER_BOUND);
809                 if (overflowMethod == OPAL_OVERFLOW_BUCKETS && overflowed[i]) {
810                     // In buckets method, we stop calculation when overflow is detected.
811                     return OPAL_ERR_OVERFLOW;
812                 }
813             }
814         }
815         for (int i = 0; i < SIMD::numSeqs; i++) {
816             overflowDetected = overflowDetected || overflowed[i];
817         }
818         overflowOccured = overflowOccured || overflowDetected;
819         // ---------------------------------------------------------------------- //
820 
821         // ------------------ Store end location of best score ------------------ //
822         if (searchType != OPAL_SEARCH_SCORE && (MODE == OPAL_MODE_HW || MODE == OPAL_MODE_OV)) {
823             // Determine the column of best score.
824             __mxxxi greater = SIMD::cmpgt(maxLastRowH, prevMaxLastRowH);
825             typename SIMD::type unpackedGreater[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
826             _mmxxx_store_si((__mxxxi*)unpackedGreater, greater);
827             for (int i = 0; i < SIMD::numSeqs; i++) {
828                 if (currDbSeqsPos[i] != 0 && !overflowed[i]) {  // If not null sequence or overflowed
829                     if (unpackedGreater[i] != 0) {
830                         currDbSeqsBestScoreColumn[i] = dbSeqLengths[currDbSeqsIdxs[i]] - currDbSeqsLengths[i]
831                             + columnsSinceLastSeqEnd - 1;
832                     }
833                 }
834             }
835         }
836         // ---------------------------------------------------------------------- //
837 
838         seqJustLoaded = false;
839         // --------------------- CHECK AND HANDLE SEQUENCE END ------------------ //
840         if (overflowDetected || shortestDbSeqLength == columnsSinceLastSeqEnd) { // If at least one sequence ended
841             shortestDbSeqLength = -1;
842 
843             // Calculate best scores
844             __mxxxi bestScore;
845             if (MODE == OPAL_MODE_OV)
846                 bestScore = SIMD::max(maxH, maxLastRowH); // Maximum of last row and column
847             if (MODE == OPAL_MODE_HW)
848                 bestScore = maxLastRowH;
849             if (MODE == OPAL_MODE_NW)
850                 bestScore = H;
851             typename SIMD::type unpackedBestScore[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
852             _mmxxx_store_si((__mxxxi*)unpackedBestScore, bestScore);
853 
854             for (int i = 0; i < SIMD::numSeqs; i++) {
855                 if (currDbSeqsPos[i] != 0) { // If not null sequence
856                     justLoaded[i] = false;
857                     currDbSeqsLengths[i] -= columnsSinceLastSeqEnd;
858 
859                     if (overflowed[i] || currDbSeqsLengths[i] == 0) { // If sequence ended
860                         numEndedDbSeqs++;
861                         if (!overflowed[i]) {
862                             // Store result.
863                             int dbSeqIdx = currDbSeqsIdxs[i];
864                             OpalSearchResult *result = results[dbSeqIdx];
865                             calculated[dbSeqIdx] = true;
866                             // Set score.
867                             opalSearchResultSetScore(result, unpackedBestScore[i]);
868                             // Set end location.
869                             if (searchType == OPAL_SEARCH_SCORE) {
870                                 result->endLocationQuery = -1;
871                                 result->endLocationTarget = -1;
872                             } else {
873                                 if (MODE == OPAL_MODE_NW) {
874                                     result->endLocationQuery = queryLength - 1;
875                                     result->endLocationTarget = dbSeqLengths[dbSeqIdx] - 1;
876                                 }
877                                 if (MODE == OPAL_MODE_HW) {
878                                     result->endLocationQuery = queryLength - 1;
879                                     result->endLocationTarget = currDbSeqsBestScoreColumn[i];
880                                 }
881                                 if (MODE == OPAL_MODE_OV) {
882                                     // This unpacking will repeat unnecessarily if there are multiple sequences
883                                     // ending at the same time, however that will happen in very rare occasions.
884                                     // TODO(martin): always unpack only once.
885                                     typename SIMD::type unpackedPrevMaxLastRowH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
886                                     _mmxxx_store_si((__mxxxi*)unpackedPrevMaxLastRowH, prevMaxLastRowH);
887                                     typename SIMD::type maxScore = unpackedPrevMaxLastRowH[i];
888 
889                                     // If last column contains best score, determine row.
890                                     if (unpackedMaxH[i] > maxScore) {
891                                         result->endLocationTarget = dbSeqLengths[dbSeqIdx] - 1;
892                                         for (int r = 0; r < queryLength; r++) {
893                                             typename SIMD::type unpackedPrevH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
894                                             _mmxxx_store_si((__mxxxi*)unpackedPrevH, prevHs[r]);
895                                             if (unpackedPrevH[i] > maxScore) {
896                                                 result->endLocationQuery = r;
897                                                 maxScore = unpackedPrevH[i];
898                                             }
899                                         }
900                                     } else {
901                                         result->endLocationTarget = currDbSeqsBestScoreColumn[i];
902                                         result->endLocationQuery = queryLength - 1;
903                                     }
904                                 }
905                             }
906                         }
907                         // Load next sequence
908                         if (loadNextSequence(nextDbSeqIdx, dbLength, currDbSeqsIdxs[i], currDbSeqsPos[i],
909                                              currDbSeqsLengths[i], db, dbSeqLengths, calculated, numEndedDbSeqs)) {
910                             justLoaded[i] = seqJustLoaded = true;
911                         }
912                     } else {
913                         if (currDbSeqsPos[i] != 0)
914                             currDbSeqsPos[i]++; // If not new and not null, move for one element
915                     }
916                     // Update shortest sequence length if sequence is not null
917                     if (currDbSeqsPos[i] != 0 && (shortestDbSeqLength == -1 || currDbSeqsLengths[i] < shortestDbSeqLength))
918                         shortestDbSeqLength = currDbSeqsLengths[i];
919                 }
920             }
921             //------------ Reset prevEs, prevHs, maxLastRowH(, ulH and uH) ------------//
922             typename SIMD::type resetMask[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
923             typename SIMD::type setMask[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8))); // inverse of resetMask
924             for (int i = 0; i < SIMD::numSeqs; i++) {
925                 resetMask[i] = justLoaded[i] ?  0 : -1;
926                 setMask[i]   = justLoaded[i] ? -1 :  0;
927             }
928             const __mxxxi resetMaskPacked = _mmxxx_load_si((__mxxxi const*)resetMask);
929             const __mxxxi setMaskPacked = _mmxxx_load_si((__mxxxi const*)setMask);
930 
931             // Set prevEs ended channels to LOWER_SCORE_BOUND
932             const __mxxxi maskedLowerScoreBoundSimd = _mmxxx_and_si(setMaskPacked, LOWER_SCORE_BOUND_SIMD);
933             for (int r = 0; r < queryLength; r++) {
934                 prevEs[r] = _mmxxx_and_si(prevEs[r], resetMaskPacked);
935                 prevEs[r] = SIMD::add(prevEs[r], maskedLowerScoreBoundSimd);
936             }
937 
938             // Set prevHs
939             for (int r = 0; r < queryLength; r++) {
940                 prevHs[r] = _mmxxx_and_si(prevHs[r], resetMaskPacked);
941                 if (MODE != OPAL_MODE_OV) {
942                     if (r == 0) {
943                         prevHs[0] = SIMD::sub(prevHs[0], _mmxxx_and_si(setMaskPacked, Q));
944                     } else {
945                         prevHs[r] = SIMD::add(prevHs[r], _mmxxx_and_si(setMaskPacked, SIMD::sub(prevHs[r-1], R)));
946                     }
947                 }
948             }
949 
950             // Set ulH and uH if NW
951             if (MODE == OPAL_MODE_NW) {
952                 ulH = _mmxxx_and_si(ulH, resetMaskPacked); // to 0
953                 // Set uH channels to -Q + R
954                 uH = _mmxxx_and_si(uH, resetMaskPacked);
955                 uH = SIMD::add(uH, _mmxxx_and_si(setMaskPacked, SIMD::sub(R, Q)));
956             }
957 
958             // Set maxLastRow ended channels to LOWER_BOUND
959             maxLastRowH = _mmxxx_and_si(maxLastRowH, resetMaskPacked);
960             maxLastRowH = SIMD::add(maxLastRowH, _mmxxx_and_si(setMaskPacked, LOWER_BOUND_SIMD));
961             //-------------------------------------------------------//
962 
963             columnsSinceLastSeqEnd = 0;
964         } else { // If no sequences ended
965             // Move for one element in all sequences
966             for (int i = 0; i < SIMD::numSeqs; i++)
967                 if (currDbSeqsPos[i] != 0)
968                     currDbSeqsPos[i]++;
969         }
970         // ---------------------------------------------------------------------- //
971     }
972 
973     if (overflowOccured) {
974         return OPAL_ERR_OVERFLOW;
975     }
976     return 0;
977 }
978 
979 /**
980  * @param [in] skip  If skip[i] is true, result for sequence #i will not be calculated.
981  *     If skip is NULL, all results will be calculated.
982  */
983 template <int MODE>
searchDatabase(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,bool skip[],int overflowMethod)984 static int searchDatabase(unsigned char query[], int queryLength,
985                           unsigned char** db, int dbLength, int dbSeqLengths[],
986                           int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
987                           OpalSearchResult* results[], const int searchType, bool skip[], int overflowMethod) {
988     int resultCode = 0;
989     // Do buckets only if using buckets overflow method.
990     const int chunkSize = overflowMethod == OPAL_OVERFLOW_BUCKETS ? 1024 : dbLength;
991     bool* calculated = new bool[chunkSize];
992     for (int startIdx = 0; startIdx < dbLength; startIdx += chunkSize) {
993         unsigned char** db_ = db + startIdx;
994         int* dbSeqLengths_ = dbSeqLengths + startIdx;
995         OpalSearchResult** results_ = results + startIdx;
996         int dbLength_ = startIdx + chunkSize >= dbLength ? dbLength - startIdx : chunkSize;
997         for (int i = 0; i < dbLength_; i++) {
998             calculated[i] = skip ? skip[i] : false;
999         }
1000         resultCode = searchDatabase_< Simd<char>, MODE >
1001             (query, queryLength, db_, dbLength_, dbSeqLengths_,
1002              gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
1003              searchType, calculated, overflowMethod);
1004         if (resultCode == OPAL_ERR_OVERFLOW) {
1005             resultCode = searchDatabase_< Simd<short>, MODE >
1006                 (query, queryLength, db_, dbLength_, dbSeqLengths_,
1007                  gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
1008                  searchType, calculated, overflowMethod);
1009             if (resultCode == OPAL_ERR_OVERFLOW) {
1010                 resultCode = searchDatabase_< Simd<int>, MODE >
1011                     (query, queryLength, db_, dbLength_, dbSeqLengths_,
1012                      gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
1013                      searchType, calculated, overflowMethod);
1014                 if (resultCode != 0)
1015                     break; // TODO: this does not make much sense because of buckets, improve it.
1016             }
1017         }
1018     }
1019     delete[] calculated;
1020     return resultCode;
1021 }
1022 
1023 
1024 /**
1025  * @return Zero-based index of max element in array. If there are multiple elements with
1026  *     same value, last is returned.
1027  */
1028 template <typename T>
arrayMax(T array[],int length)1029 static inline T arrayMax(T array[], int length) {
1030     assert(length > 0);
1031     int maxElementIdx = 0;
1032     for (int i = 1; i < length; i++) {
1033         if (array[i] > array[maxElementIdx]) {
1034             maxElementIdx = i;
1035         }
1036     }
1037     return array[maxElementIdx];
1038 }
1039 
1040 /**
1041  * @param length  Length of gap, must be non-negative.
1042  * @param gapOpen  Penalty for gap opening (non-negative).
1043  * @param gapExt  Penalty for gap extension (non-negative).
1044  * @return Gap penalty, as non-negative number.
1045  */
gapPenalty(int length,int gapOpen,int gapExt)1046 static int gapPenalty(int length, int gapOpen, int gapExt) {
1047     if (length > 0) {
1048         return gapOpen + gapExt * (length - 1);
1049     } else {
1050         return 0;
1051     }
1052 }
1053 
1054 /**
1055  * Calculates bottom border of band for OV mode stop conditions.
1056  */
calculateBottomBandBorderOV(int k,int Q,int T,int Go,int Ge,int M)1057 static int calculateBottomBandBorderOV(int k, int Q, int T, int Go, int Ge, int M) {
1058     int border = 0;
1059 
1060     // for d <= Q - T:
1061     border = std::max(border, std::min(Q - T, -1 * (k + Go - Ge - M * T) / Ge));
1062 
1063     // for d > Q - T:
1064     int borderCandidate = -1 * (k - M * Q + Go - Ge) / (Ge + M);
1065     if (borderCandidate > Q - T) {
1066         border = std::max(border, borderCandidate);
1067     }
1068 
1069     return std::min(border, Q - 1);
1070 }
1071 
calculateTopBandBorderHW(int k,int Q,int T,int Go,int Ge,int M)1072 static int calculateTopBandBorderHW(int k, int Q, int T, int Go, int Ge, int M) {
1073     int border = 0;
1074 
1075     // for d <= T - Q;
1076     border = std::max(border, std::min(T - Q, -1 * (k - M * Q + Go) / Ge + 1));
1077 
1078     // for d > T - Q:
1079     int borderCandidate = -1 * (k - T * M + 2 * Go + Ge * (Q - T - 2)) / (2 * Ge + M);
1080     if (borderCandidate > T - Q) {
1081         border = std::max(border, borderCandidate);
1082     }
1083 
1084     return std::min(border, T - 1);
1085 }
1086 
calculateBottomBandBorderHW(int k,int Q,int T,int Go,int Ge,int M)1087 static int calculateBottomBandBorderHW(int k, int Q, int T, int Go, int Ge, int M) {
1088     int border = 0;
1089 
1090     // for d >= Q - T:
1091     int borderCandidate = -1 * (k + Go - Ge - Q * M) / (Ge + M);
1092     if (borderCandidate >= Q - T) {
1093         border = std::max(border, borderCandidate);
1094     }
1095 
1096     // for d < Q - T:
1097     if (-2 * Go - Ge * (Q - T - 2) + M * T >= k) {
1098         border = std::max(border, Q - T - 1);
1099     }
1100 
1101     return std::min(border, Q - 1);
1102 }
1103 
calculateBottomBandBorderNW(int k,int Q,int T,int Go,int Ge,int M)1104 static int calculateBottomBandBorderNW(int k, int Q, int T, int Go, int Ge, int M) {
1105     int border = 0;
1106 
1107     // for d > Q - T:
1108     int borderCandidate = -1 * (k + 2 * Go - M * Q + Ge * (T - Q - 2)) / (2 * Ge + M);
1109     if (borderCandidate > Q - T) {
1110         border = std::max(border, borderCandidate);
1111     }
1112 
1113     // for d = Q - T:
1114     if (Q - T <= -1 * (k + Go - M * T - Ge) / Ge) {
1115         border = std::max(border, Q - T);
1116     }
1117 
1118     // for d < Q - T:
1119     if (-2 * Go - Ge * (Q - T - 2) + M * T >= k) {
1120         border = std::max(border, Q - T - 1);
1121     }
1122 
1123     return std::min(border, Q - 1);
1124 }
1125 
1126 /**
1127  * Calculates top and bottom diagonal of band that contains all cells that could be part of any
1128  * solution that gives score not smaller than k. This means that if we are interested only in solutions
1129  * that give score that is not smaller than k, it is enough to calculate only cells inside that band.
1130  * Always starts from top left corner (like NW does) no matter which mode is specified,
1131  * and stops with regard to stop conditions of specified mode.
1132  * @param k  We are interested only in scores thar are not smaller than k.
1133  * @param mode  Alignment mode, it is used for stop conditions.
1134  * @param Q  queryLength
1135  * @param T  targetLength
1136  * @param Go  gapOpen -> non negative penalty for opening of gap.
1137  * @param Ge  gapExt -> non negative penalty for extension of gap.
1138  * @param M  Max score from score matrix.
1139  * @return Pair where first is index of bottom diagonal(border), and second is index of top diagonal(border).
1140  *     Therefore, first value will be in [0, Q - 1], while second will be in [0, T - 1].
1141  *     Band spans between top and bottom diagonal, including them.
1142  *     Main diagonal has index 0, diagonals above it and below both start with index 1.
1143  *     If there is no band, (-1, -1) is returned.
1144  *
1145  *     Example of matrix where Q is 3 and T is 6, each cell is marked with index of diagonal it lies on:
1146  *
1147  *          012345                                                         xxx---
1148  *          101234   , if bottom border is 1 and top is 2, we have band:   xxxx--
1149  *          210123                                                         -xxxx-
1150  */
calculateBandBorders(int k,int mode,int Q,int T,int Go,int Ge,int M)1151 static std::pair<int, int> calculateBandBorders(int k, int mode, int Q, int T, int Go, int Ge, int M) {
1152     if (mode == OPAL_MODE_OV || mode == OPAL_MODE_SW) {
1153         // Bands for OV and SW have same conditions, so they are calculated in same way.
1154         if (M * std::min(Q, T) >= k) {  // Determine if band exists at all.
1155             // Conditions for top and bottom band are symmetric, so logic for bottom is reused for top.
1156             return std::make_pair(calculateBottomBandBorderOV(k, Q, T, Go, Ge, M),
1157                                   calculateBottomBandBorderOV(k, T, Q, Go, Ge, M));
1158         } else {
1159             return std::make_pair(-1, -1);
1160         }
1161     } else if (mode == OPAL_MODE_HW) {
1162         if (M * std::min(Q, T) - gapPenalty(Q - std::min(Q, T), Go, Ge) >= k) {
1163             return std::make_pair(calculateBottomBandBorderHW(k, Q, T, Go, Ge, M),
1164                                   calculateTopBandBorderHW(k, Q, T, Go, Ge, M));
1165         } else {
1166             return std::make_pair(-1, -1);
1167         }
1168     } else if (mode == OPAL_MODE_NW) {
1169         if (M * std::min(Q, T) - gapPenalty(std::abs(Q - T), Go, Ge) >= k) {
1170             // Conditions for top and bottom band are symmetric, so logic for bottom is reused for top.
1171             return std::make_pair(calculateBottomBandBorderNW(k, Q, T, Go, Ge, M),
1172                                   calculateBottomBandBorderNW(k, T, Q, Go, Ge, M));
1173         } else {
1174             return std::make_pair(-1, -1);
1175         }
1176     } else {
1177         assert(false);  // Invalid alignment mode.
1178     }
1179 }
1180 
1181 
1182 
1183 /**
1184  * Returns new sequence that is reverse of given sequence.
1185  */
createReverseCopy(const unsigned char * seq,int length)1186 static inline unsigned char* createReverseCopy(const unsigned char* seq, int length) {
1187     unsigned char* rSeq = (unsigned char*) malloc(length * sizeof(unsigned char));
1188     for (int i = 0; i < length; i++) {
1189         rSeq[i] = seq[length - i - 1];
1190     }
1191     return rSeq;
1192 }
1193 
1194 template <class T>
revertArray(T array[],int length)1195 static inline void revertArray(T array[], int length) {
1196     for (int i = 0; i < length / 2; i++) {
1197         T tmp = array[i];
1198         array[i] = array[length - 1 - i];
1199         array[length - 1 - i] = tmp;
1200     }
1201 }
1202 
1203 // Here I store scores for one cell in score matrix.
1204 class Cell {
1205 public:
1206     int H, E, F;
1207     enum class Field {
1208         H, E, F
1209     };
1210 };
1211 
1212 /**
1213  * Finds alignment of two sequences, if we know scoreLimit.
1214  * First alignment that has score greater or equal then scoreLimit will be returned.
1215  * If there is no such alignment, behavior will be unexpected.
1216  * Always starts from top left corner (like NW does) no matter which mode is specified,
1217  * and stops with regard to stop conditions of specified mode.
1218  * For example, for HW it will stop on last row, and for SW it will stop anywhere.
1219  * Returns score, start location (which is always (0, 0)), end location and alignment.
1220  * @param [in] query
1221  * @param [in] queryLength
1222  * @param [in] target
1223  * @param [in] targetLength
1224  * @param [in] gapOpen
1225  * @param [in] gapExt
1226  * @param [in] scoreMatrix
1227  * @param [in] alphabetLength
1228  * @param [in] scoreLimit  First alignment with score greater/equal than scoreLimit is returned.
1229  *     If there is no such score, behavior is undefined.
1230  *     TODO(martin): make this function also work when max score is smaller then scoreLimit.
1231  * @param [out] result  Pointer to already allocated object is expected here.
1232  *     Score, start location, end location and alignment will be set.
1233  *     Do not forget to free() alignment!
1234  * @param [in] mode  Mode whose stop conditions will be used when finding alignment.
1235  */
findAlignment(const unsigned char query[],const int queryLength,const unsigned char target[],const int targetLength,const int gapOpen,const int gapExt,const int * scoreMatrix,const int alphabetLength,const int scoreLimit,OpalSearchResult * result,const int mode)1236 static void findAlignment(
1237     const unsigned char query[], const int queryLength, const unsigned char target[], const int targetLength,
1238     const int gapOpen, const int gapExt, const int* scoreMatrix, const int alphabetLength,
1239     const int scoreLimit, OpalSearchResult* result, const int mode) {
1240     /*
1241     printf("Query: ");
1242     for (int i = 0; i < queryLength; i++) {
1243         printf("%d ", query[i]);
1244     }
1245     printf("\n");
1246     printf("Target: ");
1247     for (int i = 0; i < targetLength; i++) {
1248         printf("%d ", target[i]);
1249     }
1250     printf("\n");
1251     */
1252 
1253     //printf("lengths: %d %d\n", queryLength, targetLength);
1254 
1255     std::pair<int, int> bandBorders = calculateBandBorders(
1256         scoreLimit, mode, queryLength, targetLength, gapOpen, gapExt,
1257         arrayMax(scoreMatrix, alphabetLength * alphabetLength));
1258 
1259     assert(bandBorders.first >= 0 && bandBorders.first < queryLength);
1260     assert(bandBorders.second >= 0 && bandBorders.second < targetLength);
1261     //printf("band: %d %d\n", bandBorders.first, bandBorders.second);
1262 
1263     Cell** matrix = new Cell*[targetLength];  // NOTE: First index is column, second is row.
1264     Cell* initialColumn = new Cell[queryLength];
1265     const int LOWER_SCORE_BOUND = INT_MIN + std::max(gapOpen, gapExt);
1266     for (int r = 0; r < queryLength; r++) {
1267         initialColumn[r].H = -1 * gapOpen - r * gapExt;
1268         initialColumn[r].E = LOWER_SCORE_BOUND;
1269     }
1270 
1271     Cell* prevColumn = initialColumn;
1272     int maxScore = INT_MIN;  // Max score so far, but only among cells that could be final.
1273     int H = INT_MIN;  // Current score.
1274     int c;
1275     for (c = 0; c < targetLength && maxScore < scoreLimit; c++) {
1276         matrix[c] = new Cell[queryLength];
1277 
1278         // First and last row in band for this column.
1279         int rBandStart = std::max(0, c - bandBorders.second);
1280         int rBandEnd = std::min(queryLength - 1, c + bandBorders.first);
1281 
1282         int uF, uH, ulH;
1283         if (rBandStart == 0) {
1284             uF = LOWER_SCORE_BOUND;
1285             uH = -1 * gapOpen - c * gapExt;
1286             ulH = c == 0 ? 0 : uH + gapExt;
1287         } else {
1288             uH = uF = LOWER_SCORE_BOUND;  // Out of band, so set to -inf.
1289             ulH = prevColumn[rBandStart - 1].H;
1290         }
1291 
1292         for (int r = rBandStart; r <= rBandEnd; r++) {
1293             int E = std::max(prevColumn[r].H - gapOpen, prevColumn[r].E - gapExt);
1294             int F = std::max(uH - gapOpen, uF - gapExt);
1295             int score = scoreMatrix[query[r] * alphabetLength + target[c]];
1296             H = std::max(E, std::max(F, ulH + score));
1297             /*
1298             printf("E: %d ", E);
1299             printf("F: %d ", F);
1300             printf("score: %d ", score);
1301             printf("ulH: %d ", ulH);
1302             printf("H: %d ", H);
1303             */
1304 
1305             // If mode is SW, track max score of all cells.
1306             // If mode is OV, track max score in last column.
1307             if (mode == OPAL_MODE_SW
1308                 || (mode == OPAL_MODE_OV && c == targetLength - 1)) {
1309                 maxScore = std::max(maxScore, H);
1310             }
1311 
1312             uF = F;
1313             uH = H;
1314             ulH = prevColumn[r].H;
1315 
1316             matrix[c][r].H = H;
1317             matrix[c][r].E = E;
1318             matrix[c][r].F = F;
1319         }
1320 
1321         // Set all cells that are out of band to -inf.
1322         for (int r = 0; r < rBandStart; r++) {
1323             matrix[c][r].E = matrix[c][r].H = matrix[c][r].F = LOWER_SCORE_BOUND;
1324         }
1325         for (int r = rBandEnd + 1; r < queryLength; r++) {
1326             matrix[c][r].E = matrix[c][r].H = matrix[c][r].F = LOWER_SCORE_BOUND;
1327         }
1328 
1329         if (mode == OPAL_MODE_HW || mode == OPAL_MODE_OV) {
1330             maxScore = std::max(maxScore, H);  // Track max score in last row.
1331         }
1332         prevColumn = matrix[c];
1333     }
1334     int lastColumnIdx = c - 1;
1335 
1336     result->startLocationTarget = 0;
1337     result->startLocationQuery = 0;
1338     result->scoreSet = 1;
1339     // Determine score and end location of alignment.
1340     switch (mode) {
1341     case OPAL_MODE_NW:
1342         opalSearchResultSetScore(result, H);
1343         result->endLocationTarget = targetLength - 1;
1344         result->endLocationQuery = queryLength - 1;
1345         break;
1346     case OPAL_MODE_HW:
1347         opalSearchResultSetScore(result, maxScore);
1348         result->endLocationTarget = lastColumnIdx;
1349         result->endLocationQuery = queryLength - 1;
1350         break;
1351     case OPAL_MODE_SW: case OPAL_MODE_OV:
1352         opalSearchResultSetScore(result, maxScore);
1353         result->endLocationTarget = lastColumnIdx;
1354         int r;
1355         for (r = 0; r < queryLength && matrix[lastColumnIdx][r].H != maxScore; r++);
1356         assert(r < queryLength);
1357         assert(matrix[lastColumnIdx][r].H == maxScore);
1358         result->endLocationQuery = r;
1359         break;
1360     default:
1361         assert(false);
1362     }
1363 
1364     // Construct alignment.
1365     // I reserve max size possibly needed for alignment.
1366     unsigned char* alignment = (unsigned char*) malloc(
1367         sizeof(unsigned char) * (result->endLocationQuery + result->endLocationTarget));
1368     int alignmentLength = 0;
1369     int rIdx = result->endLocationQuery;
1370     int cIdx = result->endLocationTarget;
1371     Cell::Field field = Cell::Field::H;  // Current field type.
1372     while (rIdx >= 0 && cIdx >= 0) {
1373         Cell cell = matrix[cIdx][rIdx];  // Current cell.
1374 
1375         // Determine to which cell and which field we should go next, move, and add operation to alignment.
1376         switch (field) {
1377         case Cell::Field::H:
1378             if (cell.H == cell.E) {
1379                 field = Cell::Field::E;
1380             } else if (cell.H == cell.F) {
1381                 field = Cell::Field::F;
1382             } else {
1383                 alignment[alignmentLength++] = (query[rIdx] == target[cIdx] ? OPAL_ALIGN_MATCH
1384                                                 : OPAL_ALIGN_MISMATCH);
1385                 cIdx--; rIdx--;
1386             }
1387             break;
1388         case Cell::Field::E:
1389             field = (cell.E == matrix[cIdx - 1][rIdx].H - gapOpen) ? Cell::Field::H : Cell::Field::E;
1390             alignment[alignmentLength++] = OPAL_ALIGN_INS;
1391             cIdx--;
1392             break;
1393         case Cell::Field::F:
1394             field = (cell.F == matrix[cIdx][rIdx - 1].H - gapOpen) ? Cell::Field::H : Cell::Field::F;
1395             alignment[alignmentLength++] = OPAL_ALIGN_DEL;
1396             rIdx--;
1397             break;
1398         }
1399     }
1400     // I stop when matrix border is reached, so I have to add indels at start of alignment
1401     // manually (they do not have entry in operations). Only one of these two loops will trigger.
1402     while (rIdx >= 0) {
1403         alignment[alignmentLength] = OPAL_ALIGN_DEL;
1404         alignmentLength++; rIdx--;
1405     }
1406     while (cIdx >= 0) {
1407         alignment[alignmentLength] = OPAL_ALIGN_INS;
1408         alignmentLength++; cIdx--;
1409     }
1410     //printf("rIdx: %d, cIdx: %d\n", rIdx, cIdx);
1411     assert(rIdx == -1 && cIdx == -1);
1412     alignment = (unsigned char*) realloc(alignment, sizeof(unsigned char) * alignmentLength);
1413     revertArray(alignment, alignmentLength);
1414     // Store alignment to result.
1415     result->alignment = alignment;
1416     result->alignmentLength = alignmentLength;
1417 
1418     /*
1419     printf("Alignment: ");
1420     for (int j = 0; j < result->alignmentLength; j++)
1421         printf("%d ", result->alignment[j]);
1422     printf("\n");
1423     */
1424 
1425     // Cleanup
1426     delete[] initialColumn;
1427     for (int i = 0; i <= lastColumnIdx; i++) {
1428         delete[] matrix[i];
1429     }
1430     delete[] matrix;
1431 }
1432 
1433 
1434 
opalSearchDatabase(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,int mode,int overflowMethod)1435 extern int opalSearchDatabase(
1436     unsigned char query[], int queryLength,
1437     unsigned char** db, int dbLength, int dbSeqLengths[],
1438     int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
1439     OpalSearchResult* results[], const int searchType, int mode, int overflowMethod) {
1440 // A.D.: this will not happen since SIMDe always takes care of correct SIMD extensions
1441 #if 0
1442 //#if !defined(__SSE4_1__) && !defined(__AVX2__)
1443 //    return OPAL_ERR_NO_SIMD_SUPPORT;
1444 #else
1445     // Calculate score and end location.
1446     int status;
1447     // Skip recalculation of already calculated sequences.
1448     bool *skip = new bool[dbLength];
1449     for (int i = 0; i < dbLength; i++) {
1450         skip[i] = (!opalSearchResultIsEmpty(*results[i])
1451                    && (searchType == OPAL_SEARCH_SCORE
1452                        || (results[i]->endLocationQuery >= 0 && results[i]->endLocationTarget >= 0)));
1453     }
1454     if (mode == OPAL_MODE_NW) {
1455         status = searchDatabase<OPAL_MODE_NW>(
1456             query, queryLength, db, dbLength, dbSeqLengths, gapOpen, gapExt,
1457             scoreMatrix, alphabetLength, results, searchType, skip, overflowMethod);
1458     } else if (mode == OPAL_MODE_HW) {
1459         status = searchDatabase<OPAL_MODE_HW>(
1460             query, queryLength, db, dbLength, dbSeqLengths, gapOpen, gapExt,
1461             scoreMatrix, alphabetLength, results, searchType, skip, overflowMethod);
1462     } else if (mode == OPAL_MODE_OV) {
1463         status = searchDatabase<OPAL_MODE_OV>(
1464             query, queryLength, db, dbLength, dbSeqLengths, gapOpen, gapExt,
1465             scoreMatrix, alphabetLength, results, searchType, skip, overflowMethod);
1466     } else if (mode == OPAL_MODE_SW) {
1467         status = searchDatabaseSW(
1468             query, queryLength, db, dbLength, dbSeqLengths,
1469             gapOpen, gapExt, scoreMatrix, alphabetLength,
1470             results, searchType, skip, overflowMethod);
1471     } else {
1472         status = OPAL_ERR_INVALID_MODE;
1473     }
1474     delete[] skip;
1475     if (status) return status;
1476 
1477     if (searchType == OPAL_SEARCH_ALIGNMENT) {
1478         // Calculate alignment of query with each database sequence.
1479         unsigned char* const rQuery = createReverseCopy(query, queryLength);
1480         for (int i = 0; i < dbLength; i++) {
1481             if (mode == OPAL_MODE_SW && results[i]->score == 0) {  // If it does not have alignment
1482                 results[i]->alignment = NULL;
1483                 results[i]->alignmentLength = 0;
1484                 results[i]->startLocationQuery = results[i]->startLocationTarget = -1;
1485                 results[i]->endLocationQuery = results[i]->endLocationTarget = -1;
1486             } else {
1487                 //printf("%d %d\n", results[i]->endLocationQuery, results[i]->endLocationTarget);
1488                 // Do alignment in reverse direction.
1489                 int alignQueryLength = results[i]->endLocationQuery + 1;
1490                 unsigned char* alignQuery = rQuery + queryLength - alignQueryLength;
1491                 int alignTargetLength = results[i]->endLocationTarget + 1;
1492                 unsigned char* alignTarget = createReverseCopy(db[i], alignTargetLength);
1493                 OpalSearchResult result;
1494                 findAlignment(
1495                     alignQuery, alignQueryLength, alignTarget, alignTargetLength,
1496                     gapOpen, gapExt, scoreMatrix, alphabetLength,
1497                     results[i]->score, &result, mode);
1498                 //printf("%d %d\n", results[i]->score, result.score);
1499                 assert(results[i]->score == result.score);
1500                 // Translate results.
1501                 results[i]->startLocationQuery = alignQueryLength - result.endLocationQuery - 1;
1502                 results[i]->startLocationTarget = alignTargetLength - result.endLocationTarget - 1;
1503                 results[i]->alignmentLength = result.alignmentLength;
1504                 results[i]->alignment = result.alignment;
1505                 revertArray(results[i]->alignment, results[i]->alignmentLength);
1506                 free(alignTarget);
1507             }
1508         }
1509         free(rQuery);
1510     } else {
1511         for (int i = 0; i < dbLength; i++) {
1512             results[i]->alignment = NULL;
1513             results[i]->alignmentLength = -1;
1514             results[i]->startLocationQuery = -1;
1515             results[i]->startLocationTarget = -1;
1516         }
1517     }
1518 
1519     return 0;
1520 #endif
1521 }
1522 
1523 
opalSearchDatabaseCharSW(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[])1524 extern int opalSearchDatabaseCharSW(
1525     unsigned char query[], int queryLength, unsigned char** db, int dbLength,
1526     int dbSeqLengths[], int gapOpen, int gapExt, int* scoreMatrix,
1527     int alphabetLength, OpalSearchResult* results[]) {
1528 #if !defined(__SSE4_1__) && !defined(__AVX2__)
1529     return OPAL_ERR_NO_SIMD_SUPPORT;
1530 #else
1531     bool* calculated = new bool[dbLength];
1532     for (int i = 0; i < dbLength; i++) {
1533         calculated[i] = false;
1534     }
1535     int resultCode = searchDatabaseSW_< SimdSW<char> >(
1536         query, queryLength, db, dbLength, dbSeqLengths, gapOpen, gapExt,
1537         scoreMatrix, alphabetLength, results, OPAL_SEARCH_SCORE, calculated,
1538         OPAL_OVERFLOW_SIMPLE);
1539     for (int i = 0; i < dbLength; i++) {
1540         if (!calculated[i]) {
1541             results[i]->score = -1;
1542             results[i]->scoreSet = 0;
1543         }
1544     }
1545     delete[] calculated;
1546     return resultCode;
1547 #endif
1548 }
1549 
1550 
opalInitSearchResult(OpalSearchResult * result)1551 extern void opalInitSearchResult(OpalSearchResult* result) {
1552     result->scoreSet = 0;
1553     result->startLocationTarget = result->startLocationQuery = -1;
1554     result->endLocationTarget = result->endLocationQuery = -1;
1555     result->alignment = NULL;
1556     result->alignmentLength = 0;
1557 }
1558 
opalSearchResultIsEmpty(const OpalSearchResult result)1559 extern int opalSearchResultIsEmpty(const OpalSearchResult result) {
1560     return !result.scoreSet;
1561 }
1562 
opalSearchResultSetScore(OpalSearchResult * result,int score)1563 extern void opalSearchResultSetScore(OpalSearchResult* result, int score) {
1564     result->scoreSet = 1;
1565     result->score = score;
1566 }
1567