1 #include <algorithm>
2 #include <cassert>
3 #include <climits>
4 #include <cstdio>
5 #include <limits>
6 #include <vector>
7
8 #define SIMDE_ENABLE_NATIVE_ALIASES
9 #include <simde_avx2.h> // AVX2 and lower
10
11 #include "opal.h"
12
13
14 // I define aliases for SSE intrinsics, so they can be used in code not depending on SSE generation.
15 // If available, AVX2 is used because it has two times bigger register, thus everything is two times faster.
16 // #ifdef __AVX2__
17 #if 1 //A.D.: now SIMDe will be taking care of the SIMD
18
19 const int SIMD_REG_SIZE = 256; //!< number of bits in register
20 typedef __m256i __mxxxi; //!< represents register containing integers
21 #define _mmxxx_load_si _mm256_load_si256
22 #define _mmxxx_store_si _mm256_store_si256
23 #define _mmxxx_and_si _mm256_and_si256
24 #define _mmxxx_testz_si _mm256_testz_si256
25
26 #define _mmxxx_adds_epi8 _mm256_adds_epi8
27 #define _mmxxx_subs_epi8 _mm256_subs_epi8
28 #define _mmxxx_min_epu8 _mm256_min_epu8
29 #define _mmxxx_min_epi8 _mm256_min_epi8
30 #define _mmxxx_max_epu8 _mm256_max_epu8
31 #define _mmxxx_max_epi8 _mm256_max_epi8
32 #define _mmxxx_set1_epi8 _mm256_set1_epi8
33 #define _mmxxx_cmpgt_epi8 _mm256_cmpgt_epi8
34
35 #define _mmxxx_adds_epi16 _mm256_adds_epi16
36 #define _mmxxx_subs_epi16 _mm256_subs_epi16
37 #define _mmxxx_min_epi16 _mm256_min_epi16
38 #define _mmxxx_max_epi16 _mm256_max_epi16
39 #define _mmxxx_set1_epi16 _mm256_set1_epi16
40 #define _mmxxx_cmpgt_epi16 _mm256_cmpgt_epi16
41
42 #define _mmxxx_add_epi32 _mm256_add_epi32
43 #define _mmxxx_sub_epi32 _mm256_sub_epi32
44 #define _mmxxx_min_epi32 _mm256_min_epi32
45 #define _mmxxx_max_epi32 _mm256_max_epi32
46 #define _mmxxx_set1_epi32 _mm256_set1_epi32
47 #define _mmxxx_cmpgt_epi32 _mm256_cmpgt_epi32
48
49 #else // SSE4.1
50
51 const int SIMD_REG_SIZE = 128;
52 typedef __m128i __mxxxi;
53 #define _mmxxx_load_si _mm_load_si128
54 #define _mmxxx_store_si _mm_store_si128
55 #define _mmxxx_and_si _mm_and_si128
56 #define _mmxxx_testz_si _mm_testz_si128
57
58 #define _mmxxx_adds_epi8 _mm_adds_epi8
59 #define _mmxxx_subs_epi8 _mm_subs_epi8
60 #define _mmxxx_min_epu8 _mm_min_epu8
61 #define _mmxxx_min_epi8 _mm_min_epi8
62 #define _mmxxx_max_epu8 _mm_max_epu8
63 #define _mmxxx_max_epi8 _mm_max_epi8
64 #define _mmxxx_set1_epi8 _mm_set1_epi8
65 #define _mmxxx_cmpgt_epi8 _mm_cmpgt_epi8
66
67 #define _mmxxx_adds_epi16 _mm_adds_epi16
68 #define _mmxxx_subs_epi16 _mm_subs_epi16
69 #define _mmxxx_min_epi16 _mm_min_epi16
70 #define _mmxxx_max_epi16 _mm_max_epi16
71 #define _mmxxx_set1_epi16 _mm_set1_epi16
72 #define _mmxxx_cmpgt_epi16 _mm_cmpgt_epi16
73
74 #define _mmxxx_add_epi32 _mm_add_epi32
75 #define _mmxxx_sub_epi32 _mm_sub_epi32
76 #define _mmxxx_min_epi32 _mm_min_epi32
77 #define _mmxxx_max_epi32 _mm_max_epi32
78 #define _mmxxx_set1_epi32 _mm_set1_epi32
79 #define _mmxxx_cmpgt_epi32 _mm_cmpgt_epi32
80
81 #endif
82
83
84 //------------------------------------ SIMD PARAMETERS ---------------------------------//
simdIsAllZeroes(const __mxxxi & a)85 static inline int simdIsAllZeroes(const __mxxxi& a) {
86 return _mmxxx_testz_si(a, a);
87 }
88
89 /**
90 * Contains parameters and SIMD instructions specific for certain score type.
91 */
92
93 template<typename T> struct SimdSW {};
94
95 template<>
96 struct SimdSW<char> {
97 typedef char type; //!< Type that will be used for score
98 static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(char)); //!< Number of sequences that can be done in parallel.
99 static const bool satArthm = true; //!< True if saturation arithmetic is used, false otherwise.
100 static const bool negRange = true; //!< True if it uses negative range for score representation, goes with saturation
addSimdSW101 static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_adds_epi8(a, b); }
subSimdSW102 static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_subs_epi8(a, b); }
minSimdSW103 static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epu8(a, b); }
maxSimdSW104 static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epu8(a, b); }
cmpgtSimdSW105 static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi8(a, b); }
set1SimdSW106 static inline __mxxxi set1(int a) { return _mmxxx_set1_epi8(a); }
107 };
108
109 template<>
110 struct SimdSW<short> {
111 typedef short type;
112 static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(short));
113 static const bool satArthm = true;
114 static const bool negRange = false;
addSimdSW115 static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_adds_epi16(a, b); }
subSimdSW116 static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_subs_epi16(a, b); }
minSimdSW117 static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi16(a, b); }
maxSimdSW118 static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi16(a, b); }
cmpgtSimdSW119 static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi16(a, b); }
set1SimdSW120 static inline __mxxxi set1(int a) { return _mmxxx_set1_epi16(a); }
121 };
122
123 template<>
124 struct SimdSW<int> {
125 typedef int type;
126 static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(int));
127 static const bool satArthm = false;
128 static const bool negRange = false;
addSimdSW129 static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_add_epi32(a, b); }
subSimdSW130 static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_sub_epi32(a, b); }
minSimdSW131 static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi32(a, b); }
maxSimdSW132 static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi32(a, b); }
cmpgtSimdSW133 static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi32(a, b); }
set1SimdSW134 static inline __mxxxi set1(int a) { return _mmxxx_set1_epi32(a); }
135 };
136 //--------------------------------------------------------------------------------------//
137
138
139 static bool loadNextSequence(int &nextDbSeqIdx, int dbLength, int &currDbSeqIdx, unsigned char * &currDbSeqPos,
140 int &currDbSeqLength, unsigned char ** db, int dbSeqLengths[], bool calculated[],
141 int &numEndedDbSeqs);
142
143
144 // For debugging
145 template<class SIMD>
print_mmxxxi(__mxxxi mm)146 void print_mmxxxi(__mxxxi mm) {
147 typename SIMD::type unpacked[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
148 _mmxxx_store_si((__mxxxi*)unpacked, mm);
149 for (int i = 0; i < SIMD::numSeqs; i++)
150 printf("%d ", unpacked[i]);
151 }
152
153 // This structure represents cell in dynamic programming matrix, but only with E and H values.
154 struct CellEH {
155 __mxxxi H;
156 __mxxxi E;
157 };
158
159 /**
160 * @param stopOnOverflow If true, function will stop when first overflow happens.
161 * If false, function will not stop but continue with next sequence.
162 *
163 */
164 template<class SIMD>
searchDatabaseSW_(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,bool calculated[],int overflowMethod)165 static int searchDatabaseSW_(unsigned char query[], int queryLength,
166 unsigned char** db, int dbLength, int dbSeqLengths[],
167 int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
168 OpalSearchResult* results[], const int searchType, bool calculated[],
169 int overflowMethod) {
170
171 const typename SIMD::type LOWER_BOUND = std::numeric_limits<typename SIMD::type>::min();
172 const typename SIMD::type UPPER_BOUND = std::numeric_limits<typename SIMD::type>::max();
173
174 bool overflowOccured = false; // True if overflow was detected at least once.
175
176 // ----------------------- CHECK ARGUMENTS -------------------------- //
177 // Check if Q, R or scoreMatrix have values too big for used score type
178 if (gapOpen < LOWER_BOUND || UPPER_BOUND < gapOpen || gapExt < LOWER_BOUND || UPPER_BOUND < gapExt) {
179 return 1;
180 }
181 if (!SIMD::satArthm) {
182 // These extra limits are enforced so overflow could be detected more efficiently
183 if (gapOpen <= LOWER_BOUND/2 || UPPER_BOUND/2 <= gapOpen || gapExt <= LOWER_BOUND/2 || UPPER_BOUND/2 <= gapExt) {
184 return OPAL_ERR_OVERFLOW;
185 }
186 }
187
188 for (int r = 0; r < alphabetLength; r++)
189 for (int c = 0; c < alphabetLength; c++) {
190 int score = scoreMatrix[r * alphabetLength + c];
191 if (score < LOWER_BOUND || UPPER_BOUND < score) {
192 return OPAL_ERR_OVERFLOW;
193 }
194 if (!SIMD::satArthm) {
195 if (score <= LOWER_BOUND/2 || UPPER_BOUND/2 <= score)
196 return OPAL_ERR_OVERFLOW;
197 }
198 }
199 // ------------------------------------------------------------------ //
200
201
202 // ------------------------ INITIALIZATION -------------------------- //
203 __mxxxi zeroes = SIMD::set1(0);
204 __mxxxi scoreZeroes; // 0 normally, but lower bound if using negative range
205 if (SIMD::negRange) {
206 scoreZeroes = SIMD::set1(LOWER_BOUND);
207 } else {
208 scoreZeroes = zeroes;
209 }
210
211 int numEndedDbSeqs = 0; // Number of sequences that ended
212 int nextDbSeqIdx = 0; // Index in db of next sequence that we will try to load.
213 // Index in db. -1 for null sequence (null sequence == no sequence).
214 int currDbSeqsIdxs[SIMD::numSeqs];
215 // Pointer to current element for each current database sequence. 0 for null sequence.
216 unsigned char* currDbSeqsPos[SIMD::numSeqs];
217 // Lengths of remaining parts of sequences. -1 for null sequence.
218 int currDbSeqsLengths[SIMD::numSeqs];
219
220 // Needed in order to find the most early result, which is a nice condition to have.
221 int currDbSeqsBestScore[SIMD::numSeqs];
222 // Row index of best score for each current database sequence.
223 int currDbSeqsBestScoreRow[SIMD::numSeqs];
224 // Column index of best score for each current database sequence.
225 int currDbSeqsBestScoreColumn[SIMD::numSeqs];
226
227 // Profile query -> here we store preprocessed score data needed in core loop.
228 // It is recalculated for each column.
229 __mxxxi P[alphabetLength];
230
231 // Load initial sequences
232 for (int i = 0; i < SIMD::numSeqs; i++) {
233 currDbSeqsBestScore[i] = LOWER_BOUND;
234 loadNextSequence(nextDbSeqIdx, dbLength, currDbSeqsIdxs[i], currDbSeqsPos[i],
235 currDbSeqsLengths[i], db, dbSeqLengths, calculated, numEndedDbSeqs);
236 }
237
238 // Q is gap open penalty, R is gap ext penalty.
239 __mxxxi Q = SIMD::set1(gapOpen);
240 __mxxxi R = SIMD::set1(gapExt);
241
242 int rowsWithImprovement[queryLength]; // Indexes of rows where one of sequences improved score.
243
244 // Previous Hs, previous Es, previous F, all signed short.
245 CellEH prevColumn[queryLength]; // Stores results of previous column in matrix.
246 // Initialize all values to 0
247 for (int i = 0; i < queryLength; i++) {
248 prevColumn[i].H = prevColumn[i].E = scoreZeroes;
249 }
250
251 __mxxxi maxH = scoreZeroes; // Best score in sequence
252 // ------------------------------------------------------------------ //
253
254
255 // For each column
256 while (numEndedDbSeqs < dbLength) {
257 // -------------------- CALCULATE QUERY PROFILE ------------------------- //
258 // TODO: Rognes uses pshufb here, I don't know how/why?
259 typename SIMD::type profileRow[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
260 for (unsigned char letter = 0; letter < alphabetLength; letter++) {
261 int* scoreMatrixRow = scoreMatrix + letter*alphabetLength;
262 for (int i = 0; i < SIMD::numSeqs; i++) {
263 unsigned char* dbSeqPos = currDbSeqsPos[i];
264 if (dbSeqPos != 0)
265 profileRow[i] = (typename SIMD::type)scoreMatrixRow[*dbSeqPos];
266 }
267 P[letter] = _mmxxx_load_si((__mxxxi const*)profileRow);
268 }
269 // ---------------------------------------------------------------------- //
270
271 // Previous cells: u - up, l - left, ul - up left
272 __mxxxi uF, uH, ulH;
273 uF = uH = ulH = scoreZeroes; // F[-1, c] = H[-1, c] = H[-1, c-1] = 0
274
275 __mxxxi ofTest = scoreZeroes; // Used for detecting the overflow when not using saturated ar
276
277 int rowsWithImprovementLength = 0;
278
279 // ----------------------- CORE LOOP (ONE COLUMN) ----------------------- //
280 for (int r = 0; r < queryLength; r++) { // For each cell in column
281 // Calculate E = max(lH-Q, lE-R)
282 __mxxxi E = SIMD::max(SIMD::sub(prevColumn[r].H, Q), SIMD::sub(prevColumn[r].E, R));
283
284 // Calculate F = max(uH-Q, uF-R)
285 __mxxxi F = SIMD::max(SIMD::sub(uH, Q), SIMD::sub(uF, R));
286
287 // Calculate H
288 __mxxxi H = SIMD::max(F, E);
289 if (!SIMD::negRange) {
290 // If not using negative range, then H could be negative at this moment so we need this
291 H = SIMD::max(H, zeroes);
292 }
293 __mxxxi ulH_P = SIMD::add(ulH, P[query[r]]);
294 // If using negative range: if ulH_P >= 0 then we have overflow
295
296 H = SIMD::max(H, ulH_P);
297 // If using negative range: H will always be negative, even if ulH_P overflowed
298
299 // Save data needed for overflow detection. Not more then one condition will fire
300 if (SIMD::negRange)
301 ofTest = _mmxxx_and_si(ofTest, ulH_P);
302 if (!SIMD::satArthm)
303 ofTest = SIMD::min(ofTest, ulH_P);
304
305 // If we need end location, remember row with best score.
306 if (searchType != OPAL_SEARCH_SCORE) {
307 // We remember rows that had max scores, in order to find out the row of best score.
308 rowsWithImprovement[rowsWithImprovementLength] = r;
309 // TODO(martin): simdIsAllZeroes seems to bring significant slowdown, but I could
310 // not find way to avoid it.
311 rowsWithImprovementLength += 1 - simdIsAllZeroes(SIMD::cmpgt(H, maxH));
312 }
313
314 maxH = SIMD::max(maxH, H); // update best score
315
316 // Set uF, uH, ulH
317 uF = F;
318 uH = H;
319 ulH = prevColumn[r].H;
320
321 // Remeber values so they can be used in next column.
322 prevColumn[r].E = E;
323 prevColumn[r].H = H;
324
325 // For saturated: score is biased everywhere, but just score: E, F, H
326 // Also, all scores except ulH_P certainly have value < 0
327 }
328 // ---------------------------------------------------------------------- //
329
330 for (int seqIdx = 0; seqIdx < SIMD::numSeqs; seqIdx++) {
331 currDbSeqsLengths[seqIdx] -= currDbSeqsLengths[seqIdx] > 0;
332 }
333
334 typename SIMD::type unpackedMaxH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
335 _mmxxx_store_si((__mxxxi*)unpackedMaxH, maxH);
336
337 // ------------------------ OVERFLOW DETECTION -------------------------- //
338 bool overflowed[SIMD::numSeqs];
339 if (!SIMD::satArthm) {
340 // This check is based on following assumptions:
341 // - overflow wraps
342 // - Q, R and all scores from scoreMatrix are between LOWER_BOUND/2 and UPPER_BOUND/2 exclusive
343 typename SIMD::type unpackedOfTest[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
344 _mmxxx_store_si((__mxxxi*)unpackedOfTest, ofTest);
345 for (int i = 0; i < SIMD::numSeqs; i++) {
346 overflowed[i] = currDbSeqsPos[i] != 0 && unpackedOfTest[i] <= LOWER_BOUND / 2;
347 if (overflowMethod == OPAL_OVERFLOW_BUCKETS && overflowed[i]) {
348 // In buckets method, we stop calculation when overflow is detected.
349 return OPAL_ERR_OVERFLOW;
350 }
351 }
352 } else {
353 if (SIMD::negRange) {
354 // Since I use saturation, I check if minUlH_P was non negative
355 typename SIMD::type unpackedOfTest[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
356 _mmxxx_store_si((__mxxxi*)unpackedOfTest, ofTest);
357 for (int i = 0; i < SIMD::numSeqs; i++) {
358 overflowed[i] = currDbSeqsPos[i] != 0 && unpackedOfTest[i] >= 0;
359 if (overflowMethod == OPAL_OVERFLOW_BUCKETS && overflowed[i]) {
360 return OPAL_ERR_OVERFLOW;
361 }
362 }
363 } else {
364 // I check if upper bound is reached
365 for (int i = 0; i < SIMD::numSeqs; i++) {
366 overflowed[i] = currDbSeqsPos[i] != 0 && unpackedMaxH[i] == UPPER_BOUND;
367 if (overflowMethod == OPAL_OVERFLOW_BUCKETS && overflowed[i]) {
368 return OPAL_ERR_OVERFLOW;
369 }
370 }
371 }
372 }
373 // ---------------------------------------------------------------------- //
374
375 bool overflowDetected = false; // True if overflow was detected in last column.
376 bool sequenceEnded = false; // True if a sequence ended in last column.
377 for (int seqIdx = 0; seqIdx < SIMD::numSeqs; seqIdx++) {
378 overflowDetected = overflowDetected || overflowed[seqIdx];
379 sequenceEnded = sequenceEnded || (currDbSeqsLengths[seqIdx] == 0);
380 }
381 overflowOccured = overflowOccured || overflowDetected;
382
383
384 // --------- Update end location of alignment ----------- //
385 if (searchType != OPAL_SEARCH_SCORE) {
386 for (int i = 0; i < rowsWithImprovementLength; i++) {
387 int r = rowsWithImprovement[i];
388 typename SIMD::type unpackedH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
389 _mmxxx_store_si((__mxxxi*)unpackedH, prevColumn[r].H);
390 for (int j = 0; j < SIMD::numSeqs; j++) {
391 if (currDbSeqsPos[j] != 0 && !overflowed[j]) { // If not null sequence or overflowedresult->endLocationQuery =
392 if (unpackedH[j] > currDbSeqsBestScore[j]) {
393 currDbSeqsBestScore[j] = unpackedH[j];
394 currDbSeqsBestScoreRow[j] = r;
395 currDbSeqsBestScoreColumn[j] = dbSeqLengths[currDbSeqsIdxs[j]]
396 - currDbSeqsLengths[j] - 1;
397 }
398 }
399 }
400 }
401 }
402 // ------------------------------------------------------ //
403
404
405 // --------------------- CHECK AND HANDLE SEQUENCE END ------------------ //
406 if (overflowDetected || sequenceEnded) { // If at least one sequence ended
407 typename SIMD::type resetMask[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
408
409 for (int i = 0; i < SIMD::numSeqs; i++) {
410 if (currDbSeqsPos[i] != 0) { // If not null sequence
411 if (overflowed[i] || currDbSeqsLengths[i] == 0) { // If sequence ended
412 numEndedDbSeqs++;
413 if (!overflowed[i]) {
414 // Save score and mark as calculated
415 calculated[currDbSeqsIdxs[i]] = true;
416 opalSearchResultSetScore(results[currDbSeqsIdxs[i]], unpackedMaxH[i]);
417 if (SIMD::negRange) {
418 results[currDbSeqsIdxs[i]]->score -= LOWER_BOUND;
419 }
420 if (searchType != OPAL_SEARCH_SCORE) {
421 results[currDbSeqsIdxs[i]]->endLocationQuery = currDbSeqsBestScoreRow[i];
422 results[currDbSeqsIdxs[i]]->endLocationTarget = currDbSeqsBestScoreColumn[i];
423 } else {
424 results[currDbSeqsIdxs[i]]->endLocationQuery = -1;
425 results[currDbSeqsIdxs[i]]->endLocationTarget = -1;
426 }
427 }
428 currDbSeqsBestScore[i] = LOWER_BOUND;
429 // Load next sequence
430 loadNextSequence(nextDbSeqIdx, dbLength, currDbSeqsIdxs[i], currDbSeqsPos[i],
431 currDbSeqsLengths[i], db, dbSeqLengths, calculated, numEndedDbSeqs);
432 // If negative range, sets to LOWER_BOUND when used with saturated add and value < 0,
433 // otherwise sets to zero when used with and.
434 resetMask[i] = SIMD::negRange ? LOWER_BOUND : 0;
435 } else {
436 // Does not change anything when used with saturated add / and.
437 resetMask[i] = SIMD::negRange ? 0 : -1;
438 if (currDbSeqsPos[i] != 0)
439 currDbSeqsPos[i]++; // If not new and not null, move for one element
440 }
441 }
442 }
443 // Reset previous columns (Es and Hs) and maxH
444 __mxxxi resetMaskPacked = _mmxxx_load_si((__mxxxi const*)resetMask);
445 if (SIMD::negRange) {
446 for (int i = 0; i < queryLength; i++) {
447 prevColumn[i].H = SIMD::add(prevColumn[i].H, resetMaskPacked);
448 prevColumn[i].E = SIMD::add(prevColumn[i].E, resetMaskPacked);
449 }
450 maxH = SIMD::add(maxH, resetMaskPacked);
451 } else {
452 for (int i = 0; i < queryLength; i++) {
453 prevColumn[i].H = _mmxxx_and_si(prevColumn[i].H, resetMaskPacked);
454 prevColumn[i].E = _mmxxx_and_si(prevColumn[i].E, resetMaskPacked);
455 }
456 maxH = _mmxxx_and_si(maxH, resetMaskPacked);
457 }
458 } else { // If no sequences ended
459 // Move for one element in all sequences
460 for (int i = 0; i < SIMD::numSeqs; i++)
461 currDbSeqsPos[i] += (currDbSeqsPos[i] != 0);
462 }
463 // ---------------------------------------------------------------------- //
464 }
465
466 if (overflowOccured) {
467 return OPAL_ERR_OVERFLOW;
468 }
469 return 0;
470 }
471
loadNextSequence(int & nextDbSeqIdx,int dbLength,int & currDbSeqIdx,unsigned char * & currDbSeqPos,int & currDbSeqLength,unsigned char ** db,int dbSeqLengths[],bool calculated[],int & numEndedDbSeqs)472 static inline bool loadNextSequence(int &nextDbSeqIdx, int dbLength, int &currDbSeqIdx, unsigned char* &currDbSeqPos,
473 int &currDbSeqLength, unsigned char** db, int dbSeqLengths[], bool calculated[],
474 int &numEndedDbSeqs) {
475 while (nextDbSeqIdx < dbLength && calculated[nextDbSeqIdx]) {
476 nextDbSeqIdx++;
477 numEndedDbSeqs++;
478 }
479 if (nextDbSeqIdx < dbLength) { // If there is sequence to load
480 currDbSeqIdx = nextDbSeqIdx;
481 currDbSeqPos = db[nextDbSeqIdx];
482 currDbSeqLength = dbSeqLengths[nextDbSeqIdx];
483 nextDbSeqIdx++;
484 return true;
485 } else { // If there are no more sequences to load, load "null" sequence
486 currDbSeqIdx = currDbSeqLength = -1; // Set to -1 if there are no more sequences
487 currDbSeqPos = 0;
488 return false;
489 }
490 }
491
492 /**
493 * @param If skip[i] is true, result for sequence #i will not be calculated.
494 * If skip is NULL, all results will be calculated.
495 */
searchDatabaseSW(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,bool skip[],int overflowMethod)496 static int searchDatabaseSW(unsigned char query[], int queryLength,
497 unsigned char** db, int dbLength, int dbSeqLengths[],
498 int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
499 OpalSearchResult* results[], const int searchType, bool skip[], int overflowMethod) {
500 int resultCode = 0;
501 // Do buckets only if using buckets overflow method.
502 const int chunkSize = overflowMethod == OPAL_OVERFLOW_BUCKETS ? 1024 : dbLength;
503 bool* calculated = new bool[chunkSize];
504 for (int startIdx = 0; startIdx < dbLength; startIdx += chunkSize) {
505 unsigned char** db_ = db + startIdx;
506 int* dbSeqLengths_ = dbSeqLengths + startIdx;
507 OpalSearchResult** results_ = results + startIdx;
508 int dbLength_ = startIdx + chunkSize >= dbLength ? dbLength - startIdx : chunkSize;
509 for (int i = 0; i < dbLength_; i++) {
510 calculated[i] = skip ? skip[i] : false;
511 }
512 resultCode = searchDatabaseSW_< SimdSW<char> >(
513 query, queryLength, db_, dbLength_, dbSeqLengths_,
514 gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
515 searchType, calculated, overflowMethod);
516 if (resultCode == OPAL_ERR_OVERFLOW) {
517 resultCode = searchDatabaseSW_< SimdSW<short> >(
518 query, queryLength, db_, dbLength_, dbSeqLengths_,
519 gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
520 searchType, calculated, overflowMethod);
521 if (resultCode == OPAL_ERR_OVERFLOW) {
522 resultCode = searchDatabaseSW_< SimdSW<int> >(
523 query, queryLength,
524 db_, dbLength_, dbSeqLengths_,
525 gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
526 searchType, calculated, overflowMethod);
527 if (resultCode != 0)
528 break;
529 }
530 }
531 }
532
533 delete[] calculated;
534 return resultCode;
535 }
536
537
538
539
540
541
542
543
544
545 //------------------------------------ SIMD PARAMETERS ---------------------------------//
546 /**
547 * Contains parameters and SIMD instructions specific for certain score type.
548 */
549 template<typename T> class Simd {};
550
551 template<>
552 struct Simd<char> {
553 typedef char type; //!< Type that will be used for score
554 static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(char)); //!< Number of sequences that can be done in parallel.
555 static const bool satArthm = true; //!< True if saturation arithmetic is used, false otherwise.
addSimd556 static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_adds_epi8(a, b); }
subSimd557 static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_subs_epi8(a, b); }
minSimd558 static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi8(a, b); }
maxSimd559 static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi8(a, b); }
cmpgtSimd560 static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi8(a, b); }
set1Simd561 static inline __mxxxi set1(int a) { return _mmxxx_set1_epi8(a); }
562 };
563
564 template<>
565 struct Simd<short> {
566 typedef short type;
567 static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(short));
568 static const bool satArthm = true;
addSimd569 static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_adds_epi16(a, b); }
subSimd570 static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_subs_epi16(a, b); }
minSimd571 static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi16(a, b); }
maxSimd572 static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi16(a, b); }
cmpgtSimd573 static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi16(a, b); }
set1Simd574 static inline __mxxxi set1(int a) { return _mmxxx_set1_epi16(a); }
575 };
576
577 template<>
578 struct Simd<int> {
579 typedef int type;
580 static const int numSeqs = SIMD_REG_SIZE / (8 * sizeof(int));
581 static const bool satArthm = false;
addSimd582 static inline __mxxxi add(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_add_epi32(a, b); }
subSimd583 static inline __mxxxi sub(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_sub_epi32(a, b); }
minSimd584 static inline __mxxxi min(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_min_epi32(a, b); }
maxSimd585 static inline __mxxxi max(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_max_epi32(a, b); }
cmpgtSimd586 static inline __mxxxi cmpgt(const __mxxxi& a, const __mxxxi& b) { return _mmxxx_cmpgt_epi32(a, b); }
set1Simd587 static inline __mxxxi set1(int a) { return _mmxxx_set1_epi32(a); }
588 };
589 //--------------------------------------------------------------------------------------//
590
591
592
593
594 template<class SIMD, int MODE>
searchDatabase_(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,bool calculated[],int overflowMethod)595 static int searchDatabase_(unsigned char query[], int queryLength,
596 unsigned char** db, int dbLength, int dbSeqLengths[],
597 int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
598 OpalSearchResult* results[], const int searchType, bool calculated[], int overflowMethod) {
599
600 static const typename SIMD::type LOWER_BOUND = std::numeric_limits<typename SIMD::type>::min();
601 static const typename SIMD::type UPPER_BOUND = std::numeric_limits<typename SIMD::type>::max();
602 // Used to represent -inf. Must be larger then lower bound to avoid overflow.
603 // TOOD(martin): Is it enough to use gapExt here? Should I use gapOpen instead?
604 static const typename SIMD::type LOWER_SCORE_BOUND = LOWER_BOUND + gapExt;
605
606 bool overflowOccured = false; // True if oveflow was detected at least once.
607
608 // ----------------------- CHECK ARGUMENTS -------------------------- //
609 // Check if Q, R or scoreMatrix have values too big for used score type
610 if (gapOpen < LOWER_BOUND || UPPER_BOUND < gapOpen || gapExt < LOWER_BOUND || UPPER_BOUND < gapExt) {
611 return 1;
612 }
613 if (!SIMD::satArthm) {
614 // These extra limits are enforced so overflow could be detected more efficiently
615 if (gapOpen <= LOWER_BOUND/2 || UPPER_BOUND/2 <= gapOpen || gapExt <= LOWER_BOUND/2 || UPPER_BOUND/2 <= gapExt) {
616 return 1;
617 }
618 }
619
620 for (int r = 0; r < alphabetLength; r++)
621 for (int c = 0; c < alphabetLength; c++) {
622 int score = scoreMatrix[r * alphabetLength + c];
623 if (score < LOWER_BOUND || UPPER_BOUND < score) {
624 return 1;
625 }
626 if (!SIMD::satArthm) {
627 if (score <= LOWER_BOUND/2 || UPPER_BOUND/2 <= score)
628 return 1;
629 }
630 }
631
632 // TODO: If not saturated arithmetic check if inital values of H (-gapOpen - i * gapExt) cause overflow
633 // ------------------------------------------------------------------ //
634
635
636 // ------------------------ INITIALIZATION -------------------------- //
637 const __mxxxi ZERO_SIMD = SIMD::set1(0);
638 const __mxxxi LOWER_BOUND_SIMD = SIMD::set1(LOWER_BOUND);
639 const __mxxxi LOWER_SCORE_BOUND_SIMD = SIMD::set1(LOWER_SCORE_BOUND);
640
641 int nextDbSeqIdx = 0; // index in db
642 int currDbSeqsIdxs[SIMD::numSeqs]; // index in db
643 unsigned char* currDbSeqsPos[SIMD::numSeqs]; // current element for each current database sequence
644 int currDbSeqsLengths[SIMD::numSeqs];
645 bool justLoaded[SIMD::numSeqs] = {0}; // True if sequence was just loaded into channel
646 bool seqJustLoaded = false; // True if at least one sequence was just loaded into channel
647 int shortestDbSeqLength = -1; // length of shortest sequence among current database sequences
648 int numEndedDbSeqs = 0; // Number of sequences that ended
649
650 // Row index of best score for each current database sequence. Used for HW and OV.
651 int currDbSeqsBestScoreColumn[SIMD::numSeqs];
652
653 // Load initial sequences
654 for (int i = 0; i < SIMD::numSeqs; i++)
655 if (loadNextSequence(nextDbSeqIdx, dbLength, currDbSeqsIdxs[i], currDbSeqsPos[i],
656 currDbSeqsLengths[i], db, dbSeqLengths, calculated, numEndedDbSeqs)) {
657 justLoaded[i] = seqJustLoaded = true;
658 // Update shortest sequence length if new sequence was loaded
659 if (shortestDbSeqLength == -1 || currDbSeqsLengths[i] < shortestDbSeqLength)
660 shortestDbSeqLength = currDbSeqsLengths[i];
661 }
662
663 // Q is gap open penalty, R is gap ext penalty.
664 const __mxxxi Q = SIMD::set1(gapOpen);
665 const __mxxxi R = SIMD::set1(gapExt);
666
667 // Previous H column (array), previous E column (array), previous F, all signed short
668 __mxxxi prevHs[queryLength];
669 __mxxxi prevEs[queryLength];
670 // Initialize all values
671 for (int r = 0; r < queryLength; r++) {
672 if (MODE == OPAL_MODE_OV)
673 prevHs[r] = ZERO_SIMD;
674 else { // - Q - r * R
675 if (r == 0)
676 prevHs[0] = SIMD::sub(ZERO_SIMD, Q);
677 else
678 prevHs[r] = SIMD::sub(prevHs[r-1], R);
679 }
680
681 prevEs[r] = LOWER_SCORE_BOUND_SIMD;
682 }
683
684 // u - up, ul - up left
685 __mxxxi uH, ulH;
686 if (MODE == OPAL_MODE_NW) {
687 ulH = ZERO_SIMD;
688 uH = SIMD::sub(R, Q); // -Q + R
689 }
690
691 __mxxxi maxLastRowH = LOWER_BOUND_SIMD; // Keeps track of maximum H in last row
692 // ------------------------------------------------------------------ //
693
694
695 int columnsSinceLastSeqEnd = 0;
696 // For each column
697 while (numEndedDbSeqs < dbLength) {
698 // -------------------- CALCULATE QUERY PROFILE ------------------------- //
699 // TODO: Rognes uses pshufb here, I don't know how/why?
700 __mxxxi P[alphabetLength];
701 typename SIMD::type profileRow[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
702 for (unsigned char letter = 0; letter < alphabetLength; letter++) {
703 int* scoreMatrixRow = scoreMatrix + letter*alphabetLength;
704 for (int i = 0; i < SIMD::numSeqs; i++) {
705 unsigned char* dbSeqPos = currDbSeqsPos[i];
706 if (dbSeqPos != 0)
707 profileRow[i] = (typename SIMD::type)scoreMatrixRow[*dbSeqPos];
708 }
709 P[letter] = _mmxxx_load_si((__mxxxi const*)profileRow);
710 }
711 // ---------------------------------------------------------------------- //
712
713 // u - up
714 __mxxxi uF = LOWER_SCORE_BOUND_SIMD;
715
716 // Database sequence has fixed start and end only in NW
717 if (MODE == OPAL_MODE_NW) {
718 if (seqJustLoaded) {
719 typename SIMD::type resetMask[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
720 for (int i = 0; i < SIMD::numSeqs; i++)
721 resetMask[i] = justLoaded[i] ? 0 : -1;
722 const __mxxxi resetMaskPacked = _mmxxx_load_si((__mxxxi const*)resetMask);
723 ulH = _mmxxx_and_si(uH, resetMaskPacked);
724 } else {
725 ulH = uH;
726 }
727
728 uH = SIMD::sub(uH, R); // uH is -Q - c*R
729 // NOTE: Setup of ulH and uH for first column is done when sequence is loaded.
730 } else {
731 uH = ulH = ZERO_SIMD;
732 }
733
734 __mxxxi minE, minF;
735 minE = minF = SIMD::set1(UPPER_BOUND);
736 __mxxxi maxH = LOWER_BOUND_SIMD; // Max H in this column
737 __mxxxi H;
738
739 __mxxxi firstRow_uH, firstRow_ulH; // Values of uH and ulH from first row of column
740
741 if (MODE == OPAL_MODE_NW) {
742 firstRow_uH = uH;
743 firstRow_ulH = ulH;
744 }
745
746 __mxxxi prevMaxLastRowH = maxLastRowH;
747 // ----------------------- CORE LOOP (ONE COLUMN) ----------------------- //
748 for (int r = 0; r < queryLength; r++) { // For each cell in column
749 // Calculate E = max(lH-Q, lE-R)
750 __mxxxi E = SIMD::max(SIMD::sub(prevHs[r], Q), SIMD::sub(prevEs[r], R)); // E could overflow
751
752 // Calculate F = max(uH-Q, uF-R)
753 __mxxxi F = SIMD::max(SIMD::sub(uH, Q), SIMD::sub(uF, R)); // F could overflow
754 minF = SIMD::min(minF, F); // For overflow detection
755
756 // Calculate H
757 H = SIMD::max(F, E);
758 __mxxxi ulH_P = SIMD::add(ulH, P[query[r]]);
759 H = SIMD::max(H, ulH_P); // H could overflow
760
761 maxH = SIMD::max(maxH, H); // update best score in column
762
763 // Set uF, uH, ulH
764 uF = F;
765 uH = H;
766 ulH = prevHs[r];
767
768 // Update prevHs, prevEs in advance for next column
769 prevEs[r] = E;
770 prevHs[r] = H;
771 }
772 // ---------------------------------------------------------------------- //
773
774 maxLastRowH = SIMD::max(maxLastRowH, H);
775
776 if (MODE == OPAL_MODE_NW) {
777 uH = firstRow_uH;
778 ulH = firstRow_ulH;
779 }
780
781 // Find minE, which should be checked with minE == LOWER_BOUND for overflow
782 for (int r = 0; r < queryLength; r++)
783 minE = SIMD::min(minE, prevEs[r]);
784
785 columnsSinceLastSeqEnd++;
786
787 typename SIMD::type unpackedMaxH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
788 _mmxxx_store_si((__mxxxi*)unpackedMaxH, maxH);
789
790 // ------------------------ OVERFLOW DETECTION -------------------------- //
791 bool overflowDetected = false; // True if overflow was detected for this column.
792 bool overflowed[SIMD::numSeqs];
793 if (!SIMD::satArthm) {
794 /* // This check is based on following assumptions:
795 // - overflow wraps
796 // - Q, R and all scores from scoreMatrix are between LOWER_BOUND/2 and UPPER_BOUND/2 exclusive
797 typename SIMD::type* unpackedOfTest = (typename SIMD::type *)&ofTest;
798 for (int i = 0; i < SIMD::numSeqs; i++)
799 if (currDbSeqsPos[i] != 0 && unpackedOfTest[i] <= LOWER_BOUND/2)
800 return 1;*/
801 } else {
802 // There is overflow if minE == LOWER_BOUND or minF == LOWER_BOUND or maxH == UPPER_BOUND
803 __mxxxi minEF = SIMD::min(minE, minF);
804 typename SIMD::type unpackedMinEF[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
805 _mmxxx_store_si((__mxxxi*)unpackedMinEF, minEF);
806 for (int i = 0; i < SIMD::numSeqs; i++) {
807 overflowed[i] = currDbSeqsPos[i] != 0 && (unpackedMinEF[i] == LOWER_BOUND
808 || unpackedMaxH[i] == UPPER_BOUND);
809 if (overflowMethod == OPAL_OVERFLOW_BUCKETS && overflowed[i]) {
810 // In buckets method, we stop calculation when overflow is detected.
811 return OPAL_ERR_OVERFLOW;
812 }
813 }
814 }
815 for (int i = 0; i < SIMD::numSeqs; i++) {
816 overflowDetected = overflowDetected || overflowed[i];
817 }
818 overflowOccured = overflowOccured || overflowDetected;
819 // ---------------------------------------------------------------------- //
820
821 // ------------------ Store end location of best score ------------------ //
822 if (searchType != OPAL_SEARCH_SCORE && (MODE == OPAL_MODE_HW || MODE == OPAL_MODE_OV)) {
823 // Determine the column of best score.
824 __mxxxi greater = SIMD::cmpgt(maxLastRowH, prevMaxLastRowH);
825 typename SIMD::type unpackedGreater[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
826 _mmxxx_store_si((__mxxxi*)unpackedGreater, greater);
827 for (int i = 0; i < SIMD::numSeqs; i++) {
828 if (currDbSeqsPos[i] != 0 && !overflowed[i]) { // If not null sequence or overflowed
829 if (unpackedGreater[i] != 0) {
830 currDbSeqsBestScoreColumn[i] = dbSeqLengths[currDbSeqsIdxs[i]] - currDbSeqsLengths[i]
831 + columnsSinceLastSeqEnd - 1;
832 }
833 }
834 }
835 }
836 // ---------------------------------------------------------------------- //
837
838 seqJustLoaded = false;
839 // --------------------- CHECK AND HANDLE SEQUENCE END ------------------ //
840 if (overflowDetected || shortestDbSeqLength == columnsSinceLastSeqEnd) { // If at least one sequence ended
841 shortestDbSeqLength = -1;
842
843 // Calculate best scores
844 __mxxxi bestScore;
845 if (MODE == OPAL_MODE_OV)
846 bestScore = SIMD::max(maxH, maxLastRowH); // Maximum of last row and column
847 if (MODE == OPAL_MODE_HW)
848 bestScore = maxLastRowH;
849 if (MODE == OPAL_MODE_NW)
850 bestScore = H;
851 typename SIMD::type unpackedBestScore[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
852 _mmxxx_store_si((__mxxxi*)unpackedBestScore, bestScore);
853
854 for (int i = 0; i < SIMD::numSeqs; i++) {
855 if (currDbSeqsPos[i] != 0) { // If not null sequence
856 justLoaded[i] = false;
857 currDbSeqsLengths[i] -= columnsSinceLastSeqEnd;
858
859 if (overflowed[i] || currDbSeqsLengths[i] == 0) { // If sequence ended
860 numEndedDbSeqs++;
861 if (!overflowed[i]) {
862 // Store result.
863 int dbSeqIdx = currDbSeqsIdxs[i];
864 OpalSearchResult *result = results[dbSeqIdx];
865 calculated[dbSeqIdx] = true;
866 // Set score.
867 opalSearchResultSetScore(result, unpackedBestScore[i]);
868 // Set end location.
869 if (searchType == OPAL_SEARCH_SCORE) {
870 result->endLocationQuery = -1;
871 result->endLocationTarget = -1;
872 } else {
873 if (MODE == OPAL_MODE_NW) {
874 result->endLocationQuery = queryLength - 1;
875 result->endLocationTarget = dbSeqLengths[dbSeqIdx] - 1;
876 }
877 if (MODE == OPAL_MODE_HW) {
878 result->endLocationQuery = queryLength - 1;
879 result->endLocationTarget = currDbSeqsBestScoreColumn[i];
880 }
881 if (MODE == OPAL_MODE_OV) {
882 // This unpacking will repeat unnecessarily if there are multiple sequences
883 // ending at the same time, however that will happen in very rare occasions.
884 // TODO(martin): always unpack only once.
885 typename SIMD::type unpackedPrevMaxLastRowH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
886 _mmxxx_store_si((__mxxxi*)unpackedPrevMaxLastRowH, prevMaxLastRowH);
887 typename SIMD::type maxScore = unpackedPrevMaxLastRowH[i];
888
889 // If last column contains best score, determine row.
890 if (unpackedMaxH[i] > maxScore) {
891 result->endLocationTarget = dbSeqLengths[dbSeqIdx] - 1;
892 for (int r = 0; r < queryLength; r++) {
893 typename SIMD::type unpackedPrevH[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
894 _mmxxx_store_si((__mxxxi*)unpackedPrevH, prevHs[r]);
895 if (unpackedPrevH[i] > maxScore) {
896 result->endLocationQuery = r;
897 maxScore = unpackedPrevH[i];
898 }
899 }
900 } else {
901 result->endLocationTarget = currDbSeqsBestScoreColumn[i];
902 result->endLocationQuery = queryLength - 1;
903 }
904 }
905 }
906 }
907 // Load next sequence
908 if (loadNextSequence(nextDbSeqIdx, dbLength, currDbSeqsIdxs[i], currDbSeqsPos[i],
909 currDbSeqsLengths[i], db, dbSeqLengths, calculated, numEndedDbSeqs)) {
910 justLoaded[i] = seqJustLoaded = true;
911 }
912 } else {
913 if (currDbSeqsPos[i] != 0)
914 currDbSeqsPos[i]++; // If not new and not null, move for one element
915 }
916 // Update shortest sequence length if sequence is not null
917 if (currDbSeqsPos[i] != 0 && (shortestDbSeqLength == -1 || currDbSeqsLengths[i] < shortestDbSeqLength))
918 shortestDbSeqLength = currDbSeqsLengths[i];
919 }
920 }
921 //------------ Reset prevEs, prevHs, maxLastRowH(, ulH and uH) ------------//
922 typename SIMD::type resetMask[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8)));
923 typename SIMD::type setMask[SIMD::numSeqs] __attribute__((aligned(SIMD_REG_SIZE / 8))); // inverse of resetMask
924 for (int i = 0; i < SIMD::numSeqs; i++) {
925 resetMask[i] = justLoaded[i] ? 0 : -1;
926 setMask[i] = justLoaded[i] ? -1 : 0;
927 }
928 const __mxxxi resetMaskPacked = _mmxxx_load_si((__mxxxi const*)resetMask);
929 const __mxxxi setMaskPacked = _mmxxx_load_si((__mxxxi const*)setMask);
930
931 // Set prevEs ended channels to LOWER_SCORE_BOUND
932 const __mxxxi maskedLowerScoreBoundSimd = _mmxxx_and_si(setMaskPacked, LOWER_SCORE_BOUND_SIMD);
933 for (int r = 0; r < queryLength; r++) {
934 prevEs[r] = _mmxxx_and_si(prevEs[r], resetMaskPacked);
935 prevEs[r] = SIMD::add(prevEs[r], maskedLowerScoreBoundSimd);
936 }
937
938 // Set prevHs
939 for (int r = 0; r < queryLength; r++) {
940 prevHs[r] = _mmxxx_and_si(prevHs[r], resetMaskPacked);
941 if (MODE != OPAL_MODE_OV) {
942 if (r == 0) {
943 prevHs[0] = SIMD::sub(prevHs[0], _mmxxx_and_si(setMaskPacked, Q));
944 } else {
945 prevHs[r] = SIMD::add(prevHs[r], _mmxxx_and_si(setMaskPacked, SIMD::sub(prevHs[r-1], R)));
946 }
947 }
948 }
949
950 // Set ulH and uH if NW
951 if (MODE == OPAL_MODE_NW) {
952 ulH = _mmxxx_and_si(ulH, resetMaskPacked); // to 0
953 // Set uH channels to -Q + R
954 uH = _mmxxx_and_si(uH, resetMaskPacked);
955 uH = SIMD::add(uH, _mmxxx_and_si(setMaskPacked, SIMD::sub(R, Q)));
956 }
957
958 // Set maxLastRow ended channels to LOWER_BOUND
959 maxLastRowH = _mmxxx_and_si(maxLastRowH, resetMaskPacked);
960 maxLastRowH = SIMD::add(maxLastRowH, _mmxxx_and_si(setMaskPacked, LOWER_BOUND_SIMD));
961 //-------------------------------------------------------//
962
963 columnsSinceLastSeqEnd = 0;
964 } else { // If no sequences ended
965 // Move for one element in all sequences
966 for (int i = 0; i < SIMD::numSeqs; i++)
967 if (currDbSeqsPos[i] != 0)
968 currDbSeqsPos[i]++;
969 }
970 // ---------------------------------------------------------------------- //
971 }
972
973 if (overflowOccured) {
974 return OPAL_ERR_OVERFLOW;
975 }
976 return 0;
977 }
978
979 /**
980 * @param [in] skip If skip[i] is true, result for sequence #i will not be calculated.
981 * If skip is NULL, all results will be calculated.
982 */
983 template <int MODE>
searchDatabase(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,bool skip[],int overflowMethod)984 static int searchDatabase(unsigned char query[], int queryLength,
985 unsigned char** db, int dbLength, int dbSeqLengths[],
986 int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
987 OpalSearchResult* results[], const int searchType, bool skip[], int overflowMethod) {
988 int resultCode = 0;
989 // Do buckets only if using buckets overflow method.
990 const int chunkSize = overflowMethod == OPAL_OVERFLOW_BUCKETS ? 1024 : dbLength;
991 bool* calculated = new bool[chunkSize];
992 for (int startIdx = 0; startIdx < dbLength; startIdx += chunkSize) {
993 unsigned char** db_ = db + startIdx;
994 int* dbSeqLengths_ = dbSeqLengths + startIdx;
995 OpalSearchResult** results_ = results + startIdx;
996 int dbLength_ = startIdx + chunkSize >= dbLength ? dbLength - startIdx : chunkSize;
997 for (int i = 0; i < dbLength_; i++) {
998 calculated[i] = skip ? skip[i] : false;
999 }
1000 resultCode = searchDatabase_< Simd<char>, MODE >
1001 (query, queryLength, db_, dbLength_, dbSeqLengths_,
1002 gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
1003 searchType, calculated, overflowMethod);
1004 if (resultCode == OPAL_ERR_OVERFLOW) {
1005 resultCode = searchDatabase_< Simd<short>, MODE >
1006 (query, queryLength, db_, dbLength_, dbSeqLengths_,
1007 gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
1008 searchType, calculated, overflowMethod);
1009 if (resultCode == OPAL_ERR_OVERFLOW) {
1010 resultCode = searchDatabase_< Simd<int>, MODE >
1011 (query, queryLength, db_, dbLength_, dbSeqLengths_,
1012 gapOpen, gapExt, scoreMatrix, alphabetLength, results_,
1013 searchType, calculated, overflowMethod);
1014 if (resultCode != 0)
1015 break; // TODO: this does not make much sense because of buckets, improve it.
1016 }
1017 }
1018 }
1019 delete[] calculated;
1020 return resultCode;
1021 }
1022
1023
1024 /**
1025 * @return Zero-based index of max element in array. If there are multiple elements with
1026 * same value, last is returned.
1027 */
1028 template <typename T>
arrayMax(T array[],int length)1029 static inline T arrayMax(T array[], int length) {
1030 assert(length > 0);
1031 int maxElementIdx = 0;
1032 for (int i = 1; i < length; i++) {
1033 if (array[i] > array[maxElementIdx]) {
1034 maxElementIdx = i;
1035 }
1036 }
1037 return array[maxElementIdx];
1038 }
1039
1040 /**
1041 * @param length Length of gap, must be non-negative.
1042 * @param gapOpen Penalty for gap opening (non-negative).
1043 * @param gapExt Penalty for gap extension (non-negative).
1044 * @return Gap penalty, as non-negative number.
1045 */
gapPenalty(int length,int gapOpen,int gapExt)1046 static int gapPenalty(int length, int gapOpen, int gapExt) {
1047 if (length > 0) {
1048 return gapOpen + gapExt * (length - 1);
1049 } else {
1050 return 0;
1051 }
1052 }
1053
1054 /**
1055 * Calculates bottom border of band for OV mode stop conditions.
1056 */
calculateBottomBandBorderOV(int k,int Q,int T,int Go,int Ge,int M)1057 static int calculateBottomBandBorderOV(int k, int Q, int T, int Go, int Ge, int M) {
1058 int border = 0;
1059
1060 // for d <= Q - T:
1061 border = std::max(border, std::min(Q - T, -1 * (k + Go - Ge - M * T) / Ge));
1062
1063 // for d > Q - T:
1064 int borderCandidate = -1 * (k - M * Q + Go - Ge) / (Ge + M);
1065 if (borderCandidate > Q - T) {
1066 border = std::max(border, borderCandidate);
1067 }
1068
1069 return std::min(border, Q - 1);
1070 }
1071
calculateTopBandBorderHW(int k,int Q,int T,int Go,int Ge,int M)1072 static int calculateTopBandBorderHW(int k, int Q, int T, int Go, int Ge, int M) {
1073 int border = 0;
1074
1075 // for d <= T - Q;
1076 border = std::max(border, std::min(T - Q, -1 * (k - M * Q + Go) / Ge + 1));
1077
1078 // for d > T - Q:
1079 int borderCandidate = -1 * (k - T * M + 2 * Go + Ge * (Q - T - 2)) / (2 * Ge + M);
1080 if (borderCandidate > T - Q) {
1081 border = std::max(border, borderCandidate);
1082 }
1083
1084 return std::min(border, T - 1);
1085 }
1086
calculateBottomBandBorderHW(int k,int Q,int T,int Go,int Ge,int M)1087 static int calculateBottomBandBorderHW(int k, int Q, int T, int Go, int Ge, int M) {
1088 int border = 0;
1089
1090 // for d >= Q - T:
1091 int borderCandidate = -1 * (k + Go - Ge - Q * M) / (Ge + M);
1092 if (borderCandidate >= Q - T) {
1093 border = std::max(border, borderCandidate);
1094 }
1095
1096 // for d < Q - T:
1097 if (-2 * Go - Ge * (Q - T - 2) + M * T >= k) {
1098 border = std::max(border, Q - T - 1);
1099 }
1100
1101 return std::min(border, Q - 1);
1102 }
1103
calculateBottomBandBorderNW(int k,int Q,int T,int Go,int Ge,int M)1104 static int calculateBottomBandBorderNW(int k, int Q, int T, int Go, int Ge, int M) {
1105 int border = 0;
1106
1107 // for d > Q - T:
1108 int borderCandidate = -1 * (k + 2 * Go - M * Q + Ge * (T - Q - 2)) / (2 * Ge + M);
1109 if (borderCandidate > Q - T) {
1110 border = std::max(border, borderCandidate);
1111 }
1112
1113 // for d = Q - T:
1114 if (Q - T <= -1 * (k + Go - M * T - Ge) / Ge) {
1115 border = std::max(border, Q - T);
1116 }
1117
1118 // for d < Q - T:
1119 if (-2 * Go - Ge * (Q - T - 2) + M * T >= k) {
1120 border = std::max(border, Q - T - 1);
1121 }
1122
1123 return std::min(border, Q - 1);
1124 }
1125
1126 /**
1127 * Calculates top and bottom diagonal of band that contains all cells that could be part of any
1128 * solution that gives score not smaller than k. This means that if we are interested only in solutions
1129 * that give score that is not smaller than k, it is enough to calculate only cells inside that band.
1130 * Always starts from top left corner (like NW does) no matter which mode is specified,
1131 * and stops with regard to stop conditions of specified mode.
1132 * @param k We are interested only in scores thar are not smaller than k.
1133 * @param mode Alignment mode, it is used for stop conditions.
1134 * @param Q queryLength
1135 * @param T targetLength
1136 * @param Go gapOpen -> non negative penalty for opening of gap.
1137 * @param Ge gapExt -> non negative penalty for extension of gap.
1138 * @param M Max score from score matrix.
1139 * @return Pair where first is index of bottom diagonal(border), and second is index of top diagonal(border).
1140 * Therefore, first value will be in [0, Q - 1], while second will be in [0, T - 1].
1141 * Band spans between top and bottom diagonal, including them.
1142 * Main diagonal has index 0, diagonals above it and below both start with index 1.
1143 * If there is no band, (-1, -1) is returned.
1144 *
1145 * Example of matrix where Q is 3 and T is 6, each cell is marked with index of diagonal it lies on:
1146 *
1147 * 012345 xxx---
1148 * 101234 , if bottom border is 1 and top is 2, we have band: xxxx--
1149 * 210123 -xxxx-
1150 */
calculateBandBorders(int k,int mode,int Q,int T,int Go,int Ge,int M)1151 static std::pair<int, int> calculateBandBorders(int k, int mode, int Q, int T, int Go, int Ge, int M) {
1152 if (mode == OPAL_MODE_OV || mode == OPAL_MODE_SW) {
1153 // Bands for OV and SW have same conditions, so they are calculated in same way.
1154 if (M * std::min(Q, T) >= k) { // Determine if band exists at all.
1155 // Conditions for top and bottom band are symmetric, so logic for bottom is reused for top.
1156 return std::make_pair(calculateBottomBandBorderOV(k, Q, T, Go, Ge, M),
1157 calculateBottomBandBorderOV(k, T, Q, Go, Ge, M));
1158 } else {
1159 return std::make_pair(-1, -1);
1160 }
1161 } else if (mode == OPAL_MODE_HW) {
1162 if (M * std::min(Q, T) - gapPenalty(Q - std::min(Q, T), Go, Ge) >= k) {
1163 return std::make_pair(calculateBottomBandBorderHW(k, Q, T, Go, Ge, M),
1164 calculateTopBandBorderHW(k, Q, T, Go, Ge, M));
1165 } else {
1166 return std::make_pair(-1, -1);
1167 }
1168 } else if (mode == OPAL_MODE_NW) {
1169 if (M * std::min(Q, T) - gapPenalty(std::abs(Q - T), Go, Ge) >= k) {
1170 // Conditions for top and bottom band are symmetric, so logic for bottom is reused for top.
1171 return std::make_pair(calculateBottomBandBorderNW(k, Q, T, Go, Ge, M),
1172 calculateBottomBandBorderNW(k, T, Q, Go, Ge, M));
1173 } else {
1174 return std::make_pair(-1, -1);
1175 }
1176 } else {
1177 assert(false); // Invalid alignment mode.
1178 }
1179 }
1180
1181
1182
1183 /**
1184 * Returns new sequence that is reverse of given sequence.
1185 */
createReverseCopy(const unsigned char * seq,int length)1186 static inline unsigned char* createReverseCopy(const unsigned char* seq, int length) {
1187 unsigned char* rSeq = (unsigned char*) malloc(length * sizeof(unsigned char));
1188 for (int i = 0; i < length; i++) {
1189 rSeq[i] = seq[length - i - 1];
1190 }
1191 return rSeq;
1192 }
1193
1194 template <class T>
revertArray(T array[],int length)1195 static inline void revertArray(T array[], int length) {
1196 for (int i = 0; i < length / 2; i++) {
1197 T tmp = array[i];
1198 array[i] = array[length - 1 - i];
1199 array[length - 1 - i] = tmp;
1200 }
1201 }
1202
1203 // Here I store scores for one cell in score matrix.
1204 class Cell {
1205 public:
1206 int H, E, F;
1207 enum class Field {
1208 H, E, F
1209 };
1210 };
1211
1212 /**
1213 * Finds alignment of two sequences, if we know scoreLimit.
1214 * First alignment that has score greater or equal then scoreLimit will be returned.
1215 * If there is no such alignment, behavior will be unexpected.
1216 * Always starts from top left corner (like NW does) no matter which mode is specified,
1217 * and stops with regard to stop conditions of specified mode.
1218 * For example, for HW it will stop on last row, and for SW it will stop anywhere.
1219 * Returns score, start location (which is always (0, 0)), end location and alignment.
1220 * @param [in] query
1221 * @param [in] queryLength
1222 * @param [in] target
1223 * @param [in] targetLength
1224 * @param [in] gapOpen
1225 * @param [in] gapExt
1226 * @param [in] scoreMatrix
1227 * @param [in] alphabetLength
1228 * @param [in] scoreLimit First alignment with score greater/equal than scoreLimit is returned.
1229 * If there is no such score, behavior is undefined.
1230 * TODO(martin): make this function also work when max score is smaller then scoreLimit.
1231 * @param [out] result Pointer to already allocated object is expected here.
1232 * Score, start location, end location and alignment will be set.
1233 * Do not forget to free() alignment!
1234 * @param [in] mode Mode whose stop conditions will be used when finding alignment.
1235 */
findAlignment(const unsigned char query[],const int queryLength,const unsigned char target[],const int targetLength,const int gapOpen,const int gapExt,const int * scoreMatrix,const int alphabetLength,const int scoreLimit,OpalSearchResult * result,const int mode)1236 static void findAlignment(
1237 const unsigned char query[], const int queryLength, const unsigned char target[], const int targetLength,
1238 const int gapOpen, const int gapExt, const int* scoreMatrix, const int alphabetLength,
1239 const int scoreLimit, OpalSearchResult* result, const int mode) {
1240 /*
1241 printf("Query: ");
1242 for (int i = 0; i < queryLength; i++) {
1243 printf("%d ", query[i]);
1244 }
1245 printf("\n");
1246 printf("Target: ");
1247 for (int i = 0; i < targetLength; i++) {
1248 printf("%d ", target[i]);
1249 }
1250 printf("\n");
1251 */
1252
1253 //printf("lengths: %d %d\n", queryLength, targetLength);
1254
1255 std::pair<int, int> bandBorders = calculateBandBorders(
1256 scoreLimit, mode, queryLength, targetLength, gapOpen, gapExt,
1257 arrayMax(scoreMatrix, alphabetLength * alphabetLength));
1258
1259 assert(bandBorders.first >= 0 && bandBorders.first < queryLength);
1260 assert(bandBorders.second >= 0 && bandBorders.second < targetLength);
1261 //printf("band: %d %d\n", bandBorders.first, bandBorders.second);
1262
1263 Cell** matrix = new Cell*[targetLength]; // NOTE: First index is column, second is row.
1264 Cell* initialColumn = new Cell[queryLength];
1265 const int LOWER_SCORE_BOUND = INT_MIN + std::max(gapOpen, gapExt);
1266 for (int r = 0; r < queryLength; r++) {
1267 initialColumn[r].H = -1 * gapOpen - r * gapExt;
1268 initialColumn[r].E = LOWER_SCORE_BOUND;
1269 }
1270
1271 Cell* prevColumn = initialColumn;
1272 int maxScore = INT_MIN; // Max score so far, but only among cells that could be final.
1273 int H = INT_MIN; // Current score.
1274 int c;
1275 for (c = 0; c < targetLength && maxScore < scoreLimit; c++) {
1276 matrix[c] = new Cell[queryLength];
1277
1278 // First and last row in band for this column.
1279 int rBandStart = std::max(0, c - bandBorders.second);
1280 int rBandEnd = std::min(queryLength - 1, c + bandBorders.first);
1281
1282 int uF, uH, ulH;
1283 if (rBandStart == 0) {
1284 uF = LOWER_SCORE_BOUND;
1285 uH = -1 * gapOpen - c * gapExt;
1286 ulH = c == 0 ? 0 : uH + gapExt;
1287 } else {
1288 uH = uF = LOWER_SCORE_BOUND; // Out of band, so set to -inf.
1289 ulH = prevColumn[rBandStart - 1].H;
1290 }
1291
1292 for (int r = rBandStart; r <= rBandEnd; r++) {
1293 int E = std::max(prevColumn[r].H - gapOpen, prevColumn[r].E - gapExt);
1294 int F = std::max(uH - gapOpen, uF - gapExt);
1295 int score = scoreMatrix[query[r] * alphabetLength + target[c]];
1296 H = std::max(E, std::max(F, ulH + score));
1297 /*
1298 printf("E: %d ", E);
1299 printf("F: %d ", F);
1300 printf("score: %d ", score);
1301 printf("ulH: %d ", ulH);
1302 printf("H: %d ", H);
1303 */
1304
1305 // If mode is SW, track max score of all cells.
1306 // If mode is OV, track max score in last column.
1307 if (mode == OPAL_MODE_SW
1308 || (mode == OPAL_MODE_OV && c == targetLength - 1)) {
1309 maxScore = std::max(maxScore, H);
1310 }
1311
1312 uF = F;
1313 uH = H;
1314 ulH = prevColumn[r].H;
1315
1316 matrix[c][r].H = H;
1317 matrix[c][r].E = E;
1318 matrix[c][r].F = F;
1319 }
1320
1321 // Set all cells that are out of band to -inf.
1322 for (int r = 0; r < rBandStart; r++) {
1323 matrix[c][r].E = matrix[c][r].H = matrix[c][r].F = LOWER_SCORE_BOUND;
1324 }
1325 for (int r = rBandEnd + 1; r < queryLength; r++) {
1326 matrix[c][r].E = matrix[c][r].H = matrix[c][r].F = LOWER_SCORE_BOUND;
1327 }
1328
1329 if (mode == OPAL_MODE_HW || mode == OPAL_MODE_OV) {
1330 maxScore = std::max(maxScore, H); // Track max score in last row.
1331 }
1332 prevColumn = matrix[c];
1333 }
1334 int lastColumnIdx = c - 1;
1335
1336 result->startLocationTarget = 0;
1337 result->startLocationQuery = 0;
1338 result->scoreSet = 1;
1339 // Determine score and end location of alignment.
1340 switch (mode) {
1341 case OPAL_MODE_NW:
1342 opalSearchResultSetScore(result, H);
1343 result->endLocationTarget = targetLength - 1;
1344 result->endLocationQuery = queryLength - 1;
1345 break;
1346 case OPAL_MODE_HW:
1347 opalSearchResultSetScore(result, maxScore);
1348 result->endLocationTarget = lastColumnIdx;
1349 result->endLocationQuery = queryLength - 1;
1350 break;
1351 case OPAL_MODE_SW: case OPAL_MODE_OV:
1352 opalSearchResultSetScore(result, maxScore);
1353 result->endLocationTarget = lastColumnIdx;
1354 int r;
1355 for (r = 0; r < queryLength && matrix[lastColumnIdx][r].H != maxScore; r++);
1356 assert(r < queryLength);
1357 assert(matrix[lastColumnIdx][r].H == maxScore);
1358 result->endLocationQuery = r;
1359 break;
1360 default:
1361 assert(false);
1362 }
1363
1364 // Construct alignment.
1365 // I reserve max size possibly needed for alignment.
1366 unsigned char* alignment = (unsigned char*) malloc(
1367 sizeof(unsigned char) * (result->endLocationQuery + result->endLocationTarget));
1368 int alignmentLength = 0;
1369 int rIdx = result->endLocationQuery;
1370 int cIdx = result->endLocationTarget;
1371 Cell::Field field = Cell::Field::H; // Current field type.
1372 while (rIdx >= 0 && cIdx >= 0) {
1373 Cell cell = matrix[cIdx][rIdx]; // Current cell.
1374
1375 // Determine to which cell and which field we should go next, move, and add operation to alignment.
1376 switch (field) {
1377 case Cell::Field::H:
1378 if (cell.H == cell.E) {
1379 field = Cell::Field::E;
1380 } else if (cell.H == cell.F) {
1381 field = Cell::Field::F;
1382 } else {
1383 alignment[alignmentLength++] = (query[rIdx] == target[cIdx] ? OPAL_ALIGN_MATCH
1384 : OPAL_ALIGN_MISMATCH);
1385 cIdx--; rIdx--;
1386 }
1387 break;
1388 case Cell::Field::E:
1389 field = (cell.E == matrix[cIdx - 1][rIdx].H - gapOpen) ? Cell::Field::H : Cell::Field::E;
1390 alignment[alignmentLength++] = OPAL_ALIGN_INS;
1391 cIdx--;
1392 break;
1393 case Cell::Field::F:
1394 field = (cell.F == matrix[cIdx][rIdx - 1].H - gapOpen) ? Cell::Field::H : Cell::Field::F;
1395 alignment[alignmentLength++] = OPAL_ALIGN_DEL;
1396 rIdx--;
1397 break;
1398 }
1399 }
1400 // I stop when matrix border is reached, so I have to add indels at start of alignment
1401 // manually (they do not have entry in operations). Only one of these two loops will trigger.
1402 while (rIdx >= 0) {
1403 alignment[alignmentLength] = OPAL_ALIGN_DEL;
1404 alignmentLength++; rIdx--;
1405 }
1406 while (cIdx >= 0) {
1407 alignment[alignmentLength] = OPAL_ALIGN_INS;
1408 alignmentLength++; cIdx--;
1409 }
1410 //printf("rIdx: %d, cIdx: %d\n", rIdx, cIdx);
1411 assert(rIdx == -1 && cIdx == -1);
1412 alignment = (unsigned char*) realloc(alignment, sizeof(unsigned char) * alignmentLength);
1413 revertArray(alignment, alignmentLength);
1414 // Store alignment to result.
1415 result->alignment = alignment;
1416 result->alignmentLength = alignmentLength;
1417
1418 /*
1419 printf("Alignment: ");
1420 for (int j = 0; j < result->alignmentLength; j++)
1421 printf("%d ", result->alignment[j]);
1422 printf("\n");
1423 */
1424
1425 // Cleanup
1426 delete[] initialColumn;
1427 for (int i = 0; i <= lastColumnIdx; i++) {
1428 delete[] matrix[i];
1429 }
1430 delete[] matrix;
1431 }
1432
1433
1434
opalSearchDatabase(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[],const int searchType,int mode,int overflowMethod)1435 extern int opalSearchDatabase(
1436 unsigned char query[], int queryLength,
1437 unsigned char** db, int dbLength, int dbSeqLengths[],
1438 int gapOpen, int gapExt, int* scoreMatrix, int alphabetLength,
1439 OpalSearchResult* results[], const int searchType, int mode, int overflowMethod) {
1440 // A.D.: this will not happen since SIMDe always takes care of correct SIMD extensions
1441 #if 0
1442 //#if !defined(__SSE4_1__) && !defined(__AVX2__)
1443 // return OPAL_ERR_NO_SIMD_SUPPORT;
1444 #else
1445 // Calculate score and end location.
1446 int status;
1447 // Skip recalculation of already calculated sequences.
1448 bool *skip = new bool[dbLength];
1449 for (int i = 0; i < dbLength; i++) {
1450 skip[i] = (!opalSearchResultIsEmpty(*results[i])
1451 && (searchType == OPAL_SEARCH_SCORE
1452 || (results[i]->endLocationQuery >= 0 && results[i]->endLocationTarget >= 0)));
1453 }
1454 if (mode == OPAL_MODE_NW) {
1455 status = searchDatabase<OPAL_MODE_NW>(
1456 query, queryLength, db, dbLength, dbSeqLengths, gapOpen, gapExt,
1457 scoreMatrix, alphabetLength, results, searchType, skip, overflowMethod);
1458 } else if (mode == OPAL_MODE_HW) {
1459 status = searchDatabase<OPAL_MODE_HW>(
1460 query, queryLength, db, dbLength, dbSeqLengths, gapOpen, gapExt,
1461 scoreMatrix, alphabetLength, results, searchType, skip, overflowMethod);
1462 } else if (mode == OPAL_MODE_OV) {
1463 status = searchDatabase<OPAL_MODE_OV>(
1464 query, queryLength, db, dbLength, dbSeqLengths, gapOpen, gapExt,
1465 scoreMatrix, alphabetLength, results, searchType, skip, overflowMethod);
1466 } else if (mode == OPAL_MODE_SW) {
1467 status = searchDatabaseSW(
1468 query, queryLength, db, dbLength, dbSeqLengths,
1469 gapOpen, gapExt, scoreMatrix, alphabetLength,
1470 results, searchType, skip, overflowMethod);
1471 } else {
1472 status = OPAL_ERR_INVALID_MODE;
1473 }
1474 delete[] skip;
1475 if (status) return status;
1476
1477 if (searchType == OPAL_SEARCH_ALIGNMENT) {
1478 // Calculate alignment of query with each database sequence.
1479 unsigned char* const rQuery = createReverseCopy(query, queryLength);
1480 for (int i = 0; i < dbLength; i++) {
1481 if (mode == OPAL_MODE_SW && results[i]->score == 0) { // If it does not have alignment
1482 results[i]->alignment = NULL;
1483 results[i]->alignmentLength = 0;
1484 results[i]->startLocationQuery = results[i]->startLocationTarget = -1;
1485 results[i]->endLocationQuery = results[i]->endLocationTarget = -1;
1486 } else {
1487 //printf("%d %d\n", results[i]->endLocationQuery, results[i]->endLocationTarget);
1488 // Do alignment in reverse direction.
1489 int alignQueryLength = results[i]->endLocationQuery + 1;
1490 unsigned char* alignQuery = rQuery + queryLength - alignQueryLength;
1491 int alignTargetLength = results[i]->endLocationTarget + 1;
1492 unsigned char* alignTarget = createReverseCopy(db[i], alignTargetLength);
1493 OpalSearchResult result;
1494 findAlignment(
1495 alignQuery, alignQueryLength, alignTarget, alignTargetLength,
1496 gapOpen, gapExt, scoreMatrix, alphabetLength,
1497 results[i]->score, &result, mode);
1498 //printf("%d %d\n", results[i]->score, result.score);
1499 assert(results[i]->score == result.score);
1500 // Translate results.
1501 results[i]->startLocationQuery = alignQueryLength - result.endLocationQuery - 1;
1502 results[i]->startLocationTarget = alignTargetLength - result.endLocationTarget - 1;
1503 results[i]->alignmentLength = result.alignmentLength;
1504 results[i]->alignment = result.alignment;
1505 revertArray(results[i]->alignment, results[i]->alignmentLength);
1506 free(alignTarget);
1507 }
1508 }
1509 free(rQuery);
1510 } else {
1511 for (int i = 0; i < dbLength; i++) {
1512 results[i]->alignment = NULL;
1513 results[i]->alignmentLength = -1;
1514 results[i]->startLocationQuery = -1;
1515 results[i]->startLocationTarget = -1;
1516 }
1517 }
1518
1519 return 0;
1520 #endif
1521 }
1522
1523
opalSearchDatabaseCharSW(unsigned char query[],int queryLength,unsigned char ** db,int dbLength,int dbSeqLengths[],int gapOpen,int gapExt,int * scoreMatrix,int alphabetLength,OpalSearchResult * results[])1524 extern int opalSearchDatabaseCharSW(
1525 unsigned char query[], int queryLength, unsigned char** db, int dbLength,
1526 int dbSeqLengths[], int gapOpen, int gapExt, int* scoreMatrix,
1527 int alphabetLength, OpalSearchResult* results[]) {
1528 #if !defined(__SSE4_1__) && !defined(__AVX2__)
1529 return OPAL_ERR_NO_SIMD_SUPPORT;
1530 #else
1531 bool* calculated = new bool[dbLength];
1532 for (int i = 0; i < dbLength; i++) {
1533 calculated[i] = false;
1534 }
1535 int resultCode = searchDatabaseSW_< SimdSW<char> >(
1536 query, queryLength, db, dbLength, dbSeqLengths, gapOpen, gapExt,
1537 scoreMatrix, alphabetLength, results, OPAL_SEARCH_SCORE, calculated,
1538 OPAL_OVERFLOW_SIMPLE);
1539 for (int i = 0; i < dbLength; i++) {
1540 if (!calculated[i]) {
1541 results[i]->score = -1;
1542 results[i]->scoreSet = 0;
1543 }
1544 }
1545 delete[] calculated;
1546 return resultCode;
1547 #endif
1548 }
1549
1550
opalInitSearchResult(OpalSearchResult * result)1551 extern void opalInitSearchResult(OpalSearchResult* result) {
1552 result->scoreSet = 0;
1553 result->startLocationTarget = result->startLocationQuery = -1;
1554 result->endLocationTarget = result->endLocationQuery = -1;
1555 result->alignment = NULL;
1556 result->alignmentLength = 0;
1557 }
1558
opalSearchResultIsEmpty(const OpalSearchResult result)1559 extern int opalSearchResultIsEmpty(const OpalSearchResult result) {
1560 return !result.scoreSet;
1561 }
1562
opalSearchResultSetScore(OpalSearchResult * result,int score)1563 extern void opalSearchResultSetScore(OpalSearchResult* result, int score) {
1564 result->scoreSet = 1;
1565 result->score = score;
1566 }
1567