1 /****
2 DIAMOND protein aligner
3 Copyright (C) 2016-2021 Max Planck Society for the Advancement of Science e.V.
4                         Benjamin Buchfink
5 
6 Code developed by Benjamin Buchfink <benjamin.buchfink@tue.mpg.de>
7 
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 ****/
21 
22 #include <utility>
23 #include "../../util/data_structures/mem_buffer.h"
24 
25 using std::pair;
26 
27 namespace DP { namespace BandedSwipe {
28 namespace DISPATCH_ARCH {
29 
30 template<typename _sv>
31 struct Matrix
32 {
33 	struct ColumnIterator
34 	{
ColumnIteratorMatrix::ColumnIterator35 		ColumnIterator(_sv* hgap_front, _sv* score_front) :
36 			hgap_ptr_(hgap_front),
37 			score_ptr_(score_front)
38 		{ }
39 		inline void operator++()
40 		{
41 			++hgap_ptr_; ++score_ptr_;
42 		}
hgapMatrix::ColumnIterator43 		inline _sv hgap() const
44 		{
45 			return *(hgap_ptr_ + 1);
46 		}
diagMatrix::ColumnIterator47 		inline _sv diag() const
48 		{
49 			return *score_ptr_;
50 		}
set_hgapMatrix::ColumnIterator51 		inline void set_hgap(const _sv& x)
52 		{
53 			*hgap_ptr_ = x;
54 		}
set_scoreMatrix::ColumnIterator55 		inline void set_score(const _sv& x)
56 		{
57 			*score_ptr_ = x;
58 		}
statMatrix::ColumnIterator59 		std::nullptr_t stat() {
60 			return nullptr;
61 		}
hstatMatrix::ColumnIterator62 		std::nullptr_t hstat() {
63 			return nullptr;
64 		}
trace_maskMatrix::ColumnIterator65 		std::nullptr_t trace_mask() {
66 			return nullptr;
67 		}
set_hstatMatrix::ColumnIterator68 		void set_hstat(std::nullptr_t) {}
set_zeroMatrix::ColumnIterator69 		inline void set_zero() {}
70 		_sv *hgap_ptr_, *score_ptr_;
71 	};
MatrixMatrix72 	Matrix(int band, size_t cols):
73 		band_(band)
74 	{
75 		hgap_.resize(band + 1);
76 		score_.resize(band);
77 		std::fill(hgap_.begin(), hgap_.end(), _sv());
78 		std::fill(score_.begin(), score_.end(), _sv());
79 
80 	}
beginMatrix81 	inline ColumnIterator begin(int offset, int col)
82 	{
83 		return ColumnIterator(&hgap_[offset], &score_[offset]);
84 	}
bandMatrix85 	int band() const {
86 		return band_;
87 	}
88 	_sv operator[](int i) const {
89 		return score_[i];
90 	}
91 #ifdef __APPLE__
92 	MemBuffer<_sv> hgap_, score_;
93 #else
94 	static thread_local MemBuffer<_sv> hgap_, score_;
95 #endif
96 private:
97 	int band_;
98 };
99 
100 template<typename _sv>
101 struct TracebackMatrix
102 {
103 
104 	typedef void* Stat;
105 	typedef typename ::DISPATCH_ARCH::ScoreTraits<_sv>::Score Score;
106 	static constexpr int CHANNELS = ::DISPATCH_ARCH::ScoreTraits<_sv>::CHANNELS;
107 
108 	struct ColumnIterator
109 	{
ColumnIteratorTracebackMatrix::ColumnIterator110 		ColumnIterator(_sv* hgap_front, _sv* score_front, _sv* score_front1) :
111 			hgap_ptr_(hgap_front),
112 			score_ptr_(score_front),
113 			score_ptr1_(score_front1)
114 		{ }
115 		inline void operator++()
116 		{
117 			++hgap_ptr_; ++score_ptr_; ++score_ptr1_;
118 		}
hgapTracebackMatrix::ColumnIterator119 		inline _sv hgap() const
120 		{
121 			return *(hgap_ptr_ + 1);
122 		}
diagTracebackMatrix::ColumnIterator123 		inline _sv diag() const
124 		{
125 			return *score_ptr_;
126 		}
set_hgapTracebackMatrix::ColumnIterator127 		inline void set_hgap(const _sv& x)
128 		{
129 			*hgap_ptr_ = x;
130 		}
set_scoreTracebackMatrix::ColumnIterator131 		inline void set_score(const _sv& x)
132 		{
133 			*score_ptr1_ = x;
134 		}
set_zeroTracebackMatrix::ColumnIterator135 		void set_zero()
136 		{
137 			*(score_ptr1_ - 1) = _sv();
138 		}
statTracebackMatrix::ColumnIterator139 		std::nullptr_t stat() {
140 			return nullptr;
141 		}
hstatTracebackMatrix::ColumnIterator142 		std::nullptr_t hstat() {
143 			return nullptr;
144 		}
trace_maskTracebackMatrix::ColumnIterator145 		std::nullptr_t trace_mask() {
146 			return nullptr;
147 		}
set_hstatTracebackMatrix::ColumnIterator148 		void set_hstat(std::nullptr_t) {}
149 		_sv *hgap_ptr_, *score_ptr_, *score_ptr1_;
150 	};
151 
152 	struct TracebackIterator
153 	{
TracebackIteratorTracebackMatrix::TracebackIterator154 		TracebackIterator(const Score *score, size_t band, int i, int j) :
155 			band_(band),
156 			score_(score),
157 			i(i),
158 			j(j)
159 		{
160 			assert(i >= 0 && j >= 0);
161 		}
scoreTracebackMatrix::TracebackIterator162 		Score score() const
163 		{
164 			return *score_;
165 		}
diagTracebackMatrix::TracebackIterator166 		Score diag() const
167 		{
168 			return *(score_ - band_ * CHANNELS);
169 		}
walk_diagonalTracebackMatrix::TracebackIterator170 		void walk_diagonal()
171 		{
172 			score_ -= band_ * ::DISPATCH_ARCH::ScoreTraits<_sv>::CHANNELS;
173 			--i;
174 			--j;
175 			assert(i >= -1 && j >= -1);
176 		}
walk_gapTracebackMatrix::TracebackIterator177 		pair<Edit_operation, int> walk_gap(int d0, int d1)
178 		{
179 			const int i0 = std::max(d0 + j, 0), j0 = std::max(i - d1, -1);
180 			const Score *h = score_ - (band_ - 1) * CHANNELS, *h0 = score_ - (j - j0) * (band_ - 1) * CHANNELS;
181 			const Score *v = score_ - CHANNELS, *v0 = score_ - (i - i0 + 1) * CHANNELS;
182 			const Score e = score_matrix.gap_extend();
183 			Score score = this->score() + (Score)score_matrix.gap_open() + e;
184 			int l = 1;
185 			while (v > v0 && h > h0) {
186 				if (score == *h) {
187 					walk_hgap(h, l);
188 					return std::make_pair(op_deletion, l);
189 				}
190 				else if (score == *v) {
191 					walk_vgap(v, l);
192 					return std::make_pair(op_insertion, l);
193 				}
194 				h -= (band_ - 1) * CHANNELS;
195 				v -= CHANNELS;
196 				++l;
197 				score += e;
198 			}
199 			while (v > v0) {
200 				if (score == *v) {
201 					walk_vgap(v, l);
202 					return std::make_pair(op_insertion, l);
203 				}
204 				v -= CHANNELS;
205 				++l;
206 				score += e;
207 			}
208 			while (h > h0) {
209 				if (score == *h) {
210 					walk_hgap(h, l);
211 					return std::make_pair(op_deletion, l);
212 				}
213 				h -= (band_ - 1) * CHANNELS;
214 				++l;
215 				score += e;
216 			}
217 			throw std::runtime_error("Traceback error.");
218 		}
walk_hgapTracebackMatrix::TracebackIterator219 		void walk_hgap(const Score *h, int l)
220 		{
221 			score_ = h;
222 			j -= l;
223 			assert(i >= -1 && j >= -1);
224 		}
walk_vgapTracebackMatrix::TracebackIterator225 		void walk_vgap(const Score *v, int l)
226 		{
227 			score_ = v;
228 			i -= l;
229 			assert(i >= -1 && j >= -1);
230 		}
231 		const size_t band_;
232 		const Score *score_;
233 		int i, j;
234 	};
235 
tracebackTracebackMatrix236 	TracebackIterator traceback(size_t col, int i0, int j, int query_len, size_t channel, Score score) const
237 	{
238 		const int i_ = std::max(-i0, 0),
239 			i1 = (int)std::min(band_, size_t(query_len - i0));
240 		const Score *s = (Score*)(&score_[col*band_ + i_]) + channel;
241 		for (int i = i_; i < i1; ++i, s += CHANNELS)
242 			if (*s == score)
243 				return TracebackIterator(s, band_, i0 + i, j);
244 		throw std::runtime_error("Trackback error.");
245 	}
246 
TracebackMatrixTracebackMatrix247 	TracebackMatrix(size_t band, size_t cols) :
248 		band_(band)
249 	{
250 		hgap_.resize(band + 1);
251 		score_.resize(band * (cols + 1));
252 		std::fill(hgap_.begin(), hgap_.end(), _sv());
253 		std::fill(score_.begin(), score_.begin() + band, _sv());
254 	}
255 
beginTracebackMatrix256 	inline ColumnIterator begin(size_t offset, size_t col)
257 	{
258 		return ColumnIterator(&hgap_[offset], &score_[col*band_ + offset], &score_[(col + 1)*band_ + offset]);
259 	}
260 
261 	_sv operator[](int i) const {
262 		return _sv();
263 	}
264 
265 	MemBuffer<_sv> hgap_, score_;
266 
267 private:
268 
269 	const size_t band_;
270 
271 };
272 
273 template<typename _sv>
274 struct TracebackVectorMatrix
275 {
276 	typedef typename ::DISPATCH_ARCH::ScoreTraits<_sv>::TraceMask TraceMask;
277 	typedef void* Stat;
278 	struct ColumnIterator
279 	{
ColumnIteratorTracebackVectorMatrix::ColumnIterator280 		ColumnIterator(_sv* hgap_front, _sv* score_front, TraceMask* trace_mask_front) :
281 			hgap_ptr_(hgap_front),
282 			score_ptr_(score_front),
283 			trace_mask_ptr_(trace_mask_front)
284 		{ }
285 		inline void operator++()
286 		{
287 			++hgap_ptr_; ++score_ptr_; ++trace_mask_ptr_;
288 		}
hgapTracebackVectorMatrix::ColumnIterator289 		inline _sv hgap() const
290 		{
291 			return *(hgap_ptr_ + 1);
292 		}
diagTracebackVectorMatrix::ColumnIterator293 		inline _sv diag() const
294 		{
295 			return *score_ptr_;
296 		}
trace_maskTracebackVectorMatrix::ColumnIterator297 		inline TraceMask* trace_mask() {
298 			return trace_mask_ptr_;
299 		}
set_hgapTracebackVectorMatrix::ColumnIterator300 		inline void set_hgap(const _sv& x)
301 		{
302 			*hgap_ptr_ = x;
303 		}
set_scoreTracebackVectorMatrix::ColumnIterator304 		inline void set_score(const _sv& x)
305 		{
306 			*score_ptr_ = x;
307 		}
statTracebackVectorMatrix::ColumnIterator308 		std::nullptr_t stat() {
309 			return nullptr;
310 		}
hstatTracebackVectorMatrix::ColumnIterator311 		std::nullptr_t hstat() {
312 			return nullptr;
313 		}
set_hstatTracebackVectorMatrix::ColumnIterator314 		void set_hstat(std::nullptr_t) {}
set_zeroTracebackVectorMatrix::ColumnIterator315 		inline void set_zero() {}
316 		_sv *hgap_ptr_, *score_ptr_;
317 		TraceMask* trace_mask_ptr_;
318 	};
319 
320 	struct TracebackIterator
321 	{
TracebackIteratorTracebackVectorMatrix::TracebackIterator322 		TracebackIterator(const TraceMask *mask, size_t band, int i, int j, size_t channel) :
323 			band_(band),
324 			mask_(mask),
325 			channel_mask_vgap(TraceMask::vmask(channel)),
326 			channel_mask_hgap(TraceMask::hmask(channel)),
327 			i(i),
328 			j(j)
329 		{
330 			assert(i >= 0 && j >= 0);
331 		}
maskTracebackVectorMatrix::TracebackIterator332 		TraceMask mask() const {
333 			return *mask_;
334 		}
walk_diagonalTracebackVectorMatrix::TracebackIterator335 		void walk_diagonal()
336 		{
337 			mask_ -= band_;
338 			--i;
339 			--j;
340 			assert(i >= -1 && j >= -1);
341 		}
walk_gapTracebackVectorMatrix::TracebackIterator342 		pair<Edit_operation, int> walk_gap()
343 		{
344 			if (mask_->gap & channel_mask_vgap) {
345 				int l = 0;
346 				do {
347 					++l;
348 					--i;
349 					--mask_;
350 				} while (((mask_->open & channel_mask_vgap) == 0) && (i > 0));
351 				return std::make_pair(op_insertion, l);
352 			}
353 			else {
354 				int l = 0;
355 				do {
356 					++l;
357 					--j;
358 					mask_ -= band_ - 1;
359 				} while (((mask_->open & channel_mask_hgap) == 0) && (j > 0));
360 				return std::make_pair(op_deletion, l);
361 			}
362 		}
363 		const size_t band_;
364 		const TraceMask* mask_;
365 		const decltype(TraceMask::gap) channel_mask_vgap, channel_mask_hgap;
366 		int i, j;
367 	};
368 
tracebackTracebackVectorMatrix369 	TracebackIterator traceback(size_t col, int i0, int band_i, int j, int query_len, size_t channel) const
370 	{
371 		return TracebackIterator(&trace_mask_[col*band_ + band_i], band_, i0 + band_i, j, channel);
372 	}
373 
TracebackVectorMatrixTracebackVectorMatrix374 	TracebackVectorMatrix(int band, size_t cols) :
375 		band_(band)
376 	{
377 		hgap_.resize(band + 1);
378 		score_.resize(band);
379 		trace_mask_.resize((cols + 1) * band);
380 		std::fill(hgap_.begin(), hgap_.end(), _sv());
381 		std::fill(score_.begin(), score_.end(), _sv());
382 	}
383 
beginTracebackVectorMatrix384 	inline ColumnIterator begin(int offset, int col)
385 	{
386 		return ColumnIterator(&hgap_[offset], &score_[offset], &trace_mask_[size_t(col + 1)*(size_t)band_ + (size_t)offset]);
387 	}
388 
bandTracebackVectorMatrix389 	int band() const {
390 		return band_;
391 	}
392 
393 	_sv operator[](int i) const {
394 		return _sv();
395 	}
396 
397 #ifdef __APPLE__
398 	MemBuffer<_sv> hgap_, score_;
399 #else
400 	static thread_local MemBuffer<_sv> hgap_, score_;
401 #endif
402 	MemBuffer<TraceMask> trace_mask_;
403 private:
404 	int band_;
405 };
406 
407 #ifndef __APPLE__
408 template<typename _sv> thread_local MemBuffer<_sv> Matrix<_sv>::hgap_;
409 template<typename _sv> thread_local MemBuffer<_sv> Matrix<_sv>::score_;
410 template<typename _sv> thread_local MemBuffer<_sv> TracebackVectorMatrix<_sv>::hgap_;
411 template<typename _sv> thread_local MemBuffer<_sv> TracebackVectorMatrix<_sv>::score_;
412 #endif
413 
414 template<typename Sv, bool Traceback>
415 struct SelectMatrix {
416 };
417 
418 template<typename Sv>
419 struct SelectMatrix<Sv, true> {
420 	using Type = TracebackVectorMatrix<Sv>;
421 };
422 
423 template<typename Sv>
424 struct SelectMatrix<Sv, false> {
425 	using Type = Matrix<Sv>;
426 };
427 
428 }}}