1 /****
2 DIAMOND protein aligner
3 Copyright (C) 2013-2020 Max Planck Society for the Advancement of Science e.V.
4                         Benjamin Buchfink
5                         Eberhard Karls Universitaet Tuebingen
6 
7 Code developed by Benjamin Buchfink <benjamin.buchfink@tue.mpg.de>
8 
9 This program is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 ****/
22 
23 #pragma once
24 #include <limits>
25 #include <ostream>
26 #include <math.h>
27 #include <stdint.h>
28 #include "../util/log_stream.h"
29 #include "../basic/value.h"
30 #include "../lib/alp/sls_alignment_evaluer.hpp"
31 #include "../stats/standard_matrix.h"
32 
33 const double LN_2 = 0.69314718055994530941723212145818;
34 
35 struct ScoreMatrix
36 {
37 
38 	struct Custom {};
39 
ScoreMatrixScoreMatrix40 	ScoreMatrix() :ln_k_(0.0) {}
41 	ScoreMatrix(const std::string& matrix, int gap_open, int gap_extend, int frame_shift, int stop_match_score, uint64_t db_letters = 0, int scale = 1);
42 	ScoreMatrix(const std::string &matrix_file, int gap_open, int gap_extend, int stop_match_score, const Custom&, uint64_t db_letters = 0);
43 
44 	friend std::ostream& operator<<(std::ostream& s, const ScoreMatrix&m);
45 
matrix8ScoreMatrix46 	const int8_t* matrix8() const
47 	{ return matrix8_.data; }
48 
matrix8_lowScoreMatrix49 	const int8_t* matrix8_low() const
50 	{
51 		return matrix8_low_.data;
52 	}
53 
matrix8_highScoreMatrix54 	const int8_t* matrix8_high() const
55 	{
56 		return matrix8_high_.data;
57 	}
58 
matrix8u_lowScoreMatrix59 	const int8_t* matrix8u_low() const
60 	{
61 		return matrix8u_low_.data;
62 	}
63 
matrix8u_highScoreMatrix64 	const int8_t* matrix8u_high() const
65 	{
66 		return matrix8u_high_.data;
67 	}
68 
matrix8uScoreMatrix69 	const uint8_t* matrix8u() const
70 	{ return matrix8u_.data; }
71 
matrix16ScoreMatrix72 	const int16_t* matrix16() const
73 	{ return matrix16_.data; }
74 
matrix32ScoreMatrix75 	const int* matrix32() const
76 	{
77 		return matrix32_.data;
78 	}
79 
matrix32_scaled_pointersScoreMatrix80 	std::vector<const int*> matrix32_scaled_pointers() const {
81 		return matrix32_scaled_.pointers();
82 	}
83 
operatorScoreMatrix84 	int operator()(Letter a, Letter b) const
85 	{
86 		return matrix32_.data[(int(a) << 5) + int(b)];
87 	}
88 
operatorScoreMatrix89 	int operator()(size_t a, size_t b) const
90 	{
91 		return matrix32_.data[(a << 5) + b];
92 	}
93 
rowScoreMatrix94 	const int* row(Letter a) const
95 	{
96 		return &matrix32_.data[(int)a << 5];
97 	}
98 
biased_scoreScoreMatrix99 	uint8_t biased_score(Letter a, Letter b) const
100 	{ return matrix8u_.data[(int(a) << 5) + int(b)]; }
101 
biasScoreMatrix102 	char bias() const
103 	{ return bias_; }
104 
bitscoreScoreMatrix105 	double bitscore(double raw_score) const
106 	{
107 		const double s = std::round(raw_score / scale_);	// maintain compatibility with BLAST
108 		return ( lambda() * s - ln_k()) / LN_2;
109 	}
110 
rawscoreScoreMatrix111 	double rawscore(double bitscore, double) const
112 	{ return (bitscore*LN_2 + ln_k()) / lambda(); }
113 
rawscoreScoreMatrix114 	int rawscore(double bitscore) const
115 	{ return (int)ceil(rawscore(bitscore, double ())); }
116 
117 	double evalue(int raw_score, unsigned query_len, unsigned subject_len) const;
118 	double evalue_norm(int raw_score, unsigned query_len, unsigned subject_len) const;
119 
evalue_normScoreMatrix120 	double evalue_norm(int raw_score, int query_len) const
121 	{
122 		return 1e9 * query_len * pow(2, -bitscore(raw_score * scale_));
123 	}
124 
125 	/*double bitscore(double evalue, unsigned query_len) const
126 	{ return -log(evalue/db_letters_/query_len)/log(2); }*/
127 
bitscore_normScoreMatrix128 	double bitscore_norm(double evalue, unsigned query_len) const
129 	{
130 		return -log(evalue / 1e9 / query_len) / log(2);
131 	}
132 
lambdaScoreMatrix133 	double lambda() const
134 	{
135 		return evaluer.parameters().lambda;
136 	}
137 
kScoreMatrix138 	double k() const
139 	{
140 		return evaluer.parameters().K;
141 	}
142 
ln_kScoreMatrix143 	double ln_k() const
144 	{
145 		return ln_k_;
146 	}
147 
148 	int8_t low_score() const;
149 	int8_t high_score() const;
150 
gap_openScoreMatrix151 	int gap_open() const
152 	{
153 		return gap_open_;
154 	}
155 
gap_extendScoreMatrix156 	int gap_extend() const
157 	{
158 		return gap_extend_;
159 	}
160 
frame_shiftScoreMatrix161 	int frame_shift() const
162 	{
163 		return frame_shift_;
164 	}
165 
db_lettersScoreMatrix166 	uint64_t db_letters() const
167 	{
168 		return (uint64_t)db_letters_;
169 	}
170 
set_db_lettersScoreMatrix171 	void set_db_letters(uint64_t n)
172 	{
173 		db_letters_ = (double)n;
174 	}
175 
joint_probsScoreMatrix176 	const double* joint_probs() const {
177 		return (const double*)standard_matrix_->joint_probs;
178 	}
179 
background_freqsScoreMatrix180 	const double* background_freqs() const {
181 		return standard_matrix_->background_freqs.data();
182 	}
183 
ungapped_lambdaScoreMatrix184 	double ungapped_lambda() const {
185 		return standard_matrix_->ungapped_constants().Lambda;
186 	}
187 
ideal_lambdaScoreMatrix188 	double ideal_lambda() const {
189 		return ideal_lambda_;
190 	}
191 
freq_ratiosScoreMatrix192 	const Stats::FreqRatios& freq_ratios() const {
193 		return standard_matrix_->freq_ratios;
194 	}
195 
background_scoresScoreMatrix196 	const std::array<double, TRUE_AA>& background_scores() const {
197 		return background_scores_;
198 	}
199 
200 	double avg_id_score() const;
201 	bool report_cutoff(int score, double evalue) const;
202 
203 private:
204 
205 	template<typename _t>
206 	struct Scores
207 	{
ScoresScoreMatrix::Scores208 		Scores() {}
209 		Scores(const int8_t *scores, int stop_match_score = 1, int8_t bias = 0, unsigned modulo = 32, unsigned offset = 0)
210 		{
211 			const unsigned n = value_traits.alphabet_size;
212 			for (unsigned i = 0; i < 32; ++i)
213 				for (unsigned j = 0; j < 32; ++j) {
214 					const unsigned j2 = j % modulo + offset;
215 					data[i * 32 + j] = i < n && j2 < n ? (_t)(scores[i * n + j2] + (int)bias) : SCHAR_MIN;
216 				}
217 			if (stop_match_score != 1)
218 				data[24 * 32 + 24] = stop_match_score;
219 		}
220 		Scores(const double (&freq_ratios)[Stats::NCBI_ALPH][Stats::NCBI_ALPH], double lambda, const int8_t* scores, int scale);
221 		std::vector<const _t*> pointers() const;
222 		friend std::ostream& operator<<(std::ostream& s, const Scores& scores) {
223 			for (int i = 0; i < 20; ++i) {
224 				for (int j = 0; j < 20; ++j)
225 					s << scores.data[i * 32 + j] << '\t';
226 				s << std::endl;
227 			}
228 			return s;
229 		}
230 		alignas(32) _t data[32 * 32];
231 	};
232 
233 	void init_background_scores();
234 
235 	const Stats::StandardMatrix* standard_matrix_;
236 	const int8_t* score_array_;
237 	int gap_open_, gap_extend_, frame_shift_;
238 	double db_letters_, ln_k_, scale_;
239 	std::string name_;
240 	Scores<int8_t> matrix8_;
241 	Scores<int> matrix32_, matrix32_scaled_;
242 	int8_t bias_;
243 	double ideal_lambda_;
244 	Scores<uint8_t> matrix8u_;
245 	Scores<int8_t> matrix8_low_;
246 	Scores<int8_t> matrix8_high_;
247 	Scores<int8_t> matrix8u_low_;
248 	Scores<int8_t> matrix8u_high_;
249 	Scores<int16_t> matrix16_;
250 	std::array<double, TRUE_AA> background_scores_;
251 	Sls::AlignmentEvaluer evaluer;
252 
253 };
254 
255 extern ScoreMatrix score_matrix;
256