1 /**** 2 DIAMOND protein aligner 3 Copyright (C) 2013-2020 Max Planck Society for the Advancement of Science e.V. 4 Benjamin Buchfink 5 Eberhard Karls Universitaet Tuebingen 6 7 Code developed by Benjamin Buchfink <benjamin.buchfink@tue.mpg.de> 8 9 This program is free software: you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation, either version 3 of the License, or 12 (at your option) any later version. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program. If not, see <http://www.gnu.org/licenses/>. 21 ****/ 22 23 #pragma once 24 #include <limits> 25 #include <ostream> 26 #include <math.h> 27 #include <stdint.h> 28 #include "../util/log_stream.h" 29 #include "../basic/value.h" 30 #include "../lib/alp/sls_alignment_evaluer.hpp" 31 #include "../stats/standard_matrix.h" 32 33 const double LN_2 = 0.69314718055994530941723212145818; 34 35 struct ScoreMatrix 36 { 37 38 struct Custom {}; 39 ScoreMatrixScoreMatrix40 ScoreMatrix() :ln_k_(0.0) {} 41 ScoreMatrix(const std::string& matrix, int gap_open, int gap_extend, int frame_shift, int stop_match_score, uint64_t db_letters = 0, int scale = 1); 42 ScoreMatrix(const std::string &matrix_file, int gap_open, int gap_extend, int stop_match_score, const Custom&, uint64_t db_letters = 0); 43 44 friend std::ostream& operator<<(std::ostream& s, const ScoreMatrix&m); 45 matrix8ScoreMatrix46 const int8_t* matrix8() const 47 { return matrix8_.data; } 48 matrix8_lowScoreMatrix49 const int8_t* matrix8_low() const 50 { 51 return matrix8_low_.data; 52 } 53 matrix8_highScoreMatrix54 const int8_t* matrix8_high() const 55 { 56 return matrix8_high_.data; 57 } 58 matrix8u_lowScoreMatrix59 const int8_t* matrix8u_low() const 60 { 61 return matrix8u_low_.data; 62 } 63 matrix8u_highScoreMatrix64 const int8_t* matrix8u_high() const 65 { 66 return matrix8u_high_.data; 67 } 68 matrix8uScoreMatrix69 const uint8_t* matrix8u() const 70 { return matrix8u_.data; } 71 matrix16ScoreMatrix72 const int16_t* matrix16() const 73 { return matrix16_.data; } 74 matrix32ScoreMatrix75 const int* matrix32() const 76 { 77 return matrix32_.data; 78 } 79 matrix32_scaled_pointersScoreMatrix80 std::vector<const int*> matrix32_scaled_pointers() const { 81 return matrix32_scaled_.pointers(); 82 } 83 operatorScoreMatrix84 int operator()(Letter a, Letter b) const 85 { 86 return matrix32_.data[(int(a) << 5) + int(b)]; 87 } 88 operatorScoreMatrix89 int operator()(size_t a, size_t b) const 90 { 91 return matrix32_.data[(a << 5) + b]; 92 } 93 rowScoreMatrix94 const int* row(Letter a) const 95 { 96 return &matrix32_.data[(int)a << 5]; 97 } 98 biased_scoreScoreMatrix99 uint8_t biased_score(Letter a, Letter b) const 100 { return matrix8u_.data[(int(a) << 5) + int(b)]; } 101 biasScoreMatrix102 char bias() const 103 { return bias_; } 104 bitscoreScoreMatrix105 double bitscore(double raw_score) const 106 { 107 const double s = std::round(raw_score / scale_); // maintain compatibility with BLAST 108 return ( lambda() * s - ln_k()) / LN_2; 109 } 110 rawscoreScoreMatrix111 double rawscore(double bitscore, double) const 112 { return (bitscore*LN_2 + ln_k()) / lambda(); } 113 rawscoreScoreMatrix114 int rawscore(double bitscore) const 115 { return (int)ceil(rawscore(bitscore, double ())); } 116 117 double evalue(int raw_score, unsigned query_len, unsigned subject_len) const; 118 double evalue_norm(int raw_score, unsigned query_len, unsigned subject_len) const; 119 evalue_normScoreMatrix120 double evalue_norm(int raw_score, int query_len) const 121 { 122 return 1e9 * query_len * pow(2, -bitscore(raw_score * scale_)); 123 } 124 125 /*double bitscore(double evalue, unsigned query_len) const 126 { return -log(evalue/db_letters_/query_len)/log(2); }*/ 127 bitscore_normScoreMatrix128 double bitscore_norm(double evalue, unsigned query_len) const 129 { 130 return -log(evalue / 1e9 / query_len) / log(2); 131 } 132 lambdaScoreMatrix133 double lambda() const 134 { 135 return evaluer.parameters().lambda; 136 } 137 kScoreMatrix138 double k() const 139 { 140 return evaluer.parameters().K; 141 } 142 ln_kScoreMatrix143 double ln_k() const 144 { 145 return ln_k_; 146 } 147 148 int8_t low_score() const; 149 int8_t high_score() const; 150 gap_openScoreMatrix151 int gap_open() const 152 { 153 return gap_open_; 154 } 155 gap_extendScoreMatrix156 int gap_extend() const 157 { 158 return gap_extend_; 159 } 160 frame_shiftScoreMatrix161 int frame_shift() const 162 { 163 return frame_shift_; 164 } 165 db_lettersScoreMatrix166 uint64_t db_letters() const 167 { 168 return (uint64_t)db_letters_; 169 } 170 set_db_lettersScoreMatrix171 void set_db_letters(uint64_t n) 172 { 173 db_letters_ = (double)n; 174 } 175 joint_probsScoreMatrix176 const double* joint_probs() const { 177 return (const double*)standard_matrix_->joint_probs; 178 } 179 background_freqsScoreMatrix180 const double* background_freqs() const { 181 return standard_matrix_->background_freqs.data(); 182 } 183 ungapped_lambdaScoreMatrix184 double ungapped_lambda() const { 185 return standard_matrix_->ungapped_constants().Lambda; 186 } 187 ideal_lambdaScoreMatrix188 double ideal_lambda() const { 189 return ideal_lambda_; 190 } 191 freq_ratiosScoreMatrix192 const Stats::FreqRatios& freq_ratios() const { 193 return standard_matrix_->freq_ratios; 194 } 195 background_scoresScoreMatrix196 const std::array<double, TRUE_AA>& background_scores() const { 197 return background_scores_; 198 } 199 200 double avg_id_score() const; 201 bool report_cutoff(int score, double evalue) const; 202 203 private: 204 205 template<typename _t> 206 struct Scores 207 { ScoresScoreMatrix::Scores208 Scores() {} 209 Scores(const int8_t *scores, int stop_match_score = 1, int8_t bias = 0, unsigned modulo = 32, unsigned offset = 0) 210 { 211 const unsigned n = value_traits.alphabet_size; 212 for (unsigned i = 0; i < 32; ++i) 213 for (unsigned j = 0; j < 32; ++j) { 214 const unsigned j2 = j % modulo + offset; 215 data[i * 32 + j] = i < n && j2 < n ? (_t)(scores[i * n + j2] + (int)bias) : SCHAR_MIN; 216 } 217 if (stop_match_score != 1) 218 data[24 * 32 + 24] = stop_match_score; 219 } 220 Scores(const double (&freq_ratios)[Stats::NCBI_ALPH][Stats::NCBI_ALPH], double lambda, const int8_t* scores, int scale); 221 std::vector<const _t*> pointers() const; 222 friend std::ostream& operator<<(std::ostream& s, const Scores& scores) { 223 for (int i = 0; i < 20; ++i) { 224 for (int j = 0; j < 20; ++j) 225 s << scores.data[i * 32 + j] << '\t'; 226 s << std::endl; 227 } 228 return s; 229 } 230 alignas(32) _t data[32 * 32]; 231 }; 232 233 void init_background_scores(); 234 235 const Stats::StandardMatrix* standard_matrix_; 236 const int8_t* score_array_; 237 int gap_open_, gap_extend_, frame_shift_; 238 double db_letters_, ln_k_, scale_; 239 std::string name_; 240 Scores<int8_t> matrix8_; 241 Scores<int> matrix32_, matrix32_scaled_; 242 int8_t bias_; 243 double ideal_lambda_; 244 Scores<uint8_t> matrix8u_; 245 Scores<int8_t> matrix8_low_; 246 Scores<int8_t> matrix8_high_; 247 Scores<int8_t> matrix8u_low_; 248 Scores<int8_t> matrix8u_high_; 249 Scores<int16_t> matrix16_; 250 std::array<double, TRUE_AA> background_scores_; 251 Sls::AlignmentEvaluer evaluer; 252 253 }; 254 255 extern ScoreMatrix score_matrix; 256