1 /**** 2 DIAMOND protein aligner 3 Copyright (C) 2020 QIAGEN A/S (Aarhus, Denmark) 4 Code developed by Patrick Ettenhuber <patrick.ettenhuber@qiagen.com> 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18 ****/ 19 20 #include <map> 21 #include <string> 22 #include "../stats/score_matrix.h" 23 #include "../basic/match.h" 24 25 class Variable{ 26 public: 27 virtual double get(const HspContext& r) = 0; ~Variable()28 virtual ~Variable(){}; 29 }; 30 31 class QueryLength: public Variable{ 32 public: get_name()33 static const std::string get_name(){ 34 return "qlen"; 35 } get(const HspContext & r)36 double get(const HspContext& r){ 37 return r.query.source().length(); 38 } 39 }; 40 class SubjectLength: public Variable{ 41 public: get_name()42 static const std::string get_name(){ 43 return "slen"; 44 } get(const HspContext & r)45 double get(const HspContext& r){ 46 return r.subject_len; 47 } 48 }; 49 class QueryStart: public Variable{ 50 public: get_name()51 static const std::string get_name(){ 52 return "qstart"; 53 } get(const HspContext & r)54 double get(const HspContext& r){ 55 return r.oriented_query_range().begin_ + 1; 56 } 57 }; 58 class QueryEnd: public Variable{ 59 public: get_name()60 static const std::string get_name(){ 61 return "qend"; 62 } get(const HspContext & r)63 double get(const HspContext& r){ 64 return r.oriented_query_range().end_ + 1; 65 } 66 }; 67 class SubjectStart: public Variable{ 68 public: get_name()69 static const std::string get_name(){ 70 return "sstart"; 71 } get(const HspContext & r)72 double get(const HspContext& r){ 73 return r.subject_range().begin_ + 1; 74 } 75 }; 76 class SubjectEnd: public Variable{ 77 public: get_name()78 static const std::string get_name(){ 79 return "send"; 80 } get(const HspContext & r)81 double get(const HspContext& r){ 82 return r.subject_range().end_; 83 } 84 }; 85 class EValue: public Variable{ 86 public: get_name()87 static const std::string get_name(){ 88 return "evalue"; 89 } get(const HspContext & r)90 double get(const HspContext& r){ 91 return r.evalue(); 92 } 93 }; 94 class BitScore: public Variable{ 95 public: get_name()96 static const std::string get_name(){ 97 return "bitscore"; 98 } get(const HspContext & r)99 double get(const HspContext& r){ 100 return r.bit_score(); 101 } 102 }; 103 class Score: public Variable{ 104 public: get_name()105 static const std::string get_name(){ 106 return "score"; 107 } get(const HspContext & r)108 double get(const HspContext& r){ 109 return r.score(); 110 } 111 }; 112 class Length: public Variable{ 113 public: get_name()114 static const std::string get_name(){ 115 return "length"; 116 } get(const HspContext & r)117 double get(const HspContext& r){ 118 return r.length(); 119 } 120 }; 121 class PercentIdenticalMatches: public Variable{ 122 public: get_name()123 static const std::string get_name(){ 124 return "pident"; 125 } get(const HspContext & r)126 double get(const HspContext& r){ 127 return (double)r.identities() * 100 / r.length(); 128 } 129 }; 130 class NumberIdenticalMatches: public Variable{ 131 public: get_name()132 static const std::string get_name(){ 133 return "nident"; 134 } get(const HspContext & r)135 double get(const HspContext& r){ 136 return r.identities(); 137 } 138 }; 139 class NumberMismatches: public Variable{ 140 public: get_name()141 static const std::string get_name(){ 142 return "mismatch"; 143 } get(const HspContext & r)144 double get(const HspContext& r){ 145 return r.mismatches(); 146 } 147 }; 148 class NumberPositiveMatches: public Variable{ 149 public: get_name()150 static const std::string get_name(){ 151 return "positive"; 152 } get(const HspContext & r)153 double get(const HspContext& r){ 154 return r.positives(); 155 } 156 }; 157 class NumberGapOpenings: public Variable{ 158 public: get_name()159 static const std::string get_name(){ 160 return "gapopen"; 161 } get(const HspContext & r)162 double get(const HspContext& r){ 163 return r.gap_openings(); 164 } 165 }; 166 class NumberGaps: public Variable{ 167 public: get_name()168 static const std::string get_name(){ 169 return "gaps"; 170 } get(const HspContext & r)171 double get(const HspContext& r){ 172 return r.gaps(); 173 } 174 }; 175 class PercentagePositiveMatches: public Variable{ 176 public: get_name()177 static const std::string get_name(){ 178 return "ppos"; 179 } get(const HspContext & r)180 double get(const HspContext& r){ 181 return (double)r.positives() * 100.0 / r.length(); 182 } 183 }; 184 class QueryFrame: public Variable{ 185 public: get_name()186 static const std::string get_name(){ 187 return "qframe"; 188 } get(const HspContext & r)189 double get(const HspContext& r){ 190 return r.blast_query_frame(); 191 } 192 }; 193 class QueryCoveragePerHsp: public Variable{ 194 public: get_name()195 static const std::string get_name(){ 196 return "qcovhsp"; 197 } get(const HspContext & r)198 double get(const HspContext& r){ 199 return (double)r.query_source_range().length()*100.0 / r.query.source().length(); 200 } 201 }; 202 class SubjectCoveragePerHsp: public Variable{ 203 public: get_name()204 static const std::string get_name(){ 205 return "scovhsp"; 206 } get(const HspContext & r)207 double get(const HspContext& r){ 208 return (double)r.subject_range().length() * 100.0 / r.subject_len; 209 } 210 }; 211 class UngappedScore: public Variable{ 212 public: get_name()213 static const std::string get_name(){ 214 return "ungapped_score"; 215 } get(const HspContext & r)216 double get(const HspContext& r){ 217 return score_matrix.bitscore(r.ungapped_score); 218 } 219 }; 220 221 class StaticVariableRegistry{ 222 std::map<std::string, Variable*> regMap; 223 public: StaticVariableRegistry()224 StaticVariableRegistry(){ 225 // To include new variables add the instantiation here and in the clustering_variable.cpp file. Then add it to the StaticConstructor below 226 regMap[QueryLength::get_name()] = new QueryLength(); 227 regMap[SubjectLength::get_name()] = new SubjectLength(); 228 regMap[QueryStart::get_name()] = new QueryStart(); 229 regMap[QueryEnd::get_name()] = new QueryEnd(); 230 regMap[SubjectStart::get_name()] = new SubjectStart(); 231 regMap[SubjectEnd::get_name()] = new SubjectEnd(); 232 regMap[EValue::get_name()] = new EValue(); 233 regMap[BitScore::get_name()] = new BitScore(); 234 regMap[Score::get_name()] = new Score(); 235 regMap[Length::get_name()] = new Length(); 236 regMap[PercentIdenticalMatches::get_name()] = new PercentIdenticalMatches(); 237 regMap[NumberIdenticalMatches::get_name()] = new NumberIdenticalMatches(); 238 regMap[NumberMismatches::get_name()] = new NumberMismatches(); 239 regMap[NumberPositiveMatches::get_name()] = new NumberPositiveMatches(); 240 regMap[NumberGapOpenings::get_name()] = new NumberGapOpenings(); 241 regMap[NumberGaps::get_name()] = new NumberGaps(); 242 regMap[PercentagePositiveMatches::get_name()] = new PercentagePositiveMatches(); 243 regMap[QueryFrame::get_name()] = new QueryFrame(); 244 regMap[QueryCoveragePerHsp::get_name()] = new QueryCoveragePerHsp(); 245 regMap[SubjectCoveragePerHsp::get_name()] = new SubjectCoveragePerHsp(); 246 regMap[UngappedScore::get_name()] = new UngappedScore(); 247 } ~StaticVariableRegistry()248 ~StaticVariableRegistry(){ 249 for(auto it = regMap.begin(); it != regMap.end(); it++){ 250 delete it->second; 251 it->second = nullptr; 252 } 253 } get(std::string key)254 Variable* get(std::string key) const { 255 auto ca = regMap.find(key); 256 if(ca == regMap.end()){ 257 throw std::runtime_error(std::string("Unknown variable: ")+ key); 258 } 259 return ca->second; 260 } has(std::string key)261 bool has(std::string key) const { 262 return regMap.find(key) != regMap.end(); 263 } getKeys()264 vector<std::string> getKeys() const { 265 auto it = regMap.begin(); 266 vector<std::string> keys; 267 while(it != regMap.end()){ 268 keys.push_back(it->first); 269 it++; 270 } 271 return keys; 272 } 273 }; 274 275 class VariableRegistry{ 276 static StaticVariableRegistry vr; 277 public: get(std::string key)278 static Variable* get(std::string key){ 279 return vr.get(key); 280 } has(std::string key)281 static bool has(std::string key){ 282 return vr.has(key); 283 } getKeys()284 static vector<std::string> getKeys(){ 285 return vr.getKeys(); 286 } 287 }; 288