1 /****
2 DIAMOND protein aligner
3 Copyright (C) 2020 QIAGEN A/S (Aarhus, Denmark)
4 Code developed by Patrick Ettenhuber <patrick.ettenhuber@qiagen.com>
5 
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10 
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 ****/
19 
20 #include <map>
21 #include <string>
22 #include "../stats/score_matrix.h"
23 #include "../basic/match.h"
24 
25 class Variable{
26 public:
27 	virtual double get(const HspContext& r) = 0;
~Variable()28 	virtual ~Variable(){};
29 };
30 
31 class QueryLength: public Variable{
32 public:
get_name()33 	static const std::string get_name(){
34 		return "qlen";
35 	}
get(const HspContext & r)36 	double get(const HspContext& r){
37 		return r.query.source().length();
38 	}
39 };
40 class SubjectLength: public Variable{
41 public:
get_name()42 	static const std::string get_name(){
43 		return "slen";
44 	}
get(const HspContext & r)45 	double get(const HspContext& r){
46 		return r.subject_len;
47 	}
48 };
49 class QueryStart: public Variable{
50 public:
get_name()51 	static const std::string get_name(){
52 		return "qstart";
53 	}
get(const HspContext & r)54 	double get(const HspContext& r){
55 		return r.oriented_query_range().begin_ + 1;
56 	}
57 };
58 class QueryEnd: public Variable{
59 public:
get_name()60 	static const std::string get_name(){
61 		return "qend";
62 	}
get(const HspContext & r)63 	double get(const HspContext& r){
64 		return r.oriented_query_range().end_ + 1;
65 	}
66 };
67 class SubjectStart: public Variable{
68 public:
get_name()69 	static const std::string get_name(){
70 		return "sstart";
71 	}
get(const HspContext & r)72 	double get(const HspContext& r){
73 		return r.subject_range().begin_ + 1;
74 	}
75 };
76 class SubjectEnd: public Variable{
77 public:
get_name()78 	static const std::string get_name(){
79 		return "send";
80 	}
get(const HspContext & r)81 	double get(const HspContext& r){
82 		return r.subject_range().end_;
83 	}
84 };
85 class EValue: public Variable{
86 public:
get_name()87 	static const std::string get_name(){
88 		return "evalue";
89 	}
get(const HspContext & r)90 	double get(const HspContext& r){
91 		return r.evalue();
92 	}
93 };
94 class BitScore: public Variable{
95 public:
get_name()96 	static const std::string get_name(){
97 		return "bitscore";
98 	}
get(const HspContext & r)99 	double get(const HspContext& r){
100 		return r.bit_score();
101 	}
102 };
103 class Score: public Variable{
104 public:
get_name()105 	static const std::string get_name(){
106 		return "score";
107 	}
get(const HspContext & r)108 	double get(const HspContext& r){
109 		return r.score();
110 	}
111 };
112 class Length: public Variable{
113 public:
get_name()114 	static const std::string get_name(){
115 		return "length";
116 	}
get(const HspContext & r)117 	double get(const HspContext& r){
118 		return r.length();
119 	}
120 };
121 class PercentIdenticalMatches: public Variable{
122 public:
get_name()123 	static const std::string get_name(){
124 		return "pident";
125 	}
get(const HspContext & r)126 	double get(const HspContext& r){
127 		return (double)r.identities() * 100 / r.length();
128 	}
129 };
130 class NumberIdenticalMatches: public Variable{
131 public:
get_name()132 	static const std::string get_name(){
133 		return "nident";
134 	}
get(const HspContext & r)135 	double get(const HspContext& r){
136 		return r.identities();
137 	}
138 };
139 class NumberMismatches: public Variable{
140 public:
get_name()141 	static const std::string get_name(){
142 		return "mismatch";
143 	}
get(const HspContext & r)144 	double get(const HspContext& r){
145 		return r.mismatches();
146 	}
147 };
148 class NumberPositiveMatches: public Variable{
149 public:
get_name()150 	static const std::string get_name(){
151 		return "positive";
152 	}
get(const HspContext & r)153 	double get(const HspContext& r){
154 		return r.positives();
155 	}
156 };
157 class NumberGapOpenings: public Variable{
158 public:
get_name()159 	static const std::string get_name(){
160 		return "gapopen";
161 	}
get(const HspContext & r)162 	double get(const HspContext& r){
163 		return r.gap_openings();
164 	}
165 };
166 class NumberGaps: public Variable{
167 public:
get_name()168 	static const std::string get_name(){
169 		return "gaps";
170 	}
get(const HspContext & r)171 	double get(const HspContext& r){
172 		return r.gaps();
173 	}
174 };
175 class PercentagePositiveMatches: public Variable{
176 public:
get_name()177 	static const std::string get_name(){
178 		return "ppos";
179 	}
get(const HspContext & r)180 	double get(const HspContext& r){
181 		return (double)r.positives() * 100.0 / r.length();
182 	}
183 };
184 class QueryFrame: public Variable{
185 public:
get_name()186 	static const std::string get_name(){
187 		return "qframe";
188 	}
get(const HspContext & r)189 	double get(const HspContext& r){
190 		return r.blast_query_frame();
191 	}
192 };
193 class QueryCoveragePerHsp: public Variable{
194 public:
get_name()195 	static const std::string get_name(){
196 		return "qcovhsp";
197 	}
get(const HspContext & r)198 	double get(const HspContext& r){
199 		return (double)r.query_source_range().length()*100.0 / r.query.source().length();
200 	}
201 };
202 class SubjectCoveragePerHsp: public Variable{
203 public:
get_name()204 	static const std::string get_name(){
205 		return "scovhsp";
206 	}
get(const HspContext & r)207 	double get(const HspContext& r){
208 		return (double)r.subject_range().length() * 100.0 / r.subject_len;
209 	}
210 };
211 class UngappedScore: public Variable{
212 public:
get_name()213 	static const std::string get_name(){
214 		return "ungapped_score";
215 	}
get(const HspContext & r)216 	double get(const HspContext& r){
217 		return score_matrix.bitscore(r.ungapped_score);
218 	}
219 };
220 
221 class StaticVariableRegistry{
222 	std::map<std::string, Variable*> regMap;
223 public:
StaticVariableRegistry()224 	StaticVariableRegistry(){
225 		// To include new variables add the instantiation here and in the clustering_variable.cpp file. Then add it to the StaticConstructor below
226 		regMap[QueryLength::get_name()] = new QueryLength();
227 		regMap[SubjectLength::get_name()] = new SubjectLength();
228 		regMap[QueryStart::get_name()] = new QueryStart();
229 		regMap[QueryEnd::get_name()] = new QueryEnd();
230 		regMap[SubjectStart::get_name()] = new SubjectStart();
231 		regMap[SubjectEnd::get_name()] = new SubjectEnd();
232 		regMap[EValue::get_name()] = new EValue();
233 		regMap[BitScore::get_name()] = new BitScore();
234 		regMap[Score::get_name()] = new Score();
235 		regMap[Length::get_name()] = new Length();
236 		regMap[PercentIdenticalMatches::get_name()] = new PercentIdenticalMatches();
237 		regMap[NumberIdenticalMatches::get_name()] = new NumberIdenticalMatches();
238 		regMap[NumberMismatches::get_name()] = new NumberMismatches();
239 		regMap[NumberPositiveMatches::get_name()] = new NumberPositiveMatches();
240 		regMap[NumberGapOpenings::get_name()] = new NumberGapOpenings();
241 		regMap[NumberGaps::get_name()] = new NumberGaps();
242 		regMap[PercentagePositiveMatches::get_name()] = new PercentagePositiveMatches();
243 		regMap[QueryFrame::get_name()] = new QueryFrame();
244 		regMap[QueryCoveragePerHsp::get_name()] = new QueryCoveragePerHsp();
245 		regMap[SubjectCoveragePerHsp::get_name()] = new SubjectCoveragePerHsp();
246 		regMap[UngappedScore::get_name()] = new UngappedScore();
247 	}
~StaticVariableRegistry()248 	~StaticVariableRegistry(){
249 		for(auto it = regMap.begin(); it != regMap.end(); it++){
250 			delete it->second;
251 			it->second = nullptr;
252 		}
253 	}
get(std::string key)254 	Variable* get(std::string key) const {
255 		auto ca = regMap.find(key);
256 		if(ca == regMap.end()){
257 			throw std::runtime_error(std::string("Unknown variable: ")+ key);
258 		}
259 		return ca->second;
260 	}
has(std::string key)261 	bool has(std::string key) const {
262 		return regMap.find(key) != regMap.end();
263 	}
getKeys()264 	vector<std::string> getKeys() const {
265 		auto it = regMap.begin();
266 		vector<std::string> keys;
267 		while(it != regMap.end()){
268 			keys.push_back(it->first);
269 			it++;
270 		}
271 		return keys;
272 	}
273 };
274 
275 class VariableRegistry{
276 	static StaticVariableRegistry vr;
277 public:
get(std::string key)278 	static Variable* get(std::string key){
279 		return vr.get(key);
280 	}
has(std::string key)281 	static bool has(std::string key){
282 		return vr.has(key);
283 	}
getKeys()284 	static vector<std::string> getKeys(){
285 		return vr.getKeys();
286 	}
287 };
288