1 /*
2 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3 *
4 * This file is part of Bowtie 2.
5 *
6 * Bowtie 2 is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Bowtie 2 is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 /*
21 * unique.h
22 *
23 * Encapsulates objects and routines for determining whether and to
24 * what extend the best alignment for a read is "unique." In the
25 * simplest scenario, uniqueness is determined by whether we found only
26 * one alignment. More complex scenarios might assign a uniqueness
27 * score based that is a function of (of a summarized version of): all
28 * the alignments found and their scores.
29 *
30 * Since mapping quality is related to uniqueness, objects and
31 * routings for calculating mapping quality are also included here.
32 */
33
34 #ifndef UNIQUE_H_
35 #define UNIQUE_H_
36
37 #include <algorithm>
38 #include <string>
39 #include "aligner_result.h"
40 #include "simple_func.h"
41 #include "util.h"
42 #include "scoring.h"
43
44 typedef int64_t TMapq;
45
46 /**
47 * Class that returns yes-or-no answers to the question of whether a
48 */
49 class Uniqueness {
50 public:
51
52 /**
53 * Given an AlnSetSumm, determine if the best alignment is "unique"
54 * according to some definition.
55 */
bestIsUnique(const AlnSetSumm & s,const AlnFlags & flags,bool mate1,size_t rdlen,size_t ordlen,char * inps)56 static bool bestIsUnique(
57 const AlnSetSumm& s,
58 const AlnFlags& flags,
59 bool mate1,
60 size_t rdlen,
61 size_t ordlen,
62 char *inps)
63 {
64 assert(!s.empty());
65 return !VALID_AL_SCORE(s.bestUnchosenScore(mate1));
66 }
67 };
68
69 /**
70 * Collection of routines for calculating mapping quality.
71 */
72 class Mapq {
73
74 public:
75
~Mapq()76 virtual ~Mapq() { }
77
78 virtual TMapq mapq(
79 const AlnSetSumm& s,
80 const AlnFlags& flags,
81 bool mate1,
82 size_t rdlen,
83 size_t ordlen,
84 char *inps) const = 0;
85 };
86
87 extern const TMapq unp_nosec_perf;
88 extern const TMapq unp_nosec[11];
89 extern const TMapq unp_sec_perf[11];
90 extern const TMapq unp_sec[11][11];
91 extern const TMapq pair_nosec_perf;
92
93 /**
94 * V3 of the MAPQ calculator
95 */
96 class BowtieMapq3 : public Mapq {
97
98 public:
99
BowtieMapq3(const SimpleFunc & scoreMin,const Scoring & sc)100 BowtieMapq3(
101 const SimpleFunc& scoreMin,
102 const Scoring& sc) :
103 scoreMin_(scoreMin),
104 sc_(sc)
105 { }
106
~BowtieMapq3()107 virtual ~BowtieMapq3() { }
108
109 /**
110 * Given an AlnSetSumm, return a mapping quality calculated.
111 */
mapq(const AlnSetSumm & s,const AlnFlags & flags,bool mate1,size_t rdlen,size_t ordlen,char * inps)112 virtual TMapq mapq(
113 const AlnSetSumm& s,
114 const AlnFlags& flags,
115 bool mate1,
116 size_t rdlen,
117 size_t ordlen,
118 char *inps) // put string representation of inputs here
119 const
120 {
121 if(s.paired()) {
122 return pair_nosec_perf;
123 } else {
124 bool hasSecbest = VALID_AL_SCORE(s.bestUnchosenScore(mate1));
125 if(!flags.canMax() && !s.exhausted(mate1) && !hasSecbest) {
126 return 255;
127 }
128 TAlScore scMax = (TAlScore)sc_.perfectScore(rdlen);
129 TAlScore scMin = scoreMin_.f<TAlScore>((float)rdlen);
130 assert_geq(scMax, scMin);
131 TAlScore best = scMax - s.bestScore(mate1).score(); // best score (lower=better)
132 size_t best_bin = (size_t)((double)best * (10.0 / (double)(scMax - scMin)) + 0.5);
133 assert_geq(best_bin, 0);
134 assert_lt(best_bin, 11);
135 if(hasSecbest) {
136 assert_geq(s.bestScore(mate1).score(), s.bestUnchosenScore(mate1).score());
137 size_t diff = s.bestScore(mate1).score() - s.bestUnchosenScore(mate1).score();
138 size_t diff_bin = (size_t)((double)diff * (10.0 / (double)(scMax - scMin)) + 0.5);
139 assert_geq(diff_bin, 0);
140 assert_lt(diff_bin, 11);
141 // A valid second-best alignment was found
142 if(best == scMax) {
143 // Best alignment has perfect score
144 return unp_sec_perf[best_bin];
145 } else {
146 // Best alignment has less than perfect score
147 return unp_sec[diff_bin][best_bin];
148 }
149 } else {
150 // No valid second-best alignment was found
151 if(best == scMax) {
152 // Best alignment has perfect score
153 return unp_nosec_perf;
154 } else {
155 // Best alignment has less than perfect score
156 return unp_nosec[best_bin];
157 }
158 }
159 }
160 }
161
162 protected:
163
164 SimpleFunc scoreMin_;
165 const Scoring& sc_;
166 };
167
168 /**
169 * V2 of the MAPQ calculator
170 */
171 class BowtieMapq2 : public Mapq {
172
173 public:
174
BowtieMapq2(const SimpleFunc & scoreMin,const Scoring & sc)175 BowtieMapq2(
176 const SimpleFunc& scoreMin,
177 const Scoring& sc) :
178 scoreMin_(scoreMin),
179 sc_(sc)
180 { }
181
~BowtieMapq2()182 virtual ~BowtieMapq2() { }
183
184 /**
185 * Given an AlnSetSumm, return a mapping quality calculated.
186 */
mapq(const AlnSetSumm & s,const AlnFlags & flags,bool mate1,size_t rdlen,size_t ordlen,char * inps)187 virtual TMapq mapq(
188 const AlnSetSumm& s,
189 const AlnFlags& flags,
190 bool mate1,
191 size_t rdlen,
192 size_t ordlen,
193 char *inps) // put string representation of inputs here
194 const
195 {
196 // Did the read have a second-best alignment?
197 bool hasSecbest = s.paired() ?
198 VALID_AL_SCORE(s.bestUnchosenCScore()) :
199 VALID_AL_SCORE(s.bestUnchosenScore(mate1));
200 // This corresponds to a scenario where we found one and only one
201 // alignment but didn't really look for a second one
202 if(!flags.isPrimary() ||
203 (!flags.canMax() && !s.exhausted(mate1) && !hasSecbest)) {
204 return 255;
205 }
206 // scPer = score of a perfect match
207 TAlScore scPer = (TAlScore)sc_.perfectScore(rdlen);
208 if(s.paired()) {
209 scPer += (TAlScore)sc_.perfectScore(ordlen);
210 }
211 // scMin = score of a just barely valid match
212 TAlScore scMin = scoreMin_.f<TAlScore>((float)rdlen);
213 if(s.paired()) {
214 scMin += scoreMin_.f<TAlScore>((float)ordlen);
215 }
216 TAlScore secbest = scMin-1;
217 TAlScore diff = std::max<TAlScore>(1, scPer - scMin); // scores can vary by up to this much
218 TMapq ret = 0;
219 TAlScore best = s.paired() ?
220 s.bestCScore().score() : s.bestScore(mate1).score();
221 // best score but normalized so that 0 = worst valid score
222 TAlScore bestOver = best - scMin;
223 if(sc_.monotone) {
224 // End-to-end alignment
225 if(!hasSecbest) {
226 if (bestOver >= diff * (double)0.8f) ret = 42;
227 else if(bestOver >= diff * (double)0.7f) ret = 40;
228 else if(bestOver >= diff * (double)0.6f) ret = 24;
229 else if(bestOver >= diff * (double)0.5f) ret = 23;
230 else if(bestOver >= diff * (double)0.4f) ret = 8;
231 else if(bestOver >= diff * (double)0.3f) ret = 3;
232 else ret = 0;
233 } else {
234 secbest = s.paired() ?
235 s.bestUnchosenCScore().score() : s.bestUnchosenScore(mate1).score();
236 TAlScore bestdiff = abs(abs(static_cast<long>(best))-abs(static_cast<long>(secbest)));
237 if(bestdiff >= diff * (double)0.9f) {
238 if(bestOver == diff) {
239 ret = 39;
240 } else {
241 ret = 33;
242 }
243 } else if(bestdiff >= diff * (double)0.8f) {
244 if(bestOver == diff) {
245 ret = 38;
246 } else {
247 ret = 27;
248 }
249 } else if(bestdiff >= diff * (double)0.7f) {
250 if(bestOver == diff) {
251 ret = 37;
252 } else {
253 ret = 26;
254 }
255 } else if(bestdiff >= diff * (double)0.6f) {
256 if(bestOver == diff) {
257 ret = 36;
258 } else {
259 ret = 22;
260 }
261 } else if(bestdiff >= diff * (double)0.5f) {
262 // Top third is still pretty good
263 if (bestOver == diff) {
264 ret = 35;
265 } else if(bestOver >= diff * (double)0.84f) {
266 ret = 25;
267 } else if(bestOver >= diff * (double)0.68f) {
268 ret = 16;
269 } else {
270 ret = 5;
271 }
272 } else if(bestdiff >= diff * (double)0.4f) {
273 // Top third is still pretty good
274 if (bestOver == diff) {
275 ret = 34;
276 } else if(bestOver >= diff * (double)0.84f) {
277 ret = 21;
278 } else if(bestOver >= diff * (double)0.68f) {
279 ret = 14;
280 } else {
281 ret = 4;
282 }
283 } else if(bestdiff >= diff * (double)0.3f) {
284 // Top third is still pretty good
285 if (bestOver == diff) {
286 ret = 32;
287 } else if(bestOver >= diff * (double)0.88f) {
288 ret = 18;
289 } else if(bestOver >= diff * (double)0.67f) {
290 ret = 15;
291 } else {
292 ret = 3;
293 }
294 } else if(bestdiff >= diff * (double)0.2f) {
295 // Top third is still pretty good
296 if (bestOver == diff) {
297 ret = 31;
298 } else if(bestOver >= diff * (double)0.88f) {
299 ret = 17;
300 } else if(bestOver >= diff * (double)0.67f) {
301 ret = 11;
302 } else {
303 ret = 0;
304 }
305 } else if(bestdiff >= diff * (double)0.1f) {
306 // Top third is still pretty good
307 if (bestOver == diff) {
308 ret = 30;
309 } else if(bestOver >= diff * (double)0.88f) {
310 ret = 12;
311 } else if(bestOver >= diff * (double)0.67f) {
312 ret = 7;
313 } else {
314 ret = 0;
315 }
316 } else if(bestdiff > 0) {
317 // Top third is still pretty good
318 if(bestOver >= diff * (double)0.67f) {
319 ret = 6;
320 } else {
321 ret = 2;
322 }
323 } else {
324 assert_eq(bestdiff, 0);
325 // Top third is still pretty good
326 if(bestOver >= diff * (double)0.67f) {
327 ret = 1;
328 } else {
329 ret = 0;
330 }
331 }
332 }
333 } else {
334 // Local alignment
335 if(!hasSecbest) {
336 if (bestOver >= diff * (double)0.8f) ret = 44;
337 else if(bestOver >= diff * (double)0.7f) ret = 42;
338 else if(bestOver >= diff * (double)0.6f) ret = 41;
339 else if(bestOver >= diff * (double)0.5f) ret = 36;
340 else if(bestOver >= diff * (double)0.4f) ret = 28;
341 else if(bestOver >= diff * (double)0.3f) ret = 24;
342 else ret = 22;
343 } else {
344 secbest = s.paired() ?
345 s.bestUnchosenCScore().score() : s.bestUnchosenScore(mate1).score();
346 TAlScore bestdiff = abs(abs(static_cast<long>(best))-abs(static_cast<long>(secbest)));
347 if (bestdiff >= diff * (double)0.9f) ret = 40;
348 else if(bestdiff >= diff * (double)0.8f) ret = 39;
349 else if(bestdiff >= diff * (double)0.7f) ret = 38;
350 else if(bestdiff >= diff * (double)0.6f) ret = 37;
351 else if(bestdiff >= diff * (double)0.5f) {
352 if (bestOver == diff) ret = 35;
353 else if(bestOver >= diff * (double)0.50f) ret = 25;
354 else ret = 20;
355 } else if(bestdiff >= diff * (double)0.4f) {
356 if (bestOver == diff) ret = 34;
357 else if(bestOver >= diff * (double)0.50f) ret = 21;
358 else ret = 19;
359 } else if(bestdiff >= diff * (double)0.3f) {
360 if (bestOver == diff) ret = 33;
361 else if(bestOver >= diff * (double)0.5f) ret = 18;
362 else ret = 16;
363 } else if(bestdiff >= diff * (double)0.2f) {
364 if (bestOver == diff) ret = 32;
365 else if(bestOver >= diff * (double)0.5f) ret = 17;
366 else ret = 12;
367 } else if(bestdiff >= diff * (double)0.1f) {
368 if (bestOver == diff) ret = 31;
369 else if(bestOver >= diff * (double)0.5f) ret = 14;
370 else ret = 9;
371 } else if(bestdiff > 0) {
372 if(bestOver >= diff * (double)0.5f) ret = 11;
373 else ret = 2;
374 } else {
375 assert_eq(bestdiff, 0);
376 if(bestOver >= diff * (double)0.5f) ret = 1;
377 else ret = 0;
378 }
379 }
380 }
381 // Note: modifications to inps must be synchronized
382 //if(inps != NULL) {
383 // inps = itoa10<TAlScore>(best, inps);
384 // *inps++ = ',';
385 // inps = itoa10<TAlScore>(secbest, inps);
386 // *inps++ = ',';
387 // inps = itoa10<TMapq>(ret, inps);
388 //}
389 return ret;
390 }
391
392 protected:
393
394 SimpleFunc scoreMin_;
395 const Scoring& sc_;
396 };
397
398 /**
399 * TODO: Do BowtieMapq on a per-thread basis prior to the mutex'ed output
400 * function.
401 *
402 * topCoeff :: top_coeff
403 * botCoeff :: bot_coeff
404 * mx :: mapqMax
405 * horiz :: mapqHorizon (sort of)
406 *
407 * sc1 <- tab$sc1
408 * sc2 <- tab$sc2
409 * mapq <- rep(mx, length(sc1))
410 * diff_top <- ifelse(sc1 != best & sc2 != best, abs(best - abs(pmax(sc1, sc2))), 0)
411 * mapq <- mapq - diff_top * top_coeff
412 * diff_bot <- ifelse(sc2 != horiz, abs(abs(sc2) - abs(horiz)), 0)
413 * mapq <- mapq - diff_bot * bot_coeff
414 * mapq <- round(pmax(0, pmin(mx, mapq)))
415 * tab$mapq <- mapq
416 */
417 class BowtieMapq : public Mapq {
418
419 public:
420
BowtieMapq(const SimpleFunc & scoreMin,const Scoring & sc)421 BowtieMapq(
422 const SimpleFunc& scoreMin,
423 const Scoring& sc) :
424 scoreMin_(scoreMin),
425 sc_(sc)
426 { }
427
~BowtieMapq()428 virtual ~BowtieMapq() { }
429
430 /**
431 * Given an AlnSetSumm, return a mapping quality calculated.
432 */
mapq(const AlnSetSumm & s,const AlnFlags & flags,bool mate1,size_t rdlen,size_t ordlen,char * inps)433 virtual TMapq mapq(
434 const AlnSetSumm& s,
435 const AlnFlags& flags,
436 bool mate1,
437 size_t rdlen,
438 size_t ordlen,
439 char *inps) // put string representation of inputs here
440 const
441 {
442 bool hasSecbest = VALID_AL_SCORE(s.bestUnchosenScore(mate1));
443 if(!flags.canMax() && !s.exhausted(mate1) && !hasSecbest) {
444 return 255;
445 }
446 TAlScore scPer = (TAlScore)sc_.perfectScore(rdlen);
447 TAlScore scMin = scoreMin_.f<TAlScore>((float)rdlen);
448 TAlScore secbest = scMin-1;
449 TAlScore diff = (scPer - scMin);
450 float sixth_2 = (float)(scPer - diff * (double)0.1666f * 2);
451 float sixth_3 = (float)(scPer - diff * (double)0.1666f * 3);
452 TMapq ret = 0;
453 TAlScore best = s.bestScore(mate1).score();
454 if(!hasSecbest) {
455 // Top third?
456 if(best >= sixth_2) {
457 ret = 37;
458 }
459 // Otherwise in top half?
460 else if(best >= sixth_3) {
461 ret = 25;
462 }
463 // Otherwise has no second-best?
464 else {
465 ret = 10;
466 }
467 } else {
468 secbest = s.bestUnchosenScore(mate1).score();
469 TAlScore bestdiff = abs(abs(static_cast<long>(best))-abs(static_cast<long>(secbest)));
470 if(bestdiff >= diff * 0.1666 * 5) {
471 ret = 6;
472 } else if(bestdiff >= diff * 0.1666 * 4) {
473 ret = 5;
474 } else if(bestdiff >= diff * 0.1666 * 3) {
475 ret = 4;
476 } else if(bestdiff >= diff * 0.1666 * 2) {
477 ret = 3;
478 } else if(bestdiff >= diff * 0.1666 * 1) {
479 ret = 2;
480 } else {
481 ret = 1;
482 }
483 }
484 // Note: modifications to inps must be synchronized
485 //if(inps != NULL) {
486 // inps = itoa10<TAlScore>(best, inps);
487 // *inps++ = ',';
488 // inps = itoa10<TAlScore>(secbest, inps);
489 // *inps++ = ',';
490 // inps = itoa10<TMapq>(ret, inps);
491 //}
492 return ret;
493 }
494
495 protected:
496
497 SimpleFunc scoreMin_;
498 const Scoring& sc_;
499 };
500
501 /**
502 * Create and return new MAPQ calculating object.
503 */
new_mapq(int version,const SimpleFunc & scoreMin,const Scoring & sc)504 static inline Mapq *new_mapq(
505 int version,
506 const SimpleFunc& scoreMin,
507 const Scoring& sc)
508 {
509 if(version == 3) {
510 return new BowtieMapq3(scoreMin, sc);
511 } else if(version == 2) {
512 return new BowtieMapq2(scoreMin, sc);
513 } else {
514 return new BowtieMapq(scoreMin, sc);
515 }
516 }
517
518 #endif /*ndef UNIQUE_H_*/
519