1 #include <stdlib.h>
2 #include <math.h>
3 #include "mmpriv.h"
4 #include "kvec.h"
5 
mm_select_sub_multi(void * km,float pri_ratio,float pri1,float pri2,int max_gap_ref,int min_diff,int best_n,int n_segs,const int * qlens,int * n_,mm_reg1_t * r)6 void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r)
7 {
8 	if (pri_ratio > 0.0f && *n_ > 0) {
9 		int i, k, n = *n_, n_2nd = 0;
10 		int max_dist = n_segs == 2? qlens[0] + qlens[1] + max_gap_ref : 0;
11 		for (i = k = 0; i < n; ++i) {
12 			int to_keep = 0;
13 			if (r[i].parent == i) { // primary
14 				to_keep = 1;
15 			} else if (r[i].score + min_diff >= r[r[i].parent].score) {
16 				to_keep = 1;
17 			} else {
18 				mm_reg1_t *p = &r[r[i].parent], *q = &r[i];
19 				if (p->rev == q->rev && p->rid == q->rid && q->re - p->rs < max_dist && p->re - q->rs < max_dist) { // child and parent are close on the ref
20 					if (q->score >= p->score * pri1)
21 						to_keep = 1;
22 				} else {
23 					int is_par_both = (n_segs == 2 && p->qs < qlens[0] && p->qe > qlens[0]);
24 					int is_chi_both = (n_segs == 2 && q->qs < qlens[0] && q->qe > qlens[0]);
25 					if (is_chi_both || is_chi_both == is_par_both) {
26 						if (q->score >= p->score * pri_ratio)
27 							to_keep = 1;
28 					} else { // the remaining case: is_chi_both == 0 && is_par_both == 1
29 						if (q->score >= p->score * pri2)
30 							to_keep = 1;
31 					}
32 				}
33 			}
34 			if (to_keep && r[i].parent != i) {
35 				if (n_2nd++ >= best_n) to_keep = 0; // don't keep if there are too many secondary hits
36 			}
37 			if (to_keep) r[k++] = r[i];
38 			else if (r[i].p) free(r[i].p);
39 		}
40 		if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
41 		*n_ = k;
42 	}
43 }
44 
mm_set_pe_thru(const int * qlens,int * n_regs,mm_reg1_t ** regs)45 void mm_set_pe_thru(const int *qlens, int *n_regs, mm_reg1_t **regs)
46 {
47 	int s, i, n_pri[2], pri[2];
48 	n_pri[0] = n_pri[1] = 0;
49 	pri[0] = pri[1] = -1;
50 	for (s = 0; s < 2; ++s)
51 		for (i = 0; i < n_regs[s]; ++i)
52 			if (regs[s][i].id == regs[s][i].parent)
53 				++n_pri[s], pri[s] = i;
54 	if (n_pri[0] == 1 && n_pri[1] == 1) {
55 		mm_reg1_t *p = &regs[0][pri[0]];
56 		mm_reg1_t *q = &regs[1][pri[1]];
57 		if (p->rid == q->rid && p->rev == q->rev && abs(p->rs - q->rs) < 3 && abs(p->re - q->re) < 3
58 			&& ((p->qs == 0 && qlens[1] - q->qe == 0) || (q->qs == 0 && qlens[0] - p->qe == 0)))
59 		{
60 			p->pe_thru = q->pe_thru = 1;
61 		}
62 	}
63 }
64 
65 #include "ksort.h"
66 
67 typedef struct {
68 	int s, rev;
69 	uint64_t key;
70 	mm_reg1_t *r;
71 } pair_arr_t;
72 
73 #define sort_key_pair(a) ((a).key)
74 KRADIX_SORT_INIT(pair, pair_arr_t, sort_key_pair, 8)
75 
mm_pair(void * km,int max_gap_ref,int pe_bonus,int sub_diff,int match_sc,const int * qlens,int * n_regs,mm_reg1_t ** regs)76 void mm_pair(void *km, int max_gap_ref, int pe_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs)
77 {
78 	int i, j, s, n, last[2], dp_thres, segs = 0, max_idx[2];
79 	int64_t max;
80 	pair_arr_t *a;
81 	kvec_t(uint64_t) sc = {0,0,0};
82 
83 	a = (pair_arr_t*)kmalloc(km, (n_regs[0] + n_regs[1]) * sizeof(pair_arr_t));
84 	for (s = n = 0, dp_thres = 0; s < 2; ++s) {
85 		int max = 0;
86 		for (i = 0; i < n_regs[s]; ++i) {
87 			a[n].s = s;
88 			a[n].r = &regs[s][i];
89 			a[n].rev = a[n].r->rev;
90 			a[n].key = (uint64_t)a[n].r->rid << 32 | a[n].r->rs<<1 | (s^a[n].rev);
91 			max = max > a[n].r->p->dp_max? max : a[n].r->p->dp_max;
92 			++n;
93 			segs |= 1<<s;
94 		}
95 		dp_thres += max;
96 	}
97 	if (segs != 3) {
98 		kfree(km, a); // only one end is mapped
99 		return;
100 	}
101 	dp_thres -= pe_bonus;
102 	if (dp_thres < 0) dp_thres = 0;
103 	radix_sort_pair(a, a + n);
104 
105 	max = -1;
106 	max_idx[0] = max_idx[1] = -1;
107 	last[0] = last[1] = -1;
108 	kv_resize(uint64_t, km, sc, (size_t)n);
109 	for (i = 0; i < n; ++i) {
110 		if (a[i].key & 1) { // reverse first read or forward second read
111 			mm_reg1_t *q, *r;
112 			if (last[a[i].rev] < 0) continue;
113 			r = a[i].r;
114 			q = a[last[a[i].rev]].r;
115 			if (r->rid != q->rid || r->rs - q->re > max_gap_ref) continue;
116 			for (j = last[a[i].rev]; j >= 0; --j) {
117 				int64_t score;
118 				if (a[j].rev != a[i].rev || a[j].s == a[i].s) continue;
119 				q = a[j].r;
120 				if (r->rid != q->rid || r->rs - q->re > max_gap_ref) break;
121 				if (r->p->dp_max + q->p->dp_max < dp_thres) continue;
122 				score = (int64_t)(r->p->dp_max + q->p->dp_max) << 32 | (r->hash + q->hash);
123 				if (score > max)
124 					max = score, max_idx[a[j].s] = j, max_idx[a[i].s] = i;
125 				kv_push(uint64_t, km, sc, score);
126 			}
127 		} else { // forward first read or reverse second read
128 			last[a[i].rev] = i;
129 		}
130 	}
131 	if (sc.n > 1)
132 		radix_sort_64(sc.a, sc.a + sc.n);
133 
134 	if (sc.n > 0 && max > 0) { // found at least one pair
135 		int n_sub = 0, mapq_pe;
136 		mm_reg1_t *r[2];
137 		r[0] = a[max_idx[0]].r, r[1] = a[max_idx[1]].r;
138 		r[0]->proper_frag = r[1]->proper_frag = 1;
139 		for (s = 0; s < 2; ++s) {
140 			if (r[s]->id != r[s]->parent) { // then lift to primary and update parent
141 				mm_reg1_t *p = &regs[s][r[s]->parent];
142 				for (i = 0; i < n_regs[s]; ++i)
143 					if (regs[s][i].parent == p->id)
144 						regs[s][i].parent = r[s]->id;
145 				p->mapq = 0;
146 			}
147 			if (!r[s]->sam_pri) { // then sync sam_pri
148 				for (i = 0; i < n_regs[s]; ++i)
149 					regs[s][i].sam_pri = 0;
150 				r[s]->sam_pri = 1;
151 			}
152 		}
153 		mapq_pe = r[0]->mapq > r[1]->mapq? r[0]->mapq : r[1]->mapq;
154 		for (i = 0; i < (int)sc.n; ++i)
155 			if ((sc.a[i]>>32) + sub_diff >= (uint64_t)max>>32)
156 				++n_sub;
157 		if (sc.n > 1) {
158 			int mapq_pe_alt;
159 			mapq_pe_alt = (int)(6.02f * ((max>>32) - (sc.a[sc.n - 2]>>32)) / match_sc - 4.343f * logf(n_sub)); // n_sub > 0 because it counts the optimal, too
160 			mapq_pe = mapq_pe < mapq_pe_alt? mapq_pe : mapq_pe_alt;
161 		}
162 		if (r[0]->mapq < mapq_pe) r[0]->mapq = (int)(.2f * r[0]->mapq + .8f * mapq_pe + .499f);
163 		if (r[1]->mapq < mapq_pe) r[1]->mapq = (int)(.2f * r[1]->mapq + .8f * mapq_pe + .499f);
164 		if (sc.n == 1) {
165 			if (r[0]->mapq < 2) r[0]->mapq = 2;
166 			if (r[1]->mapq < 2) r[1]->mapq = 2;
167 		} else if ((uint64_t)max>>32 > sc.a[sc.n - 2]>>32) {
168 			if (r[0]->mapq < 1) r[0]->mapq = 1;
169 			if (r[1]->mapq < 1) r[1]->mapq = 1;
170 		}
171 	}
172 
173 	kfree(km, a);
174 	kfree(km, sc.a);
175 
176 	mm_set_pe_thru(qlens, n_regs, regs);
177 }
178