1 /*
2  *
3  * Copyright (c) 2011, Jue Ruan <ruanjue@gmail.com>
4  *
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef __ALN_CIGAR_RJ_H
21 #define __ALN_CIGAR_RJ_H
22 
23 #include <stdlib.h>
24 #include <stdio.h>
25 
26 #define ALN_CIGAR_MAX_LEN	8191
27 
28 #define ALN_CIGAR_TYPE_NULL	0
29 #define ALN_CIGAR_TYPE_MAT	3
30 #define ALN_CIGAR_TYPE_INS	1
31 #define ALN_CIGAR_TYPE_DEL	2
32 #define ALN_CIGAR_TYPE_SKIP	7
33 #define ALN_CIGAR_TYPE_CLIP1	5
34 #define ALN_CIGAR_TYPE_CLIP2	6
35 
36 static const char aln_cigar_string[8] = "?IDM?SHN";
37 
38 typedef struct {
39 	uint16_t len:13, type:3;
40 } AlnCigar;
41 
cigars_lengths(AlnCigar * cigars,int n_cigar,int * aln_size,int * seq1_size,int * seq2_size)42 static inline void cigars_lengths(AlnCigar *cigars, int n_cigar, int *aln_size, int *seq1_size, int *seq2_size){
43 	int i;
44 	if(aln_size) *aln_size = 0;
45 	if(seq1_size) *seq1_size = 0;
46 	if(seq2_size) *seq2_size = 0;
47 	for(i=0;i<n_cigar;i++){
48 		if(seq1_size &&  (cigars[i].type & 0x01)) *seq1_size += cigars[i].len;
49 		if(seq2_size &&  (cigars[i].type & 0x02)) *seq2_size += cigars[i].len;
50 		if(aln_size  && !(cigars[i].type & 0x04)) *aln_size += cigars[i].len;
51 	}
52 }
53 
_aln_cigar_h_num_str_len(int n)54 static inline int _aln_cigar_h_num_str_len(int n){
55 	int i;
56 	i = 0;
57 	while(n){
58 		i ++;
59 		n /= 10;
60 	}
61 	return i;
62 }
63 
_aln_cigar_add_cigar(AlnCigar * cs,int n_cigar,int len,int type)64 static inline int _aln_cigar_add_cigar(AlnCigar *cs, int n_cigar, int len, int type){
65 	while(len){
66 		if(len > ALN_CIGAR_MAX_LEN){
67 			cs[n_cigar].len = ALN_CIGAR_MAX_LEN;
68 			len -= ALN_CIGAR_MAX_LEN;
69 		} else {
70 			cs[n_cigar].len = len;
71 			len = 0;
72 		}
73 		cs[n_cigar++].type = type;
74 	}
75 	return n_cigar;
76 }
77 
cigars2string(AlnCigar * cigars,int n_cigar,char * str)78 static inline char* cigars2string(AlnCigar *cigars, int n_cigar, char *str){
79 	int i, j, n, str_len, type;
80 	char *p;
81 	if(str == NULL){
82 		str_len = 0;
83 		for(i=0;i<n_cigar;i++) str_len += _aln_cigar_h_num_str_len(cigars[i].len) + 1;
84 		str = malloc(str_len + 1);
85 	}
86 	p = str;
87 	if(n_cigar){
88 		n = cigars[0].len;
89 		type = cigars[0].type;
90 		for(i=1;i<=n_cigar;i++){
91 			if(i == n_cigar || (type != cigars[i].type && n)){
92 				str_len = _aln_cigar_h_num_str_len(n) - 1;
93 				j = 0;
94 				while(n){
95 					p[str_len - j] = '0' + (n % 10);
96 					n /= 10;
97 					j ++;
98 				}
99 				p[str_len + 1] = aln_cigar_string[type];
100 				p = p + str_len + 1 + 1;
101 			}
102 			if(i == n_cigar) break;
103 			n += cigars[i].len;
104 			type = cigars[i].type;
105 		}
106 	}
107 	p[0] = 0;
108 	return str;
109 }
110 
string2cigars(AlnCigar * cigars,char * str,int len)111 static inline int string2cigars(AlnCigar *cigars, char *str, int len){
112 	int i, n, x;
113 	n = 0;
114 	x = 0;
115 	for(i=0;i<len;i++){
116 		switch(str[i]){
117 			case '0':
118 			case '1':
119 			case '2':
120 			case '3':
121 			case '4':
122 			case '5':
123 			case '6':
124 			case '7':
125 			case '8':
126 			case '9': x = x * 10 + (str[i] - '0'); break;
127 			case 'M':
128 			case 'm': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_MAT); x = 0; break;
129 			case 'I':
130 			case 'i': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_INS); x = 0; break;
131 			case 'D':
132 			case 'd': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_DEL); x = 0; break;
133 			case 'S':
134 			case 's': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_CLIP1); x = 0; break;
135 			case 'H':
136 			case 'h': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_CLIP2); x = 0; break;
137 			case 'N':
138 			case 'n': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_SKIP); x = 0; break;
139 			default : n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_NULL); x = 0; break;
140 		}
141 	}
142 	return n;
143 }
144 
rank_cigars_seqlen(AlnCigar * cigars,int n_cigar,int len,int seq_idx)145 static inline int rank_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
146 	int i, ret;
147 	if(len < 0) len = 0x7FFFFFFF;
148 	for(i=0,ret=0;i<n_cigar&&len>=0;i++){
149 		if((cigars[i].type >> seq_idx) & 0x01){
150 			if(len > (int)cigars[i].len){
151 				ret += cigars[i].len;
152 			} else {
153 				ret += len;
154 			}
155 		}
156 		len -= cigars[i].len;
157 	}
158 	return ret;
159 }
160 
rev_rank_cigars_seqlen(AlnCigar * cigars,int n_cigar,int len,int seq_idx)161 static inline int rev_rank_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
162 	int i, ret;
163 	if(len < 0) len = 0x7FFFFFFF;
164 	for(i=n_cigar-1,ret=0;i>=0&&len>=0;i--){
165 		if((cigars[i].type >> seq_idx) & 0x01){
166 			if(len > (int)cigars[i].len){
167 				ret += cigars[i].len;
168 			} else {
169 				ret += len;
170 			}
171 		}
172 		len -= cigars[i].len;
173 	}
174 	return ret;
175 }
176 
select_cigars_seqlen(AlnCigar * cigars,int n_cigar,int len,int seq_idx)177 static inline int select_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
178 	int i, ret;
179 	for(i=0,ret=0;i<n_cigar;i++){
180 		if((cigars[i].type >> seq_idx) & 0x01){
181 			if(len > (int)cigars[i].len){
182 				ret += cigars[i].len;
183 			} else {
184 				ret += len;
185 				break;
186 			}
187 			len -= cigars[i].len;
188 		} else {
189 			ret += cigars[i].len;
190 		}
191 	}
192 	return ret;
193 }
194 
rev_select_cigars_seqlen(AlnCigar * cigars,int n_cigar,int len,int seq_idx)195 static inline int rev_select_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
196 	int i, ret;
197 	for(i=n_cigar-1,ret=0;i>=0;i--){
198 		if((cigars[i].type >> seq_idx) & 0x01){
199 			if(len > (int)cigars[i].len){
200 				ret += cigars[i].len;
201 			} else {
202 				ret += len;
203 				break;
204 			}
205 			len -= cigars[i].len;
206 		} else {
207 			ret += cigars[i].len;
208 		}
209 	}
210 	return ret;
211 }
212 
flip_cigars(AlnCigar * cigars,int n_cigar)213 static inline void flip_cigars(AlnCigar *cigars, int n_cigar){
214 	int i;
215 	for(i=0;i<n_cigar;i++){
216 		cigars[i].type = (cigars[i].type & 0x04) | ((cigars[i].type & 0x01) << 1) | ((cigars[i].type & 0x02) >> 1);
217 	}
218 }
219 
sub_cigars(AlnCigar * dst,AlnCigar * cigars,int n_cigar,int off,int len)220 static inline int sub_cigars(AlnCigar *dst, AlnCigar *cigars, int n_cigar, int off, int len){
221 	int i, x, y, n_sub;
222 	n_sub = 0;
223 	if(len < 0) len = 0x3FFFFFFF;
224 	else if(len == 0) return 0;
225 	for(i=0,x=0;i<n_cigar;i++){
226 		y = x + cigars[i].len;
227 		if(x < off){
228 			if(y < off){
229 			} else if(y < off + len){
230 				n_sub = _aln_cigar_add_cigar(dst, n_sub, y - off, cigars[i].type);
231 			} else {
232 				n_sub = _aln_cigar_add_cigar(dst, n_sub, off + len - x, cigars[i].type);
233 				break;
234 			}
235 		} else if(x >= off + len){
236 			break;
237 		} else {
238 			if(y < off + len){
239 				n_sub = _aln_cigar_add_cigar(dst, n_sub, cigars[i].len, cigars[i].type);
240 			} else {
241 				n_sub = _aln_cigar_add_cigar(dst, n_sub, off + len - x, cigars[i].type);
242 				break;
243 			}
244 		}
245 		x = y;
246 	}
247 	return n_sub;
248 }
249 
sub_seq_cigars(AlnCigar * dst,AlnCigar * c,int n,int seq_idx,int off,int len)250 static inline int sub_seq_cigars(AlnCigar *dst, AlnCigar *c, int n, int seq_idx, int off, int len){
251 	int d, i, x, y;
252 	if(len < 0) len = 0x7FFFFFFF;
253 	d = 0;
254 	x = y = 0;
255 	for(i=0;i<n;i++){
256 		if(x > off + len) break;
257 		if((c[i].type >> seq_idx) & 0x01){
258 			y = x + c[i].len;
259 		} else {
260 			y = x;
261 		}
262 		if(y < off) continue;
263 		if(x < off){
264 			if(y < off + len){
265 				d = _aln_cigar_add_cigar(dst, d, y - off, c[i].type);
266 			} else {
267 				d = _aln_cigar_add_cigar(dst, d, off + len - x, c[i].type);
268 				break;
269 			}
270 		} else if(y < off + len){
271 			d = _aln_cigar_add_cigar(dst, d, c[i].len, c[i].type);
272 		} else {
273 			d = _aln_cigar_add_cigar(dst, d, off + len - x, c[i].type);
274 			break;
275 		}
276 		x = y;
277 	}
278 	return d;
279 }
280 
cat_cigars(AlnCigar * cigars1,int n_cigar1,AlnCigar * cigars2,int n_cigar2)281 static inline int cat_cigars(AlnCigar *cigars1, int n_cigar1, AlnCigar *cigars2, int n_cigar2){
282 	int i;
283 	if(n_cigar2 == 0) return n_cigar1;
284 	if(n_cigar1){
285 		if(cigars1[n_cigar1-1].type == cigars2[0].type){
286 			n_cigar1 = _aln_cigar_add_cigar(cigars1, n_cigar1 - 1, cigars1[n_cigar1 - 1].len + cigars2[0].len, cigars2[0].type);
287 			i = 1;
288 		} else i = 0;
289 	} else i = 0;
290 	while(i < n_cigar2){
291 		cigars1[n_cigar1].type = cigars2[i].type;
292 		cigars1[n_cigar1].len = cigars2[i].len;
293 		n_cigar1 ++;
294 		i ++;
295 	}
296 	return n_cigar1;
297 }
298 
append_cigars(AlnCigar * cs,int n,int type,int len)299 static inline int append_cigars(AlnCigar *cs, int n, int type, int len){
300 	if(n && cs[n-1].type == type){
301 		return _aln_cigar_add_cigar(cs, n-1, cs[n-1].len + len, type);
302 	} else {
303 		return _aln_cigar_add_cigar(cs, n, len, type);
304 	}
305 }
306 
reverse_cigars(AlnCigar * cs,int n)307 static inline void reverse_cigars(AlnCigar *cs, int n){
308 	int i, j;
309 	AlnCigar c;
310 	i = 0;
311 	j = n - 1;
312 	while(i < j){
313 		c = cs[i]; cs[i] = cs[j]; cs[j] = c;
314 		i ++; j --;
315 	}
316 }
317 
318 // seq1_size of c1 must be no less than c2
319 // seq1_size of dst will equal length of c1
compile_cigars(AlnCigar * dst,AlnCigar * c1,int n1,AlnCigar * c2,int n2,int seq_idx)320 static inline int compile_cigars(AlnCigar *dst, AlnCigar *c1, int n1, AlnCigar *c2, int n2, int seq_idx){
321 	int i, j, n3, x1, x2, x3, f1, f2, f3;
322 	n3 = 0;
323 	x2 = 0;
324 	x3 = 0;
325 	f2 = ALN_CIGAR_TYPE_MAT;
326 	f3 = ALN_CIGAR_TYPE_INS;
327 	for(i=j=0;i<n1;i++){
328 		x1 = c1[i].len;
329 		f1 = (c1[i].type >> seq_idx) & 0x01;
330 		while(x1){
331 			if(x2 == 0){
332 				if(j < n2){
333 					x2 = c2[j].len;
334 					f2 = (c2[j].type >> seq_idx) & 0x01;
335 					j ++;
336 				} else {
337 					x2 = x1;
338 					f2 = f1;
339 				}
340 			}
341 			if(f1){
342 				if(f2){
343 					if(x3 && f3 != ALN_CIGAR_TYPE_INS){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
344 					if(x1 < x2){ x2 -= x1; x3 += x1; x1 = 0; }
345 					else { x1 -= x2; x3 += x2; x2 = 0; }
346 					f3 = ALN_CIGAR_TYPE_INS;
347 				} else {
348 					if(x3 && f3 != ALN_CIGAR_TYPE_DEL){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
349 					f3 = ALN_CIGAR_TYPE_DEL;
350 					x3 += x2; x2 = 0;
351 				}
352 			} else {
353 				if(f2){
354 					if(x3 && f3 != ALN_CIGAR_TYPE_INS){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
355 					f3 = ALN_CIGAR_TYPE_INS;
356 					x3 += x1; x1 = 0;
357 				} else {
358 					if(x3 && f3 != ALN_CIGAR_TYPE_INS){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
359 					f3 = ALN_CIGAR_TYPE_INS;
360 					if(x1 < x2){ x2 -= x1; x3 += x1; x1 = 0; }
361 					else { x1 -= x2; x3 += x2; x2 = 0; }
362 				}
363 			}
364 		}
365 	}
366 	if(x3){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); }
367 	return n3;
368 }
369 
370 // c2 is the dst in align_cigars
apply_cigars(AlnCigar * dst,AlnCigar * c1,int n1,AlnCigar * c2,int n2)371 static inline int apply_cigars(AlnCigar *dst, AlnCigar *c1, int n1, AlnCigar *c2, int n2){
372 	int i, j, n3, x1, x2, x3, f1, f2;
373 	n3 = 0;
374 	x2 = 0;
375 	f2 = ALN_CIGAR_TYPE_MAT;
376 	for(i=j=0;i<n1;i++){
377 		x1 = c1[i].len;
378 		f1 = c1[i].type;
379 		while(x1){
380 			if(x2 == 0){
381 				if(j < n2){
382 					x2 = c2[j].len;
383 					f2 = c2[j].type;
384 					j ++;
385 				} else {
386 					x2 = x1;
387 					f2 = ALN_CIGAR_TYPE_MAT;
388 				}
389 			}
390 			if(f2 == ALN_CIGAR_TYPE_DEL){
391 				n3 = _aln_cigar_add_cigar(dst, n3, x2, ALN_CIGAR_TYPE_DEL);
392 				x2 = 0;
393 			} else {
394 				x3 = (x1 < x2)? x1 : x2;
395 				n3 = _aln_cigar_add_cigar(dst, n3, x3, f1);
396 				x1 -= x3;
397 				x2 -= x3;
398 			}
399 		}
400 	}
401 	return n3;
402 }
403 
refine_cigars(AlnCigar * c,int n)404 static inline int refine_cigars(AlnCigar *c, int n){
405 	int i, j, x, f;
406 	x = 0;
407 	f = ALN_CIGAR_TYPE_NULL;
408 	for(i=j=0;i<n;i++){
409 		if(c[i].type == f){
410 			x += c[i].len;
411 		} else {
412 			if(x){
413 				j = _aln_cigar_add_cigar(c, j, x, f);
414 			}
415 			x = c[i].len;
416 			f = c[i].type;
417 		}
418 	}
419 	if(x){ j = _aln_cigar_add_cigar(c, j, x, f); }
420 	return j;
421 }
422 
cigars_seq2aln(char * dst,AlnCigar * c,int n,int seq_idx,char * seq)423 static inline int cigars_seq2aln(char *dst, AlnCigar *c, int n, int seq_idx, char *seq){
424 	int i, j, k, m;
425 	for(i=m=k=0;i<n;i++){
426 		if((c[i].type >> seq_idx) & 0x01){
427 			if(c[i].type & 0x04){
428 				k += c[i].len;
429 			} else {
430 				for(j=0;j<c[i].len;j++) dst[m++] = seq[k++];
431 			}
432 		} else if(!(c[i].type & 0x04)){
433 			for(j=0;j<c[i].len;j++) dst[m++] = '-';
434 		}
435 	}
436 	dst[m] = '\0';
437 	return m;
438 }
439 
440 #endif
441