1 /*
2 *
3 * Copyright (c) 2011, Jue Ruan <ruanjue@gmail.com>
4 *
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #ifndef __ALN_CIGAR_RJ_H
21 #define __ALN_CIGAR_RJ_H
22
23 #include <stdlib.h>
24 #include <stdio.h>
25
26 #define ALN_CIGAR_MAX_LEN 8191
27
28 #define ALN_CIGAR_TYPE_NULL 0
29 #define ALN_CIGAR_TYPE_MAT 3
30 #define ALN_CIGAR_TYPE_INS 1
31 #define ALN_CIGAR_TYPE_DEL 2
32 #define ALN_CIGAR_TYPE_SKIP 7
33 #define ALN_CIGAR_TYPE_CLIP1 5
34 #define ALN_CIGAR_TYPE_CLIP2 6
35
36 static const char aln_cigar_string[8] = "?IDM?SHN";
37
38 typedef struct {
39 uint16_t len:13, type:3;
40 } AlnCigar;
41
cigars_lengths(AlnCigar * cigars,int n_cigar,int * aln_size,int * seq1_size,int * seq2_size)42 static inline void cigars_lengths(AlnCigar *cigars, int n_cigar, int *aln_size, int *seq1_size, int *seq2_size){
43 int i;
44 if(aln_size) *aln_size = 0;
45 if(seq1_size) *seq1_size = 0;
46 if(seq2_size) *seq2_size = 0;
47 for(i=0;i<n_cigar;i++){
48 if(seq1_size && (cigars[i].type & 0x01)) *seq1_size += cigars[i].len;
49 if(seq2_size && (cigars[i].type & 0x02)) *seq2_size += cigars[i].len;
50 if(aln_size && !(cigars[i].type & 0x04)) *aln_size += cigars[i].len;
51 }
52 }
53
_aln_cigar_h_num_str_len(int n)54 static inline int _aln_cigar_h_num_str_len(int n){
55 int i;
56 i = 0;
57 while(n){
58 i ++;
59 n /= 10;
60 }
61 return i;
62 }
63
_aln_cigar_add_cigar(AlnCigar * cs,int n_cigar,int len,int type)64 static inline int _aln_cigar_add_cigar(AlnCigar *cs, int n_cigar, int len, int type){
65 while(len){
66 if(len > ALN_CIGAR_MAX_LEN){
67 cs[n_cigar].len = ALN_CIGAR_MAX_LEN;
68 len -= ALN_CIGAR_MAX_LEN;
69 } else {
70 cs[n_cigar].len = len;
71 len = 0;
72 }
73 cs[n_cigar++].type = type;
74 }
75 return n_cigar;
76 }
77
cigars2string(AlnCigar * cigars,int n_cigar,char * str)78 static inline char* cigars2string(AlnCigar *cigars, int n_cigar, char *str){
79 int i, j, n, str_len, type;
80 char *p;
81 if(str == NULL){
82 str_len = 0;
83 for(i=0;i<n_cigar;i++) str_len += _aln_cigar_h_num_str_len(cigars[i].len) + 1;
84 str = malloc(str_len + 1);
85 }
86 p = str;
87 if(n_cigar){
88 n = cigars[0].len;
89 type = cigars[0].type;
90 for(i=1;i<=n_cigar;i++){
91 if(i == n_cigar || (type != cigars[i].type && n)){
92 str_len = _aln_cigar_h_num_str_len(n) - 1;
93 j = 0;
94 while(n){
95 p[str_len - j] = '0' + (n % 10);
96 n /= 10;
97 j ++;
98 }
99 p[str_len + 1] = aln_cigar_string[type];
100 p = p + str_len + 1 + 1;
101 }
102 if(i == n_cigar) break;
103 n += cigars[i].len;
104 type = cigars[i].type;
105 }
106 }
107 p[0] = 0;
108 return str;
109 }
110
string2cigars(AlnCigar * cigars,char * str,int len)111 static inline int string2cigars(AlnCigar *cigars, char *str, int len){
112 int i, n, x;
113 n = 0;
114 x = 0;
115 for(i=0;i<len;i++){
116 switch(str[i]){
117 case '0':
118 case '1':
119 case '2':
120 case '3':
121 case '4':
122 case '5':
123 case '6':
124 case '7':
125 case '8':
126 case '9': x = x * 10 + (str[i] - '0'); break;
127 case 'M':
128 case 'm': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_MAT); x = 0; break;
129 case 'I':
130 case 'i': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_INS); x = 0; break;
131 case 'D':
132 case 'd': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_DEL); x = 0; break;
133 case 'S':
134 case 's': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_CLIP1); x = 0; break;
135 case 'H':
136 case 'h': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_CLIP2); x = 0; break;
137 case 'N':
138 case 'n': n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_SKIP); x = 0; break;
139 default : n = _aln_cigar_add_cigar(cigars, n, x, ALN_CIGAR_TYPE_NULL); x = 0; break;
140 }
141 }
142 return n;
143 }
144
rank_cigars_seqlen(AlnCigar * cigars,int n_cigar,int len,int seq_idx)145 static inline int rank_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
146 int i, ret;
147 if(len < 0) len = 0x7FFFFFFF;
148 for(i=0,ret=0;i<n_cigar&&len>=0;i++){
149 if((cigars[i].type >> seq_idx) & 0x01){
150 if(len > (int)cigars[i].len){
151 ret += cigars[i].len;
152 } else {
153 ret += len;
154 }
155 }
156 len -= cigars[i].len;
157 }
158 return ret;
159 }
160
rev_rank_cigars_seqlen(AlnCigar * cigars,int n_cigar,int len,int seq_idx)161 static inline int rev_rank_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
162 int i, ret;
163 if(len < 0) len = 0x7FFFFFFF;
164 for(i=n_cigar-1,ret=0;i>=0&&len>=0;i--){
165 if((cigars[i].type >> seq_idx) & 0x01){
166 if(len > (int)cigars[i].len){
167 ret += cigars[i].len;
168 } else {
169 ret += len;
170 }
171 }
172 len -= cigars[i].len;
173 }
174 return ret;
175 }
176
select_cigars_seqlen(AlnCigar * cigars,int n_cigar,int len,int seq_idx)177 static inline int select_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
178 int i, ret;
179 for(i=0,ret=0;i<n_cigar;i++){
180 if((cigars[i].type >> seq_idx) & 0x01){
181 if(len > (int)cigars[i].len){
182 ret += cigars[i].len;
183 } else {
184 ret += len;
185 break;
186 }
187 len -= cigars[i].len;
188 } else {
189 ret += cigars[i].len;
190 }
191 }
192 return ret;
193 }
194
rev_select_cigars_seqlen(AlnCigar * cigars,int n_cigar,int len,int seq_idx)195 static inline int rev_select_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
196 int i, ret;
197 for(i=n_cigar-1,ret=0;i>=0;i--){
198 if((cigars[i].type >> seq_idx) & 0x01){
199 if(len > (int)cigars[i].len){
200 ret += cigars[i].len;
201 } else {
202 ret += len;
203 break;
204 }
205 len -= cigars[i].len;
206 } else {
207 ret += cigars[i].len;
208 }
209 }
210 return ret;
211 }
212
flip_cigars(AlnCigar * cigars,int n_cigar)213 static inline void flip_cigars(AlnCigar *cigars, int n_cigar){
214 int i;
215 for(i=0;i<n_cigar;i++){
216 cigars[i].type = (cigars[i].type & 0x04) | ((cigars[i].type & 0x01) << 1) | ((cigars[i].type & 0x02) >> 1);
217 }
218 }
219
sub_cigars(AlnCigar * dst,AlnCigar * cigars,int n_cigar,int off,int len)220 static inline int sub_cigars(AlnCigar *dst, AlnCigar *cigars, int n_cigar, int off, int len){
221 int i, x, y, n_sub;
222 n_sub = 0;
223 if(len < 0) len = 0x3FFFFFFF;
224 else if(len == 0) return 0;
225 for(i=0,x=0;i<n_cigar;i++){
226 y = x + cigars[i].len;
227 if(x < off){
228 if(y < off){
229 } else if(y < off + len){
230 n_sub = _aln_cigar_add_cigar(dst, n_sub, y - off, cigars[i].type);
231 } else {
232 n_sub = _aln_cigar_add_cigar(dst, n_sub, off + len - x, cigars[i].type);
233 break;
234 }
235 } else if(x >= off + len){
236 break;
237 } else {
238 if(y < off + len){
239 n_sub = _aln_cigar_add_cigar(dst, n_sub, cigars[i].len, cigars[i].type);
240 } else {
241 n_sub = _aln_cigar_add_cigar(dst, n_sub, off + len - x, cigars[i].type);
242 break;
243 }
244 }
245 x = y;
246 }
247 return n_sub;
248 }
249
sub_seq_cigars(AlnCigar * dst,AlnCigar * c,int n,int seq_idx,int off,int len)250 static inline int sub_seq_cigars(AlnCigar *dst, AlnCigar *c, int n, int seq_idx, int off, int len){
251 int d, i, x, y;
252 if(len < 0) len = 0x7FFFFFFF;
253 d = 0;
254 x = y = 0;
255 for(i=0;i<n;i++){
256 if(x > off + len) break;
257 if((c[i].type >> seq_idx) & 0x01){
258 y = x + c[i].len;
259 } else {
260 y = x;
261 }
262 if(y < off) continue;
263 if(x < off){
264 if(y < off + len){
265 d = _aln_cigar_add_cigar(dst, d, y - off, c[i].type);
266 } else {
267 d = _aln_cigar_add_cigar(dst, d, off + len - x, c[i].type);
268 break;
269 }
270 } else if(y < off + len){
271 d = _aln_cigar_add_cigar(dst, d, c[i].len, c[i].type);
272 } else {
273 d = _aln_cigar_add_cigar(dst, d, off + len - x, c[i].type);
274 break;
275 }
276 x = y;
277 }
278 return d;
279 }
280
cat_cigars(AlnCigar * cigars1,int n_cigar1,AlnCigar * cigars2,int n_cigar2)281 static inline int cat_cigars(AlnCigar *cigars1, int n_cigar1, AlnCigar *cigars2, int n_cigar2){
282 int i;
283 if(n_cigar2 == 0) return n_cigar1;
284 if(n_cigar1){
285 if(cigars1[n_cigar1-1].type == cigars2[0].type){
286 n_cigar1 = _aln_cigar_add_cigar(cigars1, n_cigar1 - 1, cigars1[n_cigar1 - 1].len + cigars2[0].len, cigars2[0].type);
287 i = 1;
288 } else i = 0;
289 } else i = 0;
290 while(i < n_cigar2){
291 cigars1[n_cigar1].type = cigars2[i].type;
292 cigars1[n_cigar1].len = cigars2[i].len;
293 n_cigar1 ++;
294 i ++;
295 }
296 return n_cigar1;
297 }
298
append_cigars(AlnCigar * cs,int n,int type,int len)299 static inline int append_cigars(AlnCigar *cs, int n, int type, int len){
300 if(n && cs[n-1].type == type){
301 return _aln_cigar_add_cigar(cs, n-1, cs[n-1].len + len, type);
302 } else {
303 return _aln_cigar_add_cigar(cs, n, len, type);
304 }
305 }
306
reverse_cigars(AlnCigar * cs,int n)307 static inline void reverse_cigars(AlnCigar *cs, int n){
308 int i, j;
309 AlnCigar c;
310 i = 0;
311 j = n - 1;
312 while(i < j){
313 c = cs[i]; cs[i] = cs[j]; cs[j] = c;
314 i ++; j --;
315 }
316 }
317
318 // seq1_size of c1 must be no less than c2
319 // seq1_size of dst will equal length of c1
compile_cigars(AlnCigar * dst,AlnCigar * c1,int n1,AlnCigar * c2,int n2,int seq_idx)320 static inline int compile_cigars(AlnCigar *dst, AlnCigar *c1, int n1, AlnCigar *c2, int n2, int seq_idx){
321 int i, j, n3, x1, x2, x3, f1, f2, f3;
322 n3 = 0;
323 x2 = 0;
324 x3 = 0;
325 f2 = ALN_CIGAR_TYPE_MAT;
326 f3 = ALN_CIGAR_TYPE_INS;
327 for(i=j=0;i<n1;i++){
328 x1 = c1[i].len;
329 f1 = (c1[i].type >> seq_idx) & 0x01;
330 while(x1){
331 if(x2 == 0){
332 if(j < n2){
333 x2 = c2[j].len;
334 f2 = (c2[j].type >> seq_idx) & 0x01;
335 j ++;
336 } else {
337 x2 = x1;
338 f2 = f1;
339 }
340 }
341 if(f1){
342 if(f2){
343 if(x3 && f3 != ALN_CIGAR_TYPE_INS){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
344 if(x1 < x2){ x2 -= x1; x3 += x1; x1 = 0; }
345 else { x1 -= x2; x3 += x2; x2 = 0; }
346 f3 = ALN_CIGAR_TYPE_INS;
347 } else {
348 if(x3 && f3 != ALN_CIGAR_TYPE_DEL){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
349 f3 = ALN_CIGAR_TYPE_DEL;
350 x3 += x2; x2 = 0;
351 }
352 } else {
353 if(f2){
354 if(x3 && f3 != ALN_CIGAR_TYPE_INS){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
355 f3 = ALN_CIGAR_TYPE_INS;
356 x3 += x1; x1 = 0;
357 } else {
358 if(x3 && f3 != ALN_CIGAR_TYPE_INS){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
359 f3 = ALN_CIGAR_TYPE_INS;
360 if(x1 < x2){ x2 -= x1; x3 += x1; x1 = 0; }
361 else { x1 -= x2; x3 += x2; x2 = 0; }
362 }
363 }
364 }
365 }
366 if(x3){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); }
367 return n3;
368 }
369
370 // c2 is the dst in align_cigars
apply_cigars(AlnCigar * dst,AlnCigar * c1,int n1,AlnCigar * c2,int n2)371 static inline int apply_cigars(AlnCigar *dst, AlnCigar *c1, int n1, AlnCigar *c2, int n2){
372 int i, j, n3, x1, x2, x3, f1, f2;
373 n3 = 0;
374 x2 = 0;
375 f2 = ALN_CIGAR_TYPE_MAT;
376 for(i=j=0;i<n1;i++){
377 x1 = c1[i].len;
378 f1 = c1[i].type;
379 while(x1){
380 if(x2 == 0){
381 if(j < n2){
382 x2 = c2[j].len;
383 f2 = c2[j].type;
384 j ++;
385 } else {
386 x2 = x1;
387 f2 = ALN_CIGAR_TYPE_MAT;
388 }
389 }
390 if(f2 == ALN_CIGAR_TYPE_DEL){
391 n3 = _aln_cigar_add_cigar(dst, n3, x2, ALN_CIGAR_TYPE_DEL);
392 x2 = 0;
393 } else {
394 x3 = (x1 < x2)? x1 : x2;
395 n3 = _aln_cigar_add_cigar(dst, n3, x3, f1);
396 x1 -= x3;
397 x2 -= x3;
398 }
399 }
400 }
401 return n3;
402 }
403
refine_cigars(AlnCigar * c,int n)404 static inline int refine_cigars(AlnCigar *c, int n){
405 int i, j, x, f;
406 x = 0;
407 f = ALN_CIGAR_TYPE_NULL;
408 for(i=j=0;i<n;i++){
409 if(c[i].type == f){
410 x += c[i].len;
411 } else {
412 if(x){
413 j = _aln_cigar_add_cigar(c, j, x, f);
414 }
415 x = c[i].len;
416 f = c[i].type;
417 }
418 }
419 if(x){ j = _aln_cigar_add_cigar(c, j, x, f); }
420 return j;
421 }
422
cigars_seq2aln(char * dst,AlnCigar * c,int n,int seq_idx,char * seq)423 static inline int cigars_seq2aln(char *dst, AlnCigar *c, int n, int seq_idx, char *seq){
424 int i, j, k, m;
425 for(i=m=k=0;i<n;i++){
426 if((c[i].type >> seq_idx) & 0x01){
427 if(c[i].type & 0x04){
428 k += c[i].len;
429 } else {
430 for(j=0;j<c[i].len;j++) dst[m++] = seq[k++];
431 }
432 } else if(!(c[i].type & 0x04)){
433 for(j=0;j<c[i].len;j++) dst[m++] = '-';
434 }
435 }
436 dst[m] = '\0';
437 return m;
438 }
439
440 #endif
441