1 /* this file is only included by cigar.c, multiple times */
2 
3 #define D CONCAT3(int, T, _t)
4 
5 #define UNUSED(expr) do { (void)(expr); } while (0)
6 
7 #if defined(STRIPED)
8 #define NAME parasail_cigar_striped_
9 #define LOC LOC_STRIPED
10 #else
11 #define NAME parasail_cigar_
12 #define LOC LOC_NOVEC
13 #endif
14 
15 #define INC                                                       \
16 do {                                                              \
17     cigar->len += 1;                                              \
18     if ((size_t)cigar->len >= size) {                             \
19         size = size * 2;                                          \
20         cigar->seq = realloc(cigar->seq, sizeof(uint32_t)*size);  \
21     }                                                             \
22 } while (0);
23 
24 #define RESET  \
25 do {           \
26     c_mat = 0; \
27     c_mis = 0; \
28     c_del = 0; \
29     c_ins = 0; \
30 } while (0)
31 
32 #define WRITE(VAL,CHAR)                                         \
33 do {                                                            \
34     INC;                                                        \
35     cigar->seq[cigar->len-1] = parasail_cigar_encode(VAL,CHAR); \
36 } while (0)
37 
38 /* internally I accidentally flipped I/D, so rather than go back and
39  * rewrite a bunch of code, I fix the problem by just swapping the
40  * letters here in the cigar output */
41 #define WRITE_ANY         \
42 do {                      \
43     if (c_mat) {          \
44         WRITE(c_mat,'='); \
45     }                     \
46     else if (c_mis) {     \
47         WRITE(c_mis,'X'); \
48     }                     \
49     else if (c_del) {     \
50         WRITE(c_del,'I'); \
51     }                     \
52     else if (c_ins) {     \
53         WRITE(c_ins,'D'); \
54     }                     \
55     RESET;                \
56 } while (0)
57 
CONCAT(NAME,T)58 static inline parasail_cigar_t* CONCAT(NAME, T) (
59         const char *seqA,
60         int lena,
61         const char *seqB,
62         int lenb,
63         const parasail_matrix_t *matrix,
64         parasail_result_t *result,
65         int case_sensitive,
66         const char *alphabet_aliases_)
67 {
68     char alphabet_aliases[256];
69     size_t aliases_size = 0;
70     size_t size = lena+lenb;
71     parasail_cigar_t *cigar = malloc(sizeof(parasail_cigar_t));
72     uint32_t *cigar_reverse = NULL;
73     uint32_t c_mat = 0;
74     uint32_t c_mis = 0;
75     uint32_t c_del = 0;
76     uint32_t c_ins = 0;
77     int64_t i = result->end_query;
78     int64_t j = result->end_ref;
79     int where = PARASAIL_DIAG;
80     D *HT = (D*)result->trace->trace_table;
81 #if defined(STRIPED)
82     int64_t segWidth = 0;
83     int64_t segLen = 0;
84     if (result->flag & PARASAIL_FLAG_LANES_1) {
85         segWidth = 1;
86     }
87     if (result->flag & PARASAIL_FLAG_LANES_2) {
88         segWidth = 2;
89     }
90     if (result->flag & PARASAIL_FLAG_LANES_4) {
91         segWidth = 4;
92     }
93     if (result->flag & PARASAIL_FLAG_LANES_8) {
94         segWidth = 8;
95     }
96     if (result->flag & PARASAIL_FLAG_LANES_16) {
97         segWidth = 16;
98     }
99     if (result->flag & PARASAIL_FLAG_LANES_32) {
100         segWidth = 32;
101     }
102     if (result->flag & PARASAIL_FLAG_LANES_64) {
103         segWidth = 64;
104     }
105     segLen = (lena + segWidth - 1) / segWidth;
106 #endif
107     if (NULL != alphabet_aliases_) {
108         size_t i;
109         aliases_size = strlen(alphabet_aliases_);
110         assert(aliases_size % 2 == 0 && aliases_size < 256); // even number of characters in alias
111         for (i=0; i<aliases_size; ++i) {
112             alphabet_aliases[i] = case_sensitive ? alphabet_aliases_[i] :
113                                                    toupper(alphabet_aliases_[i]);
114         }
115     }
116     cigar->seq = malloc(sizeof(uint32_t)*size);
117     cigar->len = 0;
118     cigar->beg_query = 0;
119     cigar->beg_ref = 0;
120     UNUSED(matrix);
121     /* semi-global alignment includes the end gaps */
122     if (result->flag & PARASAIL_FLAG_SG) {
123         int64_t k;
124         if (result->end_query+1 == lena) {
125             k = lenb-1;
126             while (k > j) {
127                 ++c_ins;
128                 --k;
129             }
130         }
131         else if (result->end_ref+1 == lenb) {
132             k = lena-1;
133             while (k > i) {
134                 ++c_del;
135                 --k;
136             }
137         }
138         else {
139             parasail_cigar_free(cigar);
140             return NULL;
141         }
142     }
143     while (i >= 0 || j >= 0) {
144         LOC
145         /*assert(i >= 0 && j >= 0);*/
146         if (i < 0) {
147             if (0 == c_ins) {
148                 WRITE_ANY;
149             }
150             while (j >= 0) {
151                 ++c_ins;
152                 --j;
153             }
154             break;
155         }
156         if (j < 0) {
157             if (0 == c_del) {
158                 WRITE_ANY;
159             }
160             while (i >= 0) {
161                 ++c_del;
162                 --i;
163             }
164             break;
165         }
166         if (PARASAIL_DIAG == where) {
167             if (HT[loc] & PARASAIL_DIAG) {
168                 char a = case_sensitive ? seqA[i] : toupper(seqA[i]);
169                 char b = case_sensitive ? seqB[j] : toupper(seqB[j]);
170                 int matches = (a == b);
171                 if (NULL != alphabet_aliases) {
172                     size_t i;
173                     for (i=0; i<aliases_size; i+=1) {
174                         if (alphabet_aliases[i] == a) {
175                             matches |= alphabet_aliases[i+1] == b;
176                         }
177                         else if (alphabet_aliases[i+1] == a) {
178                             matches |= alphabet_aliases[i] == b;
179                         }
180                     }
181                 }
182                 if (matches) {
183                     if (0 == c_mat) {
184                         WRITE_ANY;
185                     }
186                     c_mat += 1;
187                 }
188                 else {
189                     if (0 == c_mis) {
190                         WRITE_ANY;
191                     }
192                     c_mis += 1;
193                 }
194                 --i;
195                 --j;
196             }
197             else if (HT[loc] & PARASAIL_INS) {
198                 where = PARASAIL_INS;
199             }
200             else if (HT[loc] & PARASAIL_DEL) {
201                 where = PARASAIL_DEL;
202             }
203             /* no bits were set, so this is the zero condition */
204             else {
205                 break;
206             }
207         }
208         else if (PARASAIL_INS == where) {
209             if (0 == c_ins) {
210                 WRITE_ANY;
211             }
212             c_ins += 1;
213             --j;
214             if (HT[loc] & PARASAIL_DIAG_E) {
215                 where = PARASAIL_DIAG;
216             }
217             else if (HT[loc] & PARASAIL_INS_E) {
218                 where = PARASAIL_INS;
219             }
220             else {
221                 parasail_cigar_free(cigar);
222                 return NULL;
223             }
224         }
225         else if (PARASAIL_DEL == where) {
226             if (0 == c_del) {
227                 WRITE_ANY;
228             }
229             c_del += 1;
230             --i;
231             if (HT[loc] & PARASAIL_DIAG_F) {
232                 where = PARASAIL_DIAG;
233             }
234             else if (HT[loc] & PARASAIL_DEL_F) {
235                 where = PARASAIL_DEL;
236             }
237             else {
238                 parasail_cigar_free(cigar);
239                 return NULL;
240             }
241         }
242         else if (PARASAIL_ZERO == where) {
243             break;
244         }
245         else {
246             parasail_cigar_free(cigar);
247             return NULL;
248         }
249     }
250 
251     /* in case we missed the last write */
252     WRITE_ANY;
253 
254     cigar_reverse = parasail_reverse_uint32_t(cigar->seq, cigar->len);
255     free(cigar->seq);
256     cigar->seq = cigar_reverse;
257     cigar->beg_query = i+1;
258     cigar->beg_ref = j+1;
259 
260     return cigar;
261 }
262 
263 #undef D
264 #undef NAME
265 #undef LOC
266 
267