1 /* this file is only included by cigar.c, multiple times */
2
3 #define D CONCAT3(int, T, _t)
4
5 #define UNUSED(expr) do { (void)(expr); } while (0)
6
7 #if defined(STRIPED)
8 #define NAME parasail_cigar_striped_
9 #define LOC LOC_STRIPED
10 #else
11 #define NAME parasail_cigar_
12 #define LOC LOC_NOVEC
13 #endif
14
15 #define INC \
16 do { \
17 cigar->len += 1; \
18 if ((size_t)cigar->len >= size) { \
19 size = size * 2; \
20 cigar->seq = realloc(cigar->seq, sizeof(uint32_t)*size); \
21 } \
22 } while (0);
23
24 #define RESET \
25 do { \
26 c_mat = 0; \
27 c_mis = 0; \
28 c_del = 0; \
29 c_ins = 0; \
30 } while (0)
31
32 #define WRITE(VAL,CHAR) \
33 do { \
34 INC; \
35 cigar->seq[cigar->len-1] = parasail_cigar_encode(VAL,CHAR); \
36 } while (0)
37
38 /* internally I accidentally flipped I/D, so rather than go back and
39 * rewrite a bunch of code, I fix the problem by just swapping the
40 * letters here in the cigar output */
41 #define WRITE_ANY \
42 do { \
43 if (c_mat) { \
44 WRITE(c_mat,'='); \
45 } \
46 else if (c_mis) { \
47 WRITE(c_mis,'X'); \
48 } \
49 else if (c_del) { \
50 WRITE(c_del,'I'); \
51 } \
52 else if (c_ins) { \
53 WRITE(c_ins,'D'); \
54 } \
55 RESET; \
56 } while (0)
57
CONCAT(NAME,T)58 static inline parasail_cigar_t* CONCAT(NAME, T) (
59 const char *seqA,
60 int lena,
61 const char *seqB,
62 int lenb,
63 const parasail_matrix_t *matrix,
64 parasail_result_t *result,
65 int case_sensitive,
66 const char *alphabet_aliases_)
67 {
68 char alphabet_aliases[256];
69 size_t aliases_size = 0;
70 size_t size = lena+lenb;
71 parasail_cigar_t *cigar = malloc(sizeof(parasail_cigar_t));
72 uint32_t *cigar_reverse = NULL;
73 uint32_t c_mat = 0;
74 uint32_t c_mis = 0;
75 uint32_t c_del = 0;
76 uint32_t c_ins = 0;
77 int64_t i = result->end_query;
78 int64_t j = result->end_ref;
79 int where = PARASAIL_DIAG;
80 D *HT = (D*)result->trace->trace_table;
81 #if defined(STRIPED)
82 int64_t segWidth = 0;
83 int64_t segLen = 0;
84 if (result->flag & PARASAIL_FLAG_LANES_1) {
85 segWidth = 1;
86 }
87 if (result->flag & PARASAIL_FLAG_LANES_2) {
88 segWidth = 2;
89 }
90 if (result->flag & PARASAIL_FLAG_LANES_4) {
91 segWidth = 4;
92 }
93 if (result->flag & PARASAIL_FLAG_LANES_8) {
94 segWidth = 8;
95 }
96 if (result->flag & PARASAIL_FLAG_LANES_16) {
97 segWidth = 16;
98 }
99 if (result->flag & PARASAIL_FLAG_LANES_32) {
100 segWidth = 32;
101 }
102 if (result->flag & PARASAIL_FLAG_LANES_64) {
103 segWidth = 64;
104 }
105 segLen = (lena + segWidth - 1) / segWidth;
106 #endif
107 if (NULL != alphabet_aliases_) {
108 size_t i;
109 aliases_size = strlen(alphabet_aliases_);
110 assert(aliases_size % 2 == 0 && aliases_size < 256); // even number of characters in alias
111 for (i=0; i<aliases_size; ++i) {
112 alphabet_aliases[i] = case_sensitive ? alphabet_aliases_[i] :
113 toupper(alphabet_aliases_[i]);
114 }
115 }
116 cigar->seq = malloc(sizeof(uint32_t)*size);
117 cigar->len = 0;
118 cigar->beg_query = 0;
119 cigar->beg_ref = 0;
120 UNUSED(matrix);
121 /* semi-global alignment includes the end gaps */
122 if (result->flag & PARASAIL_FLAG_SG) {
123 int64_t k;
124 if (result->end_query+1 == lena) {
125 k = lenb-1;
126 while (k > j) {
127 ++c_ins;
128 --k;
129 }
130 }
131 else if (result->end_ref+1 == lenb) {
132 k = lena-1;
133 while (k > i) {
134 ++c_del;
135 --k;
136 }
137 }
138 else {
139 parasail_cigar_free(cigar);
140 return NULL;
141 }
142 }
143 while (i >= 0 || j >= 0) {
144 LOC
145 /*assert(i >= 0 && j >= 0);*/
146 if (i < 0) {
147 if (0 == c_ins) {
148 WRITE_ANY;
149 }
150 while (j >= 0) {
151 ++c_ins;
152 --j;
153 }
154 break;
155 }
156 if (j < 0) {
157 if (0 == c_del) {
158 WRITE_ANY;
159 }
160 while (i >= 0) {
161 ++c_del;
162 --i;
163 }
164 break;
165 }
166 if (PARASAIL_DIAG == where) {
167 if (HT[loc] & PARASAIL_DIAG) {
168 char a = case_sensitive ? seqA[i] : toupper(seqA[i]);
169 char b = case_sensitive ? seqB[j] : toupper(seqB[j]);
170 int matches = (a == b);
171 if (NULL != alphabet_aliases) {
172 size_t i;
173 for (i=0; i<aliases_size; i+=1) {
174 if (alphabet_aliases[i] == a) {
175 matches |= alphabet_aliases[i+1] == b;
176 }
177 else if (alphabet_aliases[i+1] == a) {
178 matches |= alphabet_aliases[i] == b;
179 }
180 }
181 }
182 if (matches) {
183 if (0 == c_mat) {
184 WRITE_ANY;
185 }
186 c_mat += 1;
187 }
188 else {
189 if (0 == c_mis) {
190 WRITE_ANY;
191 }
192 c_mis += 1;
193 }
194 --i;
195 --j;
196 }
197 else if (HT[loc] & PARASAIL_INS) {
198 where = PARASAIL_INS;
199 }
200 else if (HT[loc] & PARASAIL_DEL) {
201 where = PARASAIL_DEL;
202 }
203 /* no bits were set, so this is the zero condition */
204 else {
205 break;
206 }
207 }
208 else if (PARASAIL_INS == where) {
209 if (0 == c_ins) {
210 WRITE_ANY;
211 }
212 c_ins += 1;
213 --j;
214 if (HT[loc] & PARASAIL_DIAG_E) {
215 where = PARASAIL_DIAG;
216 }
217 else if (HT[loc] & PARASAIL_INS_E) {
218 where = PARASAIL_INS;
219 }
220 else {
221 parasail_cigar_free(cigar);
222 return NULL;
223 }
224 }
225 else if (PARASAIL_DEL == where) {
226 if (0 == c_del) {
227 WRITE_ANY;
228 }
229 c_del += 1;
230 --i;
231 if (HT[loc] & PARASAIL_DIAG_F) {
232 where = PARASAIL_DIAG;
233 }
234 else if (HT[loc] & PARASAIL_DEL_F) {
235 where = PARASAIL_DEL;
236 }
237 else {
238 parasail_cigar_free(cigar);
239 return NULL;
240 }
241 }
242 else if (PARASAIL_ZERO == where) {
243 break;
244 }
245 else {
246 parasail_cigar_free(cigar);
247 return NULL;
248 }
249 }
250
251 /* in case we missed the last write */
252 WRITE_ANY;
253
254 cigar_reverse = parasail_reverse_uint32_t(cigar->seq, cigar->len);
255 free(cigar->seq);
256 cigar->seq = cigar_reverse;
257 cigar->beg_query = i+1;
258 cigar->beg_ref = j+1;
259
260 return cigar;
261 }
262
263 #undef D
264 #undef NAME
265 #undef LOC
266
267