1 #include "config.h"
2 
3 #include <assert.h>
4 #include <ctype.h>
5 #include <stddef.h>
6 #include <stdint.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 
11 #include "parasail.h"
12 #include "parasail/memory.h"
13 
14 #if defined(_MSC_VER)
15 #define snprintf _snprintf
16 #endif
17 
18 /*                     0123456789 */
19 #define BAM_CIGAR_STR "MIDNSHP=XB"
20 #define BAM_CIGAR_SHIFT 4u
21 
22 /* array index is an ASCII character value from a CIGAR,
23    element value is the corresponding integer opcode between 0 and 9 */
24 const uint8_t parasail_cigar_encoded_ops[] = {
25     0,         0,         0,         0,
26     0,         0,         0,         0,
27     0,         0,         0,         0,
28     0,         0,         0,         0,
29     0,         0,         0,         0,
30     0,         0,         0,         0,
31     0,         0,         0,         0,
32     0,         0,         0,         0,
33     0 /*   */, 0 /* ! */, 0 /* " */, 0 /* # */,
34     0 /* $ */, 0 /* % */, 0 /* & */, 0 /* ' */,
35     0 /* ( */, 0 /* ) */, 0 /* * */, 0 /* + */,
36     0 /* , */, 0 /* - */, 0 /* . */, 0 /* / */,
37     0 /* 0 */, 0 /* 1 */, 0 /* 2 */, 0 /* 3 */,
38     0 /* 4 */, 0 /* 5 */, 0 /* 6 */, 0 /* 7 */,
39     0 /* 8 */, 0 /* 9 */, 0 /* : */, 0 /* ; */,
40     0 /* < */, 7 /* = */, 0 /* > */, 0 /* ? */,
41     0 /* @ */, 0 /* A */, 0 /* B */, 0 /* C */,
42     2 /* D */, 0 /* E */, 0 /* F */, 0 /* G */,
43     5 /* H */, 1 /* I */, 0 /* J */, 0 /* K */,
44     0 /* L */, 0 /* M */, 3 /* N */, 0 /* O */,
45     6 /* P */, 0 /* Q */, 0 /* R */, 4 /* S */,
46     0 /* T */, 0 /* U */, 0 /* V */, 0 /* W */,
47     8 /* X */, 0 /* Y */, 0 /* Z */, 0 /* [ */,
48     0 /* \ */, 0 /* ] */, 0 /* ^ */, 0 /* _ */,
49     0 /* ` */, 0 /* a */, 0 /* b */, 0 /* c */,
50     0 /* d */, 0 /* e */, 0 /* f */, 0 /* g */,
51     0 /* h */, 0 /* i */, 0 /* j */, 0 /* k */,
52     0 /* l */, 0 /* m */, 0 /* n */, 0 /* o */,
53     0 /* p */, 0 /* q */, 0 /* r */, 0 /* s */,
54     0 /* t */, 0 /* u */, 0 /* v */, 0 /* w */,
55     0 /* x */, 0 /* y */, 0 /* z */, 0 /* { */,
56     0 /* | */, 0 /* } */, 0 /* ~ */, 0 /*  */
57 };
58 
parasail_cigar_encode(uint32_t length,char op_letter)59 uint32_t parasail_cigar_encode(uint32_t length, char op_letter)
60 {
61     return (length << BAM_CIGAR_SHIFT) | (parasail_cigar_encoded_ops[(int)op_letter]);
62 }
63 
parasail_cigar_encode_string(const char * cigar)64 parasail_cigar_t* parasail_cigar_encode_string(const char *cigar)
65 {
66     int sscanf_retcode = 0;
67     size_t offset = 0;
68     int chars_read = 0;
69     unsigned int len = 0;
70     char op = 'M';
71     int done = 0;
72     size_t string_length = 0;
73     size_t size = 0;
74     parasail_cigar_t *ret = NULL;
75 
76     string_length = strlen(cigar);
77     size = sizeof(uint32_t)*string_length;
78     ret = malloc(sizeof(parasail_cigar_t));
79     ret->seq = malloc(size);
80     ret->len = 0;
81 
82     while (!done) {
83         sscanf_retcode = sscanf(
84                 &cigar[offset], "%u%c%n", &len, &op, &chars_read);
85         if (2 != sscanf_retcode) {
86             fprintf(stderr, "invalid CIGAR string\n");
87             parasail_cigar_free(ret);
88             return NULL;
89         }
90         offset += chars_read;
91         ret->len += 1;
92         if ((size_t)ret->len >= size) {
93             size *= 2;
94             ret->seq = realloc(ret->seq, size);
95         }
96         ret->seq[ret->len-1] = parasail_cigar_encode(len, op);
97         if (offset >= string_length) {
98             done = 1;
99         }
100     }
101 
102     return ret;
103 }
104 
parasail_cigar_decode_op(uint32_t cigar_int)105 char parasail_cigar_decode_op(uint32_t cigar_int) {
106     return (cigar_int & 0xfU) > 9 ? 'M': BAM_CIGAR_STR[cigar_int & 0xfU];
107 }
108 
parasail_cigar_decode_len(uint32_t cigar_int)109 uint32_t parasail_cigar_decode_len(uint32_t cigar_int) {
110     return cigar_int >> BAM_CIGAR_SHIFT;
111 }
112 
parasail_cigar_decode(parasail_cigar_t * cigar)113 char* parasail_cigar_decode(parasail_cigar_t *cigar)
114 {
115 #define SIZE 40
116     char *ret = NULL;
117     size_t retlen = 0;
118     size_t size = 0;
119     int i = 0;
120 
121     /* initial allocation for 1 op and 3 number characters per cigar int */
122     size = sizeof(char)*cigar->len*4;
123     ret = malloc(size+1);
124     ret[0] = '\0';
125 
126     for (i=0; i<cigar->len; ++i) {
127         char tmp[SIZE];
128         char op = parasail_cigar_decode_op(cigar->seq[i]);
129         uint32_t len = parasail_cigar_decode_len(cigar->seq[i]);
130         int snprintf_retcode = snprintf(tmp, SIZE, "%u%c", len, op);
131         if (snprintf_retcode >= SIZE) {
132             fprintf(stderr, "invalid CIGAR\n");
133             free(ret);
134             return NULL;
135         }
136         retlen += snprintf_retcode;
137         if (retlen >= size) {
138             size *= 2;
139             ret = realloc(ret, size+1);
140         }
141         strcat(ret, tmp);
142     }
143 
144     return ret;
145 }
146 
parasail_cigar_free(parasail_cigar_t * cigar)147 void parasail_cigar_free(parasail_cigar_t *cigar)
148 {
149     free(cigar->seq);
150     free(cigar);
151 }
152 
153 #define CONCAT_(X, Y) X##Y
154 #define CONCAT(X, Y) CONCAT_(X, Y)
155 #define CONCAT3_(X, Y, Z) X##Y##Z
156 #define CONCAT3(X, Y, Z) CONCAT3_(X, Y, Z)
157 #define LOC_NOVEC int64_t loc = i*lenb + j;
158 #define LOC_STRIPED int64_t loc = j*segLen*segWidth + (i%segLen)*segWidth + (i/segLen);
159 
160 #define T 8
161 #include "cigar_template.c"
162 #undef T
163 
164 #define T 8
165 #define STRIPED
166 #include "cigar_template.c"
167 #undef T
168 #undef STRIPED
169 
170 #define T 16
171 #include "cigar_template.c"
172 #undef T
173 
174 #define T 16
175 #define STRIPED
176 #include "cigar_template.c"
177 #undef T
178 #undef STRIPED
179 
180 #define T 32
181 #include "cigar_template.c"
182 #undef T
183 
184 #define T 32
185 #define STRIPED
186 #include "cigar_template.c"
187 #undef T
188 #undef STRIPED
189 
190 #define T 64
191 #include "cigar_template.c"
192 #undef T
193 
194 #define T 64
195 #define STRIPED
196 #include "cigar_template.c"
197 #undef T
198 #undef STRIPED
199 
parasail_result_get_cigar_extra(parasail_result_t * result,const char * seqA,int lena,const char * seqB,int lenb,const parasail_matrix_t * matrix,int case_sensitive,const char * alphabet_aliases)200 parasail_cigar_t* parasail_result_get_cigar_extra(
201         parasail_result_t *result,
202         const char *seqA,
203         int lena,
204         const char *seqB,
205         int lenb,
206         const parasail_matrix_t *matrix,
207         int case_sensitive,
208         const char *alphabet_aliases)
209 {
210     assert(parasail_result_is_trace(result));
211 
212     if (result->flag & PARASAIL_FLAG_STRIPED || result->flag & PARASAIL_FLAG_SCAN) {
213         if (result->flag & PARASAIL_FLAG_BITS_8) {
214             return parasail_cigar_striped_8(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
215         }
216         else if (result->flag & PARASAIL_FLAG_BITS_16) {
217             return parasail_cigar_striped_16(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
218         }
219         else if (result->flag & PARASAIL_FLAG_BITS_32) {
220             return parasail_cigar_striped_32(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
221         }
222         else if (result->flag & PARASAIL_FLAG_BITS_64) {
223             return parasail_cigar_striped_64(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
224         }
225     }
226     else {
227         return parasail_cigar_8(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
228     }
229 
230     /* should not get here, but to silence warnings */
231     return NULL;
232 }
233 
parasail_result_get_cigar(parasail_result_t * result,const char * seqA,int lena,const char * seqB,int lenb,const parasail_matrix_t * matrix)234 parasail_cigar_t* parasail_result_get_cigar(
235         parasail_result_t *result,
236         const char *seqA,
237         int lena,
238         const char *seqB,
239         int lenb,
240         const parasail_matrix_t *matrix)
241 {
242     return parasail_result_get_cigar_extra(result, seqA, lena, seqB, lenb, matrix, 0, NULL);
243 }
244 
245