1 #include "config.h"
2
3 #include <assert.h>
4 #include <ctype.h>
5 #include <stddef.h>
6 #include <stdint.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10
11 #include "parasail.h"
12 #include "parasail/memory.h"
13
14 #if defined(_MSC_VER)
15 #define snprintf _snprintf
16 #endif
17
18 /* 0123456789 */
19 #define BAM_CIGAR_STR "MIDNSHP=XB"
20 #define BAM_CIGAR_SHIFT 4u
21
22 /* array index is an ASCII character value from a CIGAR,
23 element value is the corresponding integer opcode between 0 and 9 */
24 const uint8_t parasail_cigar_encoded_ops[] = {
25 0, 0, 0, 0,
26 0, 0, 0, 0,
27 0, 0, 0, 0,
28 0, 0, 0, 0,
29 0, 0, 0, 0,
30 0, 0, 0, 0,
31 0, 0, 0, 0,
32 0, 0, 0, 0,
33 0 /* */, 0 /* ! */, 0 /* " */, 0 /* # */,
34 0 /* $ */, 0 /* % */, 0 /* & */, 0 /* ' */,
35 0 /* ( */, 0 /* ) */, 0 /* * */, 0 /* + */,
36 0 /* , */, 0 /* - */, 0 /* . */, 0 /* / */,
37 0 /* 0 */, 0 /* 1 */, 0 /* 2 */, 0 /* 3 */,
38 0 /* 4 */, 0 /* 5 */, 0 /* 6 */, 0 /* 7 */,
39 0 /* 8 */, 0 /* 9 */, 0 /* : */, 0 /* ; */,
40 0 /* < */, 7 /* = */, 0 /* > */, 0 /* ? */,
41 0 /* @ */, 0 /* A */, 0 /* B */, 0 /* C */,
42 2 /* D */, 0 /* E */, 0 /* F */, 0 /* G */,
43 5 /* H */, 1 /* I */, 0 /* J */, 0 /* K */,
44 0 /* L */, 0 /* M */, 3 /* N */, 0 /* O */,
45 6 /* P */, 0 /* Q */, 0 /* R */, 4 /* S */,
46 0 /* T */, 0 /* U */, 0 /* V */, 0 /* W */,
47 8 /* X */, 0 /* Y */, 0 /* Z */, 0 /* [ */,
48 0 /* \ */, 0 /* ] */, 0 /* ^ */, 0 /* _ */,
49 0 /* ` */, 0 /* a */, 0 /* b */, 0 /* c */,
50 0 /* d */, 0 /* e */, 0 /* f */, 0 /* g */,
51 0 /* h */, 0 /* i */, 0 /* j */, 0 /* k */,
52 0 /* l */, 0 /* m */, 0 /* n */, 0 /* o */,
53 0 /* p */, 0 /* q */, 0 /* r */, 0 /* s */,
54 0 /* t */, 0 /* u */, 0 /* v */, 0 /* w */,
55 0 /* x */, 0 /* y */, 0 /* z */, 0 /* { */,
56 0 /* | */, 0 /* } */, 0 /* ~ */, 0 /* */
57 };
58
parasail_cigar_encode(uint32_t length,char op_letter)59 uint32_t parasail_cigar_encode(uint32_t length, char op_letter)
60 {
61 return (length << BAM_CIGAR_SHIFT) | (parasail_cigar_encoded_ops[(int)op_letter]);
62 }
63
parasail_cigar_encode_string(const char * cigar)64 parasail_cigar_t* parasail_cigar_encode_string(const char *cigar)
65 {
66 int sscanf_retcode = 0;
67 size_t offset = 0;
68 int chars_read = 0;
69 unsigned int len = 0;
70 char op = 'M';
71 int done = 0;
72 size_t string_length = 0;
73 size_t size = 0;
74 parasail_cigar_t *ret = NULL;
75
76 string_length = strlen(cigar);
77 size = sizeof(uint32_t)*string_length;
78 ret = malloc(sizeof(parasail_cigar_t));
79 ret->seq = malloc(size);
80 ret->len = 0;
81
82 while (!done) {
83 sscanf_retcode = sscanf(
84 &cigar[offset], "%u%c%n", &len, &op, &chars_read);
85 if (2 != sscanf_retcode) {
86 fprintf(stderr, "invalid CIGAR string\n");
87 parasail_cigar_free(ret);
88 return NULL;
89 }
90 offset += chars_read;
91 ret->len += 1;
92 if ((size_t)ret->len >= size) {
93 size *= 2;
94 ret->seq = realloc(ret->seq, size);
95 }
96 ret->seq[ret->len-1] = parasail_cigar_encode(len, op);
97 if (offset >= string_length) {
98 done = 1;
99 }
100 }
101
102 return ret;
103 }
104
parasail_cigar_decode_op(uint32_t cigar_int)105 char parasail_cigar_decode_op(uint32_t cigar_int) {
106 return (cigar_int & 0xfU) > 9 ? 'M': BAM_CIGAR_STR[cigar_int & 0xfU];
107 }
108
parasail_cigar_decode_len(uint32_t cigar_int)109 uint32_t parasail_cigar_decode_len(uint32_t cigar_int) {
110 return cigar_int >> BAM_CIGAR_SHIFT;
111 }
112
parasail_cigar_decode(parasail_cigar_t * cigar)113 char* parasail_cigar_decode(parasail_cigar_t *cigar)
114 {
115 #define SIZE 40
116 char *ret = NULL;
117 size_t retlen = 0;
118 size_t size = 0;
119 int i = 0;
120
121 /* initial allocation for 1 op and 3 number characters per cigar int */
122 size = sizeof(char)*cigar->len*4;
123 ret = malloc(size+1);
124 ret[0] = '\0';
125
126 for (i=0; i<cigar->len; ++i) {
127 char tmp[SIZE];
128 char op = parasail_cigar_decode_op(cigar->seq[i]);
129 uint32_t len = parasail_cigar_decode_len(cigar->seq[i]);
130 int snprintf_retcode = snprintf(tmp, SIZE, "%u%c", len, op);
131 if (snprintf_retcode >= SIZE) {
132 fprintf(stderr, "invalid CIGAR\n");
133 free(ret);
134 return NULL;
135 }
136 retlen += snprintf_retcode;
137 if (retlen >= size) {
138 size *= 2;
139 ret = realloc(ret, size+1);
140 }
141 strcat(ret, tmp);
142 }
143
144 return ret;
145 }
146
parasail_cigar_free(parasail_cigar_t * cigar)147 void parasail_cigar_free(parasail_cigar_t *cigar)
148 {
149 free(cigar->seq);
150 free(cigar);
151 }
152
153 #define CONCAT_(X, Y) X##Y
154 #define CONCAT(X, Y) CONCAT_(X, Y)
155 #define CONCAT3_(X, Y, Z) X##Y##Z
156 #define CONCAT3(X, Y, Z) CONCAT3_(X, Y, Z)
157 #define LOC_NOVEC int64_t loc = i*lenb + j;
158 #define LOC_STRIPED int64_t loc = j*segLen*segWidth + (i%segLen)*segWidth + (i/segLen);
159
160 #define T 8
161 #include "cigar_template.c"
162 #undef T
163
164 #define T 8
165 #define STRIPED
166 #include "cigar_template.c"
167 #undef T
168 #undef STRIPED
169
170 #define T 16
171 #include "cigar_template.c"
172 #undef T
173
174 #define T 16
175 #define STRIPED
176 #include "cigar_template.c"
177 #undef T
178 #undef STRIPED
179
180 #define T 32
181 #include "cigar_template.c"
182 #undef T
183
184 #define T 32
185 #define STRIPED
186 #include "cigar_template.c"
187 #undef T
188 #undef STRIPED
189
190 #define T 64
191 #include "cigar_template.c"
192 #undef T
193
194 #define T 64
195 #define STRIPED
196 #include "cigar_template.c"
197 #undef T
198 #undef STRIPED
199
parasail_result_get_cigar_extra(parasail_result_t * result,const char * seqA,int lena,const char * seqB,int lenb,const parasail_matrix_t * matrix,int case_sensitive,const char * alphabet_aliases)200 parasail_cigar_t* parasail_result_get_cigar_extra(
201 parasail_result_t *result,
202 const char *seqA,
203 int lena,
204 const char *seqB,
205 int lenb,
206 const parasail_matrix_t *matrix,
207 int case_sensitive,
208 const char *alphabet_aliases)
209 {
210 assert(parasail_result_is_trace(result));
211
212 if (result->flag & PARASAIL_FLAG_STRIPED || result->flag & PARASAIL_FLAG_SCAN) {
213 if (result->flag & PARASAIL_FLAG_BITS_8) {
214 return parasail_cigar_striped_8(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
215 }
216 else if (result->flag & PARASAIL_FLAG_BITS_16) {
217 return parasail_cigar_striped_16(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
218 }
219 else if (result->flag & PARASAIL_FLAG_BITS_32) {
220 return parasail_cigar_striped_32(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
221 }
222 else if (result->flag & PARASAIL_FLAG_BITS_64) {
223 return parasail_cigar_striped_64(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
224 }
225 }
226 else {
227 return parasail_cigar_8(seqA, lena, seqB, lenb, matrix, result, case_sensitive, alphabet_aliases);
228 }
229
230 /* should not get here, but to silence warnings */
231 return NULL;
232 }
233
parasail_result_get_cigar(parasail_result_t * result,const char * seqA,int lena,const char * seqB,int lenb,const parasail_matrix_t * matrix)234 parasail_cigar_t* parasail_result_get_cigar(
235 parasail_result_t *result,
236 const char *seqA,
237 int lena,
238 const char *seqB,
239 int lenb,
240 const parasail_matrix_t *matrix)
241 {
242 return parasail_result_get_cigar_extra(result, seqA, lena, seqB, lenb, matrix, 0, NULL);
243 }
244
245