1 /**
2  * @file
3  *
4  * @author jeffrey.daily@gmail.com
5  *
6  * Copyright (c) 2015 Battelle Memorial Institute.
7  */
8 #include "config.h"
9 
10 #include <immintrin.h>
11 
12 #include "parasail/memory.h"
13 #include "parasail/internal_avx.h"
14 
parasail_memalign___m256i(size_t alignment,size_t size)15 __m256i * parasail_memalign___m256i(size_t alignment, size_t size)
16 {
17     return (__m256i *) parasail_memalign(alignment, size*sizeof(__m256i));
18 }
19 
parasail_memset___m256i(__m256i * b,__m256i c,size_t len)20 void parasail_memset___m256i(__m256i *b, __m256i c, size_t len)
21 {
22     size_t i;
23     for (i=0; i<len; ++i) {
24         _mm256_store_si256(&b[i], c);
25     }
26 }
27 
parasail_profile_create_avx_256_8(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)28 parasail_profile_t * parasail_profile_create_avx_256_8(
29         const char * const restrict s1, const int s1Len,
30         const parasail_matrix_t *matrix)
31 {
32     int32_t i = 0;
33     int32_t j = 0;
34     int32_t k = 0;
35     int32_t segNum = 0;
36     const int32_t n = matrix->size; /* number of amino acids in table */
37     const int32_t segWidth = 32; /* number of values in vector unit */
38     const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
39     __m256i* const restrict vProfile = parasail_memalign___m256i(32, n * segLen);
40     int32_t index = 0;
41 
42     parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
43 
44     for (k=0; k<n; ++k) {
45         for (i=0; i<segLen; ++i) {
46             __m256i_8_t t;
47             j = i;
48             for (segNum=0; segNum<segWidth; ++segNum) {
49                 t.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
50                 j += segLen;
51             }
52             _mm256_store_si256(&vProfile[index], t.m);
53             ++index;
54         }
55     }
56 
57     profile->profile8.score = vProfile;
58     profile->free = &parasail_free___m256i;
59     return profile;
60 }
61 
parasail_profile_create_avx_256_16(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)62 parasail_profile_t * parasail_profile_create_avx_256_16(
63         const char * const restrict s1, const int s1Len,
64         const parasail_matrix_t *matrix)
65 {
66     int32_t i = 0;
67     int32_t j = 0;
68     int32_t k = 0;
69     int32_t segNum = 0;
70     const int32_t n = matrix->size; /* number of amino acids in table */
71     const int32_t segWidth = 16; /* number of values in vector unit */
72     const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
73     __m256i* const restrict vProfile = parasail_memalign___m256i(32, n * segLen);
74     int32_t index = 0;
75 
76     parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
77 
78     for (k=0; k<n; ++k) {
79         for (i=0; i<segLen; ++i) {
80             __m256i_16_t t;
81             j = i;
82             for (segNum=0; segNum<segWidth; ++segNum) {
83                 t.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
84                 j += segLen;
85             }
86             _mm256_store_si256(&vProfile[index], t.m);
87             ++index;
88         }
89     }
90 
91     profile->profile16.score = vProfile;
92     profile->free = &parasail_free___m256i;
93     return profile;
94 }
95 
parasail_profile_create_avx_256_32(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)96 parasail_profile_t * parasail_profile_create_avx_256_32(
97         const char * const restrict s1, const int s1Len,
98         const parasail_matrix_t *matrix)
99 {
100     int32_t i = 0;
101     int32_t j = 0;
102     int32_t k = 0;
103     int32_t segNum = 0;
104     const int32_t n = matrix->size; /* number of amino acids in table */
105     const int32_t segWidth = 8; /* number of values in vector unit */
106     const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
107     __m256i* const restrict vProfile = parasail_memalign___m256i(32, n * segLen);
108     int32_t index = 0;
109 
110     parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
111 
112     for (k=0; k<n; ++k) {
113         for (i=0; i<segLen; ++i) {
114             __m256i_32_t t;
115             j = i;
116             for (segNum=0; segNum<segWidth; ++segNum) {
117                 t.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
118                 j += segLen;
119             }
120             _mm256_store_si256(&vProfile[index], t.m);
121             ++index;
122         }
123     }
124 
125     profile->profile32.score = vProfile;
126     profile->free = &parasail_free___m256i;
127     return profile;
128 }
129 
parasail_profile_create_avx_256_64(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)130 parasail_profile_t * parasail_profile_create_avx_256_64(
131         const char * const restrict s1, const int s1Len,
132         const parasail_matrix_t *matrix)
133 {
134     int32_t i = 0;
135     int32_t j = 0;
136     int32_t k = 0;
137     int32_t segNum = 0;
138     const int32_t n = matrix->size; /* number of amino acids in table */
139     const int32_t segWidth = 4; /* number of values in vector unit */
140     const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
141     __m256i* const restrict vProfile = parasail_memalign___m256i(32, n * segLen);
142     int32_t index = 0;
143 
144     parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
145 
146     for (k=0; k<n; ++k) {
147         for (i=0; i<segLen; ++i) {
148             __m256i_64_t t;
149             j = i;
150             for (segNum=0; segNum<segWidth; ++segNum) {
151                 t.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
152                 j += segLen;
153             }
154             _mm256_store_si256(&vProfile[index], t.m);
155             ++index;
156         }
157     }
158 
159     profile->profile64.score = vProfile;
160     profile->free = &parasail_free___m256i;
161     return profile;
162 }
163 
parasail_profile_create_avx_256_sat(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)164 parasail_profile_t* parasail_profile_create_avx_256_sat(
165         const char * const restrict s1, const int s1Len,
166         const parasail_matrix_t *matrix)
167 {
168     parasail_profile_t *profile8 = parasail_profile_create_avx_256_8(s1, s1Len, matrix);
169     parasail_profile_t *profile16 = parasail_profile_create_avx_256_16(s1, s1Len, matrix);
170     parasail_profile_t *profile32 = parasail_profile_create_avx_256_32(s1, s1Len, matrix);
171     profile8->profile16 = profile16->profile16;
172     profile8->profile32 = profile32->profile32;
173     free(profile16);
174     free(profile32);
175 
176     return profile8;
177 }
178 
parasail_profile_create_stats_avx_256_8(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)179 parasail_profile_t * parasail_profile_create_stats_avx_256_8(
180         const char * const restrict s1, const int s1Len,
181         const parasail_matrix_t *matrix)
182 {
183     int32_t i = 0;
184     int32_t j = 0;
185     int32_t k = 0;
186     int32_t segNum = 0;
187     const int32_t n = matrix->size; /* number of amino acids in table */
188     const int32_t segWidth = 32; /* number of values in vector unit */
189     const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
190     __m256i* const restrict vProfile = parasail_memalign___m256i(32, n * segLen);
191     __m256i* const restrict vProfileM = parasail_memalign___m256i(32, n * segLen);
192     __m256i* const restrict vProfileS = parasail_memalign___m256i(32, n * segLen);
193     int32_t index = 0;
194 
195     parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
196 
197     for (k=0; k<n; ++k) {
198         for (i=0; i<segLen; ++i) {
199             __m256i_8_t p;
200             __m256i_8_t m;
201             __m256i_8_t s;
202             j = i;
203             for (segNum=0; segNum<segWidth; ++segNum) {
204                 p.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
205                 m.v[segNum] = j >= s1Len ? 0 : (k == matrix->mapper[(unsigned char)s1[j]]);
206                 s.v[segNum] = p.v[segNum] > 0;
207                 j += segLen;
208             }
209             _mm256_store_si256(&vProfile[index], p.m);
210             _mm256_store_si256(&vProfileM[index], m.m);
211             _mm256_store_si256(&vProfileS[index], s.m);
212             ++index;
213         }
214     }
215 
216     profile->profile8.score = vProfile;
217     profile->profile8.matches = vProfileM;
218     profile->profile8.similar = vProfileS;
219     profile->free = &parasail_free___m256i;
220     return profile;
221 }
222 
parasail_profile_create_stats_avx_256_16(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)223 parasail_profile_t * parasail_profile_create_stats_avx_256_16(
224         const char * const restrict s1, const int s1Len,
225         const parasail_matrix_t *matrix)
226 {
227     int32_t i = 0;
228     int32_t j = 0;
229     int32_t k = 0;
230     int32_t segNum = 0;
231     const int32_t n = matrix->size; /* number of amino acids in table */
232     const int32_t segWidth = 16; /* number of values in vector unit */
233     const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
234     __m256i* const restrict vProfile = parasail_memalign___m256i(32, n * segLen);
235     __m256i* const restrict vProfileM = parasail_memalign___m256i(32, n * segLen);
236     __m256i* const restrict vProfileS = parasail_memalign___m256i(32, n * segLen);
237     int32_t index = 0;
238 
239     parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
240 
241     for (k=0; k<n; ++k) {
242         for (i=0; i<segLen; ++i) {
243             __m256i_16_t p;
244             __m256i_16_t m;
245             __m256i_16_t s;
246             j = i;
247             for (segNum=0; segNum<segWidth; ++segNum) {
248                 p.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
249                 m.v[segNum] = j >= s1Len ? 0 : (k == matrix->mapper[(unsigned char)s1[j]]);
250                 s.v[segNum] = p.v[segNum] > 0;
251                 j += segLen;
252             }
253             _mm256_store_si256(&vProfile[index], p.m);
254             _mm256_store_si256(&vProfileM[index], m.m);
255             _mm256_store_si256(&vProfileS[index], s.m);
256             ++index;
257         }
258     }
259 
260     profile->profile16.score = vProfile;
261     profile->profile16.matches = vProfileM;
262     profile->profile16.similar = vProfileS;
263     profile->free = &parasail_free___m256i;
264     return profile;
265 }
266 
parasail_profile_create_stats_avx_256_32(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)267 parasail_profile_t * parasail_profile_create_stats_avx_256_32(
268         const char * const restrict s1, const int s1Len,
269         const parasail_matrix_t *matrix)
270 {
271     int32_t i = 0;
272     int32_t j = 0;
273     int32_t k = 0;
274     int32_t segNum = 0;
275     const int32_t n = matrix->size; /* number of amino acids in table */
276     const int32_t segWidth = 8; /* number of values in vector unit */
277     const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
278     __m256i* const restrict vProfile = parasail_memalign___m256i(32, n * segLen);
279     __m256i* const restrict vProfileM = parasail_memalign___m256i(32, n * segLen);
280     __m256i* const restrict vProfileS = parasail_memalign___m256i(32, n * segLen);
281     int32_t index = 0;
282 
283     parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
284 
285     for (k=0; k<n; ++k) {
286         for (i=0; i<segLen; ++i) {
287             __m256i_32_t p;
288             __m256i_32_t m;
289             __m256i_32_t s;
290             j = i;
291             for (segNum=0; segNum<segWidth; ++segNum) {
292                 p.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
293                 m.v[segNum] = j >= s1Len ? 0 : (k == matrix->mapper[(unsigned char)s1[j]]);
294                 s.v[segNum] = p.v[segNum] > 0;
295                 j += segLen;
296             }
297             _mm256_store_si256(&vProfile[index], p.m);
298             _mm256_store_si256(&vProfileM[index], m.m);
299             _mm256_store_si256(&vProfileS[index], s.m);
300             ++index;
301         }
302     }
303 
304     profile->profile32.score = vProfile;
305     profile->profile32.matches = vProfileM;
306     profile->profile32.similar = vProfileS;
307     profile->free = &parasail_free___m256i;
308     return profile;
309 }
310 
parasail_profile_create_stats_avx_256_64(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)311 parasail_profile_t * parasail_profile_create_stats_avx_256_64(
312         const char * const restrict s1, const int s1Len,
313         const parasail_matrix_t *matrix)
314 {
315     int32_t i = 0;
316     int32_t j = 0;
317     int32_t k = 0;
318     int32_t segNum = 0;
319     const int32_t n = matrix->size; /* number of amino acids in table */
320     const int32_t segWidth = 4; /* number of values in vector unit */
321     const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
322     __m256i* const restrict vProfile = parasail_memalign___m256i(32, n * segLen);
323     __m256i* const restrict vProfileM = parasail_memalign___m256i(32, n * segLen);
324     __m256i* const restrict vProfileS = parasail_memalign___m256i(32, n * segLen);
325     int32_t index = 0;
326 
327     parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
328 
329     for (k=0; k<n; ++k) {
330         for (i=0; i<segLen; ++i) {
331             __m256i_64_t p;
332             __m256i_64_t m;
333             __m256i_64_t s;
334             j = i;
335             for (segNum=0; segNum<segWidth; ++segNum) {
336                 p.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
337                 m.v[segNum] = j >= s1Len ? 0 : (k == matrix->mapper[(unsigned char)s1[j]]);
338                 s.v[segNum] = p.v[segNum] > 0;
339                 j += segLen;
340             }
341             _mm256_store_si256(&vProfile[index], p.m);
342             _mm256_store_si256(&vProfileM[index], m.m);
343             _mm256_store_si256(&vProfileS[index], s.m);
344             ++index;
345         }
346     }
347 
348     profile->profile64.score = vProfile;
349     profile->profile64.matches = vProfileM;
350     profile->profile64.similar = vProfileS;
351     profile->free = &parasail_free___m256i;
352     return profile;
353 }
354 
parasail_profile_create_stats_avx_256_sat(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)355 parasail_profile_t* parasail_profile_create_stats_avx_256_sat(
356         const char * const restrict s1, const int s1Len,
357         const parasail_matrix_t *matrix)
358 {
359     parasail_profile_t *profile8 = parasail_profile_create_stats_avx_256_8(s1, s1Len, matrix);
360     parasail_profile_t *profile16 = parasail_profile_create_stats_avx_256_16(s1, s1Len, matrix);
361     parasail_profile_t *profile32 = parasail_profile_create_stats_avx_256_32(s1, s1Len, matrix);
362     profile8->profile16 = profile16->profile16;
363     profile8->profile32 = profile32->profile32;
364     free(profile16);
365     free(profile32);
366 
367     return profile8;
368 }
369 
parasail_free___m256i(void * ptr)370 void parasail_free___m256i(void *ptr)
371 {
372     parasail_free((__m256i*)ptr);
373 }
374 
375