1 /**
2 * @file
3 *
4 * @author jeffrey.daily@gmail.com
5 *
6 * Copyright (c) 2015 Battelle Memorial Institute.
7 */
8 #include "config.h"
9
10 #include <emmintrin.h>
11
12 #include "parasail/memory.h"
13 #include "parasail/internal_sse.h"
14
parasail_memalign___m128i(size_t alignment,size_t size)15 __m128i * parasail_memalign___m128i(size_t alignment, size_t size)
16 {
17 return (__m128i *) parasail_memalign(alignment, size*sizeof(__m128i));
18 }
19
parasail_memset___m128i(__m128i * b,__m128i c,size_t len)20 void parasail_memset___m128i(__m128i *b, __m128i c, size_t len)
21 {
22 size_t i;
23 for (i=0; i<len; ++i) {
24 _mm_store_si128(&b[i], c);
25 }
26 }
27
parasail_profile_create_sse_128_8(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)28 parasail_profile_t * parasail_profile_create_sse_128_8(
29 const char * const restrict s1, const int s1Len,
30 const parasail_matrix_t *matrix)
31 {
32 int32_t i = 0;
33 int32_t j = 0;
34 int32_t k = 0;
35 int32_t segNum = 0;
36 const int32_t n = matrix->size; /* number of amino acids in table */
37 const int32_t segWidth = 16; /* number of values in vector unit */
38 const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
39 __m128i* const restrict vProfile = parasail_memalign___m128i(16, n * segLen);
40 int32_t index = 0;
41
42 parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
43
44 for (k=0; k<n; ++k) {
45 for (i=0; i<segLen; ++i) {
46 __m128i_8_t t;
47 j = i;
48 for (segNum=0; segNum<segWidth; ++segNum) {
49 t.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
50 j += segLen;
51 }
52 _mm_store_si128(&vProfile[index], t.m);
53 ++index;
54 }
55 }
56
57 profile->profile8.score = vProfile;
58 profile->free = ¶sail_free___m128i;
59 return profile;
60 }
61
parasail_profile_create_sse_128_16(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)62 parasail_profile_t * parasail_profile_create_sse_128_16(
63 const char * const restrict s1, const int s1Len,
64 const parasail_matrix_t *matrix)
65 {
66 int32_t i = 0;
67 int32_t j = 0;
68 int32_t k = 0;
69 int32_t segNum = 0;
70 const int32_t n = matrix->size; /* number of amino acids in table */
71 const int32_t segWidth = 8; /* number of values in vector unit */
72 const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
73 __m128i* const restrict vProfile = parasail_memalign___m128i(16, n * segLen);
74 int32_t index = 0;
75
76 parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
77
78 for (k=0; k<n; ++k) {
79 for (i=0; i<segLen; ++i) {
80 __m128i_16_t t;
81 j = i;
82 for (segNum=0; segNum<segWidth; ++segNum) {
83 t.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
84 j += segLen;
85 }
86 _mm_store_si128(&vProfile[index], t.m);
87 ++index;
88 }
89 }
90
91 profile->profile16.score = vProfile;
92 profile->free = ¶sail_free___m128i;
93 return profile;
94 }
95
parasail_profile_create_sse_128_32(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)96 parasail_profile_t * parasail_profile_create_sse_128_32(
97 const char * const restrict s1, const int s1Len,
98 const parasail_matrix_t *matrix)
99 {
100 int32_t i = 0;
101 int32_t j = 0;
102 int32_t k = 0;
103 int32_t segNum = 0;
104 const int32_t n = matrix->size; /* number of amino acids in table */
105 const int32_t segWidth = 4; /* number of values in vector unit */
106 const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
107 __m128i* const restrict vProfile = parasail_memalign___m128i(16, n * segLen);
108 int32_t index = 0;
109
110 parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
111
112 for (k=0; k<n; ++k) {
113 for (i=0; i<segLen; ++i) {
114 __m128i_32_t t;
115 j = i;
116 for (segNum=0; segNum<segWidth; ++segNum) {
117 t.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
118 j += segLen;
119 }
120 _mm_store_si128(&vProfile[index], t.m);
121 ++index;
122 }
123 }
124
125 profile->profile32.score = vProfile;
126 profile->free = ¶sail_free___m128i;
127 return profile;
128 }
129
parasail_profile_create_sse_128_64(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)130 parasail_profile_t * parasail_profile_create_sse_128_64(
131 const char * const restrict s1, const int s1Len,
132 const parasail_matrix_t *matrix)
133 {
134 int32_t i = 0;
135 int32_t j = 0;
136 int32_t k = 0;
137 int32_t segNum = 0;
138 const int32_t n = matrix->size; /* number of amino acids in table */
139 const int32_t segWidth = 2; /* number of values in vector unit */
140 const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
141 __m128i* const restrict vProfile = parasail_memalign___m128i(16, n * segLen);
142 int32_t index = 0;
143
144 parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
145
146 for (k=0; k<n; ++k) {
147 for (i=0; i<segLen; ++i) {
148 __m128i_64_t t;
149 j = i;
150 for (segNum=0; segNum<segWidth; ++segNum) {
151 t.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
152 j += segLen;
153 }
154 _mm_store_si128(&vProfile[index], t.m);
155 ++index;
156 }
157 }
158
159 profile->profile64.score = vProfile;
160 profile->free = ¶sail_free___m128i;
161 return profile;
162 }
163
parasail_profile_create_sse_128_sat(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)164 parasail_profile_t* parasail_profile_create_sse_128_sat(
165 const char * const restrict s1, const int s1Len,
166 const parasail_matrix_t *matrix)
167 {
168 parasail_profile_t *profile8 = parasail_profile_create_sse_128_8(s1, s1Len, matrix);
169 parasail_profile_t *profile16 = parasail_profile_create_sse_128_16(s1, s1Len, matrix);
170 parasail_profile_t *profile32 = parasail_profile_create_sse_128_32(s1, s1Len, matrix);
171 profile8->profile16 = profile16->profile16;
172 profile8->profile32 = profile32->profile32;
173 free(profile16);
174 free(profile32);
175
176 return profile8;
177 }
178
parasail_profile_create_stats_sse_128_8(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)179 parasail_profile_t * parasail_profile_create_stats_sse_128_8(
180 const char * const restrict s1, const int s1Len,
181 const parasail_matrix_t *matrix)
182 {
183 int32_t i = 0;
184 int32_t j = 0;
185 int32_t k = 0;
186 int32_t segNum = 0;
187 const int32_t n = matrix->size; /* number of amino acids in table */
188 const int32_t segWidth = 16; /* number of values in vector unit */
189 const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
190 __m128i* const restrict vProfile = parasail_memalign___m128i(16, n * segLen);
191 __m128i* const restrict vProfileM = parasail_memalign___m128i(16, n * segLen);
192 __m128i* const restrict vProfileS = parasail_memalign___m128i(16, n * segLen);
193 int32_t index = 0;
194
195 parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
196
197 for (k=0; k<n; ++k) {
198 for (i=0; i<segLen; ++i) {
199 __m128i_8_t p;
200 __m128i_8_t m;
201 __m128i_8_t s;
202 j = i;
203 for (segNum=0; segNum<segWidth; ++segNum) {
204 p.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
205 m.v[segNum] = j >= s1Len ? 0 : (k == matrix->mapper[(unsigned char)s1[j]]);
206 s.v[segNum] = p.v[segNum] > 0;
207 j += segLen;
208 }
209 _mm_store_si128(&vProfile[index], p.m);
210 _mm_store_si128(&vProfileM[index], m.m);
211 _mm_store_si128(&vProfileS[index], s.m);
212 ++index;
213 }
214 }
215
216 profile->profile8.score = vProfile;
217 profile->profile8.matches = vProfileM;
218 profile->profile8.similar = vProfileS;
219 profile->free = ¶sail_free___m128i;
220 return profile;
221 }
222
parasail_profile_create_stats_sse_128_16(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)223 parasail_profile_t * parasail_profile_create_stats_sse_128_16(
224 const char * const restrict s1, const int s1Len,
225 const parasail_matrix_t *matrix)
226 {
227 int32_t i = 0;
228 int32_t j = 0;
229 int32_t k = 0;
230 int32_t segNum = 0;
231 const int32_t n = matrix->size; /* number of amino acids in table */
232 const int32_t segWidth = 8; /* number of values in vector unit */
233 const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
234 __m128i* const restrict vProfile = parasail_memalign___m128i(16, n * segLen);
235 __m128i* const restrict vProfileM = parasail_memalign___m128i(16, n * segLen);
236 __m128i* const restrict vProfileS = parasail_memalign___m128i(16, n * segLen);
237 int32_t index = 0;
238
239 parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
240
241 for (k=0; k<n; ++k) {
242 for (i=0; i<segLen; ++i) {
243 __m128i_16_t p;
244 __m128i_16_t m;
245 __m128i_16_t s;
246 j = i;
247 for (segNum=0; segNum<segWidth; ++segNum) {
248 p.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
249 m.v[segNum] = j >= s1Len ? 0 : (k == matrix->mapper[(unsigned char)s1[j]]);
250 s.v[segNum] = p.v[segNum] > 0;
251 j += segLen;
252 }
253 _mm_store_si128(&vProfile[index], p.m);
254 _mm_store_si128(&vProfileM[index], m.m);
255 _mm_store_si128(&vProfileS[index], s.m);
256 ++index;
257 }
258 }
259
260 profile->profile16.score = vProfile;
261 profile->profile16.matches = vProfileM;
262 profile->profile16.similar = vProfileS;
263 profile->free = ¶sail_free___m128i;
264 return profile;
265 }
266
parasail_profile_create_stats_sse_128_32(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)267 parasail_profile_t * parasail_profile_create_stats_sse_128_32(
268 const char * const restrict s1, const int s1Len,
269 const parasail_matrix_t *matrix)
270 {
271 int32_t i = 0;
272 int32_t j = 0;
273 int32_t k = 0;
274 int32_t segNum = 0;
275 const int32_t n = matrix->size; /* number of amino acids in table */
276 const int32_t segWidth = 4; /* number of values in vector unit */
277 const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
278 __m128i* const restrict vProfile = parasail_memalign___m128i(16, n * segLen);
279 __m128i* const restrict vProfileM = parasail_memalign___m128i(16, n * segLen);
280 __m128i* const restrict vProfileS = parasail_memalign___m128i(16, n * segLen);
281 int32_t index = 0;
282
283 parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
284
285 for (k=0; k<n; ++k) {
286 for (i=0; i<segLen; ++i) {
287 __m128i_32_t p;
288 __m128i_32_t m;
289 __m128i_32_t s;
290 j = i;
291 for (segNum=0; segNum<segWidth; ++segNum) {
292 p.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
293 m.v[segNum] = j >= s1Len ? 0 : (k == matrix->mapper[(unsigned char)s1[j]]);
294 s.v[segNum] = p.v[segNum] > 0;
295 j += segLen;
296 }
297 _mm_store_si128(&vProfile[index], p.m);
298 _mm_store_si128(&vProfileM[index], m.m);
299 _mm_store_si128(&vProfileS[index], s.m);
300 ++index;
301 }
302 }
303
304 profile->profile32.score = vProfile;
305 profile->profile32.matches = vProfileM;
306 profile->profile32.similar = vProfileS;
307 profile->free = ¶sail_free___m128i;
308 return profile;
309 }
310
parasail_profile_create_stats_sse_128_64(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)311 parasail_profile_t * parasail_profile_create_stats_sse_128_64(
312 const char * const restrict s1, const int s1Len,
313 const parasail_matrix_t *matrix)
314 {
315 int32_t i = 0;
316 int32_t j = 0;
317 int32_t k = 0;
318 int32_t segNum = 0;
319 const int32_t n = matrix->size; /* number of amino acids in table */
320 const int32_t segWidth = 2; /* number of values in vector unit */
321 const int32_t segLen = (s1Len + segWidth - 1) / segWidth;
322 __m128i* const restrict vProfile = parasail_memalign___m128i(16, n * segLen);
323 __m128i* const restrict vProfileM = parasail_memalign___m128i(16, n * segLen);
324 __m128i* const restrict vProfileS = parasail_memalign___m128i(16, n * segLen);
325 int32_t index = 0;
326
327 parasail_profile_t *profile = parasail_profile_new(s1, s1Len, matrix);
328
329 for (k=0; k<n; ++k) {
330 for (i=0; i<segLen; ++i) {
331 __m128i_64_t p;
332 __m128i_64_t m;
333 __m128i_64_t s;
334 j = i;
335 for (segNum=0; segNum<segWidth; ++segNum) {
336 p.v[segNum] = j >= s1Len ? 0 : matrix->matrix[n*k+matrix->mapper[(unsigned char)s1[j]]];
337 m.v[segNum] = j >= s1Len ? 0 : (k == matrix->mapper[(unsigned char)s1[j]]);
338 s.v[segNum] = p.v[segNum] > 0;
339 j += segLen;
340 }
341 _mm_store_si128(&vProfile[index], p.m);
342 _mm_store_si128(&vProfileM[index], m.m);
343 _mm_store_si128(&vProfileS[index], s.m);
344 ++index;
345 }
346 }
347
348 profile->profile64.score = vProfile;
349 profile->profile64.matches = vProfileM;
350 profile->profile64.similar = vProfileS;
351 profile->free = ¶sail_free___m128i;
352 return profile;
353 }
354
parasail_profile_create_stats_sse_128_sat(const char * const restrict s1,const int s1Len,const parasail_matrix_t * matrix)355 parasail_profile_t* parasail_profile_create_stats_sse_128_sat(
356 const char * const restrict s1, const int s1Len,
357 const parasail_matrix_t *matrix)
358 {
359 parasail_profile_t *profile8 = parasail_profile_create_stats_sse_128_8(s1, s1Len, matrix);
360 parasail_profile_t *profile16 = parasail_profile_create_stats_sse_128_16(s1, s1Len, matrix);
361 parasail_profile_t *profile32 = parasail_profile_create_stats_sse_128_32(s1, s1Len, matrix);
362 profile8->profile16 = profile16->profile16;
363 profile8->profile32 = profile32->profile32;
364 free(profile16);
365 free(profile32);
366
367 return profile8;
368 }
369
parasail_free___m128i(void * ptr)370 void parasail_free___m128i(void *ptr)
371 {
372 parasail_free((__m128i*)ptr);
373 }
374
375