1 /* Vectorized routines for x86 Advanced Vector Extensions (AVX)
2  *
3  * Most speed-critical code is in the .h file, to facilitate inlining.
4  *
5  * Contents:
6  *    1. Debugging/development routines
7  *    2. Benchmark
8  *    3. Unit tests
9  *    4. Test driver
10  *
11  * This code is conditionally compiled, only when <eslENABLE_AVX> was
12  * set in <esl_config.h> by the configure script, and that will only
13  * happen on x86 platforms. When <eslENABLE_AVX> is not set, we
14  * include some dummy code to silence compiler and ranlib warnings
15  * about empty translation units and no symbols, and dummy drivers
16  * that do nothing but declare success.
17  */
18 #include "esl_config.h"
19 #ifdef eslENABLE_AVX
20 
21 #include <stdio.h>
22 #include <x86intrin.h>
23 
24 #include "easel.h"
25 #include "esl_avx.h"
26 
27 
28 /*****************************************************************
29  * 1. Debugging/development routines
30  *****************************************************************/
31 
32 void
esl_avx_dump_256i_hex4(__m256i v)33 esl_avx_dump_256i_hex4(__m256i v)
34 {
35   uint64_t *val = (uint64_t *) &v;
36   printf("%016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
37 	 val[3], val[2], val[1], val[0]);
38 }
39 
40 
41 /*****************************************************************
42  * 2. Benchmark
43  *****************************************************************/
44 #ifdef eslAVX_BENCHMARK
45 
46 #include "esl_config.h"
47 
48 #include <stdio.h>
49 #include <string.h>
50 #include <math.h>
51 
52 #include "easel.h"
53 #include "esl_avx.h"
54 #include "esl_getopts.h"
55 #include "esl_random.h"
56 #include "esl_stopwatch.h"
57 
58 static ESL_OPTIONS options[] = {
59   /* name           type       default  env  range toggles reqs incomp  help                                       docgroup*/
60   { "-h",     eslARG_NONE,      FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
61   { "-s",     eslARG_INT,         "0",  NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                    0 },
62   { "-N",     eslARG_INT, "200000000",  NULL, NULL,  NULL,  NULL, NULL, "number of trials",                                 0 },
63   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
64 };
65 static char usage[]  = "[-options] <esl_avx_* function suffix: e.g. hmax_epu8>";
66 static char banner[] = "benchmark driver for avx module";
67 
68 int
main(int argc,char ** argv)69 main(int argc, char **argv)
70 {
71   union { __m256i v; uint32_t x[8]; } u;
72   ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
73   ESL_RANDOMNESS *rng     = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
74   ESL_STOPWATCH  *w       = esl_stopwatch_Create();
75   char           *fname   = esl_opt_GetArg(go, 1);
76   int             N       = esl_opt_GetInteger(go, "-N");
77   __m256i        *v;
78   int             i,z;
79 
80   /* A bunch of vectors full of random numbers. Takes ~10-20s to generate the data */
81   v = malloc(sizeof(__m256i) * N);
82   for (i = 0; i < N; i++)
83     {
84       for (z = 0; z < 8; z++) u.x[z] = esl_random_uint32(rng);
85       v[i] = u.v;
86     }
87 
88   esl_stopwatch_Start(w);
89   if (strcmp(fname, "hmax_epi8") == 0)
90     {
91       int8_t   r_i8, max_i8;
92       max_i8 = -128;
93       for (i = 0; i < N; i++)
94         { r_i8   = esl_avx_hmax_epi8(v[i]); max_i8 = ESL_MAX(max_i8, r_i8); }
95       printf("max_i8 = %" PRId8 "\n", max_i8);
96     }
97   else if (strcmp(fname, "hmax_epu8") == 0)
98     {
99       uint8_t   r_u8, max_u8;
100       max_u8 =  0;
101       for (i = 0; i < N; i++)
102         { r_u8   = esl_avx_hmax_epu8(v[i]); max_u8 = ESL_MAX(max_u8, r_u8); }
103       printf("max_u8 = %" PRIu8 "\n", max_u8);
104     }
105   else if (strcmp(fname, "hmax_epi16") == 0)
106     {
107       int16_t r_i16, max_i16;
108       max_i16 = -32768;
109       for (i = 0; i < N; i++)
110         { r_i16  = esl_avx_hmax_epi16(v[i]); max_i16 = ESL_MAX(max_i16, r_i16); }
111       printf("max_i16 = %" PRIi16 "\n", max_i16);
112     }
113   else
114     esl_fatal("No such esl_avx_* function %s\n", fname);
115 
116   esl_stopwatch_Stop(w);
117   printf("# %s", fname);
118   esl_stopwatch_Display(stdout, w, " CPU time: ");
119 
120   esl_randomness_Destroy(rng);
121   esl_stopwatch_Destroy(w);
122   esl_getopts_Destroy(go);
123   return 0;
124 }
125 #endif /*eslAVX_BENCHMARK*/
126 
127 
128 /*****************************************************************
129  * 3. Unit tests
130  *****************************************************************/
131 #ifdef eslAVX_TESTDRIVE
132 
133 #include "esl_random.h"
134 
135 static void
utest_hmax_epu8(ESL_RANDOMNESS * rng)136 utest_hmax_epu8(ESL_RANDOMNESS *rng)
137 {
138   union { __m256i v; uint8_t x[32]; } u;
139   uint8_t r1, r2;
140   int     i,z;
141 
142   for (i = 0; i < 100; i++)
143     {
144       r1 = 0;
145       for (z = 0; z < 32; z++)
146         {
147           u.x[z] = (uint8_t) (esl_rnd_Roll(rng, 256));  // 0..255
148           if (u.x[z] > r1) r1 = u.x[z];
149         }
150       r2 = esl_avx_hmax_epu8(u.v);
151       if (r1 != r2) esl_fatal("hmax_epu8 utest failed");
152     }
153 }
154 
155 static void
utest_hmax_epi8(ESL_RANDOMNESS * rng)156 utest_hmax_epi8(ESL_RANDOMNESS *rng)
157 {
158   union { __m256i v; int8_t x[32]; } u;
159   int8_t r1, r2;
160   int    i,z;
161 
162   for (i = 0; i < 100; i++)
163     {
164       r1 = 0;
165       for (z = 0; z < 32; z++)
166         {
167           u.x[z] = (int8_t) (esl_rnd_Roll(rng, 256) - 128);  // -128..127
168           if (u.x[z] > r1) r1 = u.x[z];
169         }
170       r2 = esl_avx_hmax_epi8(u.v);
171       if (r1 != r2) esl_fatal("hmax_epi8 utest failed");
172     }
173 }
174 
175 
176 static void
utest_hmax_epi16(ESL_RANDOMNESS * rng)177 utest_hmax_epi16(ESL_RANDOMNESS *rng)
178 {
179   union { __m256i v; int16_t x[16]; } u;
180   int16_t r1, r2;
181   int     i,z;
182 
183   for (i = 0; i < 100; i++)
184     {
185       r1 = -32768;
186       for (z = 0; z < 16; z++)
187         {
188           u.x[z] = (int16_t) (esl_rnd_Roll(rng, 65536) - 32768);  // -32768..32767
189           if (u.x[z] > r1) r1 = u.x[z];
190         }
191       r2 = esl_avx_hmax_epi16(u.v);
192       if (r1 != r2) esl_fatal("hmax_epi16 utest failed %d %d", r1, r2);
193     }
194 }
195 
196 #endif /*eslAVX_TESTDRIVE*/
197 
198 /*****************************************************************
199  * 4. Test driver
200  *****************************************************************/
201 
202 #ifdef eslAVX_TESTDRIVE
203 #include "esl_config.h"
204 
205 #include <stdio.h>
206 #include <math.h>
207 
208 #include "easel.h"
209 #include "esl_getopts.h"
210 #include "esl_random.h"
211 #include "esl_avx.h"
212 
213 static ESL_OPTIONS options[] = {
214   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
215   { "-h",        eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
216   { "-s",        eslARG_INT,      "0",  NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                    0 },
217   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
218 };
219 static char usage[]  = "[-options]";
220 static char banner[] = "test driver for avx512 module";
221 
222 int
main(int argc,char ** argv)223 main(int argc, char **argv)
224 {
225   ESL_GETOPTS    *go  = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
226   ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));;
227 
228   fprintf(stderr, "## %s\n", argv[0]);
229   fprintf(stderr, "#  rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng));
230 
231   utest_hmax_epu8(rng);
232   utest_hmax_epi8(rng);
233   utest_hmax_epi16(rng);
234 
235   fprintf(stderr, "#  status = ok\n");
236 
237   esl_randomness_Destroy(rng);
238   esl_getopts_Destroy(go);
239   return 0;
240 }
241 
242 #endif /*eslAVX_TESTDRIVE*/
243 
244 #else  // !eslENABLE_AVX
245 #include <stdio.h>
esl_avx_silence_hack(void)246 void esl_avx_silence_hack(void) { return; }
247 #if defined eslAVX_TESTDRIVE || eslAVX_EXAMPLE || eslAVX_BENCHMARK
main(void)248 int main(void) { fprintf(stderr, "# AVX support not compiled.\n"); return 0; }
249 #endif
250 #endif // eslENABLE_AVX
251 
252