1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2019-2020 The University of Tennessee and The University
4  *                         of Tennessee Research Foundation.  All rights
5  *                         reserved.
6  * Copyright (c) 2020      Research Organization for Information Science
7  *                         and Technology (RIST).  All rights reserved.
8  * $COPYRIGHT$
9  *
10  * Additional copyrights may follow
11  *
12  * $HEADER$
13  */
14 
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <sys/time.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <unistd.h>
22 
23 #include "mpi.h"
24 #include "ompi/communicator/communicator.h"
25 #include "ompi/runtime/mpiruntime.h"
26 #include "ompi/datatype/ompi_datatype.h"
27 
28 typedef struct op_name_s {
29     char* name;
30     char* mpi_op_name;
31     MPI_Op op;
32 } op_name_t;
33 static op_name_t array_of_ops [] = {
34     { "max", "MPI_MAX", MPI_MAX },
35     { "min", "MPI_MIN", MPI_MIN },
36     { "sum", "MPI_SUM", MPI_SUM },
37     { "prod", "MPI_PROD", MPI_PROD },
38     { "land", "MPI_LAND", MPI_LAND },
39     { "band", "MPI_BAND", MPI_BAND },
40     { "lor", "MPI_LOR", MPI_LOR },
41     { "bor", "MPI_BOR", MPI_BOR },
42     { "lxor", "MPI_LXOR", MPI_LXOR },
43     { "bxor", "MPI_BXOR", MPI_BXOR },
44     { "replace", "MPI_REPLACE", MPI_REPLACE },
45     { NULL, "MPI_OP_NULL", MPI_OP_NULL }
46 };
47 static int do_ops[12] = { -1, };  /* index of the ops to do. Size +1 larger than the array_of_ops */
48 static int verbose = 0;
49 static int total_errors = 0;
50 
/*
 * Largest of two values.  Each argument is evaluated exactly once, so it is
 * safe to pass expressions with side effects.  Relies on the GNU statement
 * expression and __typeof__ extensions.
 */
#define max(a, b)                          \
    ({                                     \
        __typeof__(a) _max_lhs = (a);      \
        __typeof__(b) _max_rhs = (b);      \
        (_max_rhs < _max_lhs) ? _max_lhs : _max_rhs; \
    })
55 
/*
 * Smallest of two values.  Each argument is evaluated exactly once, so it is
 * safe to pass expressions with side effects.  Relies on the GNU statement
 * expression and __typeof__ extensions.
 */
#define min(a, b)                          \
    ({                                     \
        __typeof__(a) _min_lhs = (a);      \
        __typeof__(b) _min_rhs = (b);      \
        (_min_rhs > _min_lhs) ? _min_lhs : _min_rhs; \
    })
60 
print_status(char * op,char * type,int type_size,int count,int max_shift,double * duration,int repeats,int correct)61 static void print_status(char* op, char* type, int type_size,
62                          int count, int max_shift, double *duration, int repeats,
63                          int correct )
64 {
65     if(correct) {
66         printf("%-10s %s %-10d%s ", op, type, type_size, (verbose ? " [\033[1;32msuccess\033[0m]" : ""));
67     } else {
68         printf("%-10s %s [\033[1;31mfail\033[0m]", op, type);
69         total_errors++;
70     }
71     if( 1 == max_shift ) {
72         printf(" count  %-10d  time (seconds) %.8f seconds\n", count, duration[0] / repeats);
73     } else {
74         printf(" count  %-10d  time (seconds / shifts) ", count);
75         for( int i = 0; i < max_shift; i++ ) {
76             printf("%.8f ", duration[i] / repeats );
77         }
78         printf("\n");
79     }
80 }
81 
82 static int do_ops_built = 0;
83 static int
build_do_ops(char * optarg,int * do_ops)84 build_do_ops( char* optarg, int* do_ops)
85 {
86     int i;
87     if( 0 == strcmp(optarg, "all") ) {
88         for( i = 0; NULL != array_of_ops[i].name; i++ ) {
89             do_ops[i] = i;
90         }
91         do_ops[i] = -1;  /* stop */
92     } else {
93         int n, idx = 0;
94         char* token, *arg = optarg;
95         while ((token = strsep(&arg, ",")) != NULL) {
96             for( i = 0; NULL != array_of_ops[i].name; i++ ) {  /* find the op */
97                 if( 0 == strcmp(array_of_ops[i].name, token) ) {
98                     /* check if the op was not already selected */
99                     for(n = 0; n < idx; n++ ) {
100                         if( i == do_ops[n] ) {
101                             break;
102                         }
103                     }
104                     if( n >= idx ) {
105                         do_ops[idx++] = i;
106                         do_ops[idx]   = -1;
107                     }
108                     break;
109                 }
110             }
111             if( NULL == array_of_ops[i].name ) {
112                 fprintf(stderr, "Unknown op %s. Ignored.\n", token);
113             }
114         }
115     }
116     do_ops_built = 1;
117     return 0;
118 }
119 
120 
/*
 * Time and verify MPI_Reduce_local for an op that can be written as the infix
 * C operator OPNAME (e.g. +, *, |).
 *
 * For every shift s in [0, min(COUNT, max_shift)) and for `repeats` rounds, the
 * in/out buffer is reset from CHECK_BUF, MPI_Reduce_local is applied to the
 * last COUNT-s elements, and the elapsed time is added to duration[s].  When
 * `check` is set each result is compared with `in OPNAME reference`; the
 * first mismatch of a round is printed and clears `correctness`.
 *
 * The expansion relies on names of the enclosing scope: skip_op_type (cleared),
 * max_shift, repeats, check, duration, tstart, tend, i and correctness, and it
 * always ends by jumping to the label check_and_continue.
 * TYPE_PREFIX is the printf conversion (without the '%') used to print a TYPE.
 */
#define MPI_OP_TEST(OPNAME, MPIOP, MPITYPE, TYPE, INBUF, INOUT_BUF, CHECK_BUF, COUNT, TYPE_PREFIX) \
do { \
    const TYPE *_src = ((TYPE*)(INBUF)); \
    const TYPE *_ref = ((TYPE*)(CHECK_BUF)); \
    TYPE *_dst = ((TYPE*)(INOUT_BUF)); \
    skip_op_type = 0; \
    for(int _shift = 0; _shift < min((COUNT), max_shift); _shift++ ) { \
        const TYPE *_lhs = _src + _shift; \
        const TYPE *_rhs = _ref + _shift; \
        TYPE *_out = _dst + _shift; \
        duration[_shift] = 0.0; \
        for(int _left = repeats; _left > 0; _left--) { \
            /* restore the pristine in/out content before each round */ \
            memcpy(_dst, _ref, sizeof(TYPE) * (COUNT)); \
            tstart = MPI_Wtime(); \
            MPI_Reduce_local(_lhs, _out, (COUNT)-_shift, (MPITYPE), (MPIOP)); \
            tend = MPI_Wtime(); \
            duration[_shift] += (tend - tstart); \
            if( !check ) \
                continue; \
            for( i = 0; i < (COUNT)-_shift; i++ ) { \
                if( (_out[i]) != ((_lhs[i]) OPNAME (_rhs[i])) ) { \
                    printf("First error at alignment %d position %d (%" TYPE_PREFIX " %s %" TYPE_PREFIX " != %" TYPE_PREFIX ")\n", \
                           _shift, i, _lhs[i], (#OPNAME), _rhs[i], _out[i]); \
                    correctness = 0; \
                    break; \
                } \
            } \
        } \
    } \
    goto check_and_continue; \
} while (0)
148 
/*
 * Time and verify MPI_Reduce_local for an op that is expressed as a two
 * argument function-like OPNAME (min or max).
 *
 * For every shift k in [0, min(COUNT, max_shift)) and for `repeats` rounds, the
 * in/out buffer is reset from CHECK_BUF, MPI_Reduce_local is applied to the
 * last COUNT-k elements (so the shifted call never runs past the COUNT
 * elements that were prepared), and the elapsed time is added to duration[k].
 * When `check` is set every reduced element is compared with
 * OPNAME(in, reference); the first mismatch of a round is printed and clears
 * `correctness`.
 *
 * The expansion relies on names of the enclosing scope: skip_op_type (cleared),
 * max_shift, repeats, check, duration, tstart, tend, i and correctness, and it
 * always ends by jumping to the label check_and_continue.
 * TYPE_PREFIX is the printf conversion (without the '%') used to print a TYPE.
 */
#define MPI_OP_MINMAX_TEST(OPNAME, MPIOP, MPITYPE, TYPE, INBUF, INOUT_BUF, CHECK_BUF, COUNT, TYPE_PREFIX) \
do { \
    const TYPE *_p1 = ((TYPE*)(INBUF)), *_p3 = ((TYPE*)(CHECK_BUF)); \
    TYPE *_p2 = ((TYPE*)(INOUT_BUF)); \
    skip_op_type = 0; \
    for(int _k = 0; _k < min((COUNT), max_shift); _k++ ) { \
        duration[_k] = 0.0; \
        for(int _r = repeats; _r > 0; _r--) { \
            /* restore the pristine in/out content before each round */ \
            memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \
            tstart = MPI_Wtime(); \
            /* only COUNT-_k elements remain once the buffers are shifted */ \
            MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT)-_k, (MPITYPE), (MPIOP)); \
            tend = MPI_Wtime(); \
            duration[_k] += (tend - tstart); \
            if( check ) { \
                for( i = 0; i < (COUNT)-_k; i++ ) { \
                    /* input, result and reference value of element i */ \
                    TYPE _v1 = (_p1+_k)[i], _v2 = (_p2+_k)[i], _v3 = (_p3+_k)[i]; \
                    if(_v2 == OPNAME(_v1, _v3)) \
                        continue; \
                    printf("First error at alignment %d position %d (%" TYPE_PREFIX " !=  %s(%" TYPE_PREFIX ", %" TYPE_PREFIX "))\n", \
                           _k, i, _v2, (#OPNAME), _v1, _v3); \
                    correctness = 0; \
                    break; \
                } \
            } \
        } \
    } \
    goto check_and_continue; \
} while (0)
177 
main(int argc,char ** argv)178 int main(int argc, char **argv)
179 {
180     static void *in_buf = NULL, *inout_buf = NULL, *inout_check_buf = NULL;
181     int count, type_size = 8, rank, size, provided, correctness = 1;
182     int repeats = 1, i, c, op1_alignment = 0, res_alignment = 0;
183     int max_shift = 4;
184     double *duration, tstart, tend;
185     bool check = true;
186     char type[5] = "uifd", *op = "sum", *mpi_type;
187     int lower = 1, upper = 1000000, skip_op_type;
188     MPI_Op mpi_op;
189 
190     while( -1 != (c = getopt(argc, argv, "l:u:r:t:o:i:s:n:1:2:vfh")) ) {
191         switch(c) {
192         case 'l':
193             lower = atoi(optarg);
194             if( lower <= 0 ) {
195                 fprintf(stderr, "The lower number of elements must be positive\n");
196                 exit(-1);
197             }
198             break;
199         case 'u':
200             upper = atoi(optarg);
201             if( lower <= 0 ) {
202                 fprintf(stderr, "The upper number of elements must be positive\n");
203                 exit(-1);
204             }
205             break;
206         case 'i':
207             max_shift = atoi(optarg);
208             if( max_shift <= 0 ) {
209                 fprintf(stderr, "The max shift must be positive\n");
210                 exit(-1);
211             }
212             break;
213         case 'f':
214             check = false;
215             break;
216         case 'v':
217             verbose++;
218             break;
219         case 'r':
220             repeats = atoi(optarg);
221             if( repeats <= 0 ) {
222                 fprintf(stderr, "The number of repetitions (%d) must be positive\n", repeats);
223                 exit(-1);
224             }
225             break;
226         case 't':
227             for( i = 0; i < (int)strlen(optarg); i++ ) {
228                 if( ! (('i' == optarg[i]) || ('u' == optarg[i]) ||
229                        ('f' == optarg[i]) || ('d' == optarg[i])) ) {
230                     fprintf(stderr, "type must be i (signed int), u (unsigned int), f (float) or d (double)\n");
231                     exit(-1);
232                 }
233             }
234             strncpy(type, optarg, 4);
235             break;
236         case 'o':
237             build_do_ops( optarg, do_ops);
238             break;
239         case 's':
240             type_size = atoi(optarg);
241             if( ! ((8 == type_size) || (16 == type_size) || (32 == type_size) || (64 == type_size)) ) {
242                 fprintf(stderr, "type_size must be 8, 16, 32 or 64. %d is an invalid value\n",
243                         type_size);
244                 exit(-1);
245             }
246             break;
247         case '1':
248             op1_alignment = atoi(optarg);
249             if( op1_alignment < 0 ) {
250                 fprintf(stderr, "alignment for the first operand must be positive\n");
251                 exit(-1);
252             }
253             break;
254         case '2':
255             res_alignment = atoi(optarg);
256             if( res_alignment < 0 ) {
257                 fprintf(stderr, "alignment for the result must be positive\n");
258                 exit(-1);
259             }
260             break;
261         case 'h':
262             fprintf(stdout, "%s options are:\n"
263                     " -l <number> : lower number of elements\n"
264                     " -u <number> : upper number of elements\n"
265                     " -s <type_size> : 8, 16, 32 or 64 bits elements\n"
266                     " -t [i,u,f,d] : type of the elements to apply the operations on\n"
267                     " -r <number> : number of repetitions for each test\n"
268                     " -o <op> : comma separated list of operations to execute among\n"
269                     "           sum, min, max, prod, bor, bxor, band\n"
270                     " -i <number> : shift on all buffers to check alignment\n"
271                     " -1 <number> : (mis)alignment in elements for the first op\n"
272                     " -2 <number> : (mis)alignment in elements for the result\n"
273                     " -v: increase the verbosity level\n"
274                     " -h: this help message\n", argv[0]);
275             exit(0);
276         }
277     }
278 
279     if( !do_ops_built ) {  /* not yet done, take the default */
280             build_do_ops( "all", do_ops);
281     }
282     posix_memalign( &in_buf,          64, (upper + op1_alignment) * sizeof(double));
283     posix_memalign( &inout_buf,       64, (upper + res_alignment) * sizeof(double));
284     posix_memalign( &inout_check_buf, 64, upper * sizeof(double));
285     duration = (double*)malloc(max_shift * sizeof(double));
286 
287     ompi_mpi_init(argc, argv, MPI_THREAD_SERIALIZED, &provided, false);
288 
289     rank = ompi_comm_rank(MPI_COMM_WORLD); (void)rank;
290     size = ompi_comm_size(MPI_COMM_WORLD); (void)size;
291 
292     for(uint32_t type_idx = 0; type_idx < strlen(type); type_idx++ ) {
293         for(uint32_t op_idx = 0; do_ops[op_idx] >= 0; op_idx++ ) {
294             op     = array_of_ops[do_ops[op_idx]].name;
295             mpi_op = array_of_ops[do_ops[op_idx]].op;
296             skip_op_type = 1;
297 
298             for( count = lower; count <= upper; count += count ) {
299                 mpi_type = NULL;
300                 correctness = 1;
301                 if('i' == type[type_idx]) {
302                     if( 8 == type_size ) {
303                         int8_t *in_int8 = (int8_t*)((char*)in_buf + op1_alignment * sizeof(int8_t)),
304                             *inout_int8 = (int8_t*)((char*)inout_buf + res_alignment * sizeof(int8_t)),
305                             *inout_int8_for_check = (int8_t*)inout_check_buf;
306                         for( i = 0; i < count; i++ ) {
307                             in_int8[i] = 5;
308                             inout_int8[i] = inout_int8_for_check[i] = -3;
309                         }
310                         mpi_type = "MPI_INT8_T";
311 
312                         if( 0 == strcmp(op, "sum") ) {
313                             MPI_OP_TEST( +, mpi_op, MPI_INT8_T, int8_t,
314                                          in_int8, inout_int8, inout_int8_for_check,
315                                          count, PRId8);
316                         }
317                         if( 0 == strcmp(op, "bor") ) {
318                             MPI_OP_TEST( |, mpi_op, MPI_INT8_T, int8_t,
319                                          in_int8, inout_int8, inout_int8_for_check,
320                                          count, PRId8);
321                         }
322                         if( 0 == strcmp(op, "bxor") ) {
323                             MPI_OP_TEST( ^, mpi_op, MPI_INT8_T, int8_t,
324                                          in_int8, inout_int8, inout_int8_for_check,
325                                          count, PRId8);
326                         }
327                         if( 0 == strcmp(op, "prod") ) {
328                             MPI_OP_TEST( *, mpi_op, MPI_INT8_T, int8_t,
329                                          in_int8, inout_int8, inout_int8_for_check,
330                                          count, PRId8);
331                         }
332                         if( 0 == strcmp(op, "band") ) {
333                             MPI_OP_TEST( &, mpi_op, MPI_INT8_T, int8_t,
334                                          in_int8, inout_int8, inout_int8_for_check,
335                                          count, PRId8);
336                         }
337                         if( 0 == strcmp(op, "max") ) {
338                             MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_INT8_T, int8_t,
339                                                in_int8, inout_int8, inout_int8_for_check,
340                                                count, PRId8);
341                         }
342                         if( 0 == strcmp(op, "min") ) {  //intentionly reversed in and out
343                             MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_INT8_T, int8_t,
344                                                in_int8, inout_int8, inout_int8_for_check,
345                                                count, PRId8);
346                         }
347                     }
348                     if( 16 == type_size ) {
349                         int16_t *in_int16 = (int16_t*)((char*)in_buf + op1_alignment * sizeof(int16_t)),
350                             *inout_int16 = (int16_t*)((char*)inout_buf + res_alignment * sizeof(int16_t)),
351                             *inout_int16_for_check = (int16_t*)inout_check_buf;
352                         for( i = 0; i < count; i++ ) {
353                             in_int16[i] = 5;
354                             inout_int16[i] = inout_int16_for_check[i] = -3;
355                         }
356                         mpi_type = "MPI_INT16_T";
357 
358                         if( 0 == strcmp(op, "sum") ) {
359                             MPI_OP_TEST( +, mpi_op, MPI_INT16_T, int16_t,
360                                          in_int16, inout_int16, inout_int16_for_check,
361                                          count, PRId16);
362                         }
363                         if( 0 == strcmp(op, "bor") ) {
364                             MPI_OP_TEST( |, mpi_op, MPI_INT16_T, int16_t,
365                                          in_int16, inout_int16, inout_int16_for_check,
366                                          count, PRId16);
367                         }
368                         if( 0 == strcmp(op, "bxor") ) {
369                             MPI_OP_TEST( ^, mpi_op, MPI_INT16_T, int16_t,
370                                          in_int16, inout_int16, inout_int16_for_check,
371                                          count, PRId16);
372                         }
373                         if( 0 == strcmp(op, "prod") ) {
374                             MPI_OP_TEST( *, mpi_op, MPI_INT16_T, int16_t,
375                                          in_int16, inout_int16, inout_int16_for_check,
376                                          count, PRId16);
377                         }
378                         if( 0 == strcmp(op, "band") ) {
379                             MPI_OP_TEST( &, mpi_op, MPI_INT16_T, int16_t,
380                                          in_int16, inout_int16, inout_int16_for_check,
381                                          count, PRId16);
382                         }
383                         if( 0 == strcmp(op, "max") ) {
384                             MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_INT16_T, int16_t,
385                                                in_int16, inout_int16, inout_int16_for_check,
386                                                count, PRId16);
387                         }
388                         if( 0 == strcmp(op, "min") ) {  //intentionly reversed in and out
389                             MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_INT16_T, int16_t,
390                                                in_int16, inout_int16, inout_int16_for_check,
391                                                count, PRId16);
392                         }
393                     }
394                     if( 32 == type_size ) {
395                         int32_t *in_int32 = (int32_t*)((char*)in_buf + op1_alignment * sizeof(int32_t)),
396                             *inout_int32 = (int32_t*)((char*)inout_buf + res_alignment * sizeof(int32_t)),
397                             *inout_int32_for_check = (int32_t*)inout_check_buf;
398                         for( i = 0; i < count; i++ ) {
399                             in_int32[i] = 5;
400                             inout_int32[i] = inout_int32_for_check[i] = 3;
401                         }
402                         mpi_type = "MPI_INT32_T";
403 
404                         if( 0 == strcmp(op, "sum") ) {
405                             MPI_OP_TEST( +, mpi_op, MPI_INT32_T, int32_t,
406                                          in_int32, inout_int32, inout_int32_for_check,
407                                          count, PRId32);
408                         }
409                         if( 0 == strcmp(op, "bor") ) {
410                             MPI_OP_TEST( |, mpi_op, MPI_INT32_T, int32_t,
411                                          in_int32, inout_int32, inout_int32_for_check,
412                                          count, PRId32);
413                         }
414                         if( 0 == strcmp(op, "bxor") ) {
415                             MPI_OP_TEST( ^, mpi_op, MPI_INT32_T, int32_t,
416                                          in_int32, inout_int32, inout_int32_for_check,
417                                          count, PRId32);
418                         }
419                         if( 0 == strcmp(op, "prod") ) {
420                             MPI_OP_TEST( *, mpi_op, MPI_INT32_T, int32_t,
421                                          in_int32, inout_int32, inout_int32_for_check,
422                                          count, PRId32);
423                         }
424                         if( 0 == strcmp(op, "band") ) {
425                             MPI_OP_TEST( &, mpi_op, MPI_INT32_T, int32_t,
426                                          in_int32, inout_int32, inout_int32_for_check,
427                                          count, PRId32);
428                         }
429                         if( 0 == strcmp(op, "max") ) {
430                             MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_INT32_T, int32_t,
431                                                in_int32, inout_int32, inout_int32_for_check,
432                                                count, PRId32);
433                         }
434                         if( 0 == strcmp(op, "min") ) {  //intentionly reversed in and out
435                             MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_INT32_T, int32_t,
436                                                in_int32, inout_int32, inout_int32_for_check,
437                                                count, PRId32);
438                         }
439                     }
440                     if( 64 == type_size ) {
441                         int64_t *in_int64 = (int64_t*)((char*)in_buf + op1_alignment * sizeof(int64_t)),
442                             *inout_int64 = (int64_t*)((char*)inout_buf + res_alignment * sizeof(int64_t)),
443                             *inout_int64_for_check = (int64_t*)inout_check_buf;
444                         for( i = 0; i < count; i++ ) {
445                             in_int64[i] = 5;
446                             inout_int64[i] = inout_int64_for_check[i] = 3;
447                         }
448                         mpi_type = "MPI_INT64_T";
449 
450                         if( 0 == strcmp(op, "sum") ) {
451                             MPI_OP_TEST( +, mpi_op, MPI_INT64_T, int64_t,
452                                          in_int64, inout_int64, inout_int64_for_check,
453                                          count, PRId64);
454                         }
455                         if( 0 == strcmp(op, "bor") ) {
456                             MPI_OP_TEST( |, mpi_op, MPI_INT64_T, int64_t,
457                                          in_int64, inout_int64, inout_int64_for_check,
458                                          count, PRId64);
459                         }
460                         if( 0 == strcmp(op, "bxor") ) {
461                             MPI_OP_TEST( ^, mpi_op, MPI_INT64_T, int64_t,
462                                          in_int64, inout_int64, inout_int64_for_check,
463                                          count, PRId64);
464                         }
465                         if( 0 == strcmp(op, "prod") ) {
466                             MPI_OP_TEST( *, mpi_op, MPI_INT64_T, int64_t,
467                                          in_int64, inout_int64, inout_int64_for_check,
468                                          count, PRId64);
469                         }
470                         if( 0 == strcmp(op, "band") ) {
471                             MPI_OP_TEST( &, mpi_op, MPI_INT64_T, int64_t,
472                                          in_int64, inout_int64, inout_int64_for_check,
473                                          count, PRId64);
474                         }
475                         if( 0 == strcmp(op, "max") ) {
476                             MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_INT64_T, int64_t,
477                                                in_int64, inout_int64, inout_int64_for_check,
478                                                count, PRId64);
479                         }
480                         if( 0 == strcmp(op, "min") ) {  //intentionly reversed in and out
481                             MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_INT64_T, int64_t,
482                                                in_int64, inout_int64, inout_int64_for_check,
483                                                count, PRId64);
484                         }
485                     }
486                 }
487 
488                 if( 'u' == type[type_idx] ) {
489                     if( 8 == type_size ) {
490                         uint8_t *in_uint8 = (uint8_t*)((char*)in_buf + op1_alignment * sizeof(uint8_t)),
491                             *inout_uint8 = (uint8_t*)((char*)inout_buf + res_alignment * sizeof(uint8_t)),
492                             *inout_uint8_for_check = (uint8_t*)inout_check_buf;
493                         for( i = 0; i < count; i++ ) {
494                             in_uint8[i] = 5;
495                             inout_uint8[i] = inout_uint8_for_check[i] = 2;
496                         }
497                         mpi_type = "MPI_UINT8_T";
498 
499                         if( 0 == strcmp(op, "sum") ) {
500                             MPI_OP_TEST( +, mpi_op, MPI_UINT8_T, uint8_t,
501                                          in_uint8, inout_uint8, inout_uint8_for_check,
502                                          count, PRIu8);
503                         }
504                         if( 0 == strcmp(op, "bor") ) {
505                             MPI_OP_TEST( |, mpi_op, MPI_UINT8_T, uint8_t,
506                                          in_uint8, inout_uint8, inout_uint8_for_check,
507                                          count, PRIu8);
508                         }
509                         if( 0 == strcmp(op, "bxor") ) {
510                             MPI_OP_TEST( ^, mpi_op, MPI_UINT8_T, uint8_t,
511                                          in_uint8, inout_uint8, inout_uint8_for_check,
512                                          count, PRIu8);
513                         }
514                         if( 0 == strcmp(op, "prod") ) {
515                             MPI_OP_TEST( *, mpi_op, MPI_UINT8_T, uint8_t,
516                                          in_uint8, inout_uint8, inout_uint8_for_check,
517                                          count, PRIu8);
518                         }
519                         if( 0 == strcmp(op, "band") ) {
520                             MPI_OP_TEST( &, mpi_op, MPI_UINT8_T, uint8_t,
521                                          in_uint8, inout_uint8, inout_uint8_for_check,
522                                          count, PRIu8);
523                         }
524                         if( 0 == strcmp(op, "max") ) {
525                             MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_UINT8_T, uint8_t,
526                                                in_uint8, inout_uint8, inout_uint8_for_check,
527                                                count, PRIu8);
528                         }
529                         if( 0 == strcmp(op, "min") ) {  //intentionly reversed in and out
530                             MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_UINT8_T, uint8_t,
531                                                in_uint8, inout_uint8, inout_uint8_for_check,
532                                                count, PRIu8);
533                         }
534                     }
535                     if( 16 == type_size ) {
536                         uint16_t *in_uint16 = (uint16_t*)((char*)in_buf + op1_alignment * sizeof(uint16_t)),
537                             *inout_uint16 = (uint16_t*)((char*)inout_buf + res_alignment * sizeof(uint16_t)),
538                             *inout_uint16_for_check = (uint16_t*)inout_check_buf;
539                         for( i = 0; i < count; i++ ) {
540                             in_uint16[i] = 5;
541                             inout_uint16[i] = inout_uint16_for_check[i] = 1234;
542                         }
543                         mpi_type = "MPI_UINT16_T";
544 
545                         if( 0 == strcmp(op, "sum") ) {
546                             MPI_OP_TEST( +, mpi_op, MPI_UINT16_T, uint16_t,
547                                          in_uint16, inout_uint16, inout_uint16_for_check,
548                                          count, PRIu16);
549                         }
550                         if( 0 == strcmp(op, "bor") ) {
551                             MPI_OP_TEST( |, mpi_op, MPI_UINT16_T, uint16_t,
552                                          in_uint16, inout_uint16, inout_uint16_for_check,
553                                          count, PRIu16);
554                         }
555                         if( 0 == strcmp(op, "bxor") ) {
556                             MPI_OP_TEST( ^, mpi_op, MPI_UINT16_T, uint16_t,
557                                          in_uint16, inout_uint16, inout_uint16_for_check,
558                                          count, PRIu16);
559                         }
560                         if( 0 == strcmp(op, "prod") ) {
561                             MPI_OP_TEST( *, mpi_op, MPI_UINT16_T, uint16_t,
562                                          in_uint16, inout_uint16, inout_uint16_for_check,
563                                          count, PRIu16);
564                         }
565                         if( 0 == strcmp(op, "band") ) {
566                             MPI_OP_TEST( &, mpi_op, MPI_UINT16_T, uint16_t,
567                                          in_uint16, inout_uint16, inout_uint16_for_check,
568                                          count, PRIu16);
569                         }
570                         if( 0 == strcmp(op, "max") ) {
571                             MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_UINT16_T, uint16_t,
572                                                in_uint16, inout_uint16, inout_uint16_for_check,
573                                                count, PRIu16);
574                         }
575                         if( 0 == strcmp(op, "min") ) {  //intentionly reversed in and out
576                             MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_UINT16_T, uint16_t,
577                                                in_uint16, inout_uint16, inout_uint16_for_check,
578                                                count, PRIu16);
579                         }
580                     }
581                     if( 32 == type_size ) {
582                         uint32_t *in_uint32 = (uint32_t*)((char*)in_buf + op1_alignment * sizeof(uint32_t)),
583                             *inout_uint32 = (uint32_t*)((char*)inout_buf + res_alignment * sizeof(uint32_t)),
584                             *inout_uint32_for_check = (uint32_t*)inout_check_buf;
585                         for( i = 0; i < count; i++ ) {
586                             in_uint32[i] = 5;
587                             inout_uint32[i] = inout_uint32_for_check[i] = 3;
588                         }
589                         mpi_type = "MPI_UINT32_T";
590 
591                         if( 0 == strcmp(op, "sum") ) {
592                             MPI_OP_TEST( +, mpi_op, MPI_UINT32_T, uint32_t,
593                                          in_uint32, inout_uint32, inout_uint32_for_check,
594                                          count, PRIu32);
595                         }
596                         if( 0 == strcmp(op, "bor") ) {
597                             MPI_OP_TEST( |, mpi_op, MPI_UINT32_T, uint32_t,
598                                          in_uint32, inout_uint32, inout_uint32_for_check,
599                                          count, PRIu32);
600                         }
601                         if( 0 == strcmp(op, "bxor") ) {
602                             MPI_OP_TEST( ^, mpi_op, MPI_UINT32_T, uint32_t,
603                                          in_uint32, inout_uint32, inout_uint32_for_check,
604                                          count, PRIu32);
605                         }
606                         if( 0 == strcmp(op, "prod") ) {
607                             MPI_OP_TEST( *, mpi_op, MPI_UINT32_T, uint32_t,
608                                          in_uint32, inout_uint32, inout_uint32_for_check,
609                                          count, PRIu32);
610                         }
611                         if( 0 == strcmp(op, "band") ) {
612                             MPI_OP_TEST( &, mpi_op, MPI_UINT32_T, uint32_t,
613                                          in_uint32, inout_uint32, inout_uint32_for_check,
614                                          count, PRIu32);
615                         }
616                         if( 0 == strcmp(op, "max") ) {
617                             MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_UINT32_T, uint32_t,
618                                                in_uint32, inout_uint32, inout_uint32_for_check,
619                                                count, PRIu32);
620                         }
621                         if( 0 == strcmp(op, "min") ) {  //intentionly reversed in and out
622                             MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_UINT32_T, uint32_t,
623                                                in_uint32, inout_uint32, inout_uint32_for_check,
624                                                count, PRIu32);
625                         }
626                     }
627                     if( 64 == type_size ) {
628                         uint64_t *in_uint64 = (uint64_t*)((char*)in_buf + op1_alignment * sizeof(uint64_t)),
629                               *inout_uint64 = (uint64_t*)((char*)inout_buf + res_alignment * sizeof(uint64_t)),
630                             *inout_uint64_for_check = (uint64_t*)inout_check_buf;
631                         for( i = 0; i < count; i++ ) {
632                             in_uint64[i] = 5;
633                             inout_uint64[i] = inout_uint64_for_check[i] = 32433;
634                         }
635                         mpi_type = "MPI_UINT64_T";
636 
637                         if( 0 == strcmp(op, "sum") ) {
638                             MPI_OP_TEST( +, mpi_op, MPI_UINT64_T, uint64_t,
639                                          in_uint64, inout_uint64, inout_uint64_for_check,
640                                          count, PRIu64);
641                         }
642                         if( 0 == strcmp(op, "bor") ) {
643                             MPI_OP_TEST( |, mpi_op, MPI_UINT64_T, uint64_t,
644                                          in_uint64, inout_uint64, inout_uint64_for_check,
645                                          count, PRIu64);
646                         }
647                         if( 0 == strcmp(op, "bxor") ) {
648                             MPI_OP_TEST( ^, mpi_op, MPI_UINT64_T, uint64_t,
649                                          in_uint64, inout_uint64, inout_uint64_for_check,
650                                          count, PRIu64);
651                         }
652                         if( 0 == strcmp(op, "prod") ) {
653                             MPI_OP_TEST( *, mpi_op, MPI_UINT64_T, uint64_t,
654                                          in_uint64, inout_uint64, inout_uint64_for_check,
655                                          count, PRIu64);
656                         }
657                         if( 0 == strcmp(op, "band") ) {
658                             MPI_OP_TEST( &, mpi_op, MPI_UINT64_T, uint64_t,
659                                          in_uint64, inout_uint64, inout_uint64_for_check,
660                                          count, PRIu64);
661                         }
662                         if( 0 == strcmp(op, "max") ) {
663                             MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_UINT64_T, uint64_t,
664                                                in_uint64, inout_uint64, inout_uint64_for_check,
665                                                count, PRIu64);
666                         }
667                         if( 0 == strcmp(op, "min") ) {
668                             MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_UINT64_T, uint64_t,
669                                                in_uint64, inout_uint64, inout_uint64_for_check,
670                                                count, PRIu64);
671                         }
672                     }
673                 }
674 
675                 if( 'f' == type[type_idx] ) {
676                     float *in_float = (float*)((char*)in_buf + op1_alignment * sizeof(float)),
677                         *inout_float = (float*)((char*)inout_buf + res_alignment * sizeof(float)),
678                         *inout_float_for_check = (float*)inout_check_buf;
679                     for( i = 0; i < count; i++ ) {
680                         in_float[i] = 1000.0+1;
681                         inout_float[i] = inout_float_for_check[i] = 100.0+2;
682                     }
683                     mpi_type = "MPI_FLOAT";
684 
685                     if( 0 == strcmp(op, "sum") ) {
686                         MPI_OP_TEST( +, mpi_op, MPI_FLOAT, float,
687                                      in_float, inout_float, inout_float_for_check,
688                                      count, "f");
689                     }
690                     if( 0 == strcmp(op, "prod") ) {
691                         MPI_OP_TEST( *, mpi_op, MPI_FLOAT, float,
692                                      in_float, inout_float, inout_float_for_check,
693                                      count, "f");
694                     }
695                     if( 0 == strcmp(op, "max") ) {
696                         MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_FLOAT, float,
697                                            in_float, inout_float, inout_float_for_check,
698                                            count, "f");
699                     }
700                     if( 0 == strcmp(op, "min") ) {
701                         MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_FLOAT, float,
702                                            in_float, inout_float, inout_float_for_check,
703                                            count, "f");
704                     }
705                 }
706 
707                 if( 'd' == type[type_idx] ) {
708                     double *in_double = (double*)((char*)in_buf + op1_alignment * sizeof(double)),
709                         *inout_double = (double*)((char*)inout_buf + res_alignment * sizeof(double)),
710                         *inout_double_for_check = (double*)inout_check_buf;
711                     for( i = 0; i < count; i++ ) {
712                         in_double[i] = 10.0+1;
713                         inout_double[i] = inout_double_for_check[i] = 1.0+2;
714                     }
715                     mpi_type = "MPI_DOUBLE";
716 
717                     if( 0 == strcmp(op, "sum") ) {
718                         MPI_OP_TEST( +, mpi_op, MPI_DOUBLE, double,
719                                      in_double, inout_double, inout_double_for_check,
720                                      count, "g");
721                     }
722                     if( 0 == strcmp(op, "prod") ) {
723                         MPI_OP_TEST( *, mpi_op, MPI_DOUBLE, double,
724                                      in_double, inout_double, inout_double_for_check,
725                                      count, "f");
726                     }
727                     if( 0 == strcmp(op, "max") ) {
728                         MPI_OP_MINMAX_TEST(max, mpi_op,  MPI_DOUBLE, double,
729                                            in_double, inout_double, inout_double_for_check,
730                                            count, "f");
731                     }
732                     if( 0 == strcmp(op, "min") ) {
733                         MPI_OP_MINMAX_TEST(min, mpi_op,  MPI_DOUBLE, double,
734                                            in_double, inout_double, inout_double_for_check,
735                                            count, "f");
736                     }
737                 }
738         check_and_continue:
739                 if( !skip_op_type )
740                     print_status(array_of_ops[do_ops[op_idx]].mpi_op_name,
741                                  mpi_type, type_size, count, max_shift, duration, repeats, correctness);
742             }
743             if( !skip_op_type )
744                 printf("\n");
745         }
746     }
747     ompi_mpi_finalize();
748 
749     free(in_buf);
750     free(inout_buf);
751     free(inout_check_buf);
752 
753     return (0 == total_errors) ? 0 : -1;
754 }
755 
756