1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2019-2020 The University of Tennessee and The University
4 * of Tennessee Research Foundation. All rights
5 * reserved.
6 * Copyright (c) 2020 Research Organization for Information Science
7 * and Technology (RIST). All rights reserved.
8 * $COPYRIGHT$
9 *
10 * Additional copyrights may follow
11 *
12 * $HEADER$
13 */
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <sys/time.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <unistd.h>
22
23 #include "mpi.h"
24 #include "ompi/communicator/communicator.h"
25 #include "ompi/runtime/mpiruntime.h"
26 #include "ompi/datatype/ompi_datatype.h"
27
28 typedef struct op_name_s {
29 char* name;
30 char* mpi_op_name;
31 MPI_Op op;
32 } op_name_t;
33 static op_name_t array_of_ops [] = {
34 { "max", "MPI_MAX", MPI_MAX },
35 { "min", "MPI_MIN", MPI_MIN },
36 { "sum", "MPI_SUM", MPI_SUM },
37 { "prod", "MPI_PROD", MPI_PROD },
38 { "land", "MPI_LAND", MPI_LAND },
39 { "band", "MPI_BAND", MPI_BAND },
40 { "lor", "MPI_LOR", MPI_LOR },
41 { "bor", "MPI_BOR", MPI_BOR },
42 { "lxor", "MPI_LXOR", MPI_LXOR },
43 { "bxor", "MPI_BXOR", MPI_BXOR },
44 { "replace", "MPI_REPLACE", MPI_REPLACE },
45 { NULL, "MPI_OP_NULL", MPI_OP_NULL }
46 };
/*
 * Indices (into array_of_ops) of the operations to run, terminated by -1.
 * Twelve slots: one per named operation plus one for the -1 terminator.
 */
static int do_ops[12] = { -1 };
/* Verbosity level; incremented once per -v on the command line. */
static int verbose = 0;
/* Number of failed checks reported so far. */
static int total_errors = 0;
50
/* Larger of the two arguments; each argument is evaluated exactly once. */
#define max(a,b)                           \
    ({ __typeof__ (a) _max_lhs = (a);      \
       __typeof__ (b) _max_rhs = (b);      \
       (_max_lhs > _max_rhs) ? _max_lhs : _max_rhs; })
55
/* Smaller of the two arguments; each argument is evaluated exactly once. */
#define min(a,b)                           \
    ({ __typeof__ (a) _min_lhs = (a);      \
       __typeof__ (b) _min_rhs = (b);      \
       (_min_lhs < _min_rhs) ? _min_lhs : _min_rhs; })
60
print_status(char * op,char * type,int type_size,int count,int max_shift,double * duration,int repeats,int correct)61 static void print_status(char* op, char* type, int type_size,
62 int count, int max_shift, double *duration, int repeats,
63 int correct )
64 {
65 if(correct) {
66 printf("%-10s %s %-10d%s ", op, type, type_size, (verbose ? " [\033[1;32msuccess\033[0m]" : ""));
67 } else {
68 printf("%-10s %s [\033[1;31mfail\033[0m]", op, type);
69 total_errors++;
70 }
71 if( 1 == max_shift ) {
72 printf(" count %-10d time (seconds) %.8f seconds\n", count, duration[0] / repeats);
73 } else {
74 printf(" count %-10d time (seconds / shifts) ", count);
75 for( int i = 0; i < max_shift; i++ ) {
76 printf("%.8f ", duration[i] / repeats );
77 }
78 printf("\n");
79 }
80 }
81
82 static int do_ops_built = 0;
83 static int
build_do_ops(char * optarg,int * do_ops)84 build_do_ops( char* optarg, int* do_ops)
85 {
86 int i;
87 if( 0 == strcmp(optarg, "all") ) {
88 for( i = 0; NULL != array_of_ops[i].name; i++ ) {
89 do_ops[i] = i;
90 }
91 do_ops[i] = -1; /* stop */
92 } else {
93 int n, idx = 0;
94 char* token, *arg = optarg;
95 while ((token = strsep(&arg, ",")) != NULL) {
96 for( i = 0; NULL != array_of_ops[i].name; i++ ) { /* find the op */
97 if( 0 == strcmp(array_of_ops[i].name, token) ) {
98 /* check if the op was not already selected */
99 for(n = 0; n < idx; n++ ) {
100 if( i == do_ops[n] ) {
101 break;
102 }
103 }
104 if( n >= idx ) {
105 do_ops[idx++] = i;
106 do_ops[idx] = -1;
107 }
108 break;
109 }
110 }
111 if( NULL == array_of_ops[i].name ) {
112 fprintf(stderr, "Unknown op %s. Ignored.\n", token);
113 }
114 }
115 }
116 do_ops_built = 1;
117 return 0;
118 }
119
120
/*
 * Time, and optionally verify, MPI_Reduce_local for a binary operator.
 *
 * For every shift _k below min(COUNT, max_shift), and `repeats` times per
 * shift: INOUT_BUF is reloaded from CHECK_BUF, COUNT-_k elements starting at
 * element _k are reduced, and the elapsed time is added to duration[_k].
 * When `check` is set each result element is compared with
 * INBUF[..] OPNAME CHECK_BUF[..]; the first mismatch is printed and
 * `correctness` is cleared.
 *
 * The expansion uses these names from the enclosing function: max_shift,
 * duration, repeats, tstart, tend, check, i, correctness, skip_op_type.
 * It always ends with a jump to the check_and_continue label.
 */
#define MPI_OP_TEST(OPNAME, MPIOP, MPITYPE, TYPE, INBUF, INOUT_BUF, CHECK_BUF, COUNT, TYPE_PREFIX) \
    do { \
        const TYPE *_src = ((TYPE*)(INBUF)); \
        const TYPE *_ref = ((TYPE*)(CHECK_BUF)); \
        TYPE *_dst = ((TYPE*)(INOUT_BUF)); \
        skip_op_type = 0; \
        for(int _k = 0; _k < min((COUNT), max_shift); _k++ ) { \
            duration[_k] = 0.0; \
            for(int _r = 0; _r < repeats; _r++) { \
                memcpy(_dst, _ref, sizeof(TYPE) * (COUNT)); \
                tstart = MPI_Wtime(); \
                MPI_Reduce_local(_src+_k, _dst+_k, (COUNT)-_k, (MPITYPE), (MPIOP)); \
                tend = MPI_Wtime(); \
                duration[_k] += (tend - tstart); \
                if( !check ) \
                    continue; \
                for( i = 0; i < (COUNT)-_k; i++ ) { \
                    if(((_dst+_k)[i]) != (((_src+_k)[i]) OPNAME ((_ref+_k)[i]))) { \
                        printf("First error at alignment %d position %d (%" TYPE_PREFIX " %s %" TYPE_PREFIX " != %" TYPE_PREFIX ")\n", \
                               _k, i, (_src+_k)[i], (#OPNAME), (_ref+_k)[i], (_dst+_k)[i]); \
                        correctness = 0; \
                        break; \
                    } \
                } \
            } \
        } \
        goto check_and_continue; \
    } while (0)
148
/*
 * Time, and optionally verify, MPI_Reduce_local for an operation whose
 * reference result is computed by a two-argument macro (OPNAME is `min` or
 * `max`).
 *
 * For every shift _k below min(COUNT, max_shift), and `repeats` times per
 * shift: INOUT_BUF is reloaded from CHECK_BUF, COUNT-_k elements starting at
 * element _k are reduced, and the elapsed time is added to duration[_k].
 * Reducing COUNT-_k elements (not COUNT) keeps the shifted access inside the
 * COUNT elements that were initialised.
 * When `check` is set every result element is compared with
 * OPNAME(INBUF[..], CHECK_BUF[..]); the first mismatch is printed and
 * `correctness` is cleared.
 *
 * The expansion uses these names from the enclosing function: max_shift,
 * duration, repeats, tstart, tend, check, i, correctness, skip_op_type.
 * It always ends with a jump to the check_and_continue label.
 */
#define MPI_OP_MINMAX_TEST(OPNAME, MPIOP, MPITYPE, TYPE, INBUF, INOUT_BUF, CHECK_BUF, COUNT, TYPE_PREFIX) \
    do { \
        const TYPE *_p1 = ((TYPE*)(INBUF)), *_p3 = ((TYPE*)(CHECK_BUF)); \
        TYPE *_p2 = ((TYPE*)(INOUT_BUF)); \
        skip_op_type = 0; \
        for(int _k = 0; _k < min((COUNT), max_shift); _k++ ) { \
            duration[_k] = 0.0; \
            for(int _r = repeats; _r > 0; _r--) { \
                memcpy(_p2, _p3, sizeof(TYPE) * (COUNT)); \
                tstart = MPI_Wtime(); \
                MPI_Reduce_local(_p1+_k, _p2+_k, (COUNT)-_k, (MPITYPE), (MPIOP)); \
                tend = MPI_Wtime(); \
                duration[_k] += (tend - tstart); \
                if( check ) { \
                    for( i = 0; i < (COUNT)-_k; i++ ) { \
                        /* compare element i of the shifted vectors, not just the first one */ \
                        TYPE _v1 = (_p1+_k)[i], _v2 = (_p2+_k)[i], _v3 = (_p3+_k)[i]; \
                        if(_v2 == OPNAME(_v1, _v3)) \
                            continue; \
                        /* printed as: result != op(input, original inout) */ \
                        printf("First error at alignment %d position %d (%" TYPE_PREFIX " != %s(%" TYPE_PREFIX ", %" TYPE_PREFIX "))\n", \
                               _k, i, _v2, (#OPNAME), _v1, _v3); \
                        correctness = 0; \
                        break; \
                    } \
                } \
            } \
        } \
        goto check_and_continue; \
    } while (0)
177
main(int argc,char ** argv)178 int main(int argc, char **argv)
179 {
180 static void *in_buf = NULL, *inout_buf = NULL, *inout_check_buf = NULL;
181 int count, type_size = 8, rank, size, provided, correctness = 1;
182 int repeats = 1, i, c, op1_alignment = 0, res_alignment = 0;
183 int max_shift = 4;
184 double *duration, tstart, tend;
185 bool check = true;
186 char type[5] = "uifd", *op = "sum", *mpi_type;
187 int lower = 1, upper = 1000000, skip_op_type;
188 MPI_Op mpi_op;
189
190 while( -1 != (c = getopt(argc, argv, "l:u:r:t:o:i:s:n:1:2:vfh")) ) {
191 switch(c) {
192 case 'l':
193 lower = atoi(optarg);
194 if( lower <= 0 ) {
195 fprintf(stderr, "The lower number of elements must be positive\n");
196 exit(-1);
197 }
198 break;
199 case 'u':
200 upper = atoi(optarg);
201 if( lower <= 0 ) {
202 fprintf(stderr, "The upper number of elements must be positive\n");
203 exit(-1);
204 }
205 break;
206 case 'i':
207 max_shift = atoi(optarg);
208 if( max_shift <= 0 ) {
209 fprintf(stderr, "The max shift must be positive\n");
210 exit(-1);
211 }
212 break;
213 case 'f':
214 check = false;
215 break;
216 case 'v':
217 verbose++;
218 break;
219 case 'r':
220 repeats = atoi(optarg);
221 if( repeats <= 0 ) {
222 fprintf(stderr, "The number of repetitions (%d) must be positive\n", repeats);
223 exit(-1);
224 }
225 break;
226 case 't':
227 for( i = 0; i < (int)strlen(optarg); i++ ) {
228 if( ! (('i' == optarg[i]) || ('u' == optarg[i]) ||
229 ('f' == optarg[i]) || ('d' == optarg[i])) ) {
230 fprintf(stderr, "type must be i (signed int), u (unsigned int), f (float) or d (double)\n");
231 exit(-1);
232 }
233 }
234 strncpy(type, optarg, 4);
235 break;
236 case 'o':
237 build_do_ops( optarg, do_ops);
238 break;
239 case 's':
240 type_size = atoi(optarg);
241 if( ! ((8 == type_size) || (16 == type_size) || (32 == type_size) || (64 == type_size)) ) {
242 fprintf(stderr, "type_size must be 8, 16, 32 or 64. %d is an invalid value\n",
243 type_size);
244 exit(-1);
245 }
246 break;
247 case '1':
248 op1_alignment = atoi(optarg);
249 if( op1_alignment < 0 ) {
250 fprintf(stderr, "alignment for the first operand must be positive\n");
251 exit(-1);
252 }
253 break;
254 case '2':
255 res_alignment = atoi(optarg);
256 if( res_alignment < 0 ) {
257 fprintf(stderr, "alignment for the result must be positive\n");
258 exit(-1);
259 }
260 break;
261 case 'h':
262 fprintf(stdout, "%s options are:\n"
263 " -l <number> : lower number of elements\n"
264 " -u <number> : upper number of elements\n"
265 " -s <type_size> : 8, 16, 32 or 64 bits elements\n"
266 " -t [i,u,f,d] : type of the elements to apply the operations on\n"
267 " -r <number> : number of repetitions for each test\n"
268 " -o <op> : comma separated list of operations to execute among\n"
269 " sum, min, max, prod, bor, bxor, band\n"
270 " -i <number> : shift on all buffers to check alignment\n"
271 " -1 <number> : (mis)alignment in elements for the first op\n"
272 " -2 <number> : (mis)alignment in elements for the result\n"
273 " -v: increase the verbosity level\n"
274 " -h: this help message\n", argv[0]);
275 exit(0);
276 }
277 }
278
279 if( !do_ops_built ) { /* not yet done, take the default */
280 build_do_ops( "all", do_ops);
281 }
282 posix_memalign( &in_buf, 64, (upper + op1_alignment) * sizeof(double));
283 posix_memalign( &inout_buf, 64, (upper + res_alignment) * sizeof(double));
284 posix_memalign( &inout_check_buf, 64, upper * sizeof(double));
285 duration = (double*)malloc(max_shift * sizeof(double));
286
287 ompi_mpi_init(argc, argv, MPI_THREAD_SERIALIZED, &provided, false);
288
289 rank = ompi_comm_rank(MPI_COMM_WORLD); (void)rank;
290 size = ompi_comm_size(MPI_COMM_WORLD); (void)size;
291
292 for(uint32_t type_idx = 0; type_idx < strlen(type); type_idx++ ) {
293 for(uint32_t op_idx = 0; do_ops[op_idx] >= 0; op_idx++ ) {
294 op = array_of_ops[do_ops[op_idx]].name;
295 mpi_op = array_of_ops[do_ops[op_idx]].op;
296 skip_op_type = 1;
297
298 for( count = lower; count <= upper; count += count ) {
299 mpi_type = NULL;
300 correctness = 1;
301 if('i' == type[type_idx]) {
302 if( 8 == type_size ) {
303 int8_t *in_int8 = (int8_t*)((char*)in_buf + op1_alignment * sizeof(int8_t)),
304 *inout_int8 = (int8_t*)((char*)inout_buf + res_alignment * sizeof(int8_t)),
305 *inout_int8_for_check = (int8_t*)inout_check_buf;
306 for( i = 0; i < count; i++ ) {
307 in_int8[i] = 5;
308 inout_int8[i] = inout_int8_for_check[i] = -3;
309 }
310 mpi_type = "MPI_INT8_T";
311
312 if( 0 == strcmp(op, "sum") ) {
313 MPI_OP_TEST( +, mpi_op, MPI_INT8_T, int8_t,
314 in_int8, inout_int8, inout_int8_for_check,
315 count, PRId8);
316 }
317 if( 0 == strcmp(op, "bor") ) {
318 MPI_OP_TEST( |, mpi_op, MPI_INT8_T, int8_t,
319 in_int8, inout_int8, inout_int8_for_check,
320 count, PRId8);
321 }
322 if( 0 == strcmp(op, "bxor") ) {
323 MPI_OP_TEST( ^, mpi_op, MPI_INT8_T, int8_t,
324 in_int8, inout_int8, inout_int8_for_check,
325 count, PRId8);
326 }
327 if( 0 == strcmp(op, "prod") ) {
328 MPI_OP_TEST( *, mpi_op, MPI_INT8_T, int8_t,
329 in_int8, inout_int8, inout_int8_for_check,
330 count, PRId8);
331 }
332 if( 0 == strcmp(op, "band") ) {
333 MPI_OP_TEST( &, mpi_op, MPI_INT8_T, int8_t,
334 in_int8, inout_int8, inout_int8_for_check,
335 count, PRId8);
336 }
337 if( 0 == strcmp(op, "max") ) {
338 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_INT8_T, int8_t,
339 in_int8, inout_int8, inout_int8_for_check,
340 count, PRId8);
341 }
342 if( 0 == strcmp(op, "min") ) { //intentionly reversed in and out
343 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_INT8_T, int8_t,
344 in_int8, inout_int8, inout_int8_for_check,
345 count, PRId8);
346 }
347 }
348 if( 16 == type_size ) {
349 int16_t *in_int16 = (int16_t*)((char*)in_buf + op1_alignment * sizeof(int16_t)),
350 *inout_int16 = (int16_t*)((char*)inout_buf + res_alignment * sizeof(int16_t)),
351 *inout_int16_for_check = (int16_t*)inout_check_buf;
352 for( i = 0; i < count; i++ ) {
353 in_int16[i] = 5;
354 inout_int16[i] = inout_int16_for_check[i] = -3;
355 }
356 mpi_type = "MPI_INT16_T";
357
358 if( 0 == strcmp(op, "sum") ) {
359 MPI_OP_TEST( +, mpi_op, MPI_INT16_T, int16_t,
360 in_int16, inout_int16, inout_int16_for_check,
361 count, PRId16);
362 }
363 if( 0 == strcmp(op, "bor") ) {
364 MPI_OP_TEST( |, mpi_op, MPI_INT16_T, int16_t,
365 in_int16, inout_int16, inout_int16_for_check,
366 count, PRId16);
367 }
368 if( 0 == strcmp(op, "bxor") ) {
369 MPI_OP_TEST( ^, mpi_op, MPI_INT16_T, int16_t,
370 in_int16, inout_int16, inout_int16_for_check,
371 count, PRId16);
372 }
373 if( 0 == strcmp(op, "prod") ) {
374 MPI_OP_TEST( *, mpi_op, MPI_INT16_T, int16_t,
375 in_int16, inout_int16, inout_int16_for_check,
376 count, PRId16);
377 }
378 if( 0 == strcmp(op, "band") ) {
379 MPI_OP_TEST( &, mpi_op, MPI_INT16_T, int16_t,
380 in_int16, inout_int16, inout_int16_for_check,
381 count, PRId16);
382 }
383 if( 0 == strcmp(op, "max") ) {
384 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_INT16_T, int16_t,
385 in_int16, inout_int16, inout_int16_for_check,
386 count, PRId16);
387 }
388 if( 0 == strcmp(op, "min") ) { //intentionly reversed in and out
389 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_INT16_T, int16_t,
390 in_int16, inout_int16, inout_int16_for_check,
391 count, PRId16);
392 }
393 }
394 if( 32 == type_size ) {
395 int32_t *in_int32 = (int32_t*)((char*)in_buf + op1_alignment * sizeof(int32_t)),
396 *inout_int32 = (int32_t*)((char*)inout_buf + res_alignment * sizeof(int32_t)),
397 *inout_int32_for_check = (int32_t*)inout_check_buf;
398 for( i = 0; i < count; i++ ) {
399 in_int32[i] = 5;
400 inout_int32[i] = inout_int32_for_check[i] = 3;
401 }
402 mpi_type = "MPI_INT32_T";
403
404 if( 0 == strcmp(op, "sum") ) {
405 MPI_OP_TEST( +, mpi_op, MPI_INT32_T, int32_t,
406 in_int32, inout_int32, inout_int32_for_check,
407 count, PRId32);
408 }
409 if( 0 == strcmp(op, "bor") ) {
410 MPI_OP_TEST( |, mpi_op, MPI_INT32_T, int32_t,
411 in_int32, inout_int32, inout_int32_for_check,
412 count, PRId32);
413 }
414 if( 0 == strcmp(op, "bxor") ) {
415 MPI_OP_TEST( ^, mpi_op, MPI_INT32_T, int32_t,
416 in_int32, inout_int32, inout_int32_for_check,
417 count, PRId32);
418 }
419 if( 0 == strcmp(op, "prod") ) {
420 MPI_OP_TEST( *, mpi_op, MPI_INT32_T, int32_t,
421 in_int32, inout_int32, inout_int32_for_check,
422 count, PRId32);
423 }
424 if( 0 == strcmp(op, "band") ) {
425 MPI_OP_TEST( &, mpi_op, MPI_INT32_T, int32_t,
426 in_int32, inout_int32, inout_int32_for_check,
427 count, PRId32);
428 }
429 if( 0 == strcmp(op, "max") ) {
430 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_INT32_T, int32_t,
431 in_int32, inout_int32, inout_int32_for_check,
432 count, PRId32);
433 }
434 if( 0 == strcmp(op, "min") ) { //intentionly reversed in and out
435 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_INT32_T, int32_t,
436 in_int32, inout_int32, inout_int32_for_check,
437 count, PRId32);
438 }
439 }
440 if( 64 == type_size ) {
441 int64_t *in_int64 = (int64_t*)((char*)in_buf + op1_alignment * sizeof(int64_t)),
442 *inout_int64 = (int64_t*)((char*)inout_buf + res_alignment * sizeof(int64_t)),
443 *inout_int64_for_check = (int64_t*)inout_check_buf;
444 for( i = 0; i < count; i++ ) {
445 in_int64[i] = 5;
446 inout_int64[i] = inout_int64_for_check[i] = 3;
447 }
448 mpi_type = "MPI_INT64_T";
449
450 if( 0 == strcmp(op, "sum") ) {
451 MPI_OP_TEST( +, mpi_op, MPI_INT64_T, int64_t,
452 in_int64, inout_int64, inout_int64_for_check,
453 count, PRId64);
454 }
455 if( 0 == strcmp(op, "bor") ) {
456 MPI_OP_TEST( |, mpi_op, MPI_INT64_T, int64_t,
457 in_int64, inout_int64, inout_int64_for_check,
458 count, PRId64);
459 }
460 if( 0 == strcmp(op, "bxor") ) {
461 MPI_OP_TEST( ^, mpi_op, MPI_INT64_T, int64_t,
462 in_int64, inout_int64, inout_int64_for_check,
463 count, PRId64);
464 }
465 if( 0 == strcmp(op, "prod") ) {
466 MPI_OP_TEST( *, mpi_op, MPI_INT64_T, int64_t,
467 in_int64, inout_int64, inout_int64_for_check,
468 count, PRId64);
469 }
470 if( 0 == strcmp(op, "band") ) {
471 MPI_OP_TEST( &, mpi_op, MPI_INT64_T, int64_t,
472 in_int64, inout_int64, inout_int64_for_check,
473 count, PRId64);
474 }
475 if( 0 == strcmp(op, "max") ) {
476 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_INT64_T, int64_t,
477 in_int64, inout_int64, inout_int64_for_check,
478 count, PRId64);
479 }
480 if( 0 == strcmp(op, "min") ) { //intentionly reversed in and out
481 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_INT64_T, int64_t,
482 in_int64, inout_int64, inout_int64_for_check,
483 count, PRId64);
484 }
485 }
486 }
487
488 if( 'u' == type[type_idx] ) {
489 if( 8 == type_size ) {
490 uint8_t *in_uint8 = (uint8_t*)((char*)in_buf + op1_alignment * sizeof(uint8_t)),
491 *inout_uint8 = (uint8_t*)((char*)inout_buf + res_alignment * sizeof(uint8_t)),
492 *inout_uint8_for_check = (uint8_t*)inout_check_buf;
493 for( i = 0; i < count; i++ ) {
494 in_uint8[i] = 5;
495 inout_uint8[i] = inout_uint8_for_check[i] = 2;
496 }
497 mpi_type = "MPI_UINT8_T";
498
499 if( 0 == strcmp(op, "sum") ) {
500 MPI_OP_TEST( +, mpi_op, MPI_UINT8_T, uint8_t,
501 in_uint8, inout_uint8, inout_uint8_for_check,
502 count, PRIu8);
503 }
504 if( 0 == strcmp(op, "bor") ) {
505 MPI_OP_TEST( |, mpi_op, MPI_UINT8_T, uint8_t,
506 in_uint8, inout_uint8, inout_uint8_for_check,
507 count, PRIu8);
508 }
509 if( 0 == strcmp(op, "bxor") ) {
510 MPI_OP_TEST( ^, mpi_op, MPI_UINT8_T, uint8_t,
511 in_uint8, inout_uint8, inout_uint8_for_check,
512 count, PRIu8);
513 }
514 if( 0 == strcmp(op, "prod") ) {
515 MPI_OP_TEST( *, mpi_op, MPI_UINT8_T, uint8_t,
516 in_uint8, inout_uint8, inout_uint8_for_check,
517 count, PRIu8);
518 }
519 if( 0 == strcmp(op, "band") ) {
520 MPI_OP_TEST( &, mpi_op, MPI_UINT8_T, uint8_t,
521 in_uint8, inout_uint8, inout_uint8_for_check,
522 count, PRIu8);
523 }
524 if( 0 == strcmp(op, "max") ) {
525 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_UINT8_T, uint8_t,
526 in_uint8, inout_uint8, inout_uint8_for_check,
527 count, PRIu8);
528 }
529 if( 0 == strcmp(op, "min") ) { //intentionly reversed in and out
530 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_UINT8_T, uint8_t,
531 in_uint8, inout_uint8, inout_uint8_for_check,
532 count, PRIu8);
533 }
534 }
535 if( 16 == type_size ) {
536 uint16_t *in_uint16 = (uint16_t*)((char*)in_buf + op1_alignment * sizeof(uint16_t)),
537 *inout_uint16 = (uint16_t*)((char*)inout_buf + res_alignment * sizeof(uint16_t)),
538 *inout_uint16_for_check = (uint16_t*)inout_check_buf;
539 for( i = 0; i < count; i++ ) {
540 in_uint16[i] = 5;
541 inout_uint16[i] = inout_uint16_for_check[i] = 1234;
542 }
543 mpi_type = "MPI_UINT16_T";
544
545 if( 0 == strcmp(op, "sum") ) {
546 MPI_OP_TEST( +, mpi_op, MPI_UINT16_T, uint16_t,
547 in_uint16, inout_uint16, inout_uint16_for_check,
548 count, PRIu16);
549 }
550 if( 0 == strcmp(op, "bor") ) {
551 MPI_OP_TEST( |, mpi_op, MPI_UINT16_T, uint16_t,
552 in_uint16, inout_uint16, inout_uint16_for_check,
553 count, PRIu16);
554 }
555 if( 0 == strcmp(op, "bxor") ) {
556 MPI_OP_TEST( ^, mpi_op, MPI_UINT16_T, uint16_t,
557 in_uint16, inout_uint16, inout_uint16_for_check,
558 count, PRIu16);
559 }
560 if( 0 == strcmp(op, "prod") ) {
561 MPI_OP_TEST( *, mpi_op, MPI_UINT16_T, uint16_t,
562 in_uint16, inout_uint16, inout_uint16_for_check,
563 count, PRIu16);
564 }
565 if( 0 == strcmp(op, "band") ) {
566 MPI_OP_TEST( &, mpi_op, MPI_UINT16_T, uint16_t,
567 in_uint16, inout_uint16, inout_uint16_for_check,
568 count, PRIu16);
569 }
570 if( 0 == strcmp(op, "max") ) {
571 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_UINT16_T, uint16_t,
572 in_uint16, inout_uint16, inout_uint16_for_check,
573 count, PRIu16);
574 }
575 if( 0 == strcmp(op, "min") ) { //intentionly reversed in and out
576 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_UINT16_T, uint16_t,
577 in_uint16, inout_uint16, inout_uint16_for_check,
578 count, PRIu16);
579 }
580 }
581 if( 32 == type_size ) {
582 uint32_t *in_uint32 = (uint32_t*)((char*)in_buf + op1_alignment * sizeof(uint32_t)),
583 *inout_uint32 = (uint32_t*)((char*)inout_buf + res_alignment * sizeof(uint32_t)),
584 *inout_uint32_for_check = (uint32_t*)inout_check_buf;
585 for( i = 0; i < count; i++ ) {
586 in_uint32[i] = 5;
587 inout_uint32[i] = inout_uint32_for_check[i] = 3;
588 }
589 mpi_type = "MPI_UINT32_T";
590
591 if( 0 == strcmp(op, "sum") ) {
592 MPI_OP_TEST( +, mpi_op, MPI_UINT32_T, uint32_t,
593 in_uint32, inout_uint32, inout_uint32_for_check,
594 count, PRIu32);
595 }
596 if( 0 == strcmp(op, "bor") ) {
597 MPI_OP_TEST( |, mpi_op, MPI_UINT32_T, uint32_t,
598 in_uint32, inout_uint32, inout_uint32_for_check,
599 count, PRIu32);
600 }
601 if( 0 == strcmp(op, "bxor") ) {
602 MPI_OP_TEST( ^, mpi_op, MPI_UINT32_T, uint32_t,
603 in_uint32, inout_uint32, inout_uint32_for_check,
604 count, PRIu32);
605 }
606 if( 0 == strcmp(op, "prod") ) {
607 MPI_OP_TEST( *, mpi_op, MPI_UINT32_T, uint32_t,
608 in_uint32, inout_uint32, inout_uint32_for_check,
609 count, PRIu32);
610 }
611 if( 0 == strcmp(op, "band") ) {
612 MPI_OP_TEST( &, mpi_op, MPI_UINT32_T, uint32_t,
613 in_uint32, inout_uint32, inout_uint32_for_check,
614 count, PRIu32);
615 }
616 if( 0 == strcmp(op, "max") ) {
617 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_UINT32_T, uint32_t,
618 in_uint32, inout_uint32, inout_uint32_for_check,
619 count, PRIu32);
620 }
621 if( 0 == strcmp(op, "min") ) { //intentionly reversed in and out
622 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_UINT32_T, uint32_t,
623 in_uint32, inout_uint32, inout_uint32_for_check,
624 count, PRIu32);
625 }
626 }
627 if( 64 == type_size ) {
628 uint64_t *in_uint64 = (uint64_t*)((char*)in_buf + op1_alignment * sizeof(uint64_t)),
629 *inout_uint64 = (uint64_t*)((char*)inout_buf + res_alignment * sizeof(uint64_t)),
630 *inout_uint64_for_check = (uint64_t*)inout_check_buf;
631 for( i = 0; i < count; i++ ) {
632 in_uint64[i] = 5;
633 inout_uint64[i] = inout_uint64_for_check[i] = 32433;
634 }
635 mpi_type = "MPI_UINT64_T";
636
637 if( 0 == strcmp(op, "sum") ) {
638 MPI_OP_TEST( +, mpi_op, MPI_UINT64_T, uint64_t,
639 in_uint64, inout_uint64, inout_uint64_for_check,
640 count, PRIu64);
641 }
642 if( 0 == strcmp(op, "bor") ) {
643 MPI_OP_TEST( |, mpi_op, MPI_UINT64_T, uint64_t,
644 in_uint64, inout_uint64, inout_uint64_for_check,
645 count, PRIu64);
646 }
647 if( 0 == strcmp(op, "bxor") ) {
648 MPI_OP_TEST( ^, mpi_op, MPI_UINT64_T, uint64_t,
649 in_uint64, inout_uint64, inout_uint64_for_check,
650 count, PRIu64);
651 }
652 if( 0 == strcmp(op, "prod") ) {
653 MPI_OP_TEST( *, mpi_op, MPI_UINT64_T, uint64_t,
654 in_uint64, inout_uint64, inout_uint64_for_check,
655 count, PRIu64);
656 }
657 if( 0 == strcmp(op, "band") ) {
658 MPI_OP_TEST( &, mpi_op, MPI_UINT64_T, uint64_t,
659 in_uint64, inout_uint64, inout_uint64_for_check,
660 count, PRIu64);
661 }
662 if( 0 == strcmp(op, "max") ) {
663 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_UINT64_T, uint64_t,
664 in_uint64, inout_uint64, inout_uint64_for_check,
665 count, PRIu64);
666 }
667 if( 0 == strcmp(op, "min") ) {
668 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_UINT64_T, uint64_t,
669 in_uint64, inout_uint64, inout_uint64_for_check,
670 count, PRIu64);
671 }
672 }
673 }
674
675 if( 'f' == type[type_idx] ) {
676 float *in_float = (float*)((char*)in_buf + op1_alignment * sizeof(float)),
677 *inout_float = (float*)((char*)inout_buf + res_alignment * sizeof(float)),
678 *inout_float_for_check = (float*)inout_check_buf;
679 for( i = 0; i < count; i++ ) {
680 in_float[i] = 1000.0+1;
681 inout_float[i] = inout_float_for_check[i] = 100.0+2;
682 }
683 mpi_type = "MPI_FLOAT";
684
685 if( 0 == strcmp(op, "sum") ) {
686 MPI_OP_TEST( +, mpi_op, MPI_FLOAT, float,
687 in_float, inout_float, inout_float_for_check,
688 count, "f");
689 }
690 if( 0 == strcmp(op, "prod") ) {
691 MPI_OP_TEST( *, mpi_op, MPI_FLOAT, float,
692 in_float, inout_float, inout_float_for_check,
693 count, "f");
694 }
695 if( 0 == strcmp(op, "max") ) {
696 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_FLOAT, float,
697 in_float, inout_float, inout_float_for_check,
698 count, "f");
699 }
700 if( 0 == strcmp(op, "min") ) {
701 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_FLOAT, float,
702 in_float, inout_float, inout_float_for_check,
703 count, "f");
704 }
705 }
706
707 if( 'd' == type[type_idx] ) {
708 double *in_double = (double*)((char*)in_buf + op1_alignment * sizeof(double)),
709 *inout_double = (double*)((char*)inout_buf + res_alignment * sizeof(double)),
710 *inout_double_for_check = (double*)inout_check_buf;
711 for( i = 0; i < count; i++ ) {
712 in_double[i] = 10.0+1;
713 inout_double[i] = inout_double_for_check[i] = 1.0+2;
714 }
715 mpi_type = "MPI_DOUBLE";
716
717 if( 0 == strcmp(op, "sum") ) {
718 MPI_OP_TEST( +, mpi_op, MPI_DOUBLE, double,
719 in_double, inout_double, inout_double_for_check,
720 count, "g");
721 }
722 if( 0 == strcmp(op, "prod") ) {
723 MPI_OP_TEST( *, mpi_op, MPI_DOUBLE, double,
724 in_double, inout_double, inout_double_for_check,
725 count, "f");
726 }
727 if( 0 == strcmp(op, "max") ) {
728 MPI_OP_MINMAX_TEST(max, mpi_op, MPI_DOUBLE, double,
729 in_double, inout_double, inout_double_for_check,
730 count, "f");
731 }
732 if( 0 == strcmp(op, "min") ) {
733 MPI_OP_MINMAX_TEST(min, mpi_op, MPI_DOUBLE, double,
734 in_double, inout_double, inout_double_for_check,
735 count, "f");
736 }
737 }
738 check_and_continue:
739 if( !skip_op_type )
740 print_status(array_of_ops[do_ops[op_idx]].mpi_op_name,
741 mpi_type, type_size, count, max_shift, duration, repeats, correctness);
742 }
743 if( !skip_op_type )
744 printf("\n");
745 }
746 }
747 ompi_mpi_finalize();
748
749 free(in_buf);
750 free(inout_buf);
751 free(inout_check_buf);
752
753 return (0 == total_errors) ? 0 : -1;
754 }
755
756