1 /*
2 (C) 2004 by Argonne National Laboratory.
3 See COPYRIGHT in top-level directory.
4 */
5 #include "collchk.h"
6
7 /* AIX requires this to be the first thing in the file. */
8 #ifndef __GNUC__
9 # if HAVE_ALLOCA_H
10 # include <alloca.h>
11 # else
12 # ifdef _AIX
13 #pragma alloca
14 # else
15 # ifndef alloca /* predefined by HP cc +Olibcalls */
16 char *alloca ();
17 # endif
18 # endif
19 # endif
20 #else
21 # if defined( HAVE_ALLOCA_H )
22 # include <alloca.h>
23 # endif
24 #endif
25
26
27 unsigned int CollChk_cirleftshift( unsigned int alpha, unsigned n );
CollChk_cirleftshift(unsigned int alpha,unsigned n)28 unsigned int CollChk_cirleftshift( unsigned int alpha, unsigned n )
29 {
30 /* Doing circular left shift of alpha by n bits */
31 unsigned int t1, t2;
32 t1 = alpha >> (sizeof(unsigned int)-n);
33 t2 = alpha << n;
34 return t1 | t2;
35 }
36
37 void CollChk_hash_add(const CollChk_hash_t *alpha,
38 const CollChk_hash_t *beta,
39 CollChk_hash_t *lamda);
CollChk_hash_add(const CollChk_hash_t * alpha,const CollChk_hash_t * beta,CollChk_hash_t * lamda)40 void CollChk_hash_add(const CollChk_hash_t *alpha,
41 const CollChk_hash_t *beta,
42 CollChk_hash_t *lamda)
43 {
44 lamda->value = (alpha->value)
45 ^ CollChk_cirleftshift(beta->value, alpha->count);
46 lamda->count = alpha->count + beta->count;
47 }
48
CollChk_hash_equal(const CollChk_hash_t * alpha,const CollChk_hash_t * beta)49 int CollChk_hash_equal(const CollChk_hash_t *alpha,
50 const CollChk_hash_t *beta)
51 {
52 return alpha->count == beta->count && alpha->value == beta->value;
53 }
54
55 #if defined( HAVE_LAM_FORTRAN_MPI_DATATYPE_IN_C )
56 #include "lam_f2c_dtype.h"
57 #endif
58
59 unsigned int CollChk_basic_value(MPI_Datatype type);
CollChk_basic_value(MPI_Datatype type)60 unsigned int CollChk_basic_value(MPI_Datatype type)
61 {
62 /*
63 MPI_Datatype's that return 0x0 are as if they are being
64 skipped/ignored in the comparison of any 2 MPI_Datatypes.
65 */
66 if ( type == MPI_DATATYPE_NULL || type == MPI_UB || type == MPI_LB )
67 return 0x0;
68 else if ( type == MPI_CHAR )
69 return 0x1;
70 #if defined( HAVE_MPI_SIGNED_CHAR )
71 else if ( type == MPI_SIGNED_CHAR )
72 return 0x3;
73 #endif
74 else if ( type == MPI_UNSIGNED_CHAR )
75 return 0x5;
76 else if ( type == MPI_BYTE )
77 return 0x7;
78 #if defined( HAVE_MPI_WCHAR )
79 else if ( type == MPI_WCHAR )
80 return 0x9;
81 #endif
82 else if ( type == MPI_SHORT )
83 return 0xb;
84 else if ( type == MPI_UNSIGNED_SHORT )
85 return 0xd;
86 else if ( type == MPI_INT )
87 return 0xf;
88 else if ( type == MPI_UNSIGNED )
89 return 0x11;
90 else if ( type == MPI_LONG )
91 return 0x13;
92 else if ( type == MPI_UNSIGNED_LONG )
93 return 0x15;
94 else if ( type == MPI_FLOAT )
95 return 0x17;
96 else if ( type == MPI_DOUBLE )
97 return 0x19;
98 else if ( type == MPI_LONG_DOUBLE )
99 return 0x1b;
100 else if ( type == MPI_LONG_LONG_INT )
101 return 0x1d;
102 /* else if ( type == MPI_LONG_LONG ) return 0x1f; */
103 #if defined( HAVE_MPI_UNSIGNED_LONG_LONG )
104 else if ( type == MPI_UNSIGNED_LONG_LONG )
105 return 0x21;
106 #endif
107
108 else if ( type == MPI_FLOAT_INT )
109 return 0x8; /* (0x17,1)@(0xf,1) */
110 else if ( type == MPI_DOUBLE_INT )
111 return 0x6; /* (0x19,1)@(0xf,1) */
112 else if ( type == MPI_LONG_INT )
113 return 0xc; /* (0x13,1)@(0xf,1) */
114 else if ( type == MPI_SHORT_INT )
115 return 0x14; /* (0xb,1)@(0xf,1) */
116 else if ( type == MPI_2INT )
117 return 0x10; /* (0xf,1)@(0xf,1) */
118 else if ( type == MPI_LONG_DOUBLE_INT )
119 return 0x4; /* (0x1b,1)@(0xf,1) */
120
121 #if defined( HAVE_FORTRAN_MPI_DATATYPE_IN_C )
122 else if ( type == MPI_COMPLEX )
123 return 0x101;
124 else if ( type == MPI_DOUBLE_COMPLEX )
125 return 0x103;
126 else if ( type == MPI_LOGICAL )
127 return 0x105;
128 else if ( type == MPI_REAL )
129 return 0x107;
130 else if ( type == MPI_DOUBLE_PRECISION )
131 return 0x109;
132 else if ( type == MPI_INTEGER )
133 return 0x10b;
134 else if ( type == MPI_CHARACTER )
135 return 0x10d;
136
137 else if ( type == MPI_2INTEGER )
138 return 0x33c; /* (0x10b,1)@(0x10b,1) */
139 else if ( type == MPI_2REAL )
140 return 0x329; /* (0x107,1)@(0x107,1) */
141 else if ( type == MPI_2DOUBLE_PRECISION )
142 return 0x33a; /* (0x109,1)@(0x109,1) */
143 #if defined( HAVE_FORTRAN_MPI_DATATYPE_2COMPLEX_IN_C )
144 else if ( type == MPI_2COMPLEX )
145 return 0x323; /* (0x101,1)@(0x101,1) */
146 else if ( type == MPI_2DOUBLE_COMPLEX )
147 return 0x325; /* (0x103,1)@(0x103,1) */
148 #endif
149
150 else if ( type == MPI_PACKED )
151 return 0x201;
152 #if defined( HAVE_FORTRAN_MPI_DATATYPE_INTEGERX_IN_C )
153 else if ( type == MPI_INTEGER1 )
154 return 0x211;
155 else if ( type == MPI_INTEGER2 )
156 return 0x213;
157 else if ( type == MPI_INTEGER4 )
158 return 0x215;
159 else if ( type == MPI_INTEGER8 )
160 return 0x217;
161 /* else if ( type == MPI_INTEGER16 ) return 0x219; */
162 #endif
163 #if defined( HAVE_FORTRAN_MPI_DATATYPE_REALX_IN_C )
164 else if ( type == MPI_REAL4 )
165 return 0x221;
166 else if ( type == MPI_REAL8 )
167 return 0x223;
168 /* else if ( type == MPI_REAL16 ) return 0x205; */
169 #endif
170 #if defined( HAVE_FORTRAN_MPI_DATATYPE_COMPLEXX_IN_C )
171 else if ( type == MPI_COMPLEX8 )
172 return 0x231;
173 else if ( type == MPI_COMPLEX16 )
174 return 0x233;
175 /* else if ( type == MPI_COMPLEX32 ) return 0x20b; */
176 #endif
177
178 #endif
179
180 else {
181 #if defined( HAVE_INT_MPI_DATATYPE )
182 fprintf( stderr, "CollChk_basic_value()) "
183 "Unknown basic MPI datatype %x.\n", type );
184 #elif defined( HAVE_PTR_MPI_DATATYPE )
185 fprintf( stderr, "CollChk_basic_value()) "
186 "Unknown basic MPI datatype %p.\n", type );
187 #else
188 fprintf( stderr, "CollChk_basic_value()) "
189 "Unknown basic MPI datatype.\n" );
190 #endif
191 fflush( stderr );
192 return 0;
193 }
194 }
195
196 unsigned int CollChk_basic_count(MPI_Datatype type);
CollChk_basic_count(MPI_Datatype type)197 unsigned int CollChk_basic_count(MPI_Datatype type)
198 {
199 /* MPI_Datatype's that return 0 are being skipped/ignored. */
200 if ( type == MPI_DATATYPE_NULL
201 || type == MPI_UB
202 || type == MPI_LB
203 ) return 0;
204
205 else if ( type == MPI_CHAR
206 #if defined( HAVE_MPI_SIGNED_CHAR )
207 || type == MPI_SIGNED_CHAR
208 #endif
209 || type == MPI_UNSIGNED_CHAR
210 || type == MPI_BYTE
211 #if defined( HAVE_MPI_WCHAR )
212 || type == MPI_WCHAR
213 #endif
214 || type == MPI_SHORT
215 || type == MPI_UNSIGNED_SHORT
216 || type == MPI_INT
217 || type == MPI_UNSIGNED
218 || type == MPI_LONG
219 || type == MPI_UNSIGNED_LONG
220 || type == MPI_FLOAT
221 || type == MPI_DOUBLE
222 || type == MPI_LONG_DOUBLE
223 || type == MPI_LONG_LONG_INT
224 /* || type == MPI_LONG_LONG */
225 #if defined( HAVE_MPI_UNSIGNED_LONG_LONG )
226 || type == MPI_UNSIGNED_LONG_LONG
227 #endif
228 ) return 1;
229
230 else if ( type == MPI_FLOAT_INT
231 || type == MPI_DOUBLE_INT
232 || type == MPI_LONG_INT
233 || type == MPI_SHORT_INT
234 || type == MPI_2INT
235 || type == MPI_LONG_DOUBLE_INT
236 ) return 2;
237
238 #if defined( HAVE_FORTRAN_MPI_DATATYPE_IN_C )
239 else if ( type == MPI_COMPLEX
240 || type == MPI_DOUBLE_COMPLEX
241 || type == MPI_LOGICAL
242 || type == MPI_REAL
243 || type == MPI_DOUBLE_PRECISION
244 || type == MPI_INTEGER
245 || type == MPI_CHARACTER
246 ) return 1;
247
248 else if ( type == MPI_2INTEGER
249 || type == MPI_2REAL
250 || type == MPI_2DOUBLE_PRECISION
251 #if defined( HAVE_FORTRAN_MPI_DATATYPE_2COMPLEX_IN_C )
252 || type == MPI_2COMPLEX
253 || type == MPI_2DOUBLE_COMPLEX
254 #endif
255 ) return 2;
256
257 else if ( type == MPI_PACKED
258 #if defined( HAVE_FORTRAN_MPI_DATATYPE_INTEGERX_IN_C )
259 || type == MPI_INTEGER1
260 || type == MPI_INTEGER2
261 || type == MPI_INTEGER4
262 || type == MPI_INTEGER8
263 /* || type == MPI_INTEGER16 */
264 #endif
265 #if defined( HAVE_FORTRAN_MPI_DATATYPE_REALX_IN_C )
266 || type == MPI_REAL4
267 || type == MPI_REAL8
268 /* || type == MPI_REAL16 */
269 #endif
270 #if defined( HAVE_FORTRAN_MPI_DATATYPE_COMPLEXX_IN_C )
271 || type == MPI_COMPLEX8
272 || type == MPI_COMPLEX16
273 /* || type == MPI_COMPLEX32 */
274 #endif
275 ) return 1;
276 #endif
277
278 else {
279 #if defined( HAVE_INT_MPI_DATATYPE )
280 fprintf( stderr, "CollChk_basic_count(): "
281 "Unknown basic MPI datatype %x.\n", type );
282 #elif defined( HAVE_PTR_MPI_DATATYPE )
283 fprintf( stderr, "CollChk_basic_count(): "
284 "Unknown basic MPI datatype %p.\n", type );
285 #else
286 fprintf( stderr, "CollChk_basic_count(): "
287 "Unknown basic MPI datatype.\n" );
288 #endif
289 fflush( stderr );
290 return 0;
291 }
292 }
293
294
295 int CollChk_derived_count(int idx, int *ints, int combiner);
CollChk_derived_count(int idx,int * ints,int combiner)296 int CollChk_derived_count(int idx, int *ints, int combiner)
297 {
298 int ii, tot_cnt;
299 #if defined( HAVE_RARE_MPI_COMBINERS )
300 int dim_A, dim_B;
301 #endif
302
303 tot_cnt = 0;
304 switch(combiner) {
305 #if defined( HAVE_RARE_MPI_COMBINERS )
306 case MPI_COMBINER_DUP :
307 case MPI_COMBINER_F90_REAL :
308 case MPI_COMBINER_F90_COMPLEX :
309 case MPI_COMBINER_F90_INTEGER :
310 case MPI_COMBINER_RESIZED :
311 return 1;
312 #endif
313
314 case MPI_COMBINER_CONTIGUOUS :
315 return ints[0];
316
317 #if defined( HAVE_RARE_MPI_COMBINERS )
318 case MPI_COMBINER_HVECTOR_INTEGER :
319 case MPI_COMBINER_INDEXED_BLOCK :
320 #endif
321 case MPI_COMBINER_VECTOR :
322 case MPI_COMBINER_HVECTOR :
323 return ints[0]*ints[1];
324
325 #if defined( HAVE_RARE_MPI_COMBINERS )
326 case MPI_COMBINER_HINDEXED_INTEGER :
327 #endif
328 case MPI_COMBINER_INDEXED :
329 case MPI_COMBINER_HINDEXED :
330 for ( ii = ints[0]; ii > 0; ii-- ) {
331 tot_cnt += ints[ ii ];
332 }
333 return tot_cnt;
334
335 #if defined( HAVE_RARE_MPI_COMBINERS )
336 case MPI_COMBINER_STRUCT_INTEGER :
337 #endif
338 case MPI_COMBINER_STRUCT :
339 return ints[idx+1];
340
341 #if defined( HAVE_RARE_MPI_COMBINERS )
342 case MPI_COMBINER_SUBARRAY :
343 dim_A = ints[ 0 ] + 1;
344 dim_B = 2 * ints[ 0 ];
345 for ( ii=dim_A; ii<=dim_B; ii++ ) {
346 tot_cnt += ints[ ii ];
347 }
348 return tot_cnt;
349 case MPI_COMBINER_DARRAY :
350 for ( ii=3; ii<=ints[2]+2; ii++ ) {
351 tot_cnt += ints[ ii ];
352 }
353 return tot_cnt;
354 #endif
355 }
356 return tot_cnt;
357 }
358
359
CollChk_dtype_hash(MPI_Datatype type,int cnt,CollChk_hash_t * type_hash)360 void CollChk_dtype_hash(MPI_Datatype type, int cnt, CollChk_hash_t *type_hash)
361 {
362 int nints, naddrs, ntypes, combiner;
363 int *ints;
364 MPI_Aint *addrs;
365 MPI_Datatype *types;
366 CollChk_hash_t curr_hash, next_hash;
367 int type_cnt;
368 int ii;
369
370 /* Don't know if this makes sense or not */
371 if ( cnt <= 0 ) {
372 /* (value,count)=(0,0) => skipping of this (type,cnt) in addition */
373 type_hash->value = 0;
374 type_hash->count = 0;
375 return;
376 }
377
378 MPI_Type_get_envelope(type, &nints, &naddrs, &ntypes, &combiner);
379 if (combiner != MPI_COMBINER_NAMED) {
380 #if ! defined( HAVE_ALLOCA )
381 ints = NULL;
382 if ( nints > 0 )
383 ints = (int *) malloc(nints * sizeof(int));
384 addrs = NULL;
385 if ( naddrs > 0 )
386 addrs = (MPI_Aint *) malloc(naddrs * sizeof(MPI_Aint));
387 types = NULL;
388 if ( ntypes > 0 )
389 types = (MPI_Datatype *) malloc(ntypes * sizeof(MPI_Datatype));
390 #else
391 ints = NULL;
392 if ( nints > 0 )
393 ints = (int *) alloca(nints * sizeof(int));
394 addrs = NULL;
395 if ( naddrs > 0 )
396 addrs = (MPI_Aint *) alloca(naddrs * sizeof(MPI_Aint));
397 types = NULL;
398 if ( ntypes > 0 )
399 types = (MPI_Datatype *) alloca(ntypes * sizeof(MPI_Datatype));
400 #endif
401
402 MPI_Type_get_contents(type, nints, naddrs, ntypes, ints, addrs, types);
403 type_cnt = CollChk_derived_count(0, ints, combiner);
404 CollChk_dtype_hash(types[0], type_cnt, &curr_hash);
405
406 /*
407 ntypes > 1 only for MPI_COMBINER_STRUCT(_INTEGER)
408 */
409 for( ii=1; ii < ntypes; ii++) {
410 type_cnt = CollChk_derived_count(ii, ints, combiner);
411 CollChk_dtype_hash(types[ii], type_cnt, &next_hash);
412 CollChk_hash_add(&curr_hash, &next_hash, &curr_hash);
413 }
414
415 #if ! defined( HAVE_ALLOCA )
416 if ( ints != NULL )
417 free( ints );
418 if ( addrs != NULL )
419 free( addrs );
420 if ( types != NULL )
421 free( types );
422 #endif
423 }
424 else {
425 curr_hash.value = CollChk_basic_value(type);
426 curr_hash.count = CollChk_basic_count(type);
427 }
428
429 type_hash->value = curr_hash.value;
430 type_hash->count = curr_hash.count;
431 for ( ii=1; ii < cnt; ii++ ) {
432 CollChk_hash_add(type_hash, &curr_hash, type_hash);
433 }
434 }
435
436 /*
437 A wrapper that calls PMPI_Allreduce() provides different send and receive
438 buffers so use of MPI_Allreduce() conforms to MPI-1 standard, section 2.2.
439 */
440 int CollChk_Allreduce_int( int ival, MPI_Op op, MPI_Comm comm );
CollChk_Allreduce_int(int ival,MPI_Op op,MPI_Comm comm)441 int CollChk_Allreduce_int( int ival, MPI_Op op, MPI_Comm comm )
442 {
443 int local_ival;
444 PMPI_Allreduce( &ival, &local_ival, 1, MPI_INT, op, comm );
445 return local_ival;
446 }
447
448 /*
449 Checking if (type,cnt) is the same in all processes within the communicator.
450 */
CollChk_dtype_bcast(MPI_Comm comm,MPI_Datatype type,int cnt,int root,char * call)451 int CollChk_dtype_bcast(MPI_Comm comm, MPI_Datatype type, int cnt, int root,
452 char* call)
453 {
454 #if 0
455 CollChk_hash_t local_hash; /* local hash value */
456 CollChk_hash_t root_hash; /* root's hash value */
457 char err_str[COLLCHK_STD_STRLEN];
458 int rank, size; /* rank, size */
459 int are_hashes_equal; /* go flag, ok flag */
460
461 /* get the rank and size */
462 MPI_Comm_rank(comm, &rank);
463 MPI_Comm_size(comm, &size);
464
465 /* get the hash values */
466 CollChk_dtype_hash(type, cnt, &local_hash);
467
468 if (rank == root) {
469 root_hash.value = local_hash.value;
470 root_hash.count = local_hash.count;
471 }
472 /* broadcast root's datatype hash to all other processes */
473 PMPI_Bcast(&root_hash, 2, MPI_UNSIGNED, root, comm);
474
475 /* Compare root's datatype hash to the local hash */
476 are_hashes_equal = CollChk_hash_equal( &local_hash, &root_hash );
477 if ( !are_hashes_equal )
478 sprintf(err_str, "Inconsistent datatype signatures detected "
479 "between rank %d and rank %d.\n", rank, root);
480 else
481 sprintf(err_str, COLLCHK_NO_ERROR_STR);
482
483 /* Find out if there is unequal hashes in the communicator */
484 are_hashes_equal = CollChk_Allreduce_int(are_hashes_equal, MPI_LAND, comm);
485
486 if ( !are_hashes_equal )
487 return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
488
489 return MPI_SUCCESS;
490 #endif
491 #if defined( DEBUG )
492 fprintf( stdout, "CollChk_dtype_bcast()\n" );
493 #endif
494 return CollChk_dtype_scatter(comm, type, cnt, type, cnt, root, 1, call );
495 }
496
497
498 /*
499 The (sendtype,sendcnt) is assumed to be known in root process.
500 (recvtype,recvcnt) is known in every process. The routine checks if
501 (recvtype,recvcnt) on each process is the same as (sendtype,sendcnt)
502 on process root.
503 */
CollChk_dtype_scatter(MPI_Comm comm,MPI_Datatype sendtype,int sendcnt,MPI_Datatype recvtype,int recvcnt,int root,int are2buffs,char * call)504 int CollChk_dtype_scatter(MPI_Comm comm,
505 MPI_Datatype sendtype, int sendcnt,
506 MPI_Datatype recvtype, int recvcnt,
507 int root, int are2buffs, char *call)
508 {
509 CollChk_hash_t root_hash; /* root's hash value */
510 CollChk_hash_t recv_hash; /* local hash value */
511 char err_str[COLLCHK_STD_STRLEN];
512 int rank, size;
513 int are_hashes_equal;
514
515 #if defined( DEBUG )
516 fprintf( stdout, "CollChk_dtype_scatter()\n" );
517 #endif
518
519 /* get the rank and size */
520 MPI_Comm_rank(comm, &rank);
521 MPI_Comm_size(comm, &size);
522
523 /*
524 Scatter() only cares root's send datatype signature,
525 i.e. ignore not-root's send datatype signatyre
526 */
527 /* Set the root's hash value */
528 if (rank == root)
529 CollChk_dtype_hash(sendtype, sendcnt, &root_hash);
530
531 /* broadcast root's datatype hash to all other processes */
532 PMPI_Bcast(&root_hash, 2, MPI_UNSIGNED, root, comm);
533
534 /* Compare root_hash with the input/local hash */
535 if ( are2buffs ) {
536 CollChk_dtype_hash( recvtype, recvcnt, &recv_hash );
537 are_hashes_equal = CollChk_hash_equal( &root_hash, &recv_hash );
538 }
539 else
540 are_hashes_equal = 1;
541
542 if ( !are_hashes_equal )
543 sprintf(err_str, "Inconsistent datatype signatures detected "
544 "between rank %d and rank %d.\n", rank, root);
545 else
546 sprintf(err_str, COLLCHK_NO_ERROR_STR);
547
548 /* Find out if there is unequal hashes in the communicator */
549 are_hashes_equal = CollChk_Allreduce_int(are_hashes_equal, MPI_LAND, comm);
550
551 if ( !are_hashes_equal )
552 return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
553
554 return MPI_SUCCESS;
555 }
556
557 /*
558 The vector of (sendtype,sendcnts[]) is assumed to be known in root process.
559 (recvtype,recvcnt) is known in every process. The routine checks if
560 (recvtype,recvcnt) on process P is the same as (sendtype,sendcnt[P])
561 on process root.
562 */
CollChk_dtype_scatterv(MPI_Comm comm,MPI_Datatype sendtype,const int * sendcnts,MPI_Datatype recvtype,int recvcnt,int root,int are2buffs,char * call)563 int CollChk_dtype_scatterv(MPI_Comm comm,
564 MPI_Datatype sendtype, const int *sendcnts,
565 MPI_Datatype recvtype, int recvcnt,
566 int root, int are2buffs, char *call)
567 {
568 CollChk_hash_t *hashes; /* hash array for (sendtype,sendcnts[]) */
569 CollChk_hash_t root_hash; /* root's hash value */
570 CollChk_hash_t recv_hash; /* local hash value */
571 char err_str[COLLCHK_STD_STRLEN];
572 int rank, size, idx;
573 int are_hashes_equal;
574
575 #if defined( DEBUG )
576 fprintf( stdout, "CollChk_dtype_scatterv()\n" );
577 #endif
578
579 /* get the rank and size */
580 MPI_Comm_rank(comm, &rank);
581 MPI_Comm_size(comm, &size);
582
583 /*
584 Scatter() only cares root's send datatype signature[],
585 i.e. ignore not-root's send datatype signatyre
586 */
587 hashes = NULL;
588 if ( rank == root ) {
589 /* Allocate hash buffer memory */
590 #if ! defined( HAVE_ALLOCA )
591 hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
592 #else
593 hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
594 #endif
595 for ( idx = 0; idx < size; idx++ )
596 CollChk_dtype_hash( sendtype, sendcnts[idx], &(hashes[idx]) );
597 }
598
599 /* Send the root's hash array to update other processes's root_hash */
600 PMPI_Scatter(hashes, 2, MPI_UNSIGNED, &root_hash, 2, MPI_UNSIGNED,
601 root, comm);
602
603 /* Compare root_hash with the input/local hash */
604 if ( are2buffs ) {
605 CollChk_dtype_hash( recvtype, recvcnt, &recv_hash );
606 are_hashes_equal = CollChk_hash_equal( &root_hash, &recv_hash );
607 }
608 else
609 are_hashes_equal = 1;
610
611 if ( !are_hashes_equal )
612 sprintf(err_str, "Inconsistent datatype signatures detected "
613 "between rank %d and rank %d.\n", rank, root);
614 else
615 sprintf(err_str, COLLCHK_NO_ERROR_STR);
616
617 /* Find out if there is unequal hashes in the communicator */
618 are_hashes_equal = CollChk_Allreduce_int(are_hashes_equal, MPI_LAND, comm);
619
620 #if ! defined( HAVE_ALLOCA )
621 if ( hashes != NULL )
622 free( hashes );
623 #endif
624
625 if ( !are_hashes_equal )
626 return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
627
628 return MPI_SUCCESS;
629 }
630
631
632 /*
633 (sendtype,sendcnt) and (recvtype,recvcnt) are known in every process.
634 The routine checks if (recvtype,recvcnt) on local process is the same
635 as (sendtype,sendcnt) collected from all the other processes.
636 */
CollChk_dtype_allgather(MPI_Comm comm,MPI_Datatype sendtype,int sendcnt,MPI_Datatype recvtype,int recvcnt,int are2buffs,char * call)637 int CollChk_dtype_allgather(MPI_Comm comm,
638 MPI_Datatype sendtype, int sendcnt,
639 MPI_Datatype recvtype, int recvcnt,
640 int are2buffs, char *call)
641 {
642 CollChk_hash_t *hashes; /* hashes from other senders' */
643 CollChk_hash_t send_hash; /* local sender's hash value */
644 CollChk_hash_t recv_hash; /* local receiver's hash value */
645 char err_str[COLLCHK_STD_STRLEN];
646 char rank_str[COLLCHK_SM_STRLEN];
647 int *isOK2chks; /* boolean array, true:sendbuff=\=recvbuff */
648 int *err_ranks; /* array of ranks that have mismatch hashes */
649 int err_rank_size;
650 int err_str_sz, str_sz;
651 int rank, size, idx;
652
653 #if defined( DEBUG )
654 fprintf( stdout, "CollChk_dtype_allgather()\n" );
655 #endif
656
657 /* get the rank and size */
658 MPI_Comm_rank(comm, &rank);
659 MPI_Comm_size(comm, &size);
660
661 CollChk_dtype_hash( sendtype, sendcnt, &send_hash );
662 /* Allocate hash buffer memory */
663 #if ! defined( HAVE_ALLOCA )
664 hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
665 err_ranks = (int *) malloc( size * sizeof(int) );
666 isOK2chks = (int *) malloc( size * sizeof(int) );
667 #else
668 hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
669 err_ranks = (int *) alloca( size * sizeof(int) );
670 isOK2chks = (int *) alloca( size * sizeof(int) );
671 #endif
672
673 /* Gather other senders' datatype hashes as local hash arrary */
674 PMPI_Allgather(&send_hash, 2, MPI_UNSIGNED, hashes, 2, MPI_UNSIGNED, comm);
675 PMPI_Allgather(&are2buffs, 1, MPI_INT, isOK2chks, 1, MPI_INT, comm);
676
677 /* Compute the local datatype hash value */
678 CollChk_dtype_hash( recvtype, recvcnt, &recv_hash );
679
680 /* Compare the local datatype hash with other senders' datatype hashes */
681 /*
682 The checks are more exhaustive and redundant tests on all processes,
683 but matches what user expects
684 */
685 err_rank_size = 0;
686 for ( idx = 0; idx < size; idx++ ) {
687 if ( isOK2chks[idx] ) {
688 if ( ! CollChk_hash_equal( &recv_hash, &(hashes[idx]) ) )
689 err_ranks[ err_rank_size++ ] = idx;
690 }
691 }
692
693 if ( err_rank_size > 0 ) {
694 str_sz = sprintf(err_str, "Inconsistent datatype signatures detected "
695 "between local rank %d and remote ranks,",
696 rank);
697 /* all string size variables, *_sz, does not include NULL character */
698 err_str_sz = str_sz;
699 for ( idx = 0; idx < err_rank_size; idx++ ) {
700 str_sz = sprintf(rank_str, " %d", err_ranks[idx] );
701 /* -3 is reserved for "..." */
702 if ( str_sz + err_str_sz < COLLCHK_STD_STRLEN-3 ) {
703 strcat(err_str, rank_str);
704 err_str_sz = strlen( err_str );
705 }
706 else {
707 strcat(err_str, "..." );
708 break;
709 }
710 }
711 }
712 else
713 sprintf(err_str, COLLCHK_NO_ERROR_STR);
714
715 /* Find out the total number of unequal hashes in the communicator */
716 err_rank_size = CollChk_Allreduce_int(err_rank_size, MPI_SUM, comm);
717
718 #if ! defined( HAVE_ALLOCA )
719 if ( hashes != NULL )
720 free( hashes );
721 if ( err_ranks != NULL )
722 free( err_ranks );
723 if ( isOK2chks != NULL )
724 free( isOK2chks );
725 #endif
726
727 if ( err_rank_size > 0 )
728 return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
729
730 return MPI_SUCCESS;
731 }
732
733
734 /*
735 The vector of (recvtype,recvcnts[]) is assumed to be known locally.
736 The routine checks if (recvtype,recvcnts[]) on local process is the same as
737 (sendtype,sendcnts[]) collected from all the other processes.
738 */
CollChk_dtype_allgatherv(MPI_Comm comm,MPI_Datatype sendtype,int sendcnt,MPI_Datatype recvtype,const int * recvcnts,int are2buffs,char * call)739 int CollChk_dtype_allgatherv(MPI_Comm comm,
740 MPI_Datatype sendtype, int sendcnt,
741 MPI_Datatype recvtype, const int *recvcnts,
742 int are2buffs, char *call)
743 {
744 CollChk_hash_t *hashes; /* hash array for (sendtype,sendcnt) */
745 CollChk_hash_t send_hash; /* local sender's hash value */
746 CollChk_hash_t recv_hash; /* local receiver's hash value */
747 char err_str[COLLCHK_STD_STRLEN];
748 char rank_str[COLLCHK_SM_STRLEN];
749 int *isOK2chks; /* boolean array, true:sendbuff=\=recvbuff */
750 int *err_ranks; /* array of ranks that have mismatch hashes */
751 int err_rank_size;
752 int err_str_sz, str_sz;
753 int rank, size, idx;
754
755 #if defined( DEBUG )
756 fprintf( stdout, "CollChk_dtype_allgatherv()\n" );
757 #endif
758
759 /* get the rank and size */
760 MPI_Comm_rank(comm, &rank);
761 MPI_Comm_size(comm, &size);
762
763 /* Allocate hash buffer memory */
764 #if ! defined( HAVE_ALLOCA )
765 hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
766 err_ranks = (int *) malloc( size * sizeof(int) );
767 isOK2chks = (int *) malloc( size * sizeof(int) );
768 #else
769 hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
770 err_ranks = (int *) alloca( size * sizeof(int) );
771 isOK2chks = (int *) alloca( size * sizeof(int) );
772 #endif
773
774 CollChk_dtype_hash( sendtype, sendcnt, &send_hash );
775
776 /* Gather other senders' datatype hashes as local hash array */
777 PMPI_Allgather(&send_hash, 2, MPI_UNSIGNED, hashes, 2, MPI_UNSIGNED, comm);
778 PMPI_Allgather(&are2buffs, 1, MPI_INT, isOK2chks, 1, MPI_INT, comm);
779
780 /* Compare the local datatype hash with other senders' datatype hashes */
781 /*
782 The checks are more exhaustive and redundant tests on all processes,
783 but matches what user expects
784 */
785 err_rank_size = 0;
786 for ( idx = 0; idx < size; idx++ ) {
787 if ( isOK2chks[idx] ) {
788 CollChk_dtype_hash( recvtype, recvcnts[idx], &recv_hash );
789 if ( ! CollChk_hash_equal( &recv_hash, &(hashes[idx]) ) )
790 err_ranks[ err_rank_size++ ] = idx;
791 }
792 }
793
794 if ( err_rank_size > 0 ) {
795 str_sz = sprintf(err_str, "Inconsistent datatype signatures detected "
796 "between local rank %d and remote ranks,",
797 rank);
798 /* all string size variables, *_sz, does not include NULL character */
799 err_str_sz = str_sz;
800 for ( idx = 0; idx < err_rank_size; idx++ ) {
801 str_sz = sprintf(rank_str, " %d", err_ranks[idx] );
802 /* -3 is reserved for "..." */
803 if ( str_sz + err_str_sz < COLLCHK_STD_STRLEN-3 ) {
804 strcat(err_str, rank_str);
805 err_str_sz = strlen( err_str );
806 }
807 else {
808 strcat(err_str, "..." );
809 break;
810 }
811 }
812 }
813 else
814 sprintf(err_str, COLLCHK_NO_ERROR_STR);
815
816 /* Find out the total number of unequal hashes in the communicator */
817 err_rank_size = CollChk_Allreduce_int(err_rank_size, MPI_SUM, comm);
818
819 #if ! defined( HAVE_ALLOCA )
820 if ( hashes != NULL )
821 free( hashes );
822 if ( err_ranks != NULL )
823 free( err_ranks );
824 if ( isOK2chks != NULL )
825 free( isOK2chks );
826 #endif
827
828 if ( err_rank_size > 0 )
829 return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
830
831 return MPI_SUCCESS;
832 }
833
834
835 /*
836 The vector of (recvtype,recvcnts[]) is assumed to be known locally.
837 The routine checks if (recvtype,recvcnts[]) on local process is the same as
838 (sendtype,sendcnts[]) collected from all the other processes.
839 */
CollChk_dtype_alltoallv(MPI_Comm comm,MPI_Datatype sendtype,const int * sendcnts,MPI_Datatype recvtype,const int * recvcnts,char * call)840 int CollChk_dtype_alltoallv(MPI_Comm comm,
841 MPI_Datatype sendtype, const int *sendcnts,
842 MPI_Datatype recvtype, const int *recvcnts,
843 char *call)
844 {
845 CollChk_hash_t *send_hashes; /* hash array for (sendtype,sendcnt[]) */
846 CollChk_hash_t *hashes; /* hash array for (sendtype,sendcnt[]) */
847 CollChk_hash_t recv_hash; /* local receiver's hash value */
848 char err_str[COLLCHK_STD_STRLEN];
849 char rank_str[COLLCHK_SM_STRLEN];
850 int *err_ranks;
851 int err_rank_size;
852 int err_str_sz, str_sz;
853 int rank, size, idx;
854
855 #if defined( DEBUG )
856 fprintf( stdout, "CollChk_dtype_alltoallv()\n" );
857 #endif
858
859 /* get the rank and size */
860 MPI_Comm_rank(comm, &rank);
861 MPI_Comm_size(comm, &size);
862
863 /* Allocate hash buffer memory */
864 #if ! defined( HAVE_ALLOCA )
865 send_hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
866 hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
867 err_ranks = (int *) malloc( size * sizeof(int) );
868 #else
869 send_hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
870 hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
871 err_ranks = (int *) alloca( size * sizeof(int) );
872 #endif
873
874 for ( idx = 0; idx < size; idx++ )
875 CollChk_dtype_hash( sendtype, sendcnts[idx], &send_hashes[idx] );
876
877 /* Gather other senders' datatype hashes as local hash array */
878 PMPI_Alltoall(send_hashes, 2, MPI_UNSIGNED, hashes, 2, MPI_UNSIGNED, comm);
879
880 /* Compare the local datatype hash with other senders' datatype hashes */
881 /*
882 The checks are more exhaustive and redundant tests on all processes,
883 but matches what user expects
884 */
885 err_rank_size = 0;
886 for ( idx = 0; idx < size; idx++ ) {
887 CollChk_dtype_hash( recvtype, recvcnts[idx], &recv_hash );
888 if ( ! CollChk_hash_equal( &recv_hash, &(hashes[idx]) ) )
889 err_ranks[ err_rank_size++ ] = idx;
890 }
891
892 if ( err_rank_size > 0 ) {
893 str_sz = sprintf(err_str, "Inconsistent datatype signatures detected "
894 "between local rank %d and remote ranks,",
895 rank);
896 /* all string size variables, *_sz, does not include NULL character */
897 err_str_sz = str_sz;
898 for ( idx = 0; idx < err_rank_size; idx++ ) {
899 str_sz = sprintf(rank_str, " %d", err_ranks[idx] );
900 /* -3 is reserved for "..." */
901 if ( str_sz + err_str_sz < COLLCHK_STD_STRLEN-3 ) {
902 strcat(err_str, rank_str);
903 err_str_sz = strlen( err_str );
904 }
905 else {
906 strcat(err_str, "..." );
907 break;
908 }
909 }
910 }
911 else
912 sprintf(err_str, COLLCHK_NO_ERROR_STR);
913
914 /* Find out the total number of unequal hashes in the communicator */
915 err_rank_size = CollChk_Allreduce_int(err_rank_size, MPI_SUM, comm);
916
917 #if ! defined( HAVE_ALLOCA )
918 if ( send_hashes != NULL )
919 free( send_hashes );
920 if ( hashes != NULL )
921 free( hashes );
922 if ( err_ranks != NULL )
923 free( err_ranks );
924 #endif
925
926 if ( err_rank_size > 0 )
927 return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
928
929 return MPI_SUCCESS;
930 }
931
932
933 /*
934 The vector of (recvtypes[],recvcnts[]) is assumed to be known locally.
935 The routine checks if (recvtypes[],recvcnts[]) on local process is the same as
936 (sendtype[],sendcnts[]) collected from all the other processes.
937 */
CollChk_dtype_alltoallw(MPI_Comm comm,const MPI_Datatype * sendtypes,const int * sendcnts,const MPI_Datatype * recvtypes,const int * recvcnts,char * call)938 int CollChk_dtype_alltoallw(MPI_Comm comm,
939 const MPI_Datatype *sendtypes, const int *sendcnts,
940 const MPI_Datatype *recvtypes, const int *recvcnts,
941 char *call)
942 {
943 CollChk_hash_t *send_hashes; /* hash array for (sendtypes[],sendcnt[]) */
944 CollChk_hash_t *hashes; /* hash array for (sendtypes[],sendcnt[]) */
945 CollChk_hash_t recv_hash; /* local receiver's hash value */
946 char err_str[COLLCHK_STD_STRLEN];
947 char rank_str[COLLCHK_SM_STRLEN];
948 int *err_ranks;
949 int err_rank_size;
950 int err_str_sz, str_sz;
951 int rank, size, idx;
952
953 /* get the rank and size */
954 MPI_Comm_rank(comm, &rank);
955 MPI_Comm_size(comm, &size);
956
957 /* Allocate hash buffer memory */
958 #if ! defined( HAVE_ALLOCA )
959 send_hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
960 hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
961 err_ranks = (int *) malloc( size * sizeof(int) );
962 #else
963 send_hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
964 hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
965 err_ranks = (int *) alloca( size * sizeof(int) );
966 #endif
967
968 for ( idx = 0; idx < size; idx++ )
969 CollChk_dtype_hash( sendtypes[idx], sendcnts[idx], &send_hashes[idx] );
970
971 /* Gather other senders' datatype hashes as local hash array */
972 PMPI_Alltoall(send_hashes, 2, MPI_UNSIGNED, hashes, 2, MPI_UNSIGNED, comm);
973
974 /* Compare the local datatype hashes with other senders' datatype hashes */
975 /*
976 The checks are more exhaustive and redundant tests on all processes,
977 but matches what user expects
978 */
979 err_rank_size = 0;
980 for ( idx = 0; idx < size; idx++ ) {
981 CollChk_dtype_hash( recvtypes[idx], recvcnts[idx], &recv_hash );
982 if ( ! CollChk_hash_equal( &recv_hash, &(hashes[idx]) ) )
983 err_ranks[ err_rank_size++ ] = idx;
984 }
985
986 if ( err_rank_size > 0 ) {
987 str_sz = sprintf(err_str, "Inconsistent datatype signatures detected "
988 "between local rank %d and remote ranks,",
989 rank);
990 /* all string size variables, *_sz, does not include NULL character */
991 err_str_sz = str_sz;
992 for ( idx = 0; idx < err_rank_size; idx++ ) {
993 str_sz = sprintf(rank_str, " %d", err_ranks[idx] );
994 /* -3 is reserved for "..." */
995 if ( str_sz + err_str_sz < COLLCHK_STD_STRLEN-3 ) {
996 strcat(err_str, rank_str);
997 err_str_sz = strlen( err_str );
998 }
999 else {
1000 strcat(err_str, "..." );
1001 break;
1002 }
1003 }
1004 }
1005 else
1006 sprintf(err_str, COLLCHK_NO_ERROR_STR);
1007
1008 /* Find out the total number of unequal hashes in the communicator */
1009 err_rank_size = CollChk_Allreduce_int(err_rank_size, MPI_SUM, comm);
1010
1011 #if ! defined( HAVE_ALLOCA )
1012 if ( send_hashes != NULL )
1013 free( send_hashes );
1014 if ( hashes != NULL )
1015 free( hashes );
1016 if ( err_ranks != NULL )
1017 free( err_ranks );
1018 #endif
1019
1020 if ( err_rank_size > 0 )
1021 return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
1022
1023 return MPI_SUCCESS;
1024 }
1025