1 /*
2    (C) 2004 by Argonne National Laboratory.
3        See COPYRIGHT in top-level directory.
4 */
5 #include "collchk.h"
6 
7 /* AIX requires this to be the first thing in the file.  */
8 #ifndef __GNUC__
9 # if HAVE_ALLOCA_H
10 #  include <alloca.h>
11 # else
12 #  ifdef _AIX
13  #pragma alloca
14 #  else
15 #   ifndef alloca /* predefined by HP cc +Olibcalls */
16 char *alloca ();
17 #   endif
18 #  endif
19 # endif
20 #else
21 # if defined( HAVE_ALLOCA_H )
22 # include <alloca.h>
23 # endif
24 #endif
25 
26 
27 unsigned int CollChk_cirleftshift( unsigned int alpha, unsigned n );
CollChk_cirleftshift(unsigned int alpha,unsigned n)28 unsigned int CollChk_cirleftshift( unsigned int alpha, unsigned n )
29 {
30     /* Doing circular left shift of alpha by n bits */
31     unsigned int t1, t2;
32     t1 = alpha >> (sizeof(unsigned int)-n);
33     t2 = alpha << n;
34     return t1 | t2;
35 }
36 
37 void CollChk_hash_add(const CollChk_hash_t *alpha,
38                       const CollChk_hash_t *beta,
39                             CollChk_hash_t *lamda);
CollChk_hash_add(const CollChk_hash_t * alpha,const CollChk_hash_t * beta,CollChk_hash_t * lamda)40 void CollChk_hash_add(const CollChk_hash_t *alpha,
41                       const CollChk_hash_t *beta,
42                             CollChk_hash_t *lamda)
43 {
44     lamda->value = (alpha->value)
45                  ^ CollChk_cirleftshift(beta->value, alpha->count);
46     lamda->count = alpha->count + beta->count;
47 }
48 
CollChk_hash_equal(const CollChk_hash_t * alpha,const CollChk_hash_t * beta)49 int CollChk_hash_equal(const CollChk_hash_t *alpha,
50                        const CollChk_hash_t *beta)
51 {
52     return alpha->count == beta->count && alpha->value == beta->value;
53 }
54 
55 #if defined( HAVE_LAM_FORTRAN_MPI_DATATYPE_IN_C )
56 #include "lam_f2c_dtype.h"
57 #endif
58 
59 unsigned int CollChk_basic_value(MPI_Datatype type);
CollChk_basic_value(MPI_Datatype type)60 unsigned int CollChk_basic_value(MPI_Datatype type)
61 {
62     /*
63        MPI_Datatype's that return 0x0 are as if they are being
64        skipped/ignored in the comparison of any 2 MPI_Datatypes.
65     */
66     if ( type == MPI_DATATYPE_NULL || type == MPI_UB || type == MPI_LB )
67         return 0x0;
68     else if ( type == MPI_CHAR )
69         return 0x1;
70 #if defined( HAVE_MPI_SIGNED_CHAR )
71     else if ( type == MPI_SIGNED_CHAR )
72         return 0x3;
73 #endif
74     else if ( type == MPI_UNSIGNED_CHAR )
75         return 0x5;
76     else if ( type == MPI_BYTE )
77         return 0x7;
78 #if defined( HAVE_MPI_WCHAR )
79     else if ( type == MPI_WCHAR )
80         return 0x9;
81 #endif
82     else if ( type == MPI_SHORT )
83         return 0xb;
84     else if ( type == MPI_UNSIGNED_SHORT )
85         return 0xd;
86     else if ( type == MPI_INT )
87         return 0xf;
88     else if ( type == MPI_UNSIGNED )
89         return 0x11;
90     else if ( type == MPI_LONG )
91         return 0x13;
92     else if ( type == MPI_UNSIGNED_LONG )
93         return 0x15;
94     else if ( type == MPI_FLOAT )
95         return 0x17;
96     else if ( type == MPI_DOUBLE )
97         return 0x19;
98     else if ( type == MPI_LONG_DOUBLE )
99         return 0x1b;
100     else if ( type == MPI_LONG_LONG_INT )
101         return 0x1d;
102     /* else if ( type == MPI_LONG_LONG ) return 0x1f; */
103 #if defined( HAVE_MPI_UNSIGNED_LONG_LONG )
104     else if ( type == MPI_UNSIGNED_LONG_LONG )
105         return 0x21;
106 #endif
107 
108     else if ( type == MPI_FLOAT_INT )
109         return 0x8;       /* (0x17,1)@(0xf,1) */
110     else if ( type == MPI_DOUBLE_INT )
111         return 0x6;       /* (0x19,1)@(0xf,1) */
112     else if ( type == MPI_LONG_INT )
113         return 0xc;       /* (0x13,1)@(0xf,1) */
114     else if ( type == MPI_SHORT_INT )
115         return 0x14;      /* (0xb,1)@(0xf,1) */
116     else if ( type == MPI_2INT )
117         return 0x10;      /* (0xf,1)@(0xf,1) */
118     else if ( type == MPI_LONG_DOUBLE_INT )
119         return 0x4;       /* (0x1b,1)@(0xf,1) */
120 
121 #if defined( HAVE_FORTRAN_MPI_DATATYPE_IN_C )
122     else if ( type == MPI_COMPLEX )
123         return 0x101;
124     else if ( type == MPI_DOUBLE_COMPLEX )
125         return 0x103;
126     else if ( type == MPI_LOGICAL )
127         return 0x105;
128     else if ( type == MPI_REAL )
129         return 0x107;
130     else if ( type == MPI_DOUBLE_PRECISION )
131         return 0x109;
132     else if ( type == MPI_INTEGER )
133         return 0x10b;
134     else if ( type == MPI_CHARACTER )
135         return 0x10d;
136 
137     else if ( type == MPI_2INTEGER )
138         return 0x33c;      /* (0x10b,1)@(0x10b,1) */
139     else if ( type == MPI_2REAL )
140         return 0x329;      /* (0x107,1)@(0x107,1) */
141     else if ( type == MPI_2DOUBLE_PRECISION )
142         return 0x33a;      /* (0x109,1)@(0x109,1) */
143 #if defined( HAVE_FORTRAN_MPI_DATATYPE_2COMPLEX_IN_C )
144     else if ( type == MPI_2COMPLEX )
145         return 0x323;      /* (0x101,1)@(0x101,1) */
146     else if ( type == MPI_2DOUBLE_COMPLEX )
147         return 0x325;      /* (0x103,1)@(0x103,1) */
148 #endif
149 
150     else if ( type == MPI_PACKED )
151         return 0x201;
152 #if defined( HAVE_FORTRAN_MPI_DATATYPE_INTEGERX_IN_C )
153     else if ( type == MPI_INTEGER1 )
154         return 0x211;
155     else if ( type == MPI_INTEGER2 )
156         return 0x213;
157     else if ( type == MPI_INTEGER4 )
158         return 0x215;
159     else if ( type == MPI_INTEGER8 )
160         return 0x217;
161     /* else if ( type == MPI_INTEGER16 ) return 0x219; */
162 #endif
163 #if defined( HAVE_FORTRAN_MPI_DATATYPE_REALX_IN_C )
164     else if ( type == MPI_REAL4 )
165         return 0x221;
166     else if ( type == MPI_REAL8 )
167         return 0x223;
168     /* else if ( type == MPI_REAL16 ) return 0x205; */
169 #endif
170 #if defined( HAVE_FORTRAN_MPI_DATATYPE_COMPLEXX_IN_C )
171     else if ( type == MPI_COMPLEX8 )
172         return 0x231;
173     else if ( type == MPI_COMPLEX16 )
174         return 0x233;
175     /* else if ( type == MPI_COMPLEX32 ) return 0x20b; */
176 #endif
177 
178 #endif
179 
180     else {
181 #if defined( HAVE_INT_MPI_DATATYPE )
182         fprintf( stderr, "CollChk_basic_value()) "
183                          "Unknown basic MPI datatype %x.\n", type );
184 #elif defined( HAVE_PTR_MPI_DATATYPE )
185         fprintf( stderr, "CollChk_basic_value()) "
186                          "Unknown basic MPI datatype %p.\n", type );
187 #else
188         fprintf( stderr, "CollChk_basic_value()) "
189                          "Unknown basic MPI datatype.\n" );
190 #endif
191         fflush( stderr );
192         return 0;
193     }
194 }
195 
196 unsigned int CollChk_basic_count(MPI_Datatype type);
CollChk_basic_count(MPI_Datatype type)197 unsigned int CollChk_basic_count(MPI_Datatype type)
198 {
199     /* MPI_Datatype's that return 0 are being skipped/ignored. */
200     if (    type == MPI_DATATYPE_NULL
201          || type == MPI_UB
202          || type == MPI_LB
203     ) return 0;
204 
205     else if (    type == MPI_CHAR
206 #if defined( HAVE_MPI_SIGNED_CHAR )
207               || type == MPI_SIGNED_CHAR
208 #endif
209               || type == MPI_UNSIGNED_CHAR
210               || type == MPI_BYTE
211 #if defined( HAVE_MPI_WCHAR )
212               || type == MPI_WCHAR
213 #endif
214               || type == MPI_SHORT
215               || type == MPI_UNSIGNED_SHORT
216               || type == MPI_INT
217               || type == MPI_UNSIGNED
218               || type == MPI_LONG
219               || type == MPI_UNSIGNED_LONG
220               || type == MPI_FLOAT
221               || type == MPI_DOUBLE
222               || type == MPI_LONG_DOUBLE
223               || type == MPI_LONG_LONG_INT
224               /* || type == MPI_LONG_LONG */
225 #if defined( HAVE_MPI_UNSIGNED_LONG_LONG )
226               || type == MPI_UNSIGNED_LONG_LONG
227 #endif
228     ) return 1;
229 
230     else if (    type == MPI_FLOAT_INT
231               || type == MPI_DOUBLE_INT
232               || type == MPI_LONG_INT
233               || type == MPI_SHORT_INT
234               || type == MPI_2INT
235               || type == MPI_LONG_DOUBLE_INT
236     ) return 2;
237 
238 #if defined( HAVE_FORTRAN_MPI_DATATYPE_IN_C )
239     else if (    type == MPI_COMPLEX
240               || type == MPI_DOUBLE_COMPLEX
241               || type == MPI_LOGICAL
242               || type == MPI_REAL
243               || type == MPI_DOUBLE_PRECISION
244               || type == MPI_INTEGER
245               || type == MPI_CHARACTER
246     ) return 1;
247 
248     else if (    type == MPI_2INTEGER
249               || type == MPI_2REAL
250               || type == MPI_2DOUBLE_PRECISION
251 #if defined( HAVE_FORTRAN_MPI_DATATYPE_2COMPLEX_IN_C )
252               || type == MPI_2COMPLEX
253               || type == MPI_2DOUBLE_COMPLEX
254 #endif
255     ) return 2;
256 
257     else if (    type == MPI_PACKED
258 #if defined( HAVE_FORTRAN_MPI_DATATYPE_INTEGERX_IN_C )
259               || type == MPI_INTEGER1
260               || type == MPI_INTEGER2
261               || type == MPI_INTEGER4
262               || type == MPI_INTEGER8
263         /* || type == MPI_INTEGER16 */
264 #endif
265 #if defined( HAVE_FORTRAN_MPI_DATATYPE_REALX_IN_C )
266               || type == MPI_REAL4
267               || type == MPI_REAL8
268         /* || type == MPI_REAL16 */
269 #endif
270 #if defined( HAVE_FORTRAN_MPI_DATATYPE_COMPLEXX_IN_C )
271               || type == MPI_COMPLEX8
272               || type == MPI_COMPLEX16
273         /* || type == MPI_COMPLEX32 */
274 #endif
275     ) return 1;
276 #endif
277 
278     else {
279 #if defined( HAVE_INT_MPI_DATATYPE )
280         fprintf( stderr, "CollChk_basic_count(): "
281                          "Unknown basic MPI datatype %x.\n", type );
282 #elif defined( HAVE_PTR_MPI_DATATYPE )
283         fprintf( stderr, "CollChk_basic_count(): "
284                          "Unknown basic MPI datatype %p.\n", type );
285 #else
286         fprintf( stderr, "CollChk_basic_count(): "
287                          "Unknown basic MPI datatype.\n" );
288 #endif
289         fflush( stderr );
290         return 0;
291     }
292 }
293 
294 
295 int CollChk_derived_count(int idx, int *ints, int combiner);
CollChk_derived_count(int idx,int * ints,int combiner)296 int CollChk_derived_count(int idx, int *ints, int combiner)
297 {
298     int ii, tot_cnt;
299 #if defined( HAVE_RARE_MPI_COMBINERS )
300     int dim_A, dim_B;
301 #endif
302 
303     tot_cnt = 0;
304     switch(combiner) {
305 #if defined( HAVE_RARE_MPI_COMBINERS )
306         case MPI_COMBINER_DUP :
307         case MPI_COMBINER_F90_REAL :
308         case MPI_COMBINER_F90_COMPLEX :
309         case MPI_COMBINER_F90_INTEGER :
310         case MPI_COMBINER_RESIZED :
311             return 1;
312 #endif
313 
314         case MPI_COMBINER_CONTIGUOUS :
315             return ints[0];
316 
317 #if defined( HAVE_RARE_MPI_COMBINERS )
318         case MPI_COMBINER_HVECTOR_INTEGER :
319         case MPI_COMBINER_INDEXED_BLOCK :
320 #endif
321         case MPI_COMBINER_VECTOR :
322         case MPI_COMBINER_HVECTOR :
323             return ints[0]*ints[1];
324 
325 #if defined( HAVE_RARE_MPI_COMBINERS )
326         case MPI_COMBINER_HINDEXED_INTEGER :
327 #endif
328         case MPI_COMBINER_INDEXED :
329         case MPI_COMBINER_HINDEXED :
330             for ( ii = ints[0]; ii > 0; ii-- ) {
331                  tot_cnt += ints[ ii ];
332             }
333             return tot_cnt;
334 
335 #if defined( HAVE_RARE_MPI_COMBINERS )
336         case MPI_COMBINER_STRUCT_INTEGER :
337 #endif
338         case MPI_COMBINER_STRUCT :
339             return ints[idx+1];
340 
341 #if defined( HAVE_RARE_MPI_COMBINERS )
342         case MPI_COMBINER_SUBARRAY :
343             dim_A   = ints[ 0 ] + 1;
344             dim_B   = 2 * ints[ 0 ];
345             for ( ii=dim_A; ii<=dim_B; ii++ ) {
346                 tot_cnt += ints[ ii ];
347             }
348             return tot_cnt;
349         case MPI_COMBINER_DARRAY :
350             for ( ii=3; ii<=ints[2]+2; ii++ ) {
351                 tot_cnt += ints[ ii ];
352             }
353             return tot_cnt;
354 #endif
355     }
356     return tot_cnt;
357 }
358 
359 
CollChk_dtype_hash(MPI_Datatype type,int cnt,CollChk_hash_t * type_hash)360 void CollChk_dtype_hash(MPI_Datatype type, int cnt, CollChk_hash_t *type_hash)
361 {
362     int             nints, naddrs, ntypes, combiner;
363     int             *ints;
364     MPI_Aint        *addrs;
365     MPI_Datatype    *types;
366     CollChk_hash_t  curr_hash, next_hash;
367     int             type_cnt;
368     int             ii;
369 
370     /*  Don't know if this makes sense or not */
371     if ( cnt <= 0 ) {
372         /* (value,count)=(0,0) => skipping of this (type,cnt) in addition */
373         type_hash->value = 0;
374         type_hash->count = 0;
375         return;
376     }
377 
378     MPI_Type_get_envelope(type, &nints, &naddrs, &ntypes, &combiner);
379     if (combiner != MPI_COMBINER_NAMED) {
380 #if ! defined( HAVE_ALLOCA )
381         ints = NULL;
382         if ( nints > 0 )
383             ints = (int *) malloc(nints * sizeof(int));
384         addrs = NULL;
385         if ( naddrs > 0 )
386             addrs = (MPI_Aint *) malloc(naddrs * sizeof(MPI_Aint));
387         types = NULL;
388         if ( ntypes > 0 )
389             types = (MPI_Datatype *) malloc(ntypes * sizeof(MPI_Datatype));
390 #else
391         ints = NULL;
392         if ( nints > 0 )
393             ints = (int *) alloca(nints * sizeof(int));
394         addrs = NULL;
395         if ( naddrs > 0 )
396             addrs = (MPI_Aint *) alloca(naddrs * sizeof(MPI_Aint));
397         types = NULL;
398         if ( ntypes > 0 )
399             types = (MPI_Datatype *) alloca(ntypes * sizeof(MPI_Datatype));
400 #endif
401 
402         MPI_Type_get_contents(type, nints, naddrs, ntypes, ints, addrs, types);
403         type_cnt = CollChk_derived_count(0, ints, combiner);
404         CollChk_dtype_hash(types[0], type_cnt, &curr_hash);
405 
406         /*
407             ntypes > 1 only for MPI_COMBINER_STRUCT(_INTEGER)
408         */
409         for( ii=1; ii < ntypes; ii++) {
410             type_cnt = CollChk_derived_count(ii, ints, combiner);
411             CollChk_dtype_hash(types[ii], type_cnt, &next_hash);
412             CollChk_hash_add(&curr_hash, &next_hash, &curr_hash);
413         }
414 
415 #if ! defined( HAVE_ALLOCA )
416         if ( ints != NULL )
417             free( ints );
418         if ( addrs != NULL )
419             free( addrs );
420         if ( types != NULL )
421             free( types );
422 #endif
423     }
424     else {
425         curr_hash.value = CollChk_basic_value(type);
426         curr_hash.count = CollChk_basic_count(type);
427     }
428 
429     type_hash->value = curr_hash.value;
430     type_hash->count = curr_hash.count;
431     for ( ii=1; ii < cnt; ii++ ) {
432         CollChk_hash_add(type_hash, &curr_hash, type_hash);
433     }
434 }
435 
436 /*
437     A wrapper that calls PMPI_Allreduce() provides different send and receive
438     buffers so use of MPI_Allreduce() conforms to MPI-1 standard, section 2.2.
439 */
440 int CollChk_Allreduce_int( int ival, MPI_Op op, MPI_Comm comm );
CollChk_Allreduce_int(int ival,MPI_Op op,MPI_Comm comm)441 int CollChk_Allreduce_int( int ival, MPI_Op op, MPI_Comm comm )
442 {
443     int  local_ival;
444     PMPI_Allreduce( &ival, &local_ival, 1, MPI_INT, op, comm );
445     return local_ival;
446 }
447 
448 /*
449    Checking if (type,cnt) is the same in all processes within the communicator.
450 */
CollChk_dtype_bcast(MPI_Comm comm,MPI_Datatype type,int cnt,int root,char * call)451 int CollChk_dtype_bcast(MPI_Comm comm, MPI_Datatype type, int cnt, int root,
452                         char* call)
453 {
454 #if 0
455     CollChk_hash_t  local_hash;        /* local hash value */
456     CollChk_hash_t  root_hash;         /* root's hash value */
457     char            err_str[COLLCHK_STD_STRLEN];
458     int             rank, size;        /* rank, size */
459     int             are_hashes_equal;  /* go flag, ok flag */
460 
461     /* get the rank and size */
462     MPI_Comm_rank(comm, &rank);
463     MPI_Comm_size(comm, &size);
464 
465     /* get the hash values */
466     CollChk_dtype_hash(type, cnt, &local_hash);
467 
468     if (rank == root) {
469         root_hash.value = local_hash.value;
470         root_hash.count = local_hash.count;
471     }
472     /* broadcast root's datatype hash to all other processes */
473     PMPI_Bcast(&root_hash, 2, MPI_UNSIGNED, root, comm);
474 
475     /* Compare root's datatype hash to the local hash */
476     are_hashes_equal = CollChk_hash_equal( &local_hash, &root_hash );
477     if ( !are_hashes_equal )
478         sprintf(err_str, "Inconsistent datatype signatures detected "
479                          "between rank %d and rank %d.\n", rank, root);
480     else
481         sprintf(err_str, COLLCHK_NO_ERROR_STR);
482 
483     /* Find out if there is unequal hashes in the communicator */
484     are_hashes_equal = CollChk_Allreduce_int(are_hashes_equal, MPI_LAND, comm);
485 
486     if ( !are_hashes_equal )
487         return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
488 
489     return MPI_SUCCESS;
490 #endif
491 #if defined( DEBUG )
492     fprintf( stdout, "CollChk_dtype_bcast()\n" );
493 #endif
494     return CollChk_dtype_scatter(comm, type, cnt, type, cnt, root, 1, call );
495 }
496 
497 
498 /*
499   The (sendtype,sendcnt) is assumed to be known in root process.
500   (recvtype,recvcnt) is known in every process.  The routine checks if
501   (recvtype,recvcnt) on each process is the same as (sendtype,sendcnt)
502   on process root.
503 */
CollChk_dtype_scatter(MPI_Comm comm,MPI_Datatype sendtype,int sendcnt,MPI_Datatype recvtype,int recvcnt,int root,int are2buffs,char * call)504 int CollChk_dtype_scatter(MPI_Comm comm,
505                           MPI_Datatype sendtype, int sendcnt,
506                           MPI_Datatype recvtype, int recvcnt,
507                           int root, int are2buffs, char *call)
508 {
509     CollChk_hash_t  root_hash;         /* root's hash value */
510     CollChk_hash_t  recv_hash;         /* local hash value */
511     char            err_str[COLLCHK_STD_STRLEN];
512     int             rank, size;
513     int             are_hashes_equal;
514 
515 #if defined( DEBUG )
516     fprintf( stdout, "CollChk_dtype_scatter()\n" );
517 #endif
518 
519     /* get the rank and size */
520     MPI_Comm_rank(comm, &rank);
521     MPI_Comm_size(comm, &size);
522 
523     /*
524        Scatter() only cares root's send datatype signature,
525        i.e. ignore not-root's send datatype signatyre
526     */
527     /* Set the root's hash value */
528     if (rank == root)
529         CollChk_dtype_hash(sendtype, sendcnt, &root_hash);
530 
531     /* broadcast root's datatype hash to all other processes */
532     PMPI_Bcast(&root_hash, 2, MPI_UNSIGNED, root, comm);
533 
534     /* Compare root_hash with the input/local hash */
535     if ( are2buffs ) {
536         CollChk_dtype_hash( recvtype, recvcnt, &recv_hash );
537         are_hashes_equal = CollChk_hash_equal( &root_hash, &recv_hash );
538     }
539     else
540         are_hashes_equal = 1;
541 
542     if ( !are_hashes_equal )
543         sprintf(err_str, "Inconsistent datatype signatures detected "
544                          "between rank %d and rank %d.\n", rank, root);
545     else
546         sprintf(err_str, COLLCHK_NO_ERROR_STR);
547 
548     /* Find out if there is unequal hashes in the communicator */
549     are_hashes_equal = CollChk_Allreduce_int(are_hashes_equal, MPI_LAND, comm);
550 
551     if ( !are_hashes_equal )
552         return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
553 
554     return MPI_SUCCESS;
555 }
556 
557 /*
558   The vector of (sendtype,sendcnts[]) is assumed to be known in root process.
559   (recvtype,recvcnt) is known in every process.  The routine checks if
560   (recvtype,recvcnt) on process P is the same as (sendtype,sendcnt[P])
561   on process root.
562 */
CollChk_dtype_scatterv(MPI_Comm comm,MPI_Datatype sendtype,const int * sendcnts,MPI_Datatype recvtype,int recvcnt,int root,int are2buffs,char * call)563 int CollChk_dtype_scatterv(MPI_Comm comm,
564                            MPI_Datatype sendtype, const int *sendcnts,
565                            MPI_Datatype recvtype, int recvcnt,
566                            int root, int are2buffs, char *call)
567 {
568     CollChk_hash_t  *hashes;         /* hash array for (sendtype,sendcnts[]) */
569     CollChk_hash_t  root_hash;       /* root's hash value */
570     CollChk_hash_t  recv_hash;       /* local hash value */
571     char            err_str[COLLCHK_STD_STRLEN];
572     int             rank, size, idx;
573     int             are_hashes_equal;
574 
575 #if defined( DEBUG )
576     fprintf( stdout, "CollChk_dtype_scatterv()\n" );
577 #endif
578 
579     /* get the rank and size */
580     MPI_Comm_rank(comm, &rank);
581     MPI_Comm_size(comm, &size);
582 
583     /*
584        Scatter() only cares root's send datatype signature[],
585        i.e. ignore not-root's send datatype signatyre
586     */
587     hashes = NULL;
588     if ( rank == root ) {
589         /* Allocate hash buffer memory */
590 #if ! defined( HAVE_ALLOCA )
591         hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
592 #else
593         hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
594 #endif
595         for ( idx = 0; idx < size; idx++ )
596             CollChk_dtype_hash( sendtype, sendcnts[idx], &(hashes[idx]) );
597     }
598 
599     /* Send the root's hash array to update other processes's root_hash */
600     PMPI_Scatter(hashes, 2, MPI_UNSIGNED, &root_hash, 2, MPI_UNSIGNED,
601                  root, comm);
602 
603     /* Compare root_hash with the input/local hash */
604     if ( are2buffs ) {
605         CollChk_dtype_hash( recvtype, recvcnt, &recv_hash );
606         are_hashes_equal = CollChk_hash_equal( &root_hash, &recv_hash );
607     }
608     else
609         are_hashes_equal = 1;
610 
611     if ( !are_hashes_equal )
612         sprintf(err_str, "Inconsistent datatype signatures detected "
613                          "between rank %d and rank %d.\n", rank, root);
614     else
615         sprintf(err_str, COLLCHK_NO_ERROR_STR);
616 
617     /* Find out if there is unequal hashes in the communicator */
618     are_hashes_equal = CollChk_Allreduce_int(are_hashes_equal, MPI_LAND, comm);
619 
620 #if ! defined( HAVE_ALLOCA )
621     if ( hashes != NULL )
622         free( hashes );
623 #endif
624 
625     if ( !are_hashes_equal )
626         return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
627 
628     return MPI_SUCCESS;
629 }
630 
631 
632 /*
633    (sendtype,sendcnt) and (recvtype,recvcnt) are known in every process.
634    The routine checks if (recvtype,recvcnt) on local process is the same
635    as (sendtype,sendcnt) collected from all the other processes.
636 */
CollChk_dtype_allgather(MPI_Comm comm,MPI_Datatype sendtype,int sendcnt,MPI_Datatype recvtype,int recvcnt,int are2buffs,char * call)637 int CollChk_dtype_allgather(MPI_Comm comm,
638                             MPI_Datatype sendtype, int sendcnt,
639                             MPI_Datatype recvtype, int recvcnt,
640                             int are2buffs, char *call)
641 {
642     CollChk_hash_t  *hashes;      /* hashes from other senders' */
643     CollChk_hash_t  send_hash;    /* local sender's hash value */
644     CollChk_hash_t  recv_hash;    /* local receiver's hash value */
645     char            err_str[COLLCHK_STD_STRLEN];
646     char            rank_str[COLLCHK_SM_STRLEN];
647     int             *isOK2chks;   /* boolean array, true:sendbuff=\=recvbuff */
648     int             *err_ranks;   /* array of ranks that have mismatch hashes */
649     int             err_rank_size;
650     int             err_str_sz, str_sz;
651     int             rank, size, idx;
652 
653 #if defined( DEBUG )
654     fprintf( stdout, "CollChk_dtype_allgather()\n" );
655 #endif
656 
657     /* get the rank and size */
658     MPI_Comm_rank(comm, &rank);
659     MPI_Comm_size(comm, &size);
660 
661     CollChk_dtype_hash( sendtype, sendcnt, &send_hash );
662     /* Allocate hash buffer memory */
663 #if ! defined( HAVE_ALLOCA )
664     hashes    = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
665     err_ranks = (int *) malloc( size * sizeof(int) );
666     isOK2chks = (int *) malloc( size * sizeof(int) );
667 #else
668     hashes    = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
669     err_ranks = (int *) alloca( size * sizeof(int) );
670     isOK2chks = (int *) alloca( size * sizeof(int) );
671 #endif
672 
673     /* Gather other senders' datatype hashes as local hash arrary */
674     PMPI_Allgather(&send_hash, 2, MPI_UNSIGNED, hashes, 2, MPI_UNSIGNED, comm);
675     PMPI_Allgather(&are2buffs, 1, MPI_INT, isOK2chks, 1, MPI_INT, comm);
676 
677     /* Compute the local datatype hash value */
678     CollChk_dtype_hash( recvtype, recvcnt, &recv_hash );
679 
680     /* Compare the local datatype hash with other senders' datatype hashes */
681     /*
682        The checks are more exhaustive and redundant tests on all processes,
683        but matches what user expects
684     */
685     err_rank_size = 0;
686     for ( idx = 0; idx < size; idx++ ) {
687         if ( isOK2chks[idx] ) {
688             if ( ! CollChk_hash_equal( &recv_hash, &(hashes[idx]) ) )
689                 err_ranks[ err_rank_size++ ] = idx;
690         }
691     }
692 
693     if ( err_rank_size > 0 ) {
694         str_sz = sprintf(err_str, "Inconsistent datatype signatures detected "
695                                   "between local rank %d and remote ranks,",
696                                   rank);
697         /* all string size variables, *_sz, does not include NULL character */
698         err_str_sz = str_sz;
699         for ( idx = 0; idx < err_rank_size; idx++ ) {
700             str_sz = sprintf(rank_str, " %d", err_ranks[idx] );
701             /* -3 is reserved for "..." */
702             if ( str_sz + err_str_sz < COLLCHK_STD_STRLEN-3 ) {
703                 strcat(err_str, rank_str);
704                 err_str_sz = strlen( err_str );
705             }
706             else {
707                 strcat(err_str, "..." );
708                 break;
709             }
710         }
711     }
712     else
713         sprintf(err_str, COLLCHK_NO_ERROR_STR);
714 
715     /* Find out the total number of unequal hashes in the communicator */
716     err_rank_size = CollChk_Allreduce_int(err_rank_size, MPI_SUM, comm);
717 
718 #if ! defined( HAVE_ALLOCA )
719     if ( hashes != NULL )
720         free( hashes );
721     if ( err_ranks != NULL )
722         free( err_ranks );
723     if ( isOK2chks != NULL )
724         free( isOK2chks );
725 #endif
726 
727     if ( err_rank_size > 0 )
728         return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
729 
730     return MPI_SUCCESS;
731 }
732 
733 
734 /*
735   The vector of (recvtype,recvcnts[]) is assumed to be known locally.
736   The routine checks if (recvtype,recvcnts[]) on local process is the same as
737   (sendtype,sendcnts[]) collected from all the other processes.
738 */
CollChk_dtype_allgatherv(MPI_Comm comm,MPI_Datatype sendtype,int sendcnt,MPI_Datatype recvtype,const int * recvcnts,int are2buffs,char * call)739 int CollChk_dtype_allgatherv(MPI_Comm comm,
740                              MPI_Datatype sendtype, int sendcnt,
741                              MPI_Datatype recvtype, const int *recvcnts,
742                              int are2buffs, char *call)
743 {
744     CollChk_hash_t  *hashes;      /* hash array for (sendtype,sendcnt) */
745     CollChk_hash_t  send_hash;    /* local sender's hash value */
746     CollChk_hash_t  recv_hash;    /* local receiver's hash value */
747     char            err_str[COLLCHK_STD_STRLEN];
748     char            rank_str[COLLCHK_SM_STRLEN];
749     int             *isOK2chks;   /* boolean array, true:sendbuff=\=recvbuff */
750     int             *err_ranks;   /* array of ranks that have mismatch hashes */
751     int             err_rank_size;
752     int             err_str_sz, str_sz;
753     int             rank, size, idx;
754 
755 #if defined( DEBUG )
756     fprintf( stdout, "CollChk_dtype_allgatherv()\n" );
757 #endif
758 
759     /* get the rank and size */
760     MPI_Comm_rank(comm, &rank);
761     MPI_Comm_size(comm, &size);
762 
763     /* Allocate hash buffer memory */
764 #if ! defined( HAVE_ALLOCA )
765     hashes    = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
766     err_ranks = (int *) malloc( size * sizeof(int) );
767     isOK2chks = (int *) malloc( size * sizeof(int) );
768 #else
769     hashes    = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
770     err_ranks = (int *) alloca( size * sizeof(int) );
771     isOK2chks = (int *) alloca( size * sizeof(int) );
772 #endif
773 
774     CollChk_dtype_hash( sendtype, sendcnt, &send_hash );
775 
776     /* Gather other senders' datatype hashes as local hash array */
777     PMPI_Allgather(&send_hash, 2, MPI_UNSIGNED, hashes, 2, MPI_UNSIGNED, comm);
778     PMPI_Allgather(&are2buffs, 1, MPI_INT, isOK2chks, 1, MPI_INT, comm);
779 
780     /* Compare the local datatype hash with other senders' datatype hashes */
781     /*
782        The checks are more exhaustive and redundant tests on all processes,
783        but matches what user expects
784     */
785     err_rank_size = 0;
786     for ( idx = 0; idx < size; idx++ ) {
787         if ( isOK2chks[idx] ) {
788             CollChk_dtype_hash( recvtype, recvcnts[idx], &recv_hash );
789             if ( ! CollChk_hash_equal( &recv_hash, &(hashes[idx]) ) )
790                 err_ranks[ err_rank_size++ ] = idx;
791         }
792     }
793 
794     if ( err_rank_size > 0 ) {
795         str_sz = sprintf(err_str, "Inconsistent datatype signatures detected "
796                                   "between local rank %d and remote ranks,",
797                                   rank);
798         /* all string size variables, *_sz, does not include NULL character */
799         err_str_sz = str_sz;
800         for ( idx = 0; idx < err_rank_size; idx++ ) {
801             str_sz = sprintf(rank_str, " %d", err_ranks[idx] );
802             /* -3 is reserved for "..." */
803             if ( str_sz + err_str_sz < COLLCHK_STD_STRLEN-3 ) {
804                 strcat(err_str, rank_str);
805                 err_str_sz = strlen( err_str );
806             }
807             else {
808                 strcat(err_str, "..." );
809                 break;
810             }
811         }
812     }
813     else
814         sprintf(err_str, COLLCHK_NO_ERROR_STR);
815 
816     /* Find out the total number of unequal hashes in the communicator */
817     err_rank_size = CollChk_Allreduce_int(err_rank_size, MPI_SUM, comm);
818 
819 #if ! defined( HAVE_ALLOCA )
820     if ( hashes != NULL )
821         free( hashes );
822     if ( err_ranks != NULL )
823         free( err_ranks );
824     if ( isOK2chks != NULL )
825         free( isOK2chks );
826 #endif
827 
828     if ( err_rank_size > 0 )
829         return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
830 
831     return MPI_SUCCESS;
832 }
833 
834 
835 /*
836   The vector of (recvtype,recvcnts[]) is assumed to be known locally.
837   The routine checks if (recvtype,recvcnts[]) on local process is the same as
838   (sendtype,sendcnts[]) collected from all the other processes.
839 */
CollChk_dtype_alltoallv(MPI_Comm comm,MPI_Datatype sendtype,const int * sendcnts,MPI_Datatype recvtype,const int * recvcnts,char * call)840 int CollChk_dtype_alltoallv(MPI_Comm comm,
841                             MPI_Datatype sendtype, const int *sendcnts,
842                             MPI_Datatype recvtype, const int *recvcnts,
843                             char *call)
844 {
845     CollChk_hash_t  *send_hashes;    /* hash array for (sendtype,sendcnt[]) */
846     CollChk_hash_t  *hashes;         /* hash array for (sendtype,sendcnt[]) */
847     CollChk_hash_t  recv_hash;       /* local receiver's hash value */
848     char            err_str[COLLCHK_STD_STRLEN];
849     char            rank_str[COLLCHK_SM_STRLEN];
850     int             *err_ranks;
851     int             err_rank_size;
852     int             err_str_sz, str_sz;
853     int             rank, size, idx;
854 
855 #if defined( DEBUG )
856     fprintf( stdout, "CollChk_dtype_alltoallv()\n" );
857 #endif
858 
859     /* get the rank and size */
860     MPI_Comm_rank(comm, &rank);
861     MPI_Comm_size(comm, &size);
862 
863     /* Allocate hash buffer memory */
864 #if ! defined( HAVE_ALLOCA )
865     send_hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
866     hashes      = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
867     err_ranks   = (int *) malloc( size * sizeof(int) );
868 #else
869     send_hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
870     hashes      = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
871     err_ranks   = (int *) alloca( size * sizeof(int) );
872 #endif
873 
874     for ( idx = 0; idx < size; idx++ )
875         CollChk_dtype_hash( sendtype, sendcnts[idx], &send_hashes[idx] );
876 
877     /* Gather other senders' datatype hashes as local hash array */
878     PMPI_Alltoall(send_hashes, 2, MPI_UNSIGNED, hashes, 2, MPI_UNSIGNED, comm);
879 
880     /* Compare the local datatype hash with other senders' datatype hashes */
881     /*
882        The checks are more exhaustive and redundant tests on all processes,
883        but matches what user expects
884     */
885     err_rank_size = 0;
886     for ( idx = 0; idx < size; idx++ ) {
887         CollChk_dtype_hash( recvtype, recvcnts[idx], &recv_hash );
888         if ( ! CollChk_hash_equal( &recv_hash, &(hashes[idx]) ) )
889             err_ranks[ err_rank_size++ ] = idx;
890     }
891 
892     if ( err_rank_size > 0 ) {
893         str_sz = sprintf(err_str, "Inconsistent datatype signatures detected "
894                                   "between local rank %d and remote ranks,",
895                                   rank);
896         /* all string size variables, *_sz, does not include NULL character */
897         err_str_sz = str_sz;
898         for ( idx = 0; idx < err_rank_size; idx++ ) {
899             str_sz = sprintf(rank_str, " %d", err_ranks[idx] );
900             /* -3 is reserved for "..." */
901             if ( str_sz + err_str_sz < COLLCHK_STD_STRLEN-3 ) {
902                 strcat(err_str, rank_str);
903                 err_str_sz = strlen( err_str );
904             }
905             else {
906                 strcat(err_str, "..." );
907                 break;
908             }
909         }
910     }
911     else
912         sprintf(err_str, COLLCHK_NO_ERROR_STR);
913 
914     /* Find out the total number of unequal hashes in the communicator */
915     err_rank_size = CollChk_Allreduce_int(err_rank_size, MPI_SUM, comm);
916 
917 #if ! defined( HAVE_ALLOCA )
918     if ( send_hashes != NULL )
919         free( send_hashes );
920     if ( hashes != NULL )
921         free( hashes );
922     if ( err_ranks != NULL )
923         free( err_ranks );
924 #endif
925 
926     if ( err_rank_size > 0 )
927         return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
928 
929     return MPI_SUCCESS;
930 }
931 
932 
933 /*
934   The vector of (recvtypes[],recvcnts[]) is assumed to be known locally.
935   The routine checks if (recvtypes[],recvcnts[]) on local process is the same as
936   (sendtype[],sendcnts[]) collected from all the other processes.
937 */
CollChk_dtype_alltoallw(MPI_Comm comm,const MPI_Datatype * sendtypes,const int * sendcnts,const MPI_Datatype * recvtypes,const int * recvcnts,char * call)938 int CollChk_dtype_alltoallw(MPI_Comm comm,
939                             const MPI_Datatype *sendtypes, const int *sendcnts,
940                             const MPI_Datatype *recvtypes, const int *recvcnts,
941                             char *call)
942 {
943     CollChk_hash_t  *send_hashes;  /* hash array for (sendtypes[],sendcnt[]) */
944     CollChk_hash_t  *hashes;       /* hash array for (sendtypes[],sendcnt[]) */
945     CollChk_hash_t  recv_hash;     /* local receiver's hash value */
946     char            err_str[COLLCHK_STD_STRLEN];
947     char            rank_str[COLLCHK_SM_STRLEN];
948     int             *err_ranks;
949     int             err_rank_size;
950     int             err_str_sz, str_sz;
951     int             rank, size, idx;
952 
953     /* get the rank and size */
954     MPI_Comm_rank(comm, &rank);
955     MPI_Comm_size(comm, &size);
956 
957     /* Allocate hash buffer memory */
958 #if ! defined( HAVE_ALLOCA )
959     send_hashes = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
960     hashes      = (CollChk_hash_t *) malloc( size * sizeof(CollChk_hash_t) );
961     err_ranks   = (int *) malloc( size * sizeof(int) );
962 #else
963     send_hashes = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
964     hashes      = (CollChk_hash_t *) alloca( size * sizeof(CollChk_hash_t) );
965     err_ranks   = (int *) alloca( size * sizeof(int) );
966 #endif
967 
968     for ( idx = 0; idx < size; idx++ )
969         CollChk_dtype_hash( sendtypes[idx], sendcnts[idx], &send_hashes[idx] );
970 
971     /* Gather other senders' datatype hashes as local hash array */
972     PMPI_Alltoall(send_hashes, 2, MPI_UNSIGNED, hashes, 2, MPI_UNSIGNED, comm);
973 
974     /* Compare the local datatype hashes with other senders' datatype hashes */
975     /*
976        The checks are more exhaustive and redundant tests on all processes,
977        but matches what user expects
978     */
979     err_rank_size = 0;
980     for ( idx = 0; idx < size; idx++ ) {
981         CollChk_dtype_hash( recvtypes[idx], recvcnts[idx], &recv_hash );
982         if ( ! CollChk_hash_equal( &recv_hash, &(hashes[idx]) ) )
983             err_ranks[ err_rank_size++ ] = idx;
984     }
985 
986     if ( err_rank_size > 0 ) {
987         str_sz = sprintf(err_str, "Inconsistent datatype signatures detected "
988                                   "between local rank %d and remote ranks,",
989                                   rank);
990         /* all string size variables, *_sz, does not include NULL character */
991         err_str_sz = str_sz;
992         for ( idx = 0; idx < err_rank_size; idx++ ) {
993             str_sz = sprintf(rank_str, " %d", err_ranks[idx] );
994             /* -3 is reserved for "..." */
995             if ( str_sz + err_str_sz < COLLCHK_STD_STRLEN-3 ) {
996                 strcat(err_str, rank_str);
997                 err_str_sz = strlen( err_str );
998             }
999             else {
1000                 strcat(err_str, "..." );
1001                 break;
1002             }
1003         }
1004     }
1005     else
1006         sprintf(err_str, COLLCHK_NO_ERROR_STR);
1007 
1008     /* Find out the total number of unequal hashes in the communicator */
1009     err_rank_size = CollChk_Allreduce_int(err_rank_size, MPI_SUM, comm);
1010 
1011 #if ! defined( HAVE_ALLOCA )
1012     if ( send_hashes != NULL )
1013         free( send_hashes );
1014     if ( hashes != NULL )
1015         free( hashes );
1016     if ( err_ranks != NULL )
1017         free( err_ranks );
1018 #endif
1019 
1020     if ( err_rank_size > 0 )
1021         return CollChk_err_han(err_str, COLLCHK_ERR_DTYPE, call, comm);
1022 
1023     return MPI_SUCCESS;
1024 }
1025