1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3  *  (C) 2012 by Argonne National Laboratory.
4  *      See COPYRIGHT in top-level directory.
5  */
6 
7 /*
 * Tests that packing a struct datatype that contains a vector type performs
 * about as well as packing the vector directly or by hand, i.e., that the
 * implementation exploits the vector type correctly
10  *
11  * If PACK_IS_NATIVE is defined, MPI_Pack stores exactly the same bytes as the
12  * user would pack manually; in that case, there is a consistency check.
13  */
14 
#include "mpi.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "mpitestconf.h"
21 
/* MPICH2 (as of 6/2012) packs the native bytes, so the output of MPI_Pack
   can be compared directly against a hand-packed buffer */
#if defined(MPICH2)
#define PACK_IS_NATIVE
#endif


/* Set to a nonzero value to get extra diagnostic and timing output */
static int verbose = 0;
29 
main(int argc,char ** argv)30 int main( int argc, char **argv )
31 {
32     int          vcount, vstride;
33     int32_t      counts[2];
34     int          v2stride, typesize, packsize, i, position, errs = 0;
35     double       *outbuf, *outbuf2;
36     double       *vsource;
37     MPI_Datatype vtype, stype;
38     MPI_Aint     lb, extent;
39     double       t0, t1;
40     double       tspack, tvpack, tmanual;
41     int          ntry;
42     int          blocklengths[2];
43     MPI_Aint     displacements[2];
44     MPI_Datatype typesArray[2];
45 
46     MPI_Init( &argc, &argv );
47 
48     /* Create a struct consisting of a two 32-bit ints, followed by a
49        vector of stride 3 but count 128k (less than a few MB of data area) */
50     vcount  = 128000;
51     vstride = 3;
52     MPI_Type_vector( vcount, 1, vstride, MPI_DOUBLE, &vtype );
53 
54     vsource = (double *)malloc( (vcount + 1) * (vstride + 1) * sizeof(double) );
55     if (!vsource) {
56 	fprintf( stderr, "Unable to allocate vsource\n" );
57 	MPI_Abort( MPI_COMM_WORLD, 1 );
58     }
59     for (i=0; i<vcount*vstride; i++) {
60 	vsource[i] = i;
61     }
62     blocklengths[0] = 2; MPI_Get_address( &counts[0], &displacements[0] );
63     blocklengths[1] = 1; MPI_Get_address( vsource, &displacements[1] );
64     if (verbose) {
65 	printf( "%p = %p?\n", vsource, (void *)displacements[1] );
66     }
67     typesArray[0] = MPI_INT32_T;
68     typesArray[1] = vtype;
69     MPI_Type_create_struct( 2, blocklengths, displacements, typesArray,
70 			    &stype );
71     MPI_Type_commit( &stype );
72     MPI_Type_commit( &vtype );
73 
74 #if defined(MPICH2) && defined(PRINT_DATATYPE_INTERNALS)
75     /* To use MPIDU_Datatype_debug to print the datatype internals,
76        you must configure MPICH2 with --enable-g=log */
77     if (verbose) {
78 	printf( "Original struct datatype:\n" );
79 	MPIDU_Datatype_debug( stype, 10 );
80     }
81 #endif
82 
83     MPI_Pack_size( 1, stype, MPI_COMM_WORLD, &packsize );
84     outbuf  = (double *)malloc( packsize );
85     outbuf2 = (double *)malloc( packsize );
86     if (!outbuf) {
87 	fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize );
88 	MPI_Abort( MPI_COMM_WORLD, 1 );
89     }
90     if (!outbuf2) {
91 	fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize );
92 	MPI_Abort( MPI_COMM_WORLD, 1 );
93     }
94     position = 0;
95     /* Warm up the code and data */
96     MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position,
97 	      MPI_COMM_WORLD );
98 
99     tspack = 1e12;
100     for (ntry = 0; ntry < 5; ntry++) {
101 	position = 0;
102 	t0 = MPI_Wtime();
103 	MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position,
104 		  MPI_COMM_WORLD );
105 	t1 = MPI_Wtime() - t0;
106 	if (t1 < tspack) tspack = t1;
107     }
108     MPI_Type_free( &stype );
109 
110     /* An equivalent packing, using the 2 ints and the vector separately */
111     tvpack = 1e12;
112     for (ntry = 0; ntry < 5; ntry++) {
113 	position = 0;
114 	t0 = MPI_Wtime();
115 	MPI_Pack( counts, 2, MPI_INT32_T, outbuf, packsize, &position,
116 		  MPI_COMM_WORLD );
117 	MPI_Pack( vsource, 1, vtype, outbuf, packsize, &position,
118 		  MPI_COMM_WORLD );
119 	t1 = MPI_Wtime() - t0;
120 	if (t1 < tvpack) tvpack = t1;
121     }
122     MPI_Type_free( &vtype );
123 
124     /* Note that we exploit the fact that the vector type contains vblock
125        instances of a contiguous type of size 24, or a single block of 24*vblock
126        bytes.
127     */
128     tmanual = 1e12;
129     for (ntry = 0; ntry < 5; ntry++) {
130 	const double * restrict ppe = (const double *)vsource;
131 	double * restrict ppo = outbuf2;
132 	int j;
133 	t0 = MPI_Wtime();
134 	position = 0;
135 	*(int32_t *)ppo          = counts[0];
136 	*( ((int32_t *)ppo) + 1) = counts[1];
137 	ppo++;
138 	/* Some hand optimization because this file is not normally
139 	   compiled with optimization by the test suite */
140 	j = vcount;
141 	while (j) {
142 	    *ppo++ = *ppe;
143 	    ppe += vstride;
144 	    *ppo++ = *ppe;
145 	    ppe += vstride;
146 	    *ppo++ = *ppe;
147 	    ppe += vstride;
148 	    *ppo++ = *ppe;
149 	    ppe += vstride;
150 	    j -= 4;
151 	}
152 	position += (1 + vcount);
153 	position *= sizeof(double);
154 	t1 = MPI_Wtime() - t0;
155 	if (t1 < tmanual) tmanual = t1;
156 
157 	/* Check on correctness */
158 #ifdef PACK_IS_NATIVE
159 	if (memcmp( outbuf, outbuf2, position ) != 0) {
160 	    printf( "Panic(manual) - pack buffers differ\n" );
161 	    for (j=0; j<8; j++) {
162 		printf( "%d: %llx\t%llx\n", j, (long long unsigned)outbuf[j],
163 			(long long unsigned)outbuf2[j] );
164 	    }
165 	}
166 #endif
167     }
168 
169     if (verbose) {
170 	printf( "Bytes packed = %d\n", position );
171 	printf( "MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n",
172 		tspack, tvpack, tmanual );
173     }
174 
175     if (4 * tmanual < tspack) {
176 	errs++;
177 	printf( "MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack, tmanual )
178 ;
179 	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
180 	printf( "For most informative results, be sure to compile this test with optimization\n" );
181     }
182     if (4 * tmanual < tvpack) {
183 	errs++;
184 	printf( "MPI_Pack using vector = %e, manual pack time = %e\n", tvpack,
185 		tmanual );
186 	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
187 	printf( "For most informative results, be sure to compile this test with optimization\n" );
188     }
189     if (4 * tvpack < tspack) {
190 	errs++;
191 	printf( "MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack );
192 	printf( "MPI_Pack time using vector should be about the same as the struct containing the vector\n" );
193 	printf( "For most informative results, be sure to compile this test with optimization\n" );
194     }
195 
196     if (errs) {
197         printf( " Found %d errors\n", errs );
198     }
199     else {
200         printf( " No Errors\n" );
201     }
202 
203     free( vsource );
204     free( outbuf );
205     free( outbuf2 );
206 
207     MPI_Finalize();
208     return 0;
209 }
210