1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3 * (C) 2012 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
5 */
6
7 /*
 * Tests that MPI_Pack of a struct datatype that contains a vector type
 * exploits the vector type: its time is compared against packing the
 * vector directly and against a hand-written pack loop
10 *
11 * If PACK_IS_NATIVE is defined, MPI_Pack stores exactly the same bytes as the
12 * user would pack manually; in that case, there is a consistency check.
13 */
14
#include "mpi.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
19
20 #include "mpitestconf.h"
21
/* PACK_IS_NATIVE enables the memcmp consistency check in main that compares
   the MPI_Pack output buffer with the manually packed buffer. */
#ifdef MPICH2
/* MPICH2 (as of 6/2012) packs the native bytes */
#define PACK_IS_NATIVE
#endif


/* When non-zero, main prints extra diagnostics (the source address, the
   number of bytes packed and the three measured times).  Defaults to 0. */
static int verbose = 0;
29
main(int argc,char ** argv)30 int main( int argc, char **argv )
31 {
32 int vcount, vstride;
33 int32_t counts[2];
34 int v2stride, typesize, packsize, i, position, errs = 0;
35 double *outbuf, *outbuf2;
36 double *vsource;
37 MPI_Datatype vtype, stype;
38 MPI_Aint lb, extent;
39 double t0, t1;
40 double tspack, tvpack, tmanual;
41 int ntry;
42 int blocklengths[2];
43 MPI_Aint displacements[2];
44 MPI_Datatype typesArray[2];
45
46 MPI_Init( &argc, &argv );
47
48 /* Create a struct consisting of a two 32-bit ints, followed by a
49 vector of stride 3 but count 128k (less than a few MB of data area) */
50 vcount = 128000;
51 vstride = 3;
52 MPI_Type_vector( vcount, 1, vstride, MPI_DOUBLE, &vtype );
53
54 vsource = (double *)malloc( (vcount + 1) * (vstride + 1) * sizeof(double) );
55 if (!vsource) {
56 fprintf( stderr, "Unable to allocate vsource\n" );
57 MPI_Abort( MPI_COMM_WORLD, 1 );
58 }
59 for (i=0; i<vcount*vstride; i++) {
60 vsource[i] = i;
61 }
62 blocklengths[0] = 2; MPI_Get_address( &counts[0], &displacements[0] );
63 blocklengths[1] = 1; MPI_Get_address( vsource, &displacements[1] );
64 if (verbose) {
65 printf( "%p = %p?\n", vsource, (void *)displacements[1] );
66 }
67 typesArray[0] = MPI_INT32_T;
68 typesArray[1] = vtype;
69 MPI_Type_create_struct( 2, blocklengths, displacements, typesArray,
70 &stype );
71 MPI_Type_commit( &stype );
72 MPI_Type_commit( &vtype );
73
74 #if defined(MPICH2) && defined(PRINT_DATATYPE_INTERNALS)
75 /* To use MPIDU_Datatype_debug to print the datatype internals,
76 you must configure MPICH2 with --enable-g=log */
77 if (verbose) {
78 printf( "Original struct datatype:\n" );
79 MPIDU_Datatype_debug( stype, 10 );
80 }
81 #endif
82
83 MPI_Pack_size( 1, stype, MPI_COMM_WORLD, &packsize );
84 outbuf = (double *)malloc( packsize );
85 outbuf2 = (double *)malloc( packsize );
86 if (!outbuf) {
87 fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize );
88 MPI_Abort( MPI_COMM_WORLD, 1 );
89 }
90 if (!outbuf2) {
91 fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize );
92 MPI_Abort( MPI_COMM_WORLD, 1 );
93 }
94 position = 0;
95 /* Warm up the code and data */
96 MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position,
97 MPI_COMM_WORLD );
98
99 tspack = 1e12;
100 for (ntry = 0; ntry < 5; ntry++) {
101 position = 0;
102 t0 = MPI_Wtime();
103 MPI_Pack( MPI_BOTTOM, 1, stype, outbuf, packsize, &position,
104 MPI_COMM_WORLD );
105 t1 = MPI_Wtime() - t0;
106 if (t1 < tspack) tspack = t1;
107 }
108 MPI_Type_free( &stype );
109
110 /* An equivalent packing, using the 2 ints and the vector separately */
111 tvpack = 1e12;
112 for (ntry = 0; ntry < 5; ntry++) {
113 position = 0;
114 t0 = MPI_Wtime();
115 MPI_Pack( counts, 2, MPI_INT32_T, outbuf, packsize, &position,
116 MPI_COMM_WORLD );
117 MPI_Pack( vsource, 1, vtype, outbuf, packsize, &position,
118 MPI_COMM_WORLD );
119 t1 = MPI_Wtime() - t0;
120 if (t1 < tvpack) tvpack = t1;
121 }
122 MPI_Type_free( &vtype );
123
124 /* Note that we exploit the fact that the vector type contains vblock
125 instances of a contiguous type of size 24, or a single block of 24*vblock
126 bytes.
127 */
128 tmanual = 1e12;
129 for (ntry = 0; ntry < 5; ntry++) {
130 const double * restrict ppe = (const double *)vsource;
131 double * restrict ppo = outbuf2;
132 int j;
133 t0 = MPI_Wtime();
134 position = 0;
135 *(int32_t *)ppo = counts[0];
136 *( ((int32_t *)ppo) + 1) = counts[1];
137 ppo++;
138 /* Some hand optimization because this file is not normally
139 compiled with optimization by the test suite */
140 j = vcount;
141 while (j) {
142 *ppo++ = *ppe;
143 ppe += vstride;
144 *ppo++ = *ppe;
145 ppe += vstride;
146 *ppo++ = *ppe;
147 ppe += vstride;
148 *ppo++ = *ppe;
149 ppe += vstride;
150 j -= 4;
151 }
152 position += (1 + vcount);
153 position *= sizeof(double);
154 t1 = MPI_Wtime() - t0;
155 if (t1 < tmanual) tmanual = t1;
156
157 /* Check on correctness */
158 #ifdef PACK_IS_NATIVE
159 if (memcmp( outbuf, outbuf2, position ) != 0) {
160 printf( "Panic(manual) - pack buffers differ\n" );
161 for (j=0; j<8; j++) {
162 printf( "%d: %llx\t%llx\n", j, (long long unsigned)outbuf[j],
163 (long long unsigned)outbuf2[j] );
164 }
165 }
166 #endif
167 }
168
169 if (verbose) {
170 printf( "Bytes packed = %d\n", position );
171 printf( "MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n",
172 tspack, tvpack, tmanual );
173 }
174
175 if (4 * tmanual < tspack) {
176 errs++;
177 printf( "MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack, tmanual )
178 ;
179 printf( "MPI_Pack time should be less than 4 times the manual time\n" );
180 printf( "For most informative results, be sure to compile this test with optimization\n" );
181 }
182 if (4 * tmanual < tvpack) {
183 errs++;
184 printf( "MPI_Pack using vector = %e, manual pack time = %e\n", tvpack,
185 tmanual );
186 printf( "MPI_Pack time should be less than 4 times the manual time\n" );
187 printf( "For most informative results, be sure to compile this test with optimization\n" );
188 }
189 if (4 * tvpack < tspack) {
190 errs++;
191 printf( "MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack );
192 printf( "MPI_Pack time using vector should be about the same as the struct containing the vector\n" );
193 printf( "For most informative results, be sure to compile this test with optimization\n" );
194 }
195
196 if (errs) {
197 printf( " Found %d errors\n", errs );
198 }
199 else {
200 printf( " No Errors\n" );
201 }
202
203 free( vsource );
204 free( outbuf );
205 free( outbuf2 );
206
207 MPI_Finalize();
208 return 0;
209 }
210