/*
 * Copyright (c) 2013-2017 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2013-2016 Inria.  All rights reserved.
 * Copyright (c) 2015      Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

/*
pml monitoring tester.

Designed by George Bosilca <bosilca@icl.utk.edu>, Emmanuel Jeannot <emmanuel.jeannot@inria.fr> and
Clement Foyer <clement.foyer@inria.fr>
Contact the authors for questions.

To be run as:

mpirun -np 4 --mca pml_monitoring_enable 2 ./test_pvar_access

Then, the output should be:
Flushing phase 1:
I 0 1 108 bytes 27 msgs sent
I 1 2 104 bytes 26 msgs sent
I 2 3 104 bytes 26 msgs sent
I 3 0 104 bytes 26 msgs sent
Flushing phase 2:
I 0 1 20 bytes 4 msgs sent
I 0 2 20528 bytes 9 msgs sent
I 1 0 20 bytes 4 msgs sent
I 1 2 104 bytes 26 msgs sent
I 1 3 236 bytes 56 msgs sent
I 2 0 20528 bytes 9 msgs sent
I 2 3 112 bytes 27 msgs sent
I 3 1 220 bytes 52 msgs sent
I 3 2 20 bytes 4 msgs sent

*/

#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>

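/* File-scope state: MPI_T handles and indices for the two monitoring pvars,
 * plus the world rank/size, cached because main() reuses rank/size for the
 * phase 2 subcommunicators. */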
static MPI_T_pvar_handle count_handle;
static MPI_T_pvar_handle msize_handle;
static const char count_pvar_name[] = "pml_monitoring_messages_count";
static const char msize_pvar_name[] = "pml_monitoring_messages_size";
static int count_pvar_idx, msize_pvar_idx;
static int world_rank, world_size;

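/* Print one line per peer this rank sent to: the peer, the number of bytes,
 * and the number of messages. Peers with no recorded bytes are skipped. */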
static void print_vars(int rank, int size, size_t* msg_count, size_t* msg_size)
{
    int i;
    for(i = 0; i < size; ++i) {
        if(0 != msg_size[i])
            printf("I\t%d\t%d\t%zu bytes\t%zu msgs sent\n", rank, i, msg_size[i], msg_count[i]);
    }
}

int main(int argc, char* argv[])
{
    int rank, size, n, to, from, tagno, MPIT_result, provided, count;
    MPI_T_pvar_session session;
    MPI_Status status;
    MPI_Comm newcomm;
    MPI_Comm splitcomm = MPI_COMM_NULL; /* second-level communicator created by the even ranks in phase 2 */
    MPI_Request request;
    size_t *msg_count_p1, *msg_size_p1;
    size_t *msg_count_p2, *msg_size_p2;

    /* First phase: circulate a token around MPI_COMM_WORLD */
    n = -1;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    world_size = size;
    world_rank = rank;
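    /* Ring neighbors: send to the successor, receive from the predecessor;
     * adding size before the modulo keeps the index non-negative for rank 0. */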
    to = (rank + 1) % size;
    from = (rank + size - 1) % size;
    tagno = 201;

    MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);
    if (MPIT_result != MPI_SUCCESS)
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);

    /* Retrieve the pvar indices */
    MPIT_result = MPI_T_pvar_get_index(count_pvar_name, MPI_T_PVAR_CLASS_SIZE, &count_pvar_idx);
    if (MPIT_result != MPI_SUCCESS) {
        printf("cannot find monitoring MPI_T \"%s\" pvar, check that the monitoring PML is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_get_index(msize_pvar_name, MPI_T_PVAR_CLASS_SIZE, &msize_pvar_idx);
    if (MPIT_result != MPI_SUCCESS) {
        printf("cannot find monitoring MPI_T \"%s\" pvar, check that the monitoring PML is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }


    /* Create a session for the pvar bindings */
    MPIT_result = MPI_T_pvar_session_create(&session);
    if (MPIT_result != MPI_SUCCESS) {
        printf("cannot create a session for \"%s\" and \"%s\" pvars\n",
               count_pvar_name, msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /* Allocating a new pvar in a session will reset the counters */
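    /* MPI_T_pvar_handle_alloc also returns, in count, the number of values for
     * the bound object: here, one entry per peer in MPI_COMM_WORLD. */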
    MPIT_result = MPI_T_pvar_handle_alloc(session, count_pvar_idx,
                                          MPI_COMM_WORLD, &count_handle, &count);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to allocate handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_handle_alloc(session, msize_pvar_idx,
                                          MPI_COMM_WORLD, &msize_handle, &count);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to allocate handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /* Allocate arrays to retrieve results */
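    /* A single calloc backs four contiguous arrays of count entries each:
     * per-peer message counts and sizes for phase 1, then for phase 2. */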
    msg_count_p1 = calloc(count * 4, sizeof(size_t));
    if (NULL == msg_count_p1)
        MPI_Abort(MPI_COMM_WORLD, MPI_ERR_NO_MEM);
    msg_size_p1  = &msg_count_p1[count];
    msg_count_p2 = &msg_count_p1[2 * count];
    msg_size_p2  = &msg_count_p1[3 * count];

    /* Start the pvars */
    MPIT_result = MPI_T_pvar_start(session, count_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to start handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_start(session, msize_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to start handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

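    /* Phase 1 token ring: rank 0 injects n = 25; each time the token completes
     * a lap, rank 0 decrements it and bumps the tag. Every rank keeps
     * forwarding the token until it goes negative. */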
    if (rank == 0) {
        n = 25;
        MPI_Isend(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD, &request);
        MPI_Request_free(&request); /* the send proceeds; the handle is no longer needed */
    }
    while (1) {
        MPI_Irecv(&n, 1, MPI_INT, from, tagno, MPI_COMM_WORLD, &request);
        MPI_Wait(&request, &status);
        if (rank == 0) { n--; tagno++; }
        MPI_Isend(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD, &request);
        MPI_Request_free(&request); /* avoid clobbering an active request with the next Irecv */
        if (rank != 0) { n--; tagno++; }
        if (n < 0) {
            break;
        }
    }

    /* Stop the pvars before reading their values */
    MPIT_result = MPI_T_pvar_stop(session, count_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to stop handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_stop(session, msize_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to stop handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    MPIT_result = MPI_T_pvar_read(session, count_handle, msg_count_p1);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read \"%s\" pvar, check that the monitoring PML is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_read(session, msize_handle, msg_size_p1);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read \"%s\" pvar, check that the monitoring PML is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /* Circulate a zero-byte token so the ranks print their results in order */
    if(0 == world_rank) {
        printf("Flushing phase 1:\n");
        print_vars(world_rank, world_size, msg_count_p1, msg_size_p1);
        MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, (world_rank + world_size - 1) % world_size, 300, MPI_COMM_WORLD, &status);
    } else {
        MPI_Recv(NULL, 0, MPI_BYTE, (world_rank + world_size - 1) % world_size, 300, MPI_COMM_WORLD, &status);
        print_vars(world_rank, world_size, msg_count_p1, msg_size_p1);
        MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD);
    }

    /* Fold the display-ring messages into the phase 1 counters, so that the
     * phase 2 numbers (computed by subtraction below) exclude them */
    MPIT_result = MPI_T_pvar_read(session, count_handle, msg_count_p1);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read \"%s\" pvar, check that the monitoring PML is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_read(session, msize_handle, msg_size_p1);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read \"%s\" pvar, check that the monitoring PML is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /*
      Second phase. Work with different communicators:
      odd ranks circulate a token in their own communicator,
      while even ranks perform an all-to-all (and a barrier).
    */
    MPIT_result = MPI_T_pvar_start(session, count_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to start handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_start(session, msize_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to start handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

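    /* Split MPI_COMM_WORLD by rank parity: even world ranks land in one
     * subcommunicator, odd world ranks in another. */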
    MPI_Comm_split(MPI_COMM_WORLD, rank % 2, rank, &newcomm);

    if(rank % 2){ /* odd ranks (in MPI_COMM_WORLD) circulate a token */
        MPI_Comm_rank(newcomm, &rank);
        MPI_Comm_size(newcomm, &size);
        if( size > 1 ) {
            to = (rank + 1) % size;
            from = (rank + size - 1) % size;
            tagno = 201;
            if (rank == 0){
                n = 50;
                MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm);
            }
            while (1){
                MPI_Recv(&n, 1, MPI_INT, from, tagno, newcomm, &status);
                if (rank == 0) { n--; tagno++; }
                MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm);
                if (rank != 0) { n--; tagno++; }
                if (n < 0){
                    break;
                }
            }
        }
    } else { /* even ranks (in MPI_COMM_WORLD) perform an all-to-all, then split again and barrier */
        int send_buff[10240];
        int recv_buff[10240];
        MPI_Comm_rank(newcomm, &rank);
        MPI_Comm_size(newcomm, &size);
        /* buffer contents are irrelevant here; only the traffic volume matters */
        MPI_Alltoall(send_buff, 10240/size, MPI_INT, recv_buff, 10240/size, MPI_INT, newcomm);
        MPI_Comm_split(newcomm, rank % 2, rank, &splitcomm); /* keep newcomm's handle intact */
        MPI_Barrier(splitcomm);
    }

    MPIT_result = MPI_T_pvar_read(session, count_handle, msg_count_p2);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read \"%s\" pvar, check that the monitoring PML is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_read(session, msize_handle, msg_size_p2);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read \"%s\" pvar, check that the monitoring PML is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /* Keep only the traffic from the second phase: the pvar values are
     * cumulative, so subtract the phase 1 snapshot for every peer. Note that
     * rank and size now refer to the subcommunicators, hence world_size. */
    for(int i = 0; i < world_size; ++i) {
        msg_count_p2[i] -= msg_count_p1[i];
        msg_size_p2[i] -= msg_size_p1[i];
    }

    /* Circulate a zero-byte token so the ranks print their results in order */
    if(0 == world_rank) {
        printf("Flushing phase 2:\n");
        print_vars(world_rank, world_size, msg_count_p2, msg_size_p2);
        MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, (world_rank + world_size - 1) % world_size, 300, MPI_COMM_WORLD, &status);
    } else {
        MPI_Recv(NULL, 0, MPI_BYTE, (world_rank + world_size - 1) % world_size, 300, MPI_COMM_WORLD, &status);
        print_vars(world_rank, world_size, msg_count_p2, msg_size_p2);
        MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD);
    }
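
    /* Release the communicators created for phase 2. This happens after the
     * final pvar reads, so any implementation-dependent cost of freeing them
     * cannot perturb the counters reported above. Odd ranks never created a
     * second-level communicator, so splitcomm is still MPI_COMM_NULL there. */
    if (MPI_COMM_NULL != splitcomm)
        MPI_Comm_free(&splitcomm);
    MPI_Comm_free(&newcomm);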

    MPIT_result = MPI_T_pvar_handle_free(session, &count_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to free handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_handle_free(session, &msize_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to free handle on \"%s\" pvar, check that the monitoring PML is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    MPIT_result = MPI_T_pvar_session_free(&session);
    if (MPIT_result != MPI_SUCCESS) {
        printf("cannot close a session for \"%s\" and \"%s\" pvars\n",
               count_pvar_name, msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

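    /* The return value is deliberately ignored: this late in the run there is
     * nothing useful to do if MPI_T_finalize fails. */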
    (void)MPI_T_finalize();

    free(msg_count_p1);

    MPI_Finalize();
    return EXIT_SUCCESS;
}