/*
 * Copyright (c) 2013-2017 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2013-2016 Inria.  All rights reserved.
 * Copyright (c) 2015 Cisco Systems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

/*
pml monitoring tester.

Designed by George Bosilca <bosilca@icl.utk.edu>, Emmanuel Jeannot <emmanuel.jeannot@inria.fr> and
Clement Foyer <clement.foyer@inria.fr>
Contact the authors for questions.

To be run as:

mpirun -np 4 --mca pml_monitoring_enable 2 ./test_pvar_access
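
The binary can be built with the Open MPI compiler wrapper; assuming this file is
named test_pvar_access.c, for example:

mpicc -o test_pvar_access test_pvar_access.c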

Then, the output should be:
Flushing phase 1:
I       0       1       108 bytes       27 msgs sent
I       1       2       104 bytes       26 msgs sent
I       2       3       104 bytes       26 msgs sent
I       3       0       104 bytes       26 msgs sent
Flushing phase 2:
I       0       1       20 bytes        4 msgs sent
I       0       2       20528 bytes     9 msgs sent
I       1       0       20 bytes        4 msgs sent
I       1       2       104 bytes       26 msgs sent
I       1       3       236 bytes       56 msgs sent
I       2       0       20528 bytes     9 msgs sent
I       2       3       112 bytes       27 msgs sent
I       3       1       220 bytes       52 msgs sent
I       3       2       20 bytes        4 msgs sent

*/

#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>

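/*
 * The two monitoring pvars are arrays with one entry per peer in MPI_COMM_WORLD:
 * for each destination rank they expose, respectively, the number of messages
 * and the number of bytes sent to that rank so far.
 */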
static MPI_T_pvar_handle count_handle;
static MPI_T_pvar_handle msize_handle;
static const char count_pvar_name[] = "pml_monitoring_messages_count";
static const char msize_pvar_name[] = "pml_monitoring_messages_size";
static int count_pvar_idx, msize_pvar_idx;
static int world_rank, world_size;

static void print_vars(int rank, int size, size_t *msg_count, size_t *msg_size)
{
    int i;
    for(i = 0; i < size; ++i) {
        if(0 != msg_size[i])
            printf("I\t%d\t%d\t%zu bytes\t%zu msgs sent\n", rank, i, msg_size[i], msg_count[i]);
    }
}

int main(int argc, char* argv[])
{
    int rank, size, n, to, from, tagno, MPIT_result, provided, count;
    MPI_T_pvar_session session;
    MPI_Status status;
    MPI_Comm newcomm;
    MPI_Request request;
    size_t *msg_count_p1, *msg_size_p1;
    size_t *msg_count_p2, *msg_size_p2;

    /* First phase: circulate a token around MPI_COMM_WORLD */
    n = -1;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    world_size = size;
    world_rank = rank;
    to = (rank + 1) % size;
    from = (rank - 1 + size) % size; /* add size so rank 0's predecessor is size-1, not -1 */
    tagno = 201;

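    /* The MPI_T tools interface is initialized separately from MPI itself; here it
       is set up right after MPI_Init so the monitoring pvars can be queried. */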
    MPIT_result = MPI_T_init_thread(MPI_THREAD_SINGLE, &provided);
    if (MPIT_result != MPI_SUCCESS)
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);

    /* Retrieve the pvar indices */
    MPIT_result = MPI_T_pvar_get_index(count_pvar_name, MPI_T_PVAR_CLASS_SIZE, &count_pvar_idx);
    if (MPIT_result != MPI_SUCCESS) {
        printf("cannot find the MPI_T \"%s\" pvar, check that the monitoring pml is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_get_index(msize_pvar_name, MPI_T_PVAR_CLASS_SIZE, &msize_pvar_idx);
    if (MPIT_result != MPI_SUCCESS) {
        printf("cannot find the MPI_T \"%s\" pvar, check that the monitoring pml is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /* Create the session used to bind the pvar handles */
    MPIT_result = MPI_T_pvar_session_create(&session);
    if (MPIT_result != MPI_SUCCESS) {
        printf("cannot create a session for the \"%s\" and \"%s\" pvars\n",
               count_pvar_name, msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /* Allocating a new pvar handle in a session resets the counters.
       count receives the number of elements of each pvar, i.e. one per peer. */
    MPIT_result = MPI_T_pvar_handle_alloc(session, count_pvar_idx,
                                          MPI_COMM_WORLD, &count_handle, &count);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to allocate a handle for the \"%s\" pvar, check that the monitoring pml is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_handle_alloc(session, msize_pvar_idx,
                                          MPI_COMM_WORLD, &msize_handle, &count);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to allocate a handle for the \"%s\" pvar, check that the monitoring pml is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /* Allocate the arrays used to retrieve the results: one slice of count elements
       for each of the four views (phase 1 counts/sizes, phase 2 counts/sizes) */
    msg_count_p1 = calloc(count * 4, sizeof(size_t));
    if (NULL == msg_count_p1)
        MPI_Abort(MPI_COMM_WORLD, MPI_ERR_NO_MEM);
    msg_size_p1 = &msg_count_p1[count];
    msg_count_p2 = &msg_count_p1[2*count];
    msg_size_p2 = &msg_count_p1[3*count];


    /* Start both pvars */
    MPIT_result = MPI_T_pvar_start(session, count_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to start the handle of the \"%s\" pvar, check that the monitoring pml is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_start(session, msize_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to start the handle of the \"%s\" pvar, check that the monitoring pml is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

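    /*
     * Token ring over MPI_COMM_WORLD: rank 0 injects n = 25 and each process
     * forwards the value to its successor.  Rank 0 decrements the token before
     * forwarding it (the other ranks only decrement their local copy after
     * forwarding), so the ring runs until the token goes negative.  Rank 0's very
     * last send is never matched by a receive, which is why the expected output
     * above shows one extra message from rank 0 to rank 1.
     */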
    if (rank == 0) {
        n = 25;
        MPI_Isend(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD, &request);
        MPI_Wait(&request, MPI_STATUS_IGNORE); /* complete the send before n is reused as a receive buffer */
    }
    while (1) {
        MPI_Irecv(&n, 1, MPI_INT, from, tagno, MPI_COMM_WORLD, &request);
        MPI_Wait(&request, &status);
        if (rank == 0) {n--; tagno++;}
        MPI_Isend(&n, 1, MPI_INT, to, tagno, MPI_COMM_WORLD, &request);
        MPI_Wait(&request, MPI_STATUS_IGNORE); /* complete the send before n is modified or received into again */
        if (rank != 0) {n--; tagno++;}
        if (n < 0) {
            break;
        }
    }

    /* Test stopping the pvars, then read the phase 1 values */
    MPIT_result = MPI_T_pvar_stop(session, count_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to stop the handle of the \"%s\" pvar, check that the monitoring pml is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_stop(session, msize_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to stop the handle of the \"%s\" pvar, check that the monitoring pml is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    MPIT_result = MPI_T_pvar_read(session, count_handle, msg_count_p1);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read the \"%s\" pvar, check that the monitoring pml is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_read(session, msize_handle, msg_size_p1);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read the \"%s\" pvar, check that the monitoring pml is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /* Circulate a token so that the results are displayed in rank order */
    if(0 == world_rank) {
        printf("Flushing phase 1:\n");
        print_vars(world_rank, world_size, msg_count_p1, msg_size_p1);
        MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, (world_rank - 1 + world_size) % world_size, 300, MPI_COMM_WORLD, &status);
    } else {
        MPI_Recv(NULL, 0, MPI_BYTE, (world_rank - 1 + world_size) % world_size, 300, MPI_COMM_WORLD, &status);
        print_vars(world_rank, world_size, msg_count_p1, msg_size_p1);
        MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD);
    }

    /* Re-read the counters so that the messages generated by the display token ring
       above are included in the phase 1 values; they are subtracted from the phase 2
       results below. */
    MPIT_result = MPI_T_pvar_read(session, count_handle, msg_count_p1);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read the \"%s\" pvar, check that the monitoring pml is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_read(session, msize_handle, msg_size_p1);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read the \"%s\" pvar, check that the monitoring pml is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /*
      Second phase: work with different communicators.
      Odd ranks circulate a token in their own communicator
      while even ranks perform an all-to-all.
    */
    MPIT_result = MPI_T_pvar_start(session, count_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to start the handle of the \"%s\" pvar, check that the monitoring pml is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_start(session, msize_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to start the handle of the \"%s\" pvar, check that the monitoring pml is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
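
    /*
     * Split MPI_COMM_WORLD by rank parity.  Note that the monitoring pvars keep
     * indexing their per-peer counters by rank in MPI_COMM_WORLD, so traffic sent
     * through newcomm is still reported against the world ranks in the output.
     */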
    MPI_Comm_split(MPI_COMM_WORLD, rank%2, rank, &newcomm);

    if(rank%2){ /* odd ranks (in MPI_COMM_WORLD) circulate a token in their sub-communicator */
        MPI_Comm_rank(newcomm, &rank);
        MPI_Comm_size(newcomm, &size);
        if( size > 1 ) {
            to = (rank + 1) % size;
            from = (rank - 1 + size) % size;
            tagno = 201;
            if (rank == 0){
                n = 50;
                MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm);
            }
            while (1){
                MPI_Recv(&n, 1, MPI_INT, from, tagno, newcomm, &status);
                if (rank == 0) {n--; tagno++;}
                MPI_Send(&n, 1, MPI_INT, to, tagno, newcomm);
                if (rank != 0) {n--; tagno++;}
                if (n < 0) {
                    break;
                }
            }
        }
    } else { /* even ranks (in MPI_COMM_WORLD) perform an all-to-all, then split again and synchronize on a barrier */
        int send_buff[10240];
        int recv_buff[10240];
        MPI_Comm_rank(newcomm, &rank);
        MPI_Comm_size(newcomm, &size);
        /* The buffers are never initialized: only the amount of traffic matters here */
        MPI_Alltoall(send_buff, 10240/size, MPI_INT, recv_buff, 10240/size, MPI_INT, newcomm);
        MPI_Comm_split(newcomm, rank%2, rank, &newcomm);
        MPI_Barrier(newcomm);
    }

    MPIT_result = MPI_T_pvar_read(session, count_handle, msg_count_p2);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read the \"%s\" pvar, check that the monitoring pml is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_read(session, msize_handle, msg_size_p2);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to read the \"%s\" pvar, check that the monitoring pml is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    /* Keep only the second phase by subtracting the phase 1 values */
    for(int i = 0; i < size; ++i) {
        msg_count_p2[i] -= msg_count_p1[i];
        msg_size_p2[i] -= msg_size_p1[i];
    }

    /* Circulate a token so that the results are displayed in rank order */
    if(0 == world_rank) {
        printf("Flushing phase 2:\n");
        print_vars(world_rank, world_size, msg_count_p2, msg_size_p2);
        MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD);
        MPI_Recv(NULL, 0, MPI_BYTE, (world_rank - 1 + world_size) % world_size, 300, MPI_COMM_WORLD, &status);
    } else {
        MPI_Recv(NULL, 0, MPI_BYTE, (world_rank - 1 + world_size) % world_size, 300, MPI_COMM_WORLD, &status);
        print_vars(world_rank, world_size, msg_count_p2, msg_size_p2);
        MPI_Send(NULL, 0, MPI_BYTE, (world_rank + 1) % world_size, 300, MPI_COMM_WORLD);
    }

    MPIT_result = MPI_T_pvar_handle_free(session, &count_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to free the handle of the \"%s\" pvar, check that the monitoring pml is enabled\n",
               count_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }
    MPIT_result = MPI_T_pvar_handle_free(session, &msize_handle);
    if (MPIT_result != MPI_SUCCESS) {
        printf("failed to free the handle of the \"%s\" pvar, check that the monitoring pml is enabled\n",
               msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    MPIT_result = MPI_T_pvar_session_free(&session);
    if (MPIT_result != MPI_SUCCESS) {
        printf("cannot close the session for the \"%s\" and \"%s\" pvars\n",
               count_pvar_name, msize_pvar_name);
        MPI_Abort(MPI_COMM_WORLD, MPIT_result);
    }

    (void)MPI_T_finalize();

    free(msg_count_p1);

    MPI_Finalize();
    return EXIT_SUCCESS;
}