1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 /*
3 *
4 * Copyright (C) 1997 University of Chicago.
5 * See COPYRIGHT notice in top-level directory.
6 */
7
8 #include <assert.h>
9 #include "adio.h"
10 #include "adio_extern.h"
11 #ifdef AGGREGATION_PROFILE
12 #include "mpe.h"
13 #endif
14
15 /*
16 #define DEBUG
17 #define DEBUG2
18 */
19
/* Message tags used for the point-to-point traffic in ADIOI_Exch_file_views. */
#define COUNT_EXCH 0    /* amount_and_extra_data_t records (path taken when cb_alltoall is disabled) */
#define BLOCK_LENS 1    /* the blocklens array of a flattened filetype */
#define INDICES 2       /* the indices array of a flattened filetype */
#define FPIND_DISP_OFF_SZ 3     /* NOTE(review): no use of this tag is visible in this file */
24
25
/* Per-peer record describing the sender's file view.  ADIOI_Exch_file_views
 * exchanges these between processes as raw bytes (MPI_BYTE with
 * sizeof(amount_and_extra_data_t)), and the six ADIO_Offset members are
 * transferred into the leading members of a view_state, so keep them
 * together and in this order. */
typedef struct {
    int count;              /* number of entries in the sender's flattened filetype */
    ADIO_Offset fp_ind;     /* sender's fd->fp_ind */
    ADIO_Offset disp;       /* sender's fd->disp */
    ADIO_Offset byte_off;   /* the 'off' argument passed to ADIOI_Exch_file_views */
    ADIO_Offset sz;         /* size of the memory datatype multiplied by count */
    ADIO_Offset ext;        /* filetype extent as computed by ADIOI_Exch_file_views */
    ADIO_Offset type_sz;    /* filetype size as computed by ADIOI_Exch_file_views */
} amount_and_extra_data_t;
35
36 /* Debugging function to print out an ADIOI_Flatlist_node. */
ADIOI_Print_flatlist_node(ADIOI_Flatlist_node * flatlist_node_p)37 void ADIOI_Print_flatlist_node(ADIOI_Flatlist_node *flatlist_node_p)
38 {
39 int i;
40 if (flatlist_node_p == NULL)
41 {
42 fprintf(stderr, "print flatlist node of NULL ptr\n");
43 return;
44 }
45 fprintf(stderr, "print flatlist node count = %d (idx,blocklen)\n",
46 (int)flatlist_node_p->count);
47 for (i = 0; i < flatlist_node_p->count; i++)
48 {
49 if (i % 5 == 0 && i != 0)
50 {
51 fprintf(stderr, "%d=(%lld,%lld)\n", i, (long long)flatlist_node_p->indices[i],
52 (long long)flatlist_node_p->blocklens[i]);
53 }
54 else
55 fprintf(stderr, "%d=(%lld,%lld) ", i, (long long)flatlist_node_p->indices[i],
56 (long long)flatlist_node_p->blocklens[i]);
57 }
58 fprintf(stderr, "\n");
59 }
60
61 /* Since ADIOI_Flatten_datatype won't add a contig datatype to the
62 * ADIOI_Flatlist, we can force it to do so with this function. */
ADIOI_Add_contig_flattened(MPI_Datatype contig_type)63 ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type)
64 {
65 MPI_Count contig_type_sz = -1;
66 ADIOI_Flatlist_node *flat_node_p = ADIOI_Flatlist;
67
68 /* Add contig type to the end of the list if it doesn't already
69 * exist. */
70 while (flat_node_p->next)
71 {
72 if (flat_node_p->type == contig_type)
73 return flat_node_p;
74 flat_node_p = flat_node_p->next;
75 }
76 if (flat_node_p->type == contig_type)
77 return flat_node_p;
78
79 MPI_Type_size_x(contig_type, &contig_type_sz);
80 if ((flat_node_p->next = (ADIOI_Flatlist_node *) ADIOI_Malloc
81 (sizeof(ADIOI_Flatlist_node))) == NULL)
82 {
83 fprintf(stderr, "ADIOI_Add_contig_flattened: malloc next failed\n");
84 }
85 flat_node_p = flat_node_p->next;
86 flat_node_p->type = contig_type;
87 if ((flat_node_p->blocklens = (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset))) == NULL)
88 {
89 fprintf(stderr, "ADIOI_Flatlist_node: malloc blocklens failed\n");
90 }
91 if ((flat_node_p->indices = (ADIO_Offset *)
92 ADIOI_Malloc(sizeof(ADIO_Offset))) == NULL)
93 {
94 fprintf(stderr, "ADIOI_Flatlist_node: malloc indices failed\n");
95 }
96 flat_node_p->blocklens[0] = contig_type_sz;
97 flat_node_p->indices[0] = 0;
98 flat_node_p->count = 1;
99 flat_node_p->next = NULL;
100 return flat_node_p;
101 }
102
103 /* ADIOI_Exchange_file_views - Sends all the aggregators the file
104 * views and file view states of the clients. It fills in the
105 * client_file_view_state_arr for the aggregators and the
106 * my_mem_view_state for the client. It also initializes the
107 * agg_file_view_state for all clients, which is the view for each
108 * aggregator of a client's filetype. */
ADIOI_Exch_file_views(int myrank,int nprocs,int file_ptr_type,ADIO_File fd,int count,MPI_Datatype datatype,ADIO_Offset off,view_state * my_mem_view_state_arr,view_state * agg_file_view_state_arr,view_state * client_file_view_state_arr)109 void ADIOI_Exch_file_views(int myrank, int nprocs, int file_ptr_type,
110 ADIO_File fd, int count,
111 MPI_Datatype datatype, ADIO_Offset off,
112 view_state *my_mem_view_state_arr,
113 view_state *agg_file_view_state_arr,
114 view_state *client_file_view_state_arr)
115 {
116 /* Convert my own fileview to an ADIOI_Flattened type and a
117 * disp. MPI_Alltoall the count of ADIOI_Flatlist nodes.
118 * MPI_Isend/Irecv the block_lens, indices of ADIOI_Flatlist node
119 * to/from each of the aggregators with the rest of the file view
120 * state. */
121
122 int i = -1, j = -1;
123 amount_and_extra_data_t *send_count_arr = NULL;
124 amount_and_extra_data_t *recv_count_arr = NULL;
125 int send_req_arr_sz = 0;
126 int recv_req_arr_sz = 0;
127 MPI_Request *send_req_arr = NULL, *recv_req_arr = NULL;
128 MPI_Status *statuses = NULL;
129 ADIO_Offset disp_off_sz_ext_typesz[6];
130 MPI_Aint memtype_extent, filetype_extent, lb;
131 int ret = -1;
132
133 /* parameters for datatypes */
134 ADIOI_Flatlist_node *flat_mem_p = NULL, *flat_file_p = NULL;
135 MPI_Count memtype_sz = -1;
136 int memtype_is_contig = -1;
137 ADIO_Offset filetype_sz = -1;
138
139 #ifdef AGGREGATION_PROFILE
140 MPE_Log_event (5014, 0, NULL);
141 #endif
142 /* The memtype will be freed after the call. The filetype will be
143 * freed in the close and should have been flattened in the file
144 * view. */
145 MPI_Type_size_x(datatype, &memtype_sz);
146 MPI_Type_get_extent(datatype, &lb, &memtype_extent);
147 if (memtype_sz == memtype_extent) {
148 memtype_is_contig = 1;
149 flat_mem_p = ADIOI_Add_contig_flattened(datatype);
150 flat_mem_p->blocklens[0] = memtype_sz*count;
151 }
152 else {
153 flat_mem_p = ADIOI_Flatten_and_find(datatype);
154 }
155
156 MPI_Type_get_extent(fd->filetype, &lb, &filetype_extent);
157 MPI_Type_size_x(fd->filetype, &filetype_sz);
158 if (filetype_extent == filetype_sz) {
159 flat_file_p = ADIOI_Add_contig_flattened(fd->filetype);
160 flat_file_p->blocklens[0] = memtype_sz*count;
161 filetype_extent = memtype_sz*count;
162 filetype_sz = filetype_extent;
163 }
164 else {
165 flat_file_p = ADIOI_Flatlist;
166 while (flat_file_p->type != fd->filetype)
167 flat_file_p = flat_file_p->next;
168 }
169
170 disp_off_sz_ext_typesz[0] = fd->fp_ind;
171 disp_off_sz_ext_typesz[1] = fd->disp;
172 disp_off_sz_ext_typesz[2] = off;
173 disp_off_sz_ext_typesz[3] = memtype_sz*count;
174 disp_off_sz_ext_typesz[4] = (ADIO_Offset) filetype_extent;
175 disp_off_sz_ext_typesz[5] = (ADIO_Offset) filetype_sz;
176
177 if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
178 recv_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
179 send_count_arr = ADIOI_Calloc(nprocs, sizeof(amount_and_extra_data_t));
180 } else {
181 send_count_arr = ADIOI_Calloc(fd->hints->cb_nodes,
182 sizeof(amount_and_extra_data_t));
183
184 /* only aggregators receive data */
185 if (fd->is_agg) {
186 recv_count_arr = ADIOI_Calloc(nprocs,
187 sizeof(amount_and_extra_data_t));
188 recv_req_arr = ADIOI_Malloc (nprocs * sizeof(MPI_Request));
189 for (i=0; i < nprocs; i++)
190 MPI_Irecv (&recv_count_arr[i], sizeof(amount_and_extra_data_t),
191 MPI_BYTE, i, COUNT_EXCH, fd->comm, &recv_req_arr[i]);
192 }
193
194 /* only send data to aggregators */
195 send_req_arr = ADIOI_Calloc (fd->hints->cb_nodes, sizeof(MPI_Request));
196 for (i=0; i < fd->hints->cb_nodes; i++) {
197 send_count_arr[i].count = flat_file_p->count;
198 send_count_arr[i].fp_ind = disp_off_sz_ext_typesz[0];
199 send_count_arr[i].disp = disp_off_sz_ext_typesz[1];
200 send_count_arr[i].byte_off = disp_off_sz_ext_typesz[2];
201 send_count_arr[i].sz = disp_off_sz_ext_typesz[3];
202 send_count_arr[i].ext = disp_off_sz_ext_typesz[4];
203 send_count_arr[i].type_sz = disp_off_sz_ext_typesz[5];
204 MPI_Isend (&send_count_arr[i], sizeof(amount_and_extra_data_t),
205 MPI_BYTE, fd->hints->ranklist[i], COUNT_EXCH, fd->comm,
206 &send_req_arr[i]);
207 }
208 }
209
210
211 /* Every client has to build mem and file view_states for each aggregator.
212 * We initialize their values here. and we also initialize
213 * send_count_arr */
214
215 if (memtype_is_contig) {
216 /* if memory is contigous, we now replace memtype_sz and
217 * memtype_extent with the full access size */
218 memtype_sz *= count;
219 memtype_extent = memtype_sz;
220 }
221
222 for (i = 0; i < fd->hints->cb_nodes; i++)
223 {
224 int tmp_agg_idx = fd->hints->ranklist[i];
225 memset(&(my_mem_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
226 my_mem_view_state_arr[tmp_agg_idx].sz =
227 disp_off_sz_ext_typesz[3];
228 my_mem_view_state_arr[tmp_agg_idx].ext =
229 (ADIO_Offset) memtype_extent;
230 my_mem_view_state_arr[tmp_agg_idx].type_sz =
231 (ADIO_Offset) memtype_sz;
232 my_mem_view_state_arr[tmp_agg_idx].flat_type_p = flat_mem_p;
233 ADIOI_init_view_state(file_ptr_type,
234 1,
235 &(my_mem_view_state_arr[tmp_agg_idx]),
236 TEMP_OFF);
237 ADIOI_init_view_state(file_ptr_type,
238 1,
239 &(my_mem_view_state_arr[tmp_agg_idx]),
240 REAL_OFF);
241
242 memset(&(agg_file_view_state_arr[tmp_agg_idx]), 0, sizeof(view_state));
243 agg_file_view_state_arr[tmp_agg_idx].fp_ind =
244 disp_off_sz_ext_typesz[0];
245 agg_file_view_state_arr[tmp_agg_idx].disp =
246 disp_off_sz_ext_typesz[1];
247 agg_file_view_state_arr[tmp_agg_idx].byte_off =
248 disp_off_sz_ext_typesz[2];
249 agg_file_view_state_arr[tmp_agg_idx].sz =
250 disp_off_sz_ext_typesz[3];
251 agg_file_view_state_arr[tmp_agg_idx].ext =
252 disp_off_sz_ext_typesz[4];
253 agg_file_view_state_arr[tmp_agg_idx].type_sz =
254 disp_off_sz_ext_typesz[5];
255 agg_file_view_state_arr[tmp_agg_idx].flat_type_p = flat_file_p;
256
257 ADIOI_init_view_state(file_ptr_type,
258 1,
259 &(agg_file_view_state_arr[tmp_agg_idx]),
260 TEMP_OFF);
261 ADIOI_init_view_state(file_ptr_type,
262 1,
263 &(agg_file_view_state_arr[tmp_agg_idx]),
264 REAL_OFF);
265
266 if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
267 send_count_arr[tmp_agg_idx].count = flat_file_p->count;
268 send_count_arr[tmp_agg_idx].fp_ind = disp_off_sz_ext_typesz[0];
269 send_count_arr[tmp_agg_idx].disp = disp_off_sz_ext_typesz[1];
270 send_count_arr[tmp_agg_idx].byte_off = disp_off_sz_ext_typesz[2];
271 send_count_arr[tmp_agg_idx].sz = disp_off_sz_ext_typesz[3];
272 send_count_arr[tmp_agg_idx].ext = disp_off_sz_ext_typesz[4];
273 send_count_arr[tmp_agg_idx].type_sz = disp_off_sz_ext_typesz[5];
274 }
275 }
276
277 #ifdef DEBUG2
278 fprintf(stderr, "my own flattened memtype: ");
279 ADIOI_Print_flatlist_node(flat_mem_p);
280 fprintf(stderr, "my own flattened filetype: ");
281 ADIOI_Print_flatlist_node(flat_file_p);
282 #endif
283
284 if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
285 ret = MPI_Alltoall(send_count_arr, sizeof(amount_and_extra_data_t),
286 MPI_BYTE,
287 recv_count_arr, sizeof(amount_and_extra_data_t),
288 MPI_BYTE, fd->comm);
289 if (ret != MPI_SUCCESS)
290 {
291 fprintf(stderr, "ADIOI_Exchange_file_views: MPI_Alltoall failed "
292 "with error %d", ret);
293 return;
294 }
295 } else {
296 statuses = (MPI_Status *) ADIOI_Malloc(1 + nprocs * sizeof(MPI_Status));
297 if (fd->is_agg) {
298 MPI_Waitall(nprocs, recv_req_arr, statuses);
299 ADIOI_Free(recv_req_arr);
300 }
301 MPI_Waitall(fd->hints->cb_nodes, send_req_arr, statuses);
302 ADIOI_Free(statuses);
303 ADIOI_Free(send_req_arr);
304 }
305 #ifdef DEBUG2
306 if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
307 fprintf(stderr, "send_count_arr:");
308 for (i = 0; i < nprocs; i++)
309 {
310 fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
311 }
312 fprintf(stderr, "\n");
313 fprintf(stderr, "recv_count_arr:");
314 for (i = 0; i < nprocs; i++)
315 {
316 fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
317 }
318 fprintf(stderr, "\n");
319 } else {
320 fprintf(stderr, "send_count_arr:");
321 for (i = 0; i < fd->hints->cb_nodes; i++)
322 {
323 fprintf(stderr, "[%d]=%d ", i, send_count_arr[i].count);
324 }
325 fprintf(stderr, "\n");
326 if (fd->is_agg) {
327 fprintf(stderr, "recv_count_arr:");
328 for (i = 0; i < nprocs; i++)
329 {
330 fprintf(stderr, "[%d]=%d ", i, recv_count_arr[i].count);
331 }
332 fprintf(stderr, "\n");
333 }
334 }
335 #endif
336
337 if (fd->hints->cb_alltoall == ADIOI_HINT_DISABLE) {
338 for (i=0; i < fd->hints->cb_nodes; i++)
339 if (send_count_arr[i].count > 0)
340 send_req_arr_sz++;
341 }
342 /* Figure out how many counts to send/recv */
343 for (i = 0; i < nprocs; i++)
344 {
345 if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
346 if (send_count_arr[i].count > 0)
347 send_req_arr_sz++;
348 }
349 /* Only aggregators should recv*/
350 if (fd->is_agg) {
351 if (recv_count_arr[i].count > 0)
352 {
353 if ((client_file_view_state_arr[i].flat_type_p =
354 (ADIOI_Flatlist_node *) ADIOI_Malloc(
355 sizeof(ADIOI_Flatlist_node))) == NULL)
356 {
357 fprintf(stderr, "ADIOI_Exchange_file_views: malloc "
358 "flat_type_p failed\n");
359 }
360 client_file_view_state_arr[i].flat_type_p->count =
361 recv_count_arr[i].count;
362 client_file_view_state_arr[i].flat_type_p->indices =
363 (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count,
364 sizeof(ADIO_Offset));
365 client_file_view_state_arr[i].flat_type_p->blocklens =
366 (ADIO_Offset *) ADIOI_Calloc(recv_count_arr[i].count,
367 sizeof(ADIO_Offset));
368
369 /* Copy the extra data out of the stuff we Alltoall'd */
370 memcpy (&client_file_view_state_arr[i].fp_ind,
371 &recv_count_arr[i].fp_ind,
372 6*sizeof(ADIO_Offset));
373
374 recv_req_arr_sz++;
375 }
376 }
377 }
378
379 /* Since ADIOI_Calloc may do other things we add the +1
380 * to avoid a 0-size malloc */
381 send_req_arr = (MPI_Request *) ADIOI_Calloc(2*(send_req_arr_sz)+1,
382 sizeof(MPI_Request));
383
384 j = 0;
385 if (recv_req_arr_sz > 0) {
386 assert (fd->is_agg);
387 recv_req_arr = (MPI_Request *) ADIOI_Calloc(2*(recv_req_arr_sz),
388 sizeof(MPI_Request));
389 for (i = 0; i < nprocs; i++) {
390 if (recv_count_arr[i].count > 0) {
391 MPI_Irecv(client_file_view_state_arr[i].flat_type_p->indices,
392 recv_count_arr[i].count, ADIO_OFFSET, i,
393 INDICES, fd->comm, &recv_req_arr[j]);
394 j++;
395 MPI_Irecv(client_file_view_state_arr[i].flat_type_p->blocklens,
396 recv_count_arr[i].count, ADIO_OFFSET, i,
397 BLOCK_LENS, fd->comm, &recv_req_arr[j]);
398 j++;
399 }
400 }
401 }
402
403 if (fd->hints->cb_alltoall != ADIOI_HINT_DISABLE) {
404 j = 0;
405 for (i = 0; i < nprocs; i++) {
406 if (send_count_arr[i].count > 0) {
407 MPI_Isend(flat_file_p->indices,
408 send_count_arr[i].count, ADIO_OFFSET, i,
409 INDICES, fd->comm, &send_req_arr[j]);
410 j++;
411 MPI_Isend(flat_file_p->blocklens,
412 send_count_arr[i].count, ADIO_OFFSET, i,
413 BLOCK_LENS, fd->comm, &send_req_arr[j]);
414 j++;
415 }
416 }
417 } else {
418 j = 0;
419 for (i = 0; i < fd->hints->cb_nodes; i++) {
420 if (send_count_arr[i].count > 0) {
421 MPI_Isend(flat_file_p->indices,
422 send_count_arr[i].count, ADIO_OFFSET,
423 fd->hints->ranklist[i], INDICES, fd->comm,
424 &send_req_arr[j]);
425 j++;
426 MPI_Isend(flat_file_p->blocklens,
427 send_count_arr[i].count, ADIO_OFFSET,
428 fd->hints->ranklist[i], BLOCK_LENS, fd->comm,
429 &send_req_arr[j]);
430 j++;
431 }
432 }
433 }
434
435 /* Since ADIOI_Malloc may do other things we add the +1
436 * to avoid a 0-size malloc */
437 statuses = (MPI_Status *)
438 ADIOI_Malloc(1 + 2 * ADIOI_MAX(send_req_arr_sz,recv_req_arr_sz)
439 * sizeof(MPI_Status));
440
441 if (send_req_arr_sz > 0) {
442 MPI_Waitall(2 * send_req_arr_sz, send_req_arr, statuses);
443 ADIOI_Free(send_count_arr);
444 ADIOI_Free(send_req_arr);
445 }
446 if (recv_req_arr_sz > 0) {
447 MPI_Waitall(2 * recv_req_arr_sz, recv_req_arr, statuses);
448 ADIOI_Free(recv_count_arr);
449 ADIOI_Free(recv_req_arr);
450 }
451 ADIOI_Free(statuses);
452
453 if (fd->is_agg == 1)
454 {
455 ADIOI_init_view_state(file_ptr_type,
456 nprocs,
457 client_file_view_state_arr,
458 TEMP_OFF);
459 ADIOI_init_view_state(file_ptr_type,
460 nprocs,
461 client_file_view_state_arr,
462 REAL_OFF);
463 }
464
465 #ifdef DEBUG
466 if (fd->is_agg == 1)
467 {
468 ADIOI_Flatlist_node *fr_node_p = ADIOI_Flatlist;
469 for (i = 0; i < nprocs; i++)
470 {
471 fprintf(stderr, "client_file_view_state_arr[%d]=(fp_ind=%Ld,"
472 "disp=%Ld,byte_off=%Ld,sz=%Ld,ext=%Ld\n", i,
473 client_file_view_state_arr[i].fp_ind,
474 client_file_view_state_arr[i].disp,
475 client_file_view_state_arr[i].byte_off,
476 client_file_view_state_arr[i].sz,
477 client_file_view_state_arr[i].ext);
478 }
479
480 while (fr_node_p->type !=
481 fd->file_realm_types[fd->my_cb_nodes_index])
482 fr_node_p = fr_node_p->next;
483 assert(fr_node_p != NULL);
484
485 fprintf(stderr, "my file realm (idx=%d,st_off=%Ld) ",
486 fd->my_cb_nodes_index,
487 fd->file_realm_st_offs[fd->my_cb_nodes_index]);
488 ADIOI_Print_flatlist_node(fr_node_p);
489 }
490 #endif
491
492 #ifdef DEBUG2
493 if (fd->is_agg == 1)
494 {
495 for (i = 0; i < nprocs; i++)
496 {
497 fprintf(stderr, "client_file_view_state_arr[%d]: ", i);
498 ADIOI_Print_flatlist_node(
499 client_file_view_state_arr[i].flat_type_p);
500 }
501 }
502 #endif
503 #ifdef AGGREGATION_PROFILE
504 MPE_Log_event (5015, 0, NULL);
505 #endif
506 }
507