1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "adio.h"
7 #include "adio_extern.h"
8 #ifdef AGGREGATION_PROFILE
9 #include "mpe.h"
10 #endif
11 #include <assert.h>
12 /* #define DEBUG */
13 
/* Forward declarations for the file-realm helpers defined later in this
 * file. */

/* Fill the realm arrays with realms derived from a caller-supplied size
 * (fr_size), one entry per nprocs_for_coll. */
void ADIOI_Calc_file_realms_user_size(ADIO_File fd, int fr_size,
                                      int nprocs_for_coll,
                                      ADIO_Offset * file_realm_st_offs,
                                      MPI_Datatype * file_realm_types);
/* Fill the realm arrays by dividing the range
 * [min_st_offset, max_end_offset] among nprocs_for_coll realms. */
void ADIOI_Calc_file_realms_aar(ADIO_File fd, int nprocs_for_coll,
                                int pfr_enabled,
                                ADIO_Offset min_st_offset,
                                ADIO_Offset max_end_offset,
                                ADIO_Offset * file_realm_st_offs, MPI_Datatype * file_realm_types);
/* Fill the realm arrays by dividing the (impending) file size among
 * nprocs_for_coll realms. */
void ADIOI_Calc_file_realms_fsize(ADIO_File fd,
                                  int nprocs_for_coll,
                                  ADIO_Offset max_end_offset,
                                  ADIO_Offset * file_realm_st_offs,
                                  MPI_Datatype * file_realm_types);
/* Create and commit the datatype shared by all realms of a given size. */
void ADIOI_Create_fr_simpletype(int size, int nprocs_for_coll, MPI_Datatype * simpletype);
/* Round a realm's start offset down and its end up to multiples of
 * `alignment`. */
static void align_fr(int fr_size, ADIO_Offset fr_off, int alignment,
                     int *aligned_fr_size, ADIO_Offset * aligned_fr_off);
/* Realm verification hook (the definition in this file is an empty stub). */
void ADIOI_Verify_fr(int nprocs_for_coll, ADIO_Offset * file_realm_st_offs,
                     MPI_Datatype * file_realm_types);
33 
/*
 * ADIOI_Calc_file_realms - compute the file realms (one start offset and one
 * datatype per aggregator) and store them in fd->file_realm_st_offs and
 * fd->file_realm_types.
 *
 * fd             - file handle; fd->hints->cb_nodes gives the number of
 *                  realms, fd->hints->cb_fr_type selects how they are
 *                  computed and fd->hints->cb_pfr tells whether realms
 *                  persist in fd between calls
 * min_st_offset  - lowest file offset of the access being planned
 * max_end_offset - highest file offset of the access being planned
 *
 * Behaviour:
 *  - cb_pfr not enabled: any realm pointers in fd are treated as unallocated
 *    and fresh arrays are allocated on every call.
 *  - one aggregator: the single realm is always reset to cover
 *    [min_st_offset, max_end_offset]; existing arrays are reused.
 *  - several aggregators, no realms in fd yet: arrays are allocated and
 *    filled according to cb_fr_type.
 *  - several aggregators, realms already in fd: they are left untouched.
 */
void ADIOI_Calc_file_realms(ADIO_File fd, ADIO_Offset min_st_offset, ADIO_Offset max_end_offset)
{
    int nprocs_for_coll;
    int file_realm_calc_type;

    MPI_Datatype *file_realm_types = NULL;
    ADIO_Offset *file_realm_st_offs = NULL;

#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5004, 0, NULL);
#endif
#ifdef DEBUG
    printf("ADIOI_Calc_file_realms\n");
#endif

    nprocs_for_coll = fd->hints->cb_nodes;
    file_realm_calc_type = fd->hints->cb_fr_type;

    /* If PFRs are disabled we know these pointers are not allocated */
    if (fd->hints->cb_pfr != ADIOI_HINT_ENABLE) {
        fd->file_realm_st_offs = NULL;
        fd->file_realm_types = NULL;
    }

    if (nprocs_for_coll == 1) {
        /* if there's only one aggregator, we can reset the file
         * realms every single time */
        if (fd->file_realm_st_offs == NULL) {
            file_realm_st_offs = (ADIO_Offset *)
                ADIOI_Malloc(sizeof(ADIO_Offset));
            file_realm_types = (MPI_Datatype *)
                ADIOI_Malloc(sizeof(MPI_Datatype));
        } else {
            file_realm_st_offs = fd->file_realm_st_offs;
            file_realm_types = fd->file_realm_types;
            /* The slot still holds the datatype committed by the previous
             * call; release it before it is overwritten below, otherwise
             * one datatype is leaked per call. */
            if (file_realm_types[0] != MPI_DATATYPE_NULL)
                MPI_Type_free(&file_realm_types[0]);
        }
        *file_realm_st_offs = min_st_offset;
        MPI_Type_contiguous((max_end_offset - min_st_offset + 1), MPI_BYTE, file_realm_types);
        MPI_Type_commit(file_realm_types);
        ADIOI_Flatten_datatype(*file_realm_types);
    } else if (fd->file_realm_st_offs == NULL) {
        file_realm_st_offs = (ADIO_Offset *)
            ADIOI_Malloc(nprocs_for_coll * sizeof(ADIO_Offset));
        file_realm_types = (MPI_Datatype *)
            ADIOI_Malloc(nprocs_for_coll * sizeof(MPI_Datatype));

        if (file_realm_calc_type == ADIOI_FR_AAR) {
            ADIOI_Calc_file_realms_aar(fd, nprocs_for_coll,
                                       fd->hints->cb_pfr,
                                       min_st_offset, max_end_offset,
                                       file_realm_st_offs, file_realm_types);
            /* flatten file realm datatype for future use - only one
             * because all are the same */
            ADIOI_Flatten_datatype(file_realm_types[0]);
        } else if (file_realm_calc_type == ADIOI_FR_FSZ) {
            ADIOI_Calc_file_realms_fsize(fd, nprocs_for_coll, max_end_offset,
                                         file_realm_st_offs, file_realm_types);
            /* flatten file realm datatype for future use - only one
             * because all are the same */
            ADIOI_Flatten_datatype(file_realm_types[0]);
        } else if (file_realm_calc_type == ADIOI_FR_USR_REALMS) {
            /* copy user provided realm datatypes and realm offsets in
             * hints to file descriptor. may also want to verify that
             * the provided file realms are covering (for pfr at
             * least) and non-overlapping.
             * NOTE: not implemented - the arrays allocated above are
             * left unfilled on this path. */
        } else if (file_realm_calc_type > 0) {
            /* a positive cb_fr_type is used directly as the realm size */
            ADIOI_Calc_file_realms_user_size(fd, file_realm_calc_type,
                                             nprocs_for_coll, file_realm_st_offs, file_realm_types);
            /* flatten file realm datatype for future use - only one
             * because all are the same */
            ADIOI_Flatten_datatype(file_realm_types[0]);
        }
    } else {
        /* Realms already exist in fd (only possible when PFRs are enabled,
         * see above): keep them.  Without this branch the stores below
         * would replace them with NULL, leaking both arrays and throwing
         * the persistent realms away. */
        file_realm_st_offs = fd->file_realm_st_offs;
        file_realm_types = fd->file_realm_types;
    }
    fd->file_realm_st_offs = file_realm_st_offs;
    fd->file_realm_types = file_realm_types;
#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5005, 0, NULL);
#endif
}
113 
/*
 * ADIOI_Calc_file_realms_user_size - lay out nprocs_for_coll back-to-back
 * file realms of a requested size.
 *
 * fd                 - file handle; fd->hints->cb_fr_alignment and
 *                      fd->hints->cb_pfr are read
 * fr_size            - requested realm size; rounded by align_fr() before use
 * nprocs_for_coll    - number of realms to produce
 * file_realm_st_offs - out: start offset of each realm
 * file_realm_types   - out: datatype of each realm (every entry receives the
 *                      same handle)
 */
void ADIOI_Calc_file_realms_user_size(ADIO_File fd, int fr_size,
                                      int nprocs_for_coll,
                                      ADIO_Offset * file_realm_st_offs,
                                      MPI_Datatype * file_realm_types)
{
    int realm_bytes;
    int agg;
    ADIO_Offset first_off;
    ADIO_Offset next_off;
    MPI_Datatype realm_type;

    /* round the requested size to the alignment, measured from offset 0 */
    align_fr(fr_size, 0, fd->hints->cb_fr_alignment, &realm_bytes, &first_off);
    ADIOI_Create_fr_simpletype(realm_bytes, nprocs_for_coll, &realm_type);

    /* with persistent file realms enabled the first realm starts at 0 */
    file_realm_st_offs[0] = (fd->hints->cb_pfr == ADIOI_HINT_ENABLE) ? 0 : first_off;
    file_realm_types[0] = realm_type;
#ifdef DEBUG
    printf("file_realm[0] = (%lld, %d)\n", (long long) file_realm_st_offs[0], realm_bytes);
#endif

    /* every following realm begins where the previous one ends */
    next_off = file_realm_st_offs[0];
    for (agg = 1; agg < nprocs_for_coll; agg++) {
        next_off += realm_bytes;
        file_realm_st_offs[agg] = next_off;
        file_realm_types[agg] = realm_type;
#ifdef DEBUG
        printf("file_realm[%d] = (%lld, %d)\n", agg, (long long) file_realm_st_offs[agg],
               realm_bytes);
#endif
    }
}
146 
147 /* takes an extra romio_cb_pfr param to decide whether file realms
148  * should start at byte 0 of the file*/
ADIOI_Calc_file_realms_aar(ADIO_File fd,int nprocs_for_coll,int cb_pfr,ADIO_Offset min_st_offset,ADIO_Offset max_end_offset,ADIO_Offset * file_realm_st_offs,MPI_Datatype * file_realm_types)149 void ADIOI_Calc_file_realms_aar(ADIO_File fd, int nprocs_for_coll, int cb_pfr,
150                                 ADIO_Offset min_st_offset,
151                                 ADIO_Offset max_end_offset,
152                                 ADIO_Offset * file_realm_st_offs, MPI_Datatype * file_realm_types)
153 {
154     int fr_size, aligned_fr_size, i;
155     MPI_Datatype simpletype;
156     ADIO_Offset aligned_start_off;
157     char value[9];
158 
159     fr_size = (max_end_offset - min_st_offset + nprocs_for_coll) / nprocs_for_coll;
160     align_fr(fr_size, min_st_offset, fd->hints->cb_fr_alignment,
161              &aligned_fr_size, &aligned_start_off);
162     fr_size = aligned_fr_size;
163     ADIOI_Create_fr_simpletype(fr_size, nprocs_for_coll, &simpletype);
164     if (cb_pfr == ADIOI_HINT_ENABLE)
165         file_realm_st_offs[0] = 0;
166     else
167         file_realm_st_offs[0] = aligned_start_off;
168     file_realm_types[0] = simpletype;
169 
170 #ifdef DEBUG
171     printf("file_realm[0] = (%lld, %d)\n", (long long) file_realm_st_offs[0], fr_size);
172 #endif
173     for (i = 1; i < nprocs_for_coll; i++) {
174         file_realm_st_offs[i] = file_realm_st_offs[i - 1] + fr_size;
175         file_realm_types[i] = simpletype;
176 #ifdef DEBUG
177         printf("file_realm[%d] = (%lld, %d)\n", i, (long long) file_realm_st_offs[i], fr_size);
178 #endif
179     }
180     if (fd->hints->cb_pfr == ADIOI_HINT_ENABLE) {
181         MPL_snprintf(value, sizeof(value), "%d", fr_size);
182         ADIOI_Info_set(fd->info, "romio_cb_fr_type", value);
183     }
184 }
185 
/*
 * ADIOI_Calc_file_realms_fsize - divide the file among nprocs_for_coll file
 * realms based on its size.
 *
 * The size used is the larger of the current file size (queried through
 * ADIO_Fcntl) and max_end_offset + 1.  The per-realm size is that value
 * divided by nprocs_for_coll (rounded up), then adjusted by align_fr() to
 * fd->hints->cb_fr_alignment.  Realm i starts at i * realm size.
 *
 * file_realm_st_offs - out: start offset of each realm
 * file_realm_types   - out: datatype of each realm (every entry receives the
 *                      same handle)
 */
void ADIOI_Calc_file_realms_fsize(ADIO_File fd, int nprocs_for_coll,
                                  ADIO_Offset max_end_offset,
                                  ADIO_Offset * file_realm_st_offs, MPI_Datatype * file_realm_types)
{
    int fr_size, aligned_fr_size, error_code, i;
    /* kept as an offset so sizes beyond INT_MAX are not truncated */
    ADIO_Offset fsize;
    ADIO_Offset aligned_fr_off;
    ADIO_Fcntl_t fcntl_struct;
    MPI_Datatype simpletype;

    fcntl_struct.fsize = 0;
    ADIO_Fcntl(fd, ADIO_FCNTL_GET_FSIZE, &fcntl_struct, &error_code);
    /* if the query failed, do not trust whatever is in the struct; fall
     * back to the extent of the pending access only */
    if (error_code != MPI_SUCCESS)
        fcntl_struct.fsize = 0;

    /* use impending file size since a write call may lengthen the file */
    fsize = MPL_MAX(fcntl_struct.fsize, max_end_offset + 1);
    fr_size = (int) ((fsize + nprocs_for_coll - 1) / nprocs_for_coll);
    align_fr(fr_size, 0, fd->hints->cb_fr_alignment, &aligned_fr_size, &aligned_fr_off);
    /* use the aligned size, as the other realm calculators in this file do;
     * previously the result of align_fr() was computed and then ignored */
    fr_size = aligned_fr_size;
    ADIOI_Create_fr_simpletype(fr_size, nprocs_for_coll, &simpletype);

    for (i = 0; i < nprocs_for_coll; i++) {
        /* widen before multiplying so the product cannot overflow int */
        file_realm_st_offs[i] = (ADIO_Offset) fr_size * i;
        file_realm_types[i] = simpletype;
    }
}
209 
210 /* creates a datatype with an empty trailing edge */
ADIOI_Create_fr_simpletype(int size,int nprocs_for_coll,MPI_Datatype * simpletype)211 void ADIOI_Create_fr_simpletype(int size, int nprocs_for_coll, MPI_Datatype * simpletype)
212 {
213     int count = 2, blocklens[2];
214     MPI_Aint indices[2];
215     MPI_Datatype old_types[2];
216 
217     blocklens[0] = size;
218     blocklens[1] = 1;
219     indices[0] = 0;
220     indices[1] = size * nprocs_for_coll;
221     old_types[0] = MPI_BYTE;
222     old_types[1] = MPI_UB;
223 
224     MPI_Type_struct(count, blocklens, indices, old_types, simpletype);
225 
226     MPI_Type_commit(simpletype);
227 }
228 
229 /* Verify that file realms are covering (PFRs) and non-overlapping */
ADIOI_Verify_fr(int nprocs_for_coll,ADIO_Offset * file_realm_st_offs,MPI_Datatype * file_realm_types)230 void ADIOI_Verify_fr(int nprocs_for_coll, ADIO_Offset * file_realm_st_offs,
231                      MPI_Datatype * file_realm_types)
232 {
233 }
234 
/*
 * ADIOI_Agg_idx - find the position of `rank` in the aggregator rank list.
 *
 * rank - rank to look up
 * fd   - file handle; the first fd->hints->cb_nodes entries of
 *        fd->hints->ranklist are searched
 *
 * Returns the index of the first entry equal to `rank`, or -1 if `rank`
 * is not in the list.
 */
int ADIOI_Agg_idx(int rank, ADIO_File fd)
{
    int i, cb_nodes, *ranklist;
    cb_nodes = fd->hints->cb_nodes;
    ranklist = fd->hints->ranklist;

    for (i = 0; i < cb_nodes; i++) {
        /* compare against the requested rank; testing the entry for
         * non-zero ignored `rank` and returned the first index whose
         * entry was not rank 0 */
        if (ranklist[i] == rank)
            return i;
    }
    return -1;
}
247 
/*
 * align_fr - expand the region [fr_off, fr_off + fr_size) so that both its
 * start and its size are multiples of `alignment`.
 *
 * fr_size         - size of the unaligned region
 * fr_off          - start offset of the unaligned region
 * alignment       - required alignment; values <= 0 are treated as 1
 *                   (no alignment)
 * aligned_fr_size - out: size of the aligned region
 * aligned_fr_off  - out: start offset rounded down to a multiple of
 *                   `alignment`
 */
static void align_fr(int fr_size, ADIO_Offset fr_off, int alignment,
                     int *aligned_fr_size, ADIO_Offset * aligned_fr_off)
{
    ADIO_Offset fr_end;

    /* the alignment comes from a hint; a zero value would divide by zero
     * below, so fall back to "no alignment" for non-positive values */
    if (alignment <= 0)
        alignment = 1;

    fr_end = fr_off + fr_size;

    /* round the start down ... */
    *aligned_fr_off = fr_off - (fr_off % alignment);
    /* ... and the end up to the next multiple of the alignment */
    *aligned_fr_size = (int) ((fr_end / alignment) * alignment - *aligned_fr_off);
    if (fr_end % alignment)
        *aligned_fr_size += alignment;

    assert(!((*aligned_fr_off % alignment) || (*aligned_fr_size % alignment)));
}
258