1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2005 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2008-2020 University of Houston. All rights reserved.
14 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
15 * reserved.
16 * Copyright (c) 2015-2018 Research Organization for Information Science
17 * and Technology (RIST). All rights reserved.
18 * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
19 * Copyright (c) 2018 DataDirect Networks. All rights reserved.
20 * $COPYRIGHT$
21 *
22 * Additional copyrights may follow
23 *
24 * $HEADER$
25 */
26
27 #include "ompi_config.h"
28
29 #include "mpi.h"
30 #include "opal/class/opal_list.h"
31 #include "opal/mca/base/base.h"
32 #include "ompi/mca/io/io.h"
33 #include "ompi/mca/fs/base/base.h"
34 #include "io_ompio.h"
35 #include "ompi/mca/common/ompio/common_ompio_request.h"
36 #include "ompi/mca/common/ompio/common_ompio_buffer.h"
37
38 #ifdef HAVE_IME_NATIVE_H
39 #include "ompi/mca/fs/ime/fs_ime.h"
40 #endif
41
42
43 int mca_io_ompio_cycle_buffer_size = OMPIO_DEFAULT_CYCLE_BUF_SIZE;
44 int mca_io_ompio_bytes_per_agg = OMPIO_PREALLOC_MAX_BUF_SIZE;
45 int mca_io_ompio_num_aggregators = -1;
46 int mca_io_ompio_record_offset_info = 0;
47 int mca_io_ompio_coll_timing_info = 0;
48 int mca_io_ompio_max_aggregators_ratio=8;
49 int mca_io_ompio_aggregators_cutoff_threshold=3;
50 int mca_io_ompio_overwrite_amode = 1;
51 int mca_io_ompio_verbose_info_parsing = 0;
52
53 int mca_io_ompio_grouping_option=5;
54
55 /*
56 * Private functions
57 */
58 static int register_component(void);
59 static int open_component(void);
60 static int close_component(void);
61 static int init_query(bool enable_progress_threads,
62 bool enable_mpi_threads);
63 static const struct mca_io_base_module_2_0_0_t *
64 file_query (struct ompi_file_t *file,
65 struct mca_io_base_file_t **private_data,
66 int *priority);
67 static int file_unquery(struct ompi_file_t *file,
68 struct mca_io_base_file_t *private_data);
69
70 static int delete_query(const char *filename, struct opal_info_t *info,
71 struct mca_io_base_delete_t **private_data,
72 bool *usable, int *priorty);
73
74 static int delete_select(const char *filename, struct opal_info_t *info,
75 struct mca_io_base_delete_t *private_data);
76
77 static int register_datarep(const char *,
78 MPI_Datarep_conversion_function*,
79 MPI_Datarep_conversion_function*,
80 MPI_Datarep_extent_function*,
81 void*);
82 /*
83 static int io_progress(void);
84
85 */
86
/*
 * Private variables
 */

/* Priority reported by file_query(); MCA parameter "priority". */
static int priority_param = 30;

/* Priority reported by delete_query(); MCA parameter "delete_priority". */
static int delete_priority_param = 30;
92
93
/*
 * Global, component-wide OMPIO mutex because OMPIO is not thread safe.
 *
 * The object is statically zero-initialized here, constructed in
 * open_component() and destructed in close_component().  Within this file
 * it is held around the file-delete call made by delete_select().
 */
opal_mutex_t mca_io_ompio_mutex = {{0}};
98
99
100
101 /*
102 * Public string showing this component's version number
103 */
104 const char *mca_io_ompio_component_version_string =
105 "OMPI/MPI OMPIO io MCA component version " OMPI_VERSION;
106
107
/*
 * Public component descriptor for io/ompio.
 *
 * It names the component "ompio", stamps it with the OMPI version macros
 * and points every component-level hook at the static functions defined
 * in this file.
 */
mca_io_base_component_2_0_0_t mca_io_ompio_component = {
    /* First, the mca_base_component_t struct containing meta information
       about the component itself */

    .io_version = {
        MCA_IO_BASE_VERSION_2_0_0,
        .mca_component_name = "ompio",
        MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
                              OMPI_RELEASE_VERSION),
        /* Life-cycle hooks: open/close the component and register its
           MCA parameters */
        .mca_open_component = open_component,
        .mca_close_component = close_component,
        .mca_register_component_params = register_component,
    },
    .io_data = {
        /* The component is checkpoint ready */
        MCA_BASE_METADATA_PARAM_CHECKPOINT
    },

    /* Initial configuration / Open a new file */

    .io_init_query = init_query,
    .io_file_query = file_query,
    .io_file_unquery = file_unquery,

    /* Delete a file */

    .io_delete_query = delete_query,
    .io_delete_select = delete_select,

    /* User-defined data representations (register_datarep() in this file
       always returns OMPI_ERROR) */
    .io_register_datarep = register_datarep,
};
139
register_component(void)140 static int register_component(void)
141 {
142 priority_param = 30;
143 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
144 "priority", "Priority of the io ompio component",
145 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
146 OPAL_INFO_LVL_9,
147 MCA_BASE_VAR_SCOPE_READONLY,
148 &priority_param);
149 delete_priority_param = 30;
150 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
151 "delete_priority", "Delete priority of the io ompio component",
152 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
153 OPAL_INFO_LVL_9,
154 MCA_BASE_VAR_SCOPE_READONLY,
155 &delete_priority_param);
156
157 mca_io_ompio_record_offset_info = 0;
158 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
159 "record_file_offset_info",
160 "The information of the file offset/length",
161 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
162 OPAL_INFO_LVL_9,
163 MCA_BASE_VAR_SCOPE_READONLY,
164 &mca_io_ompio_record_offset_info);
165
166 mca_io_ompio_coll_timing_info = 0;
167 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
168 "coll_timing_info",
169 "Enable collective algorithm timing information",
170 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
171 OPAL_INFO_LVL_9,
172 MCA_BASE_VAR_SCOPE_READONLY,
173 &mca_io_ompio_coll_timing_info);
174
175 mca_io_ompio_cycle_buffer_size = OMPIO_DEFAULT_CYCLE_BUF_SIZE;
176 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
177 "cycle_buffer_size",
178 "Data size issued by individual reads/writes per call",
179 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
180 OPAL_INFO_LVL_9,
181 MCA_BASE_VAR_SCOPE_READONLY,
182 &mca_io_ompio_cycle_buffer_size);
183
184 mca_io_ompio_bytes_per_agg = OMPIO_PREALLOC_MAX_BUF_SIZE;
185 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
186 "bytes_per_agg",
187 "Size of temporary buffer for collective I/O operations",
188 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
189 OPAL_INFO_LVL_9,
190 MCA_BASE_VAR_SCOPE_READONLY,
191 &mca_io_ompio_bytes_per_agg);
192
193 mca_io_ompio_num_aggregators = -1;
194 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
195 "num_aggregators",
196 "number of aggregators for collective I/O operations",
197 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
198 OPAL_INFO_LVL_9,
199 MCA_BASE_VAR_SCOPE_READONLY,
200 &mca_io_ompio_num_aggregators);
201
202
203 mca_io_ompio_grouping_option = 5;
204 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
205 "grouping_option",
206 "Option for grouping of processes in the aggregator selection "
207 "1: Data volume based grouping 2: maximizing group size uniformity 3: maximimze "
208 "data contiguity 4: hybrid optimization 5: simple (default) "
209 "6: skip refinement step 7: simple+: grouping based on default file view",
210 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
211 OPAL_INFO_LVL_9,
212 MCA_BASE_VAR_SCOPE_READONLY,
213 &mca_io_ompio_grouping_option);
214
215 mca_io_ompio_max_aggregators_ratio = 8;
216 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
217 "max_aggregators_ratio",
218 "Maximum number of processes that can be an aggregator expressed as "
219 "the ratio to the number of process used to open the file"
220 " i.e 1 out of n processes can be an aggregator, with n being specified"
221 " by this mca parameter.",
222 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
223 OPAL_INFO_LVL_9,
224 MCA_BASE_VAR_SCOPE_READONLY,
225 &mca_io_ompio_max_aggregators_ratio);
226
227
228 mca_io_ompio_aggregators_cutoff_threshold=3;
229 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
230 "aggregators_cutoff_threshold",
231 "Relativ cutoff threshold for incrementing the number of aggregators "
232 "in the simple aggregator selection algorithm (5). Lower value "
233 "for this parameter will lead to higher no. of aggregators.",
234 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
235 OPAL_INFO_LVL_9,
236 MCA_BASE_VAR_SCOPE_READONLY,
237 &mca_io_ompio_aggregators_cutoff_threshold);
238
239 mca_io_ompio_overwrite_amode = 1;
240 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
241 "overwrite_amode",
242 "Overwrite WRONLY amode to RDWR to enable data sieving "
243 "1: allow overwrite (default) "
244 "0: do not overwrite amode provided by application ",
245 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
246 OPAL_INFO_LVL_9,
247 MCA_BASE_VAR_SCOPE_READONLY,
248 &mca_io_ompio_overwrite_amode);
249
250 mca_io_ompio_verbose_info_parsing = 0;
251 (void) mca_base_component_var_register(&mca_io_ompio_component.io_version,
252 "verbose_info_parsing",
253 "Provide visual output when parsing info objects "
254 "0: no verbose output (default) "
255 "1: verbose output by rank 0 "
256 "2: verbose output by all ranks ",
257 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
258 OPAL_INFO_LVL_9,
259 MCA_BASE_VAR_SCOPE_READONLY,
260 &mca_io_ompio_verbose_info_parsing);
261
262 return OMPI_SUCCESS;
263 }
264
open_component(void)265 static int open_component(void)
266 {
267 /* Create the mutex */
268 OBJ_CONSTRUCT(&mca_io_ompio_mutex, opal_mutex_t);
269
270 mca_common_ompio_request_init ();
271
272 return mca_common_ompio_set_callbacks(ompi_io_ompio_generate_current_file_view,
273 mca_io_ompio_get_mca_parameter_value);
274 }
275
276
close_component(void)277 static int close_component(void)
278 {
279 mca_common_ompio_request_fini ();
280 mca_common_ompio_buffer_alloc_fini();
281 OBJ_DESTRUCT(&mca_io_ompio_mutex);
282
283 #ifdef HAVE_IME_NATIVE_H
284 mca_fs_ime_native_fini();
285 #endif
286
287 return OMPI_SUCCESS;
288 }
289
290
init_query(bool enable_progress_threads,bool enable_mpi_threads)291 static int init_query(bool enable_progress_threads,
292 bool enable_mpi_threads)
293 {
294 return OMPI_SUCCESS;
295 }
296
297
298 static const struct mca_io_base_module_2_0_0_t *
file_query(struct ompi_file_t * file,struct mca_io_base_file_t ** private_data,int * priority)299 file_query(struct ompi_file_t *file,
300 struct mca_io_base_file_t **private_data,
301 int *priority)
302 {
303 mca_common_ompio_data_t *data;
304
305 *priority = priority_param;
306
307 /* Allocate a space for this module to hang private data (e.g.,
308 the OMPIO file handle) */
309 data = calloc(1, sizeof(mca_common_ompio_data_t));
310 if (NULL == data) {
311 return NULL;
312 }
313
314 *private_data = (struct mca_io_base_file_t*) data;
315
316 /* All done */
317 return &mca_io_ompio_module;
318 }
319
320
file_unquery(struct ompi_file_t * file,struct mca_io_base_file_t * private_data)321 static int file_unquery(struct ompi_file_t *file,
322 struct mca_io_base_file_t *private_data)
323 {
324 /* Free the ompio module-specific data that was allocated in
325 _file_query(), above */
326
327 if (NULL != private_data) {
328 free(private_data);
329 }
330
331 return OMPI_SUCCESS;
332 }
333
334
delete_query(const char * filename,struct opal_info_t * info,struct mca_io_base_delete_t ** private_data,bool * usable,int * priority)335 static int delete_query(const char *filename, struct opal_info_t *info,
336 struct mca_io_base_delete_t **private_data,
337 bool *usable, int *priority)
338 {
339 *priority = delete_priority_param;
340 *usable = true;
341 *private_data = NULL;
342
343 return OMPI_SUCCESS;
344 }
345
delete_select(const char * filename,struct opal_info_t * info,struct mca_io_base_delete_t * private_data)346 static int delete_select(const char *filename, struct opal_info_t *info,
347 struct mca_io_base_delete_t *private_data)
348 {
349 int ret;
350
351 OPAL_THREAD_LOCK (&mca_io_ompio_mutex);
352 ret = mca_common_ompio_file_delete (filename, info);
353 OPAL_THREAD_UNLOCK (&mca_io_ompio_mutex);
354
355 return ret;
356 }
357
/*
 * Data representation registration hook.
 *
 * None of the arguments is examined; the function unconditionally
 * returns OMPI_ERROR.
 */
static int register_datarep(const char *datarep,
                            MPI_Datarep_conversion_function *read_fn,
                            MPI_Datarep_conversion_function *write_fn,
                            MPI_Datarep_extent_function *extent_fn,
                            void *state)
{
    (void) datarep;
    (void) read_fn;
    (void) write_fn;
    (void) extent_fn;
    (void) state;

    return OMPI_ERROR;
}
366
367 /*
368 static int io_progress (void)
369 {
370 return OMPI_SUCCESS;
371 }
372 */
373