1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4  *                         University Research and Technology
5  *                         Corporation.  All rights reserved.
6  * Copyright (c) 2004-2007 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  *                         University of Stuttgart.  All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  *                         All rights reserved.
13  * Copyright (c) 2008-2020 University of Houston. All rights reserved.
14  * Copyright (c) 2018      Research Organization for Information Science
15  *                         and Technology (RIST). All rights reserved.
16  * Copyright (c) 2018      DataDirect Networks. All rights reserved.
17  * $COPYRIGHT$
18  *
19  * Additional copyrights may follow
20  *
21  * $HEADER$
22  */
23 
24 #ifndef MCA_COMMON_OMPIO_H
25 #define MCA_COMMON_OMPIO_H
26 
27 #include <fcntl.h>
28 
29 #include "mpi.h"
30 #include "opal/class/opal_list.h"
31 #include "ompi/errhandler/errhandler.h"
32 #include "ompi/file/file.h"
33 #include "ompi/mca/io/io.h"
34 #include "ompi/mca/fs/fs.h"
35 #include "ompi/mca/fcoll/fcoll.h"
36 #include "ompi/mca/fbtl/fbtl.h"
37 #include "ompi/mca/sharedfp/sharedfp.h"
38 #include "ompi/communicator/communicator.h"
39 #include "ompi/info/info.h"
40 #include "opal/datatype/opal_convertor.h"
41 #include "ompi/datatype/ompi_datatype.h"
42 #include "ompi/request/request.h"
43 
/*
 * Two-argument minimum / maximum.
 * These are plain textual macros: whichever argument is selected is
 * evaluated twice, so do not pass expressions with side effects.
 */
#define OMPIO_MIN(a, b) (((b) > (a)) ? (a) : (b))
#define OMPIO_MAX(a, b) (((b) > (a)) ? (b) : (a))
46 
/*
 * Query an MCA parameter through the callback stored in the file handle.
 * `name` is written unquoted; it is stringified and handed to the callback
 * together with its length including the terminating NUL.
 */
#define OMPIO_MCA_GET(fh, name) ((fh)->f_get_mca_parameter_value(#name, strlen(#name)+1))

/*
 * Report how an info key was interpreted.  The value the callback returns
 * for "verbose_info_parsing" selects who prints:
 *   1             only the process whose f_rank is 0
 *   2             every process
 *   anything else nobody
 *
 *   _fh       pointer expression giving access to f_get_mca_parameter_value,
 *             f_rank and f_filename (evaluated more than once)
 *   _infostr  info key, printed with %s
 *   _infoval  info value, printed with %s
 *   _msg      trailing free-form text, printed with %s
 *
 * The parameter name is passed with the same length convention as
 * OMPIO_MCA_GET (strlen + 1), so a length-bounded comparison in the callback
 * cannot match a longer name that merely starts with "verbose_info_parsing".
 *
 * NOTE: the macro still expands to a bare { } block so that existing call
 * sites, with or without a trailing semicolon, keep compiling.  Do not use
 * it as the unbraced body of an `if` that is followed by `else`.
 */
#define OMPIO_MCA_PRINT_INFO(_fh,_infostr,_infoval, _msg ) {            \
    int _ompio_verbose = (_fh)->f_get_mca_parameter_value("verbose_info_parsing", strlen("verbose_info_parsing")+1); \
    if ( (1==_ompio_verbose && 0==(_fh)->f_rank) || 2==_ompio_verbose ) { \
        printf("File: %s info: %s value %s %s\n", (_fh)->f_filename, (_infostr), (_infoval), (_msg)); \
    }                                                                   \
    }
53 
54 
/*
 * Flags
 *
 * Every value below is a distinct single bit, so several flags can be
 * combined with bitwise OR and tested with bitwise AND.
 * NOTE(review): presumably these are kept in ompio_file_t::f_flags --
 * confirm against the code that sets and tests them.
 */
#define OMPIO_CONTIGUOUS_MEMORY      0x00000001
#define OMPIO_UNIFORM_FVIEW          0x00000002
#define OMPIO_FILE_IS_OPEN           0x00000004
#define OMPIO_FILE_VIEW_IS_SET       0x00000008
#define OMPIO_CONTIGUOUS_FVIEW       0x00000010
#define OMPIO_AGGREGATOR_IS_SET      0x00000020
#define OMPIO_SHAREDFP_IS_SET        0x00000040
#define OMPIO_LOCK_ENTIRE_FILE       0x00000080
#define OMPIO_LOCK_NEVER             0x00000100
#define OMPIO_LOCK_NOT_THIS_OP       0x00000200
#define OMPIO_DATAREP_NATIVE         0x00000400
#define OMPIO_COLLECTIVE_OP          0x00000800
70 
/* NOTE(review): presumably the rank that acts as root in OMPIO's internal
 * collective operations -- confirm at the call sites. */
#define OMPIO_ROOT                    0

/* Aggregator grouping decisions */
#define OMPIO_MERGE                     1
#define OMPIO_SPLIT                     2
#define OMPIO_RETAIN                    3

/*
 * NOTE(review): these seven selectors presumably name the strategies used
 * when forming or refining aggregator groups -- confirm against the
 * aggregator code.  They carry no OMPIO_ prefix, so they can collide with
 * identifiers defined by other headers.
 */
#define DATA_VOLUME                     1
#define UNIFORM_DISTRIBUTION            2
#define CONTIGUITY                      3
#define OPTIMIZE_GROUPING               4
#define SIMPLE                          5
#define NO_REFINEMENT                   6
#define SIMPLE_PLUS                     7

/* NOTE(review): presumably file-locking modes; these are plain values, not
 * bits of the flag set defined earlier in this header -- confirm usage. */
#define OMPIO_LOCK_ENTIRE_REGION  10
#define OMPIO_LOCK_SELECTIVE      11
/* Defined as 0 in this header.
 * NOTE(review): presumably a compile-time switch for timing breakdowns in
 * the fcoll components -- confirm where it is tested. */
#define OMPIO_FCOLL_WANT_TIME_BREAKDOWN 0

/* 4 MiB.  The replacement list is parenthesized so the macro behaves as a
 * single value next to operators of equal or higher precedence
 * (e.g. `x / MCA_IO_DEFAULT_FILE_VIEW_SIZE`, `x % MCA_IO_DEFAULT_FILE_VIEW_SIZE`). */
#define MCA_IO_DEFAULT_FILE_VIEW_SIZE (4*1024*1024)
91 
/*
 * Tuning constants.
 * NOTE(review): presumably consumed by the aggregator grouping logic
 * (distribution / contiguity / merge thresholds, default stripe size in
 * bytes) and the two *_TAG values presumably are message tags used while
 * exchanging group information -- confirm against the users of each name.
 */
#define OMPIO_UNIFORM_DIST_THRESHOLD     0.5
#define OMPIO_CONTG_THRESHOLD        1048576
#define OMPIO_CONTG_FACTOR                 8
#define OMPIO_DEFAULT_STRIPE_SIZE    1048576
#define OMPIO_PROCS_PER_GROUP_TAG          0
#define OMPIO_PROCS_IN_GROUP_TAG           1
#define OMPIO_MERGE_THRESHOLD            0.5
99 
/* Sentinel value -1.  Parenthesized so the unary minus cannot bind to a
 * neighbouring operator at the point of use.
 * NOTE(review): presumably means "no permission value supplied" for
 * ompio_file_t::f_perm -- confirm. */
#define OMPIO_PERM_NULL               (-1)
/* NOTE(review): presumably the initial number of iovec entries allocated
 * when building iovec arrays -- confirm. */
#define OMPIO_IOVEC_INITIAL_SIZE      100
102 
/*
 * Identifiers for the file system types OMPIO distinguishes; this is the
 * type of the f_fstype member of struct ompio_file_t.  The numeric values
 * are spelled out explicitly and must not be renumbered.
 */
enum ompio_fs_type {
    NONE   = 0,
    UFS    = 1,
    PVFS2  = 2,
    LUSTRE = 3,
    PLFS   = 4,
    IME    = 5,
    GPFS   = 6
};
113 
/*
 * One entry of an I/O request list; struct ompio_file_t keeps an array of
 * these in f_io_array together with the entry count f_num_of_io_entries.
 */
typedef struct mca_common_ompio_io_array_t {
    /* NOTE(review): presumably the address of the data in user/packed
       memory for this entry -- confirm. */
    void                 *memory_address;
    /* we need that of type OMPI_MPI_OFFSET_TYPE */
    /* NOTE(review): the field is declared void * although the remark above
       asks for an offset type; presumably the file offset is stored in the
       pointer value itself -- confirm how producers/consumers encode it. */
    void                 *offset;
    /* NOTE(review): presumably the length of the entry in bytes -- confirm. */
    size_t               length;
    /*mca_common_ompio_server_t io_server;*/
} mca_common_ompio_io_array_t;
121 
122 
/*
 * Three parallel arrays plus an element count.
 * NOTE(review): presumably entry i describes one access as file offset
 * offsets[i], length lens[i] and memory address mem_ptrs[i], with `count`
 * valid entries in each array -- confirm against the code that fills it.
 */
typedef struct mca_common_ompio_access_array_t{
    OMPI_MPI_OFFSET_TYPE *offsets;
    int *lens;
    MPI_Aint *mem_ptrs;
    int count;
} mca_common_ompio_access_array_t;
129 
130 
/* forward declaration to keep the compiler happy. */
struct ompio_file_t;

/*
 * Callback type stored in ompio_file_t::f_generate_current_file_view.
 * Takes the file handle and a size, and returns an iovec array and its
 * entry count through the two out-parameters.
 * NOTE(review): presumably max_data is a byte count and the returned iovec
 * describes the file regions for the next max_data bytes of the current
 * view; ownership of *f_iov is not visible here -- confirm.
 */
typedef int (*mca_common_ompio_generate_current_file_view_fn_t) (struct ompio_file_t *fh,
							         size_t max_data,
							         struct iovec **f_iov,
							         int *iov_count);

/* functions to retrieve the number of aggregators and the size of the
   temporary buffer on aggregators from the fcoll modules */
/*
 * Callback type stored in ompio_file_t::f_get_mca_parameter_value; it
 * receives a parameter name and a length and returns the value as an int.
 * NOTE(review): the macros in this header pass string literals as the name;
 * a const char * parameter would describe that more accurately, but
 * changing it alters the callback type for every implementation.
 */
typedef int (*mca_common_ompio_get_mca_parameter_value_fn_t) ( char *mca_parameter_name, int name_length );


/* Forward declaration; struct ompio_file_t only stores pointers to it. */
struct mca_common_ompio_print_queue;
144 
/**
 * Back-end structure for MPI_File
 *
 * Groups the per-file state used by OMPIO: general parameters, the current
 * file view, the list of pending I/O entries, pointers to the fs / fcoll /
 * fbtl / sharedfp components and modules, timing queues, aggregator lists,
 * and two callbacks used by the fbtl and fcoll frameworks.
 */
struct ompio_file_t {
    /* General parameters */
    int                    fd;
    struct ompi_file_t    *f_fh;     /* pointer back to the file_t structure */
    OMPI_MPI_OFFSET_TYPE   f_offset; /* byte offset of current position */
    OMPI_MPI_OFFSET_TYPE   f_disp;   /* file_view displacement */
    int                    f_rank;   /* NOTE(review): presumably rank in f_comm -- confirm */
    int                    f_size;   /* NOTE(review): presumably size of f_comm -- confirm */
    int                    f_amode;
    int                    f_perm;
    ompi_communicator_t   *f_comm;
    const char            *f_filename;
    char                  *f_datarep;
    opal_convertor_t      *f_mem_convertor;
    opal_convertor_t      *f_file_convertor;
    opal_info_t           *f_info;
    int32_t                f_flags;
    void                  *f_fs_ptr;
    int                    f_fs_block_size;
    int                    f_atomicity;
    size_t                 f_stripe_size;
    int                    f_stripe_count;
    size_t                 f_cc_size;
    size_t                 f_avg_view_size;
    int                    f_bytes_per_agg;
    enum ompio_fs_type     f_fstype;
    ompi_request_t        *f_split_coll_req;
    bool                   f_split_coll_in_use;
    /* Place for the selected sharedfp module to hang its data.
       Note: Neither f_sharedfp nor f_sharedfp_component seemed appropriate for this.
    */
    void                  *f_sharedfp_data;


    /* File View parameters */
    struct iovec     *f_decoded_iov;
    uint32_t          f_iov_count;
    ompi_datatype_t  *f_iov_type;
    size_t            f_position_in_file_view; /* in bytes */
    size_t            f_total_bytes; /* total bytes read/written within 1 Fview*/
    int               f_index_in_file_view;
    ptrdiff_t         f_view_extent;
    size_t            f_view_size;
    ompi_datatype_t  *f_etype;
    ompi_datatype_t  *f_filetype;
    ompi_datatype_t  *f_orig_filetype; /* the fileview passed by the user to us */
    size_t            f_etype_size;

    /* contains the I/O requests that need to be read/written */
    mca_common_ompio_io_array_t *f_io_array;
    int                      f_num_of_io_entries;

    /* Hooks for modules to hang things */
    mca_base_component_t *f_fs_component;
    mca_base_component_t *f_fcoll_component;
    mca_base_component_t *f_fbtl_component;
    mca_base_component_t *f_sharedfp_component;

    /* structure of function pointers */
    mca_fs_base_module_t       *f_fs;
    mca_fcoll_base_module_t    *f_fcoll;
    mca_fbtl_base_module_t     *f_fbtl;
    mca_sharedfp_base_module_t *f_sharedfp;

    /* Timing information  */
    struct mca_common_ompio_print_queue *f_coll_write_time;
    struct mca_common_ompio_print_queue *f_coll_read_time;

    /* initial list of aggregators and groups */
    int *f_init_aggr_list;
    int  f_init_num_aggrs;
    int  f_init_procs_per_group;
    int *f_init_procs_in_group;

    /* final list of aggregators and groups */
    int *f_aggr_list;
    int  f_num_aggrs;
    int *f_procs_in_group;
    int  f_procs_per_group;

    /* internal ompio functions required by fbtl and fcoll */
    mca_common_ompio_generate_current_file_view_fn_t f_generate_current_file_view;

    /* invoked by the OMPIO_MCA_GET and OMPIO_MCA_PRINT_INFO macros */
    mca_common_ompio_get_mca_parameter_value_fn_t          f_get_mca_parameter_value;
};
typedef struct ompio_file_t ompio_file_t;
234 
/*
 * Wrapper holding one ompio_file_t by value.
 * NOTE(review): presumably the per-file data object that the io/ompio
 * module attaches to an MPI file -- confirm at the allocation site.
 */
struct mca_common_ompio_data_t {
    ompio_file_t ompio_fh;
};
typedef struct mca_common_ompio_data_t mca_common_ompio_data_t;
239 
240 
241 #include "common_ompio_print_queue.h"
242 #include "common_ompio_aggregators.h"
243 
/*
 * Write interface.
 *
 * Every function takes the ompio file handle, a source buffer, an element
 * count and a datatype, and returns an int.
 * NOTE(review): the return convention (presumably OMPI_SUCCESS or an OMPI
 * error code) is not visible in this header -- confirm.
 *
 * Pattern visible in the signatures:
 *   ..._at...    additionally takes an explicit OMPI_MPI_OFFSET_TYPE offset
 *   ...iwrite... hands back an ompi_request_t through `request` instead of
 *                filling in a status
 *   ..._all      NOTE(review): presumably the collective counterparts of
 *                the calls without the suffix -- confirm.
 */
OMPI_DECLSPEC int mca_common_ompio_file_write (ompio_file_t *fh, const void *buf,  int count,
                                               struct ompi_datatype_t *datatype,
                                               ompi_status_public_t *status);

OMPI_DECLSPEC int mca_common_ompio_file_write_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,  const void *buf,
                                                  int count,  struct ompi_datatype_t *datatype,
                                                  ompi_status_public_t *status);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite (ompio_file_t *fh, const void *buf, int count,
                                                struct ompi_datatype_t *datatype, ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at (ompio_file_t *fh,  OMPI_MPI_OFFSET_TYPE offset,
                                                   const void *buf,  int count,  struct ompi_datatype_t *datatype,
                                                   ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_write_all (ompio_file_t *fh, const void *buf,
                                                   int count, struct ompi_datatype_t *datatype,
                                                   ompi_status_public_t *status);

OMPI_DECLSPEC int mca_common_ompio_file_write_at_all (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, const void *buf,
                                                      int count, struct ompi_datatype_t *datatype,
                                                      ompi_status_public_t *status);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite_all (ompio_file_t *fp, const void *buf,
                                                    int count, struct ompi_datatype_t *datatype, ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset, const void *buf,
                                                       int count, struct ompi_datatype_t *datatype, ompi_request_t **request);

/*
 * Produces an array of mca_common_ompio_io_array_t entries and its length
 * through the `io_array` / `num_io_entries` out-parameters, from a decoded
 * iovec list.
 * NOTE(review): presumably `index`/`cycles`/`bytes_per_cycle` select which
 * slice of the request is built, and `ii`, `jj`, `tbw`, `spc` carry
 * iteration state between successive calls -- the header does not say;
 * confirm against the implementation before relying on it.
 */
OMPI_DECLSPEC int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles,
                                                    size_t bytes_per_cycle, size_t max_data, uint32_t iov_count,
                                                    struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw,
                                                    size_t *spc, mca_common_ompio_io_array_t **io_array,
                                                    int *num_io_entries );
278 
279 
/*
 * Read interface.
 *
 * Every function takes the ompio file handle, a destination buffer, an
 * element count and a datatype, and returns an int.
 * NOTE(review): the return convention (presumably OMPI_SUCCESS or an OMPI
 * error code) is not visible in this header -- confirm.
 *
 * Pattern visible in the signatures:
 *   ..._at...   additionally takes an explicit OMPI_MPI_OFFSET_TYPE offset
 *   ...iread... hands back an ompi_request_t through `request` instead of
 *               filling in a status
 *   ..._all     NOTE(review): presumably the collective counterparts of the
 *               calls without the suffix -- confirm.
 */
OMPI_DECLSPEC int mca_common_ompio_file_read (ompio_file_t *fh,  void *buf,  int count,
                                              struct ompi_datatype_t *datatype, ompi_status_public_t *status);

OMPI_DECLSPEC int mca_common_ompio_file_read_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,  void *buf,
                                                 int count, struct ompi_datatype_t *datatype,
                                                 ompi_status_public_t * status);

OMPI_DECLSPEC int mca_common_ompio_file_iread (ompio_file_t *fh, void *buf, int count,
                                               struct ompi_datatype_t *datatype, ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_iread_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
                                                  void *buf, int count, struct ompi_datatype_t *datatype,
                                                  ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_read_all (ompio_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype,
                                                  ompi_status_public_t * status);

OMPI_DECLSPEC int mca_common_ompio_file_read_at_all (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
                                                     void *buf, int count, struct ompi_datatype_t *datatype,
                                                     ompi_status_public_t * status);

OMPI_DECLSPEC int mca_common_ompio_file_iread_all (ompio_file_t *fp, void *buf, int count, struct ompi_datatype_t *datatype,
                                                   ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_iread_at_all (ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset,
                                                      void *buf, int count, struct ompi_datatype_t *datatype,
                                                      ompi_request_t **request);
307 
/*
 * File management interface: open, delete, close, size/position queries,
 * defaults and file view.  All functions return an int.
 * NOTE(review): the return convention (presumably OMPI_SUCCESS or an OMPI
 * error code) is not visible in this header -- confirm.
 */

/* Opens `filename` on communicator `comm` with access mode `amode` and the
 * given info object, operating on the caller-supplied `ompio_fh`.
 * NOTE(review): `use_sharedfp` presumably controls whether a shared file
 * pointer module is set up -- confirm. */
OMPI_DECLSPEC int mca_common_ompio_file_open (ompi_communicator_t *comm, const char *filename,
                                              int amode, opal_info_t *info,
                                              ompio_file_t *ompio_fh, bool use_sharedfp);

OMPI_DECLSPEC int mca_common_ompio_file_delete (const char *filename,
                                                struct opal_info_t *info);
/* Returns a file handle through `fh`.
 * NOTE(review): judging by the name the handle is only partially
 * initialized, and who releases it is not visible here -- confirm. */
OMPI_DECLSPEC int mca_common_ompio_create_incomplete_file_handle (const char *filename,
                                                                  ompio_file_t **fh);

OMPI_DECLSPEC int mca_common_ompio_file_close (ompio_file_t *ompio_fh);
/* The two getters return their result through the pointer argument. */
OMPI_DECLSPEC int mca_common_ompio_file_get_size (ompio_file_t *ompio_fh, OMPI_MPI_OFFSET_TYPE *size);
OMPI_DECLSPEC int mca_common_ompio_file_get_position (ompio_file_t *fh,OMPI_MPI_OFFSET_TYPE *offset);
OMPI_DECLSPEC int mca_common_ompio_set_explicit_offset (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset);
OMPI_DECLSPEC int mca_common_ompio_set_file_defaults (ompio_file_t *fh);
/* Takes a displacement, etype, filetype, data representation string and
 * info object.
 * NOTE(review): presumably the same arguments as MPI_File_set_view --
 * confirm. */
OMPI_DECLSPEC int mca_common_ompio_set_view (ompio_file_t *fh,  OMPI_MPI_OFFSET_TYPE disp,
                                             ompi_datatype_t *etype,  ompi_datatype_t *filetype, const char *datarep,
                                             opal_info_t *info);
325 
326 
/*
 * Function that takes in a datatype and buffer, and decodes that datatype
 * into an iovec using the convertor_raw function.
 *
 * The iovec array and its entry count are returned through `iov` and
 * `iov_count`; `max_data` is an out-parameter as well.
 * NOTE(review): presumably *max_data receives the total number of bytes
 * described by (datatype, count), and the caller owns and frees *iov --
 * neither is visible in this header; confirm against the implementation.
 */
OMPI_DECLSPEC int mca_common_ompio_decode_datatype (struct ompio_file_t *fh,
                                                    struct ompi_datatype_t *datatype,
                                                    int count,
                                                    const void *buf,
                                                    size_t *max_data,
                                                    opal_convertor_t *convertor,
                                                    struct iovec **iov,
                                                    uint32_t *iov_count);

/*
 * Registers the two callbacks whose types match the
 * f_generate_current_file_view and f_get_mca_parameter_value members of
 * struct ompio_file_t.
 * NOTE(review): presumably stored globally and copied into each file handle
 * when it is set up -- confirm.
 */
OMPI_DECLSPEC int mca_common_ompio_set_callbacks(mca_common_ompio_generate_current_file_view_fn_t generate_current_file_view,
                                                 mca_common_ompio_get_mca_parameter_value_fn_t get_mca_parameter_value);
342 #endif /* MCA_COMMON_OMPIO_H */
343