1 /* -*- Mode: C; c-basic-offset:4 ; -*- */ 2 /* 3 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana 4 * University Research and Technology 5 * Corporation. All rights reserved. 6 * Copyright (c) 2004-2007 The University of Tennessee and The University 7 * of Tennessee Research Foundation. All rights 8 * reserved. 9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 10 * University of Stuttgart. All rights reserved. 11 * Copyright (c) 2004-2005 The Regents of the University of California. 12 * All rights reserved. 13 * Copyright (c) 2008-2020 University of Houston. All rights reserved. 14 * Copyright (c) 2018 Research Organization for Information Science 15 * and Technology (RIST). All rights reserved. 16 * Copyright (c) 2018 DataDirect Networks. All rights reserved. 17 * $COPYRIGHT$ 18 * 19 * Additional copyrights may follow 20 * 21 * $HEADER$ 22 */ 23 24 #ifndef MCA_COMMON_OMPIO_H 25 #define MCA_COMMON_OMPIO_H 26 27 #include <fcntl.h> 28 29 #include "mpi.h" 30 #include "opal/class/opal_list.h" 31 #include "ompi/errhandler/errhandler.h" 32 #include "ompi/file/file.h" 33 #include "ompi/mca/io/io.h" 34 #include "ompi/mca/fs/fs.h" 35 #include "ompi/mca/fcoll/fcoll.h" 36 #include "ompi/mca/fbtl/fbtl.h" 37 #include "ompi/mca/sharedfp/sharedfp.h" 38 #include "ompi/communicator/communicator.h" 39 #include "ompi/info/info.h" 40 #include "opal/datatype/opal_convertor.h" 41 #include "ompi/datatype/ompi_datatype.h" 42 #include "ompi/request/request.h" 43 44 #define OMPIO_MIN(a, b) (((a) < (b)) ? (a) : (b)) 45 #define OMPIO_MAX(a, b) (((a) < (b)) ? (b) : (a)) 46 47 #define OMPIO_MCA_GET(fh, name) ((fh)->f_get_mca_parameter_value(#name, strlen(#name)+1)) 48 #define OMPIO_MCA_PRINT_INFO(_fh,_infostr,_infoval, _msg ) { \ 49 int _verbose = _fh->f_get_mca_parameter_value("verbose_info_parsing", strlen("verbose_info_parsing")); \ 50 if ( 1==_verbose && 0==_fh->f_rank ) printf("File: %s info: %s value %s %s\n", _fh->f_filename, _infostr, _infoval, _msg); \ 51 if ( 2==_verbose ) printf("File: %s info: %s value %s %s\n", _fh->f_filename, _infostr, _infoval, _msg); \ 52 } 53 54 55 /* 56 * Flags 57 */ 58 #define OMPIO_CONTIGUOUS_MEMORY 0x00000001 59 #define OMPIO_UNIFORM_FVIEW 0x00000002 60 #define OMPIO_FILE_IS_OPEN 0x00000004 61 #define OMPIO_FILE_VIEW_IS_SET 0x00000008 62 #define OMPIO_CONTIGUOUS_FVIEW 0x00000010 63 #define OMPIO_AGGREGATOR_IS_SET 0x00000020 64 #define OMPIO_SHAREDFP_IS_SET 0x00000040 65 #define OMPIO_LOCK_ENTIRE_FILE 0x00000080 66 #define OMPIO_LOCK_NEVER 0x00000100 67 #define OMPIO_LOCK_NOT_THIS_OP 0x00000200 68 #define OMPIO_DATAREP_NATIVE 0x00000400 69 #define OMPIO_COLLECTIVE_OP 0x00000800 70 71 #define OMPIO_ROOT 0 72 73 /*AGGREGATOR GROUPING DECISIONS*/ 74 #define OMPIO_MERGE 1 75 #define OMPIO_SPLIT 2 76 #define OMPIO_RETAIN 3 77 78 #define DATA_VOLUME 1 79 #define UNIFORM_DISTRIBUTION 2 80 #define CONTIGUITY 3 81 #define OPTIMIZE_GROUPING 4 82 #define SIMPLE 5 83 #define NO_REFINEMENT 6 84 #define SIMPLE_PLUS 7 85 86 #define OMPIO_LOCK_ENTIRE_REGION 10 87 #define OMPIO_LOCK_SELECTIVE 11 88 89 #define OMPIO_FCOLL_WANT_TIME_BREAKDOWN 0 90 #define MCA_IO_DEFAULT_FILE_VIEW_SIZE 4*1024*1024 91 92 #define OMPIO_UNIFORM_DIST_THRESHOLD 0.5 93 #define OMPIO_CONTG_THRESHOLD 1048576 94 #define OMPIO_CONTG_FACTOR 8 95 #define OMPIO_DEFAULT_STRIPE_SIZE 1048576 96 #define OMPIO_PROCS_PER_GROUP_TAG 0 97 #define OMPIO_PROCS_IN_GROUP_TAG 1 98 #define OMPIO_MERGE_THRESHOLD 0.5 99 100 #define OMPIO_PERM_NULL -1 101 #define OMPIO_IOVEC_INITIAL_SIZE 100 102 103 enum ompio_fs_type 104 { 105 NONE = 0, 106 UFS = 1, 107 PVFS2 = 2, 108 LUSTRE = 3, 109 PLFS = 4, 110 IME = 5, 111 GPFS = 6 112 }; 113 114 typedef struct mca_common_ompio_io_array_t { 115 void *memory_address; 116 /* we need that of type OMPI_MPI_OFFSET_TYPE */ 117 void *offset; 118 size_t length; 119 /*mca_common_ompio_server_t io_server;*/ 120 } mca_common_ompio_io_array_t; 121 122 123 typedef struct mca_common_ompio_access_array_t{ 124 OMPI_MPI_OFFSET_TYPE *offsets; 125 int *lens; 126 MPI_Aint *mem_ptrs; 127 int count; 128 } mca_common_ompio_access_array_t; 129 130 131 /* forward declaration to keep the compiler happy. */ 132 struct ompio_file_t; 133 typedef int (*mca_common_ompio_generate_current_file_view_fn_t) (struct ompio_file_t *fh, 134 size_t max_data, 135 struct iovec **f_iov, 136 int *iov_count); 137 138 /* functions to retrieve the number of aggregators and the size of the 139 temporary buffer on aggregators from the fcoll modules */ 140 typedef int (*mca_common_ompio_get_mca_parameter_value_fn_t) ( char *mca_parameter_name, int name_length ); 141 142 143 struct mca_common_ompio_print_queue; 144 145 /** 146 * Back-end structure for MPI_File 147 */ 148 struct ompio_file_t { 149 /* General parameters */ 150 int fd; 151 struct ompi_file_t *f_fh; /* pointer back to the file_t structure */ 152 OMPI_MPI_OFFSET_TYPE f_offset; /* byte offset of current position */ 153 OMPI_MPI_OFFSET_TYPE f_disp; /* file_view displacement */ 154 int f_rank; 155 int f_size; 156 int f_amode; 157 int f_perm; 158 ompi_communicator_t *f_comm; 159 const char *f_filename; 160 char *f_datarep; 161 opal_convertor_t *f_mem_convertor; 162 opal_convertor_t *f_file_convertor; 163 opal_info_t *f_info; 164 int32_t f_flags; 165 void *f_fs_ptr; 166 int f_fs_block_size; 167 int f_atomicity; 168 size_t f_stripe_size; 169 int f_stripe_count; 170 size_t f_cc_size; 171 size_t f_avg_view_size; 172 int f_bytes_per_agg; 173 enum ompio_fs_type f_fstype; 174 ompi_request_t *f_split_coll_req; 175 bool f_split_coll_in_use; 176 /* Place for selected sharedfp module to hang it's data. 177 Note: Neither f_sharedfp nor f_sharedfp_component seemed appropriate for this. 178 */ 179 void *f_sharedfp_data; 180 181 182 /* File View parameters */ 183 struct iovec *f_decoded_iov; 184 uint32_t f_iov_count; 185 ompi_datatype_t *f_iov_type; 186 size_t f_position_in_file_view; /* in bytes */ 187 size_t f_total_bytes; /* total bytes read/written within 1 Fview*/ 188 int f_index_in_file_view; 189 ptrdiff_t f_view_extent; 190 size_t f_view_size; 191 ompi_datatype_t *f_etype; 192 ompi_datatype_t *f_filetype; 193 ompi_datatype_t *f_orig_filetype; /* the fileview passed by the user to us */ 194 size_t f_etype_size; 195 196 /* contains IO requests that needs to be read/written */ 197 mca_common_ompio_io_array_t *f_io_array; 198 int f_num_of_io_entries; 199 200 /* Hooks for modules to hang things */ 201 mca_base_component_t *f_fs_component; 202 mca_base_component_t *f_fcoll_component; 203 mca_base_component_t *f_fbtl_component; 204 mca_base_component_t *f_sharedfp_component; 205 206 /* structure of function pointers */ 207 mca_fs_base_module_t *f_fs; 208 mca_fcoll_base_module_t *f_fcoll; 209 mca_fbtl_base_module_t *f_fbtl; 210 mca_sharedfp_base_module_t *f_sharedfp; 211 212 /* Timing information */ 213 struct mca_common_ompio_print_queue *f_coll_write_time; 214 struct mca_common_ompio_print_queue *f_coll_read_time; 215 216 /*initial list of aggregators and groups*/ 217 int *f_init_aggr_list; 218 int f_init_num_aggrs; 219 int f_init_procs_per_group; 220 int *f_init_procs_in_group; 221 222 /* final of aggregators and groups*/ 223 int *f_aggr_list; 224 int f_num_aggrs; 225 int *f_procs_in_group; 226 int f_procs_per_group; 227 228 /* internal ompio functions required by fbtl and fcoll */ 229 mca_common_ompio_generate_current_file_view_fn_t f_generate_current_file_view; 230 231 mca_common_ompio_get_mca_parameter_value_fn_t f_get_mca_parameter_value; 232 }; 233 typedef struct ompio_file_t ompio_file_t; 234 235 struct mca_common_ompio_data_t { 236 ompio_file_t ompio_fh; 237 }; 238 typedef struct mca_common_ompio_data_t mca_common_ompio_data_t; 239 240 241 #include "common_ompio_print_queue.h" 242 #include "common_ompio_aggregators.h" 243 244 OMPI_DECLSPEC int mca_common_ompio_file_write (ompio_file_t *fh, const void *buf, int count, 245 struct ompi_datatype_t *datatype, 246 ompi_status_public_t *status); 247 248 OMPI_DECLSPEC int mca_common_ompio_file_write_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, const void *buf, 249 int count, struct ompi_datatype_t *datatype, 250 ompi_status_public_t *status); 251 252 OMPI_DECLSPEC int mca_common_ompio_file_iwrite (ompio_file_t *fh, const void *buf, int count, 253 struct ompi_datatype_t *datatype, ompi_request_t **request); 254 255 OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, 256 const void *buf, int count, struct ompi_datatype_t *datatype, 257 ompi_request_t **request); 258 259 OMPI_DECLSPEC int mca_common_ompio_file_write_all (ompio_file_t *fh, const void *buf, 260 int count, struct ompi_datatype_t *datatype, 261 ompi_status_public_t *status); 262 263 OMPI_DECLSPEC int mca_common_ompio_file_write_at_all (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, const void *buf, 264 int count, struct ompi_datatype_t *datatype, 265 ompi_status_public_t *status); 266 267 OMPI_DECLSPEC int mca_common_ompio_file_iwrite_all (ompio_file_t *fp, const void *buf, 268 int count, struct ompi_datatype_t *datatype, ompi_request_t **request); 269 270 OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at_all (ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset, const void *buf, 271 int count, struct ompi_datatype_t *datatype, ompi_request_t **request); 272 273 OMPI_DECLSPEC int mca_common_ompio_build_io_array ( ompio_file_t *fh, int index, int cycles, 274 size_t bytes_per_cycle, size_t max_data, uint32_t iov_count, 275 struct iovec *decoded_iov, int *ii, int *jj, size_t *tbw, 276 size_t *spc, mca_common_ompio_io_array_t **io_array, 277 int *num_io_entries ); 278 279 280 OMPI_DECLSPEC int mca_common_ompio_file_read (ompio_file_t *fh, void *buf, int count, 281 struct ompi_datatype_t *datatype, ompi_status_public_t *status); 282 283 OMPI_DECLSPEC int mca_common_ompio_file_read_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, void *buf, 284 int count, struct ompi_datatype_t *datatype, 285 ompi_status_public_t * status); 286 287 OMPI_DECLSPEC int mca_common_ompio_file_iread (ompio_file_t *fh, void *buf, int count, 288 struct ompi_datatype_t *datatype, ompi_request_t **request); 289 290 OMPI_DECLSPEC int mca_common_ompio_file_iread_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, 291 void *buf, int count, struct ompi_datatype_t *datatype, 292 ompi_request_t **request); 293 294 OMPI_DECLSPEC int mca_common_ompio_file_read_all (ompio_file_t *fh, void *buf, int count, struct ompi_datatype_t *datatype, 295 ompi_status_public_t * status); 296 297 OMPI_DECLSPEC int mca_common_ompio_file_read_at_all (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset, 298 void *buf, int count, struct ompi_datatype_t *datatype, 299 ompi_status_public_t * status); 300 301 OMPI_DECLSPEC int mca_common_ompio_file_iread_all (ompio_file_t *fp, void *buf, int count, struct ompi_datatype_t *datatype, 302 ompi_request_t **request); 303 304 OMPI_DECLSPEC int mca_common_ompio_file_iread_at_all (ompio_file_t *fp, OMPI_MPI_OFFSET_TYPE offset, 305 void *buf, int count, struct ompi_datatype_t *datatype, 306 ompi_request_t **request); 307 308 OMPI_DECLSPEC int mca_common_ompio_file_open (ompi_communicator_t *comm, const char *filename, 309 int amode, opal_info_t *info, 310 ompio_file_t *ompio_fh, bool use_sharedfp); 311 312 OMPI_DECLSPEC int mca_common_ompio_file_delete (const char *filename, 313 struct opal_info_t *info); 314 OMPI_DECLSPEC int mca_common_ompio_create_incomplete_file_handle (const char *filename, 315 ompio_file_t **fh); 316 317 OMPI_DECLSPEC int mca_common_ompio_file_close (ompio_file_t *ompio_fh); 318 OMPI_DECLSPEC int mca_common_ompio_file_get_size (ompio_file_t *ompio_fh, OMPI_MPI_OFFSET_TYPE *size); 319 OMPI_DECLSPEC int mca_common_ompio_file_get_position (ompio_file_t *fh,OMPI_MPI_OFFSET_TYPE *offset); 320 OMPI_DECLSPEC int mca_common_ompio_set_explicit_offset (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset); 321 OMPI_DECLSPEC int mca_common_ompio_set_file_defaults (ompio_file_t *fh); 322 OMPI_DECLSPEC int mca_common_ompio_set_view (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE disp, 323 ompi_datatype_t *etype, ompi_datatype_t *filetype, const char *datarep, 324 opal_info_t *info); 325 326 327 /* 328 * Function that takes in a datatype and buffer, and decodes that datatype 329 * into an iovec using the convertor_raw function 330 */ 331 OMPI_DECLSPEC int mca_common_ompio_decode_datatype (struct ompio_file_t *fh, 332 struct ompi_datatype_t *datatype, 333 int count, 334 const void *buf, 335 size_t *max_data, 336 opal_convertor_t *convertor, 337 struct iovec **iov, 338 uint32_t *iov_count); 339 340 OMPI_DECLSPEC int mca_common_ompio_set_callbacks(mca_common_ompio_generate_current_file_view_fn_t generate_current_file_view, 341 mca_common_ompio_get_mca_parameter_value_fn_t get_mca_parameter_value); 342 #endif /* MCA_COMMON_OMPIO_H */ 343