1 /*
2  *  Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3  *                          University Research and Technology
4  *                          Corporation.  All rights reserved.
5  *  Copyright (c) 2004-2016 The University of Tennessee and The University
6  *                          of Tennessee Research Foundation.  All rights
7  *                          reserved.
8  *  Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  *                          University of Stuttgart.  All rights reserved.
10  *  Copyright (c) 2004-2005 The Regents of the University of California.
11  *                          All rights reserved.
12  *  Copyright (c) 2008-2016 University of Houston. All rights reserved.
13  *  $COPYRIGHT$
14  *
15  *  Additional copyrights may follow
16  *
17  *  $HEADER$
18  */
19 
20 #include "ompi_config.h"
21 
22 #include "ompi/communicator/communicator.h"
23 #include "ompi/info/info.h"
24 #include "ompi/file/file.h"
25 #include "ompi/mca/fs/fs.h"
26 #include "ompi/mca/fs/base/base.h"
27 #include "ompi/mca/fcoll/fcoll.h"
28 #include "ompi/mca/fcoll/base/base.h"
29 #include "ompi/mca/fbtl/fbtl.h"
30 #include "ompi/mca/fbtl/base/base.h"
31 
32 #include "io_ompio.h"
33 #include "ompi/mca/common/ompio/common_ompio_request.h"
34 #include "math.h"
35 #include <unistd.h>
36 
/* Read and write routines are split into two interfaces.
**   The
**   mca_io_ompio_file_read/write[_at]
**
** routines are the ones registered with the ompio modules.
** The
**
** mca_common_ompio_file_read/write[_at]
**
** routines are used e.g. from the shared file pointer modules.
** The main difference is that the first set takes an ompi_file_t
** as a file pointer argument, while the second uses the ompio internal
** mca_io_ompio_file_t structure.
*/
51 
/*
 * Blocking read through an ompi_file_t handle.
 *
 * Looks up the ompio data attached to fp->f_io_selected_data and hands the
 * request to mca_common_ompio_file_read() on the embedded ompio file handle.
 * The call is bracketed by OPAL_THREAD_LOCK / OPAL_THREAD_UNLOCK on
 * fp->f_mutex.
 *
 * Returns the value produced by mca_common_ompio_file_read().
 */
int mca_io_ompio_file_read (ompi_file_t *fp,
                            void *buf,
                            int count,
                            struct ompi_datatype_t *datatype,
                            ompi_status_public_t *status)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fp->f_io_selected_data;
    int rc;

    OPAL_THREAD_LOCK(&fp->f_mutex);
    rc = mca_common_ompio_file_read (&io_data->ompio_fh, buf, count, datatype, status);
    OPAL_THREAD_UNLOCK(&fp->f_mutex);

    return rc;
}
68 
/*
 * Blocking read at an explicit offset through an ompi_file_t handle.
 *
 * Forwards to mca_common_ompio_file_read_at() on the ompio file handle
 * embedded in fh->f_io_selected_data, with the call bracketed by
 * OPAL_THREAD_LOCK / OPAL_THREAD_UNLOCK on fh->f_mutex.
 *
 * Returns the value produced by mca_common_ompio_file_read_at().
 */
int mca_io_ompio_file_read_at (ompi_file_t *fh,
                               OMPI_MPI_OFFSET_TYPE offset,
                               void *buf,
                               int count,
                               struct ompi_datatype_t *datatype,
                               ompi_status_public_t * status)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    int rc;

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = mca_common_ompio_file_read_at (&io_data->ompio_fh, offset, buf, count,
                                        datatype, status);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
86 
/*
 * Non-blocking read through an ompi_file_t handle.
 *
 * Forwards to mca_common_ompio_file_iread() on the ompio file handle
 * embedded in fh->f_io_selected_data; the request pointer is passed through
 * untouched. The call is bracketed by OPAL_THREAD_LOCK / OPAL_THREAD_UNLOCK
 * on fh->f_mutex.
 *
 * Returns the value produced by mca_common_ompio_file_iread().
 */
int mca_io_ompio_file_iread (ompi_file_t *fh,
                             void *buf,
                             int count,
                             struct ompi_datatype_t *datatype,
                             ompi_request_t **request)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    int rc;

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = mca_common_ompio_file_iread (&io_data->ompio_fh, buf, count, datatype, request);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
103 
104 
/*
 * Non-blocking read at an explicit offset through an ompi_file_t handle.
 *
 * Forwards to mca_common_ompio_file_iread_at() on the ompio file handle
 * embedded in fh->f_io_selected_data, with the call bracketed by
 * OPAL_THREAD_LOCK / OPAL_THREAD_UNLOCK on fh->f_mutex.
 *
 * Returns the value produced by mca_common_ompio_file_iread_at().
 */
int mca_io_ompio_file_iread_at (ompi_file_t *fh,
                                OMPI_MPI_OFFSET_TYPE offset,
                                void *buf,
                                int count,
                                struct ompi_datatype_t *datatype,
                                ompi_request_t **request)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    int rc;

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = mca_common_ompio_file_iread_at (&io_data->ompio_fh, offset, buf, count,
                                         datatype, request);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
122 
123 
124 /* Infrastructure for collective operations  */
125 /******************************************************/
/*
 * Blocking collective read through an ompi_file_t handle.
 *
 * Invokes the fcoll_file_read_all entry of the f_fcoll module attached to the
 * ompio file handle, bracketed by OPAL_THREAD_LOCK / OPAL_THREAD_UNLOCK on
 * fh->f_mutex.
 *
 * Unless status is MPI_STATUS_IGNORE, status->_ucount is afterwards set to
 * count times the size reported by opal_datatype_type_size() for the
 * datatype; this happens independently of the return code of the read.
 *
 * Returns the value produced by fcoll_file_read_all.
 */
int mca_io_ompio_file_read_all (ompi_file_t *fh,
                                void *buf,
                                int count,
                                struct ompi_datatype_t *datatype,
                                ompi_status_public_t * status)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    size_t type_size;
    int rc;

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = io_data->ompio_fh.f_fcoll->fcoll_file_read_all (&io_data->ompio_fh,
                                                         buf, count, datatype, status);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    /* Nothing to fill in when the caller is not interested in the status. */
    if ( MPI_STATUS_IGNORE == status ) {
        return rc;
    }

    opal_datatype_type_size (&datatype->super, &type_size);
    status->_ucount = count * type_size;

    return rc;
}
154 
/*
 * Non-blocking collective read through an ompi_file_t handle.
 *
 * If the f_fcoll module of the ompio file handle provides
 * fcoll_file_iread_all, that entry is used. Otherwise the operation is
 * emulated with the individual non-blocking read
 * mca_common_ompio_file_iread(). Either path runs between OPAL_THREAD_LOCK
 * and OPAL_THREAD_UNLOCK on fh->f_mutex.
 *
 * Returns the value produced by whichever routine was called.
 */
int mca_io_ompio_file_iread_all (ompi_file_t *fh,
                                 void *buf,
                                 int count,
                                 struct ompi_datatype_t *datatype,
                                 ompi_request_t **request)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    mca_io_ompio_file_t *ompio_fh = &io_data->ompio_fh;
    int rc;

    OPAL_THREAD_LOCK(&fh->f_mutex);
    if ( NULL == ompio_fh->f_fcoll->fcoll_file_iread_all ) {
        /* The selected fcoll component has no non-blocking collective
           read; fall back to an individual non-blocking read. */
        rc = mca_common_ompio_file_iread (ompio_fh, buf, count, datatype, request);
    }
    else {
        rc = ompio_fh->f_fcoll->fcoll_file_iread_all (ompio_fh, buf, count,
                                                      datatype, request);
    }
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
186 
187 
/*
 * Blocking collective read at an explicit offset through an ompi_file_t
 * handle.
 *
 * Forwards to mca_common_ompio_file_read_at_all() on the ompio file handle
 * embedded in fh->f_io_selected_data, with the call bracketed by
 * OPAL_THREAD_LOCK / OPAL_THREAD_UNLOCK on fh->f_mutex.
 *
 * Returns the value produced by mca_common_ompio_file_read_at_all().
 */
int mca_io_ompio_file_read_at_all (ompi_file_t *fh,
                                   OMPI_MPI_OFFSET_TYPE offset,
                                   void *buf,
                                   int count,
                                   struct ompi_datatype_t *datatype,
                                   ompi_status_public_t * status)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    int rc;

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = mca_common_ompio_file_read_at_all (&io_data->ompio_fh, offset, buf, count,
                                            datatype, status);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
205 
/*
 * Non-blocking collective read at an explicit offset through an ompi_file_t
 * handle.
 *
 * Forwards to mca_common_ompio_file_iread_at_all() on the ompio file handle
 * embedded in fh->f_io_selected_data, with the call bracketed by
 * OPAL_THREAD_LOCK / OPAL_THREAD_UNLOCK on fh->f_mutex.
 *
 * Returns the value produced by mca_common_ompio_file_iread_at_all().
 */
int mca_io_ompio_file_iread_at_all (ompi_file_t *fh,
                                    OMPI_MPI_OFFSET_TYPE offset,
                                    void *buf,
                                    int count,
                                    struct ompi_datatype_t *datatype,
                                    ompi_request_t **request)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    int rc;

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = mca_common_ompio_file_iread_at_all (&io_data->ompio_fh, offset, buf, count,
                                             datatype, request);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
222 
223 
224 /* Infrastructure for shared file pointer operations
225 ** (individual and ordered)*/
226 /******************************************************/
/*
 * Blocking read using the shared file pointer.
 *
 * Picks up the sharedfp module stored in f_sharedfp of the ompio file
 * handle. When no module is set, a message is written with opal_output()
 * and OMPI_ERROR is returned without taking the lock. Otherwise the
 * module's sharedfp_read entry is called between OPAL_THREAD_LOCK and
 * OPAL_THREAD_UNLOCK on fp->f_mutex and its result is returned.
 */
int mca_io_ompio_file_read_shared (ompi_file_t *fp,
                                   void *buf,
                                   int count,
                                   struct ompi_datatype_t *datatype,
                                   ompi_status_public_t * status)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fp->f_io_selected_data;
    mca_io_ompio_file_t *ompio_fh = &io_data->ompio_fh;
    mca_sharedfp_base_module_t *sharedfp =
        (mca_sharedfp_base_module_t *)(ompio_fh->f_sharedfp);
    int rc;

    /* Without a shared file pointer module the operation cannot proceed. */
    if ( NULL == sharedfp ) {
        opal_output(0, "No shared file pointer component found for the given communicator. Can not execute\n");
        return OMPI_ERROR;
    }

    OPAL_THREAD_LOCK(&fp->f_mutex);
    rc = sharedfp->sharedfp_read (ompio_fh, buf, count, datatype, status);
    OPAL_THREAD_UNLOCK(&fp->f_mutex);

    return rc;
}
253 
/*
 * Non-blocking read using the shared file pointer.
 *
 * Picks up the sharedfp module stored in f_sharedfp of the ompio file
 * handle. When no module is set, a message is written with opal_output()
 * and OMPI_ERROR is returned without taking the lock. Otherwise the
 * module's sharedfp_iread entry is called between OPAL_THREAD_LOCK and
 * OPAL_THREAD_UNLOCK on fh->f_mutex and its result is returned.
 */
int mca_io_ompio_file_iread_shared (ompi_file_t *fh,
                                    void *buf,
                                    int count,
                                    struct ompi_datatype_t *datatype,
                                    ompi_request_t **request)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    mca_io_ompio_file_t *file = &io_data->ompio_fh;
    mca_sharedfp_base_module_t *sharedfp =
        (mca_sharedfp_base_module_t *)(file->f_sharedfp);
    int rc;

    /* Without a shared file pointer module the operation cannot proceed. */
    if ( NULL == sharedfp ) {
        opal_output(0, "No shared file pointer component found for the given communicator. Can not execute\n");
        return OMPI_ERROR;
    }

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = sharedfp->sharedfp_iread (file, buf, count, datatype, request);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
280 
/*
 * Blocking ordered read using the shared file pointer.
 *
 * Picks up the sharedfp module stored in f_sharedfp of the ompio file
 * handle. When no module is set, a message is written with opal_output()
 * and OMPI_ERROR is returned without taking the lock. Otherwise the
 * module's sharedfp_read_ordered entry is called between OPAL_THREAD_LOCK
 * and OPAL_THREAD_UNLOCK on fh->f_mutex and its result is returned.
 */
int mca_io_ompio_file_read_ordered (ompi_file_t *fh,
                                    void *buf,
                                    int count,
                                    struct ompi_datatype_t *datatype,
                                    ompi_status_public_t * status)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    mca_io_ompio_file_t *file = &io_data->ompio_fh;
    mca_sharedfp_base_module_t *sharedfp =
        (mca_sharedfp_base_module_t *)(file->f_sharedfp);
    int rc;

    /* Without a shared file pointer module the operation cannot proceed. */
    if ( NULL == sharedfp ) {
        opal_output(0, "No shared file pointer component found for the given communicator. Can not execute\n");
        return OMPI_ERROR;
    }

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = sharedfp->sharedfp_read_ordered (file, buf, count, datatype, status);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
306 
/*
 * Begin half of the split ordered read using the shared file pointer.
 *
 * Picks up the sharedfp module stored in f_sharedfp of the ompio file
 * handle. When no module is set, a message is written with opal_output()
 * and OMPI_ERROR is returned without taking the lock. Otherwise the
 * module's sharedfp_read_ordered_begin entry is called between
 * OPAL_THREAD_LOCK and OPAL_THREAD_UNLOCK on fh->f_mutex and its result is
 * returned.
 */
int mca_io_ompio_file_read_ordered_begin (ompi_file_t *fh,
                                          void *buf,
                                          int count,
                                          struct ompi_datatype_t *datatype)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    mca_io_ompio_file_t *file = &io_data->ompio_fh;
    mca_sharedfp_base_module_t *sharedfp = file->f_sharedfp;
    int rc;

    /* Without a shared file pointer module the operation cannot proceed. */
    if ( NULL == sharedfp ) {
        opal_output(0, "No shared file pointer component found for the given communicator. Can not execute\n");
        return OMPI_ERROR;
    }

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = sharedfp->sharedfp_read_ordered_begin (file, buf, count, datatype);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
332 
/*
 * End half of the split ordered read using the shared file pointer.
 *
 * Picks up the sharedfp module stored in f_sharedfp of the ompio file
 * handle. When no module is set, a message is written with opal_output()
 * and OMPI_ERROR is returned without taking the lock. Otherwise the
 * module's sharedfp_read_ordered_end entry is called between
 * OPAL_THREAD_LOCK and OPAL_THREAD_UNLOCK on fh->f_mutex and its result is
 * returned.
 */
int mca_io_ompio_file_read_ordered_end (ompi_file_t *fh,
                                        void *buf,
                                        ompi_status_public_t * status)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    mca_io_ompio_file_t *file = &io_data->ompio_fh;
    mca_sharedfp_base_module_t *sharedfp = file->f_sharedfp;
    int rc;

    /* Without a shared file pointer module the operation cannot proceed. */
    if ( NULL == sharedfp ) {
        opal_output(0, "No shared file pointer component found for the given communicator. Can not execute\n");
        return OMPI_ERROR;
    }

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = sharedfp->sharedfp_read_ordered_end (file, buf, status);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    return rc;
}
357 
358 
/* Split collectives. Not really used, but the infrastructure is in place. */
360 /**********************************************************************/
/*
 * Begin half of the split collective read.
 *
 * If the ompio file handle already has f_split_coll_in_use set, a diagnostic
 * is emitted and MPI_ERR_OTHER is returned without starting anything.
 * Otherwise the read is started through mca_io_ompio_file_iread_all(), with
 * the resulting request stored in f_split_coll_req, and f_split_coll_in_use
 * is set to true (regardless of the return code of the iread call).
 *
 * The diagnostic goes through opal_output() rather than printf() so that it
 * follows the same reporting path as the other error messages in this file
 * instead of being written to the application's stdout.
 *
 * Returns MPI_ERR_OTHER when a split collective is already pending, else the
 * value produced by mca_io_ompio_file_iread_all().
 */
int mca_io_ompio_file_read_all_begin (ompi_file_t *fh,
                                      void *buf,
                                      int count,
                                      struct ompi_datatype_t *datatype)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    mca_io_ompio_file_t *ompio_fh = &io_data->ompio_fh;
    int rc;

    if ( true == ompio_fh->f_split_coll_in_use ) {
        opal_output(0, "Only one split collective I/O operation allowed per file handle at any given point in time!\n");
        return MPI_ERR_OTHER;
    }

    /* fh->f_mutex is not taken here; mca_io_ompio_file_iread_all does the
       locking itself. */
    rc = mca_io_ompio_file_iread_all (fh, buf, count, datatype,
                                      &ompio_fh->f_split_coll_req);
    ompio_fh->f_split_coll_in_use = true;

    return rc;
}
382 
/*
 * End half of the split collective read.
 *
 * Waits, via ompi_request_wait(), on the request stored in f_split_coll_req
 * of the ompio file handle and then clears f_split_coll_in_use, whatever the
 * outcome of the wait. The buf argument is not used here.
 *
 * Returns the value produced by ompi_request_wait().
 */
int mca_io_ompio_file_read_all_end (ompi_file_t *fh,
                                    void *buf,
                                    ompi_status_public_t * status)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    mca_io_ompio_file_t *ompio_fh = &io_data->ompio_fh;
    int rc = ompi_request_wait (&ompio_fh->f_split_coll_req, status);

    /* The split collective is over; allow a new one to be started. */
    ompio_fh->f_split_coll_in_use = false;

    return rc;
}
399 
/*
 * Begin half of the split collective read at an explicit offset.
 *
 * If the ompio file handle already has f_split_coll_in_use set, a diagnostic
 * is emitted and MPI_ERR_REQUEST is returned without starting anything.
 * Otherwise mca_common_ompio_file_iread_at_all() is called between
 * OPAL_THREAD_LOCK and OPAL_THREAD_UNLOCK on fh->f_mutex, with the resulting
 * request stored in f_split_coll_req, and f_split_coll_in_use is set to true
 * (regardless of the return code of the iread call).
 *
 * The diagnostic goes through opal_output() rather than printf() so that it
 * follows the same reporting path as the other error messages in this file
 * instead of being written to the application's stdout.
 *
 * Returns MPI_ERR_REQUEST when a split collective is already pending, else
 * the value produced by mca_common_ompio_file_iread_at_all().
 */
int mca_io_ompio_file_read_at_all_begin (ompi_file_t *fh,
                                         OMPI_MPI_OFFSET_TYPE offset,
                                         void *buf,
                                         int count,
                                         struct ompi_datatype_t *datatype)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    mca_io_ompio_file_t *ompio_fh = &io_data->ompio_fh;
    int rc;

    if ( true == ompio_fh->f_split_coll_in_use ) {
        opal_output(0, "Only one split collective I/O operation allowed per file handle at any given point in time!\n");
        return MPI_ERR_REQUEST;
    }

    OPAL_THREAD_LOCK(&fh->f_mutex);
    rc = mca_common_ompio_file_iread_at_all (ompio_fh, offset, buf, count, datatype,
                                             &ompio_fh->f_split_coll_req);
    OPAL_THREAD_UNLOCK(&fh->f_mutex);

    ompio_fh->f_split_coll_in_use = true;

    return rc;
}
422 
/*
 * End half of the split collective read at an explicit offset.
 *
 * Waits, via ompi_request_wait(), on the request stored in f_split_coll_req
 * of the ompio file handle and then clears f_split_coll_in_use, whatever the
 * outcome of the wait. The buf argument is not used here.
 *
 * Returns the value produced by ompi_request_wait().
 */
int mca_io_ompio_file_read_at_all_end (ompi_file_t *fh,
                                       void *buf,
                                       ompi_status_public_t * status)
{
    mca_io_ompio_data_t *io_data = (mca_io_ompio_data_t *) fh->f_io_selected_data;
    mca_io_ompio_file_t *ompio_fh = &io_data->ompio_fh;
    int rc = ompi_request_wait (&ompio_fh->f_split_coll_req, status);

    /* The split collective is over; allow a new one to be started. */
    ompio_fh->f_split_coll_in_use = false;

    return rc;
}
439