1 /*
2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3  *                         University Research and Technology
4  *                         Corporation.  All rights reserved.
5  * Copyright (c) 2004-2011 The University of Tennessee and The University
6  *                         of Tennessee Research Foundation.  All rights
7  *                         reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  *                         University of Stuttgart.  All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  *                         All rights reserved.
12  * Copyright (c) 2008-2021 University of Houston. All rights reserved.
13  * Copyright (c) 2015-2018 Research Organization for Information Science
14  *                         and Technology (RIST). All rights reserved.
15  * $COPYRIGHT$
16  *
17  * Additional copyrights may follow
18  *
19  * $HEADER$
20  */
21 
22 #include "ompi_config.h"
23 #include "fbtl_posix.h"
24 
25 #include <unistd.h>
26 #include <sys/uio.h>
27 #if HAVE_AIO_H
28 #include <aio.h>
29 #endif
30 
31 #include "mpi.h"
32 #include "ompi/constants.h"
33 #include "ompi/mca/fbtl/fbtl.h"
34 
35 #define MAX_ATTEMPTS 10
36 
mca_fbtl_posix_ipwritev(ompio_file_t * fh,ompi_request_t * request)37 ssize_t  mca_fbtl_posix_ipwritev (ompio_file_t *fh,
38 				 ompi_request_t *request)
39 {
40 #if defined(FBTL_POSIX_HAVE_AIO)
41     mca_fbtl_posix_request_data_t *data;
42     mca_ompio_request_t *req = (mca_ompio_request_t *) request;
43     int i=0, ret;
44     off_t start_offset, end_offset, total_length;
45 
46     data = (mca_fbtl_posix_request_data_t *) malloc ( sizeof (mca_fbtl_posix_request_data_t));
47     if ( NULL == data ) {
48         opal_output (1,"mca_fbtl_posix_ipwritev: could not allocate memory\n");
49         return 0;
50     }
51 
52     data->aio_req_count = fh->f_num_of_io_entries;
53     data->aio_open_reqs = fh->f_num_of_io_entries;
54     data->aio_req_type  = FBTL_POSIX_WRITE;
55     data->aio_req_chunks = fbtl_posix_max_aio_active_reqs;
56     data->aio_total_len = 0;
57     data->aio_reqs = (struct aiocb *) malloc (sizeof(struct aiocb) *
58                                               fh->f_num_of_io_entries);
59     if (NULL == data->aio_reqs) {
60         opal_output (1,"mca_fbtl_posix_ipwritev: could not allocate memory\n");
61         free(data);
62         return 0;
63     }
64 
65     data->aio_req_status = (int *) malloc (sizeof(int) * fh->f_num_of_io_entries);
66     if (NULL == data->aio_req_status) {
67         opal_output (1,"mca_fbtl_posix_ipwritev: could not allocate memory\n");
68         free(data->aio_reqs);
69         free(data);
70         return 0;
71     }
72     data->aio_fh = fh;
73 
74     for ( i=0; i<fh->f_num_of_io_entries; i++ ) {
75         data->aio_reqs[i].aio_offset  = (OMPI_MPI_OFFSET_TYPE)(intptr_t)
76             fh->f_io_array[i].offset;
77         data->aio_reqs[i].aio_buf     = fh->f_io_array[i].memory_address;
78         data->aio_reqs[i].aio_nbytes  = fh->f_io_array[i].length;
79         data->aio_reqs[i].aio_fildes  = fh->fd;
80         data->aio_reqs[i].aio_reqprio = 0;
81         data->aio_reqs[i].aio_sigevent.sigev_notify = SIGEV_NONE;
82 	data->aio_req_status[i]        = EINPROGRESS;
83     }
84 
85     data->aio_first_active_req = 0;
86     if ( data->aio_req_count > data->aio_req_chunks ) {
87 	data->aio_last_active_req = data->aio_req_chunks;
88     }
89     else {
90 	data->aio_last_active_req = data->aio_req_count;
91     }
92 
93     start_offset = data->aio_reqs[data->aio_first_active_req].aio_offset;
94     end_offset   = data->aio_reqs[data->aio_last_active_req-1].aio_offset + data->aio_reqs[data->aio_last_active_req-1].aio_nbytes;
95     total_length = (end_offset - start_offset);
96     ret = mca_fbtl_posix_lock( &data->aio_lock, data->aio_fh, F_WRLCK, start_offset, total_length, OMPIO_LOCK_ENTIRE_REGION );
97     if ( 0 < ret ) {
98         opal_output(1, "mca_fbtl_posix_ipwritev: error in mca_fbtl_posix_lock() error ret=%d %s", ret, strerror(errno));
99         mca_fbtl_posix_unlock ( &data->aio_lock, data->aio_fh );
100         free(data->aio_reqs);
101         free(data->aio_req_status);
102         free(data);
103         return OMPI_ERROR;
104     }
105 
106     for (i=0; i < data->aio_last_active_req; i++) {
107         int counter=0;
108 	while ( MAX_ATTEMPTS > counter ) {
109 	    if (-1 != aio_write(&data->aio_reqs[i])) {
110    	        break;
111 	    }
112 	    counter++;
113 	    mca_common_ompio_progress();
114 	}
115 	if ( MAX_ATTEMPTS == counter ) {
116             opal_output(1, "mca_fbtl_posix_ipwritev: error in aio_write():  %s", strerror(errno));
117             mca_fbtl_posix_unlock ( &data->aio_lock, data->aio_fh );
118             free(data->aio_req_status);
119             free(data->aio_reqs);
120             free(data);
121             return OMPI_ERROR;
122         }
123     }
124 
125     req->req_data = data;
126     req->req_progress_fn = mca_fbtl_posix_progress;
127     req->req_free_fn     = mca_fbtl_posix_request_free;
128 #endif
129     return OMPI_SUCCESS;
130 }
131