1 /*
2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3  *                         University Research and Technology
4  *                         Corporation.  All rights reserved.
5  * Copyright (c) 2004-2011 The University of Tennessee and The University
6  *                         of Tennessee Research Foundation.  All rights
7  *                         reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  *                         University of Stuttgart.  All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  *                         All rights reserved.
12  * Copyright (c) 2017      University of Houston. All rights reserved.
13  * Copyright (c) 2018      Research Organization for Information Science
14  *                         and Technology (RIST). All rights reserved.
15  * $COPYRIGHT$
16  *
17  * Additional copyrights may follow
18  *
19  * $HEADER$
20  */
21 
22 #include "ompi_config.h"
23 #include "fbtl_posix.h"
24 
25 #include "mpi.h"
26 #include <unistd.h>
27 #include <sys/uio.h>
28 #include <errno.h>
29 #include <limits.h>
30 #include "ompi/constants.h"
31 #include "ompi/mca/fbtl/fbtl.h"
32 
33 #define MAX_ERRCOUNT 100
34 
/*
  op:    can be F_WRLCK or F_RDLCK.
  flags: can be OMPIO_LOCK_ENTIRE_REGION or OMPIO_LOCK_SELECTIVE. This is typically set by the
         operation, not by the fs component. For example, a collective and an individual component
         might require different levels of protection through locking, and blocking (pwritev, preadv)
         operations might need different handling than non-blocking (aio) operations.

  fh->f_flags can contain similar-sounding flags; those are set by the fs component and/or by user requests.

  Support for MPI atomicity operations is envisioned, but has not yet been tested.
*/
45 
/*
  Determine which byte range (if any) has to be protected for an access of
  'len' bytes starting at 'offset', and acquire a blocking fcntl() record lock
  (F_SETLKW) on fh->fd for that range.

  lock:   filled in by this routine. If no lock is required, l_start and l_len
          are both left at -1, which mca_fbtl_posix_unlock() treats as
          "nothing to release".
  fh:     file handle; f_flags, f_fs_block_size and fd are read.
  op:     lock type stored in lock->l_type.
  offset: starting file offset of the access.
  len:    length of the access in bytes; 0 means there is nothing to lock.
  flags:  OMPIO_LOCK_ENTIRE_REGION locks exactly [offset, offset+len); any other
          value selects the partial-block strategy described below.

  Returns 0 if the lock was acquired or no lock was needed, otherwise the
  non-zero result of the last fcntl() call, with errno set by that call.
*/
int mca_fbtl_posix_lock ( struct flock *lock, ompio_file_t *fh, int op,
                          OMPI_MPI_OFFSET_TYPE offset, off_t len, int flags)
{
    off_t lmod, bmod;
    int ret, err_count, lock_errno;

    lock->l_type   = op;
    lock->l_whence = SEEK_SET;
    /* -1/-1 is the "no lock held" marker checked by the unlock routine */
    lock->l_start  = -1;
    lock->l_len    = -1;
    if ( 0 == len ) {
        return 0;
    }

    if ( fh->f_flags & OMPIO_LOCK_ENTIRE_FILE ) {
        /* l_len == 0 requests a lock from l_start to the end of the file */
        lock->l_start = (off_t) 0;
        lock->l_len   = 0;
    }
    else {
        if ( (fh->f_flags & OMPIO_LOCK_NEVER) ||
             (fh->f_flags & OMPIO_LOCK_NOT_THIS_OP )){
            /* OMPIO_LOCK_NEVER:
                 ompio tells us not to worry about locking. This can be due to three
                 reasons:
                 1. user enforced
                 2. single node job where the locking is handled already in the kernel
                 3. file view is set to distinct regions such that multiple processes
                    do not collide on the block level. ( not entirely sure yet how
                    to check for this except in trivial cases).
               OMPIO_LOCK_NOT_THIS_OP:
                 will typically be set by fcoll components indicating that the file partitioning
                 ensures no overlap in blocks.
            */
            return 0;
        }
        if ( flags == OMPIO_LOCK_ENTIRE_REGION || 0 == fh->f_fs_block_size ) {
            /* Without a usable block size the partial-block computation
               below would divide by zero, so fall back to locking the
               complete requested range. */
            lock->l_start = (off_t) offset;
            lock->l_len   = len;
        }
        else {
            /* We only try to lock the first block in the data range if
               the starting offset is not the starting offset of a file system
               block. And the last block in the data range if the offset+len
               is not equal to the end of a file system block.
               If we need to lock both beginning + end, we combine
               the two into a single lock.
            */
            bmod = offset % fh->f_fs_block_size;
            if ( bmod  ) {
                /* NOTE(review): the length used here is the offset within the
                   block, not the number of bytes remaining in the block;
                   confirm that this is intended. */
                lock->l_start = (off_t) offset;
                lock->l_len   = bmod;
            }
            lmod = (offset+len)%fh->f_fs_block_size;
            if ( lmod ) {
                if ( !bmod ) {
                    /* only the tail is unaligned: lock the trailing partial block */
                    lock->l_start = (offset+len-lmod );
                    lock->l_len   = lmod;
                }
                else {
                    /* both ends unaligned: one lock covering the whole request */
                    lock->l_len = len;
                }
            }
            if ( -1 == lock->l_start && -1 == lock->l_len ) {
                /* no need to lock in this instance */
                return 0;
            }
        }
    }


#ifdef OMPIO_DEBUG
    printf("%d: acquiring lock for offset %lld length %lld requested offset %lld request len %lld \n",
           fh->f_rank, (long long) lock->l_start, (long long) lock->l_len,
           (long long) offset, (long long) len);
#endif
    errno=0;
    err_count=0;
    lock_errno=0;
    do {
        ret = fcntl ( fh->fd, F_SETLKW, lock);
        if ( ret ) {
            /* Capture errno right away: the debug output below may modify it
               before the retry condition is evaluated. */
            lock_errno = errno;
#ifdef OMPIO_DEBUG
            printf("[%d] ret = %d errno=%d %s\n", fh->f_rank, ret, lock_errno, strerror(lock_errno) );
            errno = lock_errno;
#endif
            err_count++;
        }
        /* Retry for as long as the call is interrupted by a signal; retry on
           EINPROGRESS only up to MAX_ERRCOUNT failures. */
    } while (  ret && ((lock_errno == EINTR) || ((lock_errno == EINPROGRESS) && err_count < MAX_ERRCOUNT )));


    return ret;
}
135 
/*
  Release the record lock described by 'lock' on fh->fd.

  lock: range to release. If l_start and l_len are both -1 no lock is
        considered held and the routine returns without calling fcntl().
        On return both fields are reset to -1.
  fh:   file handle; fd is used for the fcntl() call.

  The routine has no return value, so a failing fcntl() cannot be reported to
  the caller. The call is repeated while it fails with EINTR so that a signal
  does not leave the range locked; any other failure is only printed when
  OMPIO_DEBUG is defined.
*/
void  mca_fbtl_posix_unlock ( struct flock *lock, ompio_file_t *fh )
{
    int ret;

    if ( -1 == lock->l_start && -1 == lock->l_len ) {
        return;
    }

    lock->l_type = F_UNLCK;
#ifdef OMPIO_DEBUG
    printf("%d: releasing lock for offset %lld length %lld\n", fh->f_rank,
           (long long) lock->l_start, (long long) lock->l_len);
#endif
    do {
        ret = fcntl ( fh->fd, F_SETLK, lock);
    } while ( ret && (EINTR == errno) );
#ifdef OMPIO_DEBUG
    if ( ret ) {
        printf("%d: releasing lock failed, ret = %d errno=%d\n", fh->f_rank, ret, errno);
    }
#endif
    /* mark the descriptor as "no lock held" */
    lock->l_start = -1;
    lock->l_len   = -1;

    return;
}
152