1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
4  *                         University Research and Technology
5  *                         Corporation.  All rights reserved.
6  * Copyright (c) 2004-2009 The University of Tennessee and The University
7  *                         of Tennessee Research Foundation.  All rights
8  *                         reserved.
9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10  *                         University of Stuttgart.  All rights reserved.
11  * Copyright (c) 2004-2005 The Regents of the University of California.
12  *                         All rights reserved.
13  * Copyright (c) 2006-2007 Voltaire. All rights reserved.
14  * Copyright (c) 2009-2010 Cisco Systems, Inc.  All rights reserved.
15  * Copyright (c) 2010-2018 Los Alamos National Security, LLC.
16  *                         All rights reserved.
17  * $COPYRIGHT$
18  *
19  * Additional copyrights may follow
20  *
21  * $HEADER$
22  */
23 /**
24  * @file
25  */
26 #ifndef MCA_BTL_VADER_FIFO_H
27 #define MCA_BTL_VADER_FIFO_H
28 
29 #include "btl_vader.h"
30 #include "btl_vader_endpoint.h"
31 #include "btl_vader_frag.h"
32 
/* Pick the width of a fifo value, and the atomic primitives that operate on it,
 * from the pointer size of the build. */
#if SIZEOF_VOID_P == 8
  /* atomic compare-and-set / swap on a 64-bit fifo slot; arguments are cast to int64_t */
  #define vader_item_cmpset(x, y, z) opal_atomic_cmpset_64((volatile int64_t *)(x), (int64_t)(y), (int64_t)(z))
  #define vader_item_swap(x, y)      opal_atomic_swap_64((volatile int64_t *)(x), (int64_t)(y))

  /* the low MCA_BTL_VADER_OFFSET_BITS bits of a fifo value hold an offset into a
   * segment; the bits above them hold a rank (see virtual2relative) */
  #define MCA_BTL_VADER_OFFSET_MASK 0xffffffffll
  #define MCA_BTL_VADER_OFFSET_BITS 32
  #define MCA_BTL_VADER_BITNESS     64

  typedef int64_t fifo_value_t;
#else
  /* atomic compare-and-set / swap on a 32-bit fifo slot; arguments are cast to int32_t */
  #define vader_item_cmpset(x, y, z) opal_atomic_cmpset_32((volatile int32_t *)(x), (int32_t)(y), (int32_t)(z))
  #define vader_item_swap(x, y)      opal_atomic_swap_32((volatile int32_t *)(x), (int32_t)(y))

  /* same split as above, with a 24-bit offset field */
  #define MCA_BTL_VADER_OFFSET_MASK 0x00ffffffl
  #define MCA_BTL_VADER_OFFSET_BITS 24
  #define MCA_BTL_VADER_BITNESS     32

  typedef int32_t fifo_value_t;
#endif

/* sentinel stored in fifo_head, fifo_tail and hdr->next to mean "no fragment here" */
#define VADER_FIFO_FREE  ((fifo_value_t)-2)
54 
55 /*
56  * Shared Memory FIFOs
57  *
58  * The FIFO is implemented as a linked list of frag headers. The fifo has multiple
59  * producers and a single consumer (in the single thread case) so the tail needs
 * to be modified by an atomic operation or protected by an atomic lock.
61  *
62  * Since the frags live in shared memory that is mapped differently into
63  * each address space, the head and tail pointers are relative (each process must
64  * add its own offset).
65  *
66  * We introduce some padding at the end of the structure but it is probably unnecessary.
67  */
68 
/* lock free fifo */
typedef struct vader_fifo_t {
    /* relative address of the first queued fragment header, or VADER_FIFO_FREE.
     * consumed and advanced by vader_fifo_read; set by vader_fifo_write when the
     * previous tail was VADER_FIFO_FREE */
    volatile fifo_value_t fifo_head;
    /* relative address of the most recently queued fragment header, or VADER_FIFO_FREE.
     * writers replace it with an atomic swap; the reader resets it with a compare-and-set */
    volatile fifo_value_t fifo_tail;
    /* set to mca_btl_vader_component.fbox_max by vader_fifo_init; not otherwise
     * touched in this header */
    volatile int32_t      fbox_available;
} vader_fifo_t;

/* large enough to ensure the fifo is on its own cache line */
#define MCA_BTL_VADER_FIFO_SIZE 128
78 
79 /***
80  * One or more FIFO components may be a pointer that must be
81  * accessed by multiple processes.  Since the shared region may
82  * be mmapped differently into each process's address space,
83  * these pointers will be relative to some base address.  Here,
84  * we define inline functions to translate between relative
85  * addresses and virtual addresses.
86  */
87 
88 /* This only works for finding the relative address for a pointer within my_segment */
virtual2relative(char * addr)89 static inline fifo_value_t virtual2relative (char *addr)
90 {
91     return (fifo_value_t) ((intptr_t) (addr - mca_btl_vader_component.my_segment)) | ((fifo_value_t)MCA_BTL_VADER_LOCAL_RANK << MCA_BTL_VADER_OFFSET_BITS);
92 }
93 
virtual2relativepeer(struct mca_btl_base_endpoint_t * endpoint,char * addr)94 static inline fifo_value_t virtual2relativepeer (struct mca_btl_base_endpoint_t *endpoint, char *addr)
95 {
96     return (fifo_value_t) ((intptr_t) (addr - endpoint->segment_base)) | ((fifo_value_t)endpoint->peer_smp_rank << MCA_BTL_VADER_OFFSET_BITS);
97 }
98 
/* Decode a relative fifo value into a pointer usable in this process: the bits above
 * MCA_BTL_VADER_OFFSET_BITS index mca_btl_vader_component.endpoints, and the masked
 * low bits are added to that entry's segment_base. */
static inline void *relative2virtual (fifo_value_t offset)
{
    const fifo_value_t owner = offset >> MCA_BTL_VADER_OFFSET_BITS;
    const fifo_value_t displacement = offset & MCA_BTL_VADER_OFFSET_MASK;

    return (void *)(intptr_t)(displacement + mca_btl_vader_component.endpoints[owner].segment_base);
}
103 
104 #include "btl_vader_fbox.h"
105 
/**
 * vader_fifo_read:
 *
 * @brief reads a single fragment from a local fifo
 *
 * @param[inout]   fifo - FIFO to read from
 * @param[out]     ep   - returns the endpoint the fifo element was read from;
 *                        left untouched when NULL is returned
 *
 * @returns a fragment header or NULL if the fifo head is VADER_FIFO_FREE
 *
 * This function does not currently support multiple readers.
 */
static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct mca_btl_base_endpoint_t **ep)
{
    mca_btl_vader_hdr_t *hdr;
    fifo_value_t value;

    /* nothing queued */
    if (VADER_FIFO_FREE == fifo->fifo_head) {
        return NULL;
    }

    /* read barrier between the emptiness check above and the loads that follow */
    opal_atomic_rmb ();

    value = fifo->fifo_head;

    /* the bits above MCA_BTL_VADER_OFFSET_BITS index the endpoints array */
    *ep = &mca_btl_vader_component.endpoints[value >> MCA_BTL_VADER_OFFSET_BITS];
    hdr = (mca_btl_vader_hdr_t *) relative2virtual (value);

    /* mark the head empty until the successor (if any) is known */
    fifo->fifo_head = VADER_FIFO_FREE;

    /* a header must never link to itself */
    assert (hdr->next != value);

    if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) {
        /* hdr has no successor yet, so it may be the last element */
        opal_atomic_rmb();

        /* try to reset the tail from this element to VADER_FIFO_FREE. if the tail is no
         * longer this element, a writer has already swapped in a new tail (see
         * vader_fifo_write) but may not have stored hdr->next yet */
        if (!vader_item_cmpset (&fifo->fifo_tail, value, VADER_FIFO_FREE)) {
            /* spin until the writer links its fragment behind hdr */
            while (VADER_FIFO_FREE == hdr->next) {
                opal_atomic_rmb ();
            }

            fifo->fifo_head = hdr->next;
        }
        /* otherwise the fifo is now empty and the head stays VADER_FIFO_FREE */
    } else {
        /* successor already linked: it becomes the new head */
        fifo->fifo_head = hdr->next;
    }

    /* write barrier after updating the head */
    opal_atomic_wmb ();
    return hdr;
}
155 
/**
 * vader_fifo_init:
 *
 * @brief initialize a fifo as empty and register it as this process' fifo
 *
 * @param[inout]   fifo - FIFO to initialize
 *
 * Sets the head and tail to VADER_FIFO_FREE, sets fbox_available to
 * mca_btl_vader_component.fbox_max and stores {fifo} in
 * mca_btl_vader_component.my_fifo.
 */
static inline void vader_fifo_init (vader_fifo_t *fifo)
{
    /* due to a compiler bug in Oracle C 5.15 the chained assignment
     *     fifo->fifo_head = fifo->fifo_tail = VADER_FIFO_FREE;
     * was broken into two statements. Not ideal but oh well. See #5814 */
    fifo->fifo_head = VADER_FIFO_FREE;
    fifo->fifo_tail = VADER_FIFO_FREE;
    fifo->fbox_available = mca_btl_vader_component.fbox_max;
    mca_btl_vader_component.my_fifo = fifo;
}
166 
/**
 * vader_fifo_write:
 *
 * @brief append a fragment (given as a relative address) to a fifo
 *
 * @param[inout]   fifo  - FIFO to append to
 * @param[in]      value - relative address of the fragment header, as produced by
 *                         virtual2relative or virtual2relativepeer
 *
 * Both callers in this header set the fragment's next field to VADER_FIFO_FREE
 * before calling this function.
 */
static inline void vader_fifo_write (vader_fifo_t *fifo, fifo_value_t value)
{
    fifo_value_t prev;

    /* write barrier before publishing value as the new tail */
    opal_atomic_wmb ();
    /* atomically install value as the tail and fetch the previous tail */
    prev = vader_item_swap (&fifo->fifo_tail, value);
    opal_atomic_rmb ();

    /* the same fragment must not be queued behind itself */
    assert (prev != value);

    if (OPAL_LIKELY(VADER_FIFO_FREE != prev)) {
        /* there was a previous tail: link the new fragment behind it */
        mca_btl_vader_hdr_t *hdr = (mca_btl_vader_hdr_t *) relative2virtual (prev);
        hdr->next = value;
    } else {
        /* the tail was VADER_FIFO_FREE, so the new fragment also becomes the head */
        fifo->fifo_head = value;
    }

    /* write barrier after linking the fragment */
    opal_atomic_wmb ();
}
186 
187 /**
188  * vader_fifo_write_ep:
189  *
190  * @brief write a frag (relative to this process' base) to another rank's fifo
191  *
192  * @param[in]  hdr - fragment header to write
193  * @param[in]  ep  - endpoint to write the fragment to
194  *
195  * This function is used to send a fragment to a remote peer. {hdr} must belong
196  * to the current process.
197  */
vader_fifo_write_ep(mca_btl_vader_hdr_t * hdr,struct mca_btl_base_endpoint_t * ep)198 static inline bool vader_fifo_write_ep (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *ep)
199 {
200     fifo_value_t rhdr = virtual2relative ((char *) hdr);
201     if (ep->fbox_out.buffer) {
202         /* if there is a fast box for this peer then use the fast box to send the fragment header.
203          * this is done to ensure fragment ordering */
204         opal_atomic_wmb ();
205         return mca_btl_vader_fbox_sendi (ep, 0xfe, &rhdr, sizeof (rhdr), NULL, 0);
206     }
207     mca_btl_vader_try_fbox_setup (ep, hdr);
208     hdr->next = VADER_FIFO_FREE;
209     vader_fifo_write (ep->fifo, rhdr);
210 
211     return true;
212 }
213 
214 /**
215  * vader_fifo_write_back:
216  *
217  * @brief write a frag (relative to the remote process' base) to the remote fifo
218  *
219  * @param[in]  hdr - fragment header to write
220  * @param[in]  ep  - endpoint the fragment belongs to
221  *
222  * This function is used to return a fragment to the sending process. It differs from vader_fifo_write_ep
223  * in that it uses the {ep} to produce the relative address.
224  */
vader_fifo_write_back(mca_btl_vader_hdr_t * hdr,struct mca_btl_base_endpoint_t * ep)225 static inline void vader_fifo_write_back (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *ep)
226 {
227     hdr->next = VADER_FIFO_FREE;
228     vader_fifo_write(ep->fifo, virtual2relativepeer (ep, (char *) hdr));
229 }
230 
231 #endif /* MCA_BTL_VADER_FIFO_H */
232