1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2009 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2006-2007 Voltaire. All rights reserved.
14 * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
15 * Copyright (c) 2010-2018 Los Alamos National Security, LLC.
16 * All rights reserved.
17 * $COPYRIGHT$
18 *
19 * Additional copyrights may follow
20 *
21 * $HEADER$
22 */
23 /**
24 * @file
25 */
26 #ifndef MCA_BTL_VADER_FIFO_H
27 #define MCA_BTL_VADER_FIFO_H
28
29 #include "btl_vader.h"
30 #include "btl_vader_endpoint.h"
31 #include "btl_vader_frag.h"
32
/*
 * Pointer-width dependent FIFO item definitions.
 *
 * fifo_value_t is the integer type stored in the fifo head/tail and in the
 * frag "next" links. MCA_BTL_VADER_OFFSET_MASK selects the low
 * MCA_BTL_VADER_OFFSET_BITS bits of an item (the offset part); the bits above
 * them carry a rank. vader_item_cmpset() and vader_item_swap() forward to the
 * opal atomic primitive of the matching width.
 */
#if SIZEOF_VOID_P == 8
typedef int64_t fifo_value_t;

#define MCA_BTL_VADER_BITNESS 64
#define MCA_BTL_VADER_OFFSET_BITS 32
#define MCA_BTL_VADER_OFFSET_MASK 0xffffffffll

#define vader_item_cmpset(x, y, z) opal_atomic_cmpset_64((volatile int64_t *)(x), (int64_t)(y), (int64_t)(z))
#define vader_item_swap(x, y) opal_atomic_swap_64((volatile int64_t *)(x), (int64_t)(y))
#else
typedef int32_t fifo_value_t;

#define MCA_BTL_VADER_BITNESS 32
#define MCA_BTL_VADER_OFFSET_BITS 24
#define MCA_BTL_VADER_OFFSET_MASK 0x00ffffffl

#define vader_item_cmpset(x, y, z) opal_atomic_cmpset_32((volatile int32_t *)(x), (int32_t)(y), (int32_t)(z))
#define vader_item_swap(x, y) opal_atomic_swap_32((volatile int32_t *)(x), (int32_t)(y))
#endif

/* item value that marks an empty head, tail or next link */
#define VADER_FIFO_FREE ((fifo_value_t)-2)
54
55 /*
56 * Shared Memory FIFOs
57 *
 * The FIFO is implemented as a linked list of frag headers. The FIFO has multiple
 * producers and a single consumer (in the single-threaded case), so the tail must
 * be modified with an atomic operation or protected by an atomic lock.
61 *
62 * Since the frags live in shared memory that is mapped differently into
63 * each address space, the head and tail pointers are relative (each process must
64 * add its own offset).
65 *
66 * We introduce some padding at the end of the structure but it is probably unnecessary.
67 */
68
/* lock free fifo */
typedef struct vader_fifo_t {
    /* relative address of the first queued frag header, or VADER_FIFO_FREE when
     * nothing is queued; consumed and advanced by vader_fifo_read() */
    volatile fifo_value_t fifo_head;
    /* relative address of the most recently queued frag header; writers exchange
     * it atomically in vader_fifo_write() */
    volatile fifo_value_t fifo_tail;
    /* set to mca_btl_vader_component.fbox_max by vader_fifo_init() */
    volatile int32_t fbox_available;
} vader_fifo_t;

/* large enough to ensure the fifo is on its own cache line */
#define MCA_BTL_VADER_FIFO_SIZE 128
78
79 /***
80 * One or more FIFO components may be a pointer that must be
81 * accessed by multiple processes. Since the shared region may
82 * be mmapped differently into each process's address space,
83 * these pointers will be relative to some base address. Here,
84 * we define inline functions to translate between relative
85 * addresses and virtual addresses.
86 */
87
88 /* This only works for finding the relative address for a pointer within my_segment */
virtual2relative(char * addr)89 static inline fifo_value_t virtual2relative (char *addr)
90 {
91 return (fifo_value_t) ((intptr_t) (addr - mca_btl_vader_component.my_segment)) | ((fifo_value_t)MCA_BTL_VADER_LOCAL_RANK << MCA_BTL_VADER_OFFSET_BITS);
92 }
93
virtual2relativepeer(struct mca_btl_base_endpoint_t * endpoint,char * addr)94 static inline fifo_value_t virtual2relativepeer (struct mca_btl_base_endpoint_t *endpoint, char *addr)
95 {
96 return (fifo_value_t) ((intptr_t) (addr - endpoint->segment_base)) | ((fifo_value_t)endpoint->peer_smp_rank << MCA_BTL_VADER_OFFSET_BITS);
97 }
98
/* Decode a fifo item into a pointer in this process: the bits above
 * MCA_BTL_VADER_OFFSET_BITS select an entry of
 * mca_btl_vader_component.endpoints, and the masked low bits are added to
 * that endpoint's segment_base. */
static inline void *relative2virtual (fifo_value_t offset)
{
    const fifo_value_t peer_index = offset >> MCA_BTL_VADER_OFFSET_BITS;
    const fifo_value_t segment_offset = offset & MCA_BTL_VADER_OFFSET_MASK;

    return (void *)(intptr_t)(segment_offset + mca_btl_vader_component.endpoints[peer_index].segment_base);
}
103
104 #include "btl_vader_fbox.h"
105
/**
 * vader_fifo_read:
 *
 * @brief reads a single fragment from a local fifo
 *
 * @param[inout] fifo - FIFO to read from
 * @param[out]   ep   - set to the entry of mca_btl_vader_component.endpoints selected
 *                      by the rank bits of the item that was read; left untouched
 *                      when NULL is returned
 *
 * @returns the fragment header at the head of the fifo, or NULL if the head is
 *          VADER_FIFO_FREE
 *
 * This function does not currently support multiple readers.
 */
static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct mca_btl_base_endpoint_t **ep)
{
    mca_btl_vader_hdr_t *hdr;
    fifo_value_t value;

    /* nothing queued */
    if (VADER_FIFO_FREE == fifo->fifo_head) {
        return NULL;
    }

    opal_atomic_rmb ();

    /* re-read the head after the read barrier */
    value = fifo->fifo_head;

    /* the bits above MCA_BTL_VADER_OFFSET_BITS index the endpoint table; relative2virtual ()
     * turns the whole item into a pointer usable in this process */
    *ep = &mca_btl_vader_component.endpoints[value >> MCA_BTL_VADER_OFFSET_BITS];
    hdr = (mca_btl_vader_hdr_t *) relative2virtual (value);

    /* mark the head empty before the successor is worked out */
    fifo->fifo_head = VADER_FIFO_FREE;

    assert (hdr->next != value);

    if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) {
        opal_atomic_rmb();

        /* hdr has no successor recorded. try to reset the tail from this item to
         * VADER_FIFO_FREE as well. if the compare-and-set reports failure (presumably
         * because a writer has already exchanged the tail in vader_fifo_write () but has
         * not yet stored hdr->next), spin until the link shows up and make it the head */
        if (!vader_item_cmpset (&fifo->fifo_tail, value, VADER_FIFO_FREE)) {
            while (VADER_FIFO_FREE == hdr->next) {
                opal_atomic_rmb ();
            }

            fifo->fifo_head = hdr->next;
        }
    } else {
        /* common case: advance the head to the already linked successor */
        fifo->fifo_head = hdr->next;
    }

    opal_atomic_wmb ();
    return hdr;
}
155
/**
 * vader_fifo_init:
 *
 * @brief reset a fifo to the empty state and register it as this process' fifo
 *
 * @param[inout] fifo - FIFO to initialize
 *
 * Sets head and tail to VADER_FIFO_FREE, sets fbox_available to
 * mca_btl_vader_component.fbox_max and stores the fifo in
 * mca_btl_vader_component.my_fifo.
 */
static inline void vader_fifo_init (vader_fifo_t *fifo)
{
    const fifo_value_t empty = VADER_FIFO_FREE;

    /* head and tail are deliberately assigned in two separate statements rather than
     * one chained assignment: a compiler bug in Oracle C 5.15 broke the chained form.
     * See #5814 */
    fifo->fifo_head = empty;
    fifo->fifo_tail = empty;

    fifo->fbox_available = mca_btl_vader_component.fbox_max;
    mca_btl_vader_component.my_fifo = fifo;
}
166
/**
 * vader_fifo_write:
 *
 * @brief append an item to a fifo
 *
 * @param[inout] fifo  - FIFO to append to
 * @param[in]    value - relative address of the frag header to append
 *
 * The tail is exchanged atomically with {value}. If the previous tail was
 * VADER_FIFO_FREE the item also becomes the head, otherwise it is linked
 * behind the previous tail.
 */
static inline void vader_fifo_write (vader_fifo_t *fifo, fifo_value_t value)
{
    fifo_value_t old_tail;

    opal_atomic_wmb ();
    old_tail = vader_item_swap (&fifo->fifo_tail, value);
    opal_atomic_rmb ();

    assert (old_tail != value);

    if (OPAL_UNLIKELY(VADER_FIFO_FREE == old_tail)) {
        /* there was no tail: the new item is the head as well */
        fifo->fifo_head = value;
    } else {
        /* link the new item behind the previous tail */
        mca_btl_vader_hdr_t *tail_hdr = (mca_btl_vader_hdr_t *) relative2virtual (old_tail);
        tail_hdr->next = value;
    }

    opal_atomic_wmb ();
}
186
187 /**
188 * vader_fifo_write_ep:
189 *
190 * @brief write a frag (relative to this process' base) to another rank's fifo
191 *
192 * @param[in] hdr - fragment header to write
193 * @param[in] ep - endpoint to write the fragment to
194 *
195 * This function is used to send a fragment to a remote peer. {hdr} must belong
196 * to the current process.
197 */
vader_fifo_write_ep(mca_btl_vader_hdr_t * hdr,struct mca_btl_base_endpoint_t * ep)198 static inline bool vader_fifo_write_ep (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *ep)
199 {
200 fifo_value_t rhdr = virtual2relative ((char *) hdr);
201 if (ep->fbox_out.buffer) {
202 /* if there is a fast box for this peer then use the fast box to send the fragment header.
203 * this is done to ensure fragment ordering */
204 opal_atomic_wmb ();
205 return mca_btl_vader_fbox_sendi (ep, 0xfe, &rhdr, sizeof (rhdr), NULL, 0);
206 }
207 mca_btl_vader_try_fbox_setup (ep, hdr);
208 hdr->next = VADER_FIFO_FREE;
209 vader_fifo_write (ep->fifo, rhdr);
210
211 return true;
212 }
213
214 /**
215 * vader_fifo_write_back:
216 *
217 * @brief write a frag (relative to the remote process' base) to the remote fifo
218 *
219 * @param[in] hdr - fragment header to write
220 * @param[in] ep - endpoint the fragment belongs to
221 *
222 * This function is used to return a fragment to the sending process. It differs from vader_fifo_write_ep
223 * in that it uses the {ep} to produce the relative address.
224 */
vader_fifo_write_back(mca_btl_vader_hdr_t * hdr,struct mca_btl_base_endpoint_t * ep)225 static inline void vader_fifo_write_back (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *ep)
226 {
227 hdr->next = VADER_FIFO_FREE;
228 vader_fifo_write(ep->fifo, virtual2relativepeer (ep, (char *) hdr));
229 }
230
231 #endif /* MCA_BTL_VADER_FIFO_H */
232