1 /**
2 * Copyright (C) Mellanox Technologies Ltd. 2001-2015.  ALL RIGHTS RESERVED.
3 *
4 * See file LICENSE for terms.
5 */
6 
7 #ifdef HAVE_CONFIG_H
8 #  include "config.h"
9 #endif
10 
11 #include "arbiter.h"
12 
13 #include <ucs/debug/assert.h>
14 #include <ucs/debug/log.h>
15 
16 
ucs_arbiter_init(ucs_arbiter_t * arbiter)17 void ucs_arbiter_init(ucs_arbiter_t *arbiter)
18 {
19     ucs_list_head_init(&arbiter->list);
20 }
21 
ucs_arbiter_group_init(ucs_arbiter_group_t * group)22 void ucs_arbiter_group_init(ucs_arbiter_group_t *group)
23 {
24     group->tail = NULL;
25     UCS_ARBITER_GROUP_GUARD_INIT(group);
26 }
27 
ucs_arbiter_cleanup(ucs_arbiter_t * arbiter)28 void ucs_arbiter_cleanup(ucs_arbiter_t *arbiter)
29 {
30     ucs_assert_always(ucs_arbiter_is_empty(arbiter));
31 }
32 
ucs_arbiter_group_cleanup(ucs_arbiter_group_t * group)33 void ucs_arbiter_group_cleanup(ucs_arbiter_group_t *group)
34 {
35     UCS_ARBITER_GROUP_GUARD_CHECK(group);
36     ucs_assert_always(ucs_arbiter_group_is_empty(group));
37 }
38 
ucs_arbiter_group_head_is_scheduled(ucs_arbiter_elem_t * head)39 static inline int ucs_arbiter_group_head_is_scheduled(ucs_arbiter_elem_t *head)
40 {
41     return head->list.next != NULL;
42 }
43 
ucs_arbiter_group_head_reset(ucs_arbiter_elem_t * head)44 static inline void ucs_arbiter_group_head_reset(ucs_arbiter_elem_t *head)
45 {
46     head->list.next = NULL; /* Not scheduled yet */
47 }
48 
ucs_arbiter_elem_set_scheduled(ucs_arbiter_elem_t * elem,ucs_arbiter_group_t * group)49 static inline void ucs_arbiter_elem_set_scheduled(ucs_arbiter_elem_t *elem,
50                                                   ucs_arbiter_group_t *group)
51 {
52     elem->group = group;
53 }
54 
ucs_arbiter_group_push_elem_always(ucs_arbiter_group_t * group,ucs_arbiter_elem_t * elem)55 void ucs_arbiter_group_push_elem_always(ucs_arbiter_group_t *group,
56                                         ucs_arbiter_elem_t *elem)
57 {
58     ucs_arbiter_elem_t *tail = group->tail;
59 
60     if (tail == NULL) {
61         /* group is empty */
62         ucs_arbiter_group_head_reset(elem);
63         elem->next = elem;        /* Connect to itself */
64     } else {
65         elem->next = tail->next;  /* Point to first element */
66         tail->next = elem;        /* Point previous element to new one */
67     }
68 
69     group->tail = elem;   /* Update group tail */
70     ucs_arbiter_elem_set_scheduled(elem, group);
71 }
72 
ucs_arbiter_group_push_head_elem_always(ucs_arbiter_group_t * group,ucs_arbiter_elem_t * elem)73 void ucs_arbiter_group_push_head_elem_always(ucs_arbiter_group_t *group,
74                                              ucs_arbiter_elem_t *elem)
75 {
76     ucs_arbiter_elem_t *tail = group->tail;
77     ucs_arbiter_elem_t *head;
78 
79     UCS_ARBITER_GROUP_GUARD_CHECK(group);
80 
81     ucs_arbiter_group_head_reset(elem);
82     ucs_arbiter_elem_set_scheduled(elem, group);
83 
84     if (tail == NULL) {
85         elem->next  = elem;   /* Connect to itself */
86         group->tail = elem;   /* Update group tail */
87         return;
88     }
89 
90     head       = tail->next;
91     elem->next = head;        /* Point to first element */
92     tail->next = elem;        /* Point previous element to new one */
93 
94     if (!ucs_arbiter_group_head_is_scheduled(head)) {
95         return;
96     }
97 
98     ucs_list_replace(&head->list, &elem->list);
99 }
100 
ucs_arbiter_group_purge(ucs_arbiter_t * arbiter,ucs_arbiter_group_t * group,ucs_arbiter_callback_t cb,void * cb_arg)101 void ucs_arbiter_group_purge(ucs_arbiter_t *arbiter,
102                              ucs_arbiter_group_t *group,
103                              ucs_arbiter_callback_t cb, void *cb_arg)
104 {
105     ucs_arbiter_elem_t *tail            = group->tail;
106     ucs_arbiter_elem_t dummy_group_head = {};
107     ucs_arbiter_elem_t *ptr, *next, *prev;
108     ucs_arbiter_cb_result_t result;
109     ucs_arbiter_elem_t *head;
110     int sched_group;
111 
112     if (tail == NULL) {
113         return; /* Empty group */
114     }
115 
116     UCS_ARBITER_GROUP_GUARD_CHECK(group);
117 
118     head = tail->next;
119     next = head;
120     prev = tail;
121 
122     sched_group = ucs_arbiter_group_head_is_scheduled(head);
123     if (sched_group) {
124         /* put a placeholder on the arbiter queue */
125         ucs_list_replace(&head->list, &dummy_group_head.list);
126     }
127 
128     do {
129         ptr       = next;
130         next      = ptr->next;
131         /* Can't touch the element after cb is called if it gets removed. But it
132          * can be reused later as well, so it's group should be NULL. */
133         ucs_arbiter_elem_init(ptr);
134         result    = cb(arbiter, group, ptr, cb_arg);
135 
136         if (result == UCS_ARBITER_CB_RESULT_REMOVE_ELEM) {
137             if (ptr == head) {
138                 head = next;
139                 if (ptr == tail) {
140                     /* Last element is being removed - mark group as empty */
141                     group->tail = NULL;
142                     if (sched_group) {
143                         ucs_list_del(&dummy_group_head.list);
144                     }
145                     /* Break here to avoid further processing of the group */
146                     return;
147                 }
148             } else if (ptr == tail) {
149                 group->tail = prev;
150                 /* tail->next should point to head, make sure next is head
151                  * (it is assigned 2 lines below) */
152                 ucs_assert_always(next == head);
153             }
154             prev->next = next;
155         } else {
156             /* keep the element */
157             ucs_arbiter_elem_set_scheduled(ptr, group);
158             prev       = ptr;
159         }
160     } while (ptr != tail);
161 
162     ucs_assert(group->tail != NULL);
163 
164     if (sched_group) {
165         /* restore group head (could be old or new) instead of the dummy element */
166         ucs_list_replace(&dummy_group_head.list, &head->list);
167     } else {
168         /* mark the group head (could be old or new) as unscheduled */
169         ucs_arbiter_group_head_reset(head);
170     }
171 }
172 
ucs_arbiter_group_num_elems(ucs_arbiter_group_t * group)173 size_t ucs_arbiter_group_num_elems(ucs_arbiter_group_t *group)
174 {
175     ucs_arbiter_elem_t *elem = group->tail;
176     size_t num_elems;
177 
178     if (elem == NULL) {
179         return 0;
180     }
181 
182     num_elems = 0;
183     do {
184         ++num_elems;
185         elem = elem->next;
186     } while (elem != group->tail);
187 
188     return num_elems;
189 }
190 
191 
ucs_arbiter_group_is_scheduled(ucs_arbiter_group_t * group)192 int ucs_arbiter_group_is_scheduled(ucs_arbiter_group_t *group)
193 {
194     ucs_arbiter_elem_t *head;
195 
196     if (ucs_arbiter_group_is_empty(group)) {
197         return 0;
198     }
199 
200     head = group->tail->next;
201     return ucs_arbiter_group_head_is_scheduled(head);
202 }
203 
204 static void
ucs_arbiter_schedule_head_if_not_scheduled(ucs_arbiter_t * arbiter,ucs_arbiter_elem_t * head)205 ucs_arbiter_schedule_head_if_not_scheduled(ucs_arbiter_t *arbiter,
206                                            ucs_arbiter_elem_t *head)
207 {
208     if (!ucs_arbiter_group_head_is_scheduled(head)) {
209         ucs_list_add_tail(&arbiter->list, &head->list);
210     }
211 }
212 
ucs_arbiter_group_schedule_nonempty(ucs_arbiter_t * arbiter,ucs_arbiter_group_t * group)213 void ucs_arbiter_group_schedule_nonempty(ucs_arbiter_t *arbiter,
214                                          ucs_arbiter_group_t *group)
215 {
216     ucs_arbiter_elem_t *tail = group->tail;
217     ucs_arbiter_elem_t *head;
218 
219     ucs_assert(tail != NULL);
220     head = tail->next;
221 
222     ucs_assert(head != NULL);
223     ucs_arbiter_schedule_head_if_not_scheduled(arbiter, head);
224     UCS_ARBITER_GROUP_ARBITER_SET(group, arbiter);
225 }
226 
ucs_arbiter_group_desched_nonempty(ucs_arbiter_t * arbiter,ucs_arbiter_group_t * group)227 void ucs_arbiter_group_desched_nonempty(ucs_arbiter_t *arbiter,
228                                         ucs_arbiter_group_t *group)
229 {
230     ucs_arbiter_elem_t *head = group->tail->next;
231 
232     if (!ucs_arbiter_group_head_is_scheduled(head)) {
233         return;
234     }
235 
236     UCS_ARBITER_GROUP_ARBITER_CHECK(group, arbiter);
237     UCS_ARBITER_GROUP_ARBITER_SET(group, NULL);
238     ucs_list_del(&head->list);
239     ucs_arbiter_group_head_reset(head);
240 }
241 
242 static inline void
ucs_arbiter_remove_and_reset_if_scheduled(ucs_arbiter_elem_t * elem)243 ucs_arbiter_remove_and_reset_if_scheduled(ucs_arbiter_elem_t *elem)
244 {
245     if (ucs_unlikely(ucs_arbiter_group_head_is_scheduled(elem))) {
246          ucs_list_del(&elem->list);
247          ucs_arbiter_group_head_reset(elem);
248      }
249 }
250 
251 static inline void
ucs_arbiter_group_head_replace(ucs_arbiter_group_t * group,ucs_arbiter_elem_t * group_head,ucs_arbiter_elem_t * new_group_head)252 ucs_arbiter_group_head_replace(ucs_arbiter_group_t *group,
253                                ucs_arbiter_elem_t *group_head,
254                                ucs_arbiter_elem_t *new_group_head)
255 {
256     /* check if this is really the group head */
257     ucs_assert(!ucs_arbiter_group_is_empty(group));
258     ucs_assert(group->tail->next == group_head);
259 
260     if (group_head->next == group_head) {
261         group->tail          = new_group_head;
262     } else {
263         new_group_head->next = group_head->next;
264     }
265     group->tail->next = new_group_head;
266 }
267 
ucs_arbiter_dispatch_nonempty(ucs_arbiter_t * arbiter,unsigned per_group,ucs_arbiter_callback_t cb,void * cb_arg)268 void ucs_arbiter_dispatch_nonempty(ucs_arbiter_t *arbiter, unsigned per_group,
269                                    ucs_arbiter_callback_t cb, void *cb_arg)
270 {
271     ucs_arbiter_elem_t *group_head;
272     ucs_arbiter_cb_result_t result;
273     unsigned group_dispatch_count;
274     ucs_arbiter_group_t *group;
275     UCS_LIST_HEAD(resched_list);
276     ucs_arbiter_elem_t dummy;
277 
278     ucs_assert(!ucs_list_is_empty(&arbiter->list));
279 
280     ucs_arbiter_group_head_reset(&dummy);
281 
282     do {
283         group_head = ucs_list_extract_head(&arbiter->list, ucs_arbiter_elem_t,
284                                            list);
285         ucs_assert(group_head != NULL);
286 
287         /* Reset group head to allow the group to be moved to another arbiter by
288          * the dispatch callback. For example, when a DC endpoint is moved from
289          * waiting-for-DCI arbiter to waiting-for-TX-resources arbiter.
290          */
291         ucs_arbiter_group_head_reset(group_head);
292 
293         group_dispatch_count = 0;
294         group                = group_head->group;
295         dummy.group          = group;
296         UCS_ARBITER_GROUP_GUARD_CHECK(group);
297 
298         for (;;) {
299             ucs_assert(group_head->group   == group);
300             ucs_assert(dummy.group         == group);
301             ucs_assert(group_dispatch_count < per_group);
302 
303             /* reset the dispatched element here because:
304              * 1. if the element is removed from the arbiter it must be kept in
305              *    initialized state otherwise push will fail
306              * 2. we can't reset the element after calling the callback because
307              *    the callback could release the element.
308              */
309             ucs_arbiter_elem_init(group_head);
310             ucs_assert(!ucs_arbiter_group_head_is_scheduled(group_head));
311 
312             /* replace group head by a dummy element, to allow scheduling more
313              * elements on this group from the dispatch callback.
314              */
315             ucs_arbiter_group_head_replace(group, group_head, &dummy);
316 
317             /* dispatch the element */
318             ucs_trace_poll("dispatching arbiter element %p", group_head);
319             UCS_ARBITER_GROUP_GUARD_ENTER(group);
320             result = cb(arbiter, group, group_head, cb_arg);
321             UCS_ARBITER_GROUP_GUARD_EXIT(group);
322             ucs_trace_poll("dispatch result: %d", result);
323             ++group_dispatch_count;
324 
325             /* recursive push to head (during dispatch) is not allowed */
326             ucs_assert(group->tail->next == &dummy);
327 
328             /* element is not removed */
329             if (ucs_unlikely(result != UCS_ARBITER_CB_RESULT_REMOVE_ELEM)) {
330                 /* restore group pointer */
331                 ucs_arbiter_elem_set_scheduled(group_head, group);
332 
333                 /* the head should not move, since dummy replaces it */
334                 ucs_assert(!ucs_arbiter_group_head_is_scheduled(group_head));
335 
336                 /* replace dummy element by group_head */
337                 ucs_arbiter_group_head_replace(group, &dummy, group_head);
338 
339                 if (result == UCS_ARBITER_CB_RESULT_DESCHED_GROUP) {
340                     /* take over a recursively scheduled group */
341                     if (ucs_unlikely(ucs_arbiter_group_head_is_scheduled(&dummy))) {
342                         ucs_list_replace(&dummy.list, &group_head->list);
343                         UCS_ARBITER_GROUP_ARBITER_SET(group, dummy.group->arbiter);
344                         ucs_arbiter_group_head_reset(&dummy);
345                     } else {
346                         UCS_ARBITER_GROUP_ARBITER_SET(group, NULL);
347                     }
348                 } else {
349                     /* remove a recursively scheduled group, give priority
350                      * to the original order */
351                     ucs_arbiter_remove_and_reset_if_scheduled(&dummy);
352 
353                     if (result == UCS_ARBITER_CB_RESULT_NEXT_GROUP) {
354                         /* add to arbiter tail */
355                         ucs_list_add_tail(&arbiter->list, &group_head->list);
356                     } else if (result == UCS_ARBITER_CB_RESULT_RESCHED_GROUP) {
357                         /* add to resched list */
358                         ucs_list_add_tail(&resched_list, &group_head->list);
359                     } else if (result == UCS_ARBITER_CB_RESULT_STOP) {
360                         /* exit the outmost loop and make sure that next dispatch()
361                          * will continue from the current group */
362                         ucs_list_add_head(&arbiter->list, &group_head->list);
363                         goto out;
364                     } else {
365                         ucs_bug("unexpected return value from arbiter callback");
366                     }
367                 }
368 
369                 break;
370             }
371 
372             /* last element removed */
373             if (dummy.next == &dummy) {
374                 group->tail = NULL; /* group is empty now */
375                 group_head  = NULL; /* for debugging */
376                 ucs_arbiter_remove_and_reset_if_scheduled(&dummy);
377                 UCS_ARBITER_GROUP_ARBITER_SET(group, NULL);
378                 break;
379             }
380 
381             /* non-last element removed */
382             group_head        = dummy.next;  /* Update group head */
383             group->tail->next = group_head;  /* Tail points to new head */
384 
385             if (ucs_unlikely(ucs_arbiter_group_head_is_scheduled(&dummy))) {
386                 /* take over a recursively scheduled group */
387                 ucs_list_replace(&dummy.list, &group_head->list);
388                 ucs_arbiter_group_head_reset(&dummy);
389                 /* the group is already scheduled, continue to next group */
390                 break;
391             } else if (group_dispatch_count >= per_group) {
392                 /* add to arbiter tail and continue to next group */
393                 ucs_list_add_tail(&arbiter->list, &group_head->list);
394                 break;
395             }
396 
397             /* continue with new group head */
398             ucs_arbiter_group_head_reset(group_head);
399         }
400     } while (!ucs_list_is_empty(&arbiter->list));
401 
402 out:
403     ucs_list_splice_tail(&arbiter->list, &resched_list);
404 }
405 
ucs_arbiter_dump(ucs_arbiter_t * arbiter,FILE * stream)406 void ucs_arbiter_dump(ucs_arbiter_t *arbiter, FILE *stream)
407 {
408     static const int max_groups = 100;
409     ucs_arbiter_elem_t *group_head, *elem;
410     int count;
411 
412     fprintf(stream, "-------\n");
413     if (ucs_list_is_empty(&arbiter->list)) {
414         fprintf(stream, "(empty)\n");
415         goto out;
416     }
417 
418     count = 0;
419     ucs_list_for_each(group_head, &arbiter->list, list) {
420         elem = group_head;
421         if (ucs_list_head(&arbiter->list, ucs_arbiter_elem_t, list) == group_head) {
422             fprintf(stream, "=> ");
423         } else {
424             fprintf(stream, " * ");
425         }
426         do {
427             fprintf(stream, "[%p", elem);
428             if (elem == group_head) {
429                 fprintf(stream, " prev_g:%p", elem->list.prev);
430                 fprintf(stream, " next_g:%p", elem->list.next);
431             }
432             fprintf(stream, " next_e:%p grp:%p]", elem->next, elem->group);
433             if (elem->next != group_head) {
434                 fprintf(stream, "->");
435             }
436             elem = elem->next;
437         } while (elem != group_head);
438         fprintf(stream, "\n");
439         ++count;
440         if (count > max_groups) {
441             fprintf(stream, "more than %d groups - not printing any more\n",
442                     max_groups);
443             break;
444         }
445     }
446 
447 out:
448     fprintf(stream, "-------\n");
449 }
450