1 /**
2 * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
3 *
4 * See file LICENSE for terms.
5 */
6
7 #ifdef HAVE_CONFIG_H
8 # include "config.h"
9 #endif
10
11 #include "arbiter.h"
12
13 #include <ucs/debug/assert.h>
14 #include <ucs/debug/log.h>
15
16
ucs_arbiter_init(ucs_arbiter_t * arbiter)17 void ucs_arbiter_init(ucs_arbiter_t *arbiter)
18 {
19 ucs_list_head_init(&arbiter->list);
20 }
21
ucs_arbiter_group_init(ucs_arbiter_group_t * group)22 void ucs_arbiter_group_init(ucs_arbiter_group_t *group)
23 {
24 group->tail = NULL;
25 UCS_ARBITER_GROUP_GUARD_INIT(group);
26 }
27
ucs_arbiter_cleanup(ucs_arbiter_t * arbiter)28 void ucs_arbiter_cleanup(ucs_arbiter_t *arbiter)
29 {
30 ucs_assert_always(ucs_arbiter_is_empty(arbiter));
31 }
32
ucs_arbiter_group_cleanup(ucs_arbiter_group_t * group)33 void ucs_arbiter_group_cleanup(ucs_arbiter_group_t *group)
34 {
35 UCS_ARBITER_GROUP_GUARD_CHECK(group);
36 ucs_assert_always(ucs_arbiter_group_is_empty(group));
37 }
38
ucs_arbiter_group_head_is_scheduled(ucs_arbiter_elem_t * head)39 static inline int ucs_arbiter_group_head_is_scheduled(ucs_arbiter_elem_t *head)
40 {
41 return head->list.next != NULL;
42 }
43
ucs_arbiter_group_head_reset(ucs_arbiter_elem_t * head)44 static inline void ucs_arbiter_group_head_reset(ucs_arbiter_elem_t *head)
45 {
46 head->list.next = NULL; /* Not scheduled yet */
47 }
48
ucs_arbiter_elem_set_scheduled(ucs_arbiter_elem_t * elem,ucs_arbiter_group_t * group)49 static inline void ucs_arbiter_elem_set_scheduled(ucs_arbiter_elem_t *elem,
50 ucs_arbiter_group_t *group)
51 {
52 elem->group = group;
53 }
54
ucs_arbiter_group_push_elem_always(ucs_arbiter_group_t * group,ucs_arbiter_elem_t * elem)55 void ucs_arbiter_group_push_elem_always(ucs_arbiter_group_t *group,
56 ucs_arbiter_elem_t *elem)
57 {
58 ucs_arbiter_elem_t *tail = group->tail;
59
60 if (tail == NULL) {
61 /* group is empty */
62 ucs_arbiter_group_head_reset(elem);
63 elem->next = elem; /* Connect to itself */
64 } else {
65 elem->next = tail->next; /* Point to first element */
66 tail->next = elem; /* Point previous element to new one */
67 }
68
69 group->tail = elem; /* Update group tail */
70 ucs_arbiter_elem_set_scheduled(elem, group);
71 }
72
ucs_arbiter_group_push_head_elem_always(ucs_arbiter_group_t * group,ucs_arbiter_elem_t * elem)73 void ucs_arbiter_group_push_head_elem_always(ucs_arbiter_group_t *group,
74 ucs_arbiter_elem_t *elem)
75 {
76 ucs_arbiter_elem_t *tail = group->tail;
77 ucs_arbiter_elem_t *head;
78
79 UCS_ARBITER_GROUP_GUARD_CHECK(group);
80
81 ucs_arbiter_group_head_reset(elem);
82 ucs_arbiter_elem_set_scheduled(elem, group);
83
84 if (tail == NULL) {
85 elem->next = elem; /* Connect to itself */
86 group->tail = elem; /* Update group tail */
87 return;
88 }
89
90 head = tail->next;
91 elem->next = head; /* Point to first element */
92 tail->next = elem; /* Point previous element to new one */
93
94 if (!ucs_arbiter_group_head_is_scheduled(head)) {
95 return;
96 }
97
98 ucs_list_replace(&head->list, &elem->list);
99 }
100
ucs_arbiter_group_purge(ucs_arbiter_t * arbiter,ucs_arbiter_group_t * group,ucs_arbiter_callback_t cb,void * cb_arg)101 void ucs_arbiter_group_purge(ucs_arbiter_t *arbiter,
102 ucs_arbiter_group_t *group,
103 ucs_arbiter_callback_t cb, void *cb_arg)
104 {
105 ucs_arbiter_elem_t *tail = group->tail;
106 ucs_arbiter_elem_t dummy_group_head = {};
107 ucs_arbiter_elem_t *ptr, *next, *prev;
108 ucs_arbiter_cb_result_t result;
109 ucs_arbiter_elem_t *head;
110 int sched_group;
111
112 if (tail == NULL) {
113 return; /* Empty group */
114 }
115
116 UCS_ARBITER_GROUP_GUARD_CHECK(group);
117
118 head = tail->next;
119 next = head;
120 prev = tail;
121
122 sched_group = ucs_arbiter_group_head_is_scheduled(head);
123 if (sched_group) {
124 /* put a placeholder on the arbiter queue */
125 ucs_list_replace(&head->list, &dummy_group_head.list);
126 }
127
128 do {
129 ptr = next;
130 next = ptr->next;
131 /* Can't touch the element after cb is called if it gets removed. But it
132 * can be reused later as well, so it's group should be NULL. */
133 ucs_arbiter_elem_init(ptr);
134 result = cb(arbiter, group, ptr, cb_arg);
135
136 if (result == UCS_ARBITER_CB_RESULT_REMOVE_ELEM) {
137 if (ptr == head) {
138 head = next;
139 if (ptr == tail) {
140 /* Last element is being removed - mark group as empty */
141 group->tail = NULL;
142 if (sched_group) {
143 ucs_list_del(&dummy_group_head.list);
144 }
145 /* Break here to avoid further processing of the group */
146 return;
147 }
148 } else if (ptr == tail) {
149 group->tail = prev;
150 /* tail->next should point to head, make sure next is head
151 * (it is assigned 2 lines below) */
152 ucs_assert_always(next == head);
153 }
154 prev->next = next;
155 } else {
156 /* keep the element */
157 ucs_arbiter_elem_set_scheduled(ptr, group);
158 prev = ptr;
159 }
160 } while (ptr != tail);
161
162 ucs_assert(group->tail != NULL);
163
164 if (sched_group) {
165 /* restore group head (could be old or new) instead of the dummy element */
166 ucs_list_replace(&dummy_group_head.list, &head->list);
167 } else {
168 /* mark the group head (could be old or new) as unscheduled */
169 ucs_arbiter_group_head_reset(head);
170 }
171 }
172
ucs_arbiter_group_num_elems(ucs_arbiter_group_t * group)173 size_t ucs_arbiter_group_num_elems(ucs_arbiter_group_t *group)
174 {
175 ucs_arbiter_elem_t *elem = group->tail;
176 size_t num_elems;
177
178 if (elem == NULL) {
179 return 0;
180 }
181
182 num_elems = 0;
183 do {
184 ++num_elems;
185 elem = elem->next;
186 } while (elem != group->tail);
187
188 return num_elems;
189 }
190
191
ucs_arbiter_group_is_scheduled(ucs_arbiter_group_t * group)192 int ucs_arbiter_group_is_scheduled(ucs_arbiter_group_t *group)
193 {
194 ucs_arbiter_elem_t *head;
195
196 if (ucs_arbiter_group_is_empty(group)) {
197 return 0;
198 }
199
200 head = group->tail->next;
201 return ucs_arbiter_group_head_is_scheduled(head);
202 }
203
204 static void
ucs_arbiter_schedule_head_if_not_scheduled(ucs_arbiter_t * arbiter,ucs_arbiter_elem_t * head)205 ucs_arbiter_schedule_head_if_not_scheduled(ucs_arbiter_t *arbiter,
206 ucs_arbiter_elem_t *head)
207 {
208 if (!ucs_arbiter_group_head_is_scheduled(head)) {
209 ucs_list_add_tail(&arbiter->list, &head->list);
210 }
211 }
212
ucs_arbiter_group_schedule_nonempty(ucs_arbiter_t * arbiter,ucs_arbiter_group_t * group)213 void ucs_arbiter_group_schedule_nonempty(ucs_arbiter_t *arbiter,
214 ucs_arbiter_group_t *group)
215 {
216 ucs_arbiter_elem_t *tail = group->tail;
217 ucs_arbiter_elem_t *head;
218
219 ucs_assert(tail != NULL);
220 head = tail->next;
221
222 ucs_assert(head != NULL);
223 ucs_arbiter_schedule_head_if_not_scheduled(arbiter, head);
224 UCS_ARBITER_GROUP_ARBITER_SET(group, arbiter);
225 }
226
ucs_arbiter_group_desched_nonempty(ucs_arbiter_t * arbiter,ucs_arbiter_group_t * group)227 void ucs_arbiter_group_desched_nonempty(ucs_arbiter_t *arbiter,
228 ucs_arbiter_group_t *group)
229 {
230 ucs_arbiter_elem_t *head = group->tail->next;
231
232 if (!ucs_arbiter_group_head_is_scheduled(head)) {
233 return;
234 }
235
236 UCS_ARBITER_GROUP_ARBITER_CHECK(group, arbiter);
237 UCS_ARBITER_GROUP_ARBITER_SET(group, NULL);
238 ucs_list_del(&head->list);
239 ucs_arbiter_group_head_reset(head);
240 }
241
242 static inline void
ucs_arbiter_remove_and_reset_if_scheduled(ucs_arbiter_elem_t * elem)243 ucs_arbiter_remove_and_reset_if_scheduled(ucs_arbiter_elem_t *elem)
244 {
245 if (ucs_unlikely(ucs_arbiter_group_head_is_scheduled(elem))) {
246 ucs_list_del(&elem->list);
247 ucs_arbiter_group_head_reset(elem);
248 }
249 }
250
251 static inline void
ucs_arbiter_group_head_replace(ucs_arbiter_group_t * group,ucs_arbiter_elem_t * group_head,ucs_arbiter_elem_t * new_group_head)252 ucs_arbiter_group_head_replace(ucs_arbiter_group_t *group,
253 ucs_arbiter_elem_t *group_head,
254 ucs_arbiter_elem_t *new_group_head)
255 {
256 /* check if this is really the group head */
257 ucs_assert(!ucs_arbiter_group_is_empty(group));
258 ucs_assert(group->tail->next == group_head);
259
260 if (group_head->next == group_head) {
261 group->tail = new_group_head;
262 } else {
263 new_group_head->next = group_head->next;
264 }
265 group->tail->next = new_group_head;
266 }
267
ucs_arbiter_dispatch_nonempty(ucs_arbiter_t * arbiter,unsigned per_group,ucs_arbiter_callback_t cb,void * cb_arg)268 void ucs_arbiter_dispatch_nonempty(ucs_arbiter_t *arbiter, unsigned per_group,
269 ucs_arbiter_callback_t cb, void *cb_arg)
270 {
271 ucs_arbiter_elem_t *group_head;
272 ucs_arbiter_cb_result_t result;
273 unsigned group_dispatch_count;
274 ucs_arbiter_group_t *group;
275 UCS_LIST_HEAD(resched_list);
276 ucs_arbiter_elem_t dummy;
277
278 ucs_assert(!ucs_list_is_empty(&arbiter->list));
279
280 ucs_arbiter_group_head_reset(&dummy);
281
282 do {
283 group_head = ucs_list_extract_head(&arbiter->list, ucs_arbiter_elem_t,
284 list);
285 ucs_assert(group_head != NULL);
286
287 /* Reset group head to allow the group to be moved to another arbiter by
288 * the dispatch callback. For example, when a DC endpoint is moved from
289 * waiting-for-DCI arbiter to waiting-for-TX-resources arbiter.
290 */
291 ucs_arbiter_group_head_reset(group_head);
292
293 group_dispatch_count = 0;
294 group = group_head->group;
295 dummy.group = group;
296 UCS_ARBITER_GROUP_GUARD_CHECK(group);
297
298 for (;;) {
299 ucs_assert(group_head->group == group);
300 ucs_assert(dummy.group == group);
301 ucs_assert(group_dispatch_count < per_group);
302
303 /* reset the dispatched element here because:
304 * 1. if the element is removed from the arbiter it must be kept in
305 * initialized state otherwise push will fail
306 * 2. we can't reset the element after calling the callback because
307 * the callback could release the element.
308 */
309 ucs_arbiter_elem_init(group_head);
310 ucs_assert(!ucs_arbiter_group_head_is_scheduled(group_head));
311
312 /* replace group head by a dummy element, to allow scheduling more
313 * elements on this group from the dispatch callback.
314 */
315 ucs_arbiter_group_head_replace(group, group_head, &dummy);
316
317 /* dispatch the element */
318 ucs_trace_poll("dispatching arbiter element %p", group_head);
319 UCS_ARBITER_GROUP_GUARD_ENTER(group);
320 result = cb(arbiter, group, group_head, cb_arg);
321 UCS_ARBITER_GROUP_GUARD_EXIT(group);
322 ucs_trace_poll("dispatch result: %d", result);
323 ++group_dispatch_count;
324
325 /* recursive push to head (during dispatch) is not allowed */
326 ucs_assert(group->tail->next == &dummy);
327
328 /* element is not removed */
329 if (ucs_unlikely(result != UCS_ARBITER_CB_RESULT_REMOVE_ELEM)) {
330 /* restore group pointer */
331 ucs_arbiter_elem_set_scheduled(group_head, group);
332
333 /* the head should not move, since dummy replaces it */
334 ucs_assert(!ucs_arbiter_group_head_is_scheduled(group_head));
335
336 /* replace dummy element by group_head */
337 ucs_arbiter_group_head_replace(group, &dummy, group_head);
338
339 if (result == UCS_ARBITER_CB_RESULT_DESCHED_GROUP) {
340 /* take over a recursively scheduled group */
341 if (ucs_unlikely(ucs_arbiter_group_head_is_scheduled(&dummy))) {
342 ucs_list_replace(&dummy.list, &group_head->list);
343 UCS_ARBITER_GROUP_ARBITER_SET(group, dummy.group->arbiter);
344 ucs_arbiter_group_head_reset(&dummy);
345 } else {
346 UCS_ARBITER_GROUP_ARBITER_SET(group, NULL);
347 }
348 } else {
349 /* remove a recursively scheduled group, give priority
350 * to the original order */
351 ucs_arbiter_remove_and_reset_if_scheduled(&dummy);
352
353 if (result == UCS_ARBITER_CB_RESULT_NEXT_GROUP) {
354 /* add to arbiter tail */
355 ucs_list_add_tail(&arbiter->list, &group_head->list);
356 } else if (result == UCS_ARBITER_CB_RESULT_RESCHED_GROUP) {
357 /* add to resched list */
358 ucs_list_add_tail(&resched_list, &group_head->list);
359 } else if (result == UCS_ARBITER_CB_RESULT_STOP) {
360 /* exit the outmost loop and make sure that next dispatch()
361 * will continue from the current group */
362 ucs_list_add_head(&arbiter->list, &group_head->list);
363 goto out;
364 } else {
365 ucs_bug("unexpected return value from arbiter callback");
366 }
367 }
368
369 break;
370 }
371
372 /* last element removed */
373 if (dummy.next == &dummy) {
374 group->tail = NULL; /* group is empty now */
375 group_head = NULL; /* for debugging */
376 ucs_arbiter_remove_and_reset_if_scheduled(&dummy);
377 UCS_ARBITER_GROUP_ARBITER_SET(group, NULL);
378 break;
379 }
380
381 /* non-last element removed */
382 group_head = dummy.next; /* Update group head */
383 group->tail->next = group_head; /* Tail points to new head */
384
385 if (ucs_unlikely(ucs_arbiter_group_head_is_scheduled(&dummy))) {
386 /* take over a recursively scheduled group */
387 ucs_list_replace(&dummy.list, &group_head->list);
388 ucs_arbiter_group_head_reset(&dummy);
389 /* the group is already scheduled, continue to next group */
390 break;
391 } else if (group_dispatch_count >= per_group) {
392 /* add to arbiter tail and continue to next group */
393 ucs_list_add_tail(&arbiter->list, &group_head->list);
394 break;
395 }
396
397 /* continue with new group head */
398 ucs_arbiter_group_head_reset(group_head);
399 }
400 } while (!ucs_list_is_empty(&arbiter->list));
401
402 out:
403 ucs_list_splice_tail(&arbiter->list, &resched_list);
404 }
405
ucs_arbiter_dump(ucs_arbiter_t * arbiter,FILE * stream)406 void ucs_arbiter_dump(ucs_arbiter_t *arbiter, FILE *stream)
407 {
408 static const int max_groups = 100;
409 ucs_arbiter_elem_t *group_head, *elem;
410 int count;
411
412 fprintf(stream, "-------\n");
413 if (ucs_list_is_empty(&arbiter->list)) {
414 fprintf(stream, "(empty)\n");
415 goto out;
416 }
417
418 count = 0;
419 ucs_list_for_each(group_head, &arbiter->list, list) {
420 elem = group_head;
421 if (ucs_list_head(&arbiter->list, ucs_arbiter_elem_t, list) == group_head) {
422 fprintf(stream, "=> ");
423 } else {
424 fprintf(stream, " * ");
425 }
426 do {
427 fprintf(stream, "[%p", elem);
428 if (elem == group_head) {
429 fprintf(stream, " prev_g:%p", elem->list.prev);
430 fprintf(stream, " next_g:%p", elem->list.next);
431 }
432 fprintf(stream, " next_e:%p grp:%p]", elem->next, elem->group);
433 if (elem->next != group_head) {
434 fprintf(stream, "->");
435 }
436 elem = elem->next;
437 } while (elem != group_head);
438 fprintf(stream, "\n");
439 ++count;
440 if (count > max_groups) {
441 fprintf(stream, "more than %d groups - not printing any more\n",
442 max_groups);
443 break;
444 }
445 }
446
447 out:
448 fprintf(stream, "-------\n");
449 }
450