/*
 * Copyright 2011-2015 Samy Al Bahra.
 * Copyright 2011 David Joseph.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <ck_barrier.h>
#include <ck_cc.h>
#include <ck_pr.h>
#include <ck_spinlock.h>

struct ck_barrier_combining_queue {
	struct ck_barrier_combining_group *head;
	struct ck_barrier_combining_group *tail;
};

static struct ck_barrier_combining_group *
ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue)
{
	struct ck_barrier_combining_group *front = NULL;

	if (queue->head != NULL) {
		front = queue->head;
		queue->head = queue->head->next;
	}

	/*
	 * Note that queue->tail is left stale when the queue drains;
	 * enqueue repairs it upon observing an empty head.
	 */
	return front;
}

static void
ck_barrier_combining_insert(struct ck_barrier_combining_group *parent,
    struct ck_barrier_combining_group *tnode,
    struct ck_barrier_combining_group **child)
{

	*child = tnode;
	tnode->parent = parent;

	/*
	 * After inserting, we must increment the parent group's count of
	 * threads expected to reach it; otherwise, the barrier may end
	 * prematurely.
	 */
	parent->k++;
	return;
}
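
/*
 * Worked example of the k accounting above (illustrative, not from the
 * original source): a group initialized for nthr = 4 threads starts with
 * k = 4. Linking two child groups beneath it later raises k to 6: four
 * arrivals come from the group's own threads and one from the last
 * arriving thread of each child group, which propagates upward on the
 * child's behalf.
 */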

/*
 * This implementation of software combining tree barriers
 * uses level-order traversal to insert new thread groups
 * into the barrier's tree (see ck_barrier_combining_group_init
 * below). We use a queue to implement this traversal.
 */
static void
ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue,
    struct ck_barrier_combining_group *node_value)
{

	node_value->next = NULL;
	if (queue->head == NULL) {
		queue->head = queue->tail = node_value;
		return;
	}

	queue->tail->next = node_value;
	queue->tail = node_value;

	return;
}

void
ck_barrier_combining_group_init(struct ck_barrier_combining *root,
    struct ck_barrier_combining_group *tnode,
    unsigned int nthr)
{
	struct ck_barrier_combining_group *node;
	struct ck_barrier_combining_queue queue;

	queue.head = queue.tail = NULL;

	tnode->k = nthr;
	tnode->count = 0;
	tnode->sense = 0;
	tnode->left = tnode->right = NULL;

	/*
	 * Find the first available node for linkage into the combining
	 * tree. The use of a spinlock is excusable as this is a one-time
	 * initialization cost.
	 */
	ck_spinlock_fas_lock(&root->mutex);
	ck_barrier_combining_queue_enqueue(&queue, root->root);
	while (queue.head != NULL) {
		node = ck_barrier_combining_queue_dequeue(&queue);

		/* If the left child is free, link the group there. */
		if (node->left == NULL) {
			ck_barrier_combining_insert(node, tnode, &node->left);
			goto leave;
		}

		/* If the right child is free, link the group there. */
		if (node->right == NULL) {
			ck_barrier_combining_insert(node, tnode, &node->right);
			goto leave;
		}

		/*
		 * Both children are taken; enqueue them so that the search
		 * continues in level order.
		 */
		ck_barrier_combining_queue_enqueue(&queue, node->left);
		ck_barrier_combining_queue_enqueue(&queue, node->right);
	}

leave:
	ck_spinlock_fas_unlock(&root->mutex);
	return;
}
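
/*
 * Illustration of the level-order insertion above (not part of the
 * original source): starting from a freshly initialized root, three
 * calls to ck_barrier_combining_group_init with groups A, B and C yield
 *
 *	        root		root->k == 2	(one per child group)
 *	       /    \
 *	      A      B		A->k == nthr_A + 1, B->k == nthr_B
 *	     /
 *	    C			C->k == nthr_C
 *
 * A fourth group would be linked at A->right, a fifth at B->left, and so
 * on: each level fills completely before the traversal descends.
 */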

void
ck_barrier_combining_init(struct ck_barrier_combining *root,
    struct ck_barrier_combining_group *init_root)
{

	init_root->k = 0;
	init_root->count = 0;
	init_root->sense = 0;
	init_root->parent = init_root->left = init_root->right = NULL;
	ck_spinlock_fas_init(&root->mutex);
	root->root = init_root;
	return;
}

static void
ck_barrier_combining_aux(struct ck_barrier_combining *barrier,
    struct ck_barrier_combining_group *tnode,
    unsigned int sense)
{

	/*
	 * If this is the last thread in the group, it moves on to the parent group.
	 * Otherwise, it spins on this group's sense.
	 */
	if (ck_pr_faa_uint(&tnode->count, 1) == tnode->k - 1) {
		/*
		 * If we are the last thread entering the barrier for the
		 * current group, then signal the parent group if one exists.
		 */
		if (tnode->parent != NULL)
			ck_barrier_combining_aux(barrier, tnode->parent, sense);

		/*
		 * Once the thread returns from its parent(s), it reinitializes the group's
		 * arrival count and signals other threads to continue by flipping the group
		 * sense. Order of these operations is not important since we assume a static
		 * number of threads are members of a barrier for the lifetime of the barrier.
		 * Since count is explicitly reinitialized, it is guaranteed that at any point
		 * tnode->count is equivalent to tnode->k if and only if that many threads
		 * are at the barrier.
		 */
		ck_pr_store_uint(&tnode->count, 0);
		ck_pr_fence_store();
		ck_pr_store_uint(&tnode->sense, ~tnode->sense);
	} else {
		while (sense != ck_pr_load_uint(&tnode->sense))
			ck_pr_stall();
	}
	ck_pr_fence_memory();

	return;
}
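
/*
 * Example of the sense-reversal protocol above (derived from the code,
 * not from the original comments): a group's sense starts at 0, so each
 * thread's private sense must start at ~0. The first k - 1 arrivals spin
 * until tnode->sense becomes ~0; the k-th arrival resets count and
 * performs the flip, releasing them. Every thread then inverts its
 * private sense (see ck_barrier_combining below), so the next episode
 * waits for a flip back to 0. This is what makes the barrier reusable
 * without reinitialization.
 */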

void
ck_barrier_combining(struct ck_barrier_combining *barrier,
    struct ck_barrier_combining_group *tnode,
    struct ck_barrier_combining_state *state)
{

	ck_barrier_combining_aux(barrier, tnode, state->sense);

	/* Reverse the execution context's sense for the next barrier. */
	state->sense = ~state->sense;
	return;
}
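
/*
 * A minimal usage sketch (illustrative only, not part of the original
 * source): NTHREADS threads in a single group synchronize for ROUNDS
 * iterations. NTHREADS, ROUNDS and do_work() are hypothetical; the
 * remaining identifiers are the types and state initializer exported
 * through ck_barrier.h.
 *
 *	static ck_barrier_combining_t barrier;
 *	static ck_barrier_combining_group_t group_root, group;
 *
 *	static void *
 *	worker(void *arg)
 *	{
 *		ck_barrier_combining_state_t state =
 *		    CK_BARRIER_COMBINING_STATE_INITIALIZER;
 *		int i;
 *
 *		for (i = 0; i < ROUNDS; i++) {
 *			do_work(i);
 *			ck_barrier_combining(&barrier, &group, &state);
 *		}
 *
 *		return NULL;
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		ck_barrier_combining_init(&barrier, &group_root);
 *		ck_barrier_combining_group_init(&barrier, &group, NTHREADS);
 *		/* ... spawn NTHREADS worker threads and join them ... *\/
 *		return 0;
 *	}
 */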