/* xref: /freebsd/sys/contrib/ck/src/ck_ec.c (revision 74e9b5f2) */
#include <ck_ec.h>
#include <ck_limits.h>

#include "ck_ec_timeutil.h"

#define DEFAULT_BUSY_LOOP_ITER 100U

/*
 * The 2 ms initial wait and 8x/iteration scale factor hit 1.024
 * seconds after 3 iterations (see the worked schedule below).
 */
#define DEFAULT_INITIAL_WAIT_NS 2000000L  /* Start at 2 ms */
/* Grow the wait time 8x/iteration. */
#define DEFAULT_WAIT_SCALE_FACTOR 8
#define DEFAULT_WAIT_SHIFT_COUNT 0
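
/*
 * Worked schedule under the defaults above (illustrative): the partial
 * waits grow 2 ms -> 16 ms -> 128 ms -> 1024 ms, since
 * 2 ms * 8^3 = 1.024 s.  Once roughly a second has elapsed
 * (final_wait_time below), exponential_backoff stops growing the wait
 * and sleeps until the caller's full deadline instead.
 */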

struct ck_ec32_slow_path_state {
	struct ck_ec32 *ec;
	uint32_t flagged_word;
};

#ifdef CK_F_EC64
struct ck_ec64_slow_path_state {
	struct ck_ec64 *ec;
	uint64_t flagged_word;
};
#endif

/* Once we've waited for >= 1 sec, go for the full deadline. */
static const struct timespec final_wait_time = {
	.tv_sec = 1
};

void
ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops)
{
	/* Spurious wake-ups are OK. Clear the flag before futexing. */
	ck_pr_and_32(&ec->counter, (1U << 31) - 1);
	ops->wake32(ops, &ec->counter);
	return;
}

int
ck_ec32_wait_slow(struct ck_ec32 *ec,
    const struct ck_ec_ops *ops,
    uint32_t old_value,
    const struct timespec *deadline)
{
	return ck_ec32_wait_pred_slow(ec, ops, old_value,
				      NULL, NULL, deadline);
}

#ifdef CK_F_EC64
void
ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops)
{
	/* Spurious wake-ups are OK. Clear the flag (bit 0) before futexing. */
	ck_pr_and_64(&ec->counter, ~1);
	ops->wake64(ops, &ec->counter);
	return;
}

int
ck_ec64_wait_slow(struct ck_ec64 *ec,
    const struct ck_ec_ops *ops,
    uint64_t old_value,
    const struct timespec *deadline)
{
	return ck_ec64_wait_pred_slow(ec, ops, old_value,
				      NULL, NULL, deadline);
}
#endif

int
ck_ec_deadline_impl(struct timespec *new_deadline,
    const struct ck_ec_ops *ops,
    const struct timespec *timeout)
{
	struct timespec now;
	int r;

	if (timeout == NULL) {
		new_deadline->tv_sec = TIME_MAX;
		new_deadline->tv_nsec = NSEC_MAX;
		return 0;
	}

	r = ops->gettime(ops, &now);
	if (r != 0) {
		return -1;
	}

	*new_deadline = timespec_add(now, *timeout);
	return 0;
}
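
/*
 * Illustrative usage sketch (not part of the original file): turn a
 * relative 500 ms timeout into an absolute deadline, then hand it to
 * the slow wait path.  `ops', `ec' and `old_value' are assumed to be
 * set up by the caller as described in ck_ec.h.
 *
 *	struct timespec timeout = { .tv_nsec = 500 * 1000 * 1000 };
 *	struct timespec deadline;
 *
 *	if (ck_ec_deadline_impl(&deadline, ops, &timeout) == 0) {
 *		(void)ck_ec32_wait_slow(ec, ops, old_value, &deadline);
 *	}
 */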

/* The rest of the file implements wait_pred_slow. */

/*
 * Returns a timespec value for deadline_ptr. If deadline_ptr is NULL,
 * returns a timespec far in the future.
 */
static struct timespec
canonical_deadline(const struct timespec *deadline_ptr)
{

	if (deadline_ptr == NULL) {
		return (struct timespec) { .tv_sec = TIME_MAX };
	}

	return *deadline_ptr;
}

/*
 * Really slow (sleeping) path for ck_ec_wait.  Drives the exponential
 * backoff scheme to sleep for longer and longer periods of time,
 * until either the sleep function returns true (the eventcount's
 * value has changed), or the predicate returns non-0 (something else
 * has changed).
 *
 * If deadline is ever reached, returns -1 (timeout).
 *
 * TODO: add some form of randomisation to the intermediate timeout
 * values.
 */
static int
exponential_backoff(struct ck_ec_wait_state *wait_state,
    bool (*sleep)(const void *sleep_state,
	const struct ck_ec_wait_state *wait_state,
	const struct timespec *partial_deadline),
    const void *sleep_state,
    int (*pred)(const struct ck_ec_wait_state *state,
	struct timespec *deadline),
    const struct timespec *deadline)
{
	struct timespec begin;
	struct timespec stop_backoff;
	const struct ck_ec_ops *ops = wait_state->ops;
	const uint32_t scale_factor = (ops->wait_scale_factor != 0)
	    ? ops->wait_scale_factor
	    : DEFAULT_WAIT_SCALE_FACTOR;
	const uint32_t shift_count = (ops->wait_shift_count != 0)
	    ? ops->wait_shift_count
	    : DEFAULT_WAIT_SHIFT_COUNT;
	uint32_t wait_ns = (ops->initial_wait_ns != 0)
	    ? ops->initial_wait_ns
	    : DEFAULT_INITIAL_WAIT_NS;
	bool first = true;

	for (;;) {
		struct timespec now;
		struct timespec partial_deadline;

		if (check_deadline(&now, ops, *deadline) == true) {
			/* Timeout. Bail out. */
			return -1;
		}

		if (first) {
			begin = now;
			wait_state->start = begin;
			stop_backoff = timespec_add(begin, final_wait_time);
			first = false;
		}

		wait_state->now = now;
		if (timespec_cmp(now, stop_backoff) >= 0) {
			partial_deadline = *deadline;
		} else {
			do {
				partial_deadline =
				    timespec_add_ns(begin, wait_ns);
				wait_ns =
				    wait_time_scale(wait_ns,
						    scale_factor,
						    shift_count);
			} while (timespec_cmp(partial_deadline, now) <= 0);
		}

		if (pred != NULL) {
			int r = pred(wait_state, &partial_deadline);
			if (r != 0) {
				return r;
			}
		}

		/* Canonicalize deadlines in the far future to NULL. */
		if (sleep(sleep_state, wait_state,
			  ((partial_deadline.tv_sec == TIME_MAX)
			   ? NULL :  &partial_deadline)) == true) {
			return 0;
		}
	}
}
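
/*
 * Illustrative trace (a sketch, assuming the default parameters): on
 * the first pass begin == now, so the partial deadline is begin + 2 ms
 * and wait_ns becomes 16 ms.  If the sleep returns spuriously at, say,
 * begin + 20 ms, the do/while above skips the already-expired 16 ms
 * step and picks begin + 128 ms as the next partial deadline.  Once
 * now reaches begin + final_wait_time (1 s), the loop stops growing
 * the wait and sleeps until the caller's full deadline.
 */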

/*
 * Loops up to ops->busy_loop_iter times (DEFAULT_BUSY_LOOP_ITER if
 * that is 0), or until ec's counter value (including the flag)
 * differs from expected.
 *
 * Returns the new value in ec.
 */
#define DEF_WAIT_EASY(W)						\
	static uint##W##_t ck_ec##W##_wait_easy(struct ck_ec##W* ec,	\
						const struct ck_ec_ops *ops, \
						uint##W##_t expected)	\
	{								\
		uint##W##_t current = ck_pr_load_##W(&ec->counter);	\
		size_t n = (ops->busy_loop_iter != 0)			\
		    ? ops->busy_loop_iter				\
		    : DEFAULT_BUSY_LOOP_ITER;				\
									\
		for (size_t i = 0;					\
		     i < n && current == expected;			\
		     i++) {						\
			ck_pr_stall();					\
			current = ck_pr_load_##W(&ec->counter);		\
		}							\
									\
		return current;						\
	}

DEF_WAIT_EASY(32)
#ifdef CK_F_EC64
DEF_WAIT_EASY(64)
#endif
#undef DEF_WAIT_EASY
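
/*
 * For reference (illustrative, mechanical expansion of the macro
 * above): DEF_WAIT_EASY(32) defines
 *
 *	static uint32_t ck_ec32_wait_easy(struct ck_ec32 *ec,
 *					  const struct ck_ec_ops *ops,
 *					  uint32_t expected);
 *
 * and DEF_WAIT_EASY(64) defines the uint64_t/ck_ec64 equivalent.
 */
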
/*
 * Attempts to upgrade ec->counter from unflagged to flagged.
 *
 * Returns true if the event count has changed. Otherwise, ec's
 * counter word is equal to flagged on return, or has been at some
 * time before the return.
 */
#define DEF_UPGRADE(W)							\
	static bool ck_ec##W##_upgrade(struct ck_ec##W* ec,		\
				       uint##W##_t current,		\
				       uint##W##_t unflagged,		\
				       uint##W##_t flagged)		\
	{								\
		uint##W##_t old_word;					\
									\
		if (current == flagged) {				\
			/* Nothing to do, no change. */			\
			return false;					\
		}							\
									\
		if (current != unflagged) {				\
			/* We have a different counter value! */	\
			return true;					\
		}							\
									\
		/*							\
		 * Flag the counter value. The CAS only fails if the	\
		 * counter is already flagged, or has a new value.	\
		 */							\
		return (ck_pr_cas_##W##_value(&ec->counter,		\
					      unflagged, flagged,	\
					      &old_word) == false &&	\
			old_word != flagged);				\
	}

DEF_UPGRADE(32)
#ifdef CK_F_EC64
DEF_UPGRADE(64)
#endif
#undef DEF_UPGRADE
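
/*
 * Flag encoding, worked example (illustrative; derived from the wake
 * and wait_pred_slow functions in this file): a 32-bit eventcount
 * keeps the waiter flag in bit 31, so for old_value = 5 the unflagged
 * word is 0x00000005 and the flagged word is 0x80000005.  A 64-bit
 * eventcount stores the counter shifted left by one and keeps the
 * flag in bit 0, so for old_value = 5 the unflagged word is 0xA and
 * the flagged word is 0xB.
 */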

/*
 * Blocks until partial_deadline on the ck_ec. Returns true if the
 * eventcount's value has changed. If partial_deadline is NULL, wait
 * forever.
 */
static bool
ck_ec32_wait_slow_once(const void *vstate,
    const struct ck_ec_wait_state *wait_state,
    const struct timespec *partial_deadline)
{
	const struct ck_ec32_slow_path_state *state = vstate;
	const struct ck_ec32 *ec = state->ec;
	const uint32_t flagged_word = state->flagged_word;

	wait_state->ops->wait32(wait_state, &ec->counter,
				flagged_word, partial_deadline);
	return ck_pr_load_32(&ec->counter) != flagged_word;
}

#ifdef CK_F_EC64
static bool
ck_ec64_wait_slow_once(const void *vstate,
    const struct ck_ec_wait_state *wait_state,
    const struct timespec *partial_deadline)
{
	const struct ck_ec64_slow_path_state *state = vstate;
	const struct ck_ec64 *ec = state->ec;
	const uint64_t flagged_word = state->flagged_word;

	/* futex_wait will only compare the low 32 bits. Perform a
	 * full comparison here to maximise the chances of catching an
	 * ABA in the low 32 bits.
	 */
	if (ck_pr_load_64(&ec->counter) != flagged_word) {
		return true;
	}

	wait_state->ops->wait64(wait_state, &ec->counter,
				flagged_word, partial_deadline);
	return ck_pr_load_64(&ec->counter) != flagged_word;
}
#endif

/*
 * The full wait logic is a lot of code (> 1KB). Encourage the
 * compiler to lay this all out linearly with LIKELY annotations on
 * every early exit.
 */
#define WAIT_SLOW_BODY(W, ec, ops, pred, data, deadline_ptr,		\
		       old_value, unflagged, flagged)			\
	do {								\
		struct ck_ec_wait_state wait_state = {			\
			.ops = ops,					\
			.data = data					\
		};							\
		const struct ck_ec##W##_slow_path_state state = {	\
			.ec = ec,					\
			.flagged_word = flagged				\
		};							\
		const struct timespec deadline =			\
			canonical_deadline(deadline_ptr);		\
									\
		/* Detect infinite past deadlines. */			\
		if (CK_CC_LIKELY(deadline.tv_sec <= 0)) {		\
			return -1;					\
		}							\
									\
		for (;;) {						\
			uint##W##_t current;				\
			int r;						\
									\
			current = ck_ec##W##_wait_easy(ec, ops, unflagged); \
									\
			/*						\
			 * We're about to wait harder (i.e.,		\
			 * potentially with futex). Make sure the	\
			 * counter word is flagged.			\
			 */						\
			if (CK_CC_LIKELY(				\
				ck_ec##W##_upgrade(ec, current,		\
					unflagged, flagged) == true)) { \
				ck_pr_fence_acquire();			\
				return 0;				\
			}						\
									\
			/*						\
			 * By now, ec->counter == flagged_word (at	\
			 * some point in the past). Spin some more to	\
			 * heuristically let any in-flight SP inc/add	\
			 * retire. This does not affect			\
			 * correctness, but practically eliminates	\
			 * lost wake-ups.				\
			 */						\
			current = ck_ec##W##_wait_easy(ec, ops, flagged); \
			if (CK_CC_LIKELY(current != flagged_word)) {	\
				ck_pr_fence_acquire();			\
				return 0;				\
			}						\
									\
			r = exponential_backoff(&wait_state,		\
						ck_ec##W##_wait_slow_once, \
						&state,			\
						pred, &deadline); \
			if (r != 0) {					\
				return r;				\
			}						\
									\
			if (ck_ec##W##_value(ec) != old_value) {	\
				ck_pr_fence_acquire();			\
				return 0;				\
			}						\
									\
			/* Spurious wake-up. Redo the slow path. */	\
		}							\
	} while (0)

int
ck_ec32_wait_pred_slow(struct ck_ec32 *ec,
    const struct ck_ec_ops *ops,
    uint32_t old_value,
    int (*pred)(const struct ck_ec_wait_state *state,
	struct timespec *deadline),
    void *data,
    const struct timespec *deadline_ptr)
{
	const uint32_t unflagged_word = old_value;
	const uint32_t flagged_word = old_value | (1UL << 31);

	if (CK_CC_UNLIKELY(ck_ec32_value(ec) != old_value)) {
		return 0;
	}

	WAIT_SLOW_BODY(32, ec, ops, pred, data, deadline_ptr,
		       old_value, unflagged_word, flagged_word);
}

#ifdef CK_F_EC64
int
ck_ec64_wait_pred_slow(struct ck_ec64 *ec,
    const struct ck_ec_ops *ops,
    uint64_t old_value,
    int (*pred)(const struct ck_ec_wait_state *state,
	struct timespec *deadline),
    void *data,
    const struct timespec *deadline_ptr)
{
	const uint64_t unflagged_word = old_value << 1;
	const uint64_t flagged_word = unflagged_word | 1;

	if (CK_CC_UNLIKELY(ck_ec64_value(ec) != old_value)) {
		return 0;
	}

	WAIT_SLOW_BODY(64, ec, ops, pred, data, deadline_ptr,
		       old_value, unflagged_word, flagged_word);
}
#endif

#undef WAIT_SLOW_BODY