/*
 * Copyright 2018 Paul Khuong, Google LLC.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Overview
 * ========
 *
 * ck_ec implements 32- and 64-bit event counts. Event counts let us
 * easily integrate OS-level blocking (e.g., futexes) in lock-free
 * protocols. Waiters block conditionally, if the event count's value
 * is still equal to some old value.
 *
 * Event counts come in four variants: 32 and 64 bit (with one bit
 * stolen for internal signaling, so 31 and 63 bit counters), and
 * single or multiple producers (wakers). Waiters are always multiple
 * consumers. The 32 bit variants are smaller, and more efficient,
 * especially in single producer mode. The 64 bit variants are larger,
 * but practically invulnerable to ABA.
 *
 * The 32 bit variant is always available. The 64 bit variant is only
 * available if CK supports 64-bit atomic operations. Currently,
 * specialization for single producer is only implemented for x86 and
 * x86-64, on compilers that support GCC extended inline assembly;
 * other platforms fall back to the multiple producer code path.
 *
 * A typical usage pattern is:
 *
 *  1. On the producer side:
 *
 *    - Make changes to some shared data structure, without involving
 *      the event count at all.
 *    - After each change, call ck_ec_inc on the event count. The call
 *      acts as a write-write barrier, and wakes up any consumer blocked
 *      on the event count (waiting for new changes).
 *
 *  2. On the consumer side:
 *
 *    - Snapshot ck_ec_value of the event count. The call acts as a
 *      read barrier.
 *    - Read and process the shared data structure.
 *    - Wait for new changes by calling ck_ec_wait with the snapshot value.
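 *
 * For example, a minimal sketch of that pattern (the shared `data`
 * structure and the `update`, `process` and `ec_mode` names are
 * hypothetical; the ck_ec_* calls are the type-generic API described
 * below):
 *
 *     static struct ck_ec32 ec = CK_EC_INITIALIZER;
 *
 *     static void producer(void)
 *     {
 *             update(&data);            // change the shared structure
 *             ck_ec_inc(&ec, &ec_mode); // write barrier + wake waiters
 *     }
 *
 *     static void consumer(void)
 *     {
 *             for (;;) {
 *                     // Read barrier, then read the shared structure.
 *                     uint32_t snapshot = ck_ec_value(&ec);
 *
 *                     process(&data);
 *                     // Sleep until the counter moves past snapshot.
 *                     ck_ec_wait(&ec, &ec_mode, snapshot, NULL);
 *             }
 *     }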
 *
 * Some data structures may opt for tighter integration with their
 * event count. For example, an SPMC ring buffer or disruptor might
 * use the event count's value as the write pointer. If the buffer is
 * regularly full, it might also make sense to store the read pointer
 * in an MP event count.
 *
 * This event count implementation supports tighter integration in two
 * ways.
 *
 * Producers may opt to increment by an arbitrary value (less than
 * INT32_MAX / INT64_MAX), in order to encode, e.g., byte
 * offsets. Larger increment values make wraparound more likely, so
 * the increments should still be relatively small.
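 *
 * For example, a byte-oriented SPMC ring buffer (with a hypothetical
 * `ring` layout) could publish `bytes_written` new bytes with
 *
 *     (void)ck_ec_add(&ring->write_pointer, &ring->mode, bytes_written);
 *
 * and consumers would then interpret the counter as a byte offset.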
 *
 * Consumers may pass a predicate to ck_ec_wait_pred. This predicate
 * can make `ck_ec_wait_pred` return early, before the event count's
 * value changes, and can override the deadline passed to futex_wait.
 * This lets consumers block on one event count, while optimistically
 * looking at other waking conditions.
 *
 * API Reference
 * =============
 *
 * When compiled as C11 or later, this header defines type-generic
 * macros for ck_ec32 and ck_ec64; the reference describes this
 * type-generic API.
 *
 * ck_ec needs additional OS primitives to determine the current time,
 * to wait on an address, and to wake all threads waiting on a given
 * address. These are defined with fields in a struct ck_ec_ops. Each
 * ck_ec_ops may additionally define the number of spin loop
 * iterations in the slow path, as well as the initial wait time in
 * the internal exponential backoff, the exponential scale factor, and
 * the right shift count (< 32).
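 *
 * For example, on Linux, a minimal ops table could be built on the
 * futex syscall roughly as follows (a sketch: the my_* names are
 * hypothetical, the 64 bit hooks and error handling are omitted, and
 * <linux/futex.h>, <sys/syscall.h>, <unistd.h>, <limits.h> and
 * <time.h> are assumed):
 *
 *     static int my_gettime(const struct ck_ec_ops *ops, struct timespec *out)
 *     {
 *             (void)ops;
 *             return clock_gettime(CLOCK_MONOTONIC, out);
 *     }
 *
 *     static void my_wait32(const struct ck_ec_wait_state *state,
 *                           const uint32_t *addr, uint32_t expected,
 *                           const struct timespec *deadline)
 *     {
 *             // FUTEX_WAIT expects a relative timeout; convert from
 *             // the absolute deadline (NULL means wait forever).
 *             struct timespec now, rel;
 *             const struct timespec *timeout = NULL;
 *
 *             (void)state;
 *             if (deadline != NULL &&
 *                 clock_gettime(CLOCK_MONOTONIC, &now) == 0) {
 *                     rel.tv_sec = deadline->tv_sec - now.tv_sec;
 *                     rel.tv_nsec = deadline->tv_nsec - now.tv_nsec;
 *                     if (rel.tv_nsec < 0) {
 *                             rel.tv_sec--;
 *                             rel.tv_nsec += 1000000000L;
 *                     }
 *                     if (rel.tv_sec < 0)
 *                             return;
 *                     timeout = &rel;
 *             }
 *
 *             syscall(SYS_futex, addr, FUTEX_WAIT_PRIVATE, expected,
 *                     timeout, NULL, 0);
 *     }
 *
 *     static void my_wake32(const struct ck_ec_ops *ops, const uint32_t *addr)
 *     {
 *             (void)ops;
 *             syscall(SYS_futex, addr, FUTEX_WAKE_PRIVATE, INT_MAX,
 *                     NULL, NULL, 0);
 *     }
 *
 *     static const struct ck_ec_ops my_ops = {
 *             .gettime = my_gettime,
 *             .wait32 = my_wait32,
 *             .wake32 = my_wake32
 *     };
 *
 * Leaving the spin and backoff tuning fields zero selects the
 * defaults described in struct ck_ec_ops below.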
 *
 * The ops, in addition to the single/multiple producer flag, are
 * encapsulated in a struct ck_ec_mode, passed to most ck_ec
 * operations.
 *
 * ec is a struct ck_ec32 *, or a struct ck_ec64 *.
 *
 * value is a uint32_t for ck_ec32, and a uint64_t for ck_ec64. It
 * never exceeds INT32_MAX and INT64_MAX respectively.
 *
 * mode is a struct ck_ec_mode *.
 *
 * deadline is either NULL, or a `const struct timespec *` that will
 * be treated as an absolute deadline.
 *
 * `void ck_ec_init(ec, value)`: initializes the event count to value.
 *
 * `value ck_ec_value(ec)`: returns the current value of the event
 *  counter. This read acts as a read (acquire) barrier.
 *
 * `bool ck_ec_has_waiters(ec)`: returns whether some thread has
 *  marked the event count as requiring an OS wakeup.
 *
 * `void ck_ec_inc(ec, mode)`: increments the value of the event
 *  counter by one. This write acts as a write barrier. Wakes up
 *  any waiting thread.
 *
 * `value ck_ec_add(ec, mode, value)`: increments the event counter by
 *  `value`, and returns the event counter's previous value. This
 *  write acts as a write barrier. Wakes up any waiting thread.
 *
 * `int ck_ec_deadline(struct timespec *new_deadline,
 *                     mode,
 *                     const struct timespec *timeout)`:
 *  computes a deadline `timeout` away from the current time. If
 *  timeout is NULL, computes a deadline in the infinite future. The
 *  resulting deadline is written to `new_deadline`. Returns 0 on
 *  success, and -1 if ops->gettime failed (without touching errno).
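 *
 *  For example, to wait for a change for at most 10 ms (assuming a
 *  struct ck_ec32 `ec` and a struct ck_ec_mode `mode` already set up):
 *
 *      const struct timespec timeout = {
 *              .tv_sec = 0,
 *              .tv_nsec = 10 * 1000 * 1000
 *      };
 *      struct timespec deadline;
 *      uint32_t snapshot = ck_ec_value(&ec);
 *
 *      if (ck_ec_deadline(&deadline, &mode, &timeout) == 0) {
 *              // 0 if the counter moved away from snapshot, -1 on timeout.
 *              (void)ck_ec_wait(&ec, &mode, snapshot, &deadline);
 *      }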
 *
 * `int ck_ec_wait(ec, mode, value, deadline)`: waits until the event
 *  counter's value differs from `value`, or, if `deadline` is
 *  provided and non-NULL, until the current time is after that
 *  deadline. Use a deadline with tv_sec = 0 for a non-blocking
 *  execution. Returns 0 if the event counter has changed, and -1 on
 *  timeout. This function acts as a read (acquire) barrier.
 *
 * `int ck_ec_wait_pred(ec, mode, value, pred, data, deadline)`: waits
 *  until the event counter's value differs from `value`, or until
 *  `pred` returns non-zero, or, if `deadline` is provided and
 *  non-NULL, until the current time is after that deadline. Use a
 *  deadline with tv_sec = 0 for a non-blocking execution. Returns 0 if
 *  the event counter has changed, `pred`'s return value if non-zero,
 *  and -1 on timeout. This function acts as a read (acquire) barrier.
 *
 * `pred` is always called as `pred(data, iteration_deadline, now)`,
 *  where `iteration_deadline` is a timespec of the deadline for this
 *  exponential backoff iteration, and `now` is the current time. If
 *  `pred` returns a non-zero value, that value is immediately returned
 *  to the waiter. Otherwise, `pred` is free to modify
 *  `iteration_deadline` (moving it further in the future is a bad
 *  idea).
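 *
 *  For example, a predicate that also pops out of the wait when a
 *  separate cancellation flag is raised (the `waiter` struct, its
 *  `cancelled` field, `my_waiter` and `cancel_pred` are hypothetical;
 *  the C prototype is the one taken by ck_ec32_wait_pred and
 *  ck_ec64_wait_pred below):
 *
 *      struct waiter {
 *              int cancelled;
 *      };
 *
 *      static int cancel_pred(const struct ck_ec_wait_state *state,
 *                             struct timespec *deadline)
 *      {
 *              const struct waiter *w = state->data;
 *
 *              (void)deadline;  // keep this iteration's deadline as is
 *              // Any non-zero value is returned directly to the waiter.
 *              return w->cancelled ? 1 : 0;
 *      }
 *
 *  and a consumer that owns a struct waiter `my_waiter` would call
 *
 *      ck_ec_wait_pred(&ec, &mode, snapshot, cancel_pred, &my_waiter, NULL);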
 *
 * Implementation notes
 * ====================
 *
 * The multiple producer implementation is a regular locked event
 * count, with a single flag bit to denote the need to wake up waiting
 * threads.
 *
 * The single producer specialization is heavily tied to
 * [x86-TSO](https://www.cl.cam.ac.uk/~pes20/weakmemory/cacm.pdf), and
 * to non-atomic read-modify-write instructions (e.g., `inc mem`);
 * these non-atomic RMW let us write to the same memory locations with
 * atomic and non-atomic instructions, without suffering from process
 * scheduling stalls.
 *
 * The reason we can mix atomic and non-atomic writes to the `counter`
 * word is that every non-atomic write obviates the need for the
 * atomically flipped flag bit: we only use non-atomic writes to
 * update the event count, and the atomic flag only informs the
 * producer that we would like a futex_wake, because of the update.
 * We only require the non-atomic RMW counter update to prevent
 * preemption from introducing arbitrarily long worst case delays.
 *
 * Correctness does not rely on the usual ordering argument: in the
 * absence of fences, there is no strict ordering between atomic and
 * non-atomic writes. The key is instead x86-TSO's guarantee that a
 * read is satisfied from the most recent buffered write in the local
 * store queue if there is one, or from memory if there is no write to
 * that address in the store queue.
 *
 * x86-TSO's constraint on reads suffices to guarantee that the
 * producer will never forget about a counter update. If the last
 * update is still queued, the new update will be based on the queued
 * value. Otherwise, the new update will be based on the value in
 * memory, which may or may not have had its flag flipped. In either
 * case, the value of the counter (modulo flag) is correct.
 *
 * When the producer forwards the counter's value from its store
 * queue, the new update might not preserve a flag flip. Any waiter
 * thus has to check from time to time whether it missed a wakeup
 * because the flag bit was silently cleared.
 *
 * In reality, the store queue in x86-TSO stands for in-flight
 * instructions in the chip's out-of-order backend. In the vast
 * majority of cases, instructions will only remain in flight for a
 * few hundred or thousand cycles. That's why ck_ec_wait spins on
 * the `counter` word for ~100 iterations after flipping its flag bit:
 * if the counter hasn't changed after that many iterations, it is
 * very likely that the producer's next counter update will observe
 * the flag flip.
 *
 * That's still not a hard guarantee of correctness. Conservatively,
 * we can expect that no instruction will remain in flight for more
 * than 1 second... if only because some interrupt will have forced
 * the chip to store its architectural state in memory, at which point
 * an instruction is either fully retired or rolled back. Interrupts,
 * particularly the pre-emption timer, are why single-producer updates
 * must happen in a single non-atomic read-modify-write instruction.
 * Having a single instruction as the critical section means we only
 * have to consider the worst-case execution time for that
 * instruction. That's easier than doing the same for a pair of
 * instructions, which an unlucky pre-emption could delay for
 * arbitrarily long.
 *
 * Thus, after a short spin loop, ck_ec_wait enters an exponential
 * backoff loop, where each "sleep" is instead a futex_wait. The
 * backoff is only necessary to handle rare cases where the flag flip
 * was overwritten after the spin loop. Eventually, more than one
 * second will have elapsed since the flag flip, and the sleep timeout
 * becomes infinite: since the flag bit has been set for much longer
 * than the time for which an instruction may remain in flight, the
 * flag will definitely be observed at the next counter update.
 *
 * The 64 bit ck_ec_wait pulls another trick: futexes only handle 32
 * bit ints, so we must treat the 64 bit counter's low 32 bits as an
 * int in futex_wait. That's a bit dodgy, but fine in practice, given
 * that the OS's futex code will always read whatever value is
 * currently in memory: even if the producer thread were to wait on
 * its own event count, the syscall and ring transition would empty
 * the store queue (the out-of-order execution backend).
 *
 * Finally, what happens when the producer is migrated to another core
 * or otherwise pre-empted? Migration must already incur a barrier, so
 * that thread always sees its own writes, so that's safe. As for
 * pre-emption, that requires storing the architectural state, which
 * means every instruction must either be executed fully or not at
 * all when pre-emption happens.
 */

#ifndef CK_EC_H
#define CK_EC_H
#include <ck_cc.h>
#include <ck_pr.h>
#include <ck_stdbool.h>
#include <ck_stdint.h>
#include <ck_stddef.h>
#include <sys/time.h>

/*
 * If we have ck_pr_faa_64 (and, presumably, ck_pr_load_64), we
 * support 63 bit counters.
 */
#ifdef CK_F_PR_FAA_64
#define CK_F_EC64
#endif /* CK_F_PR_FAA_64 */

/*
 * GCC inline assembly lets us exploit non-atomic read-modify-write
 * instructions on x86/x86_64 for a fast single-producer mode.
 *
 * If CK_F_EC_SP is not defined, CK_EC always uses the slower
 * multiple producer code.
 */
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
#define CK_F_EC_SP
#endif /* GNUC && (__i386__ || __x86_64__) */

struct ck_ec_ops;

struct ck_ec_wait_state {
        struct timespec start;  /* Time when we entered ck_ec_wait. */
        struct timespec now;    /* Time now. */
        const struct ck_ec_ops *ops;
        void *data;  /* Opaque pointer for the predicate's internal state. */
};

/*
 * ck_ec_ops define system-specific functions to get the current time,
 * atomically wait on an address if it still has some expected value,
 * and to wake all threads waiting on an address.
 *
 * Each platform is expected to define a few (usually one) const ops
 * structs, and to reuse the same pointer for all its ck_ec_mode
 * structs.
 */
struct ck_ec_ops {
        /* Populates out with the current time. Returns non-zero on failure. */
        int (*gettime)(const struct ck_ec_ops *, struct timespec *out);

        /*
         * Waits on address if its value is still `expected`. If
         * deadline is non-NULL, stops waiting once that deadline is
         * reached. May return early for any reason.
         */
        void (*wait32)(const struct ck_ec_wait_state *, const uint32_t *,
                       uint32_t expected, const struct timespec *deadline);

        /*
         * Same as wait32, but for a 64 bit counter. Only used if
         * CK_F_EC64 is defined.
         *
         * If the underlying blocking primitive only supports 32 bit
         * control words, it should be safe to block on the least
         * significant half of the 64 bit address.
         */
        void (*wait64)(const struct ck_ec_wait_state *, const uint64_t *,
                       uint64_t expected, const struct timespec *deadline);

        /* Wakes up all threads waiting on address. */
        void (*wake32)(const struct ck_ec_ops *, const uint32_t *address);

        /*
         * Same as wake32, but for a 64 bit counter. Only used if
         * CK_F_EC64 is defined.
         *
         * When wait64 truncates the control word at address to only
         * consider its least significant half, wake64 should perform
         * any necessary fixup (e.g., on big endian platforms).
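         *
         * For example, if wait64 blocks on the counter's least
         * significant 32 bits, a big endian platform's wake64 might
         * wake `(const uint32_t *)address + 1` (the address of those
         * low 32 bits) rather than `address` itself.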
         */
        void (*wake64)(const struct ck_ec_ops *, const uint64_t *address);

        /*
         * Number of iterations for the initial busy wait. 0 defaults
         * to 100 (not ABI stable).
         */
        uint32_t busy_loop_iter;

        /*
         * Delay in nanoseconds for the first iteration of the
         * exponential backoff. 0 defaults to 2 ms (not ABI stable).
         */
        uint32_t initial_wait_ns;

        /*
         * Scale factor for the exponential backoff. 0 defaults to 8x
         * (not ABI stable).
         */
        uint32_t wait_scale_factor;

        /*
         * Right shift count for the exponential backoff. The update
         * after each iteration is
         *     wait_ns = (wait_ns * wait_scale_factor) >> wait_shift_count,
         * until one second has elapsed. After that, the deadline goes
         * to infinity.
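         *
         * For example, with the default 2 ms initial wait and 8x
         * scale factor, and a shift count of 0, successive waits are
         * roughly 2 ms, 16 ms, 128 ms and 1.024 s long, after which
         * the deadline becomes infinite.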
         */
        uint32_t wait_shift_count;
};

/*
 * ck_ec_mode wraps the ops table, and informs the fast path whether
 * it should attempt to specialize for single producer mode.
 *
 * mode structs are expected to be exposed by value, e.g.,
 *
 *     extern const struct ck_ec_ops system_ec_ops;
 *
 *     static const struct ck_ec_mode ec_sp = {
 *         .ops = &system_ec_ops,
 *         .single_producer = true
 *     };
 *
 *     static const struct ck_ec_mode ec_mp = {
 *         .ops = &system_ec_ops,
 *         .single_producer = false
 *     };
 *
 * ck_ec_mode structs are only passed to inline functions defined in
 * this header, and never escape to their slow paths, so they should
 * not result in any object file size increase.
 */
struct ck_ec_mode {
        const struct ck_ec_ops *ops;
        /*
         * If single_producer is true, the event count has a unique
         * incrementer. The implementation will specialize ck_ec_inc
         * and ck_ec_add if possible (if CK_F_EC_SP is defined).
         */
        bool single_producer;
};

struct ck_ec32 {
        /* Flag is "sign" bit, value in bits 0:30. */
        uint32_t counter;
};

typedef struct ck_ec32 ck_ec32_t;

#ifdef CK_F_EC64
struct ck_ec64 {
        /*
         * Flag is bottom bit, value in bits 1:63. Eventcount only
         * works on x86-64 (i.e., little endian), so the futex int
         * lies in the first 4 (bottom) bytes.
         */
        uint64_t counter;
};

typedef struct ck_ec64 ck_ec64_t;
#endif /* CK_F_EC64 */

#define CK_EC_INITIALIZER { .counter = 0 }

/*
 * Initializes the event count to `value`. The value must not
 * exceed INT32_MAX.
 */
static void ck_ec32_init(struct ck_ec32 *ec, uint32_t value);

#ifndef CK_F_EC64
#define ck_ec_init ck_ec32_init
#else
/*
 * Initializes the event count to `value`. The value must not
 * exceed INT64_MAX.
 */
static void ck_ec64_init(struct ck_ec64 *ec, uint64_t value);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_init(EC, VALUE)                                   \
        (_Generic(*(EC),                                        \
                  struct ck_ec32 : ck_ec32_init,                \
                  struct ck_ec64 : ck_ec64_init)((EC), (VALUE)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Returns the counter value in the event count. The value is at most
 * INT32_MAX.
 */
static uint32_t ck_ec32_value(const struct ck_ec32* ec);

#ifndef CK_F_EC64
#define ck_ec_value ck_ec32_value
#else
/*
 * Returns the counter value in the event count. The value is at most
 * INT64_MAX.
 */
static uint64_t ck_ec64_value(const struct ck_ec64* ec);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_value(EC)                                         \
        (_Generic(*(EC),                                        \
                  struct ck_ec32 : ck_ec32_value,               \
                  struct ck_ec64 : ck_ec64_value)((EC)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Returns whether there may be slow-pathed waiters that need an
 * explicit OS wakeup for this event count.
 */
static bool ck_ec32_has_waiters(const struct ck_ec32 *ec);

#ifndef CK_F_EC64
#define ck_ec_has_waiters ck_ec32_has_waiters
#else
static bool ck_ec64_has_waiters(const struct ck_ec64 *ec);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_has_waiters(EC)                                   \
        (_Generic(*(EC),                                        \
                  struct ck_ec32 : ck_ec32_has_waiters,         \
                  struct ck_ec64 : ck_ec64_has_waiters)((EC)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Increments the counter value in the event count by one, and wakes
 * up any waiter.
 */
static void ck_ec32_inc(struct ck_ec32 *ec, const struct ck_ec_mode *mode);

#ifndef CK_F_EC64
#define ck_ec_inc ck_ec32_inc
#else
static void ck_ec64_inc(struct ck_ec64 *ec, const struct ck_ec_mode *mode);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_inc(EC, MODE)                                     \
        (_Generic(*(EC),                                        \
                  struct ck_ec32 : ck_ec32_inc,                 \
                  struct ck_ec64 : ck_ec64_inc)((EC), (MODE)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Increments the counter value in the event count by delta, wakes
 * up any waiter, and returns the previous counter value.
 */
static uint32_t ck_ec32_add(struct ck_ec32 *ec,
                            const struct ck_ec_mode *mode,
                            uint32_t delta);

#ifndef CK_F_EC64
#define ck_ec_add ck_ec32_add
#else
static uint64_t ck_ec64_add(struct ck_ec64 *ec,
                            const struct ck_ec_mode *mode,
                            uint64_t delta);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_add(EC, MODE, DELTA)                                      \
        (_Generic(*(EC),                                                \
                  struct ck_ec32 : ck_ec32_add,                         \
                  struct ck_ec64 : ck_ec64_add)((EC), (MODE), (DELTA)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Populates `new_deadline` with a deadline `timeout` in the future.
 * Returns 0 on success, and -1 if clock_gettime failed, in which
 * case errno is left as is.
 */
static int ck_ec_deadline(struct timespec *new_deadline,
                          const struct ck_ec_mode *mode,
                          const struct timespec *timeout);

/*
 * Waits until the counter value in the event count differs from
 * old_value, or, if deadline is non-NULL, until CLOCK_MONOTONIC is
 * past the deadline.
 *
 * Returns 0 on success, and -1 on timeout.
 */
static int ck_ec32_wait(struct ck_ec32 *ec,
                        const struct ck_ec_mode *mode,
                        uint32_t old_value,
                        const struct timespec *deadline);

#ifndef CK_F_EC64
#define ck_ec_wait ck_ec32_wait
#else
static int ck_ec64_wait(struct ck_ec64 *ec,
                        const struct ck_ec_mode *mode,
                        uint64_t old_value,
                        const struct timespec *deadline);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_wait(EC, MODE, OLD_VALUE, DEADLINE)               \
        (_Generic(*(EC),                                        \
                  struct ck_ec32 : ck_ec32_wait,                \
                  struct ck_ec64 : ck_ec64_wait)((EC), (MODE),  \
                                                 (OLD_VALUE), (DEADLINE)))

#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Waits until the counter value in the event count differs from
 * old_value, pred returns non-zero, or, if deadline is non-NULL,
 * until CLOCK_MONOTONIC is past the deadline.
 *
 * Returns 0 on success, -1 on timeout, and the return value of pred
 * if it returns non-zero.
 *
 * A NULL pred represents a function that always returns 0.
 */
static int ck_ec32_wait_pred(struct ck_ec32 *ec,
                             const struct ck_ec_mode *mode,
                             uint32_t old_value,
                             int (*pred)(const struct ck_ec_wait_state *,
                                         struct timespec *deadline),
                             void *data,
                             const struct timespec *deadline);

#ifndef CK_F_EC64
#define ck_ec_wait_pred ck_ec32_wait_pred
#else
static int ck_ec64_wait_pred(struct ck_ec64 *ec,
                             const struct ck_ec_mode *mode,
                             uint64_t old_value,
                             int (*pred)(const struct ck_ec_wait_state *,
                                         struct timespec *deadline),
                             void *data,
                             const struct timespec *deadline);

#if __STDC_VERSION__ >= 201112L
#define ck_ec_wait_pred(EC, MODE, OLD_VALUE, PRED, DATA, DEADLINE)      \
        (_Generic(*(EC),                                                \
                  struct ck_ec32 : ck_ec32_wait_pred,                   \
                  struct ck_ec64 : ck_ec64_wait_pred)                   \
         ((EC), (MODE), (OLD_VALUE), (PRED), (DATA), (DEADLINE)))
#endif /* __STDC_VERSION__ */
#endif /* CK_F_EC64 */

/*
 * Inline implementation details. 32 bit first, then 64 bit
 * conditionally.
 */
CK_CC_FORCE_INLINE void ck_ec32_init(struct ck_ec32 *ec, uint32_t value)
{
        ec->counter = value & ~(1UL << 31);
        return;
}

CK_CC_FORCE_INLINE uint32_t ck_ec32_value(const struct ck_ec32 *ec)
{
        uint32_t ret = ck_pr_load_32(&ec->counter) & ~(1UL << 31);

        ck_pr_fence_acquire();
        return ret;
}

CK_CC_FORCE_INLINE bool ck_ec32_has_waiters(const struct ck_ec32 *ec)
{
        return ck_pr_load_32(&ec->counter) & (1UL << 31);
}

/* Slow path for ck_ec{32,64}_{inc,add} */
void ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops);

CK_CC_FORCE_INLINE void ck_ec32_inc(struct ck_ec32 *ec,
                                    const struct ck_ec_mode *mode)
{
#if !defined(CK_F_EC_SP)
        /* Nothing to specialize if we don't have EC_SP. */
        ck_ec32_add(ec, mode, 1);
        return;
#else
        char flagged;

#if __GNUC__ >= 6
        /*
         * We don't want to wake if the sign bit is 0. We do want to
         * wake if the sign bit just flipped from 1 to 0. We don't
         * care what happens when our increment caused the sign bit to
         * flip from 0 to 1 (that's once per 2^31 increment).
         *
         * This leaves us with four cases:
         *
         *  old sign bit | new sign bit | SF | OF | ZF
         *  -------------------------------------------
         *             0 |            0 |  0 |  0 |  ?
         *             0 |            1 |  1 |  0 |  ?
         *             1 |            1 |  1 |  0 |  ?
         *             1 |            0 |  0 |  0 |  1
         *
         * In the first case, we don't want to hit ck_ec32_wake. In
         * the last two cases, we do want to call ck_ec32_wake. In the
         * second case, we don't care, so we arbitrarily choose to
         * call ck_ec32_wake.
         *
         * The "le" condition checks if SF != OF, or ZF == 1, which
         * meets our requirements.
         */
#define CK_EC32_INC_ASM(PREFIX)                                         \
        __asm__ volatile(PREFIX " incl %0"                              \
                         : "+m"(ec->counter), "=@ccle"(flagged)         \
                         :: "cc", "memory")
#else
#define CK_EC32_INC_ASM(PREFIX)                                         \
        __asm__ volatile(PREFIX " incl %0; setle %1"                    \
                         : "+m"(ec->counter), "=r"(flagged)             \
                         :: "cc", "memory")
#endif /* __GNUC__ */

        if (mode->single_producer == true) {
                ck_pr_fence_store();
                CK_EC32_INC_ASM("");
        } else {
                ck_pr_fence_store_atomic();
                CK_EC32_INC_ASM("lock");
        }
#undef CK_EC32_INC_ASM

        if (CK_CC_UNLIKELY(flagged)) {
                ck_ec32_wake(ec, mode->ops);
        }

        return;
#endif /* CK_F_EC_SP */
}

CK_CC_FORCE_INLINE uint32_t ck_ec32_add_epilogue(struct ck_ec32 *ec,
                                                 const struct ck_ec_mode *mode,
                                                 uint32_t old)
{
        const uint32_t flag_mask = 1U << 31;
        uint32_t ret;

        ret = old & ~flag_mask;
        /* These two only differ if the flag bit is set. */
        if (CK_CC_UNLIKELY(old != ret)) {
                ck_ec32_wake(ec, mode->ops);
        }

        return ret;
}

static CK_CC_INLINE uint32_t ck_ec32_add_mp(struct ck_ec32 *ec,
                                            const struct ck_ec_mode *mode,
                                            uint32_t delta)
{
        uint32_t old;

        ck_pr_fence_store_atomic();
        old = ck_pr_faa_32(&ec->counter, delta);
        return ck_ec32_add_epilogue(ec, mode, old);
}

#ifdef CK_F_EC_SP
static CK_CC_INLINE uint32_t ck_ec32_add_sp(struct ck_ec32 *ec,
                                            const struct ck_ec_mode *mode,
                                            uint32_t delta)
{
        uint32_t old;

        /*
         * Correctness of this racy write depends on actually
         * having an update to write. Exit here if the update
         * is a no-op.
         */
        if (CK_CC_UNLIKELY(delta == 0)) {
                return ck_ec32_value(ec);
        }

        ck_pr_fence_store();
        old = delta;
        __asm__ volatile("xaddl %1, %0"
                         : "+m"(ec->counter), "+r"(old)
                         :: "cc", "memory");
        return ck_ec32_add_epilogue(ec, mode, old);
}
#endif /* CK_F_EC_SP */

CK_CC_FORCE_INLINE uint32_t ck_ec32_add(struct ck_ec32 *ec,
                                        const struct ck_ec_mode *mode,
                                        uint32_t delta)
{
#ifdef CK_F_EC_SP
        if (mode->single_producer == true) {
                return ck_ec32_add_sp(ec, mode, delta);
        }
#endif

        return ck_ec32_add_mp(ec, mode, delta);
}

int ck_ec_deadline_impl(struct timespec *new_deadline,
                        const struct ck_ec_ops *ops,
                        const struct timespec *timeout);

CK_CC_FORCE_INLINE int ck_ec_deadline(struct timespec *new_deadline,
                                      const struct ck_ec_mode *mode,
                                      const struct timespec *timeout)
{
        return ck_ec_deadline_impl(new_deadline, mode->ops, timeout);
}


int ck_ec32_wait_slow(struct ck_ec32 *ec,
                      const struct ck_ec_ops *ops,
                      uint32_t old_value,
                      const struct timespec *deadline);

CK_CC_FORCE_INLINE int ck_ec32_wait(struct ck_ec32 *ec,
                                    const struct ck_ec_mode *mode,
                                    uint32_t old_value,
                                    const struct timespec *deadline)
{
        if (ck_ec32_value(ec) != old_value) {
                return 0;
        }

        return ck_ec32_wait_slow(ec, mode->ops, old_value, deadline);
}

int ck_ec32_wait_pred_slow(struct ck_ec32 *ec,
                           const struct ck_ec_ops *ops,
                           uint32_t old_value,
                           int (*pred)(const struct ck_ec_wait_state *state,
                                       struct timespec *deadline),
                           void *data,
                           const struct timespec *deadline);

CK_CC_FORCE_INLINE int
ck_ec32_wait_pred(struct ck_ec32 *ec,
                  const struct ck_ec_mode *mode,
                  uint32_t old_value,
                  int (*pred)(const struct ck_ec_wait_state *state,
                              struct timespec *deadline),
                  void *data,
                  const struct timespec *deadline)
{
        if (ck_ec32_value(ec) != old_value) {
                return 0;
        }

        return ck_ec32_wait_pred_slow(ec, mode->ops, old_value,
                                      pred, data, deadline);
}

#ifdef CK_F_EC64
CK_CC_FORCE_INLINE void ck_ec64_init(struct ck_ec64 *ec, uint64_t value)
{
        ec->counter = value << 1;
        return;
}

CK_CC_FORCE_INLINE uint64_t ck_ec64_value(const struct ck_ec64 *ec)
{
        uint64_t ret = ck_pr_load_64(&ec->counter) >> 1;

        ck_pr_fence_acquire();
        return ret;
}

CK_CC_FORCE_INLINE bool ck_ec64_has_waiters(const struct ck_ec64 *ec)
{
        return ck_pr_load_64(&ec->counter) & 1;
}

void ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops);

CK_CC_FORCE_INLINE void ck_ec64_inc(struct ck_ec64 *ec,
                                    const struct ck_ec_mode *mode)
{
        /* We always xadd, so there's no special optimization here. */
        (void)ck_ec64_add(ec, mode, 1);
        return;
}

CK_CC_FORCE_INLINE uint64_t ck_ec_add64_epilogue(struct ck_ec64 *ec,
                                                 const struct ck_ec_mode *mode,
                                                 uint64_t old)
{
        uint64_t ret = old >> 1;

        if (CK_CC_UNLIKELY(old & 1)) {
                ck_ec64_wake(ec, mode->ops);
        }

        return ret;
}

static CK_CC_INLINE uint64_t ck_ec64_add_mp(struct ck_ec64 *ec,
                                            const struct ck_ec_mode *mode,
                                            uint64_t delta)
{
        uint64_t inc = 2 * delta;  /* The low bit is the flag bit. */

        ck_pr_fence_store_atomic();
        return ck_ec_add64_epilogue(ec, mode, ck_pr_faa_64(&ec->counter, inc));
}

#ifdef CK_F_EC_SP
/* Single-producer specialisation. */
static CK_CC_INLINE uint64_t ck_ec64_add_sp(struct ck_ec64 *ec,
                                            const struct ck_ec_mode *mode,
                                            uint64_t delta)
{
        uint64_t old;

        /*
         * Correctness of this racy write depends on actually
         * having an update to write. Exit here if the update
         * is a no-op.
         */
        if (CK_CC_UNLIKELY(delta == 0)) {
                return ck_ec64_value(ec);
        }

        ck_pr_fence_store();
        old = 2 * delta;  /* The low bit is the flag bit. */
        __asm__ volatile("xaddq %1, %0"
                         : "+m"(ec->counter), "+r"(old)
                         :: "cc", "memory");
        return ck_ec_add64_epilogue(ec, mode, old);
}
#endif /* CK_F_EC_SP */

/*
 * Dispatch on mode->single_producer in this FORCE_INLINE function:
 * the end result is always small, but not all compilers have enough
 * foresight to inline and get the reduction.
 */
CK_CC_FORCE_INLINE uint64_t ck_ec64_add(struct ck_ec64 *ec,
                                        const struct ck_ec_mode *mode,
                                        uint64_t delta)
{
#ifdef CK_F_EC_SP
        if (mode->single_producer == true) {
                return ck_ec64_add_sp(ec, mode, delta);
        }
#endif

        return ck_ec64_add_mp(ec, mode, delta);
}

int ck_ec64_wait_slow(struct ck_ec64 *ec,
                      const struct ck_ec_ops *ops,
                      uint64_t old_value,
                      const struct timespec *deadline);

CK_CC_FORCE_INLINE int ck_ec64_wait(struct ck_ec64 *ec,
                                    const struct ck_ec_mode *mode,
                                    uint64_t old_value,
                                    const struct timespec *deadline)
{
        if (ck_ec64_value(ec) != old_value) {
                return 0;
        }

        return ck_ec64_wait_slow(ec, mode->ops, old_value, deadline);
}

int ck_ec64_wait_pred_slow(struct ck_ec64 *ec,
                           const struct ck_ec_ops *ops,
                           uint64_t old_value,
                           int (*pred)(const struct ck_ec_wait_state *state,
                                       struct timespec *deadline),
                           void *data,
                           const struct timespec *deadline);


CK_CC_FORCE_INLINE int
ck_ec64_wait_pred(struct ck_ec64 *ec,
                  const struct ck_ec_mode *mode,
                  uint64_t old_value,
                  int (*pred)(const struct ck_ec_wait_state *state,
                              struct timespec *deadline),
                  void *data,
                  const struct timespec *deadline)
{
        if (ck_ec64_value(ec) != old_value) {
                return 0;
        }

        return ck_ec64_wait_pred_slow(ec, mode->ops, old_value,
                                      pred, data, deadline);
}
#endif /* CK_F_EC64 */
#endif /* !CK_EC_H */