1 #ifndef __OPTIMIZE__
2 #ifdef __clang__
3 #ifdef __x86_64__
4 #warning you are compiling without optimizations enabled. I would suggest -march=native -O3 -mcx16.
5 #else
6 #warning you are compiling without optimizations enabled. I would suggest -march=native -O3.
7 #endif
8 #else
9 #ifdef __x86_64__
10 #warning you are compiling without optimizations enabled. I would suggest -march=native -O3 -fwhole-program -mcx16.
11 #else
12 #warning you are compiling without optimizations enabled. I would suggest -march=native -O3 -fwhole-program.
13 #endif
14 #endif
15 #endif
16 
17 #define value_to_string(v) ((value_t)(v))
18 #define raw_value_to_string(v) ((raw_value_t)(v))
19 
20 /* A more powerful assert that treats the assertion as an assumption when
21  * assertions are disabled.
22  */
23 #ifndef NDEBUG
24 #define ASSERT(expr) assert(expr)
25 #elif defined(__clang__)
26 #define ASSERT(expr) __builtin_assume(expr)
27 #else
28 /* GCC doesn't have __builtin_assume, so we need something else. */
29 #define ASSERT(expr)                                                           \
30   do {                                                                         \
31     /* The following is an idiom for teaching the compiler an assumption. */   \
32     if (!(expr)) {                                                             \
33       __builtin_unreachable();                                                 \
34     }                                                                          \
35   } while (0)
36 #endif
37 
38 #define BITS_TO_BYTES(size) ((size) / 8 + ((size) % 8 == 0 ? 0 : 1))
39 #define BITS_FOR(value)                                                        \
40   ((value) == 0 ? 0 : (sizeof(unsigned long long) * 8 - __builtin_clzll(value)))
41 
42 /* The size of the compressed state data in bytes. */
43 enum { STATE_SIZE_BYTES = BITS_TO_BYTES(STATE_SIZE_BITS) };
44 
45 /* the size of auxliary members of the state struct */
46 enum { BOUND_BITS = BITS_FOR(BOUND) };
47 #if COUNTEREXAMPLE_TRACE != CEX_OFF || LIVENESS_COUNT > 0
48 #if POINTER_BITS != 0
49 enum { PREVIOUS_BITS = POINTER_BITS };
50 #elif defined(__linux__) && defined(__x86_64__) && !defined(__ILP32__)
51 /* assume 5-level paging, and hence the top 2 bytes of any user pointer are
52  * always 0 and not required.
53  * https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt
54  */
55 enum { PREVIOUS_BITS = 56 };
56 #else
57 enum { PREVIOUS_BITS = sizeof(void *) * 8 };
58 #endif
59 #else
60 enum { PREVIOUS_BITS = 0 };
61 #endif
62 #if COUNTEREXAMPLE_TRACE != CEX_OFF
63 enum { RULE_TAKEN_BITS = BITS_FOR(RULE_TAKEN_LIMIT) };
64 #else
65 enum { RULE_TAKEN_BITS = 0 };
66 #endif
67 enum {
68   STATE_OTHER_BYTES =
69       BITS_TO_BYTES(BOUND_BITS + PREVIOUS_BITS + RULE_TAKEN_BITS +
70                     (USE_SCALARSET_SCHEDULES ? SCHEDULE_BITS : 0))
71 };
72 
73 /* Implement _Thread_local for GCC <4.9, which is missing this. */
74 #if defined(__GNUC__) && defined(__GNUC_MINOR__)
75 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
76 #define _Thread_local __thread
77 #endif
78 #endif
79 
80 #ifdef __clang__
81 #define NONNULL _Nonnull
82 #else
83 #define NONNULL /* nothing; other compilers don't have _Nonnull */
84 #endif
85 
86 /* A word about atomics... There are two different atomic operation mechanisms
87  * used in this code and it may not immediately be obvious why one was not
88  * sufficient. The two are:
89  *
90  *   1. GCC __atomic built-ins: Used for variables that are sometimes accessed
91  *      with atomic semantics and sometimes as regular memory operations. The
92  *      C11 atomics cannot give us this and the __atomic built-ins are
93  *      implemented by the major compilers.
94  *   2. GCC __sync built-ins: used for 128-bit atomic accesses on x86-64. It
95  *      seems the __atomic built-ins do not result in a CMPXCHG instruction, but
96  *      rather in a less efficient library call. See
97  *      https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878.
98  *
99  * Though this is intended to be a C11 program, we avoid the C11 atomics to be
100  * compatible with GCC <4.9.
101  */
102 
103 /* Identifier of the current thread. This counts up from 0 and thus is suitable
104  * to use for, e.g., indexing into arrays. The initial thread has ID 0.
105  */
106 static _Thread_local size_t thread_id;
107 
108 /* The threads themselves. Note that we have no element for the initial thread,
109  * so *your* thread is 'threads[thread_id - 1]'.
110  */
111 static pthread_t threads[THREADS - 1];
112 
113 /* What we are currently doing. Either "warming up" (running single threaded
114  * building up queue occupancy) or "free running" (running multithreaded).
115  */
116 static enum { WARMUP, RUN } phase = WARMUP;
117 
118 /* Number of errors we've noted so far. If a thread sees this hit or exceed
119  * MAX_ERRORS, they should attempt to exit gracefully as soon as possible.
120  */
121 static unsigned long error_count;
122 
123 /* Number of rules that have been processed. There are two representations of
124  * this: a thread-local count of how many rules we have fired thus far and a
125  * global array of *final* counts of fired rules per-thread that is updated and
126  * used as threads are exiting. The purpose of this duplication is to let the
127  * compiler layout the thread-local variable in a cache-friendly way and use
128  * this during checking, rather than having all threads contending on the global
129  * array whose entries are likely all within the same cache line.
130  */
131 static _Thread_local uintmax_t rules_fired_local;
132 static uintmax_t rules_fired[THREADS];
133 
134 /* Checkpoint to restore to after reporting an error. This is only used if we
135  * are tolerating more than one error before exiting.
136  */
137 static _Thread_local sigjmp_buf checkpoint;
138 
139 _Static_assert(MAX_ERRORS > 0, "illegal MAX_ERRORS value");
140 
141 /* Whether we need to save and restore checkpoints. This is determined by
142  * whether we ever need to perform the action "discard the current state and
143  * skip to checking the next." This scenario can occur for two reasons:
144  *   1. We are running multithreaded, have just found an error and have not yet
145  *      hit MAX_ERRORS. In this case we want to longjmp back to resume checking.
146  *   2. We failed an assume statement. In this case we want to mark the current
147  *      state as invalid and resume checking with the next state.
148  * In either scenario the actual longjmp performed is the same, but by knowing
149  * statically whether either can occur we can avoid calling setjmp if both are
150  * impossible.
151  */
152 enum { JMP_BUF_NEEDED = MAX_ERRORS > 1 || ASSUME_STATEMENTS_COUNT > 0 };
153 
154 /*******************************************************************************
155  * Sandbox support.                                                            *
156  *                                                                             *
157  * Because we're running generated code, it seems wise to use OS mechanisms to *
158  * reduce our privileges, where possible.                                      *
159  ******************************************************************************/
160 
sandbox(void)161 static void sandbox(void) {
162 
163   if (!SANDBOX_ENABLED) {
164     return;
165   }
166 
167 #ifdef __APPLE__
168   {
169     char *err;
170 #pragma clang diagnostic push
171 #pragma clang diagnostic ignored "-Wdeprecated-declarations"
172     int r = sandbox_init(kSBXProfilePureComputation, SANDBOX_NAMED, &err);
173 #pragma clang diagnostic pop
174 
175     if (__builtin_expect(r != 0, 0)) {
176       fprintf(stderr, "sandbox_init failed: %s\n", err);
177       free(err);
178       exit(EXIT_FAILURE);
179     }
180 
181     return;
182   }
183 #endif
184 
185 #ifdef __FreeBSD__
186   {
187     if (__builtin_expect(cap_enter() != 0, 0)) {
188       perror("cap_enter");
189       exit(EXIT_FAILURE);
190     }
191     return;
192   }
193 #endif
194 
195 #if defined(__linux__)
196 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
197   {
198     /* Disable the addition of new privileges via execve and friends. */
199     int r = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
200     if (__builtin_expect(r != 0, 0)) {
201       perror("prctl(PR_SET_NO_NEW_PRIVS) failed");
202       exit(EXIT_FAILURE);
203     }
204 
205     /* A BPF program that traps on any syscall we want to disallow. */
206     static struct sock_filter filter[] = {
207 
208         /* Load syscall number. */
209         BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),
210 
211     /* Enable exiting. */
212 #ifdef __NR_exit_group
213         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit_group, 0, 1),
214         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
215 #endif
216 
217     /* Enable syscalls used by printf. */
218 #ifdef __NR_fstat
219         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_fstat, 0, 1),
220         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
221 #endif
222 #ifdef __NR_fstat64
223         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_fstat64, 0, 1),
224         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
225 #endif
226 #ifdef __NR_write
227         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_write, 0, 1),
228         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
229 #endif
230 
231     /* Enable syscalls used by malloc. */
232 #ifdef __NR_brk
233         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_brk, 0, 1),
234         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
235 #endif
236 #ifdef __NR_mmap
237         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mmap, 0, 1),
238         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
239 #endif
240 #ifdef __NR_mmap2
241         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mmap2, 0, 1),
242         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
243 #endif
244 #ifdef __NR_munmap
245         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap, 0, 1),
246         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
247 #endif
248 
249     /* If we're running multithreaded, enable syscalls used by pthreads. */
250 #ifdef __NR_clone
251         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clone, 0, 1),
252         BPF_STMT(BPF_RET | BPF_K,
253                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
254 #endif
255 #ifdef __NR_close
256         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_close, 0, 1),
257         BPF_STMT(BPF_RET | BPF_K,
258                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
259 #endif
260 #ifdef __NR_exit
261         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit, 0, 1),
262         BPF_STMT(BPF_RET | BPF_K,
263                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
264 #endif
265 #ifdef __NR_futex
266         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, 0, 1),
267         BPF_STMT(BPF_RET | BPF_K,
268                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
269 #endif
270 #ifdef __NR_get_robust_list
271         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_get_robust_list, 0, 1),
272         BPF_STMT(BPF_RET | BPF_K,
273                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
274 #endif
275 #ifdef __NR_madvise
276         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_madvise, 0, 1),
277         BPF_STMT(BPF_RET | BPF_K,
278                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
279 #endif
280 #ifdef __NR_mprotect
281         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mprotect, 0, 1),
282         BPF_STMT(BPF_RET | BPF_K,
283                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
284 #endif
285 #ifdef __NR_open
286         /* XXX: it would be nice to avoid open() but pthreads seems to open
287          * libgcc.
288          */
289         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_open, 0, 1),
290         BPF_STMT(BPF_RET | BPF_K,
291                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
292 #endif
293 #ifdef __NR_read
294         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 0, 1),
295         BPF_STMT(BPF_RET | BPF_K,
296                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
297 #endif
298 #ifdef __NR_set_robust_list
299         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_robust_list, 0, 1),
300         BPF_STMT(BPF_RET | BPF_K,
301                  THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
302 #endif
303 
304     /* on platforms without vDSO support, time() makes an actual syscall, so
305      * we need to allow them
306      */
307 #ifdef __NR_clock_gettime
308         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_gettime, 0, 1),
309         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
310 #endif
311 #ifdef __NR_clock_gettime64
312         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_gettime64, 0, 1),
313         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
314 #endif
315 #ifdef __NR_gettimeofday
316         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_gettimeofday, 0, 1),
317         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
318 #endif
319 #ifdef __NR_time
320         BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_time, 0, 1),
321         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
322 #endif
323 
324         /* Deny everything else. On a disallowed syscall, we trap instead of
325          * killing to allow the user to debug the failure. If you are debugging
326          * seccomp denials, strace the checker and find the number of the denied
327          * syscall in the first si_value parameter reported in the terminating
328          * SIG_SYS.
329          */
330         BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
331     };
332 
333     static const struct sock_fprog filter_program = {
334         .len = sizeof(filter) / sizeof(filter[0]),
335         .filter = filter,
336     };
337 
338     /* Apply the above filter to ourselves. */
339     r = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &filter_program, 0, 0);
340     if (__builtin_expect(r != 0, 0)) {
341       perror("prctl(PR_SET_SECCOMP) failed");
342       exit(EXIT_FAILURE);
343     }
344 
345     return;
346   }
347 #endif
348 #endif
349 
350 #ifdef __OpenBSD__
351   {
352     if (__builtin_expect(pledge("stdio", "") != 0, 0)) {
353       perror("pledge");
354       exit(EXIT_FAILURE);
355     }
356     return;
357   }
358 #endif
359 
360   /* No sandbox available. */
361   fprintf(stderr, "no sandboxing facilities available\n");
362   exit(EXIT_FAILURE);
363 }
364 
365 /******************************************************************************/
366 
367 /* ANSI colour code support */
368 
369 static bool istty;
370 
green()371 static const char *green() {
372   if (COLOR == ON || (COLOR == AUTO && istty))
373     return "\033[32m";
374   return "";
375 }
376 
red()377 static const char *red() {
378   if (COLOR == ON || (COLOR == AUTO && istty))
379     return "\033[31m";
380   return "";
381 }
382 
yellow()383 static const char *yellow() {
384   if (COLOR == ON || (COLOR == AUTO && istty))
385     return "\033[33m";
386   return "";
387 }
388 
bold()389 static const char *bold() {
390   if (COLOR == ON || (COLOR == AUTO && istty))
391     return "\033[1m";
392   return "";
393 }
394 
reset()395 static const char *reset() {
396   if (COLOR == ON || (COLOR == AUTO && istty))
397     return "\033[0m";
398   return "";
399 }
400 
401 #ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */
402 
403 #define UINT128_MAX                                                            \
404   ((((unsigned __int128)UINT64_MAX) << 64) | ((unsigned __int128)UINT64_MAX))
405 
406 #define INT128_MAX ((((__int128)INT64_MAX) << 64) | ((__int128)UINT64_MAX))
407 #define INT128_MIN (-INT128_MAX - 1)
408 
409 struct string_buffer {
410   char data[41];
411 };
412 
value_u128_to_string(unsigned __int128 v)413 static struct string_buffer value_u128_to_string(unsigned __int128 v) {
414 
415   struct string_buffer buffer;
416 
417   if (v == 0) {
418     buffer.data[0] = '0';
419     buffer.data[1] = '\0';
420     return buffer;
421   }
422 
423   size_t i = sizeof(buffer.data);
424   while (v != 0) {
425     i--;
426     buffer.data[i] = '0' + v % 10;
427     v /= 10;
428   }
429 
430   memmove(buffer.data, &buffer.data[i], sizeof(buffer) - i);
431   buffer.data[sizeof(buffer) - i] = '\0';
432 
433   return buffer;
434 }
435 static __attribute__((unused)) struct string_buffer
value_128_to_string(__int128 v)436 value_128_to_string(__int128 v) {
437 
438   if (v == INT128_MIN) {
439     struct string_buffer buffer;
440     strcpy(buffer.data, "-170141183460469231731687303715884105728");
441     return buffer;
442   }
443 
444   bool negative = v < 0;
445   if (negative) {
446     v = -v;
447   }
448 
449   struct string_buffer buffer = value_u128_to_string(v);
450 
451   if (negative) {
452     memmove(&buffer.data[1], buffer.data, strlen(buffer.data) + 1);
453     buffer.data[0] = '-';
454   }
455 
456   return buffer;
457 }
458 #endif
459 
460 /* Is value_t a signed type? We need this as a function because value_t is a
461  * typedef that can be tweaked by the user.
462  */
value_is_signed(void)463 static __attribute__((const)) bool value_is_signed(void) {
464 #ifdef __clang__
465 #pragma clang diagnostic push
466 #pragma clang diagnostic ignored "-Wtautological-compare"
467 #pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
468 #elif defined(__GNUC__)
469 #pragma GCC diagnostic push
470 #pragma GCC diagnostic ignored "-Wtype-limits"
471 #endif
472   return (value_t)-1 < 0;
473 #ifdef __clang__
474 #pragma clang diagnostic pop
475 #elif defined(__GNUC__)
476 #pragma GCC diagnostic pop
477 #endif
478 }
479 
480 /*******************************************************************************
481  * MurmurHash by Austin Appleby                                                *
482  *                                                                             *
483  * More information on this at https://github.com/aappleby/smhasher/           *
484  ******************************************************************************/
485 
MurmurHash64A(const void * NONNULL key,size_t len)486 static uint64_t MurmurHash64A(const void *NONNULL key, size_t len) {
487 
488   static const uint64_t seed = 0;
489 
490   static const uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
491   static const unsigned r = 47;
492 
493   uint64_t h = seed ^ (len * m);
494 
495   const unsigned char *data = key;
496   const unsigned char *end = data + len / sizeof(uint64_t) * sizeof(uint64_t);
497 
498   while (data != end) {
499 
500     uint64_t k;
501     memcpy(&k, data, sizeof(k));
502     data += sizeof(k);
503 
504     k *= m;
505     k ^= k >> r;
506     k *= m;
507 
508     h ^= k;
509     h *= m;
510   }
511 
512   const unsigned char *data2 = data;
513 
514   switch (len & 7) {
515     case 7: h ^= (uint64_t)data2[6] << 48; /* fall through */
516     case 6: h ^= (uint64_t)data2[5] << 40; /* fall through */
517     case 5: h ^= (uint64_t)data2[4] << 32; /* fall through */
518     case 4: h ^= (uint64_t)data2[3] << 24; /* fall through */
519     case 3: h ^= (uint64_t)data2[2] << 16; /* fall through */
520     case 2: h ^= (uint64_t)data2[1] << 8; /* fall through */
521     case 1: h ^= (uint64_t)data2[0];
522     h *= m;
523   }
524 
525   h ^= h >> r;
526   h *= m;
527   h ^= h >> r;
528 
529   return h;
530 }
531 
532 /******************************************************************************/
533 
534 /* Signal an out-of-memory condition and terminate abruptly. */
oom(void)535 static _Noreturn void oom(void) {
536   fputs("out of memory", stderr);
537   exit(EXIT_FAILURE);
538 }
539 
xmalloc(size_t size)540 static void *xmalloc(size_t size) {
541   void *p = malloc(size);
542   if (__builtin_expect(p == NULL, 0)) {
543     oom();
544   }
545   return p;
546 }
547 
xcalloc(size_t count,size_t size)548 static void *xcalloc(size_t count, size_t size) {
549   void *p = calloc(count, size);
550   if (__builtin_expect(p == NULL, 0)) {
551     oom();
552   }
553   return p;
554 }
555 
put(const char * NONNULL s)556 static void put(const char *NONNULL s) {
557   for (; *s != '\0'; ++s) {
558     putchar_unlocked(*s);
559   }
560 }
561 
put_int(intmax_t u)562 static void put_int(intmax_t u) {
563   char buffer[128] = {0};
564   snprintf(buffer, sizeof(buffer), "%" PRIdMAX, u);
565   put(buffer);
566 }
567 
put_uint(uintmax_t u)568 static void put_uint(uintmax_t u) {
569   char buffer[128] = {0};
570   snprintf(buffer, sizeof(buffer), "%" PRIuMAX, u);
571   put(buffer);
572 }
573 
put_val(value_t v)574 static __attribute__((unused)) void put_val(value_t v) {
575   if (value_is_signed()) {
576     put_int((intmax_t)v);
577   } else {
578     put_uint((uintmax_t)v);
579   }
580 }
581 
xml_printf(const char * NONNULL s)582 static void xml_printf(const char *NONNULL s) {
583   while (*s != '\0') {
584     switch (*s) {
585       case '"': put("&quot;"); break;
586       case '<': put("&lt;");   break;
587       case '>': put("&gt;");   break;
588       case '&': put("&amp;");  break;
589       default:  putchar_unlocked(*s); break;
590     }
591     s++;
592   }
593 }
594 
595 /* Support for tracing specific operations. This can be enabled during checker
596  * generation with '--trace ...' and is useful for debugging Rumur itself.
597  */
trace(const char * NONNULL fmt,...)598 static __attribute__((format(printf, 1, 2))) void trace(const char *NONNULL fmt,
599                                                         ...) {
600 
601   va_list ap;
602   va_start(ap, fmt);
603 
604   flockfile(stdout);
605 
606   (void)fprintf(stderr, "%sTRACE%s:", yellow(), reset());
607   (void)vfprintf(stderr, fmt, ap);
608   (void)fprintf(stderr, "\n");
609 
610   funlockfile(stdout);
611   va_end(ap);
612 }
613 
614 /* Wrap up trace() as a macro. It looks as if the following could just be
615  * incorporated into trace(). However, present compilers seem unwilling to
616  * inline varargs functions or do interprocedural analysis across a call to one.
617  * As a result, the compiler does not notice when tracing is disabled and a call
618  * to trace() would be a no-op that can be elided. By making the call a macro we
619  * make the category comparison visible to the compiler's optimising passes.
620  */
621 #define TRACE(category, args...)                                               \
622   do {                                                                         \
623     if ((category) & TRACES_ENABLED) {                                         \
624       trace(args);                                                             \
625     }                                                                          \
626   } while (0)
627 
628 /*******************************************************************************
629  * Arithmetic wrappers                                                         *
630  *                                                                             *
631  * For compilers that support them, we call the overflow built-ins to check    *
632  * undefined operations during arithmetic. For others, we just emit the bare   *
633  * operation.                                                                  *
634  ******************************************************************************/
635 
636 #if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 5)
637 
638 #define ADD(a, b, c) __builtin_add_overflow((a), (b), (c))
639 #define MUL(a, b, c) __builtin_mul_overflow((a), (b), (c))
640 #define SUB(a, b, c) __builtin_sub_overflow((a), (b), (c))
641 
642 #else
643 
644   #define ADD(a, b, c) ({ *(c) = (a) + (b); false; })
645   #define MUL(a, b, c) ({ *(c) = (a) * (b); false; })
646   #define SUB(a, b, c) ({ *(c) = (a) - (b); false; })
647 
648 #endif
649 
650 /******************************************************************************/
651 
652 /* The state of the current model. */
653 struct state {
654 
655 #if LIVENESS_COUNT > 0
656   uintptr_t liveness[LIVENESS_COUNT / sizeof(uintptr_t) / CHAR_BIT +
657                      (LIVENESS_COUNT % sizeof(uintptr_t) == 0 &&
658                               LIVENESS_COUNT / sizeof(uintptr_t) % CHAR_BIT == 0
659                           ? 0
660                           : 1)];
661 #endif
662 
663   uint8_t data[STATE_SIZE_BYTES];
664 
665 #if PACK_STATE
666   /* the following effective fields are packed into here:
667    *
668    *  * uint64_t bound;
669    *  * const struct state *previous;
670    *  * uint64_t rule_taken;
671    *  * uint8_t schedules[BITS_TO_BYTES(SCHEDULE_BITS)];
672    *
673    * They are bit-packed, so may take up less space. E.g. if the maximum value
674    * of `bound` is known to be 5, it will be stored in 3 bits instead of 64.
675    */
676   uint8_t other[STATE_OTHER_BYTES];
677 #else
678   uint64_t bound;
679   const struct state *previous;
680   uint64_t rule_taken;
681   uint8_t schedules[USE_SCALARSET_SCHEDULES ? BITS_TO_BYTES(SCHEDULE_BITS) : 0];
682 #endif
683 };
684 
685 struct handle {
686   uint8_t *base;
687   size_t offset;
688   size_t width;
689 };
690 
handle_align(struct handle h)691 static struct handle handle_align(struct handle h) {
692 
693   ASSERT(h.offset < CHAR_BIT && "handle has an offset outside the base byte");
694 
695   size_t width = h.width + h.offset;
696   if (width % 8 != 0) {
697     width += 8 - width % 8;
698   }
699 
700   return (struct handle){
701       .base = h.base,
702       .offset = 0,
703       .width = width,
704   };
705 }
706 
is_big_endian(void)707 static bool is_big_endian(void) {
708 
709   union {
710     uint32_t x;
711     uint8_t y[sizeof(uint32_t)];
712   } z;
713 
714   z.y[0] = 1;
715   z.y[1] = 2;
716   z.y[2] = 3;
717   z.y[3] = 4;
718 
719   return z.x == 0x01020304;
720 }
721 
copy_out64(const uint8_t * p,size_t extent)722 static uint64_t copy_out64(const uint8_t *p, size_t extent) {
723 
724   ASSERT(extent <= sizeof(uint64_t));
725 
726   uint64_t x = 0;
727   memcpy(&x, p, extent);
728 
729   if (is_big_endian()) {
730     x = __builtin_bswap64(x);
731   }
732 
733   return x;
734 }
735 
736 #ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */
byte_swap128(unsigned __int128 x)737 static unsigned __int128 byte_swap128(unsigned __int128 x) {
738 
739   union {
740     unsigned __int128 a;
741     uint64_t b[2];
742   } in, out;
743 
744   in.a = x;
745   out.b[0] = __builtin_bswap64(in.b[1]);
746   out.b[1] = __builtin_bswap64(in.b[0]);
747 
748   return out.a;
749 }
750 
copy_out128(const uint8_t * p,size_t extent)751 static unsigned __int128 copy_out128(const uint8_t *p, size_t extent) {
752 
753   ASSERT(extent <= sizeof(unsigned __int128));
754 
755   unsigned __int128 x = 0;
756   memcpy(&x, p, extent);
757 
758   if (is_big_endian()) {
759     x = byte_swap128(x);
760   }
761 
762   return x;
763 }
764 #endif
765 
766 /* If you are in the Rumur repository modifying the following function, remember
767  * to also update ../../misc/read-raw.smt2.
768  */
read_raw(struct handle h)769 static __attribute__((pure)) uint64_t read_raw(struct handle h) {
770 
771   /* a uint64_t is the maximum value we support reading */
772   ASSERT(h.width <= 64 && "read of too wide value");
773 
774   if (h.width == 0) {
775     return 0;
776   }
777 
778   /* Generate a handle that is offset- and width-aligned on byte boundaries.
779    * Essentially, we widen the handle to align it. The motivation for this is
780    * that we can only do byte-granularity reads, so we need to "over-read" if we
781    * have an unaligned handle.
782    */
783   struct handle aligned = handle_align(h);
784   ASSERT(aligned.offset == 0);
785 
786   /* The code below attempts to provide four alternatives for reading out the
787    * bits corresponding to a value of simple type referenced by a handle, and to
788    * give the compiler enough hints to steer it towards picking one of these and
789    * removing the other three as dead code:
790    *
791    *   1. Read into a single 64-bit variable. Enabled when the maximum width for
792    *      an unaligned handle spans 0 - 8 bytes.
793    *   2. Read into two 64-bit variables and then combine the result using
794    *      shifts and ORs. Enabled when the maximum width for an unaligned handle
795    *      spans 9 - 16 bytes and the compiler does not provide the `__int128`
796    *      type.
797    *   3. Read into a single 128-bit variable. Enabled when the compiler does
798    *      provide the `__int128` type and the maximum width for an unaligned
799    *      handle spans 9 - 16 bytes.
800    *   4. Read into two 128-bit chunks, and then combine the result using shifts
801    *      and ORs. Enabled when the compiler provides the `__int128` type and
802    *      the maximum width for an unaligned handle spans 17 - 32 bytes.
803    */
804 
805 #ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */
806 
807   /* If a byte-unaligned value_t cannot be fully read into a single uint64_t
808    * using byte-aligned reads...
809    */
810   if (aligned.width > (sizeof(uint64_t) - 1) * 8) {
811 
812     /* Read the low double-word of this (possibly quad-word-sized) value. */
813     unsigned __int128 low = 0;
814     size_t low_size = aligned.width / 8;
815     /* optimisation hint: */
816     ASSERT(low_size <= sizeof(low) || aligned.width > (sizeof(low) - 1) * 8);
817     if (low_size > sizeof(low)) {
818       low_size = sizeof(low);
819     }
820     {
821       const uint8_t *src = aligned.base;
822       low = copy_out128(src, low_size);
823     }
824 
825     low >>= h.offset;
826 
827     size_t high_size = aligned.width / 8 - low_size;
828 
829     /* If the value could not be read into a single double-word... */
830     ASSERT(high_size == 0 || aligned.width > (sizeof(low) - 1) * 8);
831     if (high_size != 0) {
832       unsigned __int128 high = 0;
833       const uint8_t *src = aligned.base + sizeof(low);
834       high = copy_out128(src, high_size);
835 
836       high <<= sizeof(low) * 8 - h.offset;
837 
838       /* Combine the two halves into a single double-word. */
839       low |= high;
840     }
841 
842     if (h.width < sizeof(low) * 8) {
843       unsigned __int128 mask = (((unsigned __int128)1) << h.width) - 1;
844       low &= mask;
845     }
846 
847     ASSERT(low <= UINT64_MAX && "read of value larger than a uint64_t");
848 
849     return (uint64_t)low;
850   }
851 #endif
852 
853   /* Read the low word of this (possibly two-word-sized) value. */
854   uint64_t low = 0;
855   size_t low_size = aligned.width / 8;
856   /* optimisation hint: */
857   ASSERT(low_size <= sizeof(low) || aligned.width > (sizeof(low) - 1) * 8);
858   if (low_size > sizeof(low)) {
859     low_size = sizeof(low);
860   }
861   {
862     const uint8_t *src = aligned.base;
863     low = copy_out64(src, low_size);
864   }
865 
866   low >>= h.offset;
867 
868   size_t high_size = aligned.width / 8 - low_size;
869 
870   /* If the value could not be read into a single word... */
871   ASSERT(high_size == 0 || aligned.width > (sizeof(low) - 1) * 8);
872   if (high_size != 0) {
873     const uint8_t *src = aligned.base + sizeof(low);
874     uint64_t high = copy_out64(src, high_size);
875 
876     high <<= sizeof(low) * 8 - h.offset;
877 
878     /* Combine the high and low words. Note that we know we can store the final
879      * result in a single word because the width is guaranteed to be <= 64.
880      */
881     low |= high;
882   }
883 
884   if (h.width < sizeof(low) * 8) {
885     uint64_t mask = (UINT64_C(1) << h.width) - 1;
886     low &= mask;
887   }
888 
889   return low;
890 }
891 
copy_in64(uint8_t * p,uint64_t v,size_t extent)892 static void copy_in64(uint8_t *p, uint64_t v, size_t extent) {
893   ASSERT(extent <= sizeof(v));
894 
895   if (is_big_endian()) {
896     v = __builtin_bswap64(v);
897   }
898 
899   memcpy(p, &v, extent);
900 }
901 
902 #ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */
copy_in128(uint8_t * p,unsigned __int128 v,size_t extent)903 static void copy_in128(uint8_t *p, unsigned __int128 v, size_t extent) {
904   ASSERT(extent <= sizeof(v));
905 
906   if (is_big_endian()) {
907     v = byte_swap128(v);
908   }
909 
910   memcpy(p, &v, extent);
911 }
912 #endif
913 
914 /* If you are in the Rumur repository modifying the following function, remember
915  * to also update ../../misc/write-raw.smt2.
916  */
write_raw(struct handle h,uint64_t v)917 static void write_raw(struct handle h, uint64_t v) {
918 
919   if (h.width == 0) {
920     return;
921   }
922 
923   /* sanitise input value */
924   if (h.width < sizeof(v) * 8) {
925     v &= (UINT64_C(1) << h.width) - 1;
926   }
927 
928   /* Generate a offset- and width-aligned handle on byte boundaries. */
929   struct handle aligned = handle_align(h);
930   ASSERT(aligned.offset == 0);
931 
932 #ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */
933 
934   /* If a byte-unaligned value_t cannot be fully written within a single
935    * byte-aligned uint64_t...
936    */
937   if (aligned.width > (sizeof(uint64_t) - 1) * 8) {
938 
939     /* Read the low double-word of this region. */
940     unsigned __int128 low = 0;
941     size_t low_size = aligned.width / 8;
942     ASSERT(low_size <= sizeof(low) || aligned.width > (sizeof(low) - 1) * 8);
943     if (low_size > sizeof(low)) {
944       low_size = sizeof(low);
945     }
946     {
947       const uint8_t *src = aligned.base;
948       low = copy_out128(src, low_size);
949     }
950 
951     {
952       unsigned __int128 or_mask = ((unsigned __int128)v) << h.offset;
953       if (low_size < sizeof(low)) {
954         or_mask &= (((unsigned __int128)1) << (low_size * 8)) - 1;
955       }
956       unsigned __int128 and_mask = (((unsigned __int128)1) << h.offset) - 1;
957       if (h.width + h.offset < sizeof(low) * 8) {
958         size_t high_bits = aligned.width - h.offset - h.width;
959         and_mask |= ((((unsigned __int128)1) << high_bits) - 1)
960                     << (low_size * 8 - high_bits);
961       }
962 
963       low = (low & and_mask) | or_mask;
964     }
965 
966     {
967       uint8_t *dest = aligned.base;
968       copy_in128(dest, low, low_size);
969     }
970 
971     /* Now do the second double-word if necessary. */
972 
973     size_t high_size = aligned.width / 8 - low_size;
974 
975     ASSERT(high_size == 0 || aligned.width > (sizeof(low) - 1) * 8);
976     if (high_size != 0) {
977       unsigned __int128 high = 0;
978       {
979         const uint8_t *src = aligned.base + sizeof(low);
980         high = copy_out128(src, high_size);
981       }
982 
983       {
984         unsigned __int128 or_mask =
985             ((unsigned __int128)v) >> (sizeof(low) * 8 - h.offset);
986         unsigned __int128 and_mask = ~(
987             (((unsigned __int128)1) << (h.width + h.offset - sizeof(low) * 8)) -
988             1);
989 
990         high = (high & and_mask) | or_mask;
991       }
992 
993       {
994         uint8_t *dest = aligned.base + sizeof(low);
995         copy_in128(dest, high, high_size);
996       }
997     }
998 
999     return;
1000   }
1001 #endif
1002 
1003   /* Replicate the above logic for uint64_t. */
1004 
1005   uint64_t low = 0;
1006   size_t low_size = aligned.width / 8;
1007   ASSERT(low_size <= sizeof(low) || aligned.width > (sizeof(low) - 1) * 8);
1008   if (low_size > sizeof(low)) {
1009     low_size = sizeof(low);
1010   }
1011   {
1012     const uint8_t *src = aligned.base;
1013     low = copy_out64(src, low_size);
1014   }
1015 
1016   {
1017     uint64_t or_mask = ((uint64_t)v) << h.offset;
1018     if (low_size < sizeof(low)) {
1019       or_mask &= (UINT64_C(1) << (low_size * 8)) - 1;
1020     }
1021     uint64_t and_mask = (UINT64_C(1) << h.offset) - 1;
1022     if (h.width + h.offset < sizeof(low) * 8) {
1023       size_t high_bits = aligned.width - h.offset - h.width;
1024       and_mask |= ((UINT64_C(1) << high_bits) - 1)
1025                   << (low_size * 8 - high_bits);
1026     }
1027 
1028     low = (low & and_mask) | or_mask;
1029   }
1030 
1031   {
1032     uint8_t *dest = aligned.base;
1033     copy_in64(dest, low, low_size);
1034   }
1035 
1036   size_t high_size = aligned.width / 8 - low_size;
1037 
1038   ASSERT(high_size == 0 || aligned.width > (sizeof(low) - 1) * 8);
1039   if (high_size != 0) {
1040     uint64_t high = 0;
1041     {
1042       const uint8_t *src = aligned.base + sizeof(low);
1043       high = copy_out64(src, high_size);
1044     }
1045 
1046     {
1047       uint64_t or_mask = ((uint64_t)v) >> (sizeof(low) * 8 - h.offset);
1048       uint64_t and_mask =
1049           ~((UINT64_C(1) << (h.width + h.offset - sizeof(low) * 8)) - 1);
1050 
1051       high = (high & and_mask) | or_mask;
1052     }
1053 
1054     {
1055       uint8_t *dest = aligned.base + sizeof(low);
1056       copy_in64(dest, high, high_size);
1057     }
1058   }
1059 }
1060 
1061 #if BOUND > 0
1062 #if PACK_STATE
state_bound_handle(const struct state * NONNULL s)1063 static struct handle state_bound_handle(const struct state *NONNULL s) {
1064 
1065   struct handle h = (struct handle){
1066       .base = (uint8_t *)s->other,
1067       .offset = 0,
1068       .width = BITS_FOR(BOUND),
1069   };
1070 
1071   return h;
1072 }
1073 #endif
1074 
1075 _Static_assert((uintmax_t)BOUND <= UINT64_MAX,
1076                "bound limit does not fit in a uint64_t");
1077 
1078 static __attribute__((pure)) uint64_t
state_bound_get(const struct state * NONNULL s)1079 state_bound_get(const struct state *NONNULL s) {
1080   assert(s != NULL);
1081 
1082 #if PACK_STATE
1083   struct handle h = state_bound_handle(s);
1084   return read_raw(h);
1085 #else
1086   return s->bound;
1087 #endif
1088 }
1089 
state_bound_set(struct state * NONNULL s,uint64_t bound)1090 static void state_bound_set(struct state *NONNULL s, uint64_t bound) {
1091   assert(s != NULL);
1092 
1093 #if PACK_STATE
1094   struct handle h = state_bound_handle(s);
1095   write_raw(h, bound);
1096 #else
1097   s->bound = bound;
1098 #endif
1099 }
1100 #endif
1101 
1102 #if COUNTEREXAMPLE_TRACE != CEX_OFF || LIVENESS_COUNT > 0
1103 #if PACK_STATE
state_previous_handle(const struct state * NONNULL s)1104 static struct handle state_previous_handle(const struct state *NONNULL s) {
1105 
1106   size_t offset = BOUND_BITS;
1107 
1108   struct handle h = (struct handle){
1109       .base = (uint8_t *)s->other + offset / 8,
1110       .offset = offset % 8,
1111       .width = PREVIOUS_BITS,
1112   };
1113 
1114   return h;
1115 }
1116 #endif
1117 
1118 static __attribute__((pure)) const struct state *
state_previous_get(const struct state * NONNULL s)1119 state_previous_get(const struct state *NONNULL s) {
1120 #if PACK_STATE
1121   struct handle h = state_previous_handle(s);
1122   return (const struct state *)(uintptr_t)read_raw(h);
1123 #else
1124   return s->previous;
1125 #endif
1126 }
1127 
state_previous_set(struct state * NONNULL s,const struct state * previous)1128 static void state_previous_set(struct state *NONNULL s,
1129                                const struct state *previous) {
1130 #if PACK_STATE
1131   ASSERT(
1132       (PREVIOUS_BITS == sizeof(void *) * 8 ||
1133        ((uintptr_t)previous >> PREVIOUS_BITS) == 0) &&
1134       "upper bits of pointer are non-zero (incorrect --pointer-bits setting?)");
1135   struct handle h = state_previous_handle(s);
1136   write_raw(h, (uint64_t)(uintptr_t)previous);
1137 #else
1138   s->previous = previous;
1139 #endif
1140 }
1141 #endif
1142 
1143 #if COUNTEREXAMPLE_TRACE != CEX_OFF
1144 #if PACK_STATE
state_rule_taken_handle(const struct state * NONNULL s)1145 static struct handle state_rule_taken_handle(const struct state *NONNULL s) {
1146 
1147   size_t offset = BOUND_BITS + PREVIOUS_BITS;
1148 
1149   struct handle h = (struct handle){
1150       .base = (uint8_t *)s->other + offset / 8,
1151       .offset = offset % 8,
1152       .width = RULE_TAKEN_BITS,
1153   };
1154 
1155   return h;
1156 }
1157 #endif
1158 
1159 static __attribute__((pure)) uint64_t
state_rule_taken_get(const struct state * NONNULL s)1160 state_rule_taken_get(const struct state *NONNULL s) {
1161   assert(s != NULL);
1162 #if PACK_STATE
1163   struct handle h = state_rule_taken_handle(s);
1164   return read_raw(h);
1165 #else
1166   return s->rule_taken;
1167 #endif
1168 }
1169 
state_rule_taken_set(struct state * NONNULL s,uint64_t rule_taken)1170 static void state_rule_taken_set(struct state *NONNULL s, uint64_t rule_taken) {
1171   assert(s != NULL);
1172 #if PACK_STATE
1173   struct handle h = state_rule_taken_handle(s);
1174   write_raw(h, rule_taken);
1175 #else
1176   s->rule_taken = rule_taken;
1177 #endif
1178 }
1179 #endif
1180 
state_schedule_handle(const struct state * NONNULL s,size_t offset,size_t width)1181 static struct handle state_schedule_handle(const struct state *NONNULL s,
1182                                            size_t offset, size_t width) {
1183 
1184   /* the maximum schedule width handle ever derived should be SCHEDULE_BITS, and
1185    * should only ever occur when writing the entire schedule
1186    */
1187 #if !defined(__clang__) && defined(__GNUC__)
1188 #pragma GCC diagnostic push
1189 #pragma GCC diagnostic ignored "-Wtype-limits"
1190 #endif
1191   ASSERT(width <= SCHEDULE_BITS && (width < SCHEDULE_BITS || offset == 0) &&
1192          "out-of-bounds handle derived to access schedule data");
1193 #if !defined(__clang__) && defined(__GNUC__)
1194 #pragma GCC diagnostic pop
1195 #endif
1196 
1197   uint8_t *b;
1198   size_t o;
1199 
1200 #if PACK_STATE
1201   b = (uint8_t *)s->other;
1202   o = BOUND_BITS + PREVIOUS_BITS + RULE_TAKEN_BITS + offset;
1203 #else
1204   b = (uint8_t *)s->schedules;
1205   o = offset;
1206 #endif
1207 
1208   struct handle h = (struct handle){
1209       .base = (uint8_t *)b + o / 8,
1210       .offset = o % 8,
1211       .width = width,
1212   };
1213 
1214   return h;
1215 }
1216 
1217 static __attribute__((pure, unused)) size_t
state_schedule_get(const struct state * NONNULL s,size_t offset,size_t width)1218 state_schedule_get(const struct state *NONNULL s, size_t offset, size_t width) {
1219   assert(s != NULL);
1220 
1221   struct handle h = state_schedule_handle(s, offset, width);
1222   return (size_t)read_raw(h);
1223 }
1224 
state_schedule_set(struct state * NONNULL s,size_t offset,size_t width,size_t v)1225 static __attribute__((unused)) void state_schedule_set(struct state *NONNULL s,
1226                                                        size_t offset,
1227                                                        size_t width, size_t v) {
1228   assert(s != NULL);
1229 
1230   /* we should never attempt to write an invalid schedule index that will be
1231    * truncated
1232    */
1233   if (width < 64) {
1234     ASSERT((((UINT64_C(1) << width) - 1) & (uint64_t)v) == (uint64_t)v &&
1235            "truncation in writing schedule data to state");
1236   }
1237 
1238   struct handle h = state_schedule_handle(s, offset, width);
1239   write_raw(h, (uint64_t)v);
1240 }
1241 
1242 /*******************************************************************************
1243  * State allocator.                                                            *
1244  *                                                                             *
1245  * The following implements a simple bump allocator for states. The purpose of *
1246  * this (rather than simply mallocing individual states) is to speed up        *
1247  * allocation by taking global locks less frequently and decrease allocator    *
1248  * metadata overhead.                                                          *
1249  ******************************************************************************/
1250 
1251 /* An initial size of thread-local allocator pools ~8MB. */
1252 static _Thread_local size_t arena_count =
1253     (sizeof(struct state) > 8 * 1024 * 1024)
1254         ? 1
1255         : (8 * 1024 * 1024 / sizeof(struct state));
1256 
1257 static _Thread_local struct state *arena_base;
1258 static _Thread_local struct state *arena_limit;
1259 
state_new(void)1260 static struct state *state_new(void) {
1261 
1262   if (arena_base == arena_limit) {
1263     /* Allocation pool is empty. We need to set up a new pool. */
1264     for (;;) {
1265       if (arena_count == 1) {
1266         arena_base = xmalloc(sizeof(*arena_base));
1267       } else {
1268         arena_base = calloc(arena_count, sizeof(*arena_base));
1269         if (__builtin_expect(arena_base == NULL, 0)) {
1270           /* Memory pressure high. Decrease our attempted allocation and try
1271            * again.
1272            */
1273           arena_count /= 2;
1274           continue;
1275         }
1276       }
1277 
1278       arena_limit = arena_base + arena_count;
1279       break;
1280     }
1281   }
1282 
1283   assert(arena_base != NULL);
1284   assert(arena_base != arena_limit);
1285 
1286   struct state *s = arena_base;
1287   arena_base++;
1288   return s;
1289 }
1290 
state_free(struct state * s)1291 static void state_free(struct state *s) {
1292 
1293   if (s == NULL) {
1294     return;
1295   }
1296 
1297   assert(s + 1 == arena_base);
1298   arena_base--;
1299 }
1300 
1301 /*******************************************************************************
1302  * statistics for memory usage                                                 *
1303  *                                                                             *
1304  * This functionality is only used when `--trace memory_usage` is given on the *
1305  * command line.                                                               *
1306  ******************************************************************************/
1307 
1308 /* number of allocated state structs per depth of expansion */
1309 static size_t allocated[BOUND == 0 ? 1 : (BOUND + 1)];
1310 
1311 /* note a new allocation of a state struct at the given depth */
register_allocation(size_t depth)1312 static void register_allocation(size_t depth) {
1313 
1314   /* if we are not tracing memory usage, make this a no-op */
1315   if (!(TC_MEMORY_USAGE & TRACES_ENABLED)) {
1316     return;
1317   }
1318 
1319   ASSERT(depth < sizeof(allocated) / sizeof(allocated[0]) &&
1320          "out of range access to allocated array");
1321 
1322   /* increment the number of known allocated states, avoiding an expensive
1323    * atomic if we are single-threaded
1324    */
1325   if (THREADS == 1) {
1326     allocated[depth]++;
1327   } else {
1328     (void)__sync_add_and_fetch(&allocated[depth], 1);
1329   }
1330 }
1331 
1332 /* print a summary of the allocation results we have accrued */
print_allocation_summary(void)1333 static void print_allocation_summary(void) {
1334 
1335   /* it is assumed we are running single-threaded here and do not need atomic
1336    * accesses
1337    */
1338 
1339   if (BOUND == 0) {
1340     TRACE(TC_MEMORY_USAGE,
1341           "allocated %zu state structure(s), totaling %zu bytes",
1342           allocated[0], allocated[0] * sizeof(struct state));
1343   } else {
1344     for (size_t i = 0; i < sizeof(allocated) / sizeof(allocated[0]); i++) {
1345 
1346       if (allocated[i] == 0) {
1347         /* no state at this depth was reached, therefore no states at deeper
1348          * depths were reached either and we are done
1349          */
1350         for (size_t j = i + 1; j < sizeof(allocated) / sizeof(allocated[0]);
1351              j++) {
1352           assert(allocated[j] == 0 &&
1353                  "state allocated at a deeper depth than an empty level");
1354         }
1355         break;
1356       }
1357 
1358       TRACE(TC_MEMORY_USAGE,
1359             "depth %zu: allocated %zu state structure(s), totaling %zu bytes",
1360             i, allocated[i], allocated[i] * sizeof(struct state));
1361     }
1362   }
1363 }
1364 
1365 /******************************************************************************/
1366 
1367 /* Print a counterexample trace terminating at the given state. This function
1368  * assumes that the caller already holds a lock on stdout.
1369  */
1370 static void print_counterexample(const struct state *NONNULL s
1371                                  __attribute__((unused)));
1372 
1373 /* "Exit" the current thread. This takes into account which thread we are. I.e.
1374  * the correct way to exit the checker is for every thread to eventually call
1375  * this function.
1376  */
1377 static _Noreturn int exit_with(int status);
1378 
1379 static __attribute__((format(printf, 2, 3))) _Noreturn void
error(const struct state * NONNULL s,const char * NONNULL fmt,...)1380 error(const struct state *NONNULL s, const char *NONNULL fmt, ...) {
1381 
1382   unsigned long prior_errors =
1383       __atomic_fetch_add(&error_count, 1, __ATOMIC_SEQ_CST);
1384 
1385   if (__builtin_expect(prior_errors < MAX_ERRORS, 1)) {
1386 
1387     flockfile(stdout);
1388 
1389     va_list ap;
1390     va_start(ap, fmt);
1391 
1392     if (MACHINE_READABLE_OUTPUT) {
1393       put("<error includes_trace=\"");
1394       put((s == NULL || COUNTEREXAMPLE_TRACE == CEX_OFF) ? "false" : "true");
1395       put("\">\n");
1396 
1397       put("<message>");
1398       {
1399         va_list ap2;
1400         va_copy(ap2, ap);
1401 
1402         int size = vsnprintf(NULL, 0, fmt, ap2);
1403         va_end(ap2);
1404         if (__builtin_expect(size < 0, 0)) {
1405           fputs("vsnprintf failed", stderr);
1406           exit(EXIT_FAILURE);
1407         }
1408 
1409         char *buffer = xmalloc(size + 1);
1410         if (__builtin_expect(vsnprintf(buffer, size + 1, fmt, ap) != size, 0)) {
1411           fputs("vsnprintf failed", stderr);
1412           exit(EXIT_FAILURE);
1413         }
1414 
1415         xml_printf(buffer);
1416         free(buffer);
1417       }
1418       put("</message>\n");
1419 
1420       if (s != NULL && COUNTEREXAMPLE_TRACE != CEX_OFF) {
1421         print_counterexample(s);
1422       }
1423 
1424       put("</error>\n");
1425 
1426     } else {
1427       if (s != NULL) {
1428         put("The following is the error trace for the error:\n\n");
1429       } else {
1430         put("Result:\n\n");
1431       }
1432 
1433       put("\t");
1434       put(red());
1435       put(bold());
1436       {
1437         va_list ap2;
1438         va_copy(ap2, ap);
1439 
1440         int size = vsnprintf(NULL, 0, fmt, ap2);
1441         va_end(ap2);
1442         if (__builtin_expect(size < 0, 0)) {
1443           fputs("vsnprintf failed", stderr);
1444           exit(EXIT_FAILURE);
1445         }
1446 
1447         char *buffer = xmalloc(size + 1);
1448         if (__builtin_expect(vsnprintf(buffer, size + 1, fmt, ap) != size, 0)) {
1449           fputs("vsnprintf failed", stderr);
1450           exit(EXIT_FAILURE);
1451         }
1452 
1453         put(buffer);
1454         free(buffer);
1455       }
1456       put(reset());
1457       put("\n\n");
1458 
1459       if (s != NULL && COUNTEREXAMPLE_TRACE != CEX_OFF) {
1460         print_counterexample(s);
1461         put("End of the error trace.\n\n");
1462       }
1463     }
1464 
1465     va_end(ap);
1466 
1467     funlockfile(stdout);
1468   }
1469 
1470 #ifdef __clang__
1471 #pragma clang diagnostic push
1472 #pragma clang diagnostic ignored "-Wtautological-compare"
1473 #pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
1474 #elif defined(__GNUC__)
1475 #pragma GCC diagnostic push
1476 #pragma GCC diagnostic ignored "-Wtype-limits"
1477 #endif
1478   if (prior_errors < MAX_ERRORS - 1) {
1479 #ifdef __clang__
1480 #pragma clang diagnostic pop
1481 #elif defined(__GNUC__)
1482 #pragma GCC diagnostic pop
1483 #endif
1484     assert(JMP_BUF_NEEDED && "longjmping without a setup jmp_buf");
1485     siglongjmp(checkpoint, 1);
1486   }
1487 
1488   exit_with(EXIT_FAILURE);
1489 }
1490 
deadlock(const struct state * NONNULL s)1491 static void deadlock(const struct state *NONNULL s) {
1492   if (JMP_BUF_NEEDED) {
1493     if (sigsetjmp(checkpoint, 0)) {
1494       /* error() longjmped back to us. */
1495       return;
1496     }
1497   }
1498   error(s, "deadlock");
1499 }
1500 
state_cmp(const struct state * NONNULL a,const struct state * NONNULL b)1501 static int state_cmp(const struct state *NONNULL a,
1502                      const struct state *NONNULL b) {
1503   return memcmp(a->data, b->data, sizeof(a->data));
1504 }
1505 
state_eq(const struct state * NONNULL a,const struct state * NONNULL b)1506 static bool state_eq(const struct state *NONNULL a,
1507                      const struct state *NONNULL b) {
1508   return state_cmp(a, b) == 0;
1509 }
1510 
1511 static void handle_copy(struct handle a, struct handle b);
1512 
state_dup(const struct state * NONNULL s)1513 static struct state *state_dup(const struct state *NONNULL s) {
1514   struct state *n = state_new();
1515   memcpy(n->data, s->data, sizeof(n->data));
1516 #if COUNTEREXAMPLE_TRACE != CEX_OFF || LIVENESS_COUNT > 0
1517   state_previous_set(n, s);
1518 #endif
1519 #if BOUND > 0
1520   assert(state_bound_get(s) < BOUND && "exceeding bounded exploration depth");
1521   state_bound_set(n, state_bound_get(s) + 1);
1522 #endif
1523 #if LIVENESS_COUNT > 0
1524   memset(n->liveness, 0, sizeof(n->liveness));
1525 #endif
1526 
1527   if (USE_SCALARSET_SCHEDULES) {
1528     /* copy schedule data related to past scalarset permutations */
1529     struct handle sch_src = state_schedule_handle(s, 0, SCHEDULE_BITS);
1530     struct handle sch_dst = state_schedule_handle(n, 0, SCHEDULE_BITS);
1531     handle_copy(sch_dst, sch_src);
1532   }
1533 
1534   return n;
1535 }
1536 
state_hash(const struct state * NONNULL s)1537 static size_t state_hash(const struct state *NONNULL s) {
1538   return (size_t)MurmurHash64A(s->data, sizeof(s->data));
1539 }
1540 
1541 #if COUNTEREXAMPLE_TRACE != CEX_OFF
1542 static __attribute__((unused)) size_t
state_depth(const struct state * NONNULL s)1543 state_depth(const struct state *NONNULL s) {
1544 #if BOUND > 0
1545   uint64_t bound = state_bound_get(s);
1546   ASSERT(bound <= BOUND && "claimed state bound exceeds limit");
1547   return (size_t)bound + 1;
1548 #else
1549   size_t d = 0;
1550   while (s != NULL) {
1551     d++;
1552     s = state_previous_get(s);
1553   }
1554   return d;
1555 #endif
1556 }
1557 #endif
1558 
1559 /* A type-safe const cast. */
1560 static __attribute__((unused)) struct state *
state_drop_const(const struct state * s)1561 state_drop_const(const struct state *s) {
1562   return (struct state *)s;
1563 }
1564 
1565 /* These functions are generated. */
1566 static void state_canonicalise_heuristic(struct state *NONNULL s);
1567 static void state_canonicalise_exhaustive(struct state *NONNULL s);
1568 
state_canonicalise(struct state * NONNULL s)1569 static void state_canonicalise(struct state *NONNULL s) {
1570 
1571   assert(s != NULL && "attempt to canonicalise NULL state");
1572 
1573   switch (SYMMETRY_REDUCTION) {
1574 
1575   case SYMMETRY_REDUCTION_OFF:
1576     break;
1577 
1578   case SYMMETRY_REDUCTION_HEURISTIC:
1579     state_canonicalise_heuristic(s);
1580     break;
1581 
1582   case SYMMETRY_REDUCTION_EXHAUSTIVE:
1583     state_canonicalise_exhaustive(s);
1584     break;
1585   }
1586 }
1587 
1588 /* This function is generated. */
1589 static __attribute__((unused)) void state_print_field_offsets(void);
1590 
1591 /* Print a state to stderr. This function is generated. This function assumes
1592  * that the caller already holds a lock on stdout.
1593  */
1594 static __attribute__((unused)) void state_print(const struct state *previous,
1595                                                 const struct state *NONNULL s);
1596 
1597 /* Print the first rule that resulted in s. This function is generated. This
1598  * function assumes that the caller holds a lock on stdout.
1599  */
1600 static __attribute__((unused)) void
1601 print_transition(const struct state *NONNULL s);
1602 
print_counterexample(const struct state * NONNULL s)1603 static void print_counterexample(const struct state *NONNULL s
1604                                  __attribute__((unused))) {
1605 
1606   assert(s != NULL && "missing state in request for counterexample trace");
1607 
1608 #if COUNTEREXAMPLE_TRACE != CEX_OFF
1609   /* Construct an array of the states we need to print by walking backwards to
1610    * the initial starting state.
1611    */
1612   size_t trace_length = state_depth(s);
1613 
1614   const struct state **cex = xcalloc(trace_length, sizeof(cex[0]));
1615 
1616   {
1617     size_t i = trace_length - 1;
1618     for (const struct state *p = s; p != NULL; p = state_previous_get(p)) {
1619       assert(i < trace_length &&
1620              "error in counterexample trace traversal logic");
1621       cex[i] = p;
1622       i--;
1623     }
1624   }
1625 
1626   for (size_t i = 0; i < trace_length; i++) {
1627 
1628     const struct state *current = cex[i];
1629     const struct state *previous = i == 0 ? NULL : cex[i - 1];
1630 
1631     print_transition(current);
1632 
1633     if (MACHINE_READABLE_OUTPUT) {
1634       put("<state>\n");
1635     }
1636     state_print(COUNTEREXAMPLE_TRACE == FULL ? NULL : previous, current);
1637     if (MACHINE_READABLE_OUTPUT) {
1638       put("</state>\n");
1639     } else {
1640       put("----------\n\n");
1641     }
1642   }
1643 
1644   free(cex);
1645 #endif
1646 }
1647 
1648 static __attribute__((unused)) struct handle
1649 state_handle(const struct state *NONNULL s, size_t offset, size_t width) {
1650 
1651   assert(sizeof(s->data) * CHAR_BIT - width >= offset &&
1652          "generating an out of bounds handle in state_handle()");
1653 
1654   return (struct handle){
1655       .base = (uint8_t *)s->data + offset / CHAR_BIT,
1656       .offset = offset % CHAR_BIT,
1657       .width = width,
1658   };
1659 }
1660 
1661 static raw_value_t handle_read_raw(const struct state *NONNULL s,
1662                                    struct handle h) {
1663 
1664   /* Check if this read is larger than the variable we will store it in. This
1665    * can only occur if the user has manually overridden value_t with the
1666    * --value-type command line argument.
1667    */
1668   if (__builtin_expect(h.width > sizeof(raw_value_t) * 8, 0)) {
1669     error(s, "read of a handle that is wider than the value type");
1670   }
1671 
1672   ASSERT(h.width <= MAX_SIMPLE_WIDTH &&
1673          "read of a handle that is larger than "
1674          "the maximum width of a simple type in this model");
1675 
1676   raw_value_t raw = (raw_value_t)read_raw(h);
1677 
1678   TRACE(TC_HANDLE_READS,
1679         "read value %" PRIRAWVAL " from handle { %p, %zu, %zu }",
1680         raw_value_to_string(raw), h.base, h.offset, h.width);
1681 
1682   return raw;
1683 }
1684 
1685 static value_t decode_value(value_t lb, value_t ub, raw_value_t v) {
1686 
1687   value_t dest = 0;
1688 
1689   bool r __attribute__((unused)) =
1690       SUB(v, 1, &v) || ADD(v, lb, &dest) || dest < lb || dest > ub;
1691 
1692   ASSERT(!r && "read of out-of-range value");
1693 
1694   return dest;
1695 }
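
/* A small self-check of the encoding used above (bounds chosen arbitrarily for
 * illustration): a value in [lb, ub] is stored biased by lb and shifted up by
 * one so that a raw 0 can represent "undefined", e.g. with lb = 3 the raw
 * value 1 decodes back to 3.
 */
static __attribute__((unused)) void decode_value_example(void) {
  assert(decode_value(3, 7, 1) == 3 && "raw 1 should decode to the lower bound");
  assert(decode_value(3, 7, 5) == 7 && "raw 5 should decode to the upper bound");
}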
1696 
1697 static __attribute__((unused)) value_t
1698 handle_read(const char *NONNULL context, const char *rule_name,
1699             const char *NONNULL name, const struct state *NONNULL s, value_t lb,
1700             value_t ub, struct handle h) {
1701 
1702   assert(context != NULL);
1703   assert(name != NULL);
1704 
1705   /* If we happen to be reading from the current state, do a sanity check that
1706    * we're only reading within bounds.
1707    */
1708   assert((h.base != (uint8_t *)s->data /* not a read from the current state */
1709           || sizeof(s->data) * CHAR_BIT - h.width >= h.offset) /* in bounds */
1710          && "out of bounds read in handle_read()");
1711 
1712   raw_value_t dest = handle_read_raw(s, h);
1713 
1714   if (__builtin_expect(dest == 0, 0)) {
1715     error(s, "%sread of undefined value in %s%s%s", context, name,
1716           rule_name == NULL ? "" : " within ",
1717           rule_name == NULL ? "" : rule_name);
1718   }
1719 
1720   return decode_value(lb, ub, dest);
1721 }
1722 
1723 static void handle_write_raw(const struct state *NONNULL s, struct handle h,
1724                              raw_value_t value) {
1725 
1726   /* Check if this write is larger than the variable we are reading from.
1727    * This can only occur if the user has manually overridden value_t with the
1728    * --value-type command line argument.
1729    */
1730   if (__builtin_expect(h.width > sizeof(raw_value_t) * 8, 0)) {
1731     error(s, "write of a handle that is wider than the value type");
1732   }
1733 
1734   ASSERT(h.width <= MAX_SIMPLE_WIDTH &&
1735          "write to a handle that is larger than "
1736          "the maximum width of a simple type in this model");
1737 
1738   TRACE(TC_HANDLE_WRITES,
1739         "writing value %" PRIRAWVAL " to handle { %p, %zu, %zu }",
1740         raw_value_to_string(value), h.base, h.offset, h.width);
1741 
1742 #ifdef __clang__
1743 #pragma clang diagnostic push
1744 #pragma clang diagnostic ignored "-Wtautological-compare"
1745 #elif defined(__GNUC__)
1746 #pragma GCC diagnostic push
1747 #pragma GCC diagnostic ignored "-Wtype-limits"
1748 #endif
1749   ASSERT((uintmax_t)value <= UINT64_MAX &&
1750          "truncating value during handle_write_raw");
1751 #ifdef __clang__
1752 #pragma clang diagnostic pop
1753 #elif defined(__GNUC__)
1754 #pragma GCC diagnostic pop
1755 #endif
1756 
1757   write_raw(h, (uint64_t)value);
1758 }
1759 
1760 static __attribute__((unused)) void
1761 handle_write(const char *NONNULL context, const char *rule_name,
1762              const char *NONNULL name, const struct state *NONNULL s,
1763              value_t lb, value_t ub, struct handle h, value_t value) {
1764 
1765   assert(context != NULL);
1766   assert(name != NULL);
1767 
1768   /* If we happen to be writing to the current state, do a sanity check that
1769    * we're only writing within bounds.
1770    */
1771   assert((h.base != (uint8_t *)s->data /* not a write to the current state */
1772           || sizeof(s->data) * CHAR_BIT - h.width >= h.offset) /* in bounds */
1773          && "out of bounds write in handle_write()");
1774 
1775   raw_value_t r;
1776   if (__builtin_expect(
1777           value < lb || value > ub || SUB(value, lb, &r) || ADD(r, 1, &r), 0)) {
1778     error(s, "%swrite of out-of-range value into %s%s%s", context, name,
1779           rule_name == NULL ? "" : " within ",
1780           rule_name == NULL ? "" : rule_name);
1781   }
1782 
1783   handle_write_raw(s, h, r);
1784 }
1785 
1786 static __attribute__((unused)) void handle_zero(struct handle h) {
1787 
1788   uint8_t *p = h.base + h.offset / 8;
1789 
1790   /* Zero out up to a byte-aligned offset. */
1791   if (h.offset % 8 != 0) {
1792     uint8_t mask = (UINT8_C(1) << (h.offset % 8)) - 1;
1793     if (h.width < 8 - h.offset % 8) {
1794       mask |= UINT8_MAX & ~((UINT8_C(1) << (h.offset % 8 + h.width)) - 1);
1795     }
1796     *p &= mask;
1797     p++;
1798     if (h.width < 8 - h.offset % 8) {
1799       return;
1800     }
1801     h.width -= 8 - h.offset % 8;
1802   }
1803 
1804   /* Zero out as many bytes as we can. */
1805   memset(p, 0, h.width / 8);
1806   p += h.width / 8;
1807   h.width -= h.width / 8 * 8;
1808 
1809   /* Zero out the trailing bits in the final byte. */
1810   if (h.width > 0) {
1811     uint8_t mask = ~((UINT8_C(1) << h.width) - 1);
1812     *p &= mask;
1813   }
1814 }
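
/* A worked illustration of the masking above (buffer contents are arbitrary):
 * zeroing a 4-bit handle that starts at bit 6 of a 2-byte buffer clears the top
 * two bits of byte 0 and the bottom two bits of byte 1, leaving the rest alone.
 */
static __attribute__((unused)) void handle_zero_example(void) {
  uint8_t buffer[2] = {0xff, 0xff};
  struct handle h = {.base = buffer, .offset = 6, .width = 4};
  handle_zero(h);
  assert(buffer[0] == 0x3f && buffer[1] == 0xfc && "unexpected masking result");
}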
1815 
1816 static void handle_copy(struct handle a, struct handle b) {
1817 
1818   ASSERT(a.width == b.width && "copying between handles of different sizes");
1819 
1820   /* FIXME: This does a bit-by-bit copy which almost certainly could be
1821    * accelerated by detecting byte-boundaries and complementary alignment and
1822    * then calling memcpy when possible.
1823    */
1824 
1825   for (size_t i = 0; i < a.width; i++) {
1826 
1827     uint8_t *dst = a.base + (a.offset + i) / 8;
1828     size_t dst_off = (a.offset + i) % 8;
1829 
1830     const uint8_t *src = b.base + (b.offset + i) / 8;
1831     size_t src_off = (b.offset + i) % 8;
1832 
1833     uint8_t or_mask = ((*src >> src_off) & UINT8_C(1)) << dst_off;
1834     uint8_t and_mask = ~(UINT8_C(1) << dst_off);
1835 
1836     *dst = (*dst & and_mask) | or_mask;
1837   }
1838 }
1839 
1840 static __attribute__((unused)) bool handle_eq(struct handle a,
1841                                               struct handle b) {
1842 
1843   ASSERT(a.width == b.width && "comparing handles of different sizes");
1844 
1845   /* FIXME: as with handle_copy, we do a bit-by-bit comparison which could be
1846    * made more efficient.
1847    */
1848 
1849   for (size_t i = 0; i < a.width; i++) {
1850 
1851     uint8_t *x = a.base + (a.offset + i) / 8;
1852     size_t x_off = (a.offset + i) % 8;
1853     bool x_bit = (*x >> x_off) & 0x1;
1854 
1855     const uint8_t *y = b.base + (b.offset + i) / 8;
1856     size_t y_off = (b.offset + i) % 8;
1857     bool y_bit = (*y >> y_off) & 0x1;
1858 
1859     if (x_bit != y_bit) {
1860       return false;
1861     }
1862   }
1863 
1864   return true;
1865 }
1866 
1867 static __attribute__((unused)) struct handle
1868 handle_narrow(struct handle h, size_t offset, size_t width) {
1869 
1870   ASSERT(h.offset + offset + width <= h.offset + h.width &&
1871          "narrowing a handle with values that actually expand it");
1872 
1873   size_t r __attribute__((unused));
1874   assert(!ADD(h.offset, offset, &r) && "narrowing handle overflows a size_t");
1875 
1876   return (struct handle){
1877       .base = h.base + (h.offset + offset) / CHAR_BIT,
1878       .offset = (h.offset + offset) % CHAR_BIT,
1879       .width = width,
1880   };
1881 }
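
/* An illustrative use of handle_narrow() (offsets chosen arbitrarily):
 * narrowing an 8-bit handle that starts at bit 3 by (offset = 2, width = 4)
 * yields a handle over bits 5..8 of the same buffer.
 */
static __attribute__((unused)) void handle_narrow_example(void) {
  uint8_t buffer[2] = {0};
  struct handle h = {.base = buffer, .offset = 3, .width = 8};
  struct handle narrowed __attribute__((unused)) = handle_narrow(h, 2, 4);
  assert(narrowed.base == buffer && narrowed.offset == 5 && narrowed.width == 4 &&
         "unexpected narrowing result");
}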
1882 
1883 static __attribute__((unused)) struct handle
1884 handle_index(const char *NONNULL context, const char *rule_name,
1885              const char *NONNULL expr, const struct state *NONNULL s,
1886              size_t element_width, value_t index_min, value_t index_max,
1887              struct handle root, value_t index) {
1888 
1889   assert(expr != NULL);
1890 
1891   if (__builtin_expect(index < index_min || index > index_max, 0)) {
1892     error(s, "%sindex out of range in expression %s%s%s", context, expr,
1893           rule_name == NULL ? "" : " within ",
1894           rule_name == NULL ? "" : rule_name);
1895   }
1896 
1897   size_t r1, r2;
1898   if (__builtin_expect(
1899           SUB(index, index_min, &r1) || MUL(r1, element_width, &r2), 0)) {
1900     error(s, "%soverflow when indexing array in expression %s%s%s", context,
1901           expr, rule_name == NULL ? "" : " within ",
1902           rule_name == NULL ? "" : rule_name);
1903   }
1904 
1905   size_t r __attribute__((unused));
1906   assert(!ADD(root.offset, r2, &r) && "indexing handle overflows a size_t");
1907 
1908   return (struct handle){
1909       .base = root.base + (root.offset + r2) / CHAR_BIT,
1910       .offset = (root.offset + r2) % CHAR_BIT,
1911       .width = element_width,
1912   };
1913 }
1914 
1915 static __attribute__((unused)) value_t
1916 handle_isundefined(const struct state *NONNULL s, struct handle h) {
1917   raw_value_t v = handle_read_raw(s, h);
1918 
1919   return v == 0;
1920 }
1921 
1922 /* Overflow-safe helpers for doing bounded arithmetic. */
1923 
1924 static __attribute__((unused)) value_t add(const char *NONNULL context,
1925                                            const char *rule_name,
1926                                            const char *NONNULL expr,
1927                                            const struct state *NONNULL s,
1928                                            value_t a, value_t b) {
1929 
1930   assert(context != NULL);
1931   assert(expr != NULL);
1932 
1933   value_t r;
1934   if (__builtin_expect(ADD(a, b, &r), 0)) {
1935     error(s, "%sinteger overflow in addition in expression %s%s%s", context,
1936           expr, rule_name == NULL ? "" : " within ",
1937           rule_name == NULL ? "" : rule_name);
1938   }
1939   return r;
1940 }
1941 
1942 static __attribute__((unused)) value_t sub(const char *NONNULL context,
1943                                            const char *rule_name,
1944                                            const char *NONNULL expr,
1945                                            const struct state *NONNULL s,
1946                                            value_t a, value_t b) {
1947 
1948   assert(context != NULL);
1949   assert(expr != NULL);
1950 
1951   value_t r;
1952   if (__builtin_expect(SUB(a, b, &r), 0)) {
1953     error(s, "%sinteger overflow in subtraction in expression %s%s%s", context,
1954           expr, rule_name == NULL ? "" : " within ",
1955           rule_name == NULL ? "" : rule_name);
1956   }
1957   return r;
1958 }
1959 
1960 static __attribute__((unused)) value_t mul(const char *NONNULL context,
1961                                            const char *rule_name,
1962                                            const char *NONNULL expr,
1963                                            const struct state *NONNULL s,
1964                                            value_t a, value_t b) {
1965 
1966   assert(context != NULL);
1967   assert(expr != NULL);
1968 
1969   value_t r;
1970   if (__builtin_expect(MUL(a, b, &r), 0)) {
1971     error(s, "%sinteger overflow in multiplication in expression %s%s%s",
1972           context, expr, rule_name == NULL ? "" : " within ",
1973           rule_name == NULL ? "" : rule_name);
1974   }
1975   return r;
1976 }
1977 
1978 static __attribute__((unused)) value_t divide(const char *NONNULL context,
1979                                               const char *rule_name,
1980                                               const char *NONNULL expr,
1981                                               const struct state *NONNULL s,
1982                                               value_t a, value_t b) {
1983 
1984   assert(context != NULL);
1985   assert(expr != NULL);
1986 
1987   if (__builtin_expect(b == 0, 0)) {
1988     error(s, "%sdivision by zero in expression %s%s%s", context, expr,
1989           rule_name == NULL ? "" : " within ",
1990           rule_name == NULL ? "" : rule_name);
1991   }
1992 
1993   if (__builtin_expect(VALUE_MIN != 0 && a == VALUE_MIN && b == (value_t)-1,
1994                        0)) {
1995     error(s, "%sinteger overflow in division in expression %s%s%s", context,
1996           expr, rule_name == NULL ? "" : " within ",
1997           rule_name == NULL ? "" : rule_name);
1998   }
1999 
2000   return a / b;
2001 }
2002 
2003 static __attribute__((unused)) value_t mod(const char *NONNULL context,
2004                                            const char *rule_name,
2005                                            const char *NONNULL expr,
2006                                            const struct state *NONNULL s,
2007                                            value_t a, value_t b) {
2008 
2009   assert(context != NULL);
2010   assert(expr != NULL);
2011 
2012   if (__builtin_expect(b == 0, 0)) {
2013     error(s, "%smodulus by zero in expression %s%s%s", context, expr,
2014           rule_name == NULL ? "" : " within ",
2015           rule_name == NULL ? "" : rule_name);
2016   }
2017 
2018   /* a % b is undefined in C when a / b is not representable (C11 6.5.5p6), so reject VALUE_MIN % -1. */
2019   if (__builtin_expect(VALUE_MIN != 0 && a == VALUE_MIN && b == (value_t)-1,
2020                        0)) {
2021     error(s, "%sinteger overflow in modulo in expression %s%s%s", context, expr,
2022           rule_name == NULL ? "" : " within ",
2023           rule_name == NULL ? "" : rule_name);
2024   }
2025 
2026   return a % b;
2027 }
2028 
2029 static __attribute__((unused)) value_t
2030 negate(const char *NONNULL context, const char *rule_name,
2031        const char *NONNULL expr, const struct state *NONNULL s, value_t a) {
2032 
2033   assert(context != NULL);
2034   assert(expr != NULL);
2035 
2036   if (__builtin_expect(VALUE_MIN != 0 && a == VALUE_MIN, 0)) {
2037     error(s, "%sinteger overflow in negation in expression %s%s%s", context,
2038           expr, rule_name == NULL ? "" : " within ",
2039           rule_name == NULL ? "" : rule_name);
2040   }
2041 
2042   return -a;
2043 }
2044 
2045 /* wrappers for << and >> to avoid C undefined behaviour */
2046 static __attribute__((unused)) value_t rsh(value_t a, value_t b);
2047 static __attribute__((unused)) value_t lsh(value_t a, value_t b) {
2048 
2049 #ifdef __clang__
2050 #pragma clang diagnostic push
2051 #pragma clang diagnostic ignored "-Wtautological-compare"
2052 #pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
2053 #elif defined(__GNUC__)
2054 #pragma GCC diagnostic push
2055 #pragma GCC diagnostic ignored "-Wtype-limits"
2056 #endif
2057   if (value_is_signed() && b <= -(value_t)(sizeof(a) * 8)) {
2058     return 0;
2059   }
2060 
2061   if (b >= (value_t)(sizeof(a) * 8)) {
2062     return 0;
2063   }
2064 
2065   if (b < 0) {
2066     return rsh(a, -b);
2067   }
2068 #ifdef __clang__
2069 #pragma clang diagnostic pop
2070 #elif defined(__GNUC__)
2071 #pragma GCC diagnostic pop
2072 #endif
2073 
2074   return (value_t)((raw_value_t)a << b);
2075 }
2076 
2077 static value_t rsh(value_t a, value_t b) {
2078 
2079 #ifdef __clang__
2080 #pragma clang diagnostic push
2081 #pragma clang diagnostic ignored "-Wtautological-compare"
2082 #pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
2083 #elif defined(__GNUC__)
2084 #pragma GCC diagnostic push
2085 #pragma GCC diagnostic ignored "-Wtype-limits"
2086 #endif
2087   if (value_is_signed() && b <= -(value_t)(sizeof(a) * 8)) {
2088     return 0;
2089   }
2090 
2091   if (b >= (value_t)(sizeof(a) * 8)) {
2092     return 0;
2093   }
2094 
2095   if (b < 0) {
2096     return lsh(a, -b);
2097   }
2098 #ifdef __clang__
2099 #pragma clang diagnostic pop
2100 #elif defined(__GNUC__)
2101 #pragma GCC diagnostic pop
2102 #endif
2103 
2104   return a >> b;
2105 }
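
/* Spot checks of the wrapper semantics above (operands are arbitrary):
 * oversized shift distances collapse to 0 and, for signed value types,
 * negative distances shift in the opposite direction instead of invoking
 * undefined behaviour.
 */
static __attribute__((unused)) void shift_example(void) {
  assert(lsh(1, (value_t)(sizeof(value_t) * 8)) == 0 &&
         "oversized left shift should yield 0");
  assert(rsh(4, 1) == 2 && "right shift by 1 should halve the value");
  if (value_is_signed()) {
    assert(lsh(4, -1) == 2 && "negative left shift should shift right");
  }
}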
2106 
2107 /* Bitwise NOT wrapper. We only need this to suppress some spurious GCC
2108  * warnings:
2109  *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=38341
2110  *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59098
2111  */
2112 static __attribute__((unused)) value_t bnot(value_t v) {
2113   return (value_t)~(raw_value_t)v;
2114 }
2115 
2116 /* use Heap's algorithm for generating permutations to implement a
2117  * permutation-to-number mapping
2118  */
2119 static __attribute__((unused)) size_t
2120 permutation_to_index(const size_t *NONNULL permutation, size_t *NONNULL stack,
2121                      size_t *NONNULL working, size_t count) {
2122 
2123   /* byte extent of the permutation arrays */
2124   size_t bytes = sizeof(permutation[0]) * count;
2125 
2126   size_t index = 0;
2127 
2128   /* clear our stack */
2129   memset(stack, 0, bytes);
2130 
2131   /* initialise the first (identity) permutation */
2132   for (size_t i = 0; i < count; ++i) {
2133     working[i] = i;
2134   }
2135 
2136   if (memcmp(permutation, working, bytes) == 0) {
2137     return index;
2138   }
2139 
2140   size_t i = 0;
2141   while (i < count) {
2142     if (stack[i] < i) {
2143       ++index;
2144       if (i % 2 == 0) {
2145         /* swap working[0] and working[i] */
2146         size_t tmp = working[0];
2147         working[0] = working[i];
2148         working[i] = tmp;
2149       } else {
2150         /* swap working[stack[i]] and working[i] */
2151         size_t tmp = working[stack[i]];
2152         working[stack[i]] = working[i];
2153         working[i] = tmp;
2154       }
2155       if (memcmp(permutation, working, bytes) == 0) {
2156         return index;
2157       }
2158       ++stack[i];
2159       i = 0;
2160     } else {
2161       stack[i] = 0;
2162       ++i;
2163     }
2164   }
2165 
2166   /* the permutation should have been one of the possible ones */
2167   ASSERT(!"invalid permutation passed to permutation_to_index");
2168 }
2169 static __attribute__((unused)) void
2170 index_to_permutation(size_t index, size_t *NONNULL permutation,
2171                      size_t *NONNULL stack, size_t count) {
2172 
2173   /* byte extent of the permutation arrays */
2174   size_t bytes = sizeof(permutation[0]) * count;
2175 
2176   size_t ind = 0;
2177 
2178   /* clear our stack */
2179   memset(stack, 0, bytes);
2180 
2181   /* initialise the first (identity) permutation */
2182   for (size_t i = 0; i < count; ++i) {
2183     permutation[i] = i;
2184   }
2185 
2186   if (ind == index) {
2187     return;
2188   }
2189 
2190   size_t i = 0;
2191   while (i < count) {
2192     if (stack[i] < i) {
2193       ++ind;
2194       if (i % 2 == 0) {
2195         /* swap permutation[0] and permutation[i] */
2196         size_t tmp = permutation[0];
2197         permutation[0] = permutation[i];
2198         permutation[i] = tmp;
2199       } else {
2200         /* swap permutation[stack[i]] and permutation[i] */
2201         size_t tmp = permutation[stack[i]];
2202         permutation[stack[i]] = permutation[i];
2203         permutation[i] = tmp;
2204       }
2205       if (ind == index) {
2206         return;
2207       }
2208       ++stack[i];
2209       i = 0;
2210     } else {
2211       stack[i] = 0;
2212       ++i;
2213     }
2214   }
2215 
2216   /* the target permutation should have been one of the possible ones */
2217   ASSERT(!"invalid index passed to index_to_permutation");
2218 }
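
/* A round-trip check of the mapping above (3 elements, values illustrative):
 * index 0 is the identity permutation and index 1 corresponds to Heap's first
 * swap, {1, 0, 2}.
 */
static __attribute__((unused)) void permutation_index_example(void) {
  size_t permutation[3], stack[3], working[3];
  index_to_permutation(1, permutation, stack, 3);
  assert(permutation[0] == 1 && permutation[1] == 0 && permutation[2] == 2 &&
         "unexpected permutation for index 1");
  size_t index __attribute__((unused)) =
      permutation_to_index(permutation, stack, working, 3);
  assert(index == 1 && "round trip through the permutation mapping failed");
}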
2219 
2220 /*******************************************************************************
2221  * State queue node                                                            *
2222  *                                                                             *
2223  * Queue nodes are 4K-sized, 4K-aligned linked-list nodes. They contain        *
2224  * pending states and then a pointer to the next node in the queue.            *
2225  ******************************************************************************/
2226 
2227 struct queue_node {
2228   struct state
2229       *s[(4096 - sizeof(struct queue_node *)) / sizeof(struct state *)];
2230   struct queue_node *next;
2231 };
2232 
2233 _Static_assert(sizeof(struct queue_node) == 4096,
2234                "incorrect queue_node size calculation");
2235 
2236 static struct queue_node *queue_node_new(void) {
2237   struct queue_node *p = NULL;
2238 
2239   int r = posix_memalign((void **)&p, sizeof(*p), sizeof(*p));
2240 
2241   assert((r == 0 || r == ENOMEM) && "invalid alignment to posix_memalign");
2242 
2243   if (__builtin_expect(r != 0, 0)) {
2244     oom();
2245   }
2246 
2247   memset(p, 0, sizeof(*p));
2248 
2249   return p;
2250 }
2251 
2252 static void queue_node_free(struct queue_node *p) { free(p); }
2253 
2254 /******************************************************************************/
2255 
2256 /*******************************************************************************
2257  * Queue node handles                                                          *
2258  *                                                                             *
2259  * These are pointers to a member-aligned address within a queue_node. The     *
2260  * idea is that you can have a queue_handle_t pointing at either one of the    *
2261  * elements of the `s` member or at the chained `next` pointer. Since          *
2262  * queue_node pointers are always 4K-aligned, you can examine the low 12 bits  *
2263  * of the queue node handle to determine which member you are pointing at.     *
2264  ******************************************************************************/
2265 
2266 typedef uintptr_t queue_handle_t;
2267 
2268 static queue_handle_t queue_handle_from_node_ptr(const struct queue_node *n) {
2269   return (queue_handle_t)n;
2270 }
2271 
2272 static struct queue_node *queue_handle_base(queue_handle_t h) {
2273   return (struct queue_node *)(h - h % sizeof(struct queue_node));
2274 }
2275 
2276 static bool queue_handle_is_state_pptr(queue_handle_t h) {
2277   return h % sizeof(struct queue_node) <
2278          __builtin_offsetof(struct queue_node, next);
2279 }
2280 
2281 static struct state **queue_handle_to_state_pptr(queue_handle_t h) {
2282   assert(queue_handle_is_state_pptr(h) &&
2283          "invalid use of queue_handle_to_state_pptr");
2284 
2285   return (struct state **)h;
2286 }
2287 
2288 static struct queue_node **queue_handle_to_node_pptr(queue_handle_t h) {
2289   assert(!queue_handle_is_state_pptr(h) &&
2290          "invalid use of queue_handle_to_node_pptr");
2291 
2292   return (struct queue_node **)h;
2293 }
2294 
2295 static queue_handle_t queue_handle_next(queue_handle_t h) {
2296   return h + sizeof(struct state *);
2297 }
2298 
2299 /******************************************************************************/
2300 
2301 /*******************************************************************************
2302  * Hazard pointers                                                             *
2303  *                                                                             *
2304  * The idea of "hazard pointers" comes from Maged Michael, "Hazard Pointers:   *
2305  * Safe Memory Reclamation for Lock-Free Objects" in TPDS 15(8) 2004. The      *
2306  * basic concept is to maintain a collection of safe-to-dereference pointers.  *
2307  * Before freeing a pointer, you look in this collection to see if it is in    *
2308  * use and you must always add a pointer to this collection before             *
2309  * dereferencing it. The finer details of how we keep this consistent and why  *
2310  * the size of this collection can be statically known ahead of time are a     *
2311  * little complicated, but the paper explains this in further detail.          *
2312  ******************************************************************************/
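
/* A usage sketch of the protocol described above (illustrative only;
 * `reread_shared_handle()` is a hypothetical stand-in for re-reading whatever
 * shared structure the handle was obtained from):
 *
 *   queue_handle_t h = ...;             // read a handle from shared memory
 *   hazard(h);                          // announce the upcoming dereference
 *   if (reread_shared_handle() != h) {  // re-validate; it may have been freed
 *     unhazard(h);
 *     // retry from the top
 *   }
 *   ... safely dereference queue_handle_base(h) ...
 *   unhazard(h);                        // drop protection when done
 */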
2313 
2314 /* Queue node pointers currently safe to dereference. */
2315 static const struct queue_node *hazarded[THREADS];
2316 
2317 /* Protect a pointer that we wish to dereference. */
2318 static void hazard(queue_handle_t h) {
2319 
2320   /* Find the queue node this handle lies within. */
2321   const struct queue_node *p = queue_handle_base(h);
2322 
2323   /* You can't protect the null pointer because it is invalid to dereference it.
2324    */
2325   assert(p != NULL && "attempt to hazard an invalid pointer");
2326 
2327   /* Each thread is only allowed a single hazarded pointer at a time. */
2328   assert(__atomic_load_n(&hazarded[thread_id], __ATOMIC_SEQ_CST) == NULL &&
2329          "hazarding multiple pointers at once");
2330 
2331   __atomic_store_n(&hazarded[thread_id], p, __ATOMIC_SEQ_CST);
2332 }
2333 
2334 /* Drop protection on a pointer whose target we are done accessing. */
2335 static void unhazard(queue_handle_t h) {
2336 
2337   /* Find the queue node this handle lies within. */
2338   const struct queue_node *p __attribute__((unused)) = queue_handle_base(h);
2339 
2340   assert(p != NULL && "attempt to unhazard an invalid pointer");
2341 
2342   assert(__atomic_load_n(&hazarded[thread_id], __ATOMIC_SEQ_CST) != NULL &&
2343          "unhazarding a pointer when none are hazarded");
2344 
2345   assert(__atomic_load_n(&hazarded[thread_id], __ATOMIC_SEQ_CST) == p &&
2346          "unhazarding a pointer that differs from the one hazarded");
2347 
2348   __atomic_store_n(&hazarded[thread_id], NULL, __ATOMIC_SEQ_CST);
2349 }
2350 
2351 /* Free a pointer or, if not possible, defer this to later. */
2352 static void reclaim(queue_handle_t h) {
2353 
2354   /* Find the queue node this handle lies within. */
2355   struct queue_node *p = queue_handle_base(h);
2356 
2357   assert(p != NULL && "reclaiming a null pointer");
2358 
2359   /* The reclaimer is not allowed to be freeing something while also holding a
2360    * hazarded pointer.
2361    */
2362   assert(__atomic_load_n(&hazarded[thread_id], __ATOMIC_SEQ_CST) == NULL &&
2363          "reclaiming a pointer while holding a hazarded pointer");
2364 
2365   /* Pointers that we failed to free initially because they were in use
2366    * (hazarded) at the time they were passed to reclaim().
2367    *
2368    * Why are we sure we will only ever have a maximum of `THREADS - 1` pointers
2369    * outstanding? Anything passed to reclaim() is expected to be
2370    * now-unreachable, so the only outstanding references to such are threads
2371    * racing with us. Because each thread can only have one hazarded pointer at a
2372    * time, the maximum number of in use pointers right now is `THREADS - 1`
2373    * (because the current thread does not have one).
2374    *
2375    * There is an edge case where another thread (1) held a hazarded pointer we
2376    * previously tried to reclaim and thus ended up on our deferred list, then
2377    * (2) in-between that time and now dropped this reference and acquired a
2378    * hazarded pointer to `p`. This still will not exceed our count of
2379    * `THREADS - 1` as the order in which we scan the deferred list and then add
2380    * `p` to it below ensures that we will discover the originally hazarded
2381    * pointer (now no longer conflicted) and clear its slot, leaving this
2382    * available for `p`.
2383    */
2384   static _Thread_local struct queue_node *deferred[THREADS - 1];
2385   static const size_t DEFERRED_SIZE = sizeof(deferred) / sizeof(deferred[0]);
2386 
2387   /* First try to free any previously deferred pointers. */
2388   for (size_t i = 0; i < DEFERRED_SIZE; i++) {
2389     if (deferred[i] != NULL) {
2390       bool conflict = false;
2391       for (size_t j = 0; j < sizeof(hazarded) / sizeof(hazarded[0]); j++) {
2392         if (j == thread_id) {
2393           /* No need to check for conflicts with ourself. */
2394           assert(__atomic_load_n(&hazarded[j], __ATOMIC_SEQ_CST) == NULL);
2395           continue;
2396         }
2397         if (deferred[i] == __atomic_load_n(&hazarded[j], __ATOMIC_SEQ_CST)) {
2398           /* This pointer is in use by thread j. */
2399           conflict = true;
2400           break;
2401         }
2402       }
2403       if (!conflict) {
2404         queue_node_free(deferred[i]);
2405         deferred[i] = NULL;
2406       }
2407     }
2408   }
2409 
2410   /* Now deal with the pointer we were passed. The most likely case is that no
2411    * one else is using this pointer, so try this first.
2412    */
2413   bool conflict = false;
2414   for (size_t i = 0; i < sizeof(hazarded) / sizeof(hazarded[i]); i++) {
2415     if (i == thread_id) {
2416       /* No need to check for conflicts with ourself. */
2417       assert(__atomic_load_n(&hazarded[i], __ATOMIC_SEQ_CST) == NULL);
2418       continue;
2419     }
2420     if (p == __atomic_load_n(&hazarded[i], __ATOMIC_SEQ_CST)) {
2421       /* Bad luck :( */
2422       conflict = true;
2423       break;
2424     }
2425   }
2426 
2427   if (!conflict) {
2428     /* We're done! */
2429     queue_node_free(p);
2430     return;
2431   }
2432 
2433   /* If we reached here, we need to defer this reclamation to later. */
2434   for (size_t i = 0; i < DEFERRED_SIZE; i++) {
2435     if (deferred[i] == NULL) {
2436       deferred[i] = p;
2437       return;
2438     }
2439   }
2440 
2441   ASSERT(!"deferred more than `THREADS` reclamations");
2442 }
2443 
2444 /******************************************************************************/
2445 
2446 /*******************************************************************************
2447  * Atomic operations on double word values                                     *
2448  ******************************************************************************/
2449 
2450 #if __SIZEOF_POINTER__ <= 4
2451 typedef uint64_t dword_t;
2452 #elif __SIZEOF_POINTER__ <= 8
2453 typedef unsigned __int128 dword_t;
2454 #else
2455 #error "unexpected pointer size; what scalar type to use for dword_t?"
2456 #endif
2457 
2458 static dword_t atomic_read(dword_t *p) {
2459 
2460   if (THREADS == 1) {
2461     return *p;
2462   }
2463 
2464 #if defined(__x86_64__) || defined(__i386__)
2465   /* x86-64: MOV is not guaranteed to be atomic on 128-bit naturally aligned
2466    *   memory. The way to work around this is apparently the following
2467    *   degenerate CMPXCHG16B.
2468    * i386: __atomic_load_n emits code calling a libatomic function that takes a
2469    *   lock, making this no longer lock free. Force a CMPXCHG8B by using the
2470    *   __sync built-in instead.
2471    */
2472   return __sync_val_compare_and_swap(p, 0, 0);
2473 #endif
2474 
2475   return __atomic_load_n(p, __ATOMIC_SEQ_CST);
2476 }
2477 
2478 static void atomic_write(dword_t *p, dword_t v) {
2479 
2480   if (THREADS == 1) {
2481     *p = v;
2482     return;
2483   }
2484 
2485 #if defined(__x86_64__) || defined(__i386__)
2486   /* As explained above, we need some extra gymnastics to avoid a call to
2487    * libatomic on x86-64 and i386.
2488    */
2489   dword_t expected;
2490   dword_t old = 0;
2491   do {
2492     expected = old;
2493     old = __sync_val_compare_and_swap(p, expected, v);
2494   } while (expected != old);
2495   return;
2496 #endif
2497 
2498   __atomic_store_n(p, v, __ATOMIC_SEQ_CST);
2499 }
2500 
2501 static bool atomic_cas(dword_t *p, dword_t expected, dword_t new) {
2502 
2503   if (THREADS == 1) {
2504     if (*p == expected) {
2505       *p = new;
2506       return true;
2507     }
2508     return false;
2509   }
2510 
2511 #if defined(__x86_64__) || defined(__i386__)
2512   /* Make GCC >= 7.1 emit cmpxchg on x86-64 and i386. See
2513    * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878.
2514    */
2515   return __sync_bool_compare_and_swap(p, expected, new);
2516 #endif
2517 
2518   return __atomic_compare_exchange_n(p, &expected, new, false, __ATOMIC_SEQ_CST,
2519                                      __ATOMIC_SEQ_CST);
2520 }
2521 
2522 static dword_t atomic_cas_val(dword_t *p, dword_t expected, dword_t new) {
2523 
2524   if (THREADS == 1) {
2525     dword_t old = *p;
2526     if (old == expected) {
2527       *p = new;
2528     }
2529     return old;
2530   }
2531 
2532 #if defined(__x86_64__) || defined(__i386__)
2533   /* Make GCC >= 7.1 emit cmpxchg on x86-64 and i386. See
2534    * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878.
2535    */
2536   return __sync_val_compare_and_swap(p, expected, new);
2537 #endif
2538 
2539   (void)__atomic_compare_exchange_n(p, &expected, new, false, __ATOMIC_SEQ_CST,
2540                                     __ATOMIC_SEQ_CST);
2541   return expected;
2542 }
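
/* A minimal illustration of the return-value convention (operands arbitrary):
 * atomic_cas_val() returns the previous contents, so success is detected by
 * comparing the returned value with the expected one.
 */
static __attribute__((unused)) void atomic_cas_val_example(void) {
  dword_t x = 1;
  dword_t witnessed __attribute__((unused)) = atomic_cas_val(&x, 1, 2);
  assert(witnessed == 1 && x == 2 && "CAS should have succeeded");
  witnessed = atomic_cas_val(&x, 1, 3);
  assert(witnessed == 2 && x == 2 && "CAS should have failed");
}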
2543 
2544 /******************************************************************************/
2545 
2546 /*******************************************************************************
2547  * Double pointers                                                             *
2548  *                                                                             *
2549  * A scalar type that can store the value of two pointers.                     *
2550  ******************************************************************************/
2551 
2552 #if __SIZEOF_POINTER__ <= 4
2553 typedef uint64_t double_ptr_t;
2554 #elif __SIZEOF_POINTER__ <= 8
2555 typedef unsigned __int128 double_ptr_t;
2556 #else
2557 #error "unexpected pointer size; what scalar type to use for double_ptr_t?"
2558 #endif
2559 
2560 static uintptr_t double_ptr_extract1(double_ptr_t p) {
2561 
2562   _Static_assert(sizeof(p) > sizeof(uintptr_t),
2563                  "double_ptr_t is not big enough to fit a pointer");
2564 
2565   uintptr_t q;
2566   memcpy(&q, &p, sizeof(q));
2567 
2568   return q;
2569 }
2570 
2571 static uintptr_t double_ptr_extract2(double_ptr_t p) {
2572 
2573   _Static_assert(sizeof(p) >= 2 * sizeof(uintptr_t),
2574                  "double_ptr_t is not big enough to fit two pointers");
2575 
2576   uintptr_t q;
2577   memcpy(&q, (unsigned char *)&p + sizeof(void *), sizeof(q));
2578 
2579   return q;
2580 }
2581 
2582 static double_ptr_t double_ptr_make(uintptr_t q1, uintptr_t q2) {
2583 
2584   double_ptr_t p = 0;
2585 
2586   _Static_assert(sizeof(p) >= 2 * sizeof(uintptr_t),
2587                  "double_ptr_t is not big enough to fit two pointers");
2588 
2589   memcpy(&p, &q1, sizeof(q1));
2590   memcpy((unsigned char *)&p + sizeof(q1), &q2, sizeof(q2));
2591 
2592   return p;
2593 }
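
/* A round-trip illustration (the addresses are just those of two locals):
 * packing two pointers with double_ptr_make() and extracting them again
 * returns the original values.
 */
static __attribute__((unused)) void double_ptr_example(void) {
  int a = 0, b = 0;
  double_ptr_t p __attribute__((unused)) =
      double_ptr_make((uintptr_t)&a, (uintptr_t)&b);
  assert(double_ptr_extract1(p) == (uintptr_t)&a && "first pointer lost");
  assert(double_ptr_extract2(p) == (uintptr_t)&b && "second pointer lost");
}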
2594 
2595 /******************************************************************************/
2596 
2597 /*******************************************************************************
2598  * State queue                                                                 *
2599  *                                                                             *
2600  * The following implements a per-thread queue for pending states. The only    *
2601  * supported operations are enqueueing and dequeueing states. A property we    *
2602  * maintain is that all states within all queues pass the current model's      *
2603  * invariants.                                                                 *
2604  ******************************************************************************/
2605 
2606 static struct {
2607   double_ptr_t ends;
2608   size_t count;
2609 } q[THREADS];
2610 
2611 static size_t queue_enqueue(struct state *NONNULL s, size_t queue_id) {
2612   assert(queue_id < sizeof(q) / sizeof(q[0]) && "out of bounds queue access");
2613 
2614   /* Look up the tail of the queue. */
2615 
2616   double_ptr_t ends = atomic_read(&q[queue_id].ends);
2617 
2618 retry:;
2619   queue_handle_t tail = double_ptr_extract2(ends);
2620 
2621   if (tail == 0) {
2622     /* There's nothing currently in the queue. */
2623 
2624     assert(double_ptr_extract1(ends) == 0 &&
2625            "tail of queue 0 while head is non-0");
2626 
2627     struct queue_node *n = queue_node_new();
2628     n->s[0] = s;
2629 
2630     double_ptr_t new = double_ptr_make(queue_handle_from_node_ptr(n),
2631                                        queue_handle_from_node_ptr(n));
2632 
2633     double_ptr_t old = atomic_cas_val(&q[queue_id].ends, ends, new);
2634     if (old != ends) {
2635       /* Failed. */
2636       queue_node_free(n);
2637       ends = old;
2638       goto retry;
2639     }
2640 
2641   } else {
2642     /* The queue is non-empty, so we'll need to access the last element. */
2643 
2644     /* Try to protect our upcoming access to the tail. */
2645     hazard(tail);
2646     {
2647       double_ptr_t ends_check = atomic_read(&q[queue_id].ends);
2648       if (ends != ends_check) {
2649         /* Failed. Someone else modified the queue in the meantime. */
2650         unhazard(tail);
2651         ends = ends_check;
2652         goto retry;
2653       }
2654     }
2655 
2656     /* We've now notified other threads that we're going to be accessing the
2657      * tail, so we can safely dereference its pointer.
2658      */
2659 
2660     struct queue_node *new_node = NULL;
2661     queue_handle_t next_tail = queue_handle_next(tail);
2662 
2663     if (queue_handle_is_state_pptr(next_tail)) {
2664       /* There's an available slot in this queue node; no need to create a new
2665        * one.
2666        */
2667 
2668       {
2669         struct state **target = queue_handle_to_state_pptr(next_tail);
2670         struct state *null = NULL;
2671         if (!__atomic_compare_exchange_n(target, &null, s, false,
2672                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
2673           /* Failed. Someone else enqueued before we could. */
2674           unhazard(tail);
2675           goto retry;
2676         }
2677       }
2678 
2679     } else {
2680       /* There's no remaining slot in this queue node. We'll need to create a
2681        * new (empty) queue node, add our state to this one and then append this
2682        * node to the queue.
2683        */
2684 
2685       /* Create the new node. */
2686       new_node = queue_node_new();
2687       new_node->s[0] = s;
2688 
2689       /* Try to update the chained pointer of the current tail to point to this
2690        * new node.
2691        */
2692       struct queue_node **target = queue_handle_to_node_pptr(next_tail);
2693       struct queue_node *null = NULL;
2694       if (!__atomic_compare_exchange_n(target, &null, new_node, false,
2695                                        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
2696         /* Failed. Someone else enqueued before we could. */
2697         queue_node_free(new_node);
2698         unhazard(tail);
2699         goto retry;
2700       }
2701 
2702       /* We now need the tail to point at our new node. */
2703       next_tail = queue_handle_from_node_ptr(new_node);
2704     }
2705 
2706     queue_handle_t head = double_ptr_extract1(ends);
2707     double_ptr_t new = double_ptr_make(head, next_tail);
2708 
2709     /* Try to update the queue. */
2710     {
2711       double_ptr_t old = atomic_cas_val(&q[queue_id].ends, ends, new);
2712       if (old != ends) {
2713         /* Failed. Someone else dequeued before we could finish. We know the
2714          * operation that beat us was a dequeue and not an enqueue, because by
2715          * writing to next_tail we have prevented any other enqueue from
2716          * succeeding.
2717          */
2718 
2719         /* Undo the update of next_tail. We know this is safe (non-racy) because
2720          * no other enqueue can proceed and a dequeue will never look into
2721          * next_tail due to the way it handles the case when head == tail.
2722          */
2723         next_tail = queue_handle_next(tail);
2724         if (queue_handle_is_state_pptr(next_tail)) {
2725           /* We previously wrote into an existing queue node. */
2726           struct state **target = queue_handle_to_state_pptr(next_tail);
2727           struct state *temp = s;
2728           bool r __attribute__((unused)) = __atomic_compare_exchange_n(
2729               target, &temp, NULL, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
2730           assert(r && "undo of write to next_tail failed");
2731         } else {
2732           /* We previously wrote into a new queue node. */
2733           struct queue_node **target = queue_handle_to_node_pptr(next_tail);
2734           struct queue_node *temp = new_node;
2735           bool r __attribute__((unused)) = __atomic_compare_exchange_n(
2736               target, &temp, NULL, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
2737           assert(r && "undo of write to next_tail failed");
2738 
2739           queue_node_free(new_node);
2740         }
2741 
2742         unhazard(tail);
2743         ends = old;
2744         goto retry;
2745       }
2746     }
2747 
2748     /* Success! */
2749 
2750     unhazard(tail);
2751   }
2752 
2753   size_t count = __atomic_add_fetch(&q[queue_id].count, 1, __ATOMIC_SEQ_CST);
2754 
2755   TRACE(TC_QUEUE, "enqueued state %p into queue %zu, queue length is now %zu",
2756         s, queue_id, count);
2757 
2758   return count;
2759 }
2760 
2761 static const struct state *queue_dequeue(size_t *NONNULL queue_id) {
2762   assert(queue_id != NULL && *queue_id < sizeof(q) / sizeof(q[0]) &&
2763          "out of bounds queue access");
2764 
2765   const struct state *s = NULL;
2766 
2767   for (size_t attempts = 0; attempts < sizeof(q) / sizeof(q[0]); attempts++) {
2768 
2769     double_ptr_t ends = atomic_read(&q[*queue_id].ends);
2770 
2771   retry:;
2772     queue_handle_t head = double_ptr_extract1(ends);
2773 
2774     if (head != 0) {
2775       /* This queue is non-empty. */
2776 
2777       /* Try to protect our upcoming accesses to the head. */
2778       hazard(head);
2779       {
2780         double_ptr_t ends_check = atomic_read(&q[*queue_id].ends);
2781         if (ends != ends_check) {
2782           /* Failed. Someone else updated the queue. */
2783           unhazard(head);
2784           ends = ends_check;
2785           goto retry;
2786         }
2787       }
2788 
2789       queue_handle_t tail = double_ptr_extract2(ends);
2790 
2791       double_ptr_t new;
2792       if (head == tail) {
2793         /* There is only a single element in the queue. We will need to update
2794          * both head and tail.
2795          */
2796         new = double_ptr_make(0, 0);
2797       } else if (queue_handle_is_state_pptr(head)) {
2798         /* There are multiple elements in the queue; we can deal only with the
2799          * head.
2800          */
2801         new = double_ptr_make(queue_handle_next(head), tail);
2802       } else {
2803         /* The head of the queue is the end of a queue node. I.e. the only thing
2804          * remaining in this queue node is the chained pointer to the next queue
2805          * node.
2806          */
2807 
2808         /* Load the next queue node. */
2809         struct queue_node **n = queue_handle_to_node_pptr(head);
2810         struct queue_node *new_head = __atomic_load_n(n, __ATOMIC_SEQ_CST);
2811         new = double_ptr_make(queue_handle_from_node_ptr(new_head), tail);
2812 
2813         /* Try to replace the current head with the next node. */
2814         double_ptr_t old = atomic_cas_val(&q[*queue_id].ends, ends, new);
2815         /* Either way, now we'll need to retry, but if we succeeded we also need
2816          * to free the queue node we just removed.
2817          */
2818         unhazard(head);
2819         if (old == ends) {
2820           /* Succeeded. */
2821           reclaim(head);
2822         }
2823         ends = old;
2824         goto retry;
2825       }
2826 
2827       /* Try to remove the head. */
2828       {
2829         double_ptr_t old = atomic_cas_val(&q[*queue_id].ends, ends, new);
2830         if (old != ends) {
2831           /* Failed. Someone else either enqueued or dequeued. */
2832           unhazard(head);
2833           ends = old;
2834           goto retry;
2835         }
2836       }
2837 
2838       /* We now have either a pointer to a state or we've just removed an empty
2839        * queue node that was the last in the queue.
2840        */
2841 
2842       if (queue_handle_is_state_pptr(head)) {
2843         struct state **st = queue_handle_to_state_pptr(head);
2844         s = *st;
2845       }
2846 
2847       unhazard(head);
2848 
2849       if (head == tail || !queue_handle_is_state_pptr(head)) {
2850         reclaim(head);
2851       }
2852 
2853       if (s == NULL) {
2854         /* Move to the next queue to try. */
2855         *queue_id = (*queue_id + 1) % (sizeof(q) / sizeof(q[0]));
2856         continue;
2857       }
2858 
2859       size_t count =
2860           __atomic_sub_fetch(&q[*queue_id].count, 1, __ATOMIC_SEQ_CST);
2861 
2862       TRACE(TC_QUEUE,
2863             "dequeued state %p from queue %zu, queue length is now %zu",
2864             s, *queue_id, count);
2865 
2866       return s;
2867     }
2868 
2869     /* Move to the next queue to try. */
2870     *queue_id = (*queue_id + 1) % (sizeof(q) / sizeof(q[0]));
2871   }
2872 
2873   return s;
2874 }
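
/* A typical usage pattern (sketch only; `s` is assumed to be a fully
 * initialised state and `thread_id` names the caller's preferred queue):
 *
 *   (void)queue_enqueue(s, thread_id);
 *   ...
 *   size_t qid = thread_id;
 *   const struct state *next = queue_dequeue(&qid);
 *   if (next != NULL) {
 *     // expand `next`, enqueueing any undiscovered successors
 *   }
 */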
2875 
2876 /******************************************************************************/
2877 
2878 /*******************************************************************************
2879  * Reference counted pointers                                                  *
2880  *                                                                             *
2881  * These are capable of encapsulating any generic pointer (void*). Note that   *
2882  * we rely on the existence of double-word atomics. On x86-64, you need to     *
2883  * use compiler flag '-mcx16' to get an efficient 128-bit cmpxchg.             *
2884  *                                                                             *
2885  * Of these functions, only the following are thread safe:                     *
2886  *                                                                             *
2887  *   * refcounted_ptr_get                                                      *
2888  *   * refcounted_ptr_peek                                                     *
2889  *   * refcounted_ptr_put                                                      *
2890  *   * refcounted_ptr_set                                                      *
2891  *                                                                             *
2892  * The caller is expected to coordinate with other threads to exclude them     *
2893  * operating on the relevant refcounted_ptr_t when using one of the other      *
2894  * functions:                                                                  *
2895  *                                                                             *
2896  *   * refcounted_ptr_shift                                                    *
2897  *                                                                             *
2898  ******************************************************************************/
2899 
2900 struct refcounted_ptr {
2901   void *ptr;
2902   size_t count;
2903 };
2904 
2905 #if __SIZEOF_POINTER__ <= 4
2906 typedef uint64_t refcounted_ptr_t;
2907 #elif __SIZEOF_POINTER__ <= 8
2908 typedef unsigned __int128 refcounted_ptr_t;
2909 #else
2910 #error "unexpected pointer size; what scalar type to use for refcounted_ptr_t?"
2911 #endif
2912 
2913 _Static_assert(sizeof(struct refcounted_ptr) <= sizeof(refcounted_ptr_t),
2914                "refcounted_ptr does not fit in a refcounted_ptr_t, which we "
2915                "need to operate on it atomically");
2916 
2917 static void refcounted_ptr_set(refcounted_ptr_t *NONNULL p, void *ptr) {
2918 
2919 #ifndef NDEBUG
2920   /* Read the current state of the pointer. */
2921   refcounted_ptr_t old = atomic_read(p);
2922   struct refcounted_ptr p2;
2923   memcpy(&p2, &old, sizeof(old));
2924   assert(p2.count == 0 && "overwriting a pointer source while someone still "
2925                           "has a reference to this pointer");
2926 #endif
2927 
2928   /* Set the current source pointer with no outstanding references. */
2929   struct refcounted_ptr p3;
2930   p3.ptr = ptr;
2931   p3.count = 0;
2932 
2933   /* Commit the result. */
2934   refcounted_ptr_t new;
2935   memcpy(&new, &p3, sizeof(p3));
2936   atomic_write(p, new);
2937 }
2938 
2939 static void *refcounted_ptr_get(refcounted_ptr_t *NONNULL p) {
2940 
2941   refcounted_ptr_t old, new;
2942   void *ret;
2943   bool r;
2944 
2945   do {
2946 
2947     /* Read the current state of the pointer. */
2948     old = atomic_read(p);
2949     struct refcounted_ptr p2;
2950     memcpy(&p2, &old, sizeof(old));
2951 
2952     /* Take a reference to it. */
2953     p2.count++;
2954     ret = p2.ptr;
2955 
2956     /* Try to commit our results. */
2957     memcpy(&new, &p2, sizeof(new));
2958     r = atomic_cas(p, old, new);
2959   } while (!r);
2960 
2961   return ret;
2962 }
2963 
2964 static void refcounted_ptr_put(refcounted_ptr_t *NONNULL p,
2965                                void *ptr __attribute__((unused))) {
2966 
2967   refcounted_ptr_t old, new;
2968   bool r;
2969 
2970   do {
2971 
2972     /* Read the current state of the pointer. */
2973     old = atomic_read(p);
2974     struct refcounted_ptr p2;
2975     memcpy(&p2, &old, sizeof(old));
2976 
2977     /* Release our reference to it. */
2978     ASSERT(p2.ptr == ptr && "releasing a reference to a pointer after someone "
2979                             "has changed the pointer source");
2980     ASSERT(p2.count > 0 && "releasing a reference to a pointer when it had no "
2981                            "outstanding references");
2982     p2.count--;
2983 
2984     /* Try to commit our results. */
2985     memcpy(&new, &p2, sizeof(new));
2986     r = atomic_cas(p, old, new);
2987   } while (!r);
2988 }
2989 
2990 static void *refcounted_ptr_peek(refcounted_ptr_t *NONNULL p) {
2991 
2992   /* Read out the state of the pointer. This rather unpleasant expression is
2993    * designed to emit an atomic load at a smaller granularity than the entire
2994    * refcounted_ptr structure. Because we only need the pointer -- and not the
2995    * count -- we can afford to just atomically read the first word.
2996    */
2997   void *ptr = __atomic_load_n(
2998       (void **)((void *)p + __builtin_offsetof(struct refcounted_ptr, ptr)),
2999       __ATOMIC_SEQ_CST);
3000 
3001   return ptr;
3002 }
3003 
3004 static void refcounted_ptr_shift(refcounted_ptr_t *NONNULL current,
3005                                  refcounted_ptr_t *NONNULL next) {
3006 
3007   /* None of the operations in this function are performed atomically because we
3008    * assume the caller has synchronised with other threads via other means.
3009    */
3010 
3011   /* The pointer we're about to overwrite should not be referenced. */
3012   struct refcounted_ptr p __attribute__((unused));
3013   memcpy(&p, current, sizeof(*current));
3014   ASSERT(p.count == 0 && "overwriting a pointer that still has outstanding "
3015                          "references");
3016 
3017   /* Shift the next value into the current pointer. */
3018   *current = *next;
3019 
3020   /* Blank the value we just shifted over. */
3021   *next = 0;
3022 }
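
/* A minimal usage sketch of the thread-safe operations above (illustrative;
 * `target` is just a placeholder object).
 */
static __attribute__((unused)) void refcounted_ptr_example(void) {
  static int target;
  refcounted_ptr_t p = 0;
  refcounted_ptr_set(&p, &target);  /* publish with a zero refcount */
  void *q = refcounted_ptr_get(&p); /* take a reference */
  assert(q == &target && "unexpected pointer read back");
  refcounted_ptr_put(&p, q);        /* release the reference */
}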
3023 
3024 /******************************************************************************/
3025 
3026 /*******************************************************************************
3027  * Thread rendezvous support                                                   *
3028  ******************************************************************************/
3029 
3030 static pthread_mutex_t rendezvous_lock; /* mutual exclusion mechanism for below. */
3031 static pthread_cond_t rendezvous_cond;  /* sleep mechanism for below. */
3032 static size_t running_count = 1;        /* how many threads are opted in to rendezvous? */
3033 static size_t rendezvous_pending = 1;   /* how many threads are opted in and not sleeping? */
3034 
3035 static void rendezvous_init(void) {
3036   int r = pthread_mutex_init(&rendezvous_lock, NULL);
3037   if (__builtin_expect(r != 0, 0)) {
3038     fprintf(stderr, "pthread_mutex_init failed: %s\n", strerror(r));
3039     exit(EXIT_FAILURE);
3040   }
3041 
3042   r = pthread_cond_init(&rendezvous_cond, NULL);
3043   if (__builtin_expect(r != 0, 0)) {
3044     fprintf(stderr, "pthread_cond_init failed: %s\n", strerror(r));
3045     exit(EXIT_FAILURE);
3046   }
3047 }
3048 
3049 /** call this at the start of a rendezvous point
3050  *
3051  * This is a low level function, not expected to be directly used outside of the
3052  * context of the rendezvous implementation.
3053  *
3054  * \return True if the caller was the last to arrive and henceforth dubbed the
3055  *   'leader'.
3056  */
rendezvous_arrive(void)3057 static bool rendezvous_arrive(void) {
3058   int r __attribute__((unused)) = pthread_mutex_lock(&rendezvous_lock);
3059   assert(r == 0);
3060 
3061   /* Take a token from the rendezvous down-counter. */
3062   assert(rendezvous_pending > 0);
3063   rendezvous_pending--;
3064 
3065   /* If we were the last to arrive then it was our arrival that dropped the
3066    * counter to zero.
3067    */
3068   return rendezvous_pending == 0;
3069 }
3070 
3071 /** call this at the end of a rendezvous point
3072  *
3073  * This is a low level function, not expected to be directly used outside of the
3074  * context of the rendezvous implementation.
3075  *
3076  * \param leader Whether the caller is the 'leader'. If you call this when you
3077  *   are the 'leader' it will unblock all 'followers' at the rendezvous point.
3078  * \param action Optional code for the leader to run.
3079  */
static void rendezvous_depart(bool leader, void (*action)(void)) {
  int r __attribute__((unused));

  if (leader) {
    if (action != NULL) {
      action();
    }

    /* Reset the counter for the next rendezvous. */
    assert(rendezvous_pending == 0 &&
           "a rendezvous point is being exited "
           "while some participating threads have yet to arrive");
    rendezvous_pending = running_count;

    /* Wake up the 'followers'. */
    r = pthread_cond_broadcast(&rendezvous_cond);
    assert(r == 0);

  } else {

    /* Wait on the 'leader' to wake us up. */
    r = pthread_cond_wait(&rendezvous_cond, &rendezvous_lock);
    assert(r == 0);
  }

  r = pthread_mutex_unlock(&rendezvous_lock);
  assert(r == 0);
}

/* Exposed friendly function for performing a rendezvous. */
static void rendezvous(void (*action)(void)) {
  bool leader = rendezvous_arrive();
  if (leader) {
    TRACE(TC_SET, "arrived at rendezvous point as leader");
  }
  rendezvous_depart(leader, action);
}
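
/* A hedged usage sketch, not taken from the checker itself: every opted-in
 * thread calls rendezvous() with the same (possibly NULL) action and blocks
 * until all of them have arrived; exactly one caller -- the leader -- runs the
 * action before everyone is released. For example, the set migration below
 * effectively does:
 *
 *   rendezvous(set_update); // the last thread to arrive runs set_update(),
 *                           // then all participants continue together
 */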

/* Remove the caller from the pool of threads who participate in this
 * rendezvous.
 */
static void rendezvous_opt_out(void (*action)(void)) {

retry:;

  /* "Arrive" at the rendezvous to decrement the count of outstanding threads.
   */
  bool leader = rendezvous_arrive();

  if (leader && running_count > 1) {
    /* We unfortunately opted out of this rendezvous while the remaining threads
     * were arriving at one and we were the last to arrive. Let's pretend we are
     * participating in the rendezvous and unblock them.
     */
    rendezvous_depart(true, action);

    /* Re-attempt opting-out. */
    goto retry;
  }

  /* Remove ourselves from the known threads. */
  assert(running_count > 0);
  running_count--;

  int r __attribute__((unused)) = pthread_mutex_unlock(&rendezvous_lock);
  assert(r == 0);
}

/******************************************************************************/

/*******************************************************************************
 * 'Slots', an opaque wrapper around a state pointer                           *
 *                                                                             *
 * See usage of this in the state set below for its purpose.                   *
 ******************************************************************************/

typedef uintptr_t slot_t;

static __attribute__((const)) slot_t slot_empty(void) { return 0; }

static __attribute__((const)) bool slot_is_empty(slot_t s) {
  return s == slot_empty();
}

static __attribute__((const)) slot_t slot_tombstone(void) {
  static const slot_t TOMBSTONE = ~(slot_t)0;
  return TOMBSTONE;
}

static __attribute__((const)) bool slot_is_tombstone(slot_t s) {
  return s == slot_tombstone();
}

static struct state *slot_to_state(slot_t s) {
  ASSERT(!slot_is_empty(s));
  ASSERT(!slot_is_tombstone(s));
  return (struct state *)s;
}

static slot_t state_to_slot(const struct state *s) { return (slot_t)s; }
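
/* Illustration only: with this encoding the three possible slot contents are
 * disjoint. slot_empty() is 0, which is never a valid state pointer;
 * slot_tombstone() is the all-ones bit pattern, which is assumed never to be
 * handed out as an object address; and anything else is a live
 * 'struct state *' that round-trips exactly through state_to_slot() and
 * slot_to_state().
 */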

/******************************************************************************/

/*******************************************************************************
 * State set                                                                   *
 *                                                                             *
 * The following implementation provides a set for storing the seen states.    *
 * There is no support for removing elements, only thread-safe insertion of    *
 * elements.                                                                   *
 ******************************************************************************/

enum {
  INITIAL_SET_SIZE_EXPONENT =
      sizeof(unsigned long long) * 8 - 1 -
      __builtin_clzll(SET_CAPACITY / sizeof(struct state *) /
                      sizeof(struct state))
};
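
/* A worked example with made-up numbers: if SET_CAPACITY were 8388608 (8 MiB),
 * sizeof(struct state *) were 8 and sizeof(struct state) were 32, the argument
 * to __builtin_clzll would be 8388608 / 8 / 32 = 32768 = 2^15, giving
 * INITIAL_SET_SIZE_EXPONENT = 64 - 1 - 48 = 15, i.e. an initial table of 2^15
 * slots. In other words, the exponent is floor(log2) of that ratio.
 */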

struct set {
  slot_t *bucket;
  size_t size_exponent;
};

/* Some utility functions for dealing with exponents. */

static size_t set_size(const struct set *NONNULL set) {
  return ((size_t)1) << set->size_exponent;
}

static size_t set_index(const struct set *NONNULL set, size_t index) {
  return index & (set_size(set) - 1);
}
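
/* For example (illustrative only): with size_exponent == 4 the table has
 * 2^4 == 16 slots and set_index masks with 15, so set_index(set, 17) == 1.
 * Because the size is always a power of two, this mask is equivalent to, and
 * cheaper than, a modulo by the table size.
 */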

/* The states we have encountered. This collection will only ever grow while
 * checking the model. Note that we have a global reference-counted pointer and
 * a local bare pointer. See below for an explanation.
 */
static refcounted_ptr_t global_seen;
static _Thread_local struct set *local_seen;

/* Number of elements in the global set (i.e. occupancy). */
static size_t seen_count;

/* The "next" 'global_seen' value. See below for an explanation. */
static refcounted_ptr_t next_global_seen;
/* Now the explanation I teased... When the set capacity exceeds a threshold
 * (see 'set_expand' related logic below) it is expanded and the reference
 * tracking within the 'refcounted_ptr_t's comes into play. We need to allocate
 * a new seen set ('next_global_seen'), copy over all the elements (done in
 * 'set_migrate'), and then "shift" the new set to become the current set. The
 * role of the reference counts of both 'global_seen' and 'next_global_seen' in
 * all of this is to detect when the last thread releases its reference to the
 * old seen set and hence can deallocate it.
 */
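
/* A hedged summary of that lifecycle, in roughly the order the code below
 * performs it (see 'set_expand', 'set_migrate' and 'set_update' for the real
 * logic):
 *
 *   1. some thread notices occupancy has crossed the threshold and, under
 *      'set_expand_mutex', publishes a doubled table via
 *      refcounted_ptr_set(&next_global_seen, ...);
 *   2. every thread that reaches the migration helps 'set_migrate' move slots
 *      out of 'local_seen' into the new table, then does
 *      refcounted_ptr_put(&global_seen, local_seen);
 *   3. the threads rendezvous; the leader runs 'set_update', which frees the
 *      old table and refcounted_ptr_shift()s 'next_global_seen' into
 *      'global_seen'.
 */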

/* The next chunk to migrate from the old set to the new set. What exactly a
 * "chunk" is is covered in 'set_migrate'.
 */
static size_t next_migration;

/* A mechanism for synchronisation in 'set_expand'. */
static pthread_mutex_t set_expand_mutex;

static void set_expand_lock(void) {
  if (THREADS > 1) {
    int r __attribute__((unused)) = pthread_mutex_lock(&set_expand_mutex);
    ASSERT(r == 0);
  }
}

static void set_expand_unlock(void) {
  if (THREADS > 1) {
    int r __attribute__((unused)) = pthread_mutex_unlock(&set_expand_mutex);
    ASSERT(r == 0);
  }
}

static void set_init(void) {

  if (THREADS > 1) {
    int r = pthread_mutex_init(&set_expand_mutex, NULL);
    if (__builtin_expect(r != 0, 0)) {
      fprintf(stderr, "pthread_mutex_init failed: %s\n", strerror(r));
      exit(EXIT_FAILURE);
    }
  }

  /* Allocate the set we'll store seen states in at some conservative initial
   * size.
   */
  struct set *set = xmalloc(sizeof(*set));
  set->size_exponent = INITIAL_SET_SIZE_EXPONENT;
  set->bucket = xcalloc(set_size(set), sizeof(set->bucket[0]));

  /* Stash this somewhere for threads to later retrieve it from. Note that we
   * initialize its reference count to zero as we (the setup logic) are not
   * using it beyond this function.
   */
  refcounted_ptr_set(&global_seen, set);
}

static void set_thread_init(void) {
  /* Take a local reference to the global seen set. */
  local_seen = refcounted_ptr_get(&global_seen);
}

static void set_update(void) {
  /* Guard against the case where we've been called from exit_with() and we're
   * not finishing a migration, but just opting out of the rendezvous protocol.
   */
  if (refcounted_ptr_peek(&next_global_seen) != NULL) {

    /* Clean up the old set. Note that we are using a pointer we have already
     * given up our reference count to here, but we rely on the caller to ensure
     * this access is safe.
     */
    free(local_seen->bucket);
    free(local_seen);

    /* Reset migration state for the next time we expand the set. */
    next_migration = 0;

    /* Update the global pointer to the new set. We know all the above
     * migrations have completed and no one needs the old set.
     */
    refcounted_ptr_shift(&global_seen, &next_global_seen);
  }
}

static void set_migrate(void) {

  TRACE(TC_SET, "assisting in set migration...");

  /* Size of a migration chunk. Threads in this function grab a chunk at a time
   * to migrate.
   */
  enum { CHUNK_SIZE = 4096 / sizeof(local_seen->bucket[0]) /* slots */ };

  /* Take a pointer to the target set for the migration. */
  struct set *next = refcounted_ptr_get(&next_global_seen);

  for (;;) {

    size_t chunk = __atomic_fetch_add(&next_migration, 1, __ATOMIC_SEQ_CST);
    size_t start = chunk * CHUNK_SIZE;
    size_t end = start + CHUNK_SIZE;

    /* Bail out if we've finished migrating all of the set. */
    if (start >= set_size(local_seen)) {
      break;
    }

    /* TODO: The following algorithm assumes insertions can collide. That is, it
     * operates atomically on slots because another thread could be migrating
     * and also targeting the same slot. If we were to more closely stick to the
     * Maier design this would not be required.
     */

    for (size_t i = start; i < end; i++) {

      /* retrieve the slot element and mark it as migrated */
      slot_t s = __atomic_exchange_n(&local_seen->bucket[i], slot_tombstone(),
                                     __ATOMIC_SEQ_CST);
      ASSERT(!slot_is_tombstone(s) && "attempted double slot migration");

      /* If the current slot contained a state, rehash it and insert it into the
       * new set. Note we don't need to do any state comparisons because we know
       * everything in the old set is unique.
       */
      if (!slot_is_empty(s)) {
        size_t index = set_index(next, state_hash(slot_to_state(s)));
        /* insert and shuffle any colliding entries one along */
        for (size_t j = index; !slot_is_empty(s); j = set_index(next, j + 1)) {
          s = __atomic_exchange_n(&next->bucket[j], s, __ATOMIC_SEQ_CST);
        }
      }
    }
  }

  /* Release our reference to the old set now we're done with it. */
  refcounted_ptr_put(&global_seen, local_seen);

  /* Now we need to make sure all the threads get to this point before any one
   * thread leaves. The purpose of this is to guarantee we only ever have at
   * most two seen sets "in flight". Without this rendezvous, one thread could
   * race ahead, fill the new set, and then decide to expand again while some
   * are still working on the old set. It's possible to make such a scheme work
   * but the synchronisation requirements just seem too complicated.
   */
  rendezvous(set_update);

  /* We're now ready to resume model checking. Note that we already have a
   * (reference counted) pointer to the now-current global seen set, so we don't
   * need to take a fresh reference to it.
   */
  local_seen = next;
}

static void set_expand(void) {

  /* Using double-checked locking, we look to see if someone else has already
   * started expanding the set. We do this by first checking before acquiring
   * the set mutex, and then later again checking after we've acquired the
   * mutex. The idea here is that, with multiple threads, you'll frequently find
   * someone else beat you to expansion and you can jump straight to helping
   * them with migration without having the expense of acquiring the set mutex.
   */
  if (THREADS > 1 && refcounted_ptr_peek(&next_global_seen) != NULL) {
    /* Someone else already expanded it. Join them in the migration effort. */
    TRACE(TC_SET, "attempted expansion failed because another thread got there "
                  "first");
    set_migrate();
    return;
  }

  set_expand_lock();

  /* Check again, as described above. */
  if (THREADS > 1 && refcounted_ptr_peek(&next_global_seen) != NULL) {
    set_expand_unlock();
    TRACE(TC_SET, "attempted expansion failed because another thread got there "
                  "first");
    set_migrate();
    return;
  }

  TRACE(TC_SET, "expanding set from %zu slots to %zu slots...",
        set_size(local_seen), set_size(local_seen) * 2);

  /* Create a set of double the size. */
  struct set *set = xmalloc(sizeof(*set));
  set->size_exponent = local_seen->size_exponent + 1;
  set->bucket = xcalloc(set_size(set), sizeof(set->bucket[0]));

  /* Advertise this as the newly expanded global set. */
  refcounted_ptr_set(&next_global_seen, set);

  /* We now need to migrate all slots from the old set to the new one, but we
   * can do this multithreaded.
   */
  set_expand_unlock();
  set_migrate();
}

static bool set_insert(struct state *NONNULL s, size_t *NONNULL count) {

restart:;

  if (__atomic_load_n(&seen_count, __ATOMIC_SEQ_CST) * 100 /
          set_size(local_seen) >=
      SET_EXPAND_THRESHOLD)
    set_expand();

  size_t index = set_index(local_seen, state_hash(s));

  size_t attempts = 0;
  for (size_t i = index; attempts < set_size(local_seen);
       i = set_index(local_seen, i + 1)) {

    /* Guess that the current slot is empty and try to insert here. */
    slot_t c = slot_empty();
    if (__atomic_compare_exchange_n(&local_seen->bucket[i], &c,
                                    state_to_slot(s), false, __ATOMIC_SEQ_CST,
                                    __ATOMIC_SEQ_CST)) {
      /* Success */
      *count = __atomic_add_fetch(&seen_count, 1, __ATOMIC_SEQ_CST);
      TRACE(TC_SET, "added state %p, set size is now %zu", s, *count);

      /* The maximum possible size of the seen state set should be constrained
       * by the number of possible states based on how many bits we are using to
       * represent the state data.
       */
      if (STATE_SIZE_BITS < sizeof(size_t) * CHAR_BIT) {
        assert(*count <= ((size_t)1) << STATE_SIZE_BITS &&
               "seen set size "
               "exceeds total possible number of states");
      }

      /* Update statistics if `--trace memory_usage` is in effect. Note that we
       * do this here (when a state is being added to the seen set) rather than
       * when the state was originally allocated to ensure that the final
       * allocation figures do not include transient states that we allocated
       * and then discarded as duplicates.
       */
      size_t depth = 0;
#if BOUND > 0
      depth = (size_t)state_bound_get(s);
#endif
      register_allocation(depth);

      return true;
    }

    if (slot_is_tombstone(c)) {
      /* This slot has been migrated. We need to rendezvous with other migrating
       * threads and restart our insertion attempt on the newly expanded set.
       */
      set_migrate();
      goto restart;
    }

    /* If we find this already in the set, we're done. */
    if (state_eq(s, slot_to_state(c))) {
      TRACE(TC_SET, "skipped adding state %p that was already in set", s);
      return false;
    }

    attempts++;
  }

  /* If we reach here, the set is full. Expand it and retry the insertion. */
  set_expand();
  return set_insert(s, count);
}
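
/* A hedged sketch of how callers are expected to use set_insert (the real call
 * sites live in the generated exploration code, not shown here):
 *
 *   size_t count;
 *   if (set_insert(s, &count)) {
 *     // s was new: its pointer is now stored in the seen set, so it must not
 *     // be freed by the caller
 *   } else {
 *     // a state equal to s was already present; s can be discarded
 *   }
 */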

/* Find an existing element in the set.
 *
 * Why would you ever want to do this? If you already have the state, why do you
 * want to find a copy of it? The answer is for liveness information. When
 * checking liveness properties, a duplicate of your current state that is
 * already contained in the state set might know some of the liveness properties
 * are satisfied that your current state considers unknown.
 */
static __attribute__((unused)) const struct state *
set_find(const struct state *NONNULL s) {

  assert(s != NULL);

  size_t index = set_index(local_seen, state_hash(s));

  size_t attempts = 0;
  for (size_t i = index; attempts < set_size(local_seen);
       i = set_index(local_seen, i + 1)) {

    slot_t slot = __atomic_load_n(&local_seen->bucket[i], __ATOMIC_SEQ_CST);

    /* This function is only expected to be called during the final liveness
     * scan, in which all threads participate. So we should never encounter a
     * set undergoing migration.
     */
    ASSERT(!slot_is_tombstone(slot) &&
           "tombstone encountered during final phase");

    if (slot_is_empty(slot)) {
      /* reached the end of the linear block in which this state could lie */
      break;
    }

    const struct state *n = slot_to_state(slot);
    ASSERT(n != NULL && "null pointer stored in state set");

    if (state_eq(s, n)) {
      /* found */
      return n;
    }

    attempts++;
  }

  /* not found */
  return NULL;
}

/******************************************************************************/

static time_t START_TIME;

static unsigned long long gettime() {
  return (unsigned long long)(time(NULL) - START_TIME);
}

#if LIVENESS_COUNT > 0
/* Set one of the liveness bits (i.e. mark the matching property as 'hit') in a
 * state and all its predecessors.
 */
static __attribute__((unused)) void mark_liveness(struct state *NONNULL s,
                                                  size_t index, bool shared) {

  assert(s != NULL);
  ASSERT(index < sizeof(s->liveness) * CHAR_BIT &&
         "out of range liveness write");

  size_t word_index = index / (sizeof(s->liveness[0]) * CHAR_BIT);
  size_t bit_index = index % (sizeof(s->liveness[0]) * CHAR_BIT);

  uintptr_t previous_value;
  uintptr_t *target = &s->liveness[word_index];
  uintptr_t mask = ((uintptr_t)1) << bit_index;

  if (shared) {
    /* If this state is shared (accessible by other threads) we need to operate
     * on its liveness data atomically.
     */
    previous_value = __atomic_fetch_or(target, mask, __ATOMIC_SEQ_CST);
  } else {
    /* Otherwise we can use a cheaper ordinary OR. */
    previous_value = *target;
    *target |= mask;
  }

  /* Cheat a little and cast away the constness of the previous state for which
   * we may need to update liveness data.
   */
  struct state *previous = state_drop_const(state_previous_get(s));

  /* If the given bit was already set, we know all the predecessors of this
   * state have already had their corresponding bit marked. However, if it was
   * not we now need to recurse to mark them. Note that we assume any
   * predecessors of this state are globally visible and hence shared. The
   * recursion depth here can be indeterminately deep, but we assume the
   * compiler can tail-optimise this call.
   */
  if ((previous_value & mask) != mask && previous != NULL) {
    mark_liveness(previous, index, true);
  }
}
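
/* An illustrative example with made-up numbers: on a platform where uintptr_t
 * is 64 bits, marking liveness property index 70 selects
 * word_index == 70 / 64 == 1 and bit_index == 70 % 64 == 6, so the operation
 * ORs the mask 1 << 6 into s->liveness[1] and then walks the predecessor chain
 * doing the same until it reaches a state whose bit was already set.
 */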

/* number of unknown liveness properties for a given state */
static unsigned long unknown_liveness(const struct state *NONNULL s) {

  assert(s != NULL);

  unsigned long unknown = 0;

  for (size_t i = 0; i < sizeof(s->liveness) / sizeof(s->liveness[0]); i++) {

    uintptr_t word = __atomic_load_n(&s->liveness[i], __ATOMIC_SEQ_CST);

    for (size_t j = 0; j < sizeof(s->liveness[0]) * CHAR_BIT; j++) {
      if (i * sizeof(s->liveness[0]) * CHAR_BIT + j >= LIVENESS_COUNT) {
        break;
      }

      if (!((word >> j) & 0x1)) {
        unknown++;
      }
    }
  }

  return unknown;
}

/** learn new liveness information about the state `s` from its successor
 *
 * Note that typically `state_previous_get(successor) != s` because `successor`
 * is actually one of the de-duped aliases of the original successor to `s`.
 *
 * \param s State to learn information about
 * \param successor Successor to s
 * \return Number of new liveness information facts learnt
 */
static unsigned long learn_liveness(struct state *NONNULL s,
                                    const struct state *NONNULL successor) {

  assert(s != NULL);
  assert(successor != NULL);

  unsigned long new_info = 0;

  for (size_t i = 0; i < sizeof(s->liveness) / sizeof(s->liveness[0]); i++) {

    uintptr_t word_src =
        __atomic_load_n(&successor->liveness[i], __ATOMIC_SEQ_CST);
    uintptr_t word_dst = __atomic_load_n(&s->liveness[i], __ATOMIC_SEQ_CST);

    for (size_t j = 0; j < sizeof(s->liveness[0]) * CHAR_BIT; j++) {
      if (i * sizeof(s->liveness[0]) * CHAR_BIT + j >= LIVENESS_COUNT) {
        break;
      }

      bool live_src = !!((word_src >> j) & 0x1);
      bool live_dst = !!((word_dst >> j) & 0x1);

      if (!live_dst && live_src) {
        mark_liveness(s, i * sizeof(s->liveness[0]) * CHAR_BIT + j, true);
        new_info++;
      }
    }
  }

  return new_info;
}
#endif

/* Prototypes for generated functions. */
static void init(void);
static _Noreturn void explore(void);
#if LIVENESS_COUNT > 0
static void check_liveness_final(void);
static unsigned long check_liveness_summarise(void);
#endif

static int exit_with(int status) {

  /* Opt out of the thread-wide rendezvous protocol. */
  refcounted_ptr_put(&global_seen, local_seen);
  rendezvous_opt_out(set_update);
  local_seen = NULL;

  /* Make fired rule count visible globally. */
  rules_fired[thread_id] = rules_fired_local;

  if (thread_id == 0) {
    /* We are the initial thread. Wait on the others before exiting. */
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
    for (size_t i = 0; phase == RUN && i < sizeof(threads) / sizeof(threads[0]);
         i++) {
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
      void *ret;
      int r = pthread_join(threads[i], &ret);
      if (__builtin_expect(r != 0, 0)) {
        flockfile(stdout);
        fprintf(stderr, "failed to join thread: %s\n", strerror(r));
        funlockfile(stdout);
        continue;
      }
      status |= (int)(intptr_t)ret;
    }

    /* We're now single-threaded again. */

    /* Reacquire a pointer to the seen set. Note that this may not be the same
     * value as what we previously had in local_seen because the other threads
     * may have expanded and migrated the seen set in the meantime.
     */
    local_seen = refcounted_ptr_get(&global_seen);

    if (error_count == 0) {
      /* If we didn't see any other errors, print cover information. */
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
      for (size_t i = 0; i < sizeof(covers) / sizeof(covers[0]); i++) {
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
        if (MACHINE_READABLE_OUTPUT) {
          put("<cover_result message=\"");
          xml_printf(COVER_MESSAGES[i]);
          put("\" count=\"");
          put_uint(covers[i]);
          put("\"/>\n");
        }
        if (covers[i] == 0) {
          if (!MACHINE_READABLE_OUTPUT) {
            put("\t");
            put(red());
            put(bold());
            put("cover \"");
            put(COVER_MESSAGES[i]);
            put("\" not hit");
            put(reset());
            put("\n");
          }
          error_count++;
          status = EXIT_FAILURE;
        } else if (!MACHINE_READABLE_OUTPUT) {
          put("\t");
          put(green());
          put(bold());
          put("cover \"");
          put(COVER_MESSAGES[i]);
          put("\" hit ");
          put_uint(covers[i]);
          put(" times");
          put(reset());
          put("\n");
        }
      }
    }

#if LIVENESS_COUNT > 0
    /* If we have liveness properties to assess and have seen no previous
     * errors, do a final check of them now.
     */
    if (error_count == 0) {
      check_liveness_final();

      unsigned long failed = check_liveness_summarise();
      if (failed > 0) {
        error_count += failed;
        status = EXIT_FAILURE;
      }
    }
#endif

    if (!MACHINE_READABLE_OUTPUT) {
      put("\n"
          "===================================================================="
          "======\n"
          "\n"
          "Status:\n"
          "\n");
      if (error_count == 0) {
        put("\t");
        put(green());
        put(bold());
        put("No error found.");
        put(reset());
        put("\n");
      } else {
        put("\t");
        put(red());
        put(bold());
        put_uint(error_count);
        put(" error(s) found.");
        put(reset());
        put("\n");
      }
      put("\n");
    }

    /* Calculate the total number of rules fired. */
    uintmax_t fire_count = 0;
    for (size_t i = 0; i < sizeof(rules_fired) / sizeof(rules_fired[0]); i++) {
      fire_count += rules_fired[i];
    }

    /* Paranoid check that we didn't miscount during set insertions/expansions.
     */
#ifndef NDEBUG
    size_t count = 0;
    for (size_t i = 0; i < set_size(local_seen); i++) {
      if (!slot_is_empty(local_seen->bucket[i])) {
        count++;
      }
    }
#endif
    assert(count == seen_count && "seen set count is inconsistent at exit");

    if (MACHINE_READABLE_OUTPUT) {
      put("<summary states=\"");
      put_uint(seen_count);
      put("\" rules_fired=\"");
      put_uint(fire_count);
      put("\" errors=\"");
      put_uint(error_count);
      put("\" duration_seconds=\"");
      put_uint(gettime());
      put("\"/>\n");
      put("</rumur_run>\n");
    } else {
      put("State Space Explored:\n"
          "\n"
          "\t");
      put_uint(seen_count);
      put(" states, ");
      put_uint(fire_count);
      put(" rules fired in ");
      put_uint(gettime());
      put("s.\n");
    }

    /* print memory usage statistics if `--trace memory_usage` is in effect */
    print_allocation_summary();

    exit(status);
  } else {
    pthread_exit((void *)(intptr_t)status);
  }
}

static void *thread_main(void *arg) {

  /* Initialize (thread-local) thread identifier. */
  thread_id = (size_t)(uintptr_t)arg;

  set_thread_init();

  explore();
}

static void start_secondary_threads(void) {

  /* XXX: Kind of hacky. We've left the rendezvous down-counter at 1 until now
   * in case we triggered a rendezvous before starting the other threads (it
   * *can* happen). We bump it immediately -- i.e. before starting *any* of the
   * secondary threads -- because any of them could race us and trigger a
   * rendezvous before we exit the below loop and they need to know about the
   * thundering herd bearing down on them. It is safe to do this without holding
   * rendezvous_lock because we are still single threaded at this point.
   */
  assert(running_count == 1);
  running_count = THREADS;
  assert(rendezvous_pending == 1);
  rendezvous_pending = THREADS;

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
  for (size_t i = 0; i < sizeof(threads) / sizeof(threads[0]); i++) {
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
    int r = pthread_create(&threads[i], NULL, thread_main,
                           (void *)(uintptr_t)(i + 1));
    if (__builtin_expect(r != 0, 0)) {
      fprintf(stderr, "pthread_create failed: %s\n", strerror(r));
      exit(EXIT_FAILURE);
    }
  }
}

int main(void) {

  if (COLOR == AUTO)
    istty = isatty(STDOUT_FILENO) != 0;

  /* We don't need to read anything from stdin, so discard it. */
  (void)fclose(stdin);

  sandbox();

  if (MACHINE_READABLE_OUTPUT) {
    put("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
        "<rumur_run>\n"
        "<information state_size_bits=\"");
    put_uint(STATE_SIZE_BITS);
    put("\" state_size_bytes=\"");
    put_uint(STATE_SIZE_BYTES);
    put("\" hash_table_slots=\"");
    put_uint(((size_t)1) << INITIAL_SET_SIZE_EXPONENT);
    put("\"/>\n");
  } else {
    put("Memory usage:\n"
        "\n"
        "\t* The size of each state is ");
    put_uint(STATE_SIZE_BITS);
    put(" bits (rounded up to ");
    put_uint(STATE_SIZE_BYTES);
    put(" bytes).\n"
        "\t* The size of the hash table is ");
    put_uint(((size_t)1) << INITIAL_SET_SIZE_EXPONENT);
    put(" slots.\n"
        "\n");
  }

#ifndef NDEBUG
  state_print_field_offsets();
#endif

  START_TIME = time(NULL);

  rendezvous_init();

  set_init();

  set_thread_init();

  init();

  if (!MACHINE_READABLE_OUTPUT) {
    put("Progress Report:\n\n");
  }

  explore();
}