#ifndef __OPTIMIZE__
#ifdef __clang__
#ifdef __x86_64__
#warning you are compiling without optimizations enabled. I would suggest -march=native -O3 -mcx16.
#else
#warning you are compiling without optimizations enabled. I would suggest -march=native -O3.
#endif
#else
#ifdef __x86_64__
#warning you are compiling without optimizations enabled. I would suggest -march=native -O3 -fwhole-program -mcx16.
#else
#warning you are compiling without optimizations enabled. I would suggest -march=native -O3 -fwhole-program.
#endif
#endif
#endif

#define value_to_string(v) ((value_t)(v))
#define raw_value_to_string(v) ((raw_value_t)(v))

/* A more powerful assert that treats the assertion as an assumption when
 * assertions are disabled.
 */
#ifndef NDEBUG
#define ASSERT(expr) assert(expr)
#elif defined(__clang__)
#define ASSERT(expr) __builtin_assume(expr)
#else
/* GCC doesn't have __builtin_assume, so we need something else. */
#define ASSERT(expr)                                                           \
  do {                                                                         \
    /* The following is an idiom for teaching the compiler an assumption. */   \
    if (!(expr)) {                                                             \
      __builtin_unreachable();                                                 \
    }                                                                          \
  } while (0)
#endif
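
/* As an illustrative note (not generated output): because ASSERT() becomes an
 * assumption when assertions are disabled, in
 *
 *   ASSERT(idx < 4u);
 *   return table[idx & 3u];
 *
 * the compiler is entitled to simplify `idx & 3u` to `idx`, an optimisation a
 * plain disabled assert() would not license.
 */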

#define BITS_TO_BYTES(size) ((size) / 8 + ((size) % 8 == 0 ? 0 : 1))
#define BITS_FOR(value)                                                        \
  ((value) == 0 ? 0 : (sizeof(unsigned long long) * 8 - __builtin_clzll(value)))
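
/* Worked examples of the above (illustrative sanity checks; both macros are
 * pure arithmetic, so these hold on any supported compiler): a maximum value
 * of 5 (0b101) needs 3 bits, and 12 bits round up to 2 bytes.
 */
_Static_assert(BITS_FOR(5) == 3, "BITS_FOR worked example");
_Static_assert(BITS_TO_BYTES(12) == 2, "BITS_TO_BYTES worked example");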

/* The size of the compressed state data in bytes. */
enum { STATE_SIZE_BYTES = BITS_TO_BYTES(STATE_SIZE_BITS) };

/* the size of auxiliary members of the state struct */
enum { BOUND_BITS = BITS_FOR(BOUND) };
#if COUNTEREXAMPLE_TRACE != CEX_OFF || LIVENESS_COUNT > 0
#if POINTER_BITS != 0
enum { PREVIOUS_BITS = POINTER_BITS };
#elif defined(__linux__) && defined(__x86_64__) && !defined(__ILP32__)
/* assume 5-level paging, and hence the top byte of any user pointer is always
 * 0 and not required.
 * https://www.kernel.org/doc/Documentation/x86/x86_64/mm.txt
 */
enum { PREVIOUS_BITS = 56 };
#else
enum { PREVIOUS_BITS = sizeof(void *) * 8 };
#endif
#else
enum { PREVIOUS_BITS = 0 };
#endif
#if COUNTEREXAMPLE_TRACE != CEX_OFF
enum { RULE_TAKEN_BITS = BITS_FOR(RULE_TAKEN_LIMIT) };
#else
enum { RULE_TAKEN_BITS = 0 };
#endif
enum {
  STATE_OTHER_BYTES =
      BITS_TO_BYTES(BOUND_BITS + PREVIOUS_BITS + RULE_TAKEN_BITS +
                    (USE_SCALARSET_SCHEDULES ? SCHEDULE_BITS : 0))
};

/* Implement _Thread_local for GCC <4.9, which is missing this. */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
#define _Thread_local __thread
#endif
#endif

#ifdef __clang__
#define NONNULL _Nonnull
#else
#define NONNULL /* nothing; other compilers don't have _Nonnull */
#endif

/* A word about atomics... There are two different atomic operation mechanisms
 * used in this code and it may not immediately be obvious why one was not
 * sufficient. The two are:
 *
 *   1. GCC __atomic built-ins: Used for variables that are sometimes accessed
 *      with atomic semantics and sometimes as regular memory operations. The
 *      C11 atomics cannot give us this and the __atomic built-ins are
 *      implemented by the major compilers.
 *   2. GCC __sync built-ins: used for 128-bit atomic accesses on x86-64. It
 *      seems the __atomic built-ins do not result in a CMPXCHG instruction,
 *      but rather in a less efficient library call. See
 *      https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878.
 *
 * Though this is intended to be a C11 program, we avoid the C11 atomics to be
 * compatible with GCC <4.9.
 */
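
/* For example (illustrative): the error_count counter below is incremented
 * with __atomic_fetch_add(&error_count, 1, __ATOMIC_SEQ_CST) while threads
 * race, yet can still be read as a plain variable once the threads have been
 * joined; a C11 _Atomic-qualified variable would force every access through
 * the atomic machinery.
 */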

/* Identifier of the current thread. This counts up from 0 and thus is suitable
 * to use for, e.g., indexing into arrays. The initial thread has ID 0.
 */
static _Thread_local size_t thread_id;

/* The threads themselves. Note that we have no element for the initial thread,
 * so *your* thread is 'threads[thread_id - 1]'.
 */
static pthread_t threads[THREADS - 1];

/* What we are currently doing. Either "warming up" (running single-threaded,
 * building up queue occupancy) or "free running" (running multithreaded).
 */
static enum { WARMUP, RUN } phase = WARMUP;

/* Number of errors we've noted so far. If a thread sees this hit or exceed
 * MAX_ERRORS, it should attempt to exit gracefully as soon as possible.
 */
static unsigned long error_count;

/* Number of rules that have been processed. There are two representations of
 * this: a thread-local count of how many rules we have fired thus far and a
 * global array of *final* counts of fired rules per-thread that is updated and
 * used as threads are exiting. The purpose of this duplication is to let the
 * compiler lay out the thread-local variable in a cache-friendly way and use
 * this during checking, rather than having all threads contending on the
 * global array, whose entries are likely all within the same cache line.
 */
static _Thread_local uintmax_t rules_fired_local;
static uintmax_t rules_fired[THREADS];

/* Checkpoint to restore to after reporting an error. This is only used if we
 * are tolerating more than one error before exiting.
 */
static _Thread_local sigjmp_buf checkpoint;

_Static_assert(MAX_ERRORS > 0, "illegal MAX_ERRORS value");

/* Whether we need to save and restore checkpoints. This is determined by
 * whether we ever need to perform the action "discard the current state and
 * skip to checking the next." This scenario can occur for two reasons:
 *   1. We are running multithreaded, have just found an error and have not
 *      yet hit MAX_ERRORS. In this case we want to longjmp back to resume
 *      checking.
 *   2. We failed an assume statement. In this case we want to mark the
 *      current state as invalid and resume checking with the next state.
 * In either scenario the actual longjmp performed is the same, but by knowing
 * statically whether either can occur we can avoid calling setjmp if both are
 * impossible.
 */
enum { JMP_BUF_NEEDED = MAX_ERRORS > 1 || ASSUME_STATEMENTS_COUNT > 0 };

/*******************************************************************************
 * Sandbox support.                                                            *
 *                                                                             *
 * Because we're running generated code, it seems wise to use OS mechanisms to *
 * reduce our privileges, where possible.                                      *
 ******************************************************************************/

static void sandbox(void) {

  if (!SANDBOX_ENABLED) {
    return;
  }

#ifdef __APPLE__
  {
    char *err;
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
    int r = sandbox_init(kSBXProfilePureComputation, SANDBOX_NAMED, &err);
#pragma clang diagnostic pop

    if (__builtin_expect(r != 0, 0)) {
      fprintf(stderr, "sandbox_init failed: %s\n", err);
      free(err);
      exit(EXIT_FAILURE);
    }

    return;
  }
#endif

#ifdef __FreeBSD__
  {
    if (__builtin_expect(cap_enter() != 0, 0)) {
      perror("cap_enter");
      exit(EXIT_FAILURE);
    }
    return;
  }
#endif

#if defined(__linux__)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
  {
    /* Disable the addition of new privileges via execve and friends. */
    int r = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
    if (__builtin_expect(r != 0, 0)) {
      perror("prctl(PR_SET_NO_NEW_PRIVS) failed");
      exit(EXIT_FAILURE);
    }

    /* A BPF program that traps on any syscall we want to disallow. */
    static struct sock_filter filter[] = {

        /* Load syscall number. */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),

        /* Enable exiting. */
#ifdef __NR_exit_group
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit_group, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif

        /* Enable syscalls used by printf. */
#ifdef __NR_fstat
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_fstat, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
#ifdef __NR_fstat64
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_fstat64, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
#ifdef __NR_write
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_write, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif

        /* Enable syscalls used by malloc. */
#ifdef __NR_brk
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_brk, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
#ifdef __NR_mmap
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mmap, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
#ifdef __NR_mmap2
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mmap2, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
#ifdef __NR_munmap
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif

        /* If we're running multithreaded, enable syscalls used by pthreads. */
#ifdef __NR_clone
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clone, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif
#ifdef __NR_close
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_close, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif
#ifdef __NR_exit
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_exit, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif
#ifdef __NR_futex
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif
#ifdef __NR_get_robust_list
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_get_robust_list, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif
#ifdef __NR_madvise
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_madvise, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif
#ifdef __NR_mprotect
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_mprotect, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif
#ifdef __NR_open
        /* XXX: it would be nice to avoid open() but pthreads seems to open
         * libgcc.
         */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_open, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif
#ifdef __NR_read
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif
#ifdef __NR_set_robust_list
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_robust_list, 0, 1),
        BPF_STMT(BPF_RET | BPF_K,
                 THREADS > 1 ? SECCOMP_RET_ALLOW : SECCOMP_RET_TRAP),
#endif

        /* On platforms without vDSO support, time() makes an actual syscall,
         * so we need to allow the time-related syscalls.
         */
#ifdef __NR_clock_gettime
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_gettime, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
#ifdef __NR_clock_gettime64
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_gettime64, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
#ifdef __NR_gettimeofday
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_gettimeofday, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
#ifdef __NR_time
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_time, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif

        /* Deny everything else. On a disallowed syscall, we trap instead of
         * killing to allow the user to debug the failure. If you are
         * debugging seccomp denials, strace the checker and find the number
         * of the denied syscall in the first si_value parameter reported in
         * the terminating SIGSYS.
         */
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
    };

    static const struct sock_fprog filter_program = {
        .len = sizeof(filter) / sizeof(filter[0]),
        .filter = filter,
    };

    /* Apply the above filter to ourselves. */
    r = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &filter_program, 0, 0);
    if (__builtin_expect(r != 0, 0)) {
      perror("prctl(PR_SET_SECCOMP) failed");
      exit(EXIT_FAILURE);
    }

    return;
  }
#endif
#endif

#ifdef __OpenBSD__
  {
    if (__builtin_expect(pledge("stdio", "") != 0, 0)) {
      perror("pledge");
      exit(EXIT_FAILURE);
    }
    return;
  }
#endif

  /* No sandbox available. */
  fprintf(stderr, "no sandboxing facilities available\n");
  exit(EXIT_FAILURE);
}

/******************************************************************************/

/* ANSI colour code support */

static bool istty;

static const char *green() {
  if (COLOR == ON || (COLOR == AUTO && istty))
    return "\033[32m";
  return "";
}

static const char *red() {
  if (COLOR == ON || (COLOR == AUTO && istty))
    return "\033[31m";
  return "";
}

static const char *yellow() {
  if (COLOR == ON || (COLOR == AUTO && istty))
    return "\033[33m";
  return "";
}

static const char *bold() {
  if (COLOR == ON || (COLOR == AUTO && istty))
    return "\033[1m";
  return "";
}

static const char *reset() {
  if (COLOR == ON || (COLOR == AUTO && istty))
    return "\033[0m";
  return "";
}

#ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */

#define UINT128_MAX                                                            \
  ((((unsigned __int128)UINT64_MAX) << 64) | ((unsigned __int128)UINT64_MAX))

#define INT128_MAX ((((__int128)INT64_MAX) << 64) | ((__int128)UINT64_MAX))
#define INT128_MIN (-INT128_MAX - 1)

struct string_buffer {
  char data[41];
};

static struct string_buffer value_u128_to_string(unsigned __int128 v) {

  struct string_buffer buffer;

  if (v == 0) {
    buffer.data[0] = '0';
    buffer.data[1] = '\0';
    return buffer;
  }

  size_t i = sizeof(buffer.data);
  while (v != 0) {
    i--;
    buffer.data[i] = '0' + v % 10;
    v /= 10;
  }

  memmove(buffer.data, &buffer.data[i], sizeof(buffer) - i);
  buffer.data[sizeof(buffer) - i] = '\0';

  return buffer;
}
static __attribute__((unused)) struct string_buffer
value_128_to_string(__int128 v) {

  if (v == INT128_MIN) {
    struct string_buffer buffer;
    strcpy(buffer.data, "-170141183460469231731687303715884105728");
    return buffer;
  }

  bool negative = v < 0;
  if (negative) {
    v = -v;
  }

  struct string_buffer buffer = value_u128_to_string(v);

  if (negative) {
    memmove(&buffer.data[1], buffer.data, strlen(buffer.data) + 1);
    buffer.data[0] = '-';
  }

  return buffer;
}
#endif

/* Is value_t a signed type? We need this as a function because value_t is a
 * typedef that can be tweaked by the user.
 */
static __attribute__((const)) bool value_is_signed(void) {
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
  return (value_t)-1 < 0;
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
}
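
/* An illustrative aside on the idiom above: the cast and compare rely on the
 * usual arithmetic conversions, so with `typedef int8_t value_t` the
 * expression (value_t)-1 < 0 is true, while with `typedef uint8_t value_t`
 * the cast wraps to 255 and the comparison is false.
 */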

/*******************************************************************************
 * MurmurHash by Austin Appleby                                                *
 *                                                                             *
 * More information on this at https://github.com/aappleby/smhasher/           *
 ******************************************************************************/

static uint64_t MurmurHash64A(const void *NONNULL key, size_t len) {

  static const uint64_t seed = 0;

  static const uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
  static const unsigned r = 47;

  uint64_t h = seed ^ (len * m);

  const unsigned char *data = key;
  const unsigned char *end = data + len / sizeof(uint64_t) * sizeof(uint64_t);

  while (data != end) {

    uint64_t k;
    memcpy(&k, data, sizeof(k));
    data += sizeof(k);

    k *= m;
    k ^= k >> r;
    k *= m;

    h ^= k;
    h *= m;
  }

  const unsigned char *data2 = data;

  switch (len & 7) {
  case 7: h ^= (uint64_t)data2[6] << 48; /* fall through */
  case 6: h ^= (uint64_t)data2[5] << 40; /* fall through */
  case 5: h ^= (uint64_t)data2[4] << 32; /* fall through */
  case 4: h ^= (uint64_t)data2[3] << 24; /* fall through */
  case 3: h ^= (uint64_t)data2[2] << 16; /* fall through */
  case 2: h ^= (uint64_t)data2[1] << 8; /* fall through */
  case 1: h ^= (uint64_t)data2[0];
    h *= m;
  }

  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
}

/******************************************************************************/

/* Signal an out-of-memory condition and terminate abruptly. */
static _Noreturn void oom(void) {
  fputs("out of memory", stderr);
  exit(EXIT_FAILURE);
}

static void *xmalloc(size_t size) {
  void *p = malloc(size);
  if (__builtin_expect(p == NULL, 0)) {
    oom();
  }
  return p;
}

static void *xcalloc(size_t count, size_t size) {
  void *p = calloc(count, size);
  if (__builtin_expect(p == NULL, 0)) {
    oom();
  }
  return p;
}

static void put(const char *NONNULL s) {
  for (; *s != '\0'; ++s) {
    putchar_unlocked(*s);
  }
}

static void put_int(intmax_t u) {
  char buffer[128] = {0};
  snprintf(buffer, sizeof(buffer), "%" PRIdMAX, u);
  put(buffer);
}

static void put_uint(uintmax_t u) {
  char buffer[128] = {0};
  snprintf(buffer, sizeof(buffer), "%" PRIuMAX, u);
  put(buffer);
}

static __attribute__((unused)) void put_val(value_t v) {
  if (value_is_signed()) {
    put_int((intmax_t)v);
  } else {
    put_uint((uintmax_t)v);
  }
}

static void xml_printf(const char *NONNULL s) {
  while (*s != '\0') {
    switch (*s) {
    case '"': put("&quot;"); break;
    case '<': put("&lt;"); break;
    case '>': put("&gt;"); break;
    case '&': put("&amp;"); break;
    default: putchar_unlocked(*s); break;
    }
    s++;
  }
}

/* Support for tracing specific operations. This can be enabled during checker
 * generation with '--trace ...' and is useful for debugging Rumur itself.
 */
static __attribute__((format(printf, 1, 2))) void trace(const char *NONNULL fmt,
                                                        ...) {

  va_list ap;
  va_start(ap, fmt);

  flockfile(stdout);

  (void)fprintf(stderr, "%sTRACE%s:", yellow(), reset());
  (void)vfprintf(stderr, fmt, ap);
  (void)fprintf(stderr, "\n");

  funlockfile(stdout);
  va_end(ap);
}

/* Wrap up trace() as a macro. It looks as if the following could just be
 * incorporated into trace(). However, present compilers seem unwilling to
 * inline varargs functions or do interprocedural analysis across a call to
 * one. As a result, the compiler does not notice when tracing is disabled and
 * a call to trace() would be a no-op that can be elided. By making the call a
 * macro we make the category comparison visible to the compiler's optimising
 * passes.
 */
#define TRACE(category, args...)                                               \
  do {                                                                         \
    if ((category) & TRACES_ENABLED) {                                         \
      trace(args);                                                             \
    }                                                                          \
  } while (0)
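
/* Example use (an illustrative sketch; TC_HANDLE_READS is one of the
 * categories used later in this file):
 *
 *   TRACE(TC_HANDLE_READS, "read value %" PRIRAWVAL, raw_value_to_string(v));
 *
 * When TC_HANDLE_READS is not in TRACES_ENABLED, the branch is constant-false
 * and the entire statement, including argument evaluation, is elided.
 */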

/*******************************************************************************
 * Arithmetic wrappers                                                         *
 *                                                                             *
 * For compilers that support them, we call the overflow built-ins to check    *
 * undefined operations during arithmetic. For others, we just emit the bare   *
 * operation.                                                                  *
 ******************************************************************************/

#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 5)

#define ADD(a, b, c) __builtin_add_overflow((a), (b), (c))
#define MUL(a, b, c) __builtin_mul_overflow((a), (b), (c))
#define SUB(a, b, c) __builtin_sub_overflow((a), (b), (c))

#else

#define ADD(a, b, c) ({ *(c) = (a) + (b); false; })
#define MUL(a, b, c) ({ *(c) = (a) * (b); false; })
#define SUB(a, b, c) ({ *(c) = (a) - (b); false; })

#endif
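
/* Example use (an illustrative sketch): each wrapper stores its result
 * through the third argument and evaluates to true on overflow, so checks
 * chain naturally with ||, as decode_value() does later in this file:
 *
 *   value_t sum;
 *   if (ADD(x, y, &sum)) {
 *     ... handle overflow of x + y ...
 *   }
 *
 * Note the fallback definitions above always evaluate to false, silently
 * skipping the check on compilers without the built-ins.
 */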

/******************************************************************************/

/* The state of the current model. */
struct state {

#if LIVENESS_COUNT > 0
  uintptr_t liveness[LIVENESS_COUNT / sizeof(uintptr_t) / CHAR_BIT +
                     (LIVENESS_COUNT % sizeof(uintptr_t) == 0 &&
                      LIVENESS_COUNT / sizeof(uintptr_t) % CHAR_BIT == 0
                          ? 0
                          : 1)];
#endif

  uint8_t data[STATE_SIZE_BYTES];

#if PACK_STATE
  /* the following effective fields are packed into here:
   *
   *   * uint64_t bound;
   *   * const struct state *previous;
   *   * uint64_t rule_taken;
   *   * uint8_t schedules[BITS_TO_BYTES(SCHEDULE_BITS)];
   *
   * They are bit-packed, so may take up less space. E.g. if the maximum value
   * of `bound` is known to be 5, it will be stored in 3 bits instead of 64.
   */
  uint8_t other[STATE_OTHER_BYTES];
#else
  uint64_t bound;
  const struct state *previous;
  uint64_t rule_taken;
  uint8_t schedules[USE_SCALARSET_SCHEDULES ? BITS_TO_BYTES(SCHEDULE_BITS) : 0];
#endif
};

struct handle {
  uint8_t *base;
  size_t offset;
  size_t width;
};

static struct handle handle_align(struct handle h) {

  ASSERT(h.offset < CHAR_BIT && "handle has an offset outside the base byte");

  size_t width = h.width + h.offset;
  if (width % 8 != 0) {
    width += 8 - width % 8;
  }

  return (struct handle){
      .base = h.base,
      .offset = 0,
      .width = width,
  };
}
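
/* Worked example (illustrative): a handle with offset 3 and width 12 covers
 * bits 3..14 of its base, so handle_align() widens it to offset 0 and width
 * 16: two whole bytes that byte-granularity reads and writes can access.
 */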

static bool is_big_endian(void) {

  union {
    uint32_t x;
    uint8_t y[sizeof(uint32_t)];
  } z;

  z.y[0] = 1;
  z.y[1] = 2;
  z.y[2] = 3;
  z.y[3] = 4;

  return z.x == 0x01020304;
}

static uint64_t copy_out64(const uint8_t *p, size_t extent) {

  ASSERT(extent <= sizeof(uint64_t));

  uint64_t x = 0;
  memcpy(&x, p, extent);

  if (is_big_endian()) {
    x = __builtin_bswap64(x);
  }

  return x;
}

#ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */
static unsigned __int128 byte_swap128(unsigned __int128 x) {

  union {
    unsigned __int128 a;
    uint64_t b[2];
  } in, out;

  in.a = x;
  out.b[0] = __builtin_bswap64(in.b[1]);
  out.b[1] = __builtin_bswap64(in.b[0]);

  return out.a;
}

static unsigned __int128 copy_out128(const uint8_t *p, size_t extent) {

  ASSERT(extent <= sizeof(unsigned __int128));

  unsigned __int128 x = 0;
  memcpy(&x, p, extent);

  if (is_big_endian()) {
    x = byte_swap128(x);
  }

  return x;
}
#endif

/* If you are in the Rumur repository modifying the following function,
 * remember to also update ../../misc/read-raw.smt2.
 */
static __attribute__((pure)) uint64_t read_raw(struct handle h) {

  /* a uint64_t is the maximum value we support reading */
  ASSERT(h.width <= 64 && "read of too wide value");

  if (h.width == 0) {
    return 0;
  }

  /* Generate a handle that is offset- and width-aligned on byte boundaries.
   * Essentially, we widen the handle to align it. The motivation for this is
   * that we can only do byte-granularity reads, so we need to "over-read" if
   * we have an unaligned handle.
   */
  struct handle aligned = handle_align(h);
  ASSERT(aligned.offset == 0);

  /* The code below attempts to provide four alternatives for reading out the
   * bits corresponding to a value of simple type referenced by a handle, and
   * to give the compiler enough hints to steer it towards picking one of
   * these and removing the other three as dead code:
   *
   *   1. Read into a single 64-bit variable. Enabled when the maximum width
   *      for an unaligned handle spans 0 - 8 bytes.
   *   2. Read into two 64-bit variables and then combine the result using
   *      shifts and ORs. Enabled when the maximum width for an unaligned
   *      handle spans 9 - 16 bytes and the compiler does not provide the
   *      `__int128` type.
   *   3. Read into a single 128-bit variable. Enabled when the compiler does
   *      provide the `__int128` type and the maximum width for an unaligned
   *      handle spans 9 - 16 bytes.
   *   4. Read into two 128-bit chunks, and then combine the result using
   *      shifts and ORs. Enabled when the compiler provides the `__int128`
   *      type and the maximum width for an unaligned handle spans 17 - 32
   *      bytes.
   */

#ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */

  /* If a byte-unaligned value_t cannot be fully read into a single uint64_t
   * using byte-aligned reads...
   */
  if (aligned.width > (sizeof(uint64_t) - 1) * 8) {

    /* Read the low double-word of this (possibly quad-word-sized) value. */
    unsigned __int128 low = 0;
    size_t low_size = aligned.width / 8;
    /* optimisation hint: */
    ASSERT(low_size <= sizeof(low) || aligned.width > (sizeof(low) - 1) * 8);
    if (low_size > sizeof(low)) {
      low_size = sizeof(low);
    }
    {
      const uint8_t *src = aligned.base;
      low = copy_out128(src, low_size);
    }

    low >>= h.offset;

    size_t high_size = aligned.width / 8 - low_size;

    /* If the value could not be read into a single double-word... */
    ASSERT(high_size == 0 || aligned.width > (sizeof(low) - 1) * 8);
    if (high_size != 0) {
      unsigned __int128 high = 0;
      const uint8_t *src = aligned.base + sizeof(low);
      high = copy_out128(src, high_size);

      high <<= sizeof(low) * 8 - h.offset;

      /* Combine the two halves into a single double-word. */
      low |= high;
    }

    if (h.width < sizeof(low) * 8) {
      unsigned __int128 mask = (((unsigned __int128)1) << h.width) - 1;
      low &= mask;
    }

    ASSERT(low <= UINT64_MAX && "read of value larger than a uint64_t");

    return (uint64_t)low;
  }
#endif

  /* Read the low word of this (possibly two-word-sized) value. */
  uint64_t low = 0;
  size_t low_size = aligned.width / 8;
  /* optimisation hint: */
  ASSERT(low_size <= sizeof(low) || aligned.width > (sizeof(low) - 1) * 8);
  if (low_size > sizeof(low)) {
    low_size = sizeof(low);
  }
  {
    const uint8_t *src = aligned.base;
    low = copy_out64(src, low_size);
  }

  low >>= h.offset;

  size_t high_size = aligned.width / 8 - low_size;

  /* If the value could not be read into a single word... */
  ASSERT(high_size == 0 || aligned.width > (sizeof(low) - 1) * 8);
  if (high_size != 0) {
    const uint8_t *src = aligned.base + sizeof(low);
    uint64_t high = copy_out64(src, high_size);

    high <<= sizeof(low) * 8 - h.offset;

    /* Combine the high and low words. Note that we know we can store the
     * final result in a single word because the width is guaranteed to be
     * <= 64.
     */
    low |= high;
  }

  if (h.width < sizeof(low) * 8) {
    uint64_t mask = (UINT64_C(1) << h.width) - 1;
    low &= mask;
  }

  return low;
}
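
/* Worked example (illustrative): for a handle { base, offset = 6, width = 5 }
 * the aligned width is 16 bits, so read_raw() copies two bytes into `low`
 * with copy_out64(), shifts right by the offset of 6 and masks with
 * (1 << 5) - 1, recovering the 5-bit value that straddled the byte boundary.
 */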

static void copy_in64(uint8_t *p, uint64_t v, size_t extent) {
  ASSERT(extent <= sizeof(v));

  if (is_big_endian()) {
    v = __builtin_bswap64(v);
  }

  memcpy(p, &v, extent);
}

#ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */
static void copy_in128(uint8_t *p, unsigned __int128 v, size_t extent) {
  ASSERT(extent <= sizeof(v));

  if (is_big_endian()) {
    v = byte_swap128(v);
  }

  memcpy(p, &v, extent);
}
#endif

/* If you are in the Rumur repository modifying the following function,
 * remember to also update ../../misc/write-raw.smt2.
 */
static void write_raw(struct handle h, uint64_t v) {

  if (h.width == 0) {
    return;
  }

  /* sanitise input value */
  if (h.width < sizeof(v) * 8) {
    v &= (UINT64_C(1) << h.width) - 1;
  }

  /* Generate an offset- and width-aligned handle on byte boundaries. */
  struct handle aligned = handle_align(h);
  ASSERT(aligned.offset == 0);

#ifdef __SIZEOF_INT128__ /* if we have the type `__int128` */

  /* If a byte-unaligned value_t cannot be fully written within a single
   * byte-aligned uint64_t...
   */
  if (aligned.width > (sizeof(uint64_t) - 1) * 8) {

    /* Read the low double-word of this region. */
    unsigned __int128 low = 0;
    size_t low_size = aligned.width / 8;
    ASSERT(low_size <= sizeof(low) || aligned.width > (sizeof(low) - 1) * 8);
    if (low_size > sizeof(low)) {
      low_size = sizeof(low);
    }
    {
      const uint8_t *src = aligned.base;
      low = copy_out128(src, low_size);
    }

    {
      unsigned __int128 or_mask = ((unsigned __int128)v) << h.offset;
      if (low_size < sizeof(low)) {
        or_mask &= (((unsigned __int128)1) << (low_size * 8)) - 1;
      }
      unsigned __int128 and_mask = (((unsigned __int128)1) << h.offset) - 1;
      if (h.width + h.offset < sizeof(low) * 8) {
        size_t high_bits = aligned.width - h.offset - h.width;
        and_mask |= ((((unsigned __int128)1) << high_bits) - 1)
                    << (low_size * 8 - high_bits);
      }

      low = (low & and_mask) | or_mask;
    }

    {
      uint8_t *dest = aligned.base;
      copy_in128(dest, low, low_size);
    }

    /* Now do the second double-word if necessary. */

    size_t high_size = aligned.width / 8 - low_size;

    ASSERT(high_size == 0 || aligned.width > (sizeof(low) - 1) * 8);
    if (high_size != 0) {
      unsigned __int128 high = 0;
      {
        const uint8_t *src = aligned.base + sizeof(low);
        high = copy_out128(src, high_size);
      }

      {
        unsigned __int128 or_mask =
            ((unsigned __int128)v) >> (sizeof(low) * 8 - h.offset);
        unsigned __int128 and_mask = ~(
            (((unsigned __int128)1) << (h.width + h.offset - sizeof(low) * 8)) -
            1);

        high = (high & and_mask) | or_mask;
      }

      {
        uint8_t *dest = aligned.base + sizeof(low);
        copy_in128(dest, high, high_size);
      }
    }

    return;
  }
#endif

  /* Replicate the above logic for uint64_t. */

  uint64_t low = 0;
  size_t low_size = aligned.width / 8;
  ASSERT(low_size <= sizeof(low) || aligned.width > (sizeof(low) - 1) * 8);
  if (low_size > sizeof(low)) {
    low_size = sizeof(low);
  }
  {
    const uint8_t *src = aligned.base;
    low = copy_out64(src, low_size);
  }

  {
    uint64_t or_mask = ((uint64_t)v) << h.offset;
    if (low_size < sizeof(low)) {
      or_mask &= (UINT64_C(1) << (low_size * 8)) - 1;
    }
    uint64_t and_mask = (UINT64_C(1) << h.offset) - 1;
    if (h.width + h.offset < sizeof(low) * 8) {
      size_t high_bits = aligned.width - h.offset - h.width;
      and_mask |= ((UINT64_C(1) << high_bits) - 1)
                  << (low_size * 8 - high_bits);
    }

    low = (low & and_mask) | or_mask;
  }

  {
    uint8_t *dest = aligned.base;
    copy_in64(dest, low, low_size);
  }

  size_t high_size = aligned.width / 8 - low_size;

  ASSERT(high_size == 0 || aligned.width > (sizeof(low) - 1) * 8);
  if (high_size != 0) {
    uint64_t high = 0;
    {
      const uint8_t *src = aligned.base + sizeof(low);
      high = copy_out64(src, high_size);
    }

    {
      uint64_t or_mask = ((uint64_t)v) >> (sizeof(low) * 8 - h.offset);
      uint64_t and_mask =
          ~((UINT64_C(1) << (h.width + h.offset - sizeof(low) * 8)) - 1);

      high = (high & and_mask) | or_mask;
    }

    {
      uint8_t *dest = aligned.base + sizeof(low);
      copy_in64(dest, high, high_size);
    }
  }
}

#if BOUND > 0
#if PACK_STATE
static struct handle state_bound_handle(const struct state *NONNULL s) {

  struct handle h = (struct handle){
      .base = (uint8_t *)s->other,
      .offset = 0,
      .width = BITS_FOR(BOUND),
  };

  return h;
}
#endif

_Static_assert((uintmax_t)BOUND <= UINT64_MAX,
               "bound limit does not fit in a uint64_t");

static __attribute__((pure)) uint64_t
state_bound_get(const struct state *NONNULL s) {
  assert(s != NULL);

#if PACK_STATE
  struct handle h = state_bound_handle(s);
  return read_raw(h);
#else
  return s->bound;
#endif
}

static void state_bound_set(struct state *NONNULL s, uint64_t bound) {
  assert(s != NULL);

#if PACK_STATE
  struct handle h = state_bound_handle(s);
  write_raw(h, bound);
#else
  s->bound = bound;
#endif
}
#endif

#if COUNTEREXAMPLE_TRACE != CEX_OFF || LIVENESS_COUNT > 0
#if PACK_STATE
static struct handle state_previous_handle(const struct state *NONNULL s) {

  size_t offset = BOUND_BITS;

  struct handle h = (struct handle){
      .base = (uint8_t *)s->other + offset / 8,
      .offset = offset % 8,
      .width = PREVIOUS_BITS,
  };

  return h;
}
#endif

static __attribute__((pure)) const struct state *
state_previous_get(const struct state *NONNULL s) {
#if PACK_STATE
  struct handle h = state_previous_handle(s);
  return (const struct state *)(uintptr_t)read_raw(h);
#else
  return s->previous;
#endif
}

static void state_previous_set(struct state *NONNULL s,
                               const struct state *previous) {
#if PACK_STATE
  ASSERT(
      (PREVIOUS_BITS == sizeof(void *) * 8 ||
       ((uintptr_t)previous >> PREVIOUS_BITS) == 0) &&
      "upper bits of pointer are non-zero (incorrect --pointer-bits setting?)");
  struct handle h = state_previous_handle(s);
  write_raw(h, (uint64_t)(uintptr_t)previous);
#else
  s->previous = previous;
#endif
}
#endif

#if COUNTEREXAMPLE_TRACE != CEX_OFF
#if PACK_STATE
static struct handle state_rule_taken_handle(const struct state *NONNULL s) {

  size_t offset = BOUND_BITS + PREVIOUS_BITS;

  struct handle h = (struct handle){
      .base = (uint8_t *)s->other + offset / 8,
      .offset = offset % 8,
      .width = RULE_TAKEN_BITS,
  };

  return h;
}
#endif

static __attribute__((pure)) uint64_t
state_rule_taken_get(const struct state *NONNULL s) {
  assert(s != NULL);
#if PACK_STATE
  struct handle h = state_rule_taken_handle(s);
  return read_raw(h);
#else
  return s->rule_taken;
#endif
}

static void state_rule_taken_set(struct state *NONNULL s, uint64_t rule_taken) {
  assert(s != NULL);
#if PACK_STATE
  struct handle h = state_rule_taken_handle(s);
  write_raw(h, rule_taken);
#else
  s->rule_taken = rule_taken;
#endif
}
#endif

static struct handle state_schedule_handle(const struct state *NONNULL s,
                                           size_t offset, size_t width) {

  /* the maximum schedule width handle ever derived should be SCHEDULE_BITS,
   * and should only ever occur when writing the entire schedule
   */
#if !defined(__clang__) && defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
  ASSERT(width <= SCHEDULE_BITS && (width < SCHEDULE_BITS || offset == 0) &&
         "out-of-bounds handle derived to access schedule data");
#if !defined(__clang__) && defined(__GNUC__)
#pragma GCC diagnostic pop
#endif

  uint8_t *b;
  size_t o;

#if PACK_STATE
  b = (uint8_t *)s->other;
  o = BOUND_BITS + PREVIOUS_BITS + RULE_TAKEN_BITS + offset;
#else
  b = (uint8_t *)s->schedules;
  o = offset;
#endif

  struct handle h = (struct handle){
      .base = (uint8_t *)b + o / 8,
      .offset = o % 8,
      .width = width,
  };

  return h;
}

static __attribute__((pure, unused)) size_t
state_schedule_get(const struct state *NONNULL s, size_t offset, size_t width) {
  assert(s != NULL);

  struct handle h = state_schedule_handle(s, offset, width);
  return (size_t)read_raw(h);
}

static __attribute__((unused)) void state_schedule_set(struct state *NONNULL s,
                                                        size_t offset,
                                                        size_t width,
                                                        size_t v) {
  assert(s != NULL);

  /* we should never attempt to write an invalid schedule index that will be
   * truncated
   */
  if (width < 64) {
    ASSERT((((UINT64_C(1) << width) - 1) & (uint64_t)v) == (uint64_t)v &&
           "truncation in writing schedule data to state");
  }

  struct handle h = state_schedule_handle(s, offset, width);
  write_raw(h, (uint64_t)v);
}

/*******************************************************************************
 * State allocator.                                                            *
 *                                                                             *
 * The following implements a simple bump allocator for states. The purpose of *
 * this (rather than simply mallocing individual states) is to speed up        *
 * allocation by taking global locks less frequently and decrease allocator    *
 * metadata overhead.                                                          *
 ******************************************************************************/

/* The initial size of the thread-local allocator pools: enough states to fill
 * ~8MB.
 */
static _Thread_local size_t arena_count =
    (sizeof(struct state) > 8 * 1024 * 1024)
        ? 1
        : (8 * 1024 * 1024 / sizeof(struct state));

static _Thread_local struct state *arena_base;
static _Thread_local struct state *arena_limit;

static struct state *state_new(void) {

  if (arena_base == arena_limit) {
    /* Allocation pool is empty. We need to set up a new pool. */
    for (;;) {
      if (arena_count == 1) {
        arena_base = xmalloc(sizeof(*arena_base));
      } else {
        arena_base = calloc(arena_count, sizeof(*arena_base));
        if (__builtin_expect(arena_base == NULL, 0)) {
          /* Memory pressure high. Decrease our attempted allocation and try
           * again.
           */
          arena_count /= 2;
          continue;
        }
      }

      arena_limit = arena_base + arena_count;
      break;
    }
  }

  assert(arena_base != NULL);
  assert(arena_base != arena_limit);

  struct state *s = arena_base;
  arena_base++;
  return s;
}

static void state_free(struct state *s) {

  if (s == NULL) {
    return;
  }

  assert(s + 1 == arena_base);
  arena_base--;
}
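
/* Note the LIFO discipline the assertion above enforces: state_free() may
 * only release the most recent allocation. A sketch of the intended usage
 * (illustrative):
 *
 *   struct state *n = state_new();
 *   ... populate n; discover it is a duplicate or invalid ...
 *   state_free(n); // n must still be the newest allocation
 */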

/*******************************************************************************
 * statistics for memory usage                                                 *
 *                                                                             *
 * This functionality is only used when `--trace memory_usage` is given on the *
 * command line.                                                               *
 ******************************************************************************/

/* number of allocated state structs per depth of expansion */
static size_t allocated[BOUND == 0 ? 1 : (BOUND + 1)];

/* note a new allocation of a state struct at the given depth */
static void register_allocation(size_t depth) {

  /* if we are not tracing memory usage, make this a no-op */
  if (!(TC_MEMORY_USAGE & TRACES_ENABLED)) {
    return;
  }

  ASSERT(depth < sizeof(allocated) / sizeof(allocated[0]) &&
         "out of range access to allocated array");

  /* increment the number of known allocated states, avoiding an expensive
   * atomic if we are single-threaded
   */
  if (THREADS == 1) {
    allocated[depth]++;
  } else {
    (void)__sync_add_and_fetch(&allocated[depth], 1);
  }
}

/* print a summary of the allocation results we have accrued */
static void print_allocation_summary(void) {

  /* it is assumed we are running single-threaded here and do not need atomic
   * accesses
   */

  if (BOUND == 0) {
    TRACE(TC_MEMORY_USAGE,
          "allocated %zu state structure(s), totaling %zu bytes",
          allocated[0], allocated[0] * sizeof(struct state));
  } else {
    for (size_t i = 0; i < sizeof(allocated) / sizeof(allocated[0]); i++) {

      if (allocated[i] == 0) {
        /* no state at this depth was reached, therefore no states at deeper
         * depths were reached either and we are done
         */
        for (size_t j = i + 1; j < sizeof(allocated) / sizeof(allocated[0]);
             j++) {
          assert(allocated[j] == 0 &&
                 "state allocated at a deeper depth than an empty level");
        }
        break;
      }

      TRACE(TC_MEMORY_USAGE,
            "depth %zu: allocated %zu state structure(s), totaling %zu bytes",
            i, allocated[i], allocated[i] * sizeof(struct state));
    }
  }
}

/******************************************************************************/

/* Print a counterexample trace terminating at the given state. This function
 * assumes that the caller already holds a lock on stdout.
 */
static void print_counterexample(const struct state *NONNULL s
                                 __attribute__((unused)));

/* "Exit" the current thread. This takes into account which thread we are.
 * I.e. the correct way to exit the checker is for every thread to eventually
 * call this function.
 */
static _Noreturn int exit_with(int status);

static __attribute__((format(printf, 2, 3))) _Noreturn void
error(const struct state *NONNULL s, const char *NONNULL fmt, ...) {

  unsigned long prior_errors =
      __atomic_fetch_add(&error_count, 1, __ATOMIC_SEQ_CST);

  if (__builtin_expect(prior_errors < MAX_ERRORS, 1)) {

    flockfile(stdout);

    va_list ap;
    va_start(ap, fmt);

    if (MACHINE_READABLE_OUTPUT) {
      put("<error includes_trace=\"");
      put((s == NULL || COUNTEREXAMPLE_TRACE == CEX_OFF) ? "false" : "true");
      put("\">\n");

      put("<message>");
      {
        va_list ap2;
        va_copy(ap2, ap);

        int size = vsnprintf(NULL, 0, fmt, ap2);
        va_end(ap2);
        if (__builtin_expect(size < 0, 0)) {
          fputs("vsnprintf failed", stderr);
          exit(EXIT_FAILURE);
        }

        char *buffer = xmalloc(size + 1);
        if (__builtin_expect(vsnprintf(buffer, size + 1, fmt, ap) != size, 0)) {
          fputs("vsnprintf failed", stderr);
          exit(EXIT_FAILURE);
        }

        xml_printf(buffer);
        free(buffer);
      }
      put("</message>\n");

      if (s != NULL && COUNTEREXAMPLE_TRACE != CEX_OFF) {
        print_counterexample(s);
      }

      put("</error>\n");

    } else {
      if (s != NULL) {
        put("The following is the error trace for the error:\n\n");
      } else {
        put("Result:\n\n");
      }

      put("\t");
      put(red());
      put(bold());
      {
        va_list ap2;
        va_copy(ap2, ap);

        int size = vsnprintf(NULL, 0, fmt, ap2);
        va_end(ap2);
        if (__builtin_expect(size < 0, 0)) {
          fputs("vsnprintf failed", stderr);
          exit(EXIT_FAILURE);
        }

        char *buffer = xmalloc(size + 1);
        if (__builtin_expect(vsnprintf(buffer, size + 1, fmt, ap) != size, 0)) {
          fputs("vsnprintf failed", stderr);
          exit(EXIT_FAILURE);
        }

        put(buffer);
        free(buffer);
      }
      put(reset());
      put("\n\n");

      if (s != NULL && COUNTEREXAMPLE_TRACE != CEX_OFF) {
        print_counterexample(s);
        put("End of the error trace.\n\n");
      }
    }

    va_end(ap);

    funlockfile(stdout);
  }

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
  if (prior_errors < MAX_ERRORS - 1) {
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
    assert(JMP_BUF_NEEDED && "longjmping without a setup jmp_buf");
    siglongjmp(checkpoint, 1);
  }

  exit_with(EXIT_FAILURE);
}

static void deadlock(const struct state *NONNULL s) {
  if (JMP_BUF_NEEDED) {
    if (sigsetjmp(checkpoint, 0)) {
      /* error() longjmped back to us. */
      return;
    }
  }
  error(s, "deadlock");
}

static int state_cmp(const struct state *NONNULL a,
                     const struct state *NONNULL b) {
  return memcmp(a->data, b->data, sizeof(a->data));
}

static bool state_eq(const struct state *NONNULL a,
                     const struct state *NONNULL b) {
  return state_cmp(a, b) == 0;
}

static void handle_copy(struct handle a, struct handle b);

static struct state *state_dup(const struct state *NONNULL s) {
  struct state *n = state_new();
  memcpy(n->data, s->data, sizeof(n->data));
#if COUNTEREXAMPLE_TRACE != CEX_OFF || LIVENESS_COUNT > 0
  state_previous_set(n, s);
#endif
#if BOUND > 0
  assert(state_bound_get(s) < BOUND && "exceeding bounded exploration depth");
  state_bound_set(n, state_bound_get(s) + 1);
#endif
#if LIVENESS_COUNT > 0
  memset(n->liveness, 0, sizeof(n->liveness));
#endif

  if (USE_SCALARSET_SCHEDULES) {
    /* copy schedule data related to past scalarset permutations */
    struct handle sch_src = state_schedule_handle(s, 0, SCHEDULE_BITS);
    struct handle sch_dst = state_schedule_handle(n, 0, SCHEDULE_BITS);
    handle_copy(sch_dst, sch_src);
  }

  return n;
}

static size_t state_hash(const struct state *NONNULL s) {
  return (size_t)MurmurHash64A(s->data, sizeof(s->data));
}

#if COUNTEREXAMPLE_TRACE != CEX_OFF
static __attribute__((unused)) size_t
state_depth(const struct state *NONNULL s) {
#if BOUND > 0
  uint64_t bound = state_bound_get(s);
  ASSERT(bound <= BOUND && "claimed state bound exceeds limit");
  return (size_t)bound + 1;
#else
  size_t d = 0;
  while (s != NULL) {
    d++;
    s = state_previous_get(s);
  }
  return d;
#endif
}
#endif

/* A type-safe const cast. */
static __attribute__((unused)) struct state *
state_drop_const(const struct state *s) {
  return (struct state *)s;
}

/* These functions are generated. */
static void state_canonicalise_heuristic(struct state *NONNULL s);
static void state_canonicalise_exhaustive(struct state *NONNULL s);

static void state_canonicalise(struct state *NONNULL s) {

  assert(s != NULL && "attempt to canonicalise NULL state");

  switch (SYMMETRY_REDUCTION) {

  case SYMMETRY_REDUCTION_OFF:
    break;

  case SYMMETRY_REDUCTION_HEURISTIC:
    state_canonicalise_heuristic(s);
    break;

  case SYMMETRY_REDUCTION_EXHAUSTIVE:
    state_canonicalise_exhaustive(s);
    break;
  }
}

/* This function is generated. */
static __attribute__((unused)) void state_print_field_offsets(void);

/* Print a state to stderr. This function is generated. This function assumes
 * that the caller already holds a lock on stdout.
 */
static __attribute__((unused)) void state_print(const struct state *previous,
                                                const struct state *NONNULL s);

/* Print the first rule that resulted in s. This function is generated. This
 * function assumes that the caller holds a lock on stdout.
 */
static __attribute__((unused)) void
print_transition(const struct state *NONNULL s);

static void print_counterexample(const struct state *NONNULL s
                                 __attribute__((unused))) {

  assert(s != NULL && "missing state in request for counterexample trace");

#if COUNTEREXAMPLE_TRACE != CEX_OFF
  /* Construct an array of the states we need to print by walking backwards to
   * the initial starting state.
   */
  size_t trace_length = state_depth(s);

  const struct state **cex = xcalloc(trace_length, sizeof(cex[0]));

  {
    size_t i = trace_length - 1;
    for (const struct state *p = s; p != NULL; p = state_previous_get(p)) {
      assert(i < trace_length &&
             "error in counterexample trace traversal logic");
      cex[i] = p;
      i--;
    }
  }

  for (size_t i = 0; i < trace_length; i++) {

    const struct state *current = cex[i];
    const struct state *previous = i == 0 ? NULL : cex[i - 1];

    print_transition(current);

    if (MACHINE_READABLE_OUTPUT) {
      put("<state>\n");
    }
    state_print(COUNTEREXAMPLE_TRACE == FULL ? NULL : previous, current);
    if (MACHINE_READABLE_OUTPUT) {
      put("</state>\n");
    } else {
      put("----------\n\n");
    }
  }

  free(cex);
#endif
}

static __attribute__((unused)) struct handle
state_handle(const struct state *NONNULL s, size_t offset, size_t width) {

  assert(sizeof(s->data) * CHAR_BIT - width >= offset &&
         "generating an out of bounds handle in state_handle()");

  return (struct handle){
      .base = (uint8_t *)s->data + offset / CHAR_BIT,
      .offset = offset % CHAR_BIT,
      .width = width,
  };
}

static raw_value_t handle_read_raw(const struct state *NONNULL s,
                                   struct handle h) {

  /* Check if this read is larger than the variable we will store it in. This
   * can only occur if the user has manually overridden value_t with the
   * --value-type command line argument.
   */
  if (__builtin_expect(h.width > sizeof(raw_value_t) * 8, 0)) {
    error(s, "read of a handle that is wider than the value type");
  }

  ASSERT(h.width <= MAX_SIMPLE_WIDTH &&
         "read of a handle that is larger than "
         "the maximum width of a simple type in this model");

  raw_value_t raw = (raw_value_t)read_raw(h);

  TRACE(TC_HANDLE_READS,
        "read value %" PRIRAWVAL " from handle { %p, %zu, %zu }",
        raw_value_to_string(raw), h.base, h.offset, h.width);

  return raw;
}

static value_t decode_value(value_t lb, value_t ub, raw_value_t v) {

  value_t dest = 0;

  bool r __attribute__((unused)) =
      SUB(v, 1, &v) || ADD(v, lb, &dest) || dest < lb || dest > ub;

  ASSERT(!r && "read of out-of-range value");

  return dest;
}
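
/* The raw encoding biases every value by 1 so that raw 0 can represent
 * "undefined": a stored raw value v denotes lb + v - 1. Worked example
 * (illustrative): for a range 2..5, decode_value(2, 5, 1) yields 2 and
 * decode_value(2, 5, 4) yields 5; handle_write() below applies the inverse,
 * storing value - lb + 1.
 */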

static __attribute__((unused)) value_t
handle_read(const char *NONNULL context, const char *rule_name,
            const char *NONNULL name, const struct state *NONNULL s, value_t lb,
            value_t ub, struct handle h) {

  assert(context != NULL);
  assert(name != NULL);

  /* If we happen to be reading from the current state, do a sanity check that
   * we're only reading within bounds.
   */
  assert((h.base != (uint8_t *)s->data /* not a read from the current state */
          || sizeof(s->data) * CHAR_BIT - h.width >= h.offset) /* in bounds */
         && "out of bounds read in handle_read()");

  raw_value_t dest = handle_read_raw(s, h);

  if (__builtin_expect(dest == 0, 0)) {
    error(s, "%sread of undefined value in %s%s%s", context, name,
          rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }

  return decode_value(lb, ub, dest);
}

static void handle_write_raw(const struct state *NONNULL s, struct handle h,
                             raw_value_t value) {

  /* Check if this write is larger than the variable we are reading the value
   * from. This can only occur if the user has manually overridden value_t
   * with the --value-type command line argument.
   */
  if (__builtin_expect(h.width > sizeof(raw_value_t) * 8, 0)) {
    error(s, "write of a handle that is wider than the value type");
  }

  ASSERT(h.width <= MAX_SIMPLE_WIDTH &&
         "write to a handle that is larger than "
         "the maximum width of a simple type in this model");

  TRACE(TC_HANDLE_WRITES,
        "writing value %" PRIRAWVAL " to handle { %p, %zu, %zu }",
        raw_value_to_string(value), h.base, h.offset, h.width);

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
  ASSERT((uintmax_t)value <= UINT64_MAX &&
         "truncating value during handle_write_raw");
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif

  write_raw(h, (uint64_t)value);
}
1759
1760 static __attribute__((unused)) void
handle_write(const char * NONNULL context,const char * rule_name,const char * NONNULL name,const struct state * NONNULL s,value_t lb,value_t ub,struct handle h,value_t value)1761 handle_write(const char *NONNULL context, const char *rule_name,
1762 const char *NONNULL name, const struct state *NONNULL s,
1763 value_t lb, value_t ub, struct handle h, value_t value) {
1764
1765 assert(context != NULL);
1766 assert(name != NULL);
1767
1768 /* If we happen to be writing to the current state, do a sanity check that
1769 * we're only writing within bounds.
1770 */
1771 assert((h.base != (uint8_t *)s->data /* not a write to the current state */
1772 || sizeof(s->data) * CHAR_BIT - h.width >= h.offset) /* in bounds */
1773 && "out of bounds write in handle_write()");
1774
1775 raw_value_t r;
1776 if (__builtin_expect(
1777 value < lb || value > ub || SUB(value, lb, &r) || ADD(r, 1, &r), 0)) {
1778 error(s, "%swrite of out-of-range value into %s%s%s", context, name,
1779 rule_name == NULL ? "" : " within ",
1780 rule_name == NULL ? "" : rule_name);
1781 }
1782
1783 handle_write_raw(s, h, r);
1784 }

static __attribute__((unused)) void handle_zero(struct handle h) {

  uint8_t *p = h.base + h.offset / 8;

  /* Zero out up to a byte-aligned offset. */
  if (h.offset % 8 != 0) {
    uint8_t mask = (UINT8_C(1) << (h.offset % 8)) - 1;
    if (h.width < 8 - h.offset % 8) {
      mask |= UINT8_MAX & ~((UINT8_C(1) << (h.offset % 8 + h.width)) - 1);
    }
    *p &= mask;
    p++;
    if (h.width < 8 - h.offset % 8) {
      return;
    }
    h.width -= 8 - h.offset % 8;
  }

  /* Zero out as many bytes as we can. */
  memset(p, 0, h.width / 8);
  p += h.width / 8;
  h.width -= h.width / 8 * 8;

  /* Zero out the trailing bits in the final byte. */
  if (h.width > 0) {
    uint8_t mask = ~((UINT8_C(1) << h.width) - 1);
    *p &= mask;
  }
}
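
/* A worked example of the bit manipulation above (a hypothetical sketch;
 * nothing calls it): zeroing an 8-bit handle at bit offset 2 of a 2-byte
 * buffer clears bits 2..7 of the first byte and bits 0..1 of the second,
 * leaving every other bit untouched.
 */
static __attribute__((unused)) void handle_zero_example(void) {
  uint8_t buffer[2] = {0xff, 0xff};
  handle_zero((struct handle){.base = buffer, .offset = 2, .width = 8});
  ASSERT(buffer[0] == 0x03 && "low bits of the first byte were clobbered");
  ASSERT(buffer[1] == 0xfc && "high bits of the second byte were clobbered");
}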

static void handle_copy(struct handle a, struct handle b) {

  ASSERT(a.width == b.width && "copying between handles of different sizes");

  /* FIXME: This does a bit-by-bit copy which almost certainly could be
   * accelerated by detecting byte-boundaries and complementary alignment and
   * then calling memcpy when possible.
   */

  for (size_t i = 0; i < a.width; i++) {

    uint8_t *dst = a.base + (a.offset + i) / 8;
    size_t dst_off = (a.offset + i) % 8;

    const uint8_t *src = b.base + (b.offset + i) / 8;
    size_t src_off = (b.offset + i) % 8;

    uint8_t or_mask = ((*src >> src_off) & UINT8_C(1)) << dst_off;
    uint8_t and_mask = ~(UINT8_C(1) << dst_off);

    *dst = (*dst & and_mask) | or_mask;
  }
}

static __attribute__((unused)) bool handle_eq(struct handle a,
                                              struct handle b) {

  ASSERT(a.width == b.width && "comparing handles of different sizes");

  /* FIXME: as with handle_copy, we do a bit-by-bit comparison which could be
   * made more efficient.
   */

  for (size_t i = 0; i < a.width; i++) {

    uint8_t *x = a.base + (a.offset + i) / 8;
    size_t x_off = (a.offset + i) % 8;
    bool x_bit = (*x >> x_off) & 0x1;

    const uint8_t *y = b.base + (b.offset + i) / 8;
    size_t y_off = (b.offset + i) % 8;
    bool y_bit = (*y >> y_off) & 0x1;

    if (x_bit != y_bit) {
      return false;
    }
  }

  return true;
}
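
/* A sketch of the two helpers above (hypothetical; never called): copy 4 bits
 * between handles with different bit alignments, then confirm the source and
 * destination compare equal.
 */
static __attribute__((unused)) void handle_copy_eq_example(void) {
  uint8_t src_buf[2] = {0xb4, 0x00}; /* bits 2..5 hold 0b1101 */
  uint8_t dst_buf[2] = {0x00, 0x00};
  struct handle src = {.base = src_buf, .offset = 2, .width = 4};
  struct handle dst = {.base = dst_buf, .offset = 5, .width = 4};
  handle_copy(dst, src);
  bool eq = handle_eq(dst, src);
  ASSERT(eq && "copy did not reproduce the source bits");
}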

static __attribute__((unused)) struct handle
handle_narrow(struct handle h, size_t offset, size_t width) {

  ASSERT(h.offset + offset + width <= h.offset + h.width &&
         "narrowing a handle with values that actually expand it");

  size_t r __attribute__((unused));
  assert(!ADD(h.offset, offset, &r) && "narrowing handle overflows a size_t");

  return (struct handle){
      .base = h.base + (h.offset + offset) / CHAR_BIT,
      .offset = (h.offset + offset) % CHAR_BIT,
      .width = width,
  };
}
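
/* A worked example of the arithmetic above (hypothetical; never called):
 * narrowing a 16-bit handle that starts at bit 3 to the 2 bits at relative
 * offset 6 lands at absolute bit 9, i.e. bit 1 of the buffer's second byte.
 */
static __attribute__((unused)) void handle_narrow_example(void) {
  uint8_t buffer[3] = {0};
  struct handle h = {.base = buffer, .offset = 3, .width = 16};
  struct handle n = handle_narrow(h, 6, 2);
  ASSERT(n.base == &buffer[1] && "wrong base after narrowing");
  ASSERT(n.offset == 1 && "wrong bit offset after narrowing");
  ASSERT(n.width == 2 && "wrong width after narrowing");
}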

static __attribute__((unused)) struct handle
handle_index(const char *NONNULL context, const char *rule_name,
             const char *NONNULL expr, const struct state *NONNULL s,
             size_t element_width, value_t index_min, value_t index_max,
             struct handle root, value_t index) {

  assert(expr != NULL);

  if (__builtin_expect(index < index_min || index > index_max, 0)) {
    error(s, "%sindex out of range in expression %s%s%s", context, expr,
          rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }

  size_t r1, r2;
  if (__builtin_expect(
          SUB(index, index_min, &r1) || MUL(r1, element_width, &r2), 0)) {
    error(s, "%soverflow when indexing array in expression %s%s%s", context,
          expr, rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }

  size_t r __attribute__((unused));
  assert(!ADD(root.offset, r2, &r) && "indexing handle overflows a size_t");

  return (struct handle){
      .base = root.base + (root.offset + r2) / CHAR_BIT,
      .offset = (root.offset + r2) % CHAR_BIT,
      .width = element_width,
  };
}

static __attribute__((unused)) value_t
handle_isundefined(const struct state *NONNULL s, struct handle h) {
  raw_value_t v = handle_read_raw(s, h);

  return v == 0;
}

/* Overflow-safe helpers for doing bounded arithmetic. */

static __attribute__((unused)) value_t add(const char *NONNULL context,
                                           const char *rule_name,
                                           const char *NONNULL expr,
                                           const struct state *NONNULL s,
                                           value_t a, value_t b) {

  assert(context != NULL);
  assert(expr != NULL);

  value_t r;
  if (__builtin_expect(ADD(a, b, &r), 0)) {
    error(s, "%sinteger overflow in addition in expression %s%s%s", context,
          expr, rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }
  return r;
}

static __attribute__((unused)) value_t sub(const char *NONNULL context,
                                           const char *rule_name,
                                           const char *NONNULL expr,
                                           const struct state *NONNULL s,
                                           value_t a, value_t b) {

  assert(context != NULL);
  assert(expr != NULL);

  value_t r;
  if (__builtin_expect(SUB(a, b, &r), 0)) {
    error(s, "%sinteger overflow in subtraction in expression %s%s%s", context,
          expr, rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }
  return r;
}

static __attribute__((unused)) value_t mul(const char *NONNULL context,
                                           const char *rule_name,
                                           const char *NONNULL expr,
                                           const struct state *NONNULL s,
                                           value_t a, value_t b) {

  assert(context != NULL);
  assert(expr != NULL);

  value_t r;
  if (__builtin_expect(MUL(a, b, &r), 0)) {
    error(s, "%sinteger overflow in multiplication in expression %s%s%s",
          context, expr, rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }
  return r;
}

static __attribute__((unused)) value_t divide(const char *NONNULL context,
                                              const char *rule_name,
                                              const char *NONNULL expr,
                                              const struct state *NONNULL s,
                                              value_t a, value_t b) {

  assert(context != NULL);
  assert(expr != NULL);

  if (__builtin_expect(b == 0, 0)) {
    error(s, "%sdivision by zero in expression %s%s%s", context, expr,
          rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }

  if (__builtin_expect(VALUE_MIN != 0 && a == VALUE_MIN && b == (value_t)-1,
                       0)) {
    error(s, "%sinteger overflow in division in expression %s%s%s", context,
          expr, rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }

  return a / b;
}

static __attribute__((unused)) value_t mod(const char *NONNULL context,
                                           const char *rule_name,
                                           const char *NONNULL expr,
                                           const struct state *NONNULL s,
                                           value_t a, value_t b) {

  assert(context != NULL);
  assert(expr != NULL);

  if (__builtin_expect(b == 0, 0)) {
    error(s, "%smodulus by zero in expression %s%s%s", context, expr,
          rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }

  /* C11 §6.5.5 makes a % b undefined whenever a / b is not representable, so
   * VALUE_MIN % -1 must be rejected just like the division case above.
   */
  if (__builtin_expect(VALUE_MIN != 0 && a == VALUE_MIN && b == (value_t)-1,
                       0)) {
    error(s, "%sinteger overflow in modulo in expression %s%s%s", context,
          expr, rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }

  return a % b;
}

static __attribute__((unused)) value_t
negate(const char *NONNULL context, const char *rule_name,
       const char *NONNULL expr, const struct state *NONNULL s, value_t a) {

  assert(context != NULL);
  assert(expr != NULL);

  if (__builtin_expect(VALUE_MIN != 0 && a == VALUE_MIN, 0)) {
    error(s, "%sinteger overflow in negation in expression %s%s%s", context,
          expr, rule_name == NULL ? "" : " within ",
          rule_name == NULL ? "" : rule_name);
  }

  return -a;
}

/* wrappers for << and >> to avoid C undefined behaviour */
static __attribute__((unused)) value_t rsh(value_t a, value_t b);
static __attribute__((unused)) value_t lsh(value_t a, value_t b) {

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
  if (value_is_signed() && b <= -(value_t)(sizeof(a) * 8)) {
    return 0;
  }

  if (b >= (value_t)(sizeof(a) * 8)) {
    return 0;
  }

  if (b < 0) {
    return rsh(a, -b);
  }
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif

  return (value_t)((raw_value_t)a << b);
}

static value_t rsh(value_t a, value_t b) {

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
  if (value_is_signed() && b <= -(value_t)(sizeof(a) * 8)) {
    return 0;
  }

  if (b >= (value_t)(sizeof(a) * 8)) {
    return 0;
  }

  if (b < 0) {
    return lsh(a, -b);
  }
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif

  return a >> b;
}
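
/* A sketch of the semantics the wrappers above provide (hypothetical; never
 * called): oversized shift amounts yield 0 instead of undefined behaviour,
 * and negative shift amounts shift in the opposite direction.
 */
static __attribute__((unused)) void shift_example(void) {
  value_t clamped = lsh(1, (value_t)(sizeof(value_t) * 8));
  ASSERT(clamped == 0 && "oversized shift was not clamped to 0");
  value_t halved = rsh(4, 1);
  ASSERT(halved == 2 && "unexpected right shift result");
  if (value_is_signed()) {
    value_t reversed = lsh(4, -1); /* acts as rsh(4, 1) */
    ASSERT(reversed == 2 && "negative shift did not reverse direction");
  }
}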

/* Bitwise NOT wrapper. We only need this to suppress some spurious GCC
 * warnings:
 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=38341
 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59098
 */
static __attribute__((unused)) value_t bnot(value_t v) {
  return (value_t)~(raw_value_t)v;
}

/* use Heap's algorithm for generating permutations to implement a
 * permutation-to-number mapping
 */
static __attribute__((unused)) size_t
permutation_to_index(const size_t *NONNULL permutation, size_t *NONNULL stack,
                     size_t *NONNULL working, size_t count) {

  /* byte extent of the permutation arrays */
  size_t bytes = sizeof(permutation[0]) * count;

  size_t index = 0;

  /* clear our stack */
  memset(stack, 0, bytes);

  /* initialise the first (identity) permutation */
  for (size_t i = 0; i < count; ++i) {
    working[i] = i;
  }

  if (memcmp(permutation, working, bytes) == 0) {
    return index;
  }

  size_t i = 0;
  while (i < count) {
    if (stack[i] < i) {
      ++index;
      if (i % 2 == 0) {
        /* swap working[0] and working[i] */
        size_t tmp = working[0];
        working[0] = working[i];
        working[i] = tmp;
      } else {
        /* swap working[stack[i]] and working[i] */
        size_t tmp = working[stack[i]];
        working[stack[i]] = working[i];
        working[i] = tmp;
      }
      if (memcmp(permutation, working, bytes) == 0) {
        return index;
      }
      ++stack[i];
      i = 0;
    } else {
      stack[i] = 0;
      ++i;
    }
  }

  /* the permutation should have been one of the possible ones */
  ASSERT(!"invalid permutation passed to permutation_to_index");
}

static __attribute__((unused)) void
index_to_permutation(size_t index, size_t *NONNULL permutation,
                     size_t *NONNULL stack, size_t count) {

  /* byte extent of the permutation arrays */
  size_t bytes = sizeof(permutation[0]) * count;

  size_t ind = 0;

  /* clear our stack */
  memset(stack, 0, bytes);

  /* initialise the first (identity) permutation */
  for (size_t i = 0; i < count; ++i) {
    permutation[i] = i;
  }

  if (ind == index) {
    return;
  }

  size_t i = 0;
  while (i < count) {
    if (stack[i] < i) {
      ++ind;
      if (i % 2 == 0) {
        /* swap permutation[0] and permutation[i] */
        size_t tmp = permutation[0];
        permutation[0] = permutation[i];
        permutation[i] = tmp;
      } else {
        /* swap permutation[stack[i]] and permutation[i] */
        size_t tmp = permutation[stack[i]];
        permutation[stack[i]] = permutation[i];
        permutation[i] = tmp;
      }
      if (ind == index) {
        return;
      }
      ++stack[i];
      i = 0;
    } else {
      stack[i] = 0;
      ++i;
    }
  }

  /* the target permutation should have been one of the possible ones */
  ASSERT(!"invalid index passed to index_to_permutation");
}
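
/* A sketch of the mapping implemented above (hypothetical; never called):
 * because both functions enumerate permutations in the same Heap's-algorithm
 * order, converting an index to a permutation and back yields the original
 * index. With count == 3 the valid indices are 0..5.
 */
static __attribute__((unused)) void permutation_roundtrip_example(void) {
  size_t permutation[3];
  size_t stack[3];
  size_t working[3];
  for (size_t index = 0; index < 6; index++) {
    index_to_permutation(index, permutation, stack, 3);
    size_t roundtripped = permutation_to_index(permutation, stack, working, 3);
    ASSERT(roundtripped == index && "mapping is not invertible");
  }
}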

/*******************************************************************************
 * State queue node                                                            *
 *                                                                             *
 * Queue nodes are 4K-sized, 4K-aligned linked-list nodes. They contain        *
 * pending states and then a pointer to the next node in the queue.            *
 ******************************************************************************/

struct queue_node {
  struct state
      *s[(4096 - sizeof(struct queue_node *)) / sizeof(struct state *)];
  struct queue_node *next;
};

_Static_assert(sizeof(struct queue_node) == 4096,
               "incorrect queue_node size calculation");

static struct queue_node *queue_node_new(void) {
  struct queue_node *p = NULL;

  int r = posix_memalign((void **)&p, sizeof(*p), sizeof(*p));

  assert((r == 0 || r == ENOMEM) && "invalid alignment to posix_memalign");

  if (__builtin_expect(r != 0, 0)) {
    oom();
  }

  memset(p, 0, sizeof(*p));

  return p;
}

static void queue_node_free(struct queue_node *p) { free(p); }

/******************************************************************************/

/*******************************************************************************
 * Queue node handles                                                          *
 *                                                                             *
 * These are pointers to a member-aligned address within a queue_node. The     *
 * idea is that you can have a queue_handle_t pointing at either one of the    *
 * elements of the `s` member or at the chained `next` pointer. Since          *
 * queue_node pointers are always 4K-aligned, you can examine the low 12 bits  *
 * of the queue node handle to determine which member you are pointing at.     *
 ******************************************************************************/

typedef uintptr_t queue_handle_t;

static queue_handle_t queue_handle_from_node_ptr(const struct queue_node *n) {
  return (queue_handle_t)n;
}

static struct queue_node *queue_handle_base(queue_handle_t h) {
  return (struct queue_node *)(h - h % sizeof(struct queue_node));
}

static bool queue_handle_is_state_pptr(queue_handle_t h) {
  return h % sizeof(struct queue_node) <
         __builtin_offsetof(struct queue_node, next);
}

static struct state **queue_handle_to_state_pptr(queue_handle_t h) {
  assert(queue_handle_is_state_pptr(h) &&
         "invalid use of queue_handle_to_state_pptr");

  return (struct state **)h;
}

static struct queue_node **queue_handle_to_node_pptr(queue_handle_t h) {
  assert(!queue_handle_is_state_pptr(h) &&
         "invalid use of queue_handle_to_node_pptr");

  return (struct queue_node **)h;
}

static queue_handle_t queue_handle_next(queue_handle_t h) {
  return h + sizeof(struct state *);
}
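
/* A sketch of the tagging trick described above (hypothetical; never called):
 * because nodes are 4K-aligned, the low 12 bits of a handle identify which
 * member of the node it points at.
 */
static __attribute__((unused)) void queue_handle_example(void) {
  struct queue_node *n = queue_node_new();
  queue_handle_t h = queue_handle_from_node_ptr(n);
  struct queue_node *base = queue_handle_base(h);
  bool is_state = queue_handle_is_state_pptr(h);
  ASSERT(base == n && "cannot recover the node from its handle");
  ASSERT(is_state && "handle to the first slot not seen as a state slot");
  queue_node_free(n);
}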

/******************************************************************************/

/*******************************************************************************
 * Hazard pointers                                                             *
 *                                                                             *
 * The idea of "hazard pointers" comes from Maged Michael, "Hazard Pointers:   *
 * Safe Memory Reclamation for Lock-Free Objects" in TPDS 15(8) 2004. The      *
 * basic concept is to maintain a collection of safe-to-dereference pointers.  *
 * Before freeing a pointer, you look in this collection to see if it is in    *
 * use and you must always add a pointer to this collection before             *
 * dereferencing it. The finer details of how we keep this consistent and why  *
 * the size of this collection can be statically known ahead of time are a     *
 * little complicated, but the paper explains this in further detail.          *
 ******************************************************************************/

/* Queue node pointers currently safe to dereference. */
static const struct queue_node *hazarded[THREADS];

/* Protect a pointer that we wish to dereference. */
static void hazard(queue_handle_t h) {

  /* Find the queue node this handle lies within. */
  const struct queue_node *p = queue_handle_base(h);

  /* You can't protect the null pointer because it is invalid to dereference
   * it.
   */
  assert(p != NULL && "attempt to hazard an invalid pointer");

  /* Each thread is only allowed a single hazarded pointer at a time. */
  assert(__atomic_load_n(&hazarded[thread_id], __ATOMIC_SEQ_CST) == NULL &&
         "hazarding multiple pointers at once");

  __atomic_store_n(&hazarded[thread_id], p, __ATOMIC_SEQ_CST);
}

/* Drop protection on a pointer whose target we are done accessing. */
static void unhazard(queue_handle_t h) {

  /* Find the queue node this handle lies within. */
  const struct queue_node *p __attribute__((unused)) = queue_handle_base(h);

  assert(p != NULL && "attempt to unhazard an invalid pointer");

  assert(__atomic_load_n(&hazarded[thread_id], __ATOMIC_SEQ_CST) != NULL &&
         "unhazarding a pointer when none are hazarded");

  assert(__atomic_load_n(&hazarded[thread_id], __ATOMIC_SEQ_CST) == p &&
         "unhazarding a pointer that differs from the one hazarded");

  __atomic_store_n(&hazarded[thread_id], NULL, __ATOMIC_SEQ_CST);
}
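
/* A sketch of the protocol described above (hypothetical; never called):
 * protect a pointer, re-validate the shared source, access the target, then
 * drop protection. The re-validation step after hazard() is what the real
 * callers below perform before dereferencing.
 */
static __attribute__((unused)) void hazard_usage_sketch(void) {
  struct queue_node *n = queue_node_new();
  queue_handle_t h = queue_handle_from_node_ptr(n);
  hazard(h);
  /* ...re-read the shared pointer to confirm it still references n, then
   * dereference it safely...
   */
  unhazard(h);
  queue_node_free(n);
}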

/* Free a pointer or, if not possible, defer this to later. */
static void reclaim(queue_handle_t h) {

  /* Find the queue node this handle lies within. */
  struct queue_node *p = queue_handle_base(h);

  assert(p != NULL && "reclaiming a null pointer");

  /* The reclaimer is not allowed to be freeing something while also holding a
   * hazarded pointer.
   */
  assert(__atomic_load_n(&hazarded[thread_id], __ATOMIC_SEQ_CST) == NULL &&
         "reclaiming a pointer while holding a hazarded pointer");

  /* Pointers that we failed to free initially because they were in use
   * (hazarded) at the time they were passed to reclaim().
   *
   * Why are we sure we will only ever have a maximum of `THREADS - 1` pointers
   * outstanding? Anything passed to reclaim() is expected to be
   * now-unreachable, so the only outstanding references to such are threads
   * racing with us. Because each thread can only have one hazarded pointer at
   * a time, the maximum number of in use pointers right now is `THREADS - 1`
   * (because the current thread does not have one).
   *
   * There is an edge case where another thread (1) held a hazarded pointer we
   * previously tried to reclaim and thus ended up on our deferred list, then
   * (2) in-between that time and now dropped this reference and acquired a
   * hazarded pointer to `p`. This still will not exceed our count of
   * `THREADS - 1` as the order in which we scan the deferred list and then add
   * `p` to it below ensures that we will discover the originally hazarded
   * pointer (now no longer conflicted) and clear its slot, leaving this
   * available for `p`.
   */
  static _Thread_local struct queue_node *deferred[THREADS - 1];
  static const size_t DEFERRED_SIZE = sizeof(deferred) / sizeof(deferred[0]);

  /* First try to free any previously deferred pointers. */
  for (size_t i = 0; i < DEFERRED_SIZE; i++) {
    if (deferred[i] != NULL) {
      bool conflict = false;
      for (size_t j = 0; j < sizeof(hazarded) / sizeof(hazarded[0]); j++) {
        if (j == thread_id) {
          /* No need to check for conflicts with ourself. */
          assert(__atomic_load_n(&hazarded[j], __ATOMIC_SEQ_CST) == NULL);
          continue;
        }
        if (deferred[i] == __atomic_load_n(&hazarded[j], __ATOMIC_SEQ_CST)) {
          /* This pointer is in use by thread j. */
          conflict = true;
          break;
        }
      }
      if (!conflict) {
        queue_node_free(deferred[i]);
        deferred[i] = NULL;
      }
    }
  }

  /* Now deal with the pointer we were passed. The most likely case is that no
   * one else is using this pointer, so try this first.
   */
  bool conflict = false;
  for (size_t i = 0; i < sizeof(hazarded) / sizeof(hazarded[0]); i++) {
    if (i == thread_id) {
      /* No need to check for conflicts with ourself. */
      assert(__atomic_load_n(&hazarded[i], __ATOMIC_SEQ_CST) == NULL);
      continue;
    }
    if (p == __atomic_load_n(&hazarded[i], __ATOMIC_SEQ_CST)) {
      /* Bad luck :( */
      conflict = true;
      break;
    }
  }

  if (!conflict) {
    /* We're done! */
    queue_node_free(p);
    return;
  }

  /* If we reached here, we need to defer this reclamation to later. */
  for (size_t i = 0; i < DEFERRED_SIZE; i++) {
    if (deferred[i] == NULL) {
      deferred[i] = p;
      return;
    }
  }

  ASSERT(!"deferred more than `THREADS - 1` reclamations");
}

/******************************************************************************/

/*******************************************************************************
 * Atomic operations on double word values                                     *
 ******************************************************************************/

#if __SIZEOF_POINTER__ <= 4
typedef uint64_t dword_t;
#elif __SIZEOF_POINTER__ <= 8
typedef unsigned __int128 dword_t;
#else
#error "unexpected pointer size; what scalar type to use for dword_t?"
#endif

static dword_t atomic_read(dword_t *p) {

  if (THREADS == 1) {
    return *p;
  }

#if defined(__x86_64__) || defined(__i386__)
  /* x86-64: MOV is not guaranteed to be atomic on 128-bit naturally aligned
   * memory. The way to work around this is apparently the following degenerate
   * CMPXCHG16B.
   * i386: __atomic_load_n emits code calling a libatomic function that takes a
   * lock, making this no longer lock free. Force a CMPXCHG8B by using the
   * __sync built-in instead.
   */
  return __sync_val_compare_and_swap(p, 0, 0);
#endif

  return __atomic_load_n(p, __ATOMIC_SEQ_CST);
}

static void atomic_write(dword_t *p, dword_t v) {

  if (THREADS == 1) {
    *p = v;
    return;
  }

#if defined(__x86_64__) || defined(__i386__)
  /* As explained above, we need some extra gymnastics to avoid a call to
   * libatomic on x86-64 and i386.
   */
  dword_t expected;
  dword_t old = 0;
  do {
    expected = old;
    old = __sync_val_compare_and_swap(p, expected, v);
  } while (expected != old);
  return;
#endif

  __atomic_store_n(p, v, __ATOMIC_SEQ_CST);
}

static bool atomic_cas(dword_t *p, dword_t expected, dword_t new) {

  if (THREADS == 1) {
    if (*p == expected) {
      *p = new;
      return true;
    }
    return false;
  }

#if defined(__x86_64__) || defined(__i386__)
  /* Make GCC >= 7.1 emit cmpxchg on x86-64 and i386. See
   * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878.
   */
  return __sync_bool_compare_and_swap(p, expected, new);
#endif

  return __atomic_compare_exchange_n(p, &expected, new, false, __ATOMIC_SEQ_CST,
                                     __ATOMIC_SEQ_CST);
}

static dword_t atomic_cas_val(dword_t *p, dword_t expected, dword_t new) {

  if (THREADS == 1) {
    dword_t old = *p;
    if (old == expected) {
      *p = new;
    }
    return old;
  }

#if defined(__x86_64__) || defined(__i386__)
  /* Make GCC >= 7.1 emit cmpxchg on x86-64 and i386. See
   * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878.
   */
  return __sync_val_compare_and_swap(p, expected, new);
#endif

  (void)__atomic_compare_exchange_n(p, &expected, new, false, __ATOMIC_SEQ_CST,
                                    __ATOMIC_SEQ_CST);
  return expected;
}

/******************************************************************************/

/*******************************************************************************
 * Double pointers                                                             *
 *                                                                             *
 * A scalar type that can store the value of two pointers.                     *
 ******************************************************************************/

#if __SIZEOF_POINTER__ <= 4
typedef uint64_t double_ptr_t;
#elif __SIZEOF_POINTER__ <= 8
typedef unsigned __int128 double_ptr_t;
#else
#error "unexpected pointer size; what scalar type to use for double_ptr_t?"
#endif

static uintptr_t double_ptr_extract1(double_ptr_t p) {

  _Static_assert(sizeof(p) > sizeof(uintptr_t),
                 "double_ptr_t is not big enough to fit a pointer");

  uintptr_t q;
  memcpy(&q, &p, sizeof(q));

  return q;
}

static uintptr_t double_ptr_extract2(double_ptr_t p) {

  _Static_assert(sizeof(p) >= 2 * sizeof(uintptr_t),
                 "double_ptr_t is not big enough to fit two pointers");

  uintptr_t q;
  memcpy(&q, (unsigned char *)&p + sizeof(void *), sizeof(q));

  return q;
}

static double_ptr_t double_ptr_make(uintptr_t q1, uintptr_t q2) {

  double_ptr_t p = 0;

  _Static_assert(sizeof(p) >= 2 * sizeof(uintptr_t),
                 "double_ptr_t is not big enough to fit two pointers");

  memcpy(&p, &q1, sizeof(q1));
  memcpy((unsigned char *)&p + sizeof(q1), &q2, sizeof(q2));

  return p;
}
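
/* A sketch of the packing scheme above (hypothetical; never called): two
 * pointer-sized values round-trip through a single double_ptr_t.
 */
static __attribute__((unused)) void double_ptr_example(void) {
  int x, y;
  double_ptr_t p = double_ptr_make((uintptr_t)&x, (uintptr_t)&y);
  uintptr_t q1 = double_ptr_extract1(p);
  uintptr_t q2 = double_ptr_extract2(p);
  ASSERT(q1 == (uintptr_t)&x && "first pointer was mangled");
  ASSERT(q2 == (uintptr_t)&y && "second pointer was mangled");
}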

/******************************************************************************/

/*******************************************************************************
 * State queue                                                                 *
 *                                                                             *
 * The following implements a per-thread queue for pending states. The only    *
 * supported operations are enqueueing and dequeueing states. A property we    *
 * maintain is that all states within all queues pass the current model's      *
 * invariants.                                                                 *
 ******************************************************************************/

static struct {
  double_ptr_t ends;
  size_t count;
} q[THREADS];

static size_t queue_enqueue(struct state *NONNULL s, size_t queue_id) {
  assert(queue_id < sizeof(q) / sizeof(q[0]) && "out of bounds queue access");

  /* Look up the tail of the queue. */

  double_ptr_t ends = atomic_read(&q[queue_id].ends);

retry:;
  queue_handle_t tail = double_ptr_extract2(ends);

  if (tail == 0) {
    /* There's nothing currently in the queue. */

    assert(double_ptr_extract1(ends) == 0 &&
           "tail of queue 0 while head is non-0");

    struct queue_node *n = queue_node_new();
    n->s[0] = s;

    double_ptr_t new = double_ptr_make(queue_handle_from_node_ptr(n),
                                       queue_handle_from_node_ptr(n));

    double_ptr_t old = atomic_cas_val(&q[queue_id].ends, ends, new);
    if (old != ends) {
      /* Failed. */
      queue_node_free(n);
      ends = old;
      goto retry;
    }

  } else {
    /* The queue is non-empty, so we'll need to access the last element. */

    /* Try to protect our upcoming access to the tail. */
    hazard(tail);
    {
      double_ptr_t ends_check = atomic_read(&q[queue_id].ends);
      if (ends != ends_check) {
        /* Failed. Someone else modified the queue in the meantime. */
        unhazard(tail);
        ends = ends_check;
        goto retry;
      }
    }

    /* We've now notified other threads that we're going to be accessing the
     * tail, so we can safely dereference its pointer.
     */

    struct queue_node *new_node = NULL;
    queue_handle_t next_tail = queue_handle_next(tail);

    if (queue_handle_is_state_pptr(next_tail)) {
      /* There's an available slot in this queue node; no need to create a new
       * one.
       */

      {
        struct state **target = queue_handle_to_state_pptr(next_tail);
        struct state *null = NULL;
        if (!__atomic_compare_exchange_n(target, &null, s, false,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
          /* Failed. Someone else enqueued before we could. */
          unhazard(tail);
          goto retry;
        }
      }

    } else {
      /* There's no remaining slot in this queue node. We'll need to create a
       * new (empty) queue node, add our state to this one and then append this
       * node to the queue.
       */

      /* Create the new node. */
      new_node = queue_node_new();
      new_node->s[0] = s;

      /* Try to update the chained pointer of the current tail to point to this
       * new node.
       */
      struct queue_node **target = queue_handle_to_node_pptr(next_tail);
      struct queue_node *null = NULL;
      if (!__atomic_compare_exchange_n(target, &null, new_node, false,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
        /* Failed. Someone else enqueued before we could. */
        queue_node_free(new_node);
        unhazard(tail);
        goto retry;
      }

      /* We now need the tail to point at our new node. */
      next_tail = queue_handle_from_node_ptr(new_node);
    }

    queue_handle_t head = double_ptr_extract1(ends);
    double_ptr_t new = double_ptr_make(head, next_tail);

    /* Try to update the queue. */
    {
      double_ptr_t old = atomic_cas_val(&q[queue_id].ends, ends, new);
      if (old != ends) {
        /* Failed. Someone else dequeued before we could finish. We know the
         * operation that beat us was a dequeue and not an enqueue, because by
         * writing to next_tail we have prevented any other enqueue from
         * succeeding.
         */

        /* Undo the update of next_tail. We know this is safe (non-racy)
         * because no other enqueue can proceed and a dequeue will never look
         * into next_tail due to the way it handles the case when head == tail.
         */
        next_tail = queue_handle_next(tail);
        if (queue_handle_is_state_pptr(next_tail)) {
          /* We previously wrote into an existing queue node. */
          struct state **target = queue_handle_to_state_pptr(next_tail);
          struct state *temp = s;
          bool r __attribute__((unused)) = __atomic_compare_exchange_n(
              target, &temp, NULL, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
          assert(r && "undo of write to next_tail failed");
        } else {
          /* We previously wrote into a new queue node. */
          struct queue_node **target = queue_handle_to_node_pptr(next_tail);
          struct queue_node *temp = new_node;
          bool r __attribute__((unused)) = __atomic_compare_exchange_n(
              target, &temp, NULL, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
          assert(r && "undo of write to next_tail failed");

          queue_node_free(new_node);
        }

        unhazard(tail);
        ends = old;
        goto retry;
      }
    }

    /* Success! */

    unhazard(tail);
  }

  size_t count = __atomic_add_fetch(&q[queue_id].count, 1, __ATOMIC_SEQ_CST);

  TRACE(TC_QUEUE, "enqueued state %p into queue %zu, queue length is now %zu",
        s, queue_id, count);

  return count;
}

static const struct state *queue_dequeue(size_t *NONNULL queue_id) {
  assert(queue_id != NULL && *queue_id < sizeof(q) / sizeof(q[0]) &&
         "out of bounds queue access");

  const struct state *s = NULL;

  for (size_t attempts = 0; attempts < sizeof(q) / sizeof(q[0]); attempts++) {

    double_ptr_t ends = atomic_read(&q[*queue_id].ends);

  retry:;
    queue_handle_t head = double_ptr_extract1(ends);

    if (head != 0) {
      /* This queue is non-empty. */

      /* Try to protect our upcoming accesses to the head. */
      hazard(head);
      {
        double_ptr_t ends_check = atomic_read(&q[*queue_id].ends);
        if (ends != ends_check) {
          /* Failed. Someone else updated the queue. */
          unhazard(head);
          ends = ends_check;
          goto retry;
        }
      }

      queue_handle_t tail = double_ptr_extract2(ends);

      double_ptr_t new;
      if (head == tail) {
        /* There is only a single element in the queue. We will need to update
         * both head and tail.
         */
        new = double_ptr_make(0, 0);
      } else if (queue_handle_is_state_pptr(head)) {
        /* There are multiple elements in the queue; we can deal only with the
         * head.
         */
        new = double_ptr_make(queue_handle_next(head), tail);
      } else {
        /* The head of the queue is the end of a queue node. I.e. the only
         * thing remaining in this queue node is the chained pointer to the
         * next queue node.
         */

        /* Load the next queue node. */
        struct queue_node **n = queue_handle_to_node_pptr(head);
        struct queue_node *new_head = __atomic_load_n(n, __ATOMIC_SEQ_CST);
        new = double_ptr_make(queue_handle_from_node_ptr(new_head), tail);

        /* Try to replace the current head with the next node. */
        double_ptr_t old = atomic_cas_val(&q[*queue_id].ends, ends, new);
        /* Either way, now we'll need to retry, but if we succeeded we also
         * need to free the queue node we just removed.
         */
        unhazard(head);
        if (old == ends) {
          /* Succeeded. */
          reclaim(head);
        }
        ends = old;
        goto retry;
      }

      /* Try to remove the head. */
      {
        double_ptr_t old = atomic_cas_val(&q[*queue_id].ends, ends, new);
        if (old != ends) {
          /* Failed. Someone else either enqueued or dequeued. */
          unhazard(head);
          ends = old;
          goto retry;
        }
      }

      /* We now have either a pointer to a state or we've just removed an
       * empty queue node that was the last in the queue.
       */

      if (queue_handle_is_state_pptr(head)) {
        struct state **st = queue_handle_to_state_pptr(head);
        s = *st;
      }

      unhazard(head);

      if (head == tail || !queue_handle_is_state_pptr(head)) {
        reclaim(head);
      }

      if (s == NULL) {
        /* Move to the next queue to try. */
        *queue_id = (*queue_id + 1) % (sizeof(q) / sizeof(q[0]));
        continue;
      }

      size_t count =
          __atomic_sub_fetch(&q[*queue_id].count, 1, __ATOMIC_SEQ_CST);

      TRACE(TC_QUEUE,
            "dequeued state %p from queue %zu, queue length is now %zu", s,
            *queue_id, count);

      return s;
    }

    /* Move to the next queue to try. */
    *queue_id = (*queue_id + 1) % (sizeof(q) / sizeof(q[0]));
  }

  return s;
}
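
/* A sketch of the queue API above (hypothetical; never called): push a state
 * onto this thread's queue, then pop it back off. Note that queue_dequeue
 * takes its queue identifier by pointer because it may service the request
 * from another thread's queue when the given one is empty.
 */
static __attribute__((unused)) void queue_usage_sketch(struct state *NONNULL s) {
  size_t queue_id = thread_id;
  size_t len = queue_enqueue(s, queue_id);
  ASSERT(len > 0 && "enqueue did not grow the queue");
  const struct state *t = queue_dequeue(&queue_id);
  ASSERT(t != NULL && "dequeue found all queues empty");
}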

/******************************************************************************/

/*******************************************************************************
 * Reference counted pointers                                                  *
 *                                                                             *
 * These are capable of encapsulating any generic pointer (void*). Note that   *
 * we rely on the existence of double-word atomics. On x86-64, you need to     *
 * use compiler flag '-mcx16' to get an efficient 128-bit cmpxchg.             *
 *                                                                             *
 * Of these functions, only the following are thread safe:                     *
 *                                                                             *
 *   * refcounted_ptr_get                                                      *
 *   * refcounted_ptr_peek                                                     *
 *   * refcounted_ptr_put                                                      *
 *   * refcounted_ptr_set                                                      *
 *                                                                             *
 * The caller is expected to coordinate with other threads to exclude them     *
 * operating on the relevant refcounted_ptr_t when using one of the other      *
 * functions:                                                                  *
 *                                                                             *
 *   * refcounted_ptr_shift                                                    *
 *                                                                             *
 ******************************************************************************/

struct refcounted_ptr {
  void *ptr;
  size_t count;
};

#if __SIZEOF_POINTER__ <= 4
typedef uint64_t refcounted_ptr_t;
#elif __SIZEOF_POINTER__ <= 8
typedef unsigned __int128 refcounted_ptr_t;
#else
#error "unexpected pointer size; what scalar type to use for refcounted_ptr_t?"
#endif

_Static_assert(sizeof(struct refcounted_ptr) <= sizeof(refcounted_ptr_t),
               "refcounted_ptr does not fit in a refcounted_ptr_t, which we "
               "need to operate on it atomically");

static void refcounted_ptr_set(refcounted_ptr_t *NONNULL p, void *ptr) {

#ifndef NDEBUG
  /* Read the current state of the pointer. */
  refcounted_ptr_t old = atomic_read(p);
  struct refcounted_ptr p2;
  memcpy(&p2, &old, sizeof(old));
  assert(p2.count == 0 && "overwriting a pointer source while someone still "
                          "has a reference to this pointer");
#endif

  /* Set the current source pointer with no outstanding references. */
  struct refcounted_ptr p3;
  p3.ptr = ptr;
  p3.count = 0;

  /* Commit the result. */
  refcounted_ptr_t new;
  memcpy(&new, &p3, sizeof(p3));
  atomic_write(p, new);
}

static void *refcounted_ptr_get(refcounted_ptr_t *NONNULL p) {

  refcounted_ptr_t old, new;
  void *ret;
  bool r;

  do {

    /* Read the current state of the pointer. */
    old = atomic_read(p);
    struct refcounted_ptr p2;
    memcpy(&p2, &old, sizeof(old));

    /* Take a reference to it. */
    p2.count++;
    ret = p2.ptr;

    /* Try to commit our results. */
    memcpy(&new, &p2, sizeof(new));
    r = atomic_cas(p, old, new);
  } while (!r);

  return ret;
}

static void refcounted_ptr_put(refcounted_ptr_t *NONNULL p,
                               void *ptr __attribute__((unused))) {

  refcounted_ptr_t old, new;
  bool r;

  do {

    /* Read the current state of the pointer. */
    old = atomic_read(p);
    struct refcounted_ptr p2;
    memcpy(&p2, &old, sizeof(old));

    /* Release our reference to it. */
    ASSERT(p2.ptr == ptr && "releasing a reference to a pointer after someone "
                            "has changed the pointer source");
    ASSERT(p2.count > 0 && "releasing a reference to a pointer when it had no "
                           "outstanding references");
    p2.count--;

    /* Try to commit our results. */
    memcpy(&new, &p2, sizeof(new));
    r = atomic_cas(p, old, new);
  } while (!r);
}

static void *refcounted_ptr_peek(refcounted_ptr_t *NONNULL p) {

  /* Read out the state of the pointer. This rather unpleasant expression is
   * designed to emit an atomic load at a smaller granularity than the entire
   * refcounted_ptr structure. Because we only need the pointer -- and not the
   * count -- we can afford to just atomically read the first word.
   */
  void *ptr = __atomic_load_n(
      (void **)((void *)p + __builtin_offsetof(struct refcounted_ptr, ptr)),
      __ATOMIC_SEQ_CST);

  return ptr;
}

static void refcounted_ptr_shift(refcounted_ptr_t *NONNULL current,
                                 refcounted_ptr_t *NONNULL next) {

  /* None of the operations in this function are performed atomically because
   * we assume the caller has synchronised with other threads via other means.
   */

  /* The pointer we're about to overwrite should not be referenced. */
  struct refcounted_ptr p __attribute__((unused));
  memcpy(&p, current, sizeof(*current));
  ASSERT(p.count == 0 && "overwriting a pointer that still has outstanding "
                         "references");

  /* Shift the next value into the current pointer. */
  *current = *next;

  /* Blank the value we just shifted over. */
  *next = 0;
}
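
/* A sketch of the intended pairing of the calls above (hypothetical; never
 * called): a source is set while unreferenced, readers bracket their use of
 * the pointer with get/put, and peek reads the pointer without taking a
 * reference.
 */
static __attribute__((unused)) void refcounted_ptr_example(void) {
  static refcounted_ptr_t rp;
  static int target;
  refcounted_ptr_set(&rp, &target);
  void *p = refcounted_ptr_get(&rp);
  ASSERT(p == (void *)&target && "get returned an unexpected pointer");
  void *peeked = refcounted_ptr_peek(&rp);
  ASSERT(peeked == p && "peek disagrees with get");
  refcounted_ptr_put(&rp, p);
}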

/******************************************************************************/

/*******************************************************************************
 * Thread rendezvous support                                                   *
 ******************************************************************************/

static pthread_mutex_t rendezvous_lock; /* mutual exclusion mechanism for below */
static pthread_cond_t rendezvous_cond;  /* sleep mechanism for below */
static size_t running_count = 1;      /* how many threads are opted in to rendezvous? */
static size_t rendezvous_pending = 1; /* how many threads are opted in and not sleeping? */

static void rendezvous_init(void) {
  int r = pthread_mutex_init(&rendezvous_lock, NULL);
  if (__builtin_expect(r != 0, 0)) {
    fprintf(stderr, "pthread_mutex_init failed: %s\n", strerror(r));
    exit(EXIT_FAILURE);
  }

  r = pthread_cond_init(&rendezvous_cond, NULL);
  if (__builtin_expect(r != 0, 0)) {
    fprintf(stderr, "pthread_cond_init failed: %s\n", strerror(r));
    exit(EXIT_FAILURE);
  }
}

/** call this at the start of a rendezvous point
 *
 * This is a low level function, not expected to be directly used outside of
 * the context of the rendezvous implementation.
 *
 * \return True if the caller was the last to arrive and henceforth dubbed the
 *   'leader'.
 */
static bool rendezvous_arrive(void) {
  int r __attribute__((unused)) = pthread_mutex_lock(&rendezvous_lock);
  assert(r == 0);

  /* Take a token from the rendezvous down-counter. */
  assert(rendezvous_pending > 0);
  rendezvous_pending--;

  /* If we were the last to arrive then it was our arrival that dropped the
   * counter to zero.
   */
  return rendezvous_pending == 0;
}

/** call this at the end of a rendezvous point
 *
 * This is a low level function, not expected to be directly used outside of
 * the context of the rendezvous implementation.
 *
 * \param leader Whether the caller is the 'leader'. If you call this when you
 *   are the 'leader' it will unblock all 'followers' at the rendezvous point.
 * \param action Optional code for the leader to run.
 */
static void rendezvous_depart(bool leader, void (*action)(void)) {
  int r __attribute__((unused));

  if (leader) {
    if (action != NULL) {
      action();
    }

    /* Reset the counter for the next rendezvous. */
    assert(rendezvous_pending == 0 &&
           "a rendezvous point is being exited "
           "while some participating threads have yet to arrive");
    rendezvous_pending = running_count;

    /* Wake up the 'followers'. */
    r = pthread_cond_broadcast(&rendezvous_cond);
    assert(r == 0);

  } else {

    /* Wait on the 'leader' to wake us up. */
    r = pthread_cond_wait(&rendezvous_cond, &rendezvous_lock);
    assert(r == 0);
  }

  r = pthread_mutex_unlock(&rendezvous_lock);
  assert(r == 0);
}

/* Exposed friendly function for performing a rendezvous. */
static void rendezvous(void (*action)(void)) {
  bool leader = rendezvous_arrive();
  if (leader) {
    TRACE(TC_SET, "arrived at rendezvous point as leader");
  }
  rendezvous_depart(leader, action);
}

/* Remove the caller from the pool of threads who participate in this
 * rendezvous.
 */
static void rendezvous_opt_out(void (*action)(void)) {

retry:;

  /* "Arrive" at the rendezvous to decrement the count of outstanding threads.
   */
  bool leader = rendezvous_arrive();

  if (leader && running_count > 1) {
    /* We unfortunately opted out of this rendezvous while the remaining
     * threads were arriving at one and we were the last to arrive. Let's
     * pretend we are participating in the rendezvous and unblock them.
     */
    rendezvous_depart(true, action);

    /* Re-attempt opting-out. */
    goto retry;
  }

  /* Remove ourselves from the known threads. */
  assert(running_count > 0);
  running_count--;

  int r __attribute__((unused)) = pthread_mutex_unlock(&rendezvous_lock);
  assert(r == 0);
}

/******************************************************************************/

/*******************************************************************************
 * 'Slots', an opaque wrapper around a state pointer                           *
 *                                                                             *
 * See usage of this in the state set below for its purpose.                   *
 ******************************************************************************/

typedef uintptr_t slot_t;

static __attribute__((const)) slot_t slot_empty(void) { return 0; }

static __attribute__((const)) bool slot_is_empty(slot_t s) {
  return s == slot_empty();
}

static __attribute__((const)) slot_t slot_tombstone(void) {
  static const slot_t TOMBSTONE = ~(slot_t)0;
  return TOMBSTONE;
}

static __attribute__((const)) bool slot_is_tombstone(slot_t s) {
  return s == slot_tombstone();
}

static struct state *slot_to_state(slot_t s) {
  ASSERT(!slot_is_empty(s));
  ASSERT(!slot_is_tombstone(s));
  return (struct state *)s;
}

static slot_t state_to_slot(const struct state *s) { return (slot_t)s; }
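
/* A sketch of the three distinguishable slot states used below (hypothetical;
 * never called): a slot is empty, a tombstone, or a state pointer.
 */
static __attribute__((unused)) void slot_example(void) {
  slot_t e = slot_empty();
  slot_t t = slot_tombstone();
  bool e_is_empty = slot_is_empty(e);
  bool t_is_tombstone = slot_is_tombstone(t);
  bool e_is_tombstone = slot_is_tombstone(e);
  ASSERT(e_is_empty && "empty slot not recognised as empty");
  ASSERT(t_is_tombstone && "tombstone slot not recognised as a tombstone");
  ASSERT(!e_is_tombstone && "empty slot mistaken for a tombstone");
}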

/******************************************************************************/

/*******************************************************************************
 * State set                                                                   *
 *                                                                             *
 * The following implementation provides a set for storing the seen states.    *
 * There is no support for removing elements, only thread-safe insertion of    *
 * elements.                                                                   *
 ******************************************************************************/

enum {
  INITIAL_SET_SIZE_EXPONENT =
      sizeof(unsigned long long) * 8 - 1 -
      __builtin_clzll(SET_CAPACITY / sizeof(struct state *) /
                      sizeof(struct state))
};

struct set {
  slot_t *bucket;
  size_t size_exponent;
};

/* Some utility functions for dealing with exponents. */

static size_t set_size(const struct set *NONNULL set) {
  return ((size_t)1) << set->size_exponent;
}

static size_t set_index(const struct set *NONNULL set, size_t index) {
  return index & (set_size(set) - 1);
}
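
/* A worked example of the two helpers above (hypothetical; never called):
 * with a size exponent of 4 the set has 16 slots, and probing wraps modulo 16
 * via the bitmask, avoiding a division.
 */
static __attribute__((unused)) void set_index_example(void) {
  struct set example = {.bucket = NULL, .size_exponent = 4};
  size_t sz = set_size(&example);
  ASSERT(sz == 16 && "unexpected set size");
  size_t idx = set_index(&example, 21);
  ASSERT(idx == 5 && "probe index did not wrap modulo the set size");
}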

/* The states we have encountered. This collection will only ever grow while
 * checking the model. Note that we have a global reference-counted pointer and
 * a local bare pointer. See below for an explanation.
 */
static refcounted_ptr_t global_seen;
static _Thread_local struct set *local_seen;

/* Number of elements in the global set (i.e. occupancy). */
static size_t seen_count;

/* The "next" 'global_seen' value. See below for an explanation. */
static refcounted_ptr_t next_global_seen;

/* Now the explanation I teased... When the set capacity exceeds a threshold
 * (see 'set_expand' related logic below) it is expanded and the reference
 * tracking within the 'refcounted_ptr_t's comes into play. We need to allocate
 * a new seen set ('next_global_seen'), copy over all the elements (done in
 * 'set_migrate'), and then "shift" the new set to become the current set. The
 * role of the reference counts of both 'global_seen' and 'next_global_seen' in
 * all of this is to detect when the last thread releases its reference to the
 * old seen set, so that it can then be deallocated.
 */

/* The next chunk to migrate from the old set to the new set. What exactly a
 * "chunk" is is covered in 'set_migrate'.
 */
static size_t next_migration;

/* A mechanism for synchronisation in 'set_expand'. */
static pthread_mutex_t set_expand_mutex;

static void set_expand_lock(void) {
  if (THREADS > 1) {
    int r __attribute__((unused)) = pthread_mutex_lock(&set_expand_mutex);
    ASSERT(r == 0);
  }
}

static void set_expand_unlock(void) {
  if (THREADS > 1) {
    int r __attribute__((unused)) = pthread_mutex_unlock(&set_expand_mutex);
    ASSERT(r == 0);
  }
}

static void set_init(void) {

  if (THREADS > 1) {
    int r = pthread_mutex_init(&set_expand_mutex, NULL);
    if (__builtin_expect(r != 0, 0)) {
      fprintf(stderr, "pthread_mutex_init failed: %s\n", strerror(r));
      exit(EXIT_FAILURE);
    }
  }

  /* Allocate the set we'll store seen states in at some conservative initial
   * size.
   */
  struct set *set = xmalloc(sizeof(*set));
  set->size_exponent = INITIAL_SET_SIZE_EXPONENT;
  set->bucket = xcalloc(set_size(set), sizeof(set->bucket[0]));

  /* Stash this somewhere for threads to later retrieve it from. Note that we
   * initialize its reference count to zero as we (the setup logic) are not
   * using it beyond this function.
   */
  refcounted_ptr_set(&global_seen, set);
}

static void set_thread_init(void) {
  /* Take a local reference to the global seen set. */
  local_seen = refcounted_ptr_get(&global_seen);
}

static void set_update(void) {
  /* Guard against the case where we've been called from exit_with() and we're
   * not finishing a migration, but just opting out of the rendezvous protocol.
   */
  if (refcounted_ptr_peek(&next_global_seen) != NULL) {

    /* Clean up the old set. Note that we are using a pointer we have already
     * given up our reference count to here, but we rely on the caller to
     * ensure this access is safe.
     */
    free(local_seen->bucket);
    free(local_seen);

    /* Reset migration state for the next time we expand the set. */
    next_migration = 0;

    /* Update the global pointer to the new set. We know all the above
     * migrations have completed and no one needs the old set.
     */
    refcounted_ptr_shift(&global_seen, &next_global_seen);
  }
}

static void set_migrate(void) {

  TRACE(TC_SET, "assisting in set migration...");

  /* Size of a migration chunk. Threads in this function grab a chunk at a time
   * to migrate.
   */
  enum { CHUNK_SIZE = 4096 / sizeof(local_seen->bucket[0]) /* slots */ };

  /* Take a pointer to the target set for the migration. */
  struct set *next = refcounted_ptr_get(&next_global_seen);

  for (;;) {

    size_t chunk = __atomic_fetch_add(&next_migration, 1, __ATOMIC_SEQ_CST);
    size_t start = chunk * CHUNK_SIZE;
    size_t end = start + CHUNK_SIZE;

    /* Bail out if we've finished migrating all of the set. */
    if (start >= set_size(local_seen)) {
      break;
    }

    /* TODO: The following algorithm assumes insertions can collide. That is,
     * it operates atomically on slots because another thread could be
     * migrating and also targeting the same slot. If we were to more closely
     * stick to the Maier design this would not be required.
     */

    for (size_t i = start; i < end; i++) {

      /* retrieve the slot element and mark it as migrated */
      slot_t s = __atomic_exchange_n(&local_seen->bucket[i], slot_tombstone(),
                                     __ATOMIC_SEQ_CST);
      ASSERT(!slot_is_tombstone(s) && "attempted double slot migration");

      /* If the current slot contained a state, rehash it and insert it into
       * the new set. Note we don't need to do any state comparisons because we
       * know everything in the old set is unique.
       */
      if (!slot_is_empty(s)) {
        size_t index = set_index(next, state_hash(slot_to_state(s)));
        /* insert and shuffle any colliding entries one along */
        for (size_t j = index; !slot_is_empty(s); j = set_index(next, j + 1)) {
          s = __atomic_exchange_n(&next->bucket[j], s, __ATOMIC_SEQ_CST);
        }
      }
    }
  }

  /* Release our reference to the old set now we're done with it. */
  refcounted_ptr_put(&global_seen, local_seen);

  /* Now we need to make sure all the threads get to this point before any one
   * thread leaves. The purpose of this is to guarantee we only ever have at
   * most two seen sets "in flight". Without this rendezvous, one thread could
   * race ahead, fill the new set, and then decide to expand again while some
   * are still working on the old set. It's possible to make such a scheme work
   * but the synchronisation requirements just seem too complicated.
   */
  rendezvous(set_update);

  /* We're now ready to resume model checking. Note that we already have a
   * (reference counted) pointer to the now-current global seen set, so we
   * don't need to take a fresh reference to it.
   */
  local_seen = next;
}

static void set_expand(void) {

  /* Using double-checked locking, we look to see if someone else has already
   * started expanding the set. We do this by first checking before acquiring
   * the set mutex, and then later again checking after we've acquired the
   * mutex. The idea here is that, with multiple threads, you'll frequently
   * find someone else beat you to expansion and you can jump straight to
   * helping them with migration without having the expense of acquiring the
   * set mutex.
   */
  if (THREADS > 1 && refcounted_ptr_peek(&next_global_seen) != NULL) {
    /* Someone else already expanded it. Join them in the migration effort. */
    TRACE(TC_SET, "attempted expansion failed because another thread got "
                  "there first");
    set_migrate();
    return;
  }

  set_expand_lock();

  /* Check again, as described above. */
  if (THREADS > 1 && refcounted_ptr_peek(&next_global_seen) != NULL) {
    set_expand_unlock();
    TRACE(TC_SET, "attempted expansion failed because another thread got "
                  "there first");
    set_migrate();
    return;
  }

  TRACE(TC_SET, "expanding set from %zu slots to %zu slots...",
        set_size(local_seen), set_size(local_seen) * 2);
3410
3411 /* Create a set of double the size. */
3412 struct set *set = xmalloc(sizeof(*set));
3413 set->size_exponent = local_seen->size_exponent + 1;
3414 set->bucket = xcalloc(set_size(set), sizeof(set->bucket[0]));
3415
3416 /* Advertise this as the newly expanded global set. */
3417 refcounted_ptr_set(&next_global_seen, set);
3418
3419 /* We now need to migrate all slots from the old set to the new one, but we
3420 * can do this multithreaded.
3421 */
3422 set_expand_unlock();
3423 set_migrate();
3424 }
3425
set_insert(struct state * NONNULL s,size_t * NONNULL count)3426 static bool set_insert(struct state *NONNULL s, size_t *NONNULL count) {
3427
3428 restart:;
3429
3430 if (__atomic_load_n(&seen_count, __ATOMIC_SEQ_CST) * 100 /
3431 set_size(local_seen) >=
3432 SET_EXPAND_THRESHOLD)
3433 set_expand();
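
  /* For example, assuming SET_EXPAND_THRESHOLD was generated as 65 and the
   * current set has 4096 slots, the integer arithmetic above first reaches the
   * threshold when seen_count hits 2663 (2663 * 100 / 4096 == 65), so we
   * expand before probing.
   */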

  size_t index = set_index(local_seen, state_hash(s));

  size_t attempts = 0;
  for (size_t i = index; attempts < set_size(local_seen);
       i = set_index(local_seen, i + 1)) {

    /* Guess that the current slot is empty and try to insert here. */
    slot_t c = slot_empty();
    if (__atomic_compare_exchange_n(&local_seen->bucket[i], &c,
                                    state_to_slot(s), false, __ATOMIC_SEQ_CST,
                                    __ATOMIC_SEQ_CST)) {
      /* Success */
      *count = __atomic_add_fetch(&seen_count, 1, __ATOMIC_SEQ_CST);
      TRACE(TC_SET, "added state %p, set size is now %zu", s, *count);

      /* The maximum possible size of the seen state set should be constrained
       * by the number of possible states based on how many bits we are using
       * to represent the state data.
       */
      if (STATE_SIZE_BITS < sizeof(size_t) * CHAR_BIT) {
        assert(*count <= ((size_t)1) << STATE_SIZE_BITS &&
               "seen set size exceeds total possible number of states");
      }

      /* Update statistics if `--trace memory_usage` is in effect. Note that we
       * do this here (when a state is being added to the seen set) rather than
       * when the state was originally allocated to ensure that the final
       * allocation figures do not include transient states that we allocated
       * and then discarded as duplicates.
       */
      size_t depth = 0;
#if BOUND > 0
      depth = (size_t)state_bound_get(s);
#endif
      register_allocation(depth);

      return true;
    }

    if (slot_is_tombstone(c)) {
      /* This slot has been migrated. We need to rendezvous with other
       * migrating threads and restart our insertion attempt on the newly
       * expanded set.
       */
      set_migrate();
      goto restart;
    }
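
    /* Note that set_migrate() leaves local_seen pointing at the new, larger
     * set, so the restart above re-hashes and probes the expanded table rather
     * than the one we were just evicted from.
     */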

    /* If we find this already in the set, we're done. */
    if (state_eq(s, slot_to_state(c))) {
      TRACE(TC_SET, "skipped adding state %p that was already in set", s);
      return false;
    }

    attempts++;
  }

  /* If we reach here, the set is full. Expand it and retry the insertion. */
  set_expand();
  return set_insert(s, count);
}

/* Find an existing element in the set.
 *
 * Why would you ever want to do this? If you already have the state, why do
 * you want to find a copy of it? The answer is for liveness information. When
 * checking liveness properties, a duplicate of your current state that is
 * already contained in the state set might know that some of the liveness
 * properties your current state considers unknown are, in fact, satisfied.
 */
static __attribute__((unused)) const struct state *
set_find(const struct state *NONNULL s) {

  assert(s != NULL);

  size_t index = set_index(local_seen, state_hash(s));

  size_t attempts = 0;
  for (size_t i = index; attempts < set_size(local_seen);
       i = set_index(local_seen, i + 1)) {

    slot_t slot = __atomic_load_n(&local_seen->bucket[i], __ATOMIC_SEQ_CST);

    /* This function is only expected to be called during the final liveness
     * scan, in which all threads participate. So we should never encounter a
     * set undergoing migration.
     */
    ASSERT(!slot_is_tombstone(slot) &&
           "tombstone encountered during final phase");

    if (slot_is_empty(slot)) {
      /* reached the end of the linear block in which this state could lie */
      break;
    }

    const struct state *n = slot_to_state(slot);
    ASSERT(n != NULL && "null pointer stored in state set");

    if (state_eq(s, n)) {
      /* found */
      return n;
    }

    attempts++;
  }

  /* not found */
  return NULL;
}
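
/* A hedged sketch of how set_find is intended to be used by the generated
 * liveness code (the actual caller, check_liveness_final, is emitted by the
 * code generator, so the names below are illustrative only):
 *
 *   const struct state *dup = set_find(successor);
 *   if (dup != NULL) {
 *     learn_liveness(s, dup); // absorb liveness bits the duplicate knows
 *   }
 */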

/******************************************************************************/

static time_t START_TIME;

static unsigned long long gettime(void) {
  return (unsigned long long)(time(NULL) - START_TIME);
}

#if LIVENESS_COUNT > 0
/* Set one of the liveness bits (i.e. mark the matching property as 'hit') in a
 * state and all its predecessors.
 */
static __attribute__((unused)) void mark_liveness(struct state *NONNULL s,
                                                  size_t index, bool shared) {

  assert(s != NULL);
  ASSERT(index < sizeof(s->liveness) * CHAR_BIT &&
         "out of range liveness write");

  size_t word_index = index / (sizeof(s->liveness[0]) * CHAR_BIT);
  size_t bit_index = index % (sizeof(s->liveness[0]) * CHAR_BIT);
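  /* e.g. with 64-bit uintptr_t words, liveness bit 70 lives in s->liveness[1]
   * at bit offset 6, since 70 / 64 == 1 and 70 % 64 == 6
   */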

  uintptr_t previous_value;
  uintptr_t *target = &s->liveness[word_index];
  uintptr_t mask = ((uintptr_t)1) << bit_index;

  if (shared) {
    /* If this state is shared (accessible by other threads) we need to operate
     * on its liveness data atomically.
     */
    previous_value = __atomic_fetch_or(target, mask, __ATOMIC_SEQ_CST);
  } else {
    /* Otherwise we can use a cheaper ordinary OR. */
    previous_value = *target;
    *target |= mask;
  }

  /* Cheat a little and cast away the constness of the previous state for which
   * we may need to update liveness data.
   */
  struct state *previous = state_drop_const(state_previous_get(s));

  /* If the given bit was already set, we know all the predecessors of this
   * state have already had their corresponding bit marked. However, if it was
   * not we now need to recurse to mark them. Note that we assume any
   * predecessors of this state are globally visible and hence shared. The
   * recursion here can be arbitrarily deep, but we assume the compiler can
   * tail-optimise this call.
   */
  if ((previous_value & mask) != mask && previous != NULL) {
    mark_liveness(previous, index, true);
  }
}

/* number of unknown liveness properties for a given state */
static unsigned long unknown_liveness(const struct state *NONNULL s) {

  assert(s != NULL);

  unsigned long unknown = 0;

  for (size_t i = 0; i < sizeof(s->liveness) / sizeof(s->liveness[0]); i++) {

    uintptr_t word = __atomic_load_n(&s->liveness[i], __ATOMIC_SEQ_CST);

    for (size_t j = 0; j < sizeof(s->liveness[0]) * CHAR_BIT; j++) {
      if (i * sizeof(s->liveness[0]) * CHAR_BIT + j >= LIVENESS_COUNT) {
        break;
      }

      if (!((word >> j) & 0x1)) {
        unknown++;
      }
    }
  }

  return unknown;
}

/** learn new liveness information about the state `s` from its successor
 *
 * Note that typically `state_previous_get(successor) != s` because `successor`
 * is actually one of the de-duped aliases of the original successor to `s`.
 *
 * \param s State to learn information about
 * \param successor Successor to s
 * \return Number of new liveness information facts learnt
 */
static unsigned long learn_liveness(struct state *NONNULL s,
                                    const struct state *NONNULL successor) {

  assert(s != NULL);
  assert(successor != NULL);

  unsigned long new_info = 0;

  for (size_t i = 0; i < sizeof(s->liveness) / sizeof(s->liveness[0]); i++) {

    uintptr_t word_src =
        __atomic_load_n(&successor->liveness[i], __ATOMIC_SEQ_CST);
    uintptr_t word_dst = __atomic_load_n(&s->liveness[i], __ATOMIC_SEQ_CST);

    for (size_t j = 0; j < sizeof(s->liveness[0]) * CHAR_BIT; j++) {
      if (i * sizeof(s->liveness[0]) * CHAR_BIT + j >= LIVENESS_COUNT) {
        break;
      }

      bool live_src = !!((word_src >> j) & 0x1);
      bool live_dst = !!((word_dst >> j) & 0x1);

      if (!live_dst && live_src) {
        mark_liveness(s, i * sizeof(s->liveness[0]) * CHAR_BIT + j, true);
        new_info++;
      }
    }
  }

  return new_info;
}
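
/* For example, if the successor's first liveness word has bit 2 set while
 * `s`'s does not, the loop above calls mark_liveness(s, 2, true), propagating
 * the fact to `s` and all its predecessors, and learn_liveness returns 1
 * (assuming no other bits differ).
 */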
#endif

/* Prototypes for generated functions. */
static void init(void);
static _Noreturn void explore(void);
#if LIVENESS_COUNT > 0
static void check_liveness_final(void);
static unsigned long check_liveness_summarise(void);
#endif

static int exit_with(int status) {

  /* Opt out of the thread-wide rendezvous protocol. */
  refcounted_ptr_put(&global_seen, local_seen);
  rendezvous_opt_out(set_update);
  local_seen = NULL;

  /* Make fired rule count visible globally. */
  rules_fired[thread_id] = rules_fired_local;

  if (thread_id == 0) {
    /* We are the initial thread. Wait on the others before exiting. */
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
    for (size_t i = 0; phase == RUN && i < sizeof(threads) / sizeof(threads[0]);
         i++) {
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
      void *ret;
      int r = pthread_join(threads[i], &ret);
      if (__builtin_expect(r != 0, 0)) {
        flockfile(stdout);
        fprintf(stderr, "failed to join thread: %s\n", strerror(r));
        funlockfile(stdout);
        continue;
      }
      status |= (int)(intptr_t)ret;
    }

    /* We're now single-threaded again. */

    /* Reacquire a pointer to the seen set. Note that this may not be the same
     * value as what we previously had in local_seen because the other threads
     * may have expanded and migrated the seen set in the meantime.
     */
    local_seen = refcounted_ptr_get(&global_seen);

    if (error_count == 0) {
      /* If we didn't see any other errors, print cover information. */
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
      for (size_t i = 0; i < sizeof(covers) / sizeof(covers[0]); i++) {
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
        if (MACHINE_READABLE_OUTPUT) {
          put("<cover_result message=\"");
          xml_printf(COVER_MESSAGES[i]);
          put("\" count=\"");
          put_uint(covers[i]);
          put("\"/>\n");
        }
        if (covers[i] == 0) {
          if (!MACHINE_READABLE_OUTPUT) {
            put("\t");
            put(red());
            put(bold());
            put("cover \"");
            put(COVER_MESSAGES[i]);
            put("\" not hit");
            put(reset());
            put("\n");
          }
          error_count++;
          status = EXIT_FAILURE;
        } else if (!MACHINE_READABLE_OUTPUT) {
          put("\t");
          put(green());
          put(bold());
          put("cover \"");
          put(COVER_MESSAGES[i]);
          put("\" hit ");
          put_uint(covers[i]);
          put(" times");
          put(reset());
          put("\n");
        }
      }
    }

#if LIVENESS_COUNT > 0
    /* If we have liveness properties to assess and have seen no previous
     * errors, do a final check of them now.
     */
    if (error_count == 0) {
      check_liveness_final();

      unsigned long failed = check_liveness_summarise();
      if (failed > 0) {
        error_count += failed;
        status = EXIT_FAILURE;
      }
    }
#endif

    if (!MACHINE_READABLE_OUTPUT) {
      put("\n"
          "===================================================================="
          "======\n"
          "\n"
          "Status:\n"
          "\n");
      if (error_count == 0) {
        put("\t");
        put(green());
        put(bold());
        put("No error found.");
        put(reset());
        put("\n");
      } else {
        put("\t");
        put(red());
        put(bold());
        put_uint(error_count);
        put(" error(s) found.");
        put(reset());
        put("\n");
      }
      put("\n");
    }

    /* Calculate the total number of rules fired. */
    uintmax_t fire_count = 0;
    for (size_t i = 0; i < sizeof(rules_fired) / sizeof(rules_fired[0]); i++) {
      fire_count += rules_fired[i];
    }

    /* Paranoid check that we didn't miscount during set insertions/expansions.
     */
#ifndef NDEBUG
    size_t count = 0;
    for (size_t i = 0; i < set_size(local_seen); i++) {
      if (!slot_is_empty(local_seen->bucket[i])) {
        count++;
      }
    }
    assert(count == seen_count && "seen set count is inconsistent at exit");
#endif

    if (MACHINE_READABLE_OUTPUT) {
      put("<summary states=\"");
      put_uint(seen_count);
      put("\" rules_fired=\"");
      put_uint(fire_count);
      put("\" errors=\"");
      put_uint(error_count);
      put("\" duration_seconds=\"");
      put_uint(gettime());
      put("\"/>\n");
      put("</rumur_run>\n");
    } else {
      put("State Space Explored:\n"
          "\n"
          "\t");
      put_uint(seen_count);
      put(" states, ");
      put_uint(fire_count);
      put(" rules fired in ");
      put_uint(gettime());
      put("s.\n");
    }

    /* print memory usage statistics if `--trace memory_usage` is in effect */
    print_allocation_summary();

    exit(status);
  } else {
    pthread_exit((void *)(intptr_t)status);
  }
}

static void *thread_main(void *arg) {

  /* Initialize (thread-local) thread identifier. */
  thread_id = (size_t)(uintptr_t)arg;

  set_thread_init();

  explore();
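  /* no explicit return is needed: explore() is declared _Noreturn */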
}

static void start_secondary_threads(void) {

  /* XXX: Kind of hacky. We've left the rendezvous down-counter at 1 until now
   * in case we triggered a rendezvous before starting the other threads (it
   * *can* happen). We bump it immediately -- i.e. before starting *any* of the
   * secondary threads -- because any of them could race us and trigger a
   * rendezvous before we exit the below loop and they need to know about the
   * thundering herd bearing down on them. It is safe to do this without
   * holding rendezvous_lock because we are still single threaded at this
   * point.
   */
  assert(running_count == 1);
  running_count = THREADS;
  assert(rendezvous_pending == 1);
  rendezvous_pending = THREADS;

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wtautological-compare"
#pragma clang diagnostic ignored "-Wtautological-unsigned-zero-compare"
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
  for (size_t i = 0; i < sizeof(threads) / sizeof(threads[0]); i++) {
#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
    int r = pthread_create(&threads[i], NULL, thread_main,
                           (void *)(uintptr_t)(i + 1));
    if (__builtin_expect(r != 0, 0)) {
      fprintf(stderr, "pthread_create failed: %s\n", strerror(r));
      exit(EXIT_FAILURE);
    }
  }
}

int main(void) {

  if (COLOR == AUTO)
    istty = isatty(STDOUT_FILENO) != 0;

  /* We don't need to read anything from stdin, so discard it. */
  (void)fclose(stdin);

  sandbox();

  if (MACHINE_READABLE_OUTPUT) {
    put("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
        "<rumur_run>\n"
        "<information state_size_bits=\"");
    put_uint(STATE_SIZE_BITS);
    put("\" state_size_bytes=\"");
    put_uint(STATE_SIZE_BYTES);
    put("\" hash_table_slots=\"");
    put_uint(((size_t)1) << INITIAL_SET_SIZE_EXPONENT);
    put("\"/>\n");
  } else {
    put("Memory usage:\n"
        "\n"
        "\t* The size of each state is ");
    put_uint(STATE_SIZE_BITS);
    put(" bits (rounded up to ");
    put_uint(STATE_SIZE_BYTES);
    put(" bytes).\n"
        "\t* The size of the hash table is ");
    put_uint(((size_t)1) << INITIAL_SET_SIZE_EXPONENT);
    put(" slots.\n"
        "\n");
  }

#ifndef NDEBUG
  state_print_field_offsets();
#endif

  START_TIME = time(NULL);

  rendezvous_init();

  set_init();

  set_thread_init();

  init();

  if (!MACHINE_READABLE_OUTPUT) {
    put("Progress Report:\n\n");
  }

  explore();
}