1 /*
2 * kmp_lock.cpp -- lock-related functions
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include <stddef.h>
14 #include <atomic>
15
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_io.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23
24 #if KMP_USE_FUTEX
25 #include <sys/syscall.h>
26 #include <unistd.h>
27 // We should really include <futex.h>, but that causes compatibility problems on
28 // different Linux* OS distributions that either require that you include (or
29 // break when you try to include) <pci/types.h>. Since all we need is the two
30 // macros below (which are part of the kernel ABI, so can't change) we just
31 // define the constants here and don't include <futex.h>
32 #ifndef FUTEX_WAIT
33 #define FUTEX_WAIT 0
34 #endif
35 #ifndef FUTEX_WAKE
36 #define FUTEX_WAKE 1
37 #endif
38 #endif
39
40 /* Implement spin locks for internal library use. */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
42
__kmp_validate_locks(void)43 void __kmp_validate_locks(void) {
44 int i;
45 kmp_uint32 x, y;
46
47 /* Check to make sure unsigned arithmetic does wraps properly */
48 x = ~((kmp_uint32)0) - 2;
49 y = x - 2;
50
51 for (i = 0; i < 8; ++i, ++x, ++y) {
52 kmp_uint32 z = (x - y);
53 KMP_ASSERT(z == 2);
54 }
55
56 KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57 }
58
59 /* ------------------------------------------------------------------------ */
60 /* test and set locks */
61
62 // For the non-nested locks, we can only assume that the first 4 bytes were
63 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
65 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66 //
67 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
68 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
69
// Return the gtid of the thread currently holding the TAS lock, or -1 if
// the lock is free. The poll word stores gtid + 1 (0 == free), so strip
// any tag bits and subtract 1. Relaxed load: callers use this only for
// error checking / ownership hints, not for synchronization.
static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
}
73
// A TAS lock is nestable iff depth_locked != -1; simple (non-nested) locks
// are initialized with depth_locked == -1 (see __kmp_init_nested_tas_lock).
static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}
77
// Blocking acquire of a test-and-set lock. Fast path: a single relaxed load
// followed by an acquire CAS from free -> (gtid + 1). Slow path: spin with
// exponential backoff until the CAS succeeds. Always returns
// KMP_LOCK_ACQUIRED_FIRST (TAS locks have no nesting at this level).
__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);

  // Fast path: test first with a cheap relaxed load to avoid hammering the
  // cache line with CAS traffic, then CAS with acquire semantics.
  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  // Contended path: back off between attempts, yielding when the machine is
  // oversubscribed. The load-then-CAS pattern in the loop condition again
  // avoids issuing a CAS while the lock is visibly held.
  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  do {
    __kmp_spin_backoff(&backoff);
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}
110
__kmp_acquire_tas_lock(kmp_tas_lock_t * lck,kmp_int32 gtid)111 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
112 int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
113 return retval;
114 }
115
__kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t * lck,kmp_int32 gtid)116 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
117 kmp_int32 gtid) {
118 char const *const func = "omp_set_lock";
119 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
120 __kmp_is_tas_lock_nestable(lck)) {
121 KMP_FATAL(LockNestableUsedAsSimple, func);
122 }
123 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
124 KMP_FATAL(LockIsAlreadyOwned, func);
125 }
126 return __kmp_acquire_tas_lock(lck, gtid);
127 }
128
__kmp_test_tas_lock(kmp_tas_lock_t * lck,kmp_int32 gtid)129 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
130 kmp_int32 tas_free = KMP_LOCK_FREE(tas);
131 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
132 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
133 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
134 KMP_FSYNC_ACQUIRED(lck);
135 return TRUE;
136 }
137 return FALSE;
138 }
139
__kmp_test_tas_lock_with_checks(kmp_tas_lock_t * lck,kmp_int32 gtid)140 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
141 kmp_int32 gtid) {
142 char const *const func = "omp_test_lock";
143 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
144 __kmp_is_tas_lock_nestable(lck)) {
145 KMP_FATAL(LockNestableUsedAsSimple, func);
146 }
147 return __kmp_test_tas_lock(lck, gtid);
148 }
149
// Release a simple TAS lock: store the free value with release semantics so
// all writes made in the critical section become visible to the next owner.
int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_FSYNC_RELEASING(lck);
  // Release store hands the lock to whichever spinner's CAS wins next.
  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  // On oversubscribed machines, yield so a waiting thread can run.
  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}
160
__kmp_release_tas_lock_with_checks(kmp_tas_lock_t * lck,kmp_int32 gtid)161 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
162 kmp_int32 gtid) {
163 char const *const func = "omp_unset_lock";
164 KMP_MB(); /* in case another processor initialized lock */
165 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
166 __kmp_is_tas_lock_nestable(lck)) {
167 KMP_FATAL(LockNestableUsedAsSimple, func);
168 }
169 if (__kmp_get_tas_lock_owner(lck) == -1) {
170 KMP_FATAL(LockUnsettingFree, func);
171 }
172 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
173 (__kmp_get_tas_lock_owner(lck) != gtid)) {
174 KMP_FATAL(LockUnsettingSetByAnother, func);
175 }
176 return __kmp_release_tas_lock(lck, gtid);
177 }
178
// Initialize a simple TAS lock to the unlocked state.
void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
  lck->lk.poll = KMP_LOCK_FREE(tas);
}
182
// Destroy a simple TAS lock by zeroing the poll word.
void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
184
__kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t * lck)185 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
186 char const *const func = "omp_destroy_lock";
187 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
188 __kmp_is_tas_lock_nestable(lck)) {
189 KMP_FATAL(LockNestableUsedAsSimple, func);
190 }
191 if (__kmp_get_tas_lock_owner(lck) != -1) {
192 KMP_FATAL(LockStillOwned, func);
193 }
194 __kmp_destroy_tas_lock(lck);
195 }
196
197 // nested test and set locks
198
__kmp_acquire_nested_tas_lock(kmp_tas_lock_t * lck,kmp_int32 gtid)199 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
200 KMP_DEBUG_ASSERT(gtid >= 0);
201
202 if (__kmp_get_tas_lock_owner(lck) == gtid) {
203 lck->lk.depth_locked += 1;
204 return KMP_LOCK_ACQUIRED_NEXT;
205 } else {
206 __kmp_acquire_tas_lock_timed_template(lck, gtid);
207 lck->lk.depth_locked = 1;
208 return KMP_LOCK_ACQUIRED_FIRST;
209 }
210 }
211
__kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t * lck,kmp_int32 gtid)212 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
213 kmp_int32 gtid) {
214 char const *const func = "omp_set_nest_lock";
215 if (!__kmp_is_tas_lock_nestable(lck)) {
216 KMP_FATAL(LockSimpleUsedAsNestable, func);
217 }
218 return __kmp_acquire_nested_tas_lock(lck, gtid);
219 }
220
__kmp_test_nested_tas_lock(kmp_tas_lock_t * lck,kmp_int32 gtid)221 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
222 int retval;
223
224 KMP_DEBUG_ASSERT(gtid >= 0);
225
226 if (__kmp_get_tas_lock_owner(lck) == gtid) {
227 retval = ++lck->lk.depth_locked;
228 } else if (!__kmp_test_tas_lock(lck, gtid)) {
229 retval = 0;
230 } else {
231 KMP_MB();
232 retval = lck->lk.depth_locked = 1;
233 }
234 return retval;
235 }
236
__kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t * lck,kmp_int32 gtid)237 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
238 kmp_int32 gtid) {
239 char const *const func = "omp_test_nest_lock";
240 if (!__kmp_is_tas_lock_nestable(lck)) {
241 KMP_FATAL(LockSimpleUsedAsNestable, func);
242 }
243 return __kmp_test_nested_tas_lock(lck, gtid);
244 }
245
__kmp_release_nested_tas_lock(kmp_tas_lock_t * lck,kmp_int32 gtid)246 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
247 KMP_DEBUG_ASSERT(gtid >= 0);
248
249 KMP_MB();
250 if (--(lck->lk.depth_locked) == 0) {
251 __kmp_release_tas_lock(lck, gtid);
252 return KMP_LOCK_RELEASED;
253 }
254 return KMP_LOCK_STILL_HELD;
255 }
256
__kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t * lck,kmp_int32 gtid)257 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
258 kmp_int32 gtid) {
259 char const *const func = "omp_unset_nest_lock";
260 KMP_MB(); /* in case another processor initialized lock */
261 if (!__kmp_is_tas_lock_nestable(lck)) {
262 KMP_FATAL(LockSimpleUsedAsNestable, func);
263 }
264 if (__kmp_get_tas_lock_owner(lck) == -1) {
265 KMP_FATAL(LockUnsettingFree, func);
266 }
267 if (__kmp_get_tas_lock_owner(lck) != gtid) {
268 KMP_FATAL(LockUnsettingSetByAnother, func);
269 }
270 return __kmp_release_nested_tas_lock(lck, gtid);
271 }
272
// Initialize a nestable TAS lock: underlying lock free, nesting depth 0.
void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
277
// Destroy a nestable TAS lock and clear its nesting depth.
void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_destroy_tas_lock(lck);
  lck->lk.depth_locked = 0;
}
282
__kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t * lck)283 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
284 char const *const func = "omp_destroy_nest_lock";
285 if (!__kmp_is_tas_lock_nestable(lck)) {
286 KMP_FATAL(LockSimpleUsedAsNestable, func);
287 }
288 if (__kmp_get_tas_lock_owner(lck) != -1) {
289 KMP_FATAL(LockStillOwned, func);
290 }
291 __kmp_destroy_nested_tas_lock(lck);
292 }
293
294 #if KMP_USE_FUTEX
295
296 /* ------------------------------------------------------------------------ */
297 /* futex locks */
298
299 // futex locks are really just test and set locks, with a different method
300 // of handling contention. They take the same amount of space as test and
301 // set locks, and are allocated the same way (i.e. use the area allocated by
302 // the compiler for non-nested locks / allocate nested locks on the heap).
303
// Return the gtid of the thread holding the futex lock, or -1 if free.
// The poll word encodes ((gtid + 1) << 1), with bit 0 reserved as the
// "waiter(s) present" flag, so shift the flag bit away before decoding.
static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
}
307
// A futex lock is nestable iff depth_locked != -1; simple locks are
// initialized with depth_locked == -1.
static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}
311
// Blocking acquire of a futex-based lock. The poll word holds
// ((gtid + 1) << 1) for the owner, with bit 0 set when at least one waiter
// may be sleeping in the kernel. Uncontended acquisition is a single CAS;
// under contention the thread sets the wait bit and sleeps via
// futex(FUTEX_WAIT) until the releaser wakes it.
__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 gtid_code = (gtid + 1) << 1;

  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid_code))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  KMP_FSYNC_PREPARE(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  kmp_int32 poll_val;

  // Loop until the CAS from free -> owned-by-us succeeds. poll_val receives
  // the value observed by the failed CAS, which tells us who holds the lock
  // and whether the wait bit is already set.
  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
              &(lck->lk.poll), KMP_LOCK_FREE(futex),
              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {

    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
         lck, gtid, poll_val, cond));

    // NOTE: if you try to use the following condition for this branch
    //
    // if ( poll_val & 1 == 0 )
    //
    // Then the 12.0 compiler has a bug where the following block will
    // always be skipped, regardless of the value of the LSB of poll_val.
    if (!cond) {
      // Try to set the lsb in the poll to indicate to the owner
      // thread that they need to wake this thread up.
      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
        // Raced with an owner change; re-read and retry from the top.
        KA_TRACE(
            1000,
            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
             lck, lck->lk.poll, gtid));
        continue;
      }
      poll_val |= KMP_LOCK_BUSY(1, futex);

      KA_TRACE(1000,
               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
                lck->lk.poll, gtid));
    }

    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
         lck, gtid, poll_val));

    // Sleep in the kernel until the word changes from poll_val. A nonzero
    // return (e.g. EWOULDBLOCK because the word already changed) simply
    // means we should retry the acquisition loop.
    long rc;
    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
                      NULL, 0)) != 0) {
      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
                      "failed (rc=%ld errno=%d)\n",
                      lck, gtid, poll_val, rc, errno));
      continue;
    }

    KA_TRACE(1000,
             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and was entered on
    // the OS futex queue. We must now perform a futex wake call when releasing
    // the lock, as we have no idea how many other threads are in the queue.
    gtid_code |= 1;
  }

  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));
  return KMP_LOCK_ACQUIRED_FIRST;
}
393
__kmp_acquire_futex_lock(kmp_futex_lock_t * lck,kmp_int32 gtid)394 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
395 int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
396 return retval;
397 }
398
__kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t * lck,kmp_int32 gtid)399 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
400 kmp_int32 gtid) {
401 char const *const func = "omp_set_lock";
402 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
403 __kmp_is_futex_lock_nestable(lck)) {
404 KMP_FATAL(LockNestableUsedAsSimple, func);
405 }
406 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
407 KMP_FATAL(LockIsAlreadyOwned, func);
408 }
409 return __kmp_acquire_futex_lock(lck, gtid);
410 }
411
__kmp_test_futex_lock(kmp_futex_lock_t * lck,kmp_int32 gtid)412 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
413 if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
414 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
415 KMP_FSYNC_ACQUIRED(lck);
416 return TRUE;
417 }
418 return FALSE;
419 }
420
__kmp_test_futex_lock_with_checks(kmp_futex_lock_t * lck,kmp_int32 gtid)421 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
422 kmp_int32 gtid) {
423 char const *const func = "omp_test_lock";
424 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
425 __kmp_is_futex_lock_nestable(lck)) {
426 KMP_FATAL(LockNestableUsedAsSimple, func);
427 }
428 return __kmp_test_futex_lock(lck, gtid);
429 }
430
// Release a futex lock: atomically swap the poll word back to "free" and,
// if the old value had the wait bit set (some thread slept in FUTEX_WAIT),
// issue a FUTEX_WAKE for one waiter.
int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  KMP_FSYNC_RELEASING(lck);

  // Unconditional exchange: frees the lock and captures the old value so we
  // can tell whether anyone needs waking.
  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));

  KA_TRACE(1000,
           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
            lck, gtid, poll_val));

  // Wait bit set => at least one thread is (or may be) parked in the kernel.
  if (KMP_LOCK_STRIP(poll_val) & 1) {
    KA_TRACE(1000,
             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
              lck, gtid));
    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
            NULL, NULL, 0);
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}
461
__kmp_release_futex_lock_with_checks(kmp_futex_lock_t * lck,kmp_int32 gtid)462 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
463 kmp_int32 gtid) {
464 char const *const func = "omp_unset_lock";
465 KMP_MB(); /* in case another processor initialized lock */
466 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
467 __kmp_is_futex_lock_nestable(lck)) {
468 KMP_FATAL(LockNestableUsedAsSimple, func);
469 }
470 if (__kmp_get_futex_lock_owner(lck) == -1) {
471 KMP_FATAL(LockUnsettingFree, func);
472 }
473 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
474 (__kmp_get_futex_lock_owner(lck) != gtid)) {
475 KMP_FATAL(LockUnsettingSetByAnother, func);
476 }
477 return __kmp_release_futex_lock(lck, gtid);
478 }
479
// Initialize a simple futex lock to the unlocked state.
void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
}
483
// Destroy a simple futex lock by zeroing the poll word.
void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
485
__kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t * lck)486 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
487 char const *const func = "omp_destroy_lock";
488 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
489 __kmp_is_futex_lock_nestable(lck)) {
490 KMP_FATAL(LockNestableUsedAsSimple, func);
491 }
492 if (__kmp_get_futex_lock_owner(lck) != -1) {
493 KMP_FATAL(LockStillOwned, func);
494 }
495 __kmp_destroy_futex_lock(lck);
496 }
497
498 // nested futex locks
499
__kmp_acquire_nested_futex_lock(kmp_futex_lock_t * lck,kmp_int32 gtid)500 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
501 KMP_DEBUG_ASSERT(gtid >= 0);
502
503 if (__kmp_get_futex_lock_owner(lck) == gtid) {
504 lck->lk.depth_locked += 1;
505 return KMP_LOCK_ACQUIRED_NEXT;
506 } else {
507 __kmp_acquire_futex_lock_timed_template(lck, gtid);
508 lck->lk.depth_locked = 1;
509 return KMP_LOCK_ACQUIRED_FIRST;
510 }
511 }
512
__kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t * lck,kmp_int32 gtid)513 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
514 kmp_int32 gtid) {
515 char const *const func = "omp_set_nest_lock";
516 if (!__kmp_is_futex_lock_nestable(lck)) {
517 KMP_FATAL(LockSimpleUsedAsNestable, func);
518 }
519 return __kmp_acquire_nested_futex_lock(lck, gtid);
520 }
521
__kmp_test_nested_futex_lock(kmp_futex_lock_t * lck,kmp_int32 gtid)522 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
523 int retval;
524
525 KMP_DEBUG_ASSERT(gtid >= 0);
526
527 if (__kmp_get_futex_lock_owner(lck) == gtid) {
528 retval = ++lck->lk.depth_locked;
529 } else if (!__kmp_test_futex_lock(lck, gtid)) {
530 retval = 0;
531 } else {
532 KMP_MB();
533 retval = lck->lk.depth_locked = 1;
534 }
535 return retval;
536 }
537
__kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t * lck,kmp_int32 gtid)538 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
539 kmp_int32 gtid) {
540 char const *const func = "omp_test_nest_lock";
541 if (!__kmp_is_futex_lock_nestable(lck)) {
542 KMP_FATAL(LockSimpleUsedAsNestable, func);
543 }
544 return __kmp_test_nested_futex_lock(lck, gtid);
545 }
546
__kmp_release_nested_futex_lock(kmp_futex_lock_t * lck,kmp_int32 gtid)547 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
548 KMP_DEBUG_ASSERT(gtid >= 0);
549
550 KMP_MB();
551 if (--(lck->lk.depth_locked) == 0) {
552 __kmp_release_futex_lock(lck, gtid);
553 return KMP_LOCK_RELEASED;
554 }
555 return KMP_LOCK_STILL_HELD;
556 }
557
__kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t * lck,kmp_int32 gtid)558 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
559 kmp_int32 gtid) {
560 char const *const func = "omp_unset_nest_lock";
561 KMP_MB(); /* in case another processor initialized lock */
562 if (!__kmp_is_futex_lock_nestable(lck)) {
563 KMP_FATAL(LockSimpleUsedAsNestable, func);
564 }
565 if (__kmp_get_futex_lock_owner(lck) == -1) {
566 KMP_FATAL(LockUnsettingFree, func);
567 }
568 if (__kmp_get_futex_lock_owner(lck) != gtid) {
569 KMP_FATAL(LockUnsettingSetByAnother, func);
570 }
571 return __kmp_release_nested_futex_lock(lck, gtid);
572 }
573
// Initialize a nestable futex lock: underlying lock free, nesting depth 0.
void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
578
// Destroy a nestable futex lock and clear its nesting depth.
void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_destroy_futex_lock(lck);
  lck->lk.depth_locked = 0;
}
583
__kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t * lck)584 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
585 char const *const func = "omp_destroy_nest_lock";
586 if (!__kmp_is_futex_lock_nestable(lck)) {
587 KMP_FATAL(LockSimpleUsedAsNestable, func);
588 }
589 if (__kmp_get_futex_lock_owner(lck) != -1) {
590 KMP_FATAL(LockStillOwned, func);
591 }
592 __kmp_destroy_nested_futex_lock(lck);
593 }
594
595 #endif // KMP_USE_FUTEX
596
597 /* ------------------------------------------------------------------------ */
598 /* ticket (bakery) locks */
599
// Return the gtid of the thread holding the ticket lock, or -1 if free.
// owner_id stores gtid + 1 (0 == unowned); relaxed load is sufficient for
// the error-checking callers.
static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.owner_id,
                                   std::memory_order_relaxed) -
         1;
}
605
// A ticket lock is nestable iff depth_locked != -1; simple locks are
// initialized with depth_locked == -1.
static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.depth_locked,
                                   std::memory_order_relaxed) != -1;
}
610
// Spin-wait predicate for KMP_WAIT_PTR: true once now_serving (passed as an
// untyped pointer) equals this thread's ticket. The acquire load pairs with
// the release increment in __kmp_release_ticket_lock.
static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
                                   std::memory_order_acquire) == my_ticket;
}
615
// Blocking acquire of a ticket (bakery) lock: take the next ticket with a
// relaxed fetch-add, then wait (fast check, else spin via KMP_WAIT_PTR)
// until now_serving reaches that ticket. The acquire loads pair with the
// release increment of now_serving in __kmp_release_ticket_lock.
__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);

#ifdef USE_LOCK_PROFILE
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) != my_ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
  /* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  // Fast path: our ticket is already being served.
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_acquire) == my_ticket) {
    return KMP_LOCK_ACQUIRED_FIRST;
  }
  // Slow path: spin until __kmp_bakery_check sees now_serving == my_ticket.
  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}
636
__kmp_acquire_ticket_lock(kmp_ticket_lock_t * lck,kmp_int32 gtid)637 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
638 int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
639 return retval;
640 }
641
__kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)642 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
643 kmp_int32 gtid) {
644 char const *const func = "omp_set_lock";
645
646 if (!std::atomic_load_explicit(&lck->lk.initialized,
647 std::memory_order_relaxed)) {
648 KMP_FATAL(LockIsUninitialized, func);
649 }
650 if (lck->lk.self != lck) {
651 KMP_FATAL(LockIsUninitialized, func);
652 }
653 if (__kmp_is_ticket_lock_nestable(lck)) {
654 KMP_FATAL(LockNestableUsedAsSimple, func);
655 }
656 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
657 KMP_FATAL(LockIsAlreadyOwned, func);
658 }
659
660 __kmp_acquire_ticket_lock(lck, gtid);
661
662 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
663 std::memory_order_relaxed);
664 return KMP_LOCK_ACQUIRED_FIRST;
665 }
666
// Non-blocking acquire attempt on a ticket lock: if next_ticket equals
// now_serving (lock appears free), try to claim the ticket by CAS'ing
// next_ticket forward by one. Acquire ordering on the CAS (both success and
// failure) synchronizes with the releaser's increment of now_serving.
int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                   std::memory_order_relaxed);

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) == my_ticket) {
    kmp_uint32 next_ticket = my_ticket + 1;
    // CAS may fail if another thread took this ticket first; in that case
    // we simply report failure rather than retrying (non-blocking).
    if (std::atomic_compare_exchange_strong_explicit(
            &lck->lk.next_ticket, &my_ticket, next_ticket,
            std::memory_order_acquire, std::memory_order_acquire)) {
      return TRUE;
    }
  }
  return FALSE;
}
682
__kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)683 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
684 kmp_int32 gtid) {
685 char const *const func = "omp_test_lock";
686
687 if (!std::atomic_load_explicit(&lck->lk.initialized,
688 std::memory_order_relaxed)) {
689 KMP_FATAL(LockIsUninitialized, func);
690 }
691 if (lck->lk.self != lck) {
692 KMP_FATAL(LockIsUninitialized, func);
693 }
694 if (__kmp_is_ticket_lock_nestable(lck)) {
695 KMP_FATAL(LockNestableUsedAsSimple, func);
696 }
697
698 int retval = __kmp_test_ticket_lock(lck, gtid);
699
700 if (retval) {
701 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
702 std::memory_order_relaxed);
703 }
704 return retval;
705 }
706
// Release a ticket lock by bumping now_serving with release semantics (this
// publishes the critical section to the next waiter). `distance` estimates
// how many threads are queued; if it exceeds the number of available
// processors, yield so waiting threads can be scheduled.
int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  // Unsigned subtraction wraps correctly even across ticket overflow
  // (validated in __kmp_validate_locks).
  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                  std::memory_order_relaxed) -
                        std::atomic_load_explicit(&lck->lk.now_serving,
                                                  std::memory_order_relaxed);

  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
                                 std::memory_order_release);

  KMP_YIELD(distance >
            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}
720
__kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)721 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
722 kmp_int32 gtid) {
723 char const *const func = "omp_unset_lock";
724
725 if (!std::atomic_load_explicit(&lck->lk.initialized,
726 std::memory_order_relaxed)) {
727 KMP_FATAL(LockIsUninitialized, func);
728 }
729 if (lck->lk.self != lck) {
730 KMP_FATAL(LockIsUninitialized, func);
731 }
732 if (__kmp_is_ticket_lock_nestable(lck)) {
733 KMP_FATAL(LockNestableUsedAsSimple, func);
734 }
735 if (__kmp_get_ticket_lock_owner(lck) == -1) {
736 KMP_FATAL(LockUnsettingFree, func);
737 }
738 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
739 (__kmp_get_ticket_lock_owner(lck) != gtid)) {
740 KMP_FATAL(LockUnsettingSetByAnother, func);
741 }
742 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
743 return __kmp_release_ticket_lock(lck, gtid);
744 }
745
// Initialize a simple ticket lock. The self-pointer and the `initialized`
// flag (stored last, with release ordering) let the checking wrappers detect
// uninitialized or corrupted locks.
void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.self = lck;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(
      &lck->lk.owner_id, 0,
      std::memory_order_relaxed); // no thread owns the lock.
  std::atomic_store_explicit(
      &lck->lk.depth_locked, -1,
      std::memory_order_relaxed); // -1 => not a nested lock.
  // Release store: publishes all the fields above to any thread that
  // observes initialized == true.
  std::atomic_store_explicit(&lck->lk.initialized, true,
                             std::memory_order_release);
}
762
// Destroy a simple ticket lock: clear the initialized flag first so checking
// wrappers start failing, then reset all fields to their pristine values.
void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
  std::atomic_store_explicit(&lck->lk.initialized, false,
                             std::memory_order_release);
  lck->lk.self = NULL;
  lck->lk.location = NULL;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
                             std::memory_order_relaxed);
}
776
__kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t * lck)777 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
778 char const *const func = "omp_destroy_lock";
779
780 if (!std::atomic_load_explicit(&lck->lk.initialized,
781 std::memory_order_relaxed)) {
782 KMP_FATAL(LockIsUninitialized, func);
783 }
784 if (lck->lk.self != lck) {
785 KMP_FATAL(LockIsUninitialized, func);
786 }
787 if (__kmp_is_ticket_lock_nestable(lck)) {
788 KMP_FATAL(LockNestableUsedAsSimple, func);
789 }
790 if (__kmp_get_ticket_lock_owner(lck) != -1) {
791 KMP_FATAL(LockStillOwned, func);
792 }
793 __kmp_destroy_ticket_lock(lck);
794 }
795
796 // nested ticket locks
797
__kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t * lck,kmp_int32 gtid)798 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
799 KMP_DEBUG_ASSERT(gtid >= 0);
800
801 if (__kmp_get_ticket_lock_owner(lck) == gtid) {
802 std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
803 std::memory_order_relaxed);
804 return KMP_LOCK_ACQUIRED_NEXT;
805 } else {
806 __kmp_acquire_ticket_lock_timed_template(lck, gtid);
807 std::atomic_store_explicit(&lck->lk.depth_locked, 1,
808 std::memory_order_relaxed);
809 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
810 std::memory_order_relaxed);
811 return KMP_LOCK_ACQUIRED_FIRST;
812 }
813 }
814
__kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)815 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
816 kmp_int32 gtid) {
817 char const *const func = "omp_set_nest_lock";
818
819 if (!std::atomic_load_explicit(&lck->lk.initialized,
820 std::memory_order_relaxed)) {
821 KMP_FATAL(LockIsUninitialized, func);
822 }
823 if (lck->lk.self != lck) {
824 KMP_FATAL(LockIsUninitialized, func);
825 }
826 if (!__kmp_is_ticket_lock_nestable(lck)) {
827 KMP_FATAL(LockSimpleUsedAsNestable, func);
828 }
829 return __kmp_acquire_nested_ticket_lock(lck, gtid);
830 }
831
__kmp_test_nested_ticket_lock(kmp_ticket_lock_t * lck,kmp_int32 gtid)832 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
833 int retval;
834
835 KMP_DEBUG_ASSERT(gtid >= 0);
836
837 if (__kmp_get_ticket_lock_owner(lck) == gtid) {
838 retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
839 std::memory_order_relaxed) +
840 1;
841 } else if (!__kmp_test_ticket_lock(lck, gtid)) {
842 retval = 0;
843 } else {
844 std::atomic_store_explicit(&lck->lk.depth_locked, 1,
845 std::memory_order_relaxed);
846 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
847 std::memory_order_relaxed);
848 retval = 1;
849 }
850 return retval;
851 }
852
__kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)853 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
854 kmp_int32 gtid) {
855 char const *const func = "omp_test_nest_lock";
856
857 if (!std::atomic_load_explicit(&lck->lk.initialized,
858 std::memory_order_relaxed)) {
859 KMP_FATAL(LockIsUninitialized, func);
860 }
861 if (lck->lk.self != lck) {
862 KMP_FATAL(LockIsUninitialized, func);
863 }
864 if (!__kmp_is_ticket_lock_nestable(lck)) {
865 KMP_FATAL(LockSimpleUsedAsNestable, func);
866 }
867 return __kmp_test_nested_ticket_lock(lck, gtid);
868 }
869
__kmp_release_nested_ticket_lock(kmp_ticket_lock_t * lck,kmp_int32 gtid)870 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
871 KMP_DEBUG_ASSERT(gtid >= 0);
872
873 if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
874 std::memory_order_relaxed) -
875 1) == 0) {
876 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
877 __kmp_release_ticket_lock(lck, gtid);
878 return KMP_LOCK_RELEASED;
879 }
880 return KMP_LOCK_STILL_HELD;
881 }
882
__kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t * lck,kmp_int32 gtid)883 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
884 kmp_int32 gtid) {
885 char const *const func = "omp_unset_nest_lock";
886
887 if (!std::atomic_load_explicit(&lck->lk.initialized,
888 std::memory_order_relaxed)) {
889 KMP_FATAL(LockIsUninitialized, func);
890 }
891 if (lck->lk.self != lck) {
892 KMP_FATAL(LockIsUninitialized, func);
893 }
894 if (!__kmp_is_ticket_lock_nestable(lck)) {
895 KMP_FATAL(LockSimpleUsedAsNestable, func);
896 }
897 if (__kmp_get_ticket_lock_owner(lck) == -1) {
898 KMP_FATAL(LockUnsettingFree, func);
899 }
900 if (__kmp_get_ticket_lock_owner(lck) != gtid) {
901 KMP_FATAL(LockUnsettingSetByAnother, func);
902 }
903 return __kmp_release_nested_ticket_lock(lck, gtid);
904 }
905
// Initialize a nestable ticket lock: set up the base ticket lock, then mark
// it nestable by setting the nesting depth to 0.
void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
  // >= 0 for nestable locks, -1 for simple locks
}
912
// Destroy a nestable ticket lock: tear down the base lock and reset the
// nesting depth.
void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
}
918
919 static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t * lck)920 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
921 char const *const func = "omp_destroy_nest_lock";
922
923 if (!std::atomic_load_explicit(&lck->lk.initialized,
924 std::memory_order_relaxed)) {
925 KMP_FATAL(LockIsUninitialized, func);
926 }
927 if (lck->lk.self != lck) {
928 KMP_FATAL(LockIsUninitialized, func);
929 }
930 if (!__kmp_is_ticket_lock_nestable(lck)) {
931 KMP_FATAL(LockSimpleUsedAsNestable, func);
932 }
933 if (__kmp_get_ticket_lock_owner(lck) != -1) {
934 KMP_FATAL(LockStillOwned, func);
935 }
936 __kmp_destroy_nested_ticket_lock(lck);
937 }
938
939 // access functions to fields which don't exist for all lock kinds.
940
// Return the source location (ident_t) recorded for this ticket lock, if any.
static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
  return lck->lk.location;
}
944
// Record the source location (ident_t) associated with this ticket lock.
static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {
  lck->lk.location = loc;
}
949
// Return the flags stored on this ticket lock.
static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
  return lck->lk.flags;
}
953
// Store the given flags on this ticket lock.
static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}
958
959 /* ------------------------------------------------------------------------ */
960 /* queuing locks */
961
962 /* First the states
963 (head,tail) = 0, 0 means lock is unheld, nobody on queue
964 UINT_MAX or -1, 0 means lock is held, nobody on queue
965 h, h means lock held or about to transition,
966 1 element on queue
967 h, t h <> t, means lock is held or about to
968 transition, >1 elements on queue
969
970 Now the transitions
971 Acquire(0,0) = -1 ,0
972 Release(0,0) = Error
973 Acquire(-1,0) = h ,h h > 0
974 Release(-1,0) = 0 ,0
975 Acquire(h,h) = h ,t h > 0, t > 0, h <> t
976 Release(h,h) = -1 ,0 h > 0
977 Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
978 Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
979
980 And pictorially
981
982 +-----+
983 | 0, 0|------- release -------> Error
984 +-----+
985 | ^
986 acquire| |release
987 | |
988 | |
989 v |
990 +-----+
991 |-1, 0|
992 +-----+
993 | ^
994 acquire| |release
995 | |
996 | |
997 v |
998 +-----+
999 | h, h|
1000 +-----+
1001 | ^
1002 acquire| |release
1003 | |
1004 | |
1005 v |
1006 +-----+
1007 | h, t|----- acquire, release loopback ---+
1008 +-----+ |
1009 ^ |
1010 | |
1011 +------------------------------------+
1012 */
1013
1014 #ifdef DEBUG_QUEUING_LOCKS
1015
1016 /* Stuff for circular trace buffer */
1017 #define TRACE_BUF_ELE 1024
1018 static char traces[TRACE_BUF_ELE][128] = {0};
1019 static int tc = 0;
1020 #define TRACE_LOCK(X, Y) \
1021 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1022 #define TRACE_LOCK_T(X, Y, Z) \
1023 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1024 #define TRACE_LOCK_HT(X, Y, Z, Q) \
1025 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1026 Z, Q);
1027
/* Dump the circular trace buffer and the current queue state of a queuing
   lock for debugging.  Only compiled under DEBUG_QUEUING_LOCKS. */
static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");

  // Print trace entries oldest-first, starting at the current cursor and
  // wrapping around the circular buffer.
  i = tc % TRACE_BUF_ELE;
  __kmp_printf_no_lock("%s\n", traces[i]);
  i = (i + 1) % TRACE_BUF_ELE;
  while (i != (tc % TRACE_BUF_ELE)) {
    __kmp_printf_no_lock("%s", traces[i]);
    i = (i + 1) % TRACE_BUF_ELE;
  }
  __kmp_printf_no_lock("\n");

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
                       "next_wait:%d, head_id:%d, tail_id:%d\n",
                       gtid + 1, this_thr->th.th_spin_here,
                       this_thr->th.th_next_waiting, head_id, tail_id);

  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);

  // Walk the wait queue from the head, following th_next_waiting links
  // (each entry stores gtid + 1; 0 terminates the chain).
  if (lck->lk.head_id >= 1) {
    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
    while (t > 0) {
      __kmp_printf_no_lock("-> %d ", t);
      t = __kmp_threads[t - 1]->th.th_next_waiting;
    }
  }
  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
  __kmp_printf_no_lock("\n\n");
}
1061
1062 #endif /* DEBUG_QUEUING_LOCKS */
1063
// Owner gtid of the queuing lock, or -1 when unowned (owner_id stores
// gtid + 1, with 0 meaning "free").
static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
  return TCR_4(lck->lk.owner_id) - 1;
}
1067
// depth_locked == -1 marks a simple lock; >= 0 marks a nestable one.
static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}
1071
/* Acquire a lock using the queuing lock implementation */
/* Enqueue the calling thread on the lock's wait queue and spin on this
   thread's own th_spin_here flag until the releasing thread hands the lock
   over (or grab the lock directly when the queue is empty).  The state
   transitions attempted here are the ones described in the (head, tail)
   diagram above this function.  The takeTime template parameter is unused;
   see the note immediately below. */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {
  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
  volatile kmp_uint32 *spin_here_p;

#if OMPT_SUPPORT
  ompt_state_t prev_state = ompt_state_undefined;
#endif

  KA_TRACE(1000,
           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));

  KMP_FSYNC_PREPARE(lck);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  spin_here_p = &this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "acq ent");
  if (*spin_here_p)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!*spin_here_p);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
     head_id_p that may follow, not just in execution order, but also in
     visibility order. This way, when a releasing thread observes the changes to
     the queue by this thread, it can rightly assume that spin_here_p has
     already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
     not premature. If the releasing thread sets spin_here_p to FALSE before
     this thread sets it to TRUE, this thread will hang. */
  *spin_here_p = TRUE; /* before enqueuing to prevent race */

  // Retry loop: each iteration re-reads the head and attempts the one legal
  // transition out of the observed state; a failed CAS simply retries.
  while (1) {
    kmp_int32 enqueued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

    switch (head) {

    case -1: { // lock held, queue empty: try to become the only waiter
#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      tail = 0; /* to make sure next link asynchronously read is not set
                accidentally; this assignment prevents us from entering the
                if ( t > 0 ) condition in the enqueued case below, which is not
                necessary for this state transition */

      /* try (-1,0)->(tid,tid) */
      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
                                             KMP_PACK_64(-1, 0),
                                             KMP_PACK_64(gtid + 1, gtid + 1));
#ifdef DEBUG_QUEUING_LOCKS
      if (enqueued)
        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
#endif
    } break;

    default: { // lock held with waiters: try to append to the queue tail
      tail = *tail_id_p;
      KMP_DEBUG_ASSERT(tail != gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif

      if (tail == 0) {
        enqueued = FALSE; // queue is mid-transition; retry
      } else {
        /* try (h,t) or (h,h)->(h,tid) */
        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
        if (enqueued)
          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
#endif
      }
    } break;

    case 0: /* empty queue */
    {
      kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      /* try (0,0)->(-1,0) */

      /* only legal transition out of head = 0 is head = -1 with no change to
       * tail */
      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);

      if (grabbed_lock) {

        *spin_here_p = FALSE;

        KA_TRACE(
            1000,
            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
#endif

#if OMPT_SUPPORT
        if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
          /* change the state before clearing wait_id */
          this_thr->th.ompt_thread_info.state = prev_state;
          this_thr->th.ompt_thread_info.wait_id = 0;
        }
#endif

        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
      }
      enqueued = FALSE;
    } break;
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
      /* this thread will spin; set wait_id before entering wait state */
      prev_state = this_thr->th.ompt_thread_info.state;
      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
    }
#endif

    if (enqueued) {
      // Link the previous tail to us so the releaser can walk the queue.
      if (tail > 0) {
        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
        KMP_ASSERT(tail_thr != NULL);
        tail_thr->th.th_next_waiting = gtid + 1;
        /* corresponding wait for this write in release code */
      }
      KA_TRACE(1000,
               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
                lck, gtid));

      KMP_MB();
      // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
      KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
      // Synchronize writes to both runtime thread structures
      // and writes in user code.
      KMP_MB();

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq spin");

      if (this_thr->th.th_next_waiting != 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
                      "waiting on queue\n",
                      lck, gtid));

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq exit 2");
#endif

#if OMPT_SUPPORT
      /* change the state before clearing wait_id */
      this_thr->th.ompt_thread_info.state = prev_state;
      this_thr->th.ompt_thread_info.wait_id = 0;
#endif

      /* got lock, we were dequeued by the thread that released lock */
      return KMP_LOCK_ACQUIRED_FIRST;
    }

    /* Yield if number of threads > number of logical processors */
    /* ToDo: Not sure why this should only be in oversubscription case,
       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
    KMP_YIELD_OVERSUB();

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "acq retry");
#endif
  }
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_ACQUIRED_FIRST;
}
1269
__kmp_acquire_queuing_lock(kmp_queuing_lock_t * lck,kmp_int32 gtid)1270 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1271 KMP_DEBUG_ASSERT(gtid >= 0);
1272
1273 int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1274 return retval;
1275 }
1276
__kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t * lck,kmp_int32 gtid)1277 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1278 kmp_int32 gtid) {
1279 char const *const func = "omp_set_lock";
1280 if (lck->lk.initialized != lck) {
1281 KMP_FATAL(LockIsUninitialized, func);
1282 }
1283 if (__kmp_is_queuing_lock_nestable(lck)) {
1284 KMP_FATAL(LockNestableUsedAsSimple, func);
1285 }
1286 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1287 KMP_FATAL(LockIsAlreadyOwned, func);
1288 }
1289
1290 __kmp_acquire_queuing_lock(lck, gtid);
1291
1292 lck->lk.owner_id = gtid + 1;
1293 return KMP_LOCK_ACQUIRED_FIRST;
1294 }
1295
__kmp_test_queuing_lock(kmp_queuing_lock_t * lck,kmp_int32 gtid)1296 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1297 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1298 kmp_int32 head;
1299 #ifdef KMP_DEBUG
1300 kmp_info_t *this_thr;
1301 #endif
1302
1303 KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1304 KMP_DEBUG_ASSERT(gtid >= 0);
1305 #ifdef KMP_DEBUG
1306 this_thr = __kmp_thread_from_gtid(gtid);
1307 KMP_DEBUG_ASSERT(this_thr != NULL);
1308 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1309 #endif
1310
1311 head = *head_id_p;
1312
1313 if (head == 0) { /* nobody on queue, nobody holding */
1314 /* try (0,0)->(-1,0) */
1315 if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1316 KA_TRACE(1000,
1317 ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1318 KMP_FSYNC_ACQUIRED(lck);
1319 return TRUE;
1320 }
1321 }
1322
1323 KA_TRACE(1000,
1324 ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1325 return FALSE;
1326 }
1327
__kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t * lck,kmp_int32 gtid)1328 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1329 kmp_int32 gtid) {
1330 char const *const func = "omp_test_lock";
1331 if (lck->lk.initialized != lck) {
1332 KMP_FATAL(LockIsUninitialized, func);
1333 }
1334 if (__kmp_is_queuing_lock_nestable(lck)) {
1335 KMP_FATAL(LockNestableUsedAsSimple, func);
1336 }
1337
1338 int retval = __kmp_test_queuing_lock(lck, gtid);
1339
1340 if (retval) {
1341 lck->lk.owner_id = gtid + 1;
1342 }
1343 return retval;
1344 }
1345
/* Release a queuing lock.  If the queue is empty, perform the (-1,0)->(0,0)
   transition; otherwise dequeue the head waiter and wake it by clearing its
   th_spin_here flag.  The retry loop handles concurrent enqueuers whose
   updates race with ours.  Returns KMP_LOCK_RELEASED. */
int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  kmp_info_t *this_thr;
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;

  KA_TRACE(1000,
           ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
  this_thr = __kmp_thread_from_gtid(gtid);
  KMP_DEBUG_ASSERT(this_thr != NULL);
#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "rel ent");

  if (this_thr->th.th_spin_here)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_FSYNC_RELEASING(lck);

  while (1) {
    kmp_int32 dequeued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

#ifdef DEBUG_QUEUING_LOCKS
    tail = *tail_id_p;
    TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
    if (head == 0)
      __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
    KMP_DEBUG_ASSERT(head !=
                     0); /* holding the lock, head must be -1 or queue head */

    if (head == -1) { /* nobody on queue */
      /* try (-1,0)->(0,0) */
      if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
        KA_TRACE(
            1000,
            ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
#endif

#if OMPT_SUPPORT
        /* nothing to do - no other thread is trying to shift blame */
#endif
        return KMP_LOCK_RELEASED;
      }
      dequeued = FALSE;
    } else {
      KMP_MB();
      tail = *tail_id_p;
      if (head == tail) { /* only one thread on the queue */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0);

        /* try (h,h)->(-1,0) */
        dequeued = KMP_COMPARE_AND_STORE_REL64(
            RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
            KMP_PACK_64(-1, 0));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
#endif

      } else {
        volatile kmp_int32 *waiting_id_p;
        kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
        KMP_DEBUG_ASSERT(head_thr != NULL);
        waiting_id_p = &head_thr->th.th_next_waiting;

/* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
        if (head <= 0 || tail <= 0)
          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
        KMP_DEBUG_ASSERT(head > 0 && tail > 0);

        /* try (h,t)->(h',t) or (t,t) */
        KMP_MB();
        /* make sure enqueuing thread has time to update next waiting thread
         * field */
        // Spin until the head waiter's next link is published, then promote
        // that successor to be the new queue head.
        *head_id_p =
            KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
#endif
        dequeued = TRUE;
      }
    }

    if (dequeued) {
      kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
      KMP_DEBUG_ASSERT(head_thr != NULL);

/* Does this require synchronous reads? */
#ifdef DEBUG_QUEUING_LOCKS
      if (head <= 0 || tail <= 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
#endif
      KMP_DEBUG_ASSERT(head > 0 && tail > 0);

      /* For clean code only. Thread not released until next statement prevents
         race with acquire code. */
      head_thr->th.th_next_waiting = 0;
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
#endif

      KMP_MB();
      /* reset spin value */
      // This store is what actually hands the lock to the dequeued waiter.
      head_thr->th.th_spin_here = FALSE;

      KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
                      "dequeuing\n",
                      lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "rel exit 2");
#endif
      return KMP_LOCK_RELEASED;
    }
/* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
   threads */

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "rel retry");
#endif

  } /* while */
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_RELEASED;
}
1487
__kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t * lck,kmp_int32 gtid)1488 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1489 kmp_int32 gtid) {
1490 char const *const func = "omp_unset_lock";
1491 KMP_MB(); /* in case another processor initialized lock */
1492 if (lck->lk.initialized != lck) {
1493 KMP_FATAL(LockIsUninitialized, func);
1494 }
1495 if (__kmp_is_queuing_lock_nestable(lck)) {
1496 KMP_FATAL(LockNestableUsedAsSimple, func);
1497 }
1498 if (__kmp_get_queuing_lock_owner(lck) == -1) {
1499 KMP_FATAL(LockUnsettingFree, func);
1500 }
1501 if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1502 KMP_FATAL(LockUnsettingSetByAnother, func);
1503 }
1504 lck->lk.owner_id = 0;
1505 return __kmp_release_queuing_lock(lck, gtid);
1506 }
1507
// Initialize a queuing lock to the free state: empty queue, no owner,
// marked as a simple (non-nestable) lock.  The self-pointer stored in
// 'initialized' is what the checked entry points test against.
void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.head_id = 0;
  lck->lk.tail_id = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0; // no thread owns the lock.
  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
  lck->lk.initialized = lck;

  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
}
1520
// Destroy a queuing lock: clear the initialized marker first, then reset
// all remaining fields to their free-lock values.
void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
  lck->lk.initialized = NULL;
  lck->lk.location = NULL;
  lck->lk.head_id = 0;
  lck->lk.tail_id = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0;
  lck->lk.depth_locked = -1;
}
1531
__kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t * lck)1532 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1533 char const *const func = "omp_destroy_lock";
1534 if (lck->lk.initialized != lck) {
1535 KMP_FATAL(LockIsUninitialized, func);
1536 }
1537 if (__kmp_is_queuing_lock_nestable(lck)) {
1538 KMP_FATAL(LockNestableUsedAsSimple, func);
1539 }
1540 if (__kmp_get_queuing_lock_owner(lck) != -1) {
1541 KMP_FATAL(LockStillOwned, func);
1542 }
1543 __kmp_destroy_queuing_lock(lck);
1544 }
1545
1546 // nested queuing locks
1547
__kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t * lck,kmp_int32 gtid)1548 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1549 KMP_DEBUG_ASSERT(gtid >= 0);
1550
1551 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1552 lck->lk.depth_locked += 1;
1553 return KMP_LOCK_ACQUIRED_NEXT;
1554 } else {
1555 __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1556 KMP_MB();
1557 lck->lk.depth_locked = 1;
1558 KMP_MB();
1559 lck->lk.owner_id = gtid + 1;
1560 return KMP_LOCK_ACQUIRED_FIRST;
1561 }
1562 }
1563
1564 static int
__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t * lck,kmp_int32 gtid)1565 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1566 kmp_int32 gtid) {
1567 char const *const func = "omp_set_nest_lock";
1568 if (lck->lk.initialized != lck) {
1569 KMP_FATAL(LockIsUninitialized, func);
1570 }
1571 if (!__kmp_is_queuing_lock_nestable(lck)) {
1572 KMP_FATAL(LockSimpleUsedAsNestable, func);
1573 }
1574 return __kmp_acquire_nested_queuing_lock(lck, gtid);
1575 }
1576
__kmp_test_nested_queuing_lock(kmp_queuing_lock_t * lck,kmp_int32 gtid)1577 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1578 int retval;
1579
1580 KMP_DEBUG_ASSERT(gtid >= 0);
1581
1582 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1583 retval = ++lck->lk.depth_locked;
1584 } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1585 retval = 0;
1586 } else {
1587 KMP_MB();
1588 retval = lck->lk.depth_locked = 1;
1589 KMP_MB();
1590 lck->lk.owner_id = gtid + 1;
1591 }
1592 return retval;
1593 }
1594
__kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t * lck,kmp_int32 gtid)1595 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1596 kmp_int32 gtid) {
1597 char const *const func = "omp_test_nest_lock";
1598 if (lck->lk.initialized != lck) {
1599 KMP_FATAL(LockIsUninitialized, func);
1600 }
1601 if (!__kmp_is_queuing_lock_nestable(lck)) {
1602 KMP_FATAL(LockSimpleUsedAsNestable, func);
1603 }
1604 return __kmp_test_nested_queuing_lock(lck, gtid);
1605 }
1606
__kmp_release_nested_queuing_lock(kmp_queuing_lock_t * lck,kmp_int32 gtid)1607 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1608 KMP_DEBUG_ASSERT(gtid >= 0);
1609
1610 KMP_MB();
1611 if (--(lck->lk.depth_locked) == 0) {
1612 KMP_MB();
1613 lck->lk.owner_id = 0;
1614 __kmp_release_queuing_lock(lck, gtid);
1615 return KMP_LOCK_RELEASED;
1616 }
1617 return KMP_LOCK_STILL_HELD;
1618 }
1619
1620 static int
__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t * lck,kmp_int32 gtid)1621 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1622 kmp_int32 gtid) {
1623 char const *const func = "omp_unset_nest_lock";
1624 KMP_MB(); /* in case another processor initialized lock */
1625 if (lck->lk.initialized != lck) {
1626 KMP_FATAL(LockIsUninitialized, func);
1627 }
1628 if (!__kmp_is_queuing_lock_nestable(lck)) {
1629 KMP_FATAL(LockSimpleUsedAsNestable, func);
1630 }
1631 if (__kmp_get_queuing_lock_owner(lck) == -1) {
1632 KMP_FATAL(LockUnsettingFree, func);
1633 }
1634 if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1635 KMP_FATAL(LockUnsettingSetByAnother, func);
1636 }
1637 return __kmp_release_nested_queuing_lock(lck, gtid);
1638 }
1639
// Initialize a nestable queuing lock: base init plus depth 0 (nestable).
void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
1644
// Destroy a nestable queuing lock and reset the nesting depth.
void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_destroy_queuing_lock(lck);
  lck->lk.depth_locked = 0;
}
1649
1650 static void
__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t * lck)1651 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1652 char const *const func = "omp_destroy_nest_lock";
1653 if (lck->lk.initialized != lck) {
1654 KMP_FATAL(LockIsUninitialized, func);
1655 }
1656 if (!__kmp_is_queuing_lock_nestable(lck)) {
1657 KMP_FATAL(LockSimpleUsedAsNestable, func);
1658 }
1659 if (__kmp_get_queuing_lock_owner(lck) != -1) {
1660 KMP_FATAL(LockStillOwned, func);
1661 }
1662 __kmp_destroy_nested_queuing_lock(lck);
1663 }
1664
1665 // access functions to fields which don't exist for all lock kinds.
1666
// Return the source location (ident_t) recorded for this queuing lock.
static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
  return lck->lk.location;
}
1670
// Record the source location (ident_t) associated with this queuing lock.
static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
                                            const ident_t *loc) {
  lck->lk.location = loc;
}
1675
// Return the flags stored on this queuing lock.
static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
  return lck->lk.flags;
}
1679
// Store the given flags on this queuing lock.
static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
                                         kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}
1684
1685 #if KMP_USE_ADAPTIVE_LOCKS
1686
1687 /* RTM Adaptive locks */
1688
1689 #if KMP_HAVE_RTM_INTRINSICS
1690 #include <immintrin.h>
1691 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1692
1693 #else
1694
1695 // Values from the status register after failed speculation.
1696 #define _XBEGIN_STARTED (~0u)
1697 #define _XABORT_EXPLICIT (1 << 0)
1698 #define _XABORT_RETRY (1 << 1)
1699 #define _XABORT_CONFLICT (1 << 2)
1700 #define _XABORT_CAPACITY (1 << 3)
1701 #define _XABORT_DEBUG (1 << 4)
1702 #define _XABORT_NESTED (1 << 5)
1703 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1704
1705 // Aborts for which it's worth trying again immediately
1706 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1707
1708 #define STRINGIZE_INTERNAL(arg) #arg
1709 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1710
1711 // Access to RTM instructions
1712 /*A version of XBegin which returns -1 on speculation, and the value of EAX on
1713 an abort. This is the same definition as the compiler intrinsic that will be
1714 supported at some point. */
/* Start a transactional region.  Returns -1 (_XBEGIN_STARTED) when
   speculation begins; on an abort, control resumes here and the abort
   status from EAX is returned instead.  The XBEGIN instruction is emitted
   as raw bytes (0xC7 0xF8 + rel32) because older assemblers/compilers may
   not know the mnemonic. */
static __inline int _xbegin() {
  int res = -1;

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86_64
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#else /* IA32 */
  _asm {
        _emit 0xC7
        _emit 0xF8
        _emit 2
        _emit 0
        _emit 0
        _emit 0
        jmp   L2
        mov   res, eax
    L2:
  }
#endif // KMP_ARCH_X86_64
#else
  /* Note that %eax must be noted as killed (clobbered), because the XSR is
     returned in %eax(%rax) on abort. Other register values are restored, so
     don't need to be killed.

     We must also mark 'res' as an input and an output, since otherwise
     'res=-1' may be dropped as being dead, whereas we do need the assignment on
     the successful (i.e., non-abort) path. */
  __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
                   "   .long  1f-1b-6\n"
                   "    jmp   2f\n"
                   "1:  movl  %%eax,%0\n"
                   "2:"
                   : "+r"(res)::"memory", "%eax");
#endif // KMP_OS_WINDOWS
  return res;
}
1761
1762 /* Transaction end */
/* Commit the current transactional region (XEND, emitted as raw bytes
   0x0F 0x01 0xD5). */
static __inline void _xend() {
#if KMP_OS_WINDOWS
  __asm {
        _emit 0x0f
        _emit 0x01
        _emit 0xd5
  }
#else
  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
#endif
}
1774
1775 /* This is a macro, the argument must be a single byte constant which can be
1776 evaluated by the inline assembler, since it is emitted as a byte into the
1777 assembly code. */
1778 // clang-format off
1779 #if KMP_OS_WINDOWS
1780 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1781 #else
1782 #define _xabort(ARG) \
1783 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1784 #endif
1785 // clang-format on
#endif // KMP_HAVE_RTM_INTRINSICS
1787
1788 // Statistics is collected for testing purpose
1789 #if KMP_DEBUG_ADAPTIVE_LOCKS
1790
1791 // We accumulate speculative lock statistics when the lock is destroyed. We
1792 // keep locks that haven't been destroyed in the liveLocks list so that we can
1793 // grab their statistics too.
1794 static kmp_adaptive_lock_statistics_t destroyedStats;
1795
1796 // To hold the list of live locks.
1797 static kmp_adaptive_lock_info_t liveLocks;
1798
1799 // A lock so we can safely update the list of locks.
1800 static kmp_bootstrap_lock_t chain_lock =
1801 KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1802
1803 // Initialize the list of stats.
__kmp_init_speculative_stats()1804 void __kmp_init_speculative_stats() {
1805 kmp_adaptive_lock_info_t *lck = &liveLocks;
1806
1807 memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1808 sizeof(lck->stats));
1809 lck->stats.next = lck;
1810 lck->stats.prev = lck;
1811
1812 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1813 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1814
1815 __kmp_init_bootstrap_lock(&chain_lock);
1816 }
1817
1818 // Insert the lock into the circular list
// Insert a lock into the circular doubly-linked list of live locks,
// immediately after the liveLocks sentinel, so its stats can be harvested
// later. Serialized by chain_lock.
static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
  __kmp_acquire_bootstrap_lock(&chain_lock);

  // Point the new node at its neighbors first...
  lck->stats.next = liveLocks.stats.next;
  lck->stats.prev = &liveLocks;

  // ...then splice it in behind the sentinel.
  liveLocks.stats.next = lck;
  lck->stats.next->stats.prev = lck;

  // Verify the links around the new node are consistent.
  KMP_ASSERT(lck->stats.next->stats.prev == lck);
  KMP_ASSERT(lck->stats.prev->stats.next == lck);

  __kmp_release_bootstrap_lock(&chain_lock);
}
1833
__kmp_forget_lock(kmp_adaptive_lock_info_t * lck)1834 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1835 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1836 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1837
1838 kmp_adaptive_lock_info_t *n = lck->stats.next;
1839 kmp_adaptive_lock_info_t *p = lck->stats.prev;
1840
1841 n->stats.prev = p;
1842 p->stats.next = n;
1843 }
1844
// Reset a lock's speculation counters to zero and register the lock on the
// live-lock list so its statistics are found at report time.
// (__kmp_remember_lock acquires chain_lock itself.)
static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
         sizeof(lck->stats));
  __kmp_remember_lock(lck);
}
1850
__kmp_add_stats(kmp_adaptive_lock_statistics_t * t,kmp_adaptive_lock_info_t * lck)1851 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1852 kmp_adaptive_lock_info_t *lck) {
1853 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1854
1855 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1856 t->successfulSpeculations += s->successfulSpeculations;
1857 t->hardFailedSpeculations += s->hardFailedSpeculations;
1858 t->softFailedSpeculations += s->softFailedSpeculations;
1859 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1860 t->lemmingYields += s->lemmingYields;
1861 }
1862
// Called when a lock is destroyed: move its statistics into the global
// destroyedStats accumulator and remove it from the live-lock list, all
// under chain_lock.
static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
  __kmp_acquire_bootstrap_lock(&chain_lock);

  __kmp_add_stats(&destroyedStats, lck);
  __kmp_forget_lock(lck);

  __kmp_release_bootstrap_lock(&chain_lock);
}
1871
percent(kmp_uint32 count,kmp_uint32 total)1872 static float percent(kmp_uint32 count, kmp_uint32 total) {
1873 return (total == 0) ? 0.0 : (100.0 * count) / total;
1874 }
1875
__kmp_print_speculative_stats()1876 void __kmp_print_speculative_stats() {
1877 kmp_adaptive_lock_statistics_t total = destroyedStats;
1878 kmp_adaptive_lock_info_t *lck;
1879
1880 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1881 __kmp_add_stats(&total, lck);
1882 }
1883 kmp_adaptive_lock_statistics_t *t = &total;
1884 kmp_uint32 totalSections =
1885 t->nonSpeculativeAcquires + t->successfulSpeculations;
1886 kmp_uint32 totalSpeculations = t->successfulSpeculations +
1887 t->hardFailedSpeculations +
1888 t->softFailedSpeculations;
1889 if (totalSections <= 0)
1890 return;
1891
1892 kmp_safe_raii_file_t statsFile;
1893 if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1894 statsFile.set_stdout();
1895 } else {
1896 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1897 char buffer[buffLen];
1898 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1899 (kmp_int32)getpid());
1900 statsFile.open(buffer, "w");
1901 }
1902
1903 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1904 fprintf(statsFile,
1905 " Lock parameters: \n"
1906 " max_soft_retries : %10d\n"
1907 " max_badness : %10d\n",
1908 __kmp_adaptive_backoff_params.max_soft_retries,
1909 __kmp_adaptive_backoff_params.max_badness);
1910 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1911 t->nonSpeculativeAcquireAttempts);
1912 fprintf(statsFile, " Total critical sections : %10d\n",
1913 totalSections);
1914 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
1915 t->successfulSpeculations,
1916 percent(t->successfulSpeculations, totalSections));
1917 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
1918 t->nonSpeculativeAcquires,
1919 percent(t->nonSpeculativeAcquires, totalSections));
1920 fprintf(statsFile, " Lemming yields : %10d\n\n",
1921 t->lemmingYields);
1922
1923 fprintf(statsFile, " Speculative acquire attempts : %10d\n",
1924 totalSpeculations);
1925 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
1926 t->successfulSpeculations,
1927 percent(t->successfulSpeculations, totalSpeculations));
1928 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
1929 t->softFailedSpeculations,
1930 percent(t->softFailedSpeculations, totalSpeculations));
1931 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
1932 t->hardFailedSpeculations,
1933 percent(t->hardFailedSpeculations, totalSpeculations));
1934 }
1935
1936 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1937 #else
1938 #define KMP_INC_STAT(lck, stat)
1939
1940 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
1941
// Return true when the underlying queuing lock looks free. Used both on the
// normal path and inside an RTM transaction (where the read also pulls the
// lock line into the transaction's read-set so a later real acquire aborts
// us).
static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
  // It is enough to check that the head_id is zero.
  // We don't also need to check the tail.
  bool res = lck->lk.head_id == 0;

  // We need a fence here, since we must ensure that no memory operations
  // from later in this thread float above that read.
#if KMP_COMPILER_ICC
  _mm_mfence();
#else
  __sync_synchronize();
#endif

  return res;
}
1957
1958 // Functions for manipulating the badness
// Functions for manipulating the badness
static __inline void
__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
  // Reset the badness to zero so we eagerly try to speculate again
  lck->lk.adaptive.badness = 0;
  KMP_INC_STAT(lck, successfulSpeculations);
}
1965
1966 // Create a bit mask with one more set bit.
__kmp_step_badness(kmp_adaptive_lock_t * lck)1967 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1968 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1969 if (newBadness > lck->lk.adaptive.max_badness) {
1970 return;
1971 } else {
1972 lck->lk.adaptive.badness = newBadness;
1973 }
1974 }
1975
1976 // Check whether speculation should be attempted.
1977 KMP_ATTRIBUTE_TARGET_RTM
__kmp_should_speculate(kmp_adaptive_lock_t * lck,kmp_int32 gtid)1978 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1979 kmp_int32 gtid) {
1980 kmp_uint32 badness = lck->lk.adaptive.badness;
1981 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1982 int res = (attempts & badness) == 0;
1983 return res;
1984 }
1985
1986 // Attempt to acquire only the speculative lock.
1987 // Does not back off to the non-speculative lock.
// Try to acquire ONLY the speculative (RTM) lock, retrying soft aborts up to
// max_soft_retries times. Returns 1 when speculation started and the real
// lock looked free (we are now running transactionally); returns 0 after a
// hard abort or once the soft-retry budget is exhausted, stepping up the
// badness in that case. Never falls back to the non-speculative lock.
KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
                                         kmp_int32 gtid) {
  int retries = lck->lk.adaptive.max_soft_retries;

  // We don't explicitly count the start of speculation, rather we record the
  // results (success, hard fail, soft fail). The sum of all of those is the
  // total number of times we started speculation since all speculations must
  // end one of those ways.
  do {
    kmp_uint32 status = _xbegin();
    // Switch this in to disable actual speculation but exercise at least some
    // of the rest of the code. Useful for debugging...
    // kmp_uint32 status = _XABORT_NESTED;

    if (status == _XBEGIN_STARTED) {
      /* We have successfully started speculation. Check that no-one acquired
         the lock for real between when we last looked and now. This also gets
         the lock cache line into our read-set, which we need so that we'll
         abort if anyone later claims it for real. */
      if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        // Lock is now visibly acquired, so someone beat us to it. Abort the
        // transaction so we'll restart from _xbegin with the failure status.
        _xabort(0x01);
        KMP_ASSERT2(0, "should not get here");
      }
      return 1; // Lock has been acquired (speculatively)
    } else {
      // We have aborted, update the statistics
      if (status & SOFT_ABORT_MASK) {
        KMP_INC_STAT(lck, softFailedSpeculations);
        // and loop round to retry.
      } else {
        KMP_INC_STAT(lck, hardFailedSpeculations);
        // Give up if we had a hard failure.
        break;
      }
    }
  } while (retries--); // Loop while we have retries, and didn't fail hard.

  // Either we had a hard failure or we didn't succeed softly after
  // the full set of attempts, so back off the badness.
  __kmp_step_badness(lck);
  return 0;
}
2033
2034 // Attempt to acquire the speculative lock, or back off to the non-speculative
2035 // one if the speculative lock cannot be acquired.
2036 // We can succeed speculatively, non-speculatively, or fail.
__kmp_test_adaptive_lock(kmp_adaptive_lock_t * lck,kmp_int32 gtid)2037 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2038 // First try to acquire the lock speculatively
2039 if (__kmp_should_speculate(lck, gtid) &&
2040 __kmp_test_adaptive_lock_only(lck, gtid))
2041 return 1;
2042
2043 // Speculative acquisition failed, so try to acquire it non-speculatively.
2044 // Count the non-speculative acquire attempt
2045 lck->lk.adaptive.acquire_attempts++;
2046
2047 // Use base, non-speculative lock.
2048 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2049 KMP_INC_STAT(lck, nonSpeculativeAcquires);
2050 return 1; // Lock is acquired (non-speculatively)
2051 } else {
2052 return 0; // Failed to acquire the lock, it's already visibly locked.
2053 }
2054 }
2055
__kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t * lck,kmp_int32 gtid)2056 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2057 kmp_int32 gtid) {
2058 char const *const func = "omp_test_lock";
2059 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2060 KMP_FATAL(LockIsUninitialized, func);
2061 }
2062
2063 int retval = __kmp_test_adaptive_lock(lck, gtid);
2064
2065 if (retval) {
2066 lck->lk.qlk.owner_id = gtid + 1;
2067 }
2068 return retval;
2069 }
2070
2071 // Block until we can acquire a speculative, adaptive lock. We check whether we
2072 // should be trying to speculate. If we should be, we check the real lock to see
2073 // if it is free, and, if not, pause without attempting to acquire it until it
2074 // is. Then we try the speculative acquire. This means that although we suffer
2075 // from lemmings a little (because all we can't acquire the lock speculatively
2076 // until the queue of threads waiting has cleared), we don't get into a state
2077 // where we can never acquire the lock speculatively (because we force the queue
2078 // to clear by preventing new arrivals from entering the queue). This does mean
2079 // that when we're trying to break lemmings, the lock is no longer fair. However
2080 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
2081 // problem.
// Blocking acquire of the adaptive lock; see the lemming-avoidance rationale
// in the comment above. Tries speculation when the heuristic permits,
// spinning until the real lock drains first, then falls back to a real
// queuing-lock acquire.
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
                                        kmp_int32 gtid) {
  if (__kmp_should_speculate(lck, gtid)) {
    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
      // We tried speculation and failed, so give up.
    } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queueing lock) to allow it to drain, then
      // try again. All other threads will also see the same result for
      // shouldSpeculate, so will be doing the same if they try to claim the
      // lock from now on.
      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        KMP_INC_STAT(lck, lemmingYields);
        KMP_YIELD(TRUE);
      }

      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
    }
  }

  // Speculative acquisition failed, so acquire it non-speculatively.
  // Count the non-speculative acquire attempt
  lck->lk.adaptive.acquire_attempts++;

  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
  // We have acquired the base lock, so count that.
  KMP_INC_STAT(lck, nonSpeculativeAcquires);
}
2113
__kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t * lck,kmp_int32 gtid)2114 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2115 kmp_int32 gtid) {
2116 char const *const func = "omp_set_lock";
2117 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2118 KMP_FATAL(LockIsUninitialized, func);
2119 }
2120 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2121 KMP_FATAL(LockIsAlreadyOwned, func);
2122 }
2123
2124 __kmp_acquire_adaptive_lock(lck, gtid);
2125
2126 lck->lk.qlk.owner_id = gtid + 1;
2127 }
2128
// Release the adaptive lock. If the underlying queuing lock looks free we
// must have acquired speculatively, so commit the transaction; otherwise
// release the real lock.
KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
                                       kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
          lck))) { // If the lock doesn't look claimed we must be speculating.
    // (Or the user's code is buggy and they're releasing without locking;
    // if we had XTEST we'd be able to check that case...)
    _xend(); // Exit speculation
    __kmp_update_badness_after_success(lck);
  } else { // Since the lock *is* visibly locked we're not speculating,
    // so should use the underlying lock's release scheme.
    __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
  }
  return KMP_LOCK_RELEASED;
}
2144
__kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t * lck,kmp_int32 gtid)2145 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2146 kmp_int32 gtid) {
2147 char const *const func = "omp_unset_lock";
2148 KMP_MB(); /* in case another processor initialized lock */
2149 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2150 KMP_FATAL(LockIsUninitialized, func);
2151 }
2152 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2153 KMP_FATAL(LockUnsettingFree, func);
2154 }
2155 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2156 KMP_FATAL(LockUnsettingSetByAnother, func);
2157 }
2158 lck->lk.qlk.owner_id = 0;
2159 __kmp_release_adaptive_lock(lck, gtid);
2160 return KMP_LOCK_RELEASED;
2161 }
2162
// Initialize an adaptive lock: the embedded queuing lock plus the adaptive
// (speculation) parameters, seeded from the global backoff settings.
static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
  lck->lk.adaptive.badness = 0; // start eager to speculate
  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
  lck->lk.adaptive.max_soft_retries =
      __kmp_adaptive_backoff_params.max_soft_retries;
  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_zero_speculative_stats(&lck->lk.adaptive);
#endif
  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
}
2175
// Destroy an adaptive lock: harvest its statistics (debug builds only) and
// tear down the embedded queuing lock.
static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
#endif
  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
  // Nothing needed for the speculative part.
}
2183
__kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t * lck)2184 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2185 char const *const func = "omp_destroy_lock";
2186 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2187 KMP_FATAL(LockIsUninitialized, func);
2188 }
2189 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2190 KMP_FATAL(LockStillOwned, func);
2191 }
2192 __kmp_destroy_adaptive_lock(lck);
2193 }
2194
2195 #endif // KMP_USE_ADAPTIVE_LOCKS
2196
2197 /* ------------------------------------------------------------------------ */
2198 /* DRDPA ticket locks */
2199 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2200
__kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t * lck)2201 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2202 return lck->lk.owner_id - 1;
2203 }
2204
__kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t * lck)2205 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2206 return lck->lk.depth_locked != -1;
2207 }
2208
2209 __forceinline static int
__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t * lck,kmp_int32 gtid)2210 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2211 kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2212 kmp_uint64 mask = lck->lk.mask; // atomic load
2213 std::atomic<kmp_uint64> *polls = lck->lk.polls;
2214
2215 #ifdef USE_LOCK_PROFILE
2216 if (polls[ticket & mask] != ticket)
2217 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2218 /* else __kmp_printf( "." );*/
2219 #endif /* USE_LOCK_PROFILE */
2220
2221 // Now spin-wait, but reload the polls pointer and mask, in case the
2222 // polling area has been reconfigured. Unless it is reconfigured, the
2223 // reloads stay in L1 cache and are cheap.
2224 //
2225 // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2226 // The current implementation of KMP_WAIT doesn't allow for mask
2227 // and poll to be re-read every spin iteration.
2228 kmp_uint32 spins;
2229 KMP_FSYNC_PREPARE(lck);
2230 KMP_INIT_YIELD(spins);
2231 while (polls[ticket & mask] < ticket) { // atomic load
2232 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
2233 // Re-read the mask and the poll pointer from the lock structure.
2234 //
2235 // Make certain that "mask" is read before "polls" !!!
2236 //
2237 // If another thread picks reconfigures the polling area and updates their
2238 // values, and we get the new value of mask and the old polls pointer, we
2239 // could access memory beyond the end of the old polling area.
2240 mask = lck->lk.mask; // atomic load
2241 polls = lck->lk.polls; // atomic load
2242 }
2243
2244 // Critical section starts here
2245 KMP_FSYNC_ACQUIRED(lck);
2246 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2247 ticket, lck));
2248 lck->lk.now_serving = ticket; // non-volatile store
2249
2250 // Deallocate a garbage polling area if we know that we are the last
2251 // thread that could possibly access it.
2252 //
2253 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2254 // ticket.
2255 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2256 __kmp_free(lck->lk.old_polls);
2257 lck->lk.old_polls = NULL;
2258 lck->lk.cleanup_ticket = 0;
2259 }
2260
2261 // Check to see if we should reconfigure the polling area.
2262 // If there is still a garbage polling area to be deallocated from a
2263 // previous reconfiguration, let a later thread reconfigure it.
2264 if (lck->lk.old_polls == NULL) {
2265 bool reconfigure = false;
2266 std::atomic<kmp_uint64> *old_polls = polls;
2267 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2268
2269 if (TCR_4(__kmp_nth) >
2270 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2271 // We are in oversubscription mode. Contract the polling area
2272 // down to a single location, if that hasn't been done already.
2273 if (num_polls > 1) {
2274 reconfigure = true;
2275 num_polls = TCR_4(lck->lk.num_polls);
2276 mask = 0;
2277 num_polls = 1;
2278 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2279 sizeof(*polls));
2280 polls[0] = ticket;
2281 }
2282 } else {
2283 // We are in under/fully subscribed mode. Check the number of
2284 // threads waiting on the lock. The size of the polling area
2285 // should be at least the number of threads waiting.
2286 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2287 if (num_waiting > num_polls) {
2288 kmp_uint32 old_num_polls = num_polls;
2289 reconfigure = true;
2290 do {
2291 mask = (mask << 1) | 1;
2292 num_polls *= 2;
2293 } while (num_polls <= num_waiting);
2294
2295 // Allocate the new polling area, and copy the relevant portion
2296 // of the old polling area to the new area. __kmp_allocate()
2297 // zeroes the memory it allocates, and most of the old area is
2298 // just zero padding, so we only copy the release counters.
2299 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2300 sizeof(*polls));
2301 kmp_uint32 i;
2302 for (i = 0; i < old_num_polls; i++) {
2303 polls[i].store(old_polls[i]);
2304 }
2305 }
2306 }
2307
2308 if (reconfigure) {
2309 // Now write the updated fields back to the lock structure.
2310 //
2311 // Make certain that "polls" is written before "mask" !!!
2312 //
2313 // If another thread picks up the new value of mask and the old polls
2314 // pointer , it could access memory beyond the end of the old polling
2315 // area.
2316 //
2317 // On x86, we need memory fences.
2318 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2319 "lock %p to %d polls\n",
2320 ticket, lck, num_polls));
2321
2322 lck->lk.old_polls = old_polls;
2323 lck->lk.polls = polls; // atomic store
2324
2325 KMP_MB();
2326
2327 lck->lk.num_polls = num_polls;
2328 lck->lk.mask = mask; // atomic store
2329
2330 KMP_MB();
2331
2332 // Only after the new polling area and mask have been flushed
2333 // to main memory can we update the cleanup ticket field.
2334 //
2335 // volatile load / non-volatile store
2336 lck->lk.cleanup_ticket = lck->lk.next_ticket;
2337 }
2338 }
2339 return KMP_LOCK_ACQUIRED_FIRST;
2340 }
2341
__kmp_acquire_drdpa_lock(kmp_drdpa_lock_t * lck,kmp_int32 gtid)2342 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2343 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2344 return retval;
2345 }
2346
__kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t * lck,kmp_int32 gtid)2347 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2348 kmp_int32 gtid) {
2349 char const *const func = "omp_set_lock";
2350 if (lck->lk.initialized != lck) {
2351 KMP_FATAL(LockIsUninitialized, func);
2352 }
2353 if (__kmp_is_drdpa_lock_nestable(lck)) {
2354 KMP_FATAL(LockNestableUsedAsSimple, func);
2355 }
2356 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2357 KMP_FATAL(LockIsAlreadyOwned, func);
2358 }
2359
2360 __kmp_acquire_drdpa_lock(lck, gtid);
2361
2362 lck->lk.owner_id = gtid + 1;
2363 return KMP_LOCK_ACQUIRED_FIRST;
2364 }
2365
// Non-blocking acquire: succeed only if the current ticket's poll slot is
// already released AND we win the CAS on next_ticket. Returns TRUE/FALSE.
int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  // First get a ticket, then read the polls pointer and the mask.
  // The polls pointer must be read before the mask!!! (See above)
  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
  std::atomic<kmp_uint64> *polls = lck->lk.polls;
  kmp_uint64 mask = lck->lk.mask; // atomic load
  if (polls[ticket & mask] == ticket) {
    kmp_uint64 next_ticket = ticket + 1;
    // Claim the ticket only if nobody else took it since we read it.
    if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
                                       next_ticket)) {
      KMP_FSYNC_ACQUIRED(lck);
      KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
                      ticket, lck));
      lck->lk.now_serving = ticket; // non-volatile store

      // Since no threads are waiting, there is no possibility that we would
      // want to reconfigure the polling area. We might have the cleanup ticket
      // value (which says that it is now safe to deallocate old_polls), but
      // we'll let a later thread which calls __kmp_acquire_lock do that - this
      // routine isn't supposed to block, and we would risk blocks if we called
      // __kmp_free() to do the deallocation.
      return TRUE;
    }
  }
  return FALSE;
}
2392
__kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t * lck,kmp_int32 gtid)2393 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2394 kmp_int32 gtid) {
2395 char const *const func = "omp_test_lock";
2396 if (lck->lk.initialized != lck) {
2397 KMP_FATAL(LockIsUninitialized, func);
2398 }
2399 if (__kmp_is_drdpa_lock_nestable(lck)) {
2400 KMP_FATAL(LockNestableUsedAsSimple, func);
2401 }
2402
2403 int retval = __kmp_test_drdpa_lock(lck, gtid);
2404
2405 if (retval) {
2406 lck->lk.owner_id = gtid + 1;
2407 }
2408 return retval;
2409 }
2410
// Release the DRDPA lock by publishing the next ticket into its poll slot,
// which wakes the spinner holding that ticket.
int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  // Read the ticket value from the lock data struct, then the polls pointer and
  // the mask. The polls pointer must be read before the mask!!! (See above)
  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
  kmp_uint64 mask = lck->lk.mask; // atomic load
  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
                  ticket - 1, lck));
  KMP_FSYNC_RELEASING(lck);
  polls[ticket & mask] = ticket; // atomic store
  return KMP_LOCK_RELEASED;
}
2423
__kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t * lck,kmp_int32 gtid)2424 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2425 kmp_int32 gtid) {
2426 char const *const func = "omp_unset_lock";
2427 KMP_MB(); /* in case another processor initialized lock */
2428 if (lck->lk.initialized != lck) {
2429 KMP_FATAL(LockIsUninitialized, func);
2430 }
2431 if (__kmp_is_drdpa_lock_nestable(lck)) {
2432 KMP_FATAL(LockNestableUsedAsSimple, func);
2433 }
2434 if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2435 KMP_FATAL(LockUnsettingFree, func);
2436 }
2437 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2438 (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2439 KMP_FATAL(LockUnsettingSetByAnother, func);
2440 }
2441 lck->lk.owner_id = 0;
2442 return __kmp_release_drdpa_lock(lck, gtid);
2443 }
2444
// Initialize a DRDPA lock with a single-slot polling area; tickets start at
// zero. `initialized` is set last so the checked entry points only see a
// fully constructed lock.
void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.mask = 0;
  lck->lk.num_polls = 1;
  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
      lck->lk.num_polls * sizeof(*(lck->lk.polls)));
  lck->lk.cleanup_ticket = 0;
  lck->lk.old_polls = NULL;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0; // no thread owns the lock.
  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
  lck->lk.initialized = lck;

  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
}
2461
// Destroy a DRDPA lock: invalidate the `initialized` self-pointer first,
// free the current and any deferred-garbage polling areas, and reset all
// fields to their quiescent values.
void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
  lck->lk.initialized = NULL;
  lck->lk.location = NULL;
  if (lck->lk.polls.load() != NULL) {
    __kmp_free(lck->lk.polls.load());
    lck->lk.polls = NULL;
  }
  if (lck->lk.old_polls != NULL) {
    __kmp_free(lck->lk.old_polls);
    lck->lk.old_polls = NULL;
  }
  lck->lk.mask = 0;
  lck->lk.num_polls = 0;
  lck->lk.cleanup_ticket = 0;
  lck->lk.next_ticket = 0;
  lck->lk.now_serving = 0;
  lck->lk.owner_id = 0;
  lck->lk.depth_locked = -1;
}
2481
__kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t * lck)2482 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2483 char const *const func = "omp_destroy_lock";
2484 if (lck->lk.initialized != lck) {
2485 KMP_FATAL(LockIsUninitialized, func);
2486 }
2487 if (__kmp_is_drdpa_lock_nestable(lck)) {
2488 KMP_FATAL(LockNestableUsedAsSimple, func);
2489 }
2490 if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2491 KMP_FATAL(LockStillOwned, func);
2492 }
2493 __kmp_destroy_drdpa_lock(lck);
2494 }
2495
2496 // nested drdpa ticket locks
2497
// Nested acquire: if this thread already owns the lock, just bump the
// nesting depth; otherwise take the underlying ticket lock and become owner
// at depth 1.
int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1; // recursive re-entry by the owner
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
    KMP_MB();
    lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}
2513
__kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t * lck,kmp_int32 gtid)2514 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2515 kmp_int32 gtid) {
2516 char const *const func = "omp_set_nest_lock";
2517 if (lck->lk.initialized != lck) {
2518 KMP_FATAL(LockIsUninitialized, func);
2519 }
2520 if (!__kmp_is_drdpa_lock_nestable(lck)) {
2521 KMP_FATAL(LockSimpleUsedAsNestable, func);
2522 }
2523 __kmp_acquire_nested_drdpa_lock(lck, gtid);
2524 }
2525
// Nested non-blocking acquire. Returns the new nesting depth on success
// (>= 1), or 0 if the lock is held by another thread.
int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked; // owner re-enters: deeper nesting
  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
    retval = 0; // someone else holds it
  } else {
    // Freshly acquired: become owner at depth 1.
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
    KMP_MB();
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}
2543
__kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t * lck,kmp_int32 gtid)2544 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2545 kmp_int32 gtid) {
2546 char const *const func = "omp_test_nest_lock";
2547 if (lck->lk.initialized != lck) {
2548 KMP_FATAL(LockIsUninitialized, func);
2549 }
2550 if (!__kmp_is_drdpa_lock_nestable(lck)) {
2551 KMP_FATAL(LockSimpleUsedAsNestable, func);
2552 }
2553 return __kmp_test_nested_drdpa_lock(lck, gtid);
2554 }
2555
// Release one level of a nested DRDPA lock. Only when the nesting depth
// drops to zero is ownership cleared and the underlying ticket lock
// actually released.
int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    KMP_MB();
    lck->lk.owner_id = 0;
    __kmp_release_drdpa_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}
2568
__kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t * lck,kmp_int32 gtid)2569 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2570 kmp_int32 gtid) {
2571 char const *const func = "omp_unset_nest_lock";
2572 KMP_MB(); /* in case another processor initialized lock */
2573 if (lck->lk.initialized != lck) {
2574 KMP_FATAL(LockIsUninitialized, func);
2575 }
2576 if (!__kmp_is_drdpa_lock_nestable(lck)) {
2577 KMP_FATAL(LockSimpleUsedAsNestable, func);
2578 }
2579 if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2580 KMP_FATAL(LockUnsettingFree, func);
2581 }
2582 if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2583 KMP_FATAL(LockUnsettingSetByAnother, func);
2584 }
2585 return __kmp_release_nested_drdpa_lock(lck, gtid);
2586 }
2587
// Initialize a nestable DRDPA lock: base init plus depth_locked = 0
// (the >= 0 value marks the lock as nestable).
void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}
2592
// Destroy a nestable DRDPA lock; resets depth_locked after the base destroy
// (which sets it to -1) so the field reads as "nestable, depth 0".
void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
  __kmp_destroy_drdpa_lock(lck);
  lck->lk.depth_locked = 0;
}
2597
__kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t * lck)2598 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2599 char const *const func = "omp_destroy_nest_lock";
2600 if (lck->lk.initialized != lck) {
2601 KMP_FATAL(LockIsUninitialized, func);
2602 }
2603 if (!__kmp_is_drdpa_lock_nestable(lck)) {
2604 KMP_FATAL(LockSimpleUsedAsNestable, func);
2605 }
2606 if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2607 KMP_FATAL(LockStillOwned, func);
2608 }
2609 __kmp_destroy_nested_drdpa_lock(lck);
2610 }
2611
2612 // access functions to fields which don't exist for all lock kinds.
2613
// Return the source location (ident_t) recorded for this lock, if any.
static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
  return lck->lk.location;
}
2617
// Record the source location (ident_t) for this lock.
static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
                                          const ident_t *loc) {
  lck->lk.location = loc;
}
2622
// Return the flags stored on this lock.
static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
  return lck->lk.flags;
}
2626
// Store flags on this lock.
static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
                                       kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}
2631
// Time stamp counter used by the spin backoff below. On x86 we read the
// hardware TSC; elsewhere we fall back to a nanosecond clock.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define __kmp_tsc() __kmp_hardware_timestamp()
// Runtime's default backoff parameters
// NOTE(review): initializer order presumably {step, max_backoff, min_tick}
// per kmp_backoff_t in kmp_lock.h — confirm. max_backoff must be a power of
// two (it is used as a mask in __kmp_spin_backoff).
kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
#else
// Use nanoseconds for other platforms
extern kmp_uint64 __kmp_now_nsec();
kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
#define __kmp_tsc() __kmp_now_nsec()
#endif
2643
2644 // A useful predicate for dealing with timestamps that may wrap.
2645 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2646 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2647 // Times where going clockwise is less distance than going anti-clockwise
2648 // are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0),
2649 // then a > b (true) does not mean a reached b; whereas signed(a) = -2,
2650 // signed(b) = 0 captures the actual difference
before(kmp_uint64 a,kmp_uint64 b)2651 static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2652 return ((kmp_int64)b - (kmp_int64)a) > 0;
2653 }
2654
2655 // Truncated binary exponential backoff function
__kmp_spin_backoff(kmp_backoff_t * boff)2656 void __kmp_spin_backoff(kmp_backoff_t *boff) {
2657 // We could flatten this loop, but making it a nested loop gives better result
2658 kmp_uint32 i;
2659 for (i = boff->step; i > 0; i--) {
2660 kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2661 do {
2662 KMP_CPU_PAUSE();
2663 } while (before(__kmp_tsc(), goal));
2664 }
2665 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2666 }
2667
2668 #if KMP_USE_DYNAMIC_LOCK
2669
2670 // Direct lock initializers. It simply writes a tag to the low 8 bits of the
2671 // lock word.
__kmp_init_direct_lock(kmp_dyna_lock_t * lck,kmp_dyna_lockseq_t seq)2672 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2673 kmp_dyna_lockseq_t seq) {
2674 TCW_4(*lck, KMP_GET_D_TAG(seq));
2675 KA_TRACE(
2676 20,
2677 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2678 }
2679
2680 #if KMP_USE_TSX
2681
// HLE lock functions - imported from the testbed runtime.
// XACQUIRE (0xf2) / XRELEASE (0xf3) legacy prefixes, emitted as raw bytes so
// the code assembles even without HLE assembler support.
#define HLE_ACQUIRE ".byte 0xf2;"
#define HLE_RELEASE ".byte 0xf3;"

// Atomically exchange *p with v using an XACQUIRE-prefixed xchg.
// Returns the previous value of *p (left in v by the "+r" constraint).
static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
  return v;
}
2690
// Destroy an HLE lock: the lock word is its only state, so just zero it.
static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2692
// Checked destroy: HLE locks keep no extra bookkeeping, so this matches the
// unchecked variant.
static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
  TCW_4(*lck, 0);
}
2696
// Acquire an HLE lock: try an XACQUIRE-prefixed exchange first; on failure,
// spin reading the lock word (keeping the cache line shared) with a small
// truncated exponential pause, then retry the exchange.
static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
  // Use gtid for KMP_LOCK_BUSY if necessary
  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
    int delay = 1;
    do {
      // Wait on a plain read until the lock word looks free.
      while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
        for (int i = delay; i != 0; --i)
          KMP_CPU_PAUSE();
        delay = ((delay << 1) | 1) & 7; // exponential backoff, capped at 7
      }
    } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
  }
}
2710
// Checked acquire; currently identical to the unchecked variant.
static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                               kmp_int32 gtid) {
  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
}
2715
// Release an HLE lock: store the "free" value with the XRELEASE prefix so an
// elided critical section can commit.
static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
  __asm__ volatile(HLE_RELEASE "movl %1,%0"
                   : "=m"(*lck)
                   : "r"(KMP_LOCK_FREE(hle))
                   : "memory");
  return KMP_LOCK_RELEASED;
}
2723
// Checked release; currently identical to the unchecked variant.
static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                              kmp_int32 gtid) {
  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
}
2728
__kmp_test_hle_lock(kmp_dyna_lock_t * lck,kmp_int32 gtid)2729 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2730 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2731 }
2732
// Checked trylock; currently identical to the unchecked variant.
static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                           kmp_int32 gtid) {
  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
}
2737
// RTM queuing lock shares its storage with the plain queuing lock; RTM adds
// no extra state to initialize.
static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}
2741
// Destroy the underlying queuing lock.
static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
  __kmp_destroy_queuing_lock(lck);
}
2745
// Checked destroy forwards to the queuing lock's checked destroy.
static void
__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_destroy_queuing_lock_with_checks(lck);
}
2750
KMP_ATTRIBUTE_TARGET_RTM
// Acquire an RTM queuing lock. Start a hardware transaction (up to 4
// attempts); inside the transaction the lock word is only read, so
// non-conflicting critical sections may run concurrently. If the lock is
// held, abort with marker code 0xff and wait for it to free up before
// retrying. If speculation keeps failing, really acquire the queuing lock.
static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
                                           kmp_int32 gtid) {
  unsigned retries = 3, status;
  do {
    status = _xbegin();
    if (status == _XBEGIN_STARTED) {
      if (__kmp_is_unlocked_queuing_lock(lck))
        return; // speculating: the lock word stays in our read-set
      _xabort(0xff); // lock is held; abort with our marker code
    }
    if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
      // Wait until lock becomes free
      while (!__kmp_is_unlocked_queuing_lock(lck)) {
        KMP_YIELD(TRUE);
      }
    } else if (!(status & _XABORT_RETRY))
      break;
  } while (retries--);

  // Fall-back non-speculative lock (xchg)
  __kmp_acquire_queuing_lock(lck, gtid);
}
2774
// Checked acquire; currently identical to the unchecked variant.
static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                       kmp_int32 gtid) {
  __kmp_acquire_rtm_queuing_lock(lck, gtid);
}
2779
KMP_ATTRIBUTE_TARGET_RTM
// Release an RTM queuing lock. If the lock word still looks free we must be
// executing inside a speculative region, so commit it with _xend();
// otherwise release the real lock taken by the fall-back path.
static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(lck)) {
    // Releasing from speculation
    _xend();
  } else {
    // Releasing from a real lock
    __kmp_release_queuing_lock(lck, gtid);
  }
  return KMP_LOCK_RELEASED;
}
2792
// Checked release; currently identical to the unchecked variant.
static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                      kmp_int32 gtid) {
  return __kmp_release_rtm_queuing_lock(lck, gtid);
}
2797
KMP_ATTRIBUTE_TARGET_RTM
// Try to acquire an RTM queuing lock speculatively (up to 4 transaction
// attempts); fall back to the queuing lock's non-speculative trylock.
static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
                                       kmp_int32 gtid) {
  unsigned retries = 3, status;
  do {
    status = _xbegin();
    if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
      return 1; // now speculating inside the critical section
    }
    if (!(status & _XABORT_RETRY))
      break;
  } while (retries--);

  return __kmp_test_queuing_lock(lck, gtid);
}
2813
// Checked trylock; currently identical to the unchecked variant.
static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                   kmp_int32 gtid) {
  return __kmp_test_rtm_queuing_lock(lck, gtid);
}
2818
// Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back
// test-and-set spin lock.
typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2821
// Destroy an RTM spin lock by resetting its poll word to zero.
static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
  KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
}
2825
// Checked destroy; no extra bookkeeping to validate, defers to the
// unchecked destroy.
static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
  __kmp_destroy_rtm_spin_lock(lck);
}
2829
KMP_ATTRIBUTE_TARGET_RTM
// Acquire an RTM spin lock: speculate first (up to 4 transaction attempts,
// with the poll word kept in the transaction's read-set), then fall back to
// a compare-and-swap spin with exponential backoff.
static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
                                       kmp_int32 gtid) {
  unsigned retries = 3, status;
  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
  do {
    status = _xbegin();
    if (status == _XBEGIN_STARTED) {
      if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
        return KMP_LOCK_ACQUIRED_FIRST; // now speculating
      _xabort(0xff); // lock is held; abort with our marker code
    }
    if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
      // Wait until lock becomes free
      while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
        KMP_YIELD(TRUE);
      }
    } else if (!(status & _XABORT_RETRY))
      break;
  } while (retries--);

  // Fall-back spin lock
  KMP_FSYNC_PREPARE(lck);
  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  // The cheap relaxed load filters out CAS attempts while the lock is busy.
  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
         !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
    __kmp_spin_backoff(&backoff);
  }
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}
2862
// Checked acquire; currently identical to the unchecked variant.
static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
                                                   kmp_int32 gtid) {
  return __kmp_acquire_rtm_spin_lock(lck, gtid);
}
2867
KMP_ATTRIBUTE_TARGET_RTM
// Release an RTM spin lock. If the poll word still reads "free" we must be
// inside a speculative region, so commit it; otherwise store "free" with
// release semantics to drop the real lock.
static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
                                       kmp_int32 gtid) {
  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
    // Releasing from speculation
    _xend();
  } else {
    // Releasing from a real lock
    KMP_FSYNC_RELEASING(lck);
    KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
  }
  return KMP_LOCK_RELEASED;
}
2881
// Checked release; currently identical to the unchecked variant.
static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
                                                   kmp_int32 gtid) {
  return __kmp_release_rtm_spin_lock(lck, gtid);
}
2886
KMP_ATTRIBUTE_TARGET_RTM
// Try to acquire an RTM spin lock: speculate first (up to 4 transaction
// attempts), then make one non-blocking compare-and-swap attempt on the
// poll word. Returns TRUE on success.
static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
  unsigned retries = 3, status;
  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
  do {
    status = _xbegin();
    if (status == _XBEGIN_STARTED &&
        KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
      return TRUE; // now speculating
    }
    if (!(status & _XABORT_RETRY))
      break;
  } while (retries--);

  // Single trylock attempt on the fall-back path; no spinning.
  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}
2909
// Checked trylock; currently identical to the unchecked variant.
static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
                                                kmp_int32 gtid) {
  return __kmp_test_rtm_spin_lock(lck, gtid);
}
2914
2915 #endif // KMP_USE_TSX
2916
// Entry functions for indirect locks (first element of direct lock jump tables)
static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
                                     kmp_dyna_lockseq_t tag);
static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
// Checked variants, selected at runtime when consistency checking is on
// (see __kmp_init_dynamic_user_locks).
static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                               kmp_int32);
static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                 kmp_int32);
static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                kmp_int32);
2930
// Lock function definitions for the union parameter type
#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)

// expand1: wrappers for void lock operations (init/destroy, plain and
// nested) taking the kmp_user_lock_p union type.
#define expand1(lk, op)                                                        \
  static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) {               \
    __kmp_##op##_##lk##_##lock(&lock->lk);                                     \
  }
// expand2: wrappers for int-returning lock operations (acquire/release/test).
#define expand2(lk, op)                                                        \
  static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock,                  \
                                        kmp_int32 gtid) {                      \
    return __kmp_##op##_##lk##_##lock(&lock->lk, gtid);                        \
  }
// expand3: wrapper for the flags modifier.
#define expand3(lk, op)                                                        \
  static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock,              \
                                            kmp_lock_flags_t flags) {          \
    __kmp_set_##lk##_lock_flags(&lock->lk, flags);                             \
  }
// expand4: wrapper for the source-location modifier.
#define expand4(lk, op)                                                        \
  static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock,           \
                                               const ident_t *loc) {           \
    __kmp_set_##lk##_lock_location(&lock->lk, loc);                            \
  }

KMP_FOREACH_LOCK_KIND(expand1, init)
KMP_FOREACH_LOCK_KIND(expand1, init_nested)
KMP_FOREACH_LOCK_KIND(expand1, destroy)
KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
KMP_FOREACH_LOCK_KIND(expand2, acquire)
KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
KMP_FOREACH_LOCK_KIND(expand2, release)
KMP_FOREACH_LOCK_KIND(expand2, release_nested)
KMP_FOREACH_LOCK_KIND(expand2, test)
KMP_FOREACH_LOCK_KIND(expand2, test_nested)
KMP_FOREACH_LOCK_KIND(expand3, )
KMP_FOREACH_LOCK_KIND(expand4, )

#undef expand1
#undef expand2
#undef expand3
#undef expand4
2971
// Jump tables for the direct lock functions, indexed by the direct lock tag.
// Only fill in the odd entries, that avoids the need to shift out the low bit

// init functions
#define expand(l, op) 0, __kmp_init_direct_lock,
void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
    __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
#undef expand

// destroy functions
#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
#undef expand
// Checked table, installed when consistency checking is enabled.
#define expand(l, op)                                                          \
  0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
#undef expand

// set/acquire functions
#define expand(l, op)                                                          \
  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
#undef expand
#define expand(l, op)                                                          \
  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_set_indirect_lock_with_checks, 0,
    KMP_FOREACH_D_LOCK(expand, acquire)};
#undef expand

// unset/release and test functions
#define expand(l, op)                                                          \
  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
#undef expand
#define expand(l, op)                                                          \
  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_unset_indirect_lock_with_checks, 0,
    KMP_FOREACH_D_LOCK(expand, release)};
static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
#undef expand

// Exposes only one set of jump tables (*lock or *lock_with_checks).
void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3027
// Jump tables for the indirect lock functions, indexed by the indirect tag.
#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, init)};
#undef expand

#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
static void (*indirect_destroy[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, destroy)};
#undef expand
// Checked table, installed when consistency checking is enabled.
#define expand(l, op)                                                          \
  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, destroy)};
#undef expand

// set/acquire functions
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static int (*indirect_set[])(kmp_user_lock_p,
                             kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand

// unset/release and test functions
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, release)};
static int (*indirect_test[])(kmp_user_lock_p,
                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
#undef expand
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, release)};
static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, test)};
#undef expand

// Exposes only one jump tables (*lock or *lock_with_checks).
void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3077
// Lock index table (allocated lazily in __kmp_init_dynamic_user_locks).
kmp_indirect_lock_table_t __kmp_i_lock_table;

// Size of indirect locks (filled in by __kmp_init_dynamic_user_locks).
static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};

// Jump tables for lock accessor/modifier.
void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                     const ident_t *) = {0};
void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                  kmp_lock_flags_t) = {0};
const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) = {0};
kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) = {0};

// Use different lock pools for different lock types.
static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3096
// User lock allocator for dynamically dispatched indirect locks. Every entry of
// the indirect lock table holds the address and type of the allocated indirect
// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
// full. A destroyed indirect lock object is returned to the reusable pool of
// locks, unique to each lock type. Runs under __kmp_global_lock.
kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
                                                  kmp_int32 gtid,
                                                  kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t *lck;
  // NOTE: idx is assigned only when OMP_LOCK_T_SIZE < sizeof(void *); the
  // same condition guards every use of it below.
  kmp_lock_index_t idx;

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_indirect_lock_pool[tag] != NULL) {
    // Reuse the allocated and destroyed lock object
    lck = __kmp_indirect_lock_pool[tag];
    if (OMP_LOCK_T_SIZE < sizeof(void *))
      idx = lck->lock->pool.index;
    __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
    KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
                  lck));
  } else {
    idx = __kmp_i_lock_table.next;
    // Check capacity and double the size if it is full
    if (idx == __kmp_i_lock_table.size) {
      // Double up the space for block pointers
      int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
      kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
          2 * row * sizeof(kmp_indirect_lock_t *));
      KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
                 row * sizeof(kmp_indirect_lock_t *));
      kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
      __kmp_i_lock_table.table = new_table;
      __kmp_free(old_table);
      // Allocate new objects in the new blocks
      for (int i = row; i < 2 * row; ++i)
        *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
            KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
      __kmp_i_lock_table.size = 2 * idx; // idx == old size here
    }
    __kmp_i_lock_table.next++;
    lck = KMP_GET_I_LOCK(idx);
    // Allocate a new base lock object
    lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
    KA_TRACE(20,
             ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);

  lck->type = tag;

  // Store either the table index or the lock's address in the user's lock
  // word, depending on how much space omp_lock_t provides.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = idx
                                       << 1; // indirect lock word must be even
  } else {
    *((kmp_indirect_lock_t **)user_lock) = lck;
  }

  return lck;
}
3158
// User lock lookup for dynamically dispatched locks. Translates the user's
// lock word (a table index or a direct pointer, depending on how much space
// omp_lock_t provides) into the kmp_indirect_lock_t object. With consistency
// checking enabled, invalid lock words raise a fatal error attributed to
// func; otherwise the lookup is unchecked for speed.
static __forceinline kmp_indirect_lock_t *
__kmp_lookup_indirect_lock(void **user_lock, const char *func) {
  if (__kmp_env_consistency_check) {
    kmp_indirect_lock_t *lck = NULL;
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
      kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
      if (idx >= __kmp_i_lock_table.size) {
        KMP_FATAL(LockIsUninitialized, func);
      }
      lck = KMP_GET_I_LOCK(idx);
    } else {
      lck = *((kmp_indirect_lock_t **)user_lock);
    }
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
    return lck;
  } else {
    // Fast path: no validation.
    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
      return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
    } else {
      return *((kmp_indirect_lock_t **)user_lock);
    }
  }
}
3188
// Initialize an indirect lock: downgrade lock sequences the hardware cannot
// support, allocate the lock object, and run its type-specific initializer.
static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
                                     kmp_dyna_lockseq_t seq) {
#if KMP_USE_ADAPTIVE_LOCKS
  // Adaptive locks need RTM; fall back to queuing and warn once per call.
  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
    KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
    seq = lockseq_queuing;
  }
#endif
#if KMP_USE_TSX
  // RTM queuing silently degrades to plain queuing without RTM support.
  if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.rtm) {
    seq = lockseq_queuing;
  }
#endif
  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
  kmp_indirect_lock_t *l =
      __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
  KMP_I_LOCK_FUNC(l, init)(l->lock);
  KA_TRACE(
      20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
           seq));
}
3210
// Destroy an indirect lock and return its object to the per-type reuse pool
// (the object is not freed; see __kmp_cleanup_indirect_user_locks).
static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
  kmp_uint32 gtid = __kmp_entry_gtid();
  kmp_indirect_lock_t *l =
      __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
  kmp_indirect_locktag_t tag = l->type;

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  // Use the base lock's space to keep the pool chain.
  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    // Remember the table index so the slot can be handed out again on reuse.
    l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
  }
  __kmp_indirect_lock_pool[tag] = l;

  __kmp_release_lock(&__kmp_global_lock, gtid);
}
3229
__kmp_set_indirect_lock(kmp_dyna_lock_t * lock,kmp_int32 gtid)3230 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3231 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3232 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3233 }
3234
__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock,kmp_int32 gtid)3235 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3236 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3237 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3238 }
3239
__kmp_test_indirect_lock(kmp_dyna_lock_t * lock,kmp_int32 gtid)3240 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3241 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3242 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3243 }
3244
__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock,kmp_int32 gtid)3245 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3246 kmp_int32 gtid) {
3247 kmp_indirect_lock_t *l =
3248 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3249 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3250 }
3251
__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock,kmp_int32 gtid)3252 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3253 kmp_int32 gtid) {
3254 kmp_indirect_lock_t *l =
3255 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3256 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3257 }
3258
__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock,kmp_int32 gtid)3259 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3260 kmp_int32 gtid) {
3261 kmp_indirect_lock_t *l =
3262 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3263 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3264 }
3265
// Default lock sequence used for user locks when none is requested.
kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3267
// This is used only in kmp_error.cpp when consistency checking is on.
// Dispatch on the lock sequence to the owner accessor of the concrete lock
// type; returns 0 for sequences with no owner tracking.
kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
  switch (seq) {
  case lockseq_tas:
  case lockseq_nested_tas:
    return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
#if KMP_USE_FUTEX
  case lockseq_futex:
  case lockseq_nested_futex:
    return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
#endif
  case lockseq_ticket:
  case lockseq_nested_ticket:
    return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
  case lockseq_queuing:
  case lockseq_nested_queuing:
#if KMP_USE_ADAPTIVE_LOCKS
  case lockseq_adaptive: // adaptive locks share the queuing lock layout
#endif
    return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
  case lockseq_drdpa:
  case lockseq_nested_drdpa:
    return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
  default:
    return 0;
  }
}
3295
// Initializes data for dynamic user locks: selects the checked or unchecked
// jump tables, then (once) allocates the indirect lock table and fills in
// the per-type size and accessor tables.
void __kmp_init_dynamic_user_locks() {
  // Initialize jump table for the lock functions
  if (__kmp_env_consistency_check) {
    __kmp_direct_set = direct_set_check;
    __kmp_direct_unset = direct_unset_check;
    __kmp_direct_test = direct_test_check;
    __kmp_direct_destroy = direct_destroy_check;
    __kmp_indirect_set = indirect_set_check;
    __kmp_indirect_unset = indirect_unset_check;
    __kmp_indirect_test = indirect_test_check;
    __kmp_indirect_destroy = indirect_destroy_check;
  } else {
    __kmp_direct_set = direct_set;
    __kmp_direct_unset = direct_unset;
    __kmp_direct_test = direct_test;
    __kmp_direct_destroy = direct_destroy;
    __kmp_indirect_set = indirect_set;
    __kmp_indirect_unset = indirect_unset;
    __kmp_indirect_test = indirect_test;
    __kmp_indirect_destroy = indirect_destroy;
  }
  // If the user locks have already been initialized, then return. Allow the
  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
  // new lock tables if they have already been allocated.
  if (__kmp_init_user_locks)
    return;

  // Initialize lock index table: one block-pointer row, one block of
  // KMP_I_LOCK_CHUNK lock objects.
  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
  __kmp_i_lock_table.table =
      (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
      KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
  __kmp_i_lock_table.next = 0;

  // Indirect lock size: bytes to allocate for each lock type's base object.
  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
#if KMP_USE_ADAPTIVE_LOCKS
  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
#if KMP_USE_TSX
  __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
#if KMP_USE_FUTEX
  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);

  // Initialize lock accessor/modifier jump tables. fill_jumps populates the
  // entries for the three lock kinds that support accessors; fill_table does
  // plain and nested variants (plus adaptive, which reuses queuing).
#define fill_jumps(table, expand, sep)                                         \
  {                                                                            \
    table[locktag##sep##ticket] = expand(ticket);                              \
    table[locktag##sep##queuing] = expand(queuing);                            \
    table[locktag##sep##drdpa] = expand(drdpa);                                \
  }

#if KMP_USE_ADAPTIVE_LOCKS
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    table[locktag_adaptive] = expand(queuing);                                 \
    fill_jumps(table, expand, _nested_);                                       \
  }
#else
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    fill_jumps(table, expand, _nested_);                                       \
  }
#endif // KMP_USE_ADAPTIVE_LOCKS

#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
  fill_table(__kmp_indirect_set_location, expand);
#undef expand
#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
  fill_table(__kmp_indirect_set_flags, expand);
#undef expand
#define expand(l)                                                              \
  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
  fill_table(__kmp_indirect_get_location, expand);
#undef expand
#define expand(l)                                                              \
  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
  fill_table(__kmp_indirect_get_flags, expand);
#undef expand

  __kmp_init_user_locks = TRUE;
}
3392
3393 // Clean up the lock table.
__kmp_cleanup_indirect_user_locks()3394 void __kmp_cleanup_indirect_user_locks() {
3395 kmp_lock_index_t i;
3396 int k;
3397
3398 // Clean up locks in the pools first (they were already destroyed before going
3399 // into the pools).
3400 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3401 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3402 while (l != NULL) {
3403 kmp_indirect_lock_t *ll = l;
3404 l = (kmp_indirect_lock_t *)l->lock->pool.next;
3405 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3406 ll));
3407 __kmp_free(ll->lock);
3408 ll->lock = NULL;
3409 }
3410 __kmp_indirect_lock_pool[k] = NULL;
3411 }
3412 // Clean up the remaining undestroyed locks.
3413 for (i = 0; i < __kmp_i_lock_table.next; i++) {
3414 kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3415 if (l->lock != NULL) {
3416 // Locks not destroyed explicitly need to be destroyed here.
3417 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3418 KA_TRACE(
3419 20,
3420 ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
3421 l));
3422 __kmp_free(l->lock);
3423 }
3424 }
3425 // Free the table
3426 for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3427 __kmp_free(__kmp_i_lock_table.table[i]);
3428 __kmp_free(__kmp_i_lock_table.table);
3429
3430 __kmp_init_user_locks = FALSE;
3431 }
3432
// Lock kind currently selected for OpenMP user locks (lk_default until set).
enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
// Number of locks carved out of each block when block allocation is used.
int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3435
3436 #else // KMP_USE_DYNAMIC_LOCK
3437
// Consistency-checked init for TAS locks; no extra checks are needed at init
// time, so this simply forwards to the plain initializer.
static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}
3441
// Consistency-checked init for nested TAS locks; forwards to the plain
// initializer (no init-time checks required).
static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}
3445
3446 #if KMP_USE_FUTEX
// Consistency-checked init for futex locks; forwards to the plain
// initializer (no init-time checks required).
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}
3450
// Consistency-checked init for nested futex locks; forwards to the plain
// initializer (no init-time checks required).
static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
3454 #endif
3455
// A ticket lock stores its own address in lk.self once initialized; comparing
// against that field detects whether init has been performed.
static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
  return lck == lck->lk.self;
}
3459
// Consistency-checked init for ticket locks; forwards to the plain
// initializer (no init-time checks required).
static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}
3463
// Consistency-checked init for nested ticket locks; forwards to the plain
// initializer (no init-time checks required).
static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}
3467
// A queuing lock stores its own address in lk.initialized once initialized;
// comparing against that field detects whether init has been performed.
static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
  return lck == lck->lk.initialized;
}
3471
// Consistency-checked init for queuing locks; forwards to the plain
// initializer (no init-time checks required).
static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}
3475
// Consistency-checked init for nested queuing locks; forwards to the plain
// initializer (no init-time checks required).
static void
__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_nested_queuing_lock(lck);
}
3480
3481 #if KMP_USE_ADAPTIVE_LOCKS
// Consistency-checked init for adaptive (HW-transactional) locks; forwards to
// the plain initializer (no init-time checks required).
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
3485 #endif
3486
// A DRDPA lock stores its own address in lk.initialized once initialized;
// comparing against that field detects whether init has been performed.
static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
  return lck == lck->lk.initialized;
}
3490
// Consistency-checked init for DRDPA locks; forwards to the plain
// initializer (no init-time checks required).
static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
}
3494
// Consistency-checked init for nested DRDPA locks; forwards to the plain
// initializer (no init-time checks required).
static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_nested_drdpa_lock(lck);
}
3498
3499 /* user locks
3500 * They are implemented as a table of function pointers which are set to the
3501 * lock functions of the appropriate kind, once that has been determined. */
3502
// Lock kind currently selected for OpenMP user locks (lk_default until
// __kmp_set_user_lock_vptrs() binds a concrete implementation).
enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

// Sizes of the selected lock type: the base struct and the full lock object.
size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

// Function-pointer "vtable" for the selected lock kind. All entries are NULL
// until __kmp_set_user_lock_vptrs() fills them in; the *_with_checks_ entries
// are bound to checked or unchecked variants depending on
// __kmp_env_consistency_check at bind time.
kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
// Nested (recursive) lock variants of the entry points above.
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

// Introspection accessors; some lock kinds (e.g. TAS/futex) leave these NULL.
int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;
3536
// Bind the generic user-lock "vtable" (the __kmp_*_user_lock_*_ function
// pointers and size globals above) to the lock implementation selected by
// user_lock_kind. Checked or unchecked acquire/test/release/init/destroy
// variants are installed depending on __kmp_env_consistency_check.
void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);
    // NOTE(review): no break here -- relies on KMP_ASSERT(0) terminating;
    // otherwise control would fall through into the lk_tas case. Confirm
    // KMP_ASSERT stays enabled in all build configurations.

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    // TAS locks keep no initialized/location/flags bookkeeping; leave the
    // introspection accessors NULL.
    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    // Like TAS, futex locks keep no introspection state; accessors stay NULL.
    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    // Ticket locks support full introspection.
    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    // Adaptive locks reuse the queuing-lock accessors for everything except
    // acquire/release/destroy (note: no nested bindings for adaptive locks).
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}
3747
3748 // ----------------------------------------------------------------------------
3749 // User lock table & lock allocation
3750
// Table of all user locks ever allocated. Entry [0] is reserved to link to
// the previous (smaller) table after a reallocation; 'used' starts at 1.
kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
// Free list of reusable locks, linked through lck->pool.next.
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3757
__kmp_lock_table_insert(kmp_user_lock_p lck)3758 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3759 // Assume that kmp_global_lock is held upon entry/exit.
3760 kmp_lock_index_t index;
3761 if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3762 kmp_lock_index_t size;
3763 kmp_user_lock_p *table;
3764 // Reallocate lock table.
3765 if (__kmp_user_lock_table.allocated == 0) {
3766 size = 1024;
3767 } else {
3768 size = __kmp_user_lock_table.allocated * 2;
3769 }
3770 table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3771 KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3772 sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3773 table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3774 // We cannot free the previous table now, since it may be in use by other
3775 // threads. So save the pointer to the previous table in in the first
3776 // element of the new table. All the tables will be organized into a list,
3777 // and could be freed when library shutting down.
3778 __kmp_user_lock_table.table = table;
3779 __kmp_user_lock_table.allocated = size;
3780 }
3781 KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3782 __kmp_user_lock_table.allocated);
3783 index = __kmp_user_lock_table.used;
3784 __kmp_user_lock_table.table[index] = lck;
3785 ++__kmp_user_lock_table.used;
3786 return index;
3787 }
3788
__kmp_lock_block_allocate()3789 static kmp_user_lock_p __kmp_lock_block_allocate() {
3790 // Assume that kmp_global_lock is held upon entry/exit.
3791 static int last_index = 0;
3792 if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3793 // Restart the index.
3794 last_index = 0;
3795 // Need to allocate a new block.
3796 KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3797 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3798 char *buffer =
3799 (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3800 // Set up the new block.
3801 kmp_block_of_locks *new_block =
3802 (kmp_block_of_locks *)(&buffer[space_for_locks]);
3803 new_block->next_block = __kmp_lock_blocks;
3804 new_block->locks = (void *)buffer;
3805 // Publish the new block.
3806 KMP_MB();
3807 __kmp_lock_blocks = new_block;
3808 }
3809 kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3810 ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3811 last_index++;
3812 return ret;
3813 }
3814
3815 // Get memory for a lock. It may be freshly allocated memory or reused memory
3816 // from lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  // All pool/table manipulation below happens under the global lock.
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }

    // Insert lock in the table so that it can be freed in __kmp_cleanup,
    // and debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool. A pooled lock remembers its original table
    // index in pool.index, so no new table entry is needed.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    // The user-visible lock object is too small to hold a pointer; store the
    // table index instead.
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    // The user-visible lock object holds the lock pointer directly.
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // mark the lock if it is critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper

  return lck;
}
3859
3860 // Put lock's memory to pool for reusing.
__kmp_user_lock_free(void ** user_lock,kmp_int32 gtid,kmp_user_lock_p lck)3861 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3862 kmp_user_lock_p lck) {
3863 KMP_DEBUG_ASSERT(user_lock != NULL);
3864 KMP_DEBUG_ASSERT(lck != NULL);
3865
3866 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3867
3868 lck->pool.next = __kmp_lock_pool;
3869 __kmp_lock_pool = lck;
3870 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3871 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3872 KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3873 lck->pool.index = index;
3874 }
3875
3876 __kmp_release_lock(&__kmp_global_lock, gtid);
3877 }
3878
__kmp_lookup_user_lock(void ** user_lock,char const * func)3879 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3880 kmp_user_lock_p lck = NULL;
3881
3882 if (__kmp_env_consistency_check) {
3883 if (user_lock == NULL) {
3884 KMP_FATAL(LockIsUninitialized, func);
3885 }
3886 }
3887
3888 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3889 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3890 if (__kmp_env_consistency_check) {
3891 if (!(0 < index && index < __kmp_user_lock_table.used)) {
3892 KMP_FATAL(LockIsUninitialized, func);
3893 }
3894 }
3895 KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3896 KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3897 lck = __kmp_user_lock_table.table[index];
3898 } else {
3899 lck = *((kmp_user_lock_p *)user_lock);
3900 }
3901
3902 if (__kmp_env_consistency_check) {
3903 if (lck == NULL) {
3904 KMP_FATAL(LockIsUninitialized, func);
3905 }
3906 }
3907
3908 return lck;
3909 }
3910
// Destroy and free every user lock at library shutdown, then release the
// chained lock tables and any block-allocated storage.
void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about lock in the pool--we will free them when
  // iterating through lock table (it includes all the locks, dead or alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through lock table, free all locks.
  // Do not free item [0], it is reserved for lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type is
  // currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
  // for "pool" objects on the free list. This must happen as the "location"
  // field of real user locks overlaps the "index" field of "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // reduce __kmp_user_lock_table.used before freeing the lock,
    // so that state of locks is consistent
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Cleanup internal lock dynamic resources (for drdpa locks particularly).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    // (Block-allocated lock storage is released wholesale further below.)
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}
4014
4015 #endif // KMP_USE_DYNAMIC_LOCK
4016