1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 //                     The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #define __KMP_IMP
15 #include "omp.h" /* extern "C" declarations of user-visible routines */
16 #include "kmp.h"
17 #include "kmp_error.h"
18 #include "kmp_i18n.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_stats.h"
22 
23 #if OMPT_SUPPORT
24 #include "ompt-specific.h"
25 #endif
26 
27 #define MAX_MESSAGE 512
28 
29 // flags will be used in future, e.g. to implement openmp_strict library
30 // restrictions
31 
32 /*!
33  * @ingroup STARTUP_SHUTDOWN
34  * @param loc   in   source location information
35  * @param flags in   for future use (currently ignored)
36  *
37  * Initialize the runtime library. This call is optional; if it is not made then
38  * it will be implicitly called by attempts to use other library functions.
39  */
40 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
41   // By default __kmpc_begin() is no-op.
42   char *env;
43   if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
44       __kmp_str_match_true(env)) {
45     __kmp_middle_initialize();
46     KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
47   } else if (__kmp_ignore_mppbeg() == FALSE) {
48     // By default __kmp_ignore_mppbeg() returns TRUE.
49     __kmp_internal_begin();
50     KC_TRACE(10, ("__kmpc_begin: called\n"));
51   }
52 }
53 
54 /*!
55  * @ingroup STARTUP_SHUTDOWN
56  * @param loc source location information
57  *
58  * Shut down the runtime library. This call is also optional; even if it is
59  * made, it does nothing unless the `KMP_IGNORE_MPPEND` environment variable is
60  * set to zero.
61  */
62 void __kmpc_end(ident_t *loc) {
63   // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
64   // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
65   // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
66   // returns FALSE and __kmpc_end() will unregister this root (which can cause
67   // library shutdown).
68   if (__kmp_ignore_mppend() == FALSE) {
69     KC_TRACE(10, ("__kmpc_end: called\n"));
70     KA_TRACE(30, ("__kmpc_end\n"));
71 
72     __kmp_internal_end_thread(-1);
73   }
74 #if KMP_OS_WINDOWS && OMPT_SUPPORT
75   // Normal exit process on Windows does not allow worker threads of the final
76   // parallel region to finish reporting their events, so shutting down the
77   // library here fixes the issue at least for the cases where __kmpc_end() is
78   // placed properly.
79   if (ompt_enabled.enabled)
80     __kmp_internal_end_library(__kmp_gtid_get_specific());
81 #endif
82 }
83 
84 /*!
85 @ingroup THREAD_STATES
86 @param loc Source location information.
87 @return The global thread index of the active thread.
88 
89 This function can be called in any context.
90 
91 If the runtime has only been entered at the outermost level from a
92 single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
93 that which would be returned by omp_get_thread_num() in the outermost
94 active parallel construct. (Or zero if there is no active parallel
95 construct, since the master thread is necessarily thread zero).
96 
97 If multiple non-OpenMP threads all enter an OpenMP construct then this
98 will be a unique thread identifier among all the threads created by
99 the OpenMP runtime (but the value cannot be defined in terms of
100 OpenMP thread ids returned by omp_get_thread_num()).
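
A minimal illustrative sketch (not actual compiler output; @c loc stands for a
compiler-emitted ident_t) of capturing the gtid once and reusing it for later
runtime calls:
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc); // cache the global thread id
__kmpc_barrier(&loc, gtid);                      // reuse it for later entry points
@endcode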
101 */
102 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
103   kmp_int32 gtid = __kmp_entry_gtid();
104 
105   KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
106 
107   return gtid;
108 }
109 
110 /*!
111 @ingroup THREAD_STATES
112 @param loc Source location information.
113 @return The number of threads under control of the OpenMP<sup>*</sup> runtime
114 
115 This function can be called in any context.
116 It returns the total number of threads under the control of the OpenMP runtime.
117 That is not a number that can be determined by any OpenMP standard calls, since
118 the library may be called from more than one non-OpenMP thread, and this
119 reflects the total over all such calls. Similarly, because the runtime keeps
120 underlying threads alive even when they are not active (the cost of creating
121 and destroying OS threads is high), this call counts all such threads, even
122 those that are currently idle.
123 */
124 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
125   KC_TRACE(10,
126            ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
127 
128   return TCR_4(__kmp_all_nth);
129 }
130 
131 /*!
132 @ingroup THREAD_STATES
133 @param loc Source location information.
134 @return The thread number of the calling thread in the innermost active parallel
135 construct.
136 */
137 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
138   KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
139   return __kmp_tid_from_gtid(__kmp_entry_gtid());
140 }
141 
142 /*!
143 @ingroup THREAD_STATES
144 @param loc Source location information.
145 @return The number of threads in the innermost active parallel construct.
146 */
147 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
148   KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
149 
150   return __kmp_entry_thread()->th.th_team->t.t_nproc;
151 }
152 
153 /*!
154  * @ingroup DEPRECATED
155  * @param loc location description
156  *
157  * This function need not be called. It always returns TRUE.
158  */
159 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
160 #ifndef KMP_DEBUG
161 
162   return TRUE;
163 
164 #else
165 
166   const char *semi2;
167   const char *semi3;
168   int line_no;
169 
170   if (__kmp_par_range == 0) {
171     return TRUE;
172   }
173   semi2 = loc->psource;
174   if (semi2 == NULL) {
175     return TRUE;
176   }
177   semi2 = strchr(semi2, ';');
178   if (semi2 == NULL) {
179     return TRUE;
180   }
181   semi2 = strchr(semi2 + 1, ';');
182   if (semi2 == NULL) {
183     return TRUE;
184   }
185   if (__kmp_par_range_filename[0]) {
186     const char *name = semi2 - 1;
187     while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
188       name--;
189     }
190     if ((*name == '/') || (*name == ';')) {
191       name++;
192     }
193     if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
194       return __kmp_par_range < 0;
195     }
196   }
197   semi3 = strchr(semi2 + 1, ';');
198   if (__kmp_par_range_routine[0]) {
199     if ((semi3 != NULL) && (semi3 > semi2) &&
200         (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
201       return __kmp_par_range < 0;
202     }
203   }
204   if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
205     if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
206       return __kmp_par_range > 0;
207     }
208     return __kmp_par_range < 0;
209   }
210   return TRUE;
211 
212 #endif /* KMP_DEBUG */
213 }
214 
215 /*!
216 @ingroup THREAD_STATES
217 @param loc Source location information.
218 @return 1 if this thread is executing inside an active parallel region, zero if
219 not.
220 */
221 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
222   return __kmp_entry_thread()->th.th_root->r.r_active;
223 }
224 
225 /*!
226 @ingroup PARALLEL
227 @param loc source location information
228 @param global_tid global thread number
229 @param num_threads number of threads requested for this parallel construct
230 
231 Set the number of threads to be used by the next fork spawned by this thread.
232 This call is only required if the parallel construct has a `num_threads` clause.
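
For example, a compiler might lower `#pragma omp parallel num_threads(4)`
roughly as the following sketch (illustrative only; @c loc, @c gtid and
@c outlined_body are assumed names, not real generated symbols):
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_threads(&loc, gtid, 4);                // honor the num_threads clause
__kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);  // then fork as usual
@endcode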
233 */
234 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
235                              kmp_int32 num_threads) {
236   KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
237                 global_tid, num_threads));
238 
239   __kmp_push_num_threads(loc, global_tid, num_threads);
240 }
241 
242 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
243   KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
244 
245   /* the num_threads are automatically popped */
246 }
247 
248 #if OMP_40_ENABLED
249 
250 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
251                            kmp_int32 proc_bind) {
252   KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
253                 proc_bind));
254 
255   __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
256 }
257 
258 #endif /* OMP_40_ENABLED */
259 
260 /*!
261 @ingroup PARALLEL
262 @param loc  source location information
263 @param argc  total number of arguments in the ellipsis
264 @param microtask  pointer to callback routine consisting of outlined parallel
265 construct
266 @param ...  pointers to shared variables that aren't global
267 
268 Do the actual fork and call the microtask in the relevant number of threads.
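
A rough sketch of how a compiler might use this entry point for a parallel
region with one shared variable (illustrative only; @c loc is a
compiler-emitted ident_t and @c outlined is a hypothetical outlined function):
@code
// Outlined body: gtid/btid pointers first, then one pointer per shared variable.
void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *shared_x) {
  /* body of the parallel region, operating on *shared_x */
}

int x = 0;
__kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x); // argc == 1 shared argument
@endcode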
269 */
270 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
271   int gtid = __kmp_entry_gtid();
272 
273 #if (KMP_STATS_ENABLED)
274   // If we were in a serial region, then stop the serial timer, record
275   // the event, and start parallel region timer
276   stats_state_e previous_state = KMP_GET_THREAD_STATE();
277   if (previous_state == stats_state_e::SERIAL_REGION) {
278     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
279   } else {
280     KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
281   }
282   int inParallel = __kmpc_in_parallel(loc);
283   if (inParallel) {
284     KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
285   } else {
286     KMP_COUNT_BLOCK(OMP_PARALLEL);
287   }
288 #endif
289 
290   // maybe saving thr_state is enough here
291   {
292     va_list ap;
293     va_start(ap, microtask);
294 
295 #if OMPT_SUPPORT
296     ompt_frame_t *ompt_frame;
297     if (ompt_enabled.enabled) {
298       kmp_info_t *master_th = __kmp_threads[gtid];
299       kmp_team_t *parent_team = master_th->th.th_team;
300       ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
301       if (lwt)
302         ompt_frame = &(lwt->ompt_task_info.frame);
303       else {
304         int tid = __kmp_tid_from_gtid(gtid);
305         ompt_frame = &(
306             parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
307       }
308       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
309       OMPT_STORE_RETURN_ADDRESS(gtid);
310     }
311 #endif
312 
313 #if INCLUDE_SSC_MARKS
314     SSC_MARK_FORKING();
315 #endif
316     __kmp_fork_call(loc, gtid, fork_context_intel, argc,
317                     VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
318                     VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
319 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
320 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
321                     &ap
322 #else
323                     ap
324 #endif
325                     );
326 #if INCLUDE_SSC_MARKS
327     SSC_MARK_JOINING();
328 #endif
329     __kmp_join_call(loc, gtid
330 #if OMPT_SUPPORT
331                     ,
332                     fork_context_intel
333 #endif
334                     );
335 
336     va_end(ap);
337   }
338 
339 #if KMP_STATS_ENABLED
340   if (previous_state == stats_state_e::SERIAL_REGION) {
341     KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
342   } else {
343     KMP_POP_PARTITIONED_TIMER();
344   }
345 #endif // KMP_STATS_ENABLED
346 }
347 
348 #if OMP_40_ENABLED
349 /*!
350 @ingroup PARALLEL
351 @param loc source location information
352 @param global_tid global thread number
353 @param num_teams number of teams requested for the teams construct
354 @param num_threads number of threads per team requested for the teams construct
355 
356 Set the number of teams to be used by the teams construct.
357 This call is only required if the teams construct has a `num_teams` clause
358 or a `thread_limit` clause (or both).
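
Illustrative sketch of lowering a teams construct with both clauses (assumed
names only, not actual generated code):
@code
// #pragma omp teams num_teams(8) thread_limit(4)
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_push_num_teams(&loc, gtid, 8, 4);            // 8 teams, <= 4 threads each
__kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_body); // then fork the teams region
@endcode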
359 */
360 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
361                            kmp_int32 num_teams, kmp_int32 num_threads) {
362   KA_TRACE(20,
363            ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
364             global_tid, num_teams, num_threads));
365 
366   __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
367 }
368 
369 /*!
370 @ingroup PARALLEL
371 @param loc  source location information
372 @param argc  total number of arguments in the ellipsis
373 @param microtask  pointer to callback routine consisting of outlined teams
374 construct
375 @param ...  pointers to shared variables that aren't global
376 
377 Do the actual fork and call the microtask in the relevant number of threads.
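
A sketch of what the outlined teams microtask might look like (illustrative
only; it has the same kmpc_micro signature as a parallel-region microtask):
@code
void teams_body(kmp_int32 *gtid, kmp_int32 *btid) {
  // Executed by the master thread of each team; a "parallel" nested inside
  // "teams" would typically be lowered to another __kmpc_fork_call here.
}

__kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_body);
@endcode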
378 */
379 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
380                        ...) {
381   int gtid = __kmp_entry_gtid();
382   kmp_info_t *this_thr = __kmp_threads[gtid];
383   va_list ap;
384   va_start(ap, microtask);
385 
386   KMP_COUNT_BLOCK(OMP_TEAMS);
387 
388   // remember teams entry point and nesting level
389   this_thr->th.th_teams_microtask = microtask;
390   this_thr->th.th_teams_level =
391       this_thr->th.th_team->t.t_level; // AC: can be >0 on host
392 
393 #if OMPT_SUPPORT
394   kmp_team_t *parent_team = this_thr->th.th_team;
395   int tid = __kmp_tid_from_gtid(gtid);
396   if (ompt_enabled.enabled) {
397     parent_team->t.t_implicit_task_taskdata[tid]
398         .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
399   }
400   OMPT_STORE_RETURN_ADDRESS(gtid);
401 #endif
402 
403   // check if __kmpc_push_num_teams called, set default number of teams
404   // otherwise
405   if (this_thr->th.th_teams_size.nteams == 0) {
406     __kmp_push_num_teams(loc, gtid, 0, 0);
407   }
408   KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
409   KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
410   KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
411 
412   __kmp_fork_call(loc, gtid, fork_context_intel, argc,
413                   VOLATILE_CAST(microtask_t)
414                       __kmp_teams_master, // "wrapped" task
415                   VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
416 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
417                   &ap
418 #else
419                   ap
420 #endif
421                   );
422   __kmp_join_call(loc, gtid
423 #if OMPT_SUPPORT
424                   ,
425                   fork_context_intel
426 #endif
427                   );
428 
429   this_thr->th.th_teams_microtask = NULL;
430   this_thr->th.th_teams_level = 0;
431   *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
432   va_end(ap);
433 }
434 #endif /* OMP_40_ENABLED */
435 
436 // I don't think this function should ever have been exported.
437 // The __kmpc_ prefix was misapplied.  I'm fairly certain that no generated
438 // openmp code ever called it, but it's been exported from the RTL for so
439 // long that I'm afraid to remove the definition.
440 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
441 
442 /*!
443 @ingroup PARALLEL
444 @param loc  source location information
445 @param global_tid  global thread number
446 
447 Enter a serialized parallel construct. This interface is used to handle a
448 conditional parallel region, like this,
449 @code
450 #pragma omp parallel if (condition)
451 @endcode
452 when the condition is false.
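A sketch of the serialized branch a compiler might emit in that case
(illustrative only; @c outlined, @c x and @c zero are assumed names):
@code
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
kmp_int32 zero = 0;
__kmpc_serialized_parallel(&loc, gtid);
outlined(&gtid, &zero, &x);                 // region body runs on this thread
__kmpc_end_serialized_parallel(&loc, gtid);
@endcode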
453 */
454 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
455 // The implementation is now in kmp_runtime.cpp so that it can share static
456 // functions with kmp_fork_call since the tasks to be done are similar in
457 // each case.
458 #if OMPT_SUPPORT
459   OMPT_STORE_RETURN_ADDRESS(global_tid);
460 #endif
461   __kmp_serialized_parallel(loc, global_tid);
462 }
463 
464 /*!
465 @ingroup PARALLEL
466 @param loc  source location information
467 @param global_tid  global thread number
468 
469 Leave a serialized parallel construct.
470 */
471 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
472   kmp_internal_control_t *top;
473   kmp_info_t *this_thr;
474   kmp_team_t *serial_team;
475 
476   KC_TRACE(10,
477            ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
478 
479   /* skip all this code for autopar serialized loops since it results in
480      unacceptable overhead */
481   if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
482     return;
483 
484   // Not autopar code
485   if (!TCR_4(__kmp_init_parallel))
486     __kmp_parallel_initialize();
487 
488   this_thr = __kmp_threads[global_tid];
489   serial_team = this_thr->th.th_serial_team;
490 
491 #if OMP_45_ENABLED
492   kmp_task_team_t *task_team = this_thr->th.th_task_team;
493 
494   // we need to wait for the proxy tasks before finishing the thread
495   if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
496     __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
497 #endif
498 
499   KMP_MB();
500   KMP_DEBUG_ASSERT(serial_team);
501   KMP_ASSERT(serial_team->t.t_serialized);
502   KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
503   KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
504   KMP_DEBUG_ASSERT(serial_team->t.t_threads);
505   KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
506 
507 #if OMPT_SUPPORT
508   if (ompt_enabled.enabled &&
509       this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
510     OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
511     if (ompt_enabled.ompt_callback_implicit_task) {
512       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
513           ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
514           OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
515     }
516 
517     // clear the task id only after unlinking the task
518     ompt_data_t *parent_task_data;
519     __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
520 
521     if (ompt_enabled.ompt_callback_parallel_end) {
522       ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
523           &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
524           ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
525     }
526     __ompt_lw_taskteam_unlink(this_thr);
527     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
528   }
529 #endif
530 
531   /* If necessary, pop the internal control stack values and replace the team
532    * values */
533   top = serial_team->t.t_control_stack_top;
534   if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
535     copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
536     serial_team->t.t_control_stack_top = top->next;
537     __kmp_free(top);
538   }
539 
540   // if( serial_team -> t.t_serialized > 1 )
541   serial_team->t.t_level--;
542 
543   /* pop dispatch buffers stack */
544   KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
545   {
546     dispatch_private_info_t *disp_buffer =
547         serial_team->t.t_dispatch->th_disp_buffer;
548     serial_team->t.t_dispatch->th_disp_buffer =
549         serial_team->t.t_dispatch->th_disp_buffer->next;
550     __kmp_free(disp_buffer);
551   }
552 #if OMP_50_ENABLED
553   this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
554 #endif
555 
556   --serial_team->t.t_serialized;
557   if (serial_team->t.t_serialized == 0) {
558 
559 /* return to the parallel section */
560 
561 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
562     if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
563       __kmp_clear_x87_fpu_status_word();
564       __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
565       __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
566     }
567 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
568 
569     this_thr->th.th_team = serial_team->t.t_parent;
570     this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
571 
572     /* restore values cached in the thread */
573     this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /*  JPH */
574     this_thr->th.th_team_master =
575         serial_team->t.t_parent->t.t_threads[0]; /* JPH */
576     this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
577 
578     /* TODO the below shouldn't need to be adjusted for serialized teams */
579     this_thr->th.th_dispatch =
580         &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
581 
582     __kmp_pop_current_task_from_thread(this_thr);
583 
584     KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
585     this_thr->th.th_current_task->td_flags.executing = 1;
586 
587     if (__kmp_tasking_mode != tskm_immediate_exec) {
588       // Copy the task team from the new child / old parent team to the thread.
589       this_thr->th.th_task_team =
590           this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
591       KA_TRACE(20,
592                ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
593                 "team %p\n",
594                 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
595     }
596   } else {
597     if (__kmp_tasking_mode != tskm_immediate_exec) {
598       KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
599                     "depth of serial team %p to %d\n",
600                     global_tid, serial_team, serial_team->t.t_serialized));
601     }
602   }
603 
604   if (__kmp_env_consistency_check)
605     __kmp_pop_parallel(global_tid, NULL);
606 #if OMPT_SUPPORT
607   if (ompt_enabled.enabled)
608     this_thr->th.ompt_thread_info.state =
609         ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
610                                            : ompt_state_work_parallel);
611 #endif
612 }
613 
614 /*!
615 @ingroup SYNCHRONIZATION
616 @param loc  source location information.
617 
618 Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
619 depending on the memory ordering convention obeyed by the compiler
620 even that may not be necessary).
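
Illustrative sketch of a flush used in a spin-wait loop (assumed names;
@c flag is some shared variable visible to other threads):
@code
while (!flag) {
  __kmpc_flush(&loc); // lowered from "#pragma omp flush(flag)"
}
@endcode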
621 */
622 void __kmpc_flush(ident_t *loc) {
623   KC_TRACE(10, ("__kmpc_flush: called\n"));
624 
625   /* need explicit __mf() here since use volatile instead in library */
626   KMP_MB(); /* Flush all pending memory write invalidates.  */
627 
628 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
629 #if KMP_MIC
630 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
631 // We shouldn't need it, though, since the ABI rules require that
632 // * If the compiler generates NGO stores it also generates the fence
633 // * If users hand-code NGO stores they should insert the fence
634 // therefore no incomplete unordered stores should be visible.
635 #else
636   // C74404
637   // This is to address non-temporal store instructions (sfence needed).
638   // The clflush instruction is also addressed (mfence needed).
639   // Probably the non-temporal load movntdqa instruction should also be
640   // addressed.
641   // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
642   if (!__kmp_cpuinfo.initialized) {
643     __kmp_query_cpuid(&__kmp_cpuinfo);
644   }
645   if (!__kmp_cpuinfo.sse2) {
646     // CPU cannot execute SSE2 instructions.
647   } else {
648 #if KMP_COMPILER_ICC
649     _mm_mfence();
650 #elif KMP_COMPILER_MSVC
651     MemoryBarrier();
652 #else
653     __sync_synchronize();
654 #endif // KMP_COMPILER_ICC
655   }
656 #endif // KMP_MIC
657 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
658 // Nothing to see here move along
659 #elif KMP_ARCH_PPC64
660 // Nothing needed here (we have a real MB above).
661 #if KMP_OS_CNK
662   // The flushing thread needs to yield here; this prevents a
663   // busy-waiting thread from saturating the pipeline. flush is
664   // often used in loops like this:
665   // while (!flag) {
666   //   #pragma omp flush(flag)
667   // }
668   // and adding the yield here is good for at least a 10x speedup
669   // when running >2 threads per core (on the NAS LU benchmark).
670   __kmp_yield(TRUE);
671 #endif
672 #else
673 #error Unknown or unsupported architecture
674 #endif
675 
676 #if OMPT_SUPPORT && OMPT_OPTIONAL
677   if (ompt_enabled.ompt_callback_flush) {
678     ompt_callbacks.ompt_callback(ompt_callback_flush)(
679         __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
680   }
681 #endif
682 }
683 
684 /* -------------------------------------------------------------------------- */
685 /*!
686 @ingroup SYNCHRONIZATION
687 @param loc source location information
688 @param global_tid thread id.
689 
690 Execute a barrier.
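
Sketch of the lowering of an explicit barrier (illustrative only; @c loc is a
compiler-emitted ident_t):
@code
// #pragma omp barrier
kmp_int32 gtid = __kmpc_global_thread_num(&loc);
__kmpc_barrier(&loc, gtid);
@endcode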
691 */
692 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
693   KMP_COUNT_BLOCK(OMP_BARRIER);
694   KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
695 
696   if (!TCR_4(__kmp_init_parallel))
697     __kmp_parallel_initialize();
698 
699   if (__kmp_env_consistency_check) {
700     if (loc == 0) {
701       KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
702     }
703 
704     __kmp_check_barrier(global_tid, ct_barrier, loc);
705   }
706 
707 #if OMPT_SUPPORT
708   ompt_frame_t *ompt_frame;
709   if (ompt_enabled.enabled) {
710     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
711     if (ompt_frame->enter_frame.ptr == NULL)
712       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
713     OMPT_STORE_RETURN_ADDRESS(global_tid);
714   }
715 #endif
716   __kmp_threads[global_tid]->th.th_ident = loc;
717   // TODO: explicit barrier_wait_id:
718   //   this function is called when a 'barrier' directive is present or at
719   //   the implicit barrier at the end of a worksharing construct.
720   // 1) better to add a per-thread barrier counter to a thread data structure
721   // 2) set to 0 when a new team is created
722   // 3) no sync is required
723 
724   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
725 #if OMPT_SUPPORT && OMPT_OPTIONAL
726   if (ompt_enabled.enabled) {
727     ompt_frame->enter_frame = ompt_data_none;
728   }
729 #endif
730 }
731 
732 /* The BARRIER for a MASTER section is always explicit   */
733 /*!
734 @ingroup WORK_SHARING
735 @param loc  source location information.
736 @param global_tid  global thread number.
737 @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
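
Sketch of the expected pairing with __kmpc_end_master() (illustrative only):
@code
// #pragma omp master
if (__kmpc_master(&loc, gtid)) {
  /* ... master-only code ... */
  __kmpc_end_master(&loc, gtid); // only the executing thread calls this
}
@endcode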
738 */
739 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
740   int status = 0;
741 
742   KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
743 
744   if (!TCR_4(__kmp_init_parallel))
745     __kmp_parallel_initialize();
746 
747   if (KMP_MASTER_GTID(global_tid)) {
748     KMP_COUNT_BLOCK(OMP_MASTER);
749     KMP_PUSH_PARTITIONED_TIMER(OMP_master);
750     status = 1;
751   }
752 
753 #if OMPT_SUPPORT && OMPT_OPTIONAL
754   if (status) {
755     if (ompt_enabled.ompt_callback_master) {
756       kmp_info_t *this_thr = __kmp_threads[global_tid];
757       kmp_team_t *team = this_thr->th.th_team;
758 
759       int tid = __kmp_tid_from_gtid(global_tid);
760       ompt_callbacks.ompt_callback(ompt_callback_master)(
761           ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
762           &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
763           OMPT_GET_RETURN_ADDRESS(0));
764     }
765   }
766 #endif
767 
768   if (__kmp_env_consistency_check) {
769 #if KMP_USE_DYNAMIC_LOCK
770     if (status)
771       __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
772     else
773       __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
774 #else
775     if (status)
776       __kmp_push_sync(global_tid, ct_master, loc, NULL);
777     else
778       __kmp_check_sync(global_tid, ct_master, loc, NULL);
779 #endif
780   }
781 
782   return status;
783 }
784 
785 /*!
786 @ingroup WORK_SHARING
787 @param loc  source location information.
788 @param global_tid  global thread number.
789 
790 Mark the end of a <tt>master</tt> region. This should only be called by the
791 thread that executes the <tt>master</tt> region.
792 */
793 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
794   KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
795 
796   KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
797   KMP_POP_PARTITIONED_TIMER();
798 
799 #if OMPT_SUPPORT && OMPT_OPTIONAL
800   kmp_info_t *this_thr = __kmp_threads[global_tid];
801   kmp_team_t *team = this_thr->th.th_team;
802   if (ompt_enabled.ompt_callback_master) {
803     int tid = __kmp_tid_from_gtid(global_tid);
804     ompt_callbacks.ompt_callback(ompt_callback_master)(
805         ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
806         &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
807         OMPT_GET_RETURN_ADDRESS(0));
808   }
809 #endif
810 
811   if (__kmp_env_consistency_check) {
812     if (global_tid < 0)
813       KMP_WARNING(ThreadIdentInvalid);
814 
815     if (KMP_MASTER_GTID(global_tid))
816       __kmp_pop_sync(global_tid, ct_master, loc);
817   }
818 }
819 
820 /*!
821 @ingroup WORK_SHARING
822 @param loc  source location information.
823 @param gtid  global thread number.
824 
825 Start execution of an <tt>ordered</tt> construct.
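
Sketch of an ordered region inside a worksharing loop body (illustrative only):
@code
// #pragma omp ordered
__kmpc_ordered(&loc, gtid);
/* ... code that must execute in iteration order ... */
__kmpc_end_ordered(&loc, gtid);
@endcode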
826 */
827 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
828   int cid = 0;
829   kmp_info_t *th;
830   KMP_DEBUG_ASSERT(__kmp_init_serial);
831 
832   KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
833 
834   if (!TCR_4(__kmp_init_parallel))
835     __kmp_parallel_initialize();
836 
837 #if USE_ITT_BUILD
838   __kmp_itt_ordered_prep(gtid);
839 // TODO: ordered_wait_id
840 #endif /* USE_ITT_BUILD */
841 
842   th = __kmp_threads[gtid];
843 
844 #if OMPT_SUPPORT && OMPT_OPTIONAL
845   kmp_team_t *team;
846   ompt_wait_id_t lck;
847   void *codeptr_ra;
848   if (ompt_enabled.enabled) {
849     OMPT_STORE_RETURN_ADDRESS(gtid);
850     team = __kmp_team_from_gtid(gtid);
851     lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
852     /* OMPT state update */
853     th->th.ompt_thread_info.wait_id = lck;
854     th->th.ompt_thread_info.state = ompt_state_wait_ordered;
855 
856     /* OMPT event callback */
857     codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
858     if (ompt_enabled.ompt_callback_mutex_acquire) {
859       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
860           ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
861           codeptr_ra);
862     }
863   }
864 #endif
865 
866   if (th->th.th_dispatch->th_deo_fcn != 0)
867     (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
868   else
869     __kmp_parallel_deo(&gtid, &cid, loc);
870 
871 #if OMPT_SUPPORT && OMPT_OPTIONAL
872   if (ompt_enabled.enabled) {
873     /* OMPT state update */
874     th->th.ompt_thread_info.state = ompt_state_work_parallel;
875     th->th.ompt_thread_info.wait_id = 0;
876 
877     /* OMPT event callback */
878     if (ompt_enabled.ompt_callback_mutex_acquired) {
879       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
880           ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
881     }
882   }
883 #endif
884 
885 #if USE_ITT_BUILD
886   __kmp_itt_ordered_start(gtid);
887 #endif /* USE_ITT_BUILD */
888 }
889 
890 /*!
891 @ingroup WORK_SHARING
892 @param loc  source location information.
893 @param gtid  global thread number.
894 
895 End execution of an <tt>ordered</tt> construct.
896 */
897 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
898   int cid = 0;
899   kmp_info_t *th;
900 
901   KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
902 
903 #if USE_ITT_BUILD
904   __kmp_itt_ordered_end(gtid);
905 // TODO: ordered_wait_id
906 #endif /* USE_ITT_BUILD */
907 
908   th = __kmp_threads[gtid];
909 
910   if (th->th.th_dispatch->th_dxo_fcn != 0)
911     (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
912   else
913     __kmp_parallel_dxo(&gtid, &cid, loc);
914 
915 #if OMPT_SUPPORT && OMPT_OPTIONAL
916   OMPT_STORE_RETURN_ADDRESS(gtid);
917   if (ompt_enabled.ompt_callback_mutex_released) {
918     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
919         ompt_mutex_ordered,
920         (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
921             ->t.t_ordered.dt.t_value,
922         OMPT_LOAD_RETURN_ADDRESS(gtid));
923   }
924 #endif
925 }
926 
927 #if KMP_USE_DYNAMIC_LOCK
928 
929 static __forceinline void
930 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
931                           kmp_int32 gtid, kmp_indirect_locktag_t tag) {
932   // Pointer to the allocated indirect lock is written to crit, while indexing
933   // is ignored.
934   void *idx;
935   kmp_indirect_lock_t **lck;
936   lck = (kmp_indirect_lock_t **)crit;
937   kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
938   KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
939   KMP_SET_I_LOCK_LOCATION(ilk, loc);
940   KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
941   KA_TRACE(20,
942            ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
943 #if USE_ITT_BUILD
944   __kmp_itt_critical_creating(ilk->lock, loc);
945 #endif
946   int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
947   if (status == 0) {
948 #if USE_ITT_BUILD
949     __kmp_itt_critical_destroyed(ilk->lock);
950 #endif
951     // We don't really need to destroy the unclaimed lock here since it will be
952     // cleaned up at program exit.
953     // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
954   }
955   KMP_DEBUG_ASSERT(*lck != NULL);
956 }
957 
958 // Fast-path acquire tas lock
959 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
960   {                                                                            \
961     kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
962     kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
963     kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
964     if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
965         !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
966       kmp_uint32 spins;                                                        \
967       KMP_FSYNC_PREPARE(l);                                                    \
968       KMP_INIT_YIELD(spins);                                                   \
969       if (TCR_4(__kmp_nth) >                                                   \
970           (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
971         KMP_YIELD(TRUE);                                                       \
972       } else {                                                                 \
973         KMP_YIELD_SPIN(spins);                                                 \
974       }                                                                        \
975       kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
976       while (                                                                  \
977           KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
978           !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {  \
979         __kmp_spin_backoff(&backoff);                                          \
980         if (TCR_4(__kmp_nth) >                                                 \
981             (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
982           KMP_YIELD(TRUE);                                                     \
983         } else {                                                               \
984           KMP_YIELD_SPIN(spins);                                               \
985         }                                                                      \
986       }                                                                        \
987     }                                                                          \
988     KMP_FSYNC_ACQUIRED(l);                                                     \
989   }
990 
991 // Fast-path test tas lock
992 #define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
993   {                                                                            \
994     kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
995     kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
996     kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
997     rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
998          __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
999   }
1000 
1001 // Fast-path release tas lock
1002 #define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
1003   { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1004 
1005 #if KMP_USE_FUTEX
1006 
1007 #include <sys/syscall.h>
1008 #include <unistd.h>
1009 #ifndef FUTEX_WAIT
1010 #define FUTEX_WAIT 0
1011 #endif
1012 #ifndef FUTEX_WAKE
1013 #define FUTEX_WAKE 1
1014 #endif
1015 
1016 // Fast-path acquire futex lock
1017 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
1018   {                                                                            \
1019     kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
1020     kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
1021     KMP_MB();                                                                  \
1022     KMP_FSYNC_PREPARE(ftx);                                                    \
1023     kmp_int32 poll_val;                                                        \
1024     while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
1025                 &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
1026                 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
1027       kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
1028       if (!cond) {                                                             \
1029         if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
1030                                          poll_val |                            \
1031                                              KMP_LOCK_BUSY(1, futex))) {       \
1032           continue;                                                            \
1033         }                                                                      \
1034         poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
1035       }                                                                        \
1036       kmp_int32 rc;                                                            \
1037       if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
1038                         NULL, NULL, 0)) != 0) {                                \
1039         continue;                                                              \
1040       }                                                                        \
1041       gtid_code |= 1;                                                          \
1042     }                                                                          \
1043     KMP_FSYNC_ACQUIRED(ftx);                                                   \
1044   }
1045 
1046 // Fast-path test futex lock
1047 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
1048   {                                                                            \
1049     kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
1050     if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
1051                                     KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
1052       KMP_FSYNC_ACQUIRED(ftx);                                                 \
1053       rc = TRUE;                                                               \
1054     } else {                                                                   \
1055       rc = FALSE;                                                              \
1056     }                                                                          \
1057   }
1058 
1059 // Fast-path release futex lock
1060 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
1061   {                                                                            \
1062     kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
1063     KMP_MB();                                                                  \
1064     KMP_FSYNC_RELEASING(ftx);                                                  \
1065     kmp_int32 poll_val =                                                       \
1066         KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
1067     if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
1068       syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
1069               KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
1070     }                                                                          \
1071     KMP_MB();                                                                  \
1072     KMP_YIELD(TCR_4(__kmp_nth) >                                               \
1073               (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));            \
1074   }
1075 
1076 #endif // KMP_USE_FUTEX
1077 
1078 #else // KMP_USE_DYNAMIC_LOCK
1079 
1080 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1081                                                       ident_t const *loc,
1082                                                       kmp_int32 gtid) {
1083   kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1084 
1085   // Because of the double-check, the following load doesn't need to be volatile
1086   kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1087 
1088   if (lck == NULL) {
1089     void *idx;
1090 
1091     // Allocate & initialize the lock.
1092     // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1093     lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1094     __kmp_init_user_lock_with_checks(lck);
1095     __kmp_set_user_lock_location(lck, loc);
1096 #if USE_ITT_BUILD
1097     __kmp_itt_critical_creating(lck);
1098 // __kmp_itt_critical_creating() should be called *before* the first usage
1099 // of the underlying lock. It is the only place where we can guarantee it.
1100 // There is a chance the lock will be destroyed without ever being used, but
1101 // that is not a problem, because this is not a real event seen by the user
1102 // but rather the setting of a name for an object (the lock). See kmp_itt.h.
1103 #endif /* USE_ITT_BUILD */
1104 
1105     // Use a cmpxchg instruction to slam the start of the critical section with
1106     // the lock pointer.  If another thread beat us to it, deallocate the lock,
1107     // and use the lock that the other thread allocated.
1108     int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1109 
1110     if (status == 0) {
1111 // Deallocate the lock and reload the value.
1112 #if USE_ITT_BUILD
1113       __kmp_itt_critical_destroyed(lck);
1114 // Let ITT know the lock is destroyed and the same memory location may be reused
1115 // for another purpose.
1116 #endif /* USE_ITT_BUILD */
1117       __kmp_destroy_user_lock_with_checks(lck);
1118       __kmp_user_lock_free(&idx, gtid, lck);
1119       lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1120       KMP_DEBUG_ASSERT(lck != NULL);
1121     }
1122   }
1123   return lck;
1124 }
1125 
1126 #endif // KMP_USE_DYNAMIC_LOCK
1127 
1128 /*!
1129 @ingroup WORK_SHARING
1130 @param loc  source location information.
1131 @param global_tid  global thread number.
1132 @param crit identity of the critical section. This could be a pointer to a lock
1133 associated with the critical section, or some other suitably unique value.
1134 
1135 Enter code protected by a `critical` construct.
1136 This function blocks until the executing thread can enter the critical section.
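
Sketch of a typical lowering, where the compiler emits one static
kmp_critical_name per critical section (illustrative names only):
@code
// #pragma omp critical (name)
static kmp_critical_name crit_name = {0};
__kmpc_critical(&loc, gtid, &crit_name);
/* ... protected code ... */
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode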
1137 */
1138 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1139                      kmp_critical_name *crit) {
1140 #if KMP_USE_DYNAMIC_LOCK
1141 #if OMPT_SUPPORT && OMPT_OPTIONAL
1142   OMPT_STORE_RETURN_ADDRESS(global_tid);
1143 #endif // OMPT_SUPPORT
1144   __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1145 #else
1146   KMP_COUNT_BLOCK(OMP_CRITICAL);
1147 #if OMPT_SUPPORT && OMPT_OPTIONAL
1148   ompt_state_t prev_state = ompt_state_undefined;
1149   ompt_thread_info_t ti;
1150 #endif
1151   kmp_user_lock_p lck;
1152 
1153   KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1154 
1155   // TODO: add THR_OVHD_STATE
1156 
1157   KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1158   KMP_CHECK_USER_LOCK_INIT();
1159 
1160   if ((__kmp_user_lock_kind == lk_tas) &&
1161       (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1162     lck = (kmp_user_lock_p)crit;
1163   }
1164 #if KMP_USE_FUTEX
1165   else if ((__kmp_user_lock_kind == lk_futex) &&
1166            (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1167     lck = (kmp_user_lock_p)crit;
1168   }
1169 #endif
1170   else { // ticket, queuing or drdpa
1171     lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1172   }
1173 
1174   if (__kmp_env_consistency_check)
1175     __kmp_push_sync(global_tid, ct_critical, loc, lck);
1176 
1177 // Since the critical directive binds to all threads, not just the current
1178 // team, we have to check this even if we are in a serialized team.
1179 // Also, even if we are the uber thread, we still have to acquire the lock,
1180 // as we have to contend with sibling threads.
1181 
1182 #if USE_ITT_BUILD
1183   __kmp_itt_critical_acquiring(lck);
1184 #endif /* USE_ITT_BUILD */
1185 #if OMPT_SUPPORT && OMPT_OPTIONAL
1186   OMPT_STORE_RETURN_ADDRESS(global_tid);
1187   void *codeptr_ra = NULL;
1188   if (ompt_enabled.enabled) {
1189     ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1190     /* OMPT state update */
1191     prev_state = ti.state;
1192     ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1193     ti.state = ompt_state_wait_critical;
1194 
1195     /* OMPT event callback */
1196     codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1197     if (ompt_enabled.ompt_callback_mutex_acquire) {
1198       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1199           ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1200           (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1201     }
1202   }
1203 #endif
1204   // Value of 'crit' should be good for using as a critical_id of the critical
1205   // section directive.
1206   __kmp_acquire_user_lock_with_checks(lck, global_tid);
1207 
1208 #if USE_ITT_BUILD
1209   __kmp_itt_critical_acquired(lck);
1210 #endif /* USE_ITT_BUILD */
1211 #if OMPT_SUPPORT && OMPT_OPTIONAL
1212   if (ompt_enabled.enabled) {
1213     /* OMPT state update */
1214     ti.state = prev_state;
1215     ti.wait_id = 0;
1216 
1217     /* OMPT event callback */
1218     if (ompt_enabled.ompt_callback_mutex_acquired) {
1219       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1220           ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1221     }
1222   }
1223 #endif
1224   KMP_POP_PARTITIONED_TIMER();
1225 
1226   KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1227   KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1228 #endif // KMP_USE_DYNAMIC_LOCK
1229 }
1230 
1231 #if KMP_USE_DYNAMIC_LOCK
1232 
1233 // Converts the given hint to an internal lock implementation
1234 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1235 #if KMP_USE_TSX
1236 #define KMP_TSX_LOCK(seq) lockseq_##seq
1237 #else
1238 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1239 #endif
1240 
1241 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1242 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1243 #else
1244 #define KMP_CPUINFO_RTM 0
1245 #endif
1246 
1247   // Hints that do not require further logic
1248   if (hint & kmp_lock_hint_hle)
1249     return KMP_TSX_LOCK(hle);
1250   if (hint & kmp_lock_hint_rtm)
1251     return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1252   if (hint & kmp_lock_hint_adaptive)
1253     return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1254 
1255   // Rule out conflicting hints first by returning the default lock
1256   if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1257     return __kmp_user_lock_seq;
1258   if ((hint & omp_lock_hint_speculative) &&
1259       (hint & omp_lock_hint_nonspeculative))
1260     return __kmp_user_lock_seq;
1261 
1262   // Do not even consider speculation when it appears to be contended
1263   if (hint & omp_lock_hint_contended)
1264     return lockseq_queuing;
1265 
1266   // Uncontended lock without speculation
1267   if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1268     return lockseq_tas;
1269 
1270   // HLE lock for speculation
1271   if (hint & omp_lock_hint_speculative)
1272     return KMP_TSX_LOCK(hle);
1273 
1274   return __kmp_user_lock_seq;
1275 }
1276 
1277 #if OMPT_SUPPORT && OMPT_OPTIONAL
1278 #if KMP_USE_DYNAMIC_LOCK
1279 static kmp_mutex_impl_t
1280 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1281   if (user_lock) {
1282     switch (KMP_EXTRACT_D_TAG(user_lock)) {
1283     case 0:
1284       break;
1285 #if KMP_USE_FUTEX
1286     case locktag_futex:
1287       return kmp_mutex_impl_queuing;
1288 #endif
1289     case locktag_tas:
1290       return kmp_mutex_impl_spin;
1291 #if KMP_USE_TSX
1292     case locktag_hle:
1293       return kmp_mutex_impl_speculative;
1294 #endif
1295     default:
1296       return kmp_mutex_impl_none;
1297     }
1298     ilock = KMP_LOOKUP_I_LOCK(user_lock);
1299   }
1300   KMP_ASSERT(ilock);
1301   switch (ilock->type) {
1302 #if KMP_USE_TSX
1303   case locktag_adaptive:
1304   case locktag_rtm:
1305     return kmp_mutex_impl_speculative;
1306 #endif
1307   case locktag_nested_tas:
1308     return kmp_mutex_impl_spin;
1309 #if KMP_USE_FUTEX
1310   case locktag_nested_futex:
1311 #endif
1312   case locktag_ticket:
1313   case locktag_queuing:
1314   case locktag_drdpa:
1315   case locktag_nested_ticket:
1316   case locktag_nested_queuing:
1317   case locktag_nested_drdpa:
1318     return kmp_mutex_impl_queuing;
1319   default:
1320     return kmp_mutex_impl_none;
1321   }
1322 }
1323 #else
1324 // For locks without dynamic binding
1325 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1326   switch (__kmp_user_lock_kind) {
1327   case lk_tas:
1328     return kmp_mutex_impl_spin;
1329 #if KMP_USE_FUTEX
1330   case lk_futex:
1331 #endif
1332   case lk_ticket:
1333   case lk_queuing:
1334   case lk_drdpa:
1335     return kmp_mutex_impl_queuing;
1336 #if KMP_USE_TSX
1337   case lk_hle:
1338   case lk_rtm:
1339   case lk_adaptive:
1340     return kmp_mutex_impl_speculative;
1341 #endif
1342   default:
1343     return kmp_mutex_impl_none;
1344   }
1345 }
1346 #endif // KMP_USE_DYNAMIC_LOCK
1347 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1348 
1349 /*!
1350 @ingroup WORK_SHARING
1351 @param loc  source location information.
1352 @param global_tid  global thread number.
1353 @param crit identity of the critical section. This could be a pointer to a lock
1354 associated with the critical section, or some other suitably unique value.
1355 @param hint the lock hint.
1356 
1357 Enter code protected by a `critical` construct with a hint. The hint value is
1358 used to suggest a lock implementation. This function blocks until the executing
1359 thread can enter the critical section unless the hint suggests use of
1360 speculative execution and the hardware supports it.
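
Sketch of a lowering for a critical construct with a hint clause (illustrative
only; the hint constant comes from omp.h):
@code
// #pragma omp critical (name) hint(omp_lock_hint_speculative)
static kmp_critical_name crit_name = {0};
__kmpc_critical_with_hint(&loc, gtid, &crit_name, omp_lock_hint_speculative);
/* ... protected code ... */
__kmpc_end_critical(&loc, gtid, &crit_name);
@endcode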
1361 */
1362 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1363                                kmp_critical_name *crit, uint32_t hint) {
1364   KMP_COUNT_BLOCK(OMP_CRITICAL);
1365   kmp_user_lock_p lck;
1366 #if OMPT_SUPPORT && OMPT_OPTIONAL
1367   ompt_state_t prev_state = ompt_state_undefined;
1368   ompt_thread_info_t ti;
1369   // This is the case, if called from __kmpc_critical:
1370   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1371   if (!codeptr)
1372     codeptr = OMPT_GET_RETURN_ADDRESS(0);
1373 #endif
1374 
1375   KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1376 
1377   kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1378   // Check if it is initialized.
1379   KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1380   if (*lk == 0) {
1381     kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1382     if (KMP_IS_D_LOCK(lckseq)) {
1383       KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1384                                   KMP_GET_D_TAG(lckseq));
1385     } else {
1386       __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1387     }
1388   }
1389   // Branch for accessing the actual lock object and set operation. This
1390   // branching is inevitable since this lock initialization does not follow the
1391   // normal dispatch path (lock table is not used).
1392   if (KMP_EXTRACT_D_TAG(lk) != 0) {
1393     lck = (kmp_user_lock_p)lk;
1394     if (__kmp_env_consistency_check) {
1395       __kmp_push_sync(global_tid, ct_critical, loc, lck,
1396                       __kmp_map_hint_to_lock(hint));
1397     }
1398 #if USE_ITT_BUILD
1399     __kmp_itt_critical_acquiring(lck);
1400 #endif
1401 #if OMPT_SUPPORT && OMPT_OPTIONAL
1402     if (ompt_enabled.enabled) {
1403       ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1404       /* OMPT state update */
1405       prev_state = ti.state;
1406       ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1407       ti.state = ompt_state_wait_critical;
1408 
1409       /* OMPT event callback */
1410       if (ompt_enabled.ompt_callback_mutex_acquire) {
1411         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1412             ompt_mutex_critical, (unsigned int)hint,
1413             __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1414             codeptr);
1415       }
1416     }
1417 #endif
1418 #if KMP_USE_INLINED_TAS
1419     if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1420       KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1421     } else
1422 #elif KMP_USE_INLINED_FUTEX
1423     if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1424       KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1425     } else
1426 #endif
1427     {
1428       KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1429     }
1430   } else {
1431     kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1432     lck = ilk->lock;
1433     if (__kmp_env_consistency_check) {
1434       __kmp_push_sync(global_tid, ct_critical, loc, lck,
1435                       __kmp_map_hint_to_lock(hint));
1436     }
1437 #if USE_ITT_BUILD
1438     __kmp_itt_critical_acquiring(lck);
1439 #endif
1440 #if OMPT_SUPPORT && OMPT_OPTIONAL
1441     if (ompt_enabled.enabled) {
1442       ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1443       /* OMPT state update */
1444       prev_state = ti.state;
1445       ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1446       ti.state = ompt_state_wait_critical;
1447 
1448       /* OMPT event callback */
1449       if (ompt_enabled.ompt_callback_mutex_acquire) {
1450         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1451             ompt_mutex_critical, (unsigned int)hint,
1452             __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1453             codeptr);
1454       }
1455     }
1456 #endif
1457     KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1458   }
1459   KMP_POP_PARTITIONED_TIMER();
1460 
1461 #if USE_ITT_BUILD
1462   __kmp_itt_critical_acquired(lck);
1463 #endif /* USE_ITT_BUILD */
1464 #if OMPT_SUPPORT && OMPT_OPTIONAL
1465   if (ompt_enabled.enabled) {
1466     /* OMPT state update */
1467     ti.state = prev_state;
1468     ti.wait_id = 0;
1469 
1470     /* OMPT event callback */
1471     if (ompt_enabled.ompt_callback_mutex_acquired) {
1472       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1473           ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1474     }
1475   }
1476 #endif
1477 
1478   KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1479   KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1480 } // __kmpc_critical_with_hint
1481 
1482 #endif // KMP_USE_DYNAMIC_LOCK
1483 
1484 /*!
1485 @ingroup WORK_SHARING
1486 @param loc  source location information.
1487 @param global_tid  global thread number.
1488 @param crit identity of the critical section. This could be a pointer to a lock
1489 associated with the critical section, or some other suitably unique value.
1490 
1491 Leave a critical section, releasing any lock that was held during its execution.
1492 */
1493 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1494                          kmp_critical_name *crit) {
1495   kmp_user_lock_p lck;
1496 
1497   KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1498 
1499 #if KMP_USE_DYNAMIC_LOCK
1500   if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1501     lck = (kmp_user_lock_p)crit;
1502     KMP_ASSERT(lck != NULL);
1503     if (__kmp_env_consistency_check) {
1504       __kmp_pop_sync(global_tid, ct_critical, loc);
1505     }
1506 #if USE_ITT_BUILD
1507     __kmp_itt_critical_releasing(lck);
1508 #endif
1509 #if KMP_USE_INLINED_TAS
1510     if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1511       KMP_RELEASE_TAS_LOCK(lck, global_tid);
1512     } else
1513 #elif KMP_USE_INLINED_FUTEX
1514     if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1515       KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1516     } else
1517 #endif
1518     {
1519       KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1520     }
1521   } else {
1522     kmp_indirect_lock_t *ilk =
1523         (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1524     KMP_ASSERT(ilk != NULL);
1525     lck = ilk->lock;
1526     if (__kmp_env_consistency_check) {
1527       __kmp_pop_sync(global_tid, ct_critical, loc);
1528     }
1529 #if USE_ITT_BUILD
1530     __kmp_itt_critical_releasing(lck);
1531 #endif
1532     KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1533   }
1534 
1535 #else // KMP_USE_DYNAMIC_LOCK
1536 
1537   if ((__kmp_user_lock_kind == lk_tas) &&
1538       (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1539     lck = (kmp_user_lock_p)crit;
1540   }
1541 #if KMP_USE_FUTEX
1542   else if ((__kmp_user_lock_kind == lk_futex) &&
1543            (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1544     lck = (kmp_user_lock_p)crit;
1545   }
1546 #endif
1547   else { // ticket, queuing or drdpa
1548     lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1549   }
1550 
1551   KMP_ASSERT(lck != NULL);
1552 
1553   if (__kmp_env_consistency_check)
1554     __kmp_pop_sync(global_tid, ct_critical, loc);
1555 
1556 #if USE_ITT_BUILD
1557   __kmp_itt_critical_releasing(lck);
1558 #endif /* USE_ITT_BUILD */
1559   // The value of 'crit' should be usable as the critical_id of the critical
1560   // section directive.
1561   __kmp_release_user_lock_with_checks(lck, global_tid);
1562 
1563 #endif // KMP_USE_DYNAMIC_LOCK
1564 
1565 #if OMPT_SUPPORT && OMPT_OPTIONAL
1566   /* OMPT release event triggers after lock is released; place here to trigger
1567    * for all #if branches */
1568   OMPT_STORE_RETURN_ADDRESS(global_tid);
1569   if (ompt_enabled.ompt_callback_mutex_released) {
1570     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1571         ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1572         OMPT_LOAD_RETURN_ADDRESS(0));
1573   }
1574 #endif
1575 
1576   KMP_POP_PARTITIONED_TIMER();
1577   KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1578 }
1579 
1580 /*!
1581 @ingroup SYNCHRONIZATION
1582 @param loc source location information
1583 @param global_tid thread id.
1584 @return one if the thread should execute the master block, zero otherwise
1585 
1586 Start execution of a combined barrier and master. The barrier is executed inside
1587 this function.
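A sketch of the expected pairing (master_body(), loc and gtid are placeholders):
@code
if (__kmpc_barrier_master(loc, gtid)) {
  // Only the thread chosen as master gets here, after the barrier.
  master_body();
  __kmpc_end_barrier_master(loc, gtid); // releases the waiting threads
}
@endcode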
1588 */
1589 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1590   int status;
1591 
1592   KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1593 
1594   if (!TCR_4(__kmp_init_parallel))
1595     __kmp_parallel_initialize();
1596 
1597   if (__kmp_env_consistency_check)
1598     __kmp_check_barrier(global_tid, ct_barrier, loc);
1599 
1600 #if OMPT_SUPPORT
1601   ompt_frame_t *ompt_frame;
1602   if (ompt_enabled.enabled) {
1603     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1604     if (ompt_frame->enter_frame.ptr == NULL)
1605       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1606     OMPT_STORE_RETURN_ADDRESS(global_tid);
1607   }
1608 #endif
1609 #if USE_ITT_NOTIFY
1610   __kmp_threads[global_tid]->th.th_ident = loc;
1611 #endif
1612   status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1613 #if OMPT_SUPPORT && OMPT_OPTIONAL
1614   if (ompt_enabled.enabled) {
1615     ompt_frame->enter_frame = ompt_data_none;
1616   }
1617 #endif
1618 
1619   return (status != 0) ? 0 : 1;
1620 }
1621 
1622 /*!
1623 @ingroup SYNCHRONIZATION
1624 @param loc source location information
1625 @param global_tid thread id.
1626 
1627 Complete the execution of a combined barrier and master. This function should
1628 only be called at the completion of the <tt>master</tt> code. Other threads will
1629 still be waiting at the barrier and this call releases them.
1630 */
1631 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1632   KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1633 
1634   __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1635 }
1636 
1637 /*!
1638 @ingroup SYNCHRONIZATION
1639 @param loc source location information
1640 @param global_tid thread id.
1641 @return one if the thread should execute the master block, zero otherwise
1642 
1643 Start execution of a combined barrier and master(nowait) construct.
1644 The barrier is executed inside this function.
1645 There is no equivalent "end" function, since the other threads do not wait after the barrier; the thread executing the master block simply continues when it finishes.
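A sketch of the expected usage (placeholders as in the other examples):
@code
if (__kmpc_barrier_master_nowait(loc, gtid)) {
  master_body(); // no end call and no closing barrier are generated
}
@endcode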
1646 */
1647 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1648   kmp_int32 ret;
1649 
1650   KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1651 
1652   if (!TCR_4(__kmp_init_parallel))
1653     __kmp_parallel_initialize();
1654 
1655   if (__kmp_env_consistency_check) {
1656     if (loc == 0) {
1657       KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1658     }
1659     __kmp_check_barrier(global_tid, ct_barrier, loc);
1660   }
1661 
1662 #if OMPT_SUPPORT
1663   ompt_frame_t *ompt_frame;
1664   if (ompt_enabled.enabled) {
1665     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1666     if (ompt_frame->enter_frame.ptr == NULL)
1667       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1668     OMPT_STORE_RETURN_ADDRESS(global_tid);
1669   }
1670 #endif
1671 #if USE_ITT_NOTIFY
1672   __kmp_threads[global_tid]->th.th_ident = loc;
1673 #endif
1674   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1675 #if OMPT_SUPPORT && OMPT_OPTIONAL
1676   if (ompt_enabled.enabled) {
1677     ompt_frame->enter_frame = ompt_data_none;
1678   }
1679 #endif
1680 
1681   ret = __kmpc_master(loc, global_tid);
1682 
1683   if (__kmp_env_consistency_check) {
1684     /*  there's no __kmpc_end_master called; so the (stats) */
1685     /*  actions of __kmpc_end_master are done here          */
1686 
1687     if (global_tid < 0) {
1688       KMP_WARNING(ThreadIdentInvalid);
1689     }
1690     if (ret) {
1691       /* only one thread should do the pop since only */
1692       /* one did the push (see __kmpc_master())       */
1693 
1694       __kmp_pop_sync(global_tid, ct_master, loc);
1695     }
1696   }
1697 
1698   return (ret);
1699 }
1700 
1701 /* The BARRIER for a SINGLE process section is always explicit   */
1702 /*!
1703 @ingroup WORK_SHARING
1704 @param loc  source location information
1705 @param global_tid  global thread number
1706 @return One if this thread should execute the single construct, zero otherwise.
1707 
1708 Test whether to execute a <tt>single</tt> construct.
1709 There are no implicit barriers in the two "single" calls; rather, the compiler
1710 should introduce an explicit barrier if one is required.
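For illustration, a single construct without a nowait clause might be lowered
roughly as follows (single_body(), loc and gtid are placeholders):
@code
if (__kmpc_single(loc, gtid)) {
  single_body();
  __kmpc_end_single(loc, gtid);
}
__kmpc_barrier(loc, gtid); // explicit barrier; omitted for 'nowait'
@endcode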
1711 */
1712 
1713 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1714   kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1715 
1716   if (rc) {
1717     // We are going to execute the single statement, so we should count it.
1718     KMP_COUNT_BLOCK(OMP_SINGLE);
1719     KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1720   }
1721 
1722 #if OMPT_SUPPORT && OMPT_OPTIONAL
1723   kmp_info_t *this_thr = __kmp_threads[global_tid];
1724   kmp_team_t *team = this_thr->th.th_team;
1725   int tid = __kmp_tid_from_gtid(global_tid);
1726 
1727   if (ompt_enabled.enabled) {
1728     if (rc) {
1729       if (ompt_enabled.ompt_callback_work) {
1730         ompt_callbacks.ompt_callback(ompt_callback_work)(
1731             ompt_work_single_executor, ompt_scope_begin,
1732             &(team->t.ompt_team_info.parallel_data),
1733             &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1734             1, OMPT_GET_RETURN_ADDRESS(0));
1735       }
1736     } else {
1737       if (ompt_enabled.ompt_callback_work) {
1738         ompt_callbacks.ompt_callback(ompt_callback_work)(
1739             ompt_work_single_other, ompt_scope_begin,
1740             &(team->t.ompt_team_info.parallel_data),
1741             &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1742             1, OMPT_GET_RETURN_ADDRESS(0));
1743         ompt_callbacks.ompt_callback(ompt_callback_work)(
1744             ompt_work_single_other, ompt_scope_end,
1745             &(team->t.ompt_team_info.parallel_data),
1746             &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1747             1, OMPT_GET_RETURN_ADDRESS(0));
1748       }
1749     }
1750   }
1751 #endif
1752 
1753   return rc;
1754 }
1755 
1756 /*!
1757 @ingroup WORK_SHARING
1758 @param loc  source location information
1759 @param global_tid  global thread number
1760 
1761 Mark the end of a <tt>single</tt> construct.  This function should
1762 only be called by the thread that executed the block of code protected
1763 by the `single` construct.
1764 */
1765 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1766   __kmp_exit_single(global_tid);
1767   KMP_POP_PARTITIONED_TIMER();
1768 
1769 #if OMPT_SUPPORT && OMPT_OPTIONAL
1770   kmp_info_t *this_thr = __kmp_threads[global_tid];
1771   kmp_team_t *team = this_thr->th.th_team;
1772   int tid = __kmp_tid_from_gtid(global_tid);
1773 
1774   if (ompt_enabled.ompt_callback_work) {
1775     ompt_callbacks.ompt_callback(ompt_callback_work)(
1776         ompt_work_single_executor, ompt_scope_end,
1777         &(team->t.ompt_team_info.parallel_data),
1778         &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1779         OMPT_GET_RETURN_ADDRESS(0));
1780   }
1781 #endif
1782 }
1783 
1784 /*!
1785 @ingroup WORK_SHARING
1786 @param loc Source location
1787 @param global_tid Global thread id
1788 
1789 Mark the end of a statically scheduled loop.
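For illustration only, a statically scheduled loop is bracketed by an init/fini
pair; the sketch below assumes the __kmpc_for_static_init_4 entry point (defined
elsewhere in the runtime) and simplifies the bounds handling:
@code
kmp_int32 last, lower = 0, upper = n - 1, stride = 1;
__kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lower, &upper,
                         &stride, 1, 0);
for (kmp_int32 i = lower; i <= upper; ++i)
  loop_body(i); // placeholder
__kmpc_for_static_fini(loc, gtid);
@endcode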
1790 */
1791 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1792   KMP_POP_PARTITIONED_TIMER();
1793   KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1794 
1795 #if OMPT_SUPPORT && OMPT_OPTIONAL
1796   if (ompt_enabled.ompt_callback_work) {
1797     ompt_work_t ompt_work_type = ompt_work_loop;
1798     ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1799     ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1800     // Determine workshare type
1801     if (loc != NULL) {
1802       if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1803         ompt_work_type = ompt_work_loop;
1804       } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1805         ompt_work_type = ompt_work_sections;
1806       } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1807         ompt_work_type = ompt_work_distribute;
1808       } else {
1809         // use default set above.
1810         // a warning about this case is provided in __kmpc_for_static_init
1811       }
1812       KMP_DEBUG_ASSERT(ompt_work_type);
1813     }
1814     ompt_callbacks.ompt_callback(ompt_callback_work)(
1815         ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1816         &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1817   }
1818 #endif
1819   if (__kmp_env_consistency_check)
1820     __kmp_pop_workshare(global_tid, ct_pdo, loc);
1821 }
1822 
1823 // User routines which take C-style arguments (call by value)
1824 // different from the Fortran equivalent routines
1825 
1826 void ompc_set_num_threads(int arg) {
1827   // !!!!! TODO: check the per-task binding
1828   __kmp_set_num_threads(arg, __kmp_entry_gtid());
1829 }
1830 
1831 void ompc_set_dynamic(int flag) {
1832   kmp_info_t *thread;
1833 
1834   /* For the thread-private implementation of the internal controls */
1835   thread = __kmp_entry_thread();
1836 
1837   __kmp_save_internal_controls(thread);
1838 
1839   set__dynamic(thread, flag ? TRUE : FALSE);
1840 }
1841 
1842 void ompc_set_nested(int flag) {
1843   kmp_info_t *thread;
1844 
1845   /* For the thread-private internal controls implementation */
1846   thread = __kmp_entry_thread();
1847 
1848   __kmp_save_internal_controls(thread);
1849 
1850   set__nested(thread, flag ? TRUE : FALSE);
1851 }
1852 
1853 void ompc_set_max_active_levels(int max_active_levels) {
1854   /* TO DO */
1855   /* we want per-task implementation of this internal control */
1856 
1857   /* For the per-thread internal controls implementation */
1858   __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1859 }
1860 
1861 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1862   // !!!!! TODO: check the per-task binding
1863   __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1864 }
1865 
1866 int ompc_get_ancestor_thread_num(int level) {
1867   return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1868 }
1869 
1870 int ompc_get_team_size(int level) {
1871   return __kmp_get_team_size(__kmp_entry_gtid(), level);
1872 }
1873 
1874 #if OMP_50_ENABLED
1875 /* OpenMP 5.0 Affinity Format API */
1876 
1877 void ompc_set_affinity_format(char const *format) {
1878   if (!__kmp_init_serial) {
1879     __kmp_serial_initialize();
1880   }
1881   __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1882                          format, KMP_STRLEN(format) + 1);
1883 }
1884 
1885 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1886   size_t format_size;
1887   if (!__kmp_init_serial) {
1888     __kmp_serial_initialize();
1889   }
1890   format_size = KMP_STRLEN(__kmp_affinity_format);
1891   if (buffer && size) {
1892     __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1893                            format_size + 1);
1894   }
1895   return format_size;
1896 }
1897 
1898 void ompc_display_affinity(char const *format) {
1899   int gtid;
1900   if (!TCR_4(__kmp_init_middle)) {
1901     __kmp_middle_initialize();
1902   }
1903   gtid = __kmp_get_gtid();
1904   __kmp_aux_display_affinity(gtid, format);
1905 }
1906 
1907 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1908                              char const *format) {
1909   int gtid;
1910   size_t num_required;
1911   kmp_str_buf_t capture_buf;
1912   if (!TCR_4(__kmp_init_middle)) {
1913     __kmp_middle_initialize();
1914   }
1915   gtid = __kmp_get_gtid();
1916   __kmp_str_buf_init(&capture_buf);
1917   num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1918   if (buffer && buf_size) {
1919     __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1920                            capture_buf.used + 1);
1921   }
1922   __kmp_str_buf_free(&capture_buf);
1923   return num_required;
1924 }
1925 #endif /* OMP_50_ENABLED */
1926 
1927 void kmpc_set_stacksize(int arg) {
1928   // __kmp_aux_set_stacksize initializes the library if needed
1929   __kmp_aux_set_stacksize(arg);
1930 }
1931 
1932 void kmpc_set_stacksize_s(size_t arg) {
1933   // __kmp_aux_set_stacksize initializes the library if needed
1934   __kmp_aux_set_stacksize(arg);
1935 }
1936 
1937 void kmpc_set_blocktime(int arg) {
1938   int gtid, tid;
1939   kmp_info_t *thread;
1940 
1941   gtid = __kmp_entry_gtid();
1942   tid = __kmp_tid_from_gtid(gtid);
1943   thread = __kmp_thread_from_gtid(gtid);
1944 
1945   __kmp_aux_set_blocktime(arg, thread, tid);
1946 }
1947 
1948 void kmpc_set_library(int arg) {
1949   // __kmp_user_set_library initializes the library if needed
1950   __kmp_user_set_library((enum library_type)arg);
1951 }
1952 
1953 void kmpc_set_defaults(char const *str) {
1954   // __kmp_aux_set_defaults initializes the library if needed
1955   __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1956 }
1957 
1958 void kmpc_set_disp_num_buffers(int arg) {
1959   // ignore after initialization because some teams have already
1960   // allocated dispatch buffers
1961   if (__kmp_init_serial == 0 && arg > 0)
1962     __kmp_dispatch_num_buffers = arg;
1963 }
1964 
1965 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1966 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1967   return -1;
1968 #else
1969   if (!TCR_4(__kmp_init_middle)) {
1970     __kmp_middle_initialize();
1971   }
1972   return __kmp_aux_set_affinity_mask_proc(proc, mask);
1973 #endif
1974 }
1975 
1976 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1977 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1978   return -1;
1979 #else
1980   if (!TCR_4(__kmp_init_middle)) {
1981     __kmp_middle_initialize();
1982   }
1983   return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1984 #endif
1985 }
1986 
1987 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1988 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1989   return -1;
1990 #else
1991   if (!TCR_4(__kmp_init_middle)) {
1992     __kmp_middle_initialize();
1993   }
1994   return __kmp_aux_get_affinity_mask_proc(proc, mask);
1995 #endif
1996 }
1997 
1998 /* -------------------------------------------------------------------------- */
1999 /*!
2000 @ingroup THREADPRIVATE
2001 @param loc       source location information
2002 @param gtid      global thread number
2003 @param cpy_size  size of the cpy_data buffer
2004 @param cpy_data  pointer to data to be copied
2005 @param cpy_func  helper function to call for copying data
2006 @param didit     flag variable: 1=single thread; 0=not single thread
2007 
2008 __kmpc_copyprivate implements the interface for the private data broadcast
2009 needed for the copyprivate clause associated with a single region in an
2010 OpenMP<sup>*</sup> program (both C and Fortran).
2011 All threads participating in the parallel region call this routine.
2012 One of the threads (called the single thread) should have the <tt>didit</tt>
2013 variable set to 1 and all other threads should have that variable set to 0.
2014 All threads pass a pointer to a data buffer (cpy_data) that they have built.
2015 
2016 The OpenMP specification forbids the use of nowait on the single region when a
2017 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2018 barrier internally to avoid race conditions, so the code generation for the
2019 single region should avoid generating a barrier after the call to @ref
2020 __kmpc_copyprivate.
2021 
2022 The <tt>gtid</tt> parameter is the global thread id for the current thread.
2023 The <tt>loc</tt> parameter is a pointer to source location information.
2024 
2025 Internal implementation: the single thread first copies its descriptor address
2026 (cpy_data) to a team-private location; the other threads then each call the
2027 function pointed to by the parameter cpy_func, which carries out the copy using
2028 the single thread's cpy_data buffer as the source and their own as the destination.
2029 
2030 The cpy_func routine used for the copy and the contents of the data area defined
2031 by cpy_data and cpy_size may be built in any fashion that will allow the copy
2032 to be done. For instance, the cpy_data buffer can hold the actual data to be
2033 copied or it may hold a list of pointers to the data. The cpy_func routine must
2034 interpret the cpy_data buffer appropriately.
2035 
2036 The interface to cpy_func is as follows:
2037 @code
2038 void cpy_func( void *destination, void *source )
2039 @endcode
2040 where void *destination is the cpy_data pointer for the thread being copied to
2041 and void *source is the cpy_data pointer for the thread being copied from.
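As an illustration (not taken from any particular compiler), cpy_data could
point to a list of addresses of the copyprivate variables, and cpy_func could
copy through those addresses:
@code
typedef struct { int *i; double *d; } cpy_list_t; // hypothetical layout
static void my_cpy_func(void *dst, void *src) {
  cpy_list_t *d = (cpy_list_t *)dst;
  cpy_list_t *s = (cpy_list_t *)src;
  *d->i = *s->i;
  *d->d = *s->d;
}
// Every thread in the team then calls:
//   __kmpc_copyprivate(loc, gtid, sizeof(cpy_list_t), &my_list, my_cpy_func, didit);
@endcode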
2042 */
2043 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2044                         void *cpy_data, void (*cpy_func)(void *, void *),
2045                         kmp_int32 didit) {
2046   void **data_ptr;
2047 
2048   KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2049 
2050   KMP_MB();
2051 
2052   data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2053 
2054   if (__kmp_env_consistency_check) {
2055     if (loc == 0) {
2056       KMP_WARNING(ConstructIdentInvalid);
2057     }
2058   }
2059 
2060   // ToDo: Optimize the following two barriers into some kind of split barrier
2061 
2062   if (didit)
2063     *data_ptr = cpy_data;
2064 
2065 #if OMPT_SUPPORT
2066   ompt_frame_t *ompt_frame;
2067   if (ompt_enabled.enabled) {
2068     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2069     if (ompt_frame->enter_frame.ptr == NULL)
2070       ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2071     OMPT_STORE_RETURN_ADDRESS(gtid);
2072   }
2073 #endif
2074 /* This barrier is not a barrier region boundary */
2075 #if USE_ITT_NOTIFY
2076   __kmp_threads[gtid]->th.th_ident = loc;
2077 #endif
2078   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2079 
2080   if (!didit)
2081     (*cpy_func)(cpy_data, *data_ptr);
2082 
2083 // Consider next barrier a user-visible barrier for barrier region boundaries
2084 // Nesting checks are already handled by the single construct checks
2085 
2086 #if OMPT_SUPPORT
2087   if (ompt_enabled.enabled) {
2088     OMPT_STORE_RETURN_ADDRESS(gtid);
2089   }
2090 #endif
2091 #if USE_ITT_NOTIFY
2092   __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2093 // tasks can overwrite the location)
2094 #endif
2095   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2096 #if OMPT_SUPPORT && OMPT_OPTIONAL
2097   if (ompt_enabled.enabled) {
2098     ompt_frame->enter_frame = ompt_data_none;
2099   }
2100 #endif
2101 }
2102 
2103 /* -------------------------------------------------------------------------- */
2104 
2105 #define INIT_LOCK __kmp_init_user_lock_with_checks
2106 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2107 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2108 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2109 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2110 #define ACQUIRE_NESTED_LOCK_TIMED                                              \
2111   __kmp_acquire_nested_user_lock_with_checks_timed
2112 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2113 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2114 #define TEST_LOCK __kmp_test_user_lock_with_checks
2115 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2116 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2117 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2118 
2119 // TODO: Make check abort messages use location info & pass it into
2120 // with_checks routines
2121 
2122 #if KMP_USE_DYNAMIC_LOCK
2123 
2124 // internal lock initializer
2125 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2126                                                     kmp_dyna_lockseq_t seq) {
2127   if (KMP_IS_D_LOCK(seq)) {
2128     KMP_INIT_D_LOCK(lock, seq);
2129 #if USE_ITT_BUILD
2130     __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2131 #endif
2132   } else {
2133     KMP_INIT_I_LOCK(lock, seq);
2134 #if USE_ITT_BUILD
2135     kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2136     __kmp_itt_lock_creating(ilk->lock, loc);
2137 #endif
2138   }
2139 }
2140 
2141 // internal nest lock initializer
2142 static __forceinline void
2143 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2144                                kmp_dyna_lockseq_t seq) {
2145 #if KMP_USE_TSX
2146   // Don't have nested lock implementation for speculative locks
2147   if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2148     seq = __kmp_user_lock_seq;
2149 #endif
2150   switch (seq) {
2151   case lockseq_tas:
2152     seq = lockseq_nested_tas;
2153     break;
2154 #if KMP_USE_FUTEX
2155   case lockseq_futex:
2156     seq = lockseq_nested_futex;
2157     break;
2158 #endif
2159   case lockseq_ticket:
2160     seq = lockseq_nested_ticket;
2161     break;
2162   case lockseq_queuing:
2163     seq = lockseq_nested_queuing;
2164     break;
2165   case lockseq_drdpa:
2166     seq = lockseq_nested_drdpa;
2167     break;
2168   default:
2169     seq = lockseq_nested_queuing;
2170   }
2171   KMP_INIT_I_LOCK(lock, seq);
2172 #if USE_ITT_BUILD
2173   kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2174   __kmp_itt_lock_creating(ilk->lock, loc);
2175 #endif
2176 }
2177 
2178 /* initialize the lock with a hint */
2179 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2180                                 uintptr_t hint) {
2181   KMP_DEBUG_ASSERT(__kmp_init_serial);
2182   if (__kmp_env_consistency_check && user_lock == NULL) {
2183     KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2184   }
2185 
2186   __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2187 
2188 #if OMPT_SUPPORT && OMPT_OPTIONAL
2189   // This is the case, if called from omp_init_lock_with_hint:
2190   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2191   if (!codeptr)
2192     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2193   if (ompt_enabled.ompt_callback_lock_init) {
2194     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2195         ompt_mutex_lock, (omp_lock_hint_t)hint,
2196         __ompt_get_mutex_impl_type(user_lock),
2197         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2198   }
2199 #endif
2200 }
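/* Example (user level, for illustration): this entry point backs
   omp_init_lock_with_hint, e.g.
     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
     ...
     omp_destroy_lock(&l);
   The hint is advisory; if the requested lock kind is unavailable the runtime
   falls back to a default lock implementation. */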
2201 
2202 /* initialize the lock with a hint */
2203 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2204                                      void **user_lock, uintptr_t hint) {
2205   KMP_DEBUG_ASSERT(__kmp_init_serial);
2206   if (__kmp_env_consistency_check && user_lock == NULL) {
2207     KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2208   }
2209 
2210   __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2211 
2212 #if OMPT_SUPPORT && OMPT_OPTIONAL
2213   // This is the case, if called from omp_init_lock_with_hint:
2214   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2215   if (!codeptr)
2216     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2217   if (ompt_enabled.ompt_callback_lock_init) {
2218     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2219         ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2220         __ompt_get_mutex_impl_type(user_lock),
2221         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2222   }
2223 #endif
2224 }
2225 
2226 #endif // KMP_USE_DYNAMIC_LOCK
2227 
2228 /* initialize the lock */
2229 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2230 #if KMP_USE_DYNAMIC_LOCK
2231 
2232   KMP_DEBUG_ASSERT(__kmp_init_serial);
2233   if (__kmp_env_consistency_check && user_lock == NULL) {
2234     KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2235   }
2236   __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2237 
2238 #if OMPT_SUPPORT && OMPT_OPTIONAL
2239   // This is the case, if called from omp_init_lock_with_hint:
2240   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2241   if (!codeptr)
2242     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2243   if (ompt_enabled.ompt_callback_lock_init) {
2244     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2245         ompt_mutex_lock, omp_lock_hint_none,
2246         __ompt_get_mutex_impl_type(user_lock),
2247         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2248   }
2249 #endif
2250 
2251 #else // KMP_USE_DYNAMIC_LOCK
2252 
2253   static char const *const func = "omp_init_lock";
2254   kmp_user_lock_p lck;
2255   KMP_DEBUG_ASSERT(__kmp_init_serial);
2256 
2257   if (__kmp_env_consistency_check) {
2258     if (user_lock == NULL) {
2259       KMP_FATAL(LockIsUninitialized, func);
2260     }
2261   }
2262 
2263   KMP_CHECK_USER_LOCK_INIT();
2264 
2265   if ((__kmp_user_lock_kind == lk_tas) &&
2266       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2267     lck = (kmp_user_lock_p)user_lock;
2268   }
2269 #if KMP_USE_FUTEX
2270   else if ((__kmp_user_lock_kind == lk_futex) &&
2271            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2272     lck = (kmp_user_lock_p)user_lock;
2273   }
2274 #endif
2275   else {
2276     lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2277   }
2278   INIT_LOCK(lck);
2279   __kmp_set_user_lock_location(lck, loc);
2280 
2281 #if OMPT_SUPPORT && OMPT_OPTIONAL
2282   // This is the case, if called from omp_init_lock_with_hint:
2283   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2284   if (!codeptr)
2285     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2286   if (ompt_enabled.ompt_callback_lock_init) {
2287     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2288         ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2289         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2290   }
2291 #endif
2292 
2293 #if USE_ITT_BUILD
2294   __kmp_itt_lock_creating(lck);
2295 #endif /* USE_ITT_BUILD */
2296 
2297 #endif // KMP_USE_DYNAMIC_LOCK
2298 } // __kmpc_init_lock
2299 
2300 /* initialize the lock */
2301 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2302 #if KMP_USE_DYNAMIC_LOCK
2303 
2304   KMP_DEBUG_ASSERT(__kmp_init_serial);
2305   if (__kmp_env_consistency_check && user_lock == NULL) {
2306     KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2307   }
2308   __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2309 
2310 #if OMPT_SUPPORT && OMPT_OPTIONAL
2311   // This is the case, if called from omp_init_lock_with_hint:
2312   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2313   if (!codeptr)
2314     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2315   if (ompt_enabled.ompt_callback_lock_init) {
2316     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2317         ompt_mutex_nest_lock, omp_lock_hint_none,
2318         __ompt_get_mutex_impl_type(user_lock),
2319         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2320   }
2321 #endif
2322 
2323 #else // KMP_USE_DYNAMIC_LOCK
2324 
2325   static char const *const func = "omp_init_nest_lock";
2326   kmp_user_lock_p lck;
2327   KMP_DEBUG_ASSERT(__kmp_init_serial);
2328 
2329   if (__kmp_env_consistency_check) {
2330     if (user_lock == NULL) {
2331       KMP_FATAL(LockIsUninitialized, func);
2332     }
2333   }
2334 
2335   KMP_CHECK_USER_LOCK_INIT();
2336 
2337   if ((__kmp_user_lock_kind == lk_tas) &&
2338       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2339        OMP_NEST_LOCK_T_SIZE)) {
2340     lck = (kmp_user_lock_p)user_lock;
2341   }
2342 #if KMP_USE_FUTEX
2343   else if ((__kmp_user_lock_kind == lk_futex) &&
2344            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2345             OMP_NEST_LOCK_T_SIZE)) {
2346     lck = (kmp_user_lock_p)user_lock;
2347   }
2348 #endif
2349   else {
2350     lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2351   }
2352 
2353   INIT_NESTED_LOCK(lck);
2354   __kmp_set_user_lock_location(lck, loc);
2355 
2356 #if OMPT_SUPPORT && OMPT_OPTIONAL
2357   // This is the case, if called from omp_init_lock_with_hint:
2358   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2359   if (!codeptr)
2360     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2361   if (ompt_enabled.ompt_callback_lock_init) {
2362     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2363         ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2364         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2365   }
2366 #endif
2367 
2368 #if USE_ITT_BUILD
2369   __kmp_itt_lock_creating(lck);
2370 #endif /* USE_ITT_BUILD */
2371 
2372 #endif // KMP_USE_DYNAMIC_LOCK
2373 } // __kmpc_init_nest_lock
2374 
2375 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2376 #if KMP_USE_DYNAMIC_LOCK
2377 
2378 #if USE_ITT_BUILD
2379   kmp_user_lock_p lck;
2380   if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2381     lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2382   } else {
2383     lck = (kmp_user_lock_p)user_lock;
2384   }
2385   __kmp_itt_lock_destroyed(lck);
2386 #endif
2387 #if OMPT_SUPPORT && OMPT_OPTIONAL
2388   // This is the case, if called from omp_init_lock_with_hint:
2389   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2390   if (!codeptr)
2391     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2392   if (ompt_enabled.ompt_callback_lock_destroy) {
2393     kmp_user_lock_p lck;
2394     if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2395       lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2396     } else {
2397       lck = (kmp_user_lock_p)user_lock;
2398     }
2399     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2400         ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2401   }
2402 #endif
2403   KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2404 #else
2405   kmp_user_lock_p lck;
2406 
2407   if ((__kmp_user_lock_kind == lk_tas) &&
2408       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2409     lck = (kmp_user_lock_p)user_lock;
2410   }
2411 #if KMP_USE_FUTEX
2412   else if ((__kmp_user_lock_kind == lk_futex) &&
2413            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2414     lck = (kmp_user_lock_p)user_lock;
2415   }
2416 #endif
2417   else {
2418     lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2419   }
2420 
2421 #if OMPT_SUPPORT && OMPT_OPTIONAL
2422   // This is the case, if called from omp_init_lock_with_hint:
2423   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2424   if (!codeptr)
2425     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2426   if (ompt_enabled.ompt_callback_lock_destroy) {
2427     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2428         ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2429   }
2430 #endif
2431 
2432 #if USE_ITT_BUILD
2433   __kmp_itt_lock_destroyed(lck);
2434 #endif /* USE_ITT_BUILD */
2435   DESTROY_LOCK(lck);
2436 
2437   if ((__kmp_user_lock_kind == lk_tas) &&
2438       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2439     ;
2440   }
2441 #if KMP_USE_FUTEX
2442   else if ((__kmp_user_lock_kind == lk_futex) &&
2443            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2444     ;
2445   }
2446 #endif
2447   else {
2448     __kmp_user_lock_free(user_lock, gtid, lck);
2449   }
2450 #endif // KMP_USE_DYNAMIC_LOCK
2451 } // __kmpc_destroy_lock
2452 
2453 /* destroy the lock */
2454 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2455 #if KMP_USE_DYNAMIC_LOCK
2456 
2457 #if USE_ITT_BUILD
2458   kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2459   __kmp_itt_lock_destroyed(ilk->lock);
2460 #endif
2461 #if OMPT_SUPPORT && OMPT_OPTIONAL
2462   // This is the case, if called from omp_init_lock_with_hint:
2463   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2464   if (!codeptr)
2465     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2466   if (ompt_enabled.ompt_callback_lock_destroy) {
2467     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2468         ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2469   }
2470 #endif
2471   KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2472 
2473 #else // KMP_USE_DYNAMIC_LOCK
2474 
2475   kmp_user_lock_p lck;
2476 
2477   if ((__kmp_user_lock_kind == lk_tas) &&
2478       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2479        OMP_NEST_LOCK_T_SIZE)) {
2480     lck = (kmp_user_lock_p)user_lock;
2481   }
2482 #if KMP_USE_FUTEX
2483   else if ((__kmp_user_lock_kind == lk_futex) &&
2484            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2485             OMP_NEST_LOCK_T_SIZE)) {
2486     lck = (kmp_user_lock_p)user_lock;
2487   }
2488 #endif
2489   else {
2490     lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2491   }
2492 
2493 #if OMPT_SUPPORT && OMPT_OPTIONAL
2494   // This is the case, if called from omp_init_lock_with_hint:
2495   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2496   if (!codeptr)
2497     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2498   if (ompt_enabled.ompt_callback_lock_destroy) {
2499     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2500         ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2501   }
2502 #endif
2503 
2504 #if USE_ITT_BUILD
2505   __kmp_itt_lock_destroyed(lck);
2506 #endif /* USE_ITT_BUILD */
2507 
2508   DESTROY_NESTED_LOCK(lck);
2509 
2510   if ((__kmp_user_lock_kind == lk_tas) &&
2511       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2512        OMP_NEST_LOCK_T_SIZE)) {
2513     ;
2514   }
2515 #if KMP_USE_FUTEX
2516   else if ((__kmp_user_lock_kind == lk_futex) &&
2517            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2518             OMP_NEST_LOCK_T_SIZE)) {
2519     ;
2520   }
2521 #endif
2522   else {
2523     __kmp_user_lock_free(user_lock, gtid, lck);
2524   }
2525 #endif // KMP_USE_DYNAMIC_LOCK
2526 } // __kmpc_destroy_nest_lock
2527 
2528 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2529   KMP_COUNT_BLOCK(OMP_set_lock);
2530 #if KMP_USE_DYNAMIC_LOCK
2531   int tag = KMP_EXTRACT_D_TAG(user_lock);
2532 #if USE_ITT_BUILD
2533   __kmp_itt_lock_acquiring(
2534       (kmp_user_lock_p)
2535           user_lock); // itt function will get to the right lock object.
2536 #endif
2537 #if OMPT_SUPPORT && OMPT_OPTIONAL
2538   // This is the case, if called from omp_init_lock_with_hint:
2539   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2540   if (!codeptr)
2541     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2542   if (ompt_enabled.ompt_callback_mutex_acquire) {
2543     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2544         ompt_mutex_lock, omp_lock_hint_none,
2545         __ompt_get_mutex_impl_type(user_lock),
2546         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2547   }
2548 #endif
2549 #if KMP_USE_INLINED_TAS
2550   if (tag == locktag_tas && !__kmp_env_consistency_check) {
2551     KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2552   } else
2553 #elif KMP_USE_INLINED_FUTEX
2554   if (tag == locktag_futex && !__kmp_env_consistency_check) {
2555     KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2556   } else
2557 #endif
2558   {
2559     __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2560   }
2561 #if USE_ITT_BUILD
2562   __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2563 #endif
2564 #if OMPT_SUPPORT && OMPT_OPTIONAL
2565   if (ompt_enabled.ompt_callback_mutex_acquired) {
2566     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2567         ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2568   }
2569 #endif
2570 
2571 #else // KMP_USE_DYNAMIC_LOCK
2572 
2573   kmp_user_lock_p lck;
2574 
2575   if ((__kmp_user_lock_kind == lk_tas) &&
2576       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2577     lck = (kmp_user_lock_p)user_lock;
2578   }
2579 #if KMP_USE_FUTEX
2580   else if ((__kmp_user_lock_kind == lk_futex) &&
2581            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2582     lck = (kmp_user_lock_p)user_lock;
2583   }
2584 #endif
2585   else {
2586     lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2587   }
2588 
2589 #if USE_ITT_BUILD
2590   __kmp_itt_lock_acquiring(lck);
2591 #endif /* USE_ITT_BUILD */
2592 #if OMPT_SUPPORT && OMPT_OPTIONAL
2593   // This is the case, if called from omp_init_lock_with_hint:
2594   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2595   if (!codeptr)
2596     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2597   if (ompt_enabled.ompt_callback_mutex_acquire) {
2598     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2599         ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2600         (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2601   }
2602 #endif
2603 
2604   ACQUIRE_LOCK(lck, gtid);
2605 
2606 #if USE_ITT_BUILD
2607   __kmp_itt_lock_acquired(lck);
2608 #endif /* USE_ITT_BUILD */
2609 
2610 #if OMPT_SUPPORT && OMPT_OPTIONAL
2611   if (ompt_enabled.ompt_callback_mutex_acquired) {
2612     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2613         ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2614   }
2615 #endif
2616 
2617 #endif // KMP_USE_DYNAMIC_LOCK
2618 }
2619 
2620 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2621 #if KMP_USE_DYNAMIC_LOCK
2622 
2623 #if USE_ITT_BUILD
2624   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2625 #endif
2626 #if OMPT_SUPPORT && OMPT_OPTIONAL
2627   // This is the case, if called from omp_init_lock_with_hint:
2628   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2629   if (!codeptr)
2630     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2631   if (ompt_enabled.enabled) {
2632     if (ompt_enabled.ompt_callback_mutex_acquire) {
2633       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2634           ompt_mutex_nest_lock, omp_lock_hint_none,
2635           __ompt_get_mutex_impl_type(user_lock),
2636           (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2637     }
2638   }
2639 #endif
2640   int acquire_status =
2641       KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2642   (void) acquire_status;
2643 #if USE_ITT_BUILD
2644   __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2645 #endif
2646 
2647 #if OMPT_SUPPORT && OMPT_OPTIONAL
2648   if (ompt_enabled.enabled) {
2649     if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2650       if (ompt_enabled.ompt_callback_mutex_acquired) {
2651         // lock_first
2652         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2653             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2654             codeptr);
2655       }
2656     } else {
2657       if (ompt_enabled.ompt_callback_nest_lock) {
2658         // lock_next
2659         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2660             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2661       }
2662     }
2663   }
2664 #endif
2665 
2666 #else // KMP_USE_DYNAMIC_LOCK
2667   int acquire_status;
2668   kmp_user_lock_p lck;
2669 
2670   if ((__kmp_user_lock_kind == lk_tas) &&
2671       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2672        OMP_NEST_LOCK_T_SIZE)) {
2673     lck = (kmp_user_lock_p)user_lock;
2674   }
2675 #if KMP_USE_FUTEX
2676   else if ((__kmp_user_lock_kind == lk_futex) &&
2677            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2678             OMP_NEST_LOCK_T_SIZE)) {
2679     lck = (kmp_user_lock_p)user_lock;
2680   }
2681 #endif
2682   else {
2683     lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2684   }
2685 
2686 #if USE_ITT_BUILD
2687   __kmp_itt_lock_acquiring(lck);
2688 #endif /* USE_ITT_BUILD */
2689 #if OMPT_SUPPORT && OMPT_OPTIONAL
2690   // This is the case, if called from omp_init_lock_with_hint:
2691   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2692   if (!codeptr)
2693     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2694   if (ompt_enabled.enabled) {
2695     if (ompt_enabled.ompt_callback_mutex_acquire) {
2696       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2697           ompt_mutex_nest_lock, omp_lock_hint_none,
2698           __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2699           codeptr);
2700     }
2701   }
2702 #endif
2703 
2704   ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2705 
2706 #if USE_ITT_BUILD
2707   __kmp_itt_lock_acquired(lck);
2708 #endif /* USE_ITT_BUILD */
2709 
2710 #if OMPT_SUPPORT && OMPT_OPTIONAL
2711   if (ompt_enabled.enabled) {
2712     if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2713       if (ompt_enabled.ompt_callback_mutex_acquired) {
2714         // lock_first
2715         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2716             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2717       }
2718     } else {
2719       if (ompt_enabled.ompt_callback_nest_lock) {
2720         // lock_next
2721         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2722             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2723       }
2724     }
2725   }
2726 #endif
2727 
2728 #endif // KMP_USE_DYNAMIC_LOCK
2729 }
2730 
2731 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2732 #if KMP_USE_DYNAMIC_LOCK
2733 
2734   int tag = KMP_EXTRACT_D_TAG(user_lock);
2735 #if USE_ITT_BUILD
2736   __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2737 #endif
2738 #if KMP_USE_INLINED_TAS
2739   if (tag == locktag_tas && !__kmp_env_consistency_check) {
2740     KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2741   } else
2742 #elif KMP_USE_INLINED_FUTEX
2743   if (tag == locktag_futex && !__kmp_env_consistency_check) {
2744     KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2745   } else
2746 #endif
2747   {
2748     __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2749   }
2750 
2751 #if OMPT_SUPPORT && OMPT_OPTIONAL
2752   // This is the case, if called from omp_init_lock_with_hint:
2753   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2754   if (!codeptr)
2755     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2756   if (ompt_enabled.ompt_callback_mutex_released) {
2757     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2758         ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2759   }
2760 #endif
2761 
2762 #else // KMP_USE_DYNAMIC_LOCK
2763 
2764   kmp_user_lock_p lck;
2765 
2766   /* Can't use serial interval since not block structured */
2767   /* release the lock */
2768 
2769   if ((__kmp_user_lock_kind == lk_tas) &&
2770       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2771 #if KMP_OS_LINUX &&                                                            \
2772     (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2773 // "fast" path implemented to fix customer performance issue
2774 #if USE_ITT_BUILD
2775     __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2776 #endif /* USE_ITT_BUILD */
2777     TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2778     KMP_MB();
2779 
2780 #if OMPT_SUPPORT && OMPT_OPTIONAL
2781     // This is the case, if called from omp_init_lock_with_hint:
2782     void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2783     if (!codeptr)
2784       codeptr = OMPT_GET_RETURN_ADDRESS(0);
2785     if (ompt_enabled.ompt_callback_mutex_released) {
2786       ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2787           ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2788     }
2789 #endif
2790 
2791     return;
2792 #else
2793     lck = (kmp_user_lock_p)user_lock;
2794 #endif
2795   }
2796 #if KMP_USE_FUTEX
2797   else if ((__kmp_user_lock_kind == lk_futex) &&
2798            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2799     lck = (kmp_user_lock_p)user_lock;
2800   }
2801 #endif
2802   else {
2803     lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2804   }
2805 
2806 #if USE_ITT_BUILD
2807   __kmp_itt_lock_releasing(lck);
2808 #endif /* USE_ITT_BUILD */
2809 
2810   RELEASE_LOCK(lck, gtid);
2811 
2812 #if OMPT_SUPPORT && OMPT_OPTIONAL
2813   // This is the case, if called from omp_init_lock_with_hint:
2814   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2815   if (!codeptr)
2816     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2817   if (ompt_enabled.ompt_callback_mutex_released) {
2818     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2819         ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2820   }
2821 #endif
2822 
2823 #endif // KMP_USE_DYNAMIC_LOCK
2824 }
2825 
2826 /* release the lock */
2827 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2828 #if KMP_USE_DYNAMIC_LOCK
2829 
2830 #if USE_ITT_BUILD
2831   __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2832 #endif
2833   int release_status =
2834       KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2835   (void) release_status;
2836 
2837 #if OMPT_SUPPORT && OMPT_OPTIONAL
2838   // This is the case, if called from omp_init_lock_with_hint:
2839   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2840   if (!codeptr)
2841     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2842   if (ompt_enabled.enabled) {
2843     if (release_status == KMP_LOCK_RELEASED) {
2844       if (ompt_enabled.ompt_callback_mutex_released) {
2845         // release_lock_last
2846         ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2847             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2848             codeptr);
2849       }
2850     } else if (ompt_enabled.ompt_callback_nest_lock) {
2851       // release_lock_prev
2852       ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2853           ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2854     }
2855   }
2856 #endif
2857 
2858 #else // KMP_USE_DYNAMIC_LOCK
2859 
2860   kmp_user_lock_p lck;
2861 
2862   /* Can't use serial interval since not block structured */
2863 
2864   if ((__kmp_user_lock_kind == lk_tas) &&
2865       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2866        OMP_NEST_LOCK_T_SIZE)) {
2867 #if KMP_OS_LINUX &&                                                            \
2868     (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2869     // "fast" path implemented to fix customer performance issue
2870     kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2871 #if USE_ITT_BUILD
2872     __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2873 #endif /* USE_ITT_BUILD */
2874 
2875 #if OMPT_SUPPORT && OMPT_OPTIONAL
2876     int release_status = KMP_LOCK_STILL_HELD;
2877 #endif
2878 
2879     if (--(tl->lk.depth_locked) == 0) {
2880       TCW_4(tl->lk.poll, 0);
2881 #if OMPT_SUPPORT && OMPT_OPTIONAL
2882       release_status = KMP_LOCK_RELEASED;
2883 #endif
2884     }
2885     KMP_MB();
2886 
2887 #if OMPT_SUPPORT && OMPT_OPTIONAL
2888     // This is the case, if called from omp_init_lock_with_hint:
2889     void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2890     if (!codeptr)
2891       codeptr = OMPT_GET_RETURN_ADDRESS(0);
2892     if (ompt_enabled.enabled) {
2893       if (release_status == KMP_LOCK_RELEASED) {
2894         if (ompt_enabled.ompt_callback_mutex_released) {
2895           // release_lock_last
2896           ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2897               ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2898         }
2899       } else if (ompt_enabled.ompt_callback_nest_lock) {
2900         // release_lock_previous
2901         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2902             ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2903       }
2904     }
2905 #endif
2906 
2907     return;
2908 #else
2909     lck = (kmp_user_lock_p)user_lock;
2910 #endif
2911   }
2912 #if KMP_USE_FUTEX
2913   else if ((__kmp_user_lock_kind == lk_futex) &&
2914            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2915             OMP_NEST_LOCK_T_SIZE)) {
2916     lck = (kmp_user_lock_p)user_lock;
2917   }
2918 #endif
2919   else {
2920     lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2921   }
2922 
2923 #if USE_ITT_BUILD
2924   __kmp_itt_lock_releasing(lck);
2925 #endif /* USE_ITT_BUILD */
2926 
2927   int release_status;
2928   release_status = RELEASE_NESTED_LOCK(lck, gtid);
2929 #if OMPT_SUPPORT && OMPT_OPTIONAL
2930   // This is the case, if called from omp_init_lock_with_hint:
2931   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2932   if (!codeptr)
2933     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2934   if (ompt_enabled.enabled) {
2935     if (release_status == KMP_LOCK_RELEASED) {
2936       if (ompt_enabled.ompt_callback_mutex_released) {
2937         // release_lock_last
2938         ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2939             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2940       }
2941     } else if (ompt_enabled.ompt_callback_nest_lock) {
2942       // release_lock_previous
2943       ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2944           ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2945     }
2946   }
2947 #endif
2948 
2949 #endif // KMP_USE_DYNAMIC_LOCK
2950 }
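// Note on the OMPT reporting above (applies to both lock implementations): a
// release that drops the nesting count to zero is reported through
// ompt_callback_mutex_released ("release_lock_last"), while releasing an inner
// nesting level is reported through ompt_callback_nest_lock with
// ompt_scope_end ("release_lock_prev").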
2951 
2952 /* try to acquire the lock */
2953 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2954   KMP_COUNT_BLOCK(OMP_test_lock);
2955 
2956 #if KMP_USE_DYNAMIC_LOCK
2957   int rc;
2958   int tag = KMP_EXTRACT_D_TAG(user_lock);
2959 #if USE_ITT_BUILD
2960   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2961 #endif
2962 #if OMPT_SUPPORT && OMPT_OPTIONAL
2963   // This is the case, if called from omp_init_lock_with_hint:
2964   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2965   if (!codeptr)
2966     codeptr = OMPT_GET_RETURN_ADDRESS(0);
2967   if (ompt_enabled.ompt_callback_mutex_acquire) {
2968     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2969         ompt_mutex_lock, omp_lock_hint_none,
2970         __ompt_get_mutex_impl_type(user_lock),
2971         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2972   }
2973 #endif
2974 #if KMP_USE_INLINED_TAS
2975   if (tag == locktag_tas && !__kmp_env_consistency_check) {
2976     KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2977   } else
2978 #elif KMP_USE_INLINED_FUTEX
2979   if (tag == locktag_futex && !__kmp_env_consistency_check) {
2980     KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2981   } else
2982 #endif
2983   {
2984     rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2985   }
2986   if (rc) {
2987 #if USE_ITT_BUILD
2988     __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2989 #endif
2990 #if OMPT_SUPPORT && OMPT_OPTIONAL
2991     if (ompt_enabled.ompt_callback_mutex_acquired) {
2992       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2993           ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2994     }
2995 #endif
2996     return FTN_TRUE;
2997   } else {
2998 #if USE_ITT_BUILD
2999     __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3000 #endif
3001     return FTN_FALSE;
3002   }
3003 
3004 #else // KMP_USE_DYNAMIC_LOCK
3005 
3006   kmp_user_lock_p lck;
3007   int rc;
3008 
3009   if ((__kmp_user_lock_kind == lk_tas) &&
3010       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3011     lck = (kmp_user_lock_p)user_lock;
3012   }
3013 #if KMP_USE_FUTEX
3014   else if ((__kmp_user_lock_kind == lk_futex) &&
3015            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3016     lck = (kmp_user_lock_p)user_lock;
3017   }
3018 #endif
3019   else {
3020     lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3021   }
3022 
3023 #if USE_ITT_BUILD
3024   __kmp_itt_lock_acquiring(lck);
3025 #endif /* USE_ITT_BUILD */
3026 #if OMPT_SUPPORT && OMPT_OPTIONAL
3027   // This is the case, if called from omp_init_lock_with_hint:
3028   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3029   if (!codeptr)
3030     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3031   if (ompt_enabled.ompt_callback_mutex_acquire) {
3032     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3033         ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3034         (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3035   }
3036 #endif
3037 
3038   rc = TEST_LOCK(lck, gtid);
3039 #if USE_ITT_BUILD
3040   if (rc) {
3041     __kmp_itt_lock_acquired(lck);
3042   } else {
3043     __kmp_itt_lock_cancelled(lck);
3044   }
3045 #endif /* USE_ITT_BUILD */
3046 #if OMPT_SUPPORT && OMPT_OPTIONAL
3047   if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3048     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3049         ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3050   }
3051 #endif
3052 
3053   return (rc ? FTN_TRUE : FTN_FALSE);
3054 
3055 /* Can't use serial interval since not block structured */
3056 
3057 #endif // KMP_USE_DYNAMIC_LOCK
3058 }
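// Illustrative caller-side sketch (not emitted by this file; `lock_var` is a
// hypothetical lock object previously set up via __kmpc_init_lock): since the
// result is a Fortran-style boolean, a lowered omp_test_lock pattern looks
// roughly like
//
//   if (__kmpc_test_lock(loc, gtid, &lock_var)) {
//     /* ... work under the lock ... */
//     __kmpc_unset_lock(loc, gtid, &lock_var);
//   }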
3059 
3060 /* try to acquire the lock */
3061 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3062 #if KMP_USE_DYNAMIC_LOCK
3063   int rc;
3064 #if USE_ITT_BUILD
3065   __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3066 #endif
3067 #if OMPT_SUPPORT && OMPT_OPTIONAL
3068   // This is the case, if called from omp_init_lock_with_hint:
3069   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3070   if (!codeptr)
3071     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3072   if (ompt_enabled.ompt_callback_mutex_acquire) {
3073     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3074         ompt_mutex_nest_lock, omp_lock_hint_none,
3075         __ompt_get_mutex_impl_type(user_lock),
3076         (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3077   }
3078 #endif
3079   rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3080 #if USE_ITT_BUILD
3081   if (rc) {
3082     __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3083   } else {
3084     __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3085   }
3086 #endif
3087 #if OMPT_SUPPORT && OMPT_OPTIONAL
3088   if (ompt_enabled.enabled && rc) {
3089     if (rc == 1) {
3090       if (ompt_enabled.ompt_callback_mutex_acquired) {
3091         // lock_first
3092         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3093             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3094             codeptr);
3095       }
3096     } else {
3097       if (ompt_enabled.ompt_callback_nest_lock) {
3098         // lock_next
3099         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3100             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3101       }
3102     }
3103   }
3104 #endif
3105   return rc;
3106 
3107 #else // KMP_USE_DYNAMIC_LOCK
3108 
3109   kmp_user_lock_p lck;
3110   int rc;
3111 
3112   if ((__kmp_user_lock_kind == lk_tas) &&
3113       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3114        OMP_NEST_LOCK_T_SIZE)) {
3115     lck = (kmp_user_lock_p)user_lock;
3116   }
3117 #if KMP_USE_FUTEX
3118   else if ((__kmp_user_lock_kind == lk_futex) &&
3119            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3120             OMP_NEST_LOCK_T_SIZE)) {
3121     lck = (kmp_user_lock_p)user_lock;
3122   }
3123 #endif
3124   else {
3125     lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3126   }
3127 
3128 #if USE_ITT_BUILD
3129   __kmp_itt_lock_acquiring(lck);
3130 #endif /* USE_ITT_BUILD */
3131 
3132 #if OMPT_SUPPORT && OMPT_OPTIONAL
3133   // This is the case, if called from omp_init_lock_with_hint:
3134   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3135   if (!codeptr)
3136     codeptr = OMPT_GET_RETURN_ADDRESS(0);
3137   if (ompt_enabled.enabled &&
3138       ompt_enabled.ompt_callback_mutex_acquire) {
3139     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3140         ompt_mutex_nest_lock, omp_lock_hint_none,
3141         __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3142         codeptr);
3143   }
3144 #endif
3145 
3146   rc = TEST_NESTED_LOCK(lck, gtid);
3147 #if USE_ITT_BUILD
3148   if (rc) {
3149     __kmp_itt_lock_acquired(lck);
3150   } else {
3151     __kmp_itt_lock_cancelled(lck);
3152   }
3153 #endif /* USE_ITT_BUILD */
3154 #if OMPT_SUPPORT && OMPT_OPTIONAL
3155   if (ompt_enabled.enabled && rc) {
3156     if (rc == 1) {
3157       if (ompt_enabled.ompt_callback_mutex_acquired) {
3158         // lock_first
3159         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3160             ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3161       }
3162     } else {
3163       if (ompt_enabled.ompt_callback_nest_lock) {
3164         // lock_next
3165         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3166             ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3167       }
3168     }
3169   }
3170 #endif
3171   return rc;
3172 
3173 /* Can't use serial interval since not block structured */
3174 
3175 #endif // KMP_USE_DYNAMIC_LOCK
3176 }
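// Note (descriptive, mirrors the OMPT branches above): for a nestable lock the
// result follows omp_test_nest_lock() semantics -- 0 when the lock cannot be
// acquired, otherwise the new nesting count, which is why rc == 1 is reported
// as a first acquisition and rc > 1 as acquiring the next nesting level.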
3177 
3178 // Interface to fast scalable reduce methods routines
3179 
3180 // keep the selected method in a thread local structure for cross-function
3181 // usage: will be used in __kmpc_end_reduce* functions;
3182 // another solution: to re-determine the method one more time in
3183 // __kmpc_end_reduce* functions (new prototype required then)
3184 // AT: which solution is better?
3185 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
3186   ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3187 
3188 #define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
3189   (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3190 
3191 // description of the packed_reduction_method variable: look at the macros in
3192 // kmp.h
3193 
3194 // used in a critical section reduce block
3195 static __forceinline void
3196 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3197                                           kmp_critical_name *crit) {
3198 
3199   // this lock was visible to a customer and to the threading profile tool as a
3200   // serial overhead span (although it's used for an internal purpose only)
3201   //            why was it visible in previous implementation?
3202   //            should we keep it visible in new reduce block?
3203   kmp_user_lock_p lck;
3204 
3205 #if KMP_USE_DYNAMIC_LOCK
3206 
3207   kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3208   // Check if it is initialized.
3209   if (*lk == 0) {
3210     if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3211       KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3212                                   KMP_GET_D_TAG(__kmp_user_lock_seq));
3213     } else {
3214       __kmp_init_indirect_csptr(crit, loc, global_tid,
3215                                 KMP_GET_I_TAG(__kmp_user_lock_seq));
3216     }
3217   }
3218   // Branch for accessing the actual lock object and set operation. This
3219   // branching is inevitable since this lock initialization does not follow the
3220   // normal dispatch path (lock table is not used).
3221   if (KMP_EXTRACT_D_TAG(lk) != 0) {
3222     lck = (kmp_user_lock_p)lk;
3223     KMP_DEBUG_ASSERT(lck != NULL);
3224     if (__kmp_env_consistency_check) {
3225       __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3226     }
3227     KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3228   } else {
3229     kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3230     lck = ilk->lock;
3231     KMP_DEBUG_ASSERT(lck != NULL);
3232     if (__kmp_env_consistency_check) {
3233       __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3234     }
3235     KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3236   }
3237 
3238 #else // KMP_USE_DYNAMIC_LOCK
3239 
3240   // We know that the fast reduction code is only emitted by Intel compilers
3241   // with 32 byte critical sections. If there isn't enough space, then we
3242   // have to use a pointer.
3243   if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3244     lck = (kmp_user_lock_p)crit;
3245   } else {
3246     lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3247   }
3248   KMP_DEBUG_ASSERT(lck != NULL);
3249 
3250   if (__kmp_env_consistency_check)
3251     __kmp_push_sync(global_tid, ct_critical, loc, lck);
3252 
3253   __kmp_acquire_user_lock_with_checks(lck, global_tid);
3254 
3255 #endif // KMP_USE_DYNAMIC_LOCK
3256 }
3257 
3258 // used in a critical section reduce block
3259 static __forceinline void
3260 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3261                                         kmp_critical_name *crit) {
3262 
3263   kmp_user_lock_p lck;
3264 
3265 #if KMP_USE_DYNAMIC_LOCK
3266 
3267   if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3268     lck = (kmp_user_lock_p)crit;
3269     if (__kmp_env_consistency_check)
3270       __kmp_pop_sync(global_tid, ct_critical, loc);
3271     KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3272   } else {
3273     kmp_indirect_lock_t *ilk =
3274         (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3275     if (__kmp_env_consistency_check)
3276       __kmp_pop_sync(global_tid, ct_critical, loc);
3277     KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3278   }
3279 
3280 #else // KMP_USE_DYNAMIC_LOCK
3281 
3282   // We know that the fast reduction code is only emitted by Intel compilers
3283   // with 32 byte critical sections. If there isn't enough space, then we have
3284   // to use a pointer.
3285   if (__kmp_base_user_lock_size > 32) {
3286     lck = *((kmp_user_lock_p *)crit);
3287     KMP_ASSERT(lck != NULL);
3288   } else {
3289     lck = (kmp_user_lock_p)crit;
3290   }
3291 
3292   if (__kmp_env_consistency_check)
3293     __kmp_pop_sync(global_tid, ct_critical, loc);
3294 
3295   __kmp_release_user_lock_with_checks(lck, global_tid);
3296 
3297 #endif // KMP_USE_DYNAMIC_LOCK
3298 } // __kmp_end_critical_section_reduce_block
3299 
3300 #if OMP_40_ENABLED
3301 static __forceinline int
3302 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3303                                      int *task_state) {
3304   kmp_team_t *team;
3305 
3306   // Check whether we are inside a teams construct.
3307   if (th->th.th_teams_microtask) {
3308     *team_p = team = th->th.th_team;
3309     if (team->t.t_level == th->th.th_teams_level) {
3310       // This is reduction at teams construct.
3311       KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3312       // Let's swap teams temporarily for the reduction.
3313       th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3314       th->th.th_team = team->t.t_parent;
3315       th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3316       th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3317       *task_state = th->th.th_task_state;
3318       th->th.th_task_state = 0;
3319 
3320       return 1;
3321     }
3322   }
3323   return 0;
3324 }
3325 
3326 static __forceinline void
3327 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3328   // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3329   th->th.th_info.ds.ds_tid = 0;
3330   th->th.th_team = team;
3331   th->th.th_team_nproc = team->t.t_nproc;
3332   th->th.th_task_team = team->t.t_task_team[task_state];
3333   th->th.th_task_state = task_state;
3334 }
3335 #endif
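// Usage pattern for the two helpers above (a sketch that mirrors the reduce
// entry points below):
//
//   kmp_info_t *th = __kmp_thread_from_gtid(global_tid);
//   kmp_team_t *team;
//   int task_state;
//   int teams_swapped =
//       __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
//   /* ... perform the reduction using the parent team's resources ... */
//   if (teams_swapped)
//     __kmp_restore_swapped_teams(th, team, task_state);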
3336 
3337 /* 2.a.i. Reduce Block without a terminating barrier */
3338 /*!
3339 @ingroup SYNCHRONIZATION
3340 @param loc source location information
3341 @param global_tid global thread number
3342 @param num_vars number of items (variables) to be reduced
3343 @param reduce_size size of data in bytes to be reduced
3344 @param reduce_data pointer to data to be reduced
3345 @param reduce_func callback function providing reduction operation on two
3346 operands and returning result of reduction in lhs_data
3347 @param lck pointer to the unique lock data structure
3348 @result 1 for the master thread, 0 for all other team threads, 2 for all team
3349 threads if atomic reduction needed
3350 
3351 The nowait version is used for a reduce clause with the nowait argument.
3352 */
3353 kmp_int32
3354 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3355                      size_t reduce_size, void *reduce_data,
3356                      void (*reduce_func)(void *lhs_data, void *rhs_data),
3357                      kmp_critical_name *lck) {
3358 
3359   KMP_COUNT_BLOCK(REDUCE_nowait);
3360   int retval = 0;
3361   PACKED_REDUCTION_METHOD_T packed_reduction_method;
3362 #if OMP_40_ENABLED
3363   kmp_info_t *th;
3364   kmp_team_t *team;
3365   int teams_swapped = 0, task_state;
3366 #endif
3367   KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3368 
3369   // why do we need this initialization here at all?
3370   // Reduction clause can not be used as a stand-alone directive.
3371 
3372   // do not call __kmp_serial_initialize(), it will be called by
3373   // __kmp_parallel_initialize() if needed
3374   // possible detection of false-positive race by the threadchecker ???
3375   if (!TCR_4(__kmp_init_parallel))
3376     __kmp_parallel_initialize();
3377 
3378 // check correctness of reduce block nesting
3379 #if KMP_USE_DYNAMIC_LOCK
3380   if (__kmp_env_consistency_check)
3381     __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3382 #else
3383   if (__kmp_env_consistency_check)
3384     __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3385 #endif
3386 
3387 #if OMP_40_ENABLED
3388   th = __kmp_thread_from_gtid(global_tid);
3389   teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3390 #endif // OMP_40_ENABLED
3391 
3392   // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3393   // the value should be kept in a variable
3394   // the variable should be either a construct-specific or thread-specific
3395   // property, not a team specific property
3396   //     (a thread can reach the next reduce block on the next construct, reduce
3397   //     method may differ on the next construct)
3398   // an ident_t "loc" parameter could be used as a construct-specific property
3399   // (what if loc == 0?)
3400   //     (if both construct-specific and team-specific variables were shared,
3401   //     then unnecessary extra syncs would be needed)
3402   // a thread-specific variable is better regarding two issues above (next
3403   // construct and extra syncs)
3404   // a thread-specific "th_local.reduction_method" variable is used currently
3405   // each thread executes the 'determine' and 'set' lines (there is no need to
3406   // restrict this to one thread; that would only add unnecessary extra syncs)
3407 
3408   packed_reduction_method = __kmp_determine_reduction_method(
3409       loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3410   __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3411 
3412   if (packed_reduction_method == critical_reduce_block) {
3413 
3414     __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3415     retval = 1;
3416 
3417   } else if (packed_reduction_method == empty_reduce_block) {
3418 
3419     // usage: if team size == 1, no synchronization is required ( Intel
3420     // platforms only )
3421     retval = 1;
3422 
3423   } else if (packed_reduction_method == atomic_reduce_block) {
3424 
3425     retval = 2;
3426 
3427     // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3428     // won't be called by the code gen)
3429     //     (it's not quite good, because the checking block has been closed by
3430     //     this 'pop',
3431     //      but atomic operation has not been executed yet, will be executed
3432     //      slightly later, literally on next instruction)
3433     if (__kmp_env_consistency_check)
3434       __kmp_pop_sync(global_tid, ct_reduce, loc);
3435 
3436   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3437                                    tree_reduce_block)) {
3438 
3439 // AT: performance issue: a real barrier here
3440 // AT:     (if master goes slow, other threads are blocked here waiting for the
3441 // master to come and release them)
3442 // AT:     (it's not what a customer might expect specifying NOWAIT clause)
3443 // AT:     (specifying NOWAIT won't result in improvement of performance, it'll
3444 // be confusing to a customer)
3445 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3446 // might go faster and be more in line with sense of NOWAIT
3447 // AT: TO DO: do epcc test and compare times
3448 
3449 // this barrier should be invisible to a customer and to the threading profile
3450 // tool (it's neither a terminating barrier nor customer's code, it's
3451 // used for an internal purpose)
3452 #if OMPT_SUPPORT
3453     // JP: can this barrier potentially lead to task scheduling?
3454     // JP: as long as there is a barrier in the implementation, OMPT should and
3455     // will provide the barrier events, so we set up the necessary frame/return
3456     // addresses.
3457     ompt_frame_t *ompt_frame;
3458     if (ompt_enabled.enabled) {
3459       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3460       if (ompt_frame->enter_frame.ptr == NULL)
3461         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3462       OMPT_STORE_RETURN_ADDRESS(global_tid);
3463     }
3464 #endif
3465 #if USE_ITT_NOTIFY
3466     __kmp_threads[global_tid]->th.th_ident = loc;
3467 #endif
3468     retval =
3469         __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3470                       global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3471     retval = (retval != 0) ? (0) : (1);
3472 #if OMPT_SUPPORT && OMPT_OPTIONAL
3473     if (ompt_enabled.enabled) {
3474       ompt_frame->enter_frame = ompt_data_none;
3475     }
3476 #endif
3477 
3478     // all other workers except master should do this pop here
3479     //     ( none of other workers will get to __kmpc_end_reduce_nowait() )
3480     if (__kmp_env_consistency_check) {
3481       if (retval == 0) {
3482         __kmp_pop_sync(global_tid, ct_reduce, loc);
3483       }
3484     }
3485 
3486   } else {
3487 
3488     // should never reach this block
3489     KMP_ASSERT(0); // "unexpected method"
3490   }
3491 #if OMP_40_ENABLED
3492   if (teams_swapped) {
3493     __kmp_restore_swapped_teams(th, team, task_state);
3494   }
3495 #endif
3496   KA_TRACE(
3497       10,
3498       ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3499        global_tid, packed_reduction_method, retval));
3500 
3501   return retval;
3502 }
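// Illustrative lowering of the nowait reduction protocol (a sketch of what a
// compiler might emit; `sum`, `local_sum`, `reduce_fn` and `crit_name` are
// hypothetical names):
//
//   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(sum), &local_sum,
//                                reduce_fn, &crit_name)) {
//   case 1: // this thread combines under the selected method (critical/tree)
//     sum += local_sum;
//     __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
//     break;
//   case 2: // atomic method: each thread combines atomically, no end call
//     /* atomic update of sum with local_sum */
//     break;
//   default: // 0: a tree-reduction worker whose value was already combined
//     break;
//   }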
3503 
3504 /*!
3505 @ingroup SYNCHRONIZATION
3506 @param loc source location information
3507 @param global_tid global thread id.
3508 @param lck pointer to the unique lock data structure
3509 
3510 Finish the execution of a reduce nowait.
3511 */
3512 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3513                               kmp_critical_name *lck) {
3514 
3515   PACKED_REDUCTION_METHOD_T packed_reduction_method;
3516 
3517   KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3518 
3519   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3520 
3521   if (packed_reduction_method == critical_reduce_block) {
3522 
3523     __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3524 
3525   } else if (packed_reduction_method == empty_reduce_block) {
3526 
3527     // usage: if team size == 1, no synchronization is required ( on Intel
3528     // platforms only )
3529 
3530   } else if (packed_reduction_method == atomic_reduce_block) {
3531 
3532     // neither master nor other workers should get here
3533     //     (code gen does not generate this call in case 2: atomic reduce block)
3534     // actually it would be better to remove this else-if entirely;
3535     // after removal this value would be caught by the 'else' branch and assert
3536 
3537   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3538                                    tree_reduce_block)) {
3539 
3540     // only master gets here
3541 
3542   } else {
3543 
3544     // should never reach this block
3545     KMP_ASSERT(0); // "unexpected method"
3546   }
3547 
3548   if (__kmp_env_consistency_check)
3549     __kmp_pop_sync(global_tid, ct_reduce, loc);
3550 
3551   KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3552                 global_tid, packed_reduction_method));
3553 
3554   return;
3555 }
3556 
3557 /* 2.a.ii. Reduce Block with a terminating barrier */
3558 
3559 /*!
3560 @ingroup SYNCHRONIZATION
3561 @param loc source location information
3562 @param global_tid global thread number
3563 @param num_vars number of items (variables) to be reduced
3564 @param reduce_size size of data in bytes to be reduced
3565 @param reduce_data pointer to data to be reduced
3566 @param reduce_func callback function providing reduction operation on two
3567 operands and returning result of reduction in lhs_data
3568 @param lck pointer to the unique lock data structure
3569 @result 1 for the master thread, 0 for all other team threads, 2 for all team
3570 threads if atomic reduction needed
3571 
3572 A blocking reduce that includes an implicit barrier.
3573 */
3574 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3575                         size_t reduce_size, void *reduce_data,
3576                         void (*reduce_func)(void *lhs_data, void *rhs_data),
3577                         kmp_critical_name *lck) {
3578   KMP_COUNT_BLOCK(REDUCE_wait);
3579   int retval = 0;
3580   PACKED_REDUCTION_METHOD_T packed_reduction_method;
3581 #if OMP_40_ENABLED
3582   kmp_info_t *th;
3583   kmp_team_t *team;
3584   int teams_swapped = 0, task_state;
3585 #endif
3586 
3587   KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3588 
3589   // why do we need this initialization here at all?
3590   // Reduction clause can not be a stand-alone directive.
3591 
3592   // do not call __kmp_serial_initialize(), it will be called by
3593   // __kmp_parallel_initialize() if needed
3594   // possible detection of false-positive race by the threadchecker ???
3595   if (!TCR_4(__kmp_init_parallel))
3596     __kmp_parallel_initialize();
3597 
3598 // check correctness of reduce block nesting
3599 #if KMP_USE_DYNAMIC_LOCK
3600   if (__kmp_env_consistency_check)
3601     __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3602 #else
3603   if (__kmp_env_consistency_check)
3604     __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3605 #endif
3606 
3607 #if OMP_40_ENABLED
3608   th = __kmp_thread_from_gtid(global_tid);
3609   teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3610 #endif // OMP_40_ENABLED
3611 
3612   packed_reduction_method = __kmp_determine_reduction_method(
3613       loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3614   __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3615 
3616   if (packed_reduction_method == critical_reduce_block) {
3617 
3618     __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3619     retval = 1;
3620 
3621   } else if (packed_reduction_method == empty_reduce_block) {
3622 
3623     // usage: if team size == 1, no synchronization is required ( Intel
3624     // platforms only )
3625     retval = 1;
3626 
3627   } else if (packed_reduction_method == atomic_reduce_block) {
3628 
3629     retval = 2;
3630 
3631   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3632                                    tree_reduce_block)) {
3633 
3634 // case tree_reduce_block:
3635 // this barrier should be visible to a customer and to the threading profile
3636 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3637 #if OMPT_SUPPORT
3638     ompt_frame_t *ompt_frame;
3639     if (ompt_enabled.enabled) {
3640       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3641       if (ompt_frame->enter_frame.ptr == NULL)
3642         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3643       OMPT_STORE_RETURN_ADDRESS(global_tid);
3644     }
3645 #endif
3646 #if USE_ITT_NOTIFY
3647     __kmp_threads[global_tid]->th.th_ident =
3648         loc; // needed for correct notification of frames
3649 #endif
3650     retval =
3651         __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3652                       global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3653     retval = (retval != 0) ? (0) : (1);
3654 #if OMPT_SUPPORT && OMPT_OPTIONAL
3655     if (ompt_enabled.enabled) {
3656       ompt_frame->enter_frame = ompt_data_none;
3657     }
3658 #endif
3659 
3660     // all other workers except master should do this pop here
3661     // ( none of other workers except master will enter __kmpc_end_reduce() )
3662     if (__kmp_env_consistency_check) {
3663       if (retval == 0) { // 0: all other workers; 1: master
3664         __kmp_pop_sync(global_tid, ct_reduce, loc);
3665       }
3666     }
3667 
3668   } else {
3669 
3670     // should never reach this block
3671     KMP_ASSERT(0); // "unexpected method"
3672   }
3673 #if OMP_40_ENABLED
3674   if (teams_swapped) {
3675     __kmp_restore_swapped_teams(th, team, task_state);
3676   }
3677 #endif
3678 
3679   KA_TRACE(10,
3680            ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3681             global_tid, packed_reduction_method, retval));
3682 
3683   return retval;
3684 }
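// Caller-side note for the blocking variant (descriptive sketch): the protocol
// mirrors the nowait case above, except that __kmpc_end_reduce() is also
// expected after the atomic case (its atomic_reduce_block branch below supplies
// the terminating barrier), while tree-reduction workers that receive 0 here
// skip the end call and are released by the master via __kmp_end_split_barrier().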
3685 
3686 /*!
3687 @ingroup SYNCHRONIZATION
3688 @param loc source location information
3689 @param global_tid global thread id.
3690 @param lck pointer to the unique lock data structure
3691 
3692 Finish the execution of a blocking reduce.
3693 The <tt>lck</tt> pointer must be the same as that used in the corresponding
3694 start function.
3695 */
3696 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3697                        kmp_critical_name *lck) {
3698 
3699   PACKED_REDUCTION_METHOD_T packed_reduction_method;
3700 #if OMP_40_ENABLED
3701   kmp_info_t *th;
3702   kmp_team_t *team;
3703   int teams_swapped = 0, task_state;
3704 #endif
3705 
3706   KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3707 
3708 #if OMP_40_ENABLED
3709   th = __kmp_thread_from_gtid(global_tid);
3710   teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3711 #endif // OMP_40_ENABLED
3712 
3713   packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3714 
3715   // this barrier should be visible to a customer and to the threading profile
3716   // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3717 
3718   if (packed_reduction_method == critical_reduce_block) {
3719 
3720     __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3721 
3722 // TODO: implicit barrier: should be exposed
3723 #if OMPT_SUPPORT
3724     ompt_frame_t *ompt_frame;
3725     if (ompt_enabled.enabled) {
3726       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3727       if (ompt_frame->enter_frame.ptr == NULL)
3728         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3729       OMPT_STORE_RETURN_ADDRESS(global_tid);
3730     }
3731 #endif
3732 #if USE_ITT_NOTIFY
3733     __kmp_threads[global_tid]->th.th_ident = loc;
3734 #endif
3735     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3736 #if OMPT_SUPPORT && OMPT_OPTIONAL
3737     if (ompt_enabled.enabled) {
3738       ompt_frame->enter_frame = ompt_data_none;
3739     }
3740 #endif
3741 
3742   } else if (packed_reduction_method == empty_reduce_block) {
3743 
3744 // usage: if team size==1, no synchronization is required (Intel platforms only)
3745 
3746 // TODO: implicit barrier: should be exposed
3747 #if OMPT_SUPPORT
3748     ompt_frame_t *ompt_frame;
3749     if (ompt_enabled.enabled) {
3750       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3751       if (ompt_frame->enter_frame.ptr == NULL)
3752         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3753       OMPT_STORE_RETURN_ADDRESS(global_tid);
3754     }
3755 #endif
3756 #if USE_ITT_NOTIFY
3757     __kmp_threads[global_tid]->th.th_ident = loc;
3758 #endif
3759     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3760 #if OMPT_SUPPORT && OMPT_OPTIONAL
3761     if (ompt_enabled.enabled) {
3762       ompt_frame->enter_frame = ompt_data_none;
3763     }
3764 #endif
3765 
3766   } else if (packed_reduction_method == atomic_reduce_block) {
3767 
3768 #if OMPT_SUPPORT
3769     ompt_frame_t *ompt_frame;
3770     if (ompt_enabled.enabled) {
3771       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3772       if (ompt_frame->enter_frame.ptr == NULL)
3773         ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3774       OMPT_STORE_RETURN_ADDRESS(global_tid);
3775     }
3776 #endif
3777 // TODO: implicit barrier: should be exposed
3778 #if USE_ITT_NOTIFY
3779     __kmp_threads[global_tid]->th.th_ident = loc;
3780 #endif
3781     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3782 #if OMPT_SUPPORT && OMPT_OPTIONAL
3783     if (ompt_enabled.enabled) {
3784       ompt_frame->enter_frame = ompt_data_none;
3785     }
3786 #endif
3787 
3788   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3789                                    tree_reduce_block)) {
3790 
3791     // only master executes here (master releases all other workers)
3792     __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3793                             global_tid);
3794 
3795   } else {
3796 
3797     // should never reach this block
3798     KMP_ASSERT(0); // "unexpected method"
3799   }
3800 #if OMP_40_ENABLED
3801   if (teams_swapped) {
3802     __kmp_restore_swapped_teams(th, team, task_state);
3803   }
3804 #endif
3805 
3806   if (__kmp_env_consistency_check)
3807     __kmp_pop_sync(global_tid, ct_reduce, loc);
3808 
3809   KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3810                 global_tid, packed_reduction_method));
3811 
3812   return;
3813 }
3814 
3815 #undef __KMP_GET_REDUCTION_METHOD
3816 #undef __KMP_SET_REDUCTION_METHOD
3817 
3818 /* end of interface to fast scalable reduce routines */
3819 
3820 kmp_uint64 __kmpc_get_taskid() {
3821 
3822   kmp_int32 gtid;
3823   kmp_info_t *thread;
3824 
3825   gtid = __kmp_get_gtid();
3826   if (gtid < 0) {
3827     return 0;
3828   }
3829   thread = __kmp_thread_from_gtid(gtid);
3830   return thread->th.th_current_task->td_task_id;
3831 
3832 } // __kmpc_get_taskid
3833 
3834 kmp_uint64 __kmpc_get_parent_taskid() {
3835 
3836   kmp_int32 gtid;
3837   kmp_info_t *thread;
3838   kmp_taskdata_t *parent_task;
3839 
3840   gtid = __kmp_get_gtid();
3841   if (gtid < 0) {
3842     return 0;
3843   }
3844   thread = __kmp_thread_from_gtid(gtid);
3845   parent_task = thread->th.th_current_task->td_parent;
3846   return (parent_task == NULL ? 0 : parent_task->td_task_id);
3847 
3848 } // __kmpc_get_parent_taskid
3849 
3850 #if OMP_45_ENABLED
3851 /*!
3852 @ingroup WORK_SHARING
3853 @param loc  source location information.
3854 @param gtid  global thread number.
3855 @param num_dims  number of associated doacross loops.
3856 @param dims  info on loops bounds.
3857 
3858 Initialize doacross loop information.
3859 The compiler is expected to send us inclusive bounds,
3860 e.g. for (i = 2; i < 9; i += 2) this yields lo=2, up=8, st=2.
3861 */
3862 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3863                           const struct kmp_dim *dims) {
3864   int j, idx;
3865   kmp_int64 last, trace_count;
3866   kmp_info_t *th = __kmp_threads[gtid];
3867   kmp_team_t *team = th->th.th_team;
3868   kmp_uint32 *flags;
3869   kmp_disp_t *pr_buf = th->th.th_dispatch;
3870   dispatch_shared_info_t *sh_buf;
3871 
3872   KA_TRACE(
3873       20,
3874       ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3875        gtid, num_dims, !team->t.t_serialized));
3876   KMP_DEBUG_ASSERT(dims != NULL);
3877   KMP_DEBUG_ASSERT(num_dims > 0);
3878 
3879   if (team->t.t_serialized) {
3880     KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3881     return; // no dependencies if team is serialized
3882   }
3883   KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3884   idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3885   // the next loop
3886   sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3887 
3888   // Save bounds info into allocated private buffer
3889   KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3890   pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3891       th, sizeof(kmp_int64) * (4 * num_dims + 1));
3892   KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3893   pr_buf->th_doacross_info[0] =
3894       (kmp_int64)num_dims; // first element is number of dimensions
3895   // Save also address of num_done in order to access it later without knowing
3896   // the buffer index
3897   pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3898   pr_buf->th_doacross_info[2] = dims[0].lo;
3899   pr_buf->th_doacross_info[3] = dims[0].up;
3900   pr_buf->th_doacross_info[4] = dims[0].st;
3901   last = 5;
3902   for (j = 1; j < num_dims; ++j) {
3903     kmp_int64
3904         range_length; // To keep ranges of all dimensions but the first dims[0]
3905     if (dims[j].st == 1) { // most common case
3906       // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3907       range_length = dims[j].up - dims[j].lo + 1;
3908     } else {
3909       if (dims[j].st > 0) {
3910         KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3911         range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3912       } else { // negative increment
3913         KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3914         range_length =
3915             (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3916       }
3917     }
3918     pr_buf->th_doacross_info[last++] = range_length;
3919     pr_buf->th_doacross_info[last++] = dims[j].lo;
3920     pr_buf->th_doacross_info[last++] = dims[j].up;
3921     pr_buf->th_doacross_info[last++] = dims[j].st;
3922   }
3923 
3924   // Compute total trip count.
3925   // Start with range of dims[0] which we don't need to keep in the buffer.
3926   if (dims[0].st == 1) { // most common case
3927     trace_count = dims[0].up - dims[0].lo + 1;
3928   } else if (dims[0].st > 0) {
3929     KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3930     trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3931   } else { // negative increment
3932     KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3933     trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3934   }
3935   for (j = 1; j < num_dims; ++j) {
3936     trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3937   }
3938   KMP_DEBUG_ASSERT(trace_count > 0);
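  // trace_count now holds the product of all dimension ranges, i.e. the total
  // number of iterations of the collapsed loop nest; one completion bit per
  // iteration is allocated in the shared flags array below.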
3939 
3940   // Check if shared buffer is not occupied by other loop (idx -
3941   // __kmp_dispatch_num_buffers)
3942   if (idx != sh_buf->doacross_buf_idx) {
3943     // Shared buffer is occupied, wait for it to be free
3944     __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3945                        __kmp_eq_4, NULL);
3946   }
3947 #if KMP_32_BIT_ARCH
3948   // Check if we are the first thread. After the CAS the first thread gets 0,
3949   // others get 1 if initialization is in progress, allocated pointer otherwise.
3950   // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3951   flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3952       (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3953 #else
3954   flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3955       (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3956 #endif
3957   if (flags == NULL) {
3958     // we are the first thread, allocate the array of flags
3959     size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3960     flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3961     KMP_MB();
3962     sh_buf->doacross_flags = flags;
3963   } else if (flags == (kmp_uint32 *)1) {
3964 #if KMP_32_BIT_ARCH
3965     // initialization is still in progress, need to wait
3966     while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3967 #else
3968     while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3969 #endif
3970       KMP_YIELD(TRUE);
3971     KMP_MB();
3972   } else {
3973     KMP_MB();
3974   }
3975   KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3976   pr_buf->th_doacross_flags =
3977       sh_buf->doacross_flags; // save private copy in order to not
3978   // touch shared buffer on each iteration
3979   KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3980 }
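// Illustrative use of the doacross entry points defined here and below (a
// sketch of how a compiler might lower "#pragma omp for ordered(1)" with
// depend(sink)/depend(source) clauses; chunk bounds and names are hypothetical):
//
//   struct kmp_dim dim = {/*lo*/ 2, /*up*/ 8, /*st*/ 2}; // for (i=2; i<9; i+=2)
//   __kmpc_doacross_init(loc, gtid, 1, &dim);
//   for (kmp_int64 i = chunk_lo; i <= chunk_up; i += 2) {
//     kmp_int64 sink = i - 2;
//     __kmpc_doacross_wait(loc, gtid, &sink); // ordered depend(sink: i - 2)
//     /* ... loop body ... */
//     __kmpc_doacross_post(loc, gtid, &i); // ordered depend(source)
//   }
//   __kmpc_doacross_fini(loc, gtid);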
3981 
3982 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3983   kmp_int32 shft, num_dims, i;
3984   kmp_uint32 flag;
3985   kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3986   kmp_info_t *th = __kmp_threads[gtid];
3987   kmp_team_t *team = th->th.th_team;
3988   kmp_disp_t *pr_buf;
3989   kmp_int64 lo, up, st;
3990 
3991   KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3992   if (team->t.t_serialized) {
3993     KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3994     return; // no dependencies if team is serialized
3995   }
3996 
3997   // calculate sequential iteration number and check out-of-bounds condition
3998   pr_buf = th->th.th_dispatch;
3999   KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4000   num_dims = pr_buf->th_doacross_info[0];
4001   lo = pr_buf->th_doacross_info[2];
4002   up = pr_buf->th_doacross_info[3];
4003   st = pr_buf->th_doacross_info[4];
4004   if (st == 1) { // most common case
4005     if (vec[0] < lo || vec[0] > up) {
4006       KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4007                     "bounds [%lld,%lld]\n",
4008                     gtid, vec[0], lo, up));
4009       return;
4010     }
4011     iter_number = vec[0] - lo;
4012   } else if (st > 0) {
4013     if (vec[0] < lo || vec[0] > up) {
4014       KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4015                     "bounds [%lld,%lld]\n",
4016                     gtid, vec[0], lo, up));
4017       return;
4018     }
4019     iter_number = (kmp_uint64)(vec[0] - lo) / st;
4020   } else { // negative increment
4021     if (vec[0] > lo || vec[0] < up) {
4022       KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4023                     "bounds [%lld,%lld]\n",
4024                     gtid, vec[0], lo, up));
4025       return;
4026     }
4027     iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4028   }
4029   for (i = 1; i < num_dims; ++i) {
4030     kmp_int64 iter, ln;
4031     kmp_int32 j = i * 4;
4032     ln = pr_buf->th_doacross_info[j + 1];
4033     lo = pr_buf->th_doacross_info[j + 2];
4034     up = pr_buf->th_doacross_info[j + 3];
4035     st = pr_buf->th_doacross_info[j + 4];
4036     if (st == 1) {
4037       if (vec[i] < lo || vec[i] > up) {
4038         KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4039                       "bounds [%lld,%lld]\n",
4040                       gtid, vec[i], lo, up));
4041         return;
4042       }
4043       iter = vec[i] - lo;
4044     } else if (st > 0) {
4045       if (vec[i] < lo || vec[i] > up) {
4046         KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4047                       "bounds [%lld,%lld]\n",
4048                       gtid, vec[i], lo, up));
4049         return;
4050       }
4051       iter = (kmp_uint64)(vec[i] - lo) / st;
4052     } else { // st < 0
4053       if (vec[i] > lo || vec[i] < up) {
4054         KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4055                       "bounds [%lld,%lld]\n",
4056                       gtid, vec[i], lo, up));
4057         return;
4058       }
4059       iter = (kmp_uint64)(lo - vec[i]) / (-st);
4060     }
4061     iter_number = iter + ln * iter_number;
4062   }
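  // The shared flags array is a bit vector over iteration numbers: bit
  // (iter_number % 32) of the 32-bit word at index (iter_number / 32) is set
  // once the corresponding iteration has been posted.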
4063   shft = iter_number % 32; // use 32-bit granularity
4064   iter_number >>= 5; // divided by 32
4065   flag = 1 << shft;
4066   while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4067     KMP_YIELD(TRUE);
4068   }
4069   KMP_MB();
4070   KA_TRACE(20,
4071            ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4072             gtid, (iter_number << 5) + shft));
4073 }
4074 
4075 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4076   kmp_int32 shft, num_dims, i;
4077   kmp_uint32 flag;
4078   kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4079   kmp_info_t *th = __kmp_threads[gtid];
4080   kmp_team_t *team = th->th.th_team;
4081   kmp_disp_t *pr_buf;
4082   kmp_int64 lo, st;
4083 
4084   KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4085   if (team->t.t_serialized) {
4086     KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4087     return; // no dependencies if team is serialized
4088   }
4089 
4090   // calculate sequential iteration number (same as in "wait" but no
4091   // out-of-bounds checks)
4092   pr_buf = th->th.th_dispatch;
4093   KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4094   num_dims = pr_buf->th_doacross_info[0];
4095   lo = pr_buf->th_doacross_info[2];
4096   st = pr_buf->th_doacross_info[4];
4097   if (st == 1) { // most common case
4098     iter_number = vec[0] - lo;
4099   } else if (st > 0) {
4100     iter_number = (kmp_uint64)(vec[0] - lo) / st;
4101   } else { // negative increment
4102     iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4103   }
4104   for (i = 1; i < num_dims; ++i) {
4105     kmp_int64 iter, ln;
4106     kmp_int32 j = i * 4;
4107     ln = pr_buf->th_doacross_info[j + 1];
4108     lo = pr_buf->th_doacross_info[j + 2];
4109     st = pr_buf->th_doacross_info[j + 4];
4110     if (st == 1) {
4111       iter = vec[i] - lo;
4112     } else if (st > 0) {
4113       iter = (kmp_uint64)(vec[i] - lo) / st;
4114     } else { // st < 0
4115       iter = (kmp_uint64)(lo - vec[i]) / (-st);
4116     }
4117     iter_number = iter + ln * iter_number;
4118   }
4119   shft = iter_number % 32; // use 32-bit granularity
4120   iter_number >>= 5; // divided by 32
4121   flag = 1 << shft;
4122   KMP_MB();
4123   if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4124     KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4125   KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4126                 (iter_number << 5) + shft));
4127 }
4128 
4129 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4130   kmp_int32 num_done;
4131   kmp_info_t *th = __kmp_threads[gtid];
4132   kmp_team_t *team = th->th.th_team;
4133   kmp_disp_t *pr_buf = th->th.th_dispatch;
4134 
4135   KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4136   if (team->t.t_serialized) {
4137     KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4138     return; // nothing to do
4139   }
4140   num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4141   if (num_done == th->th.th_team_nproc) {
4142     // we are the last thread, need to free shared resources
4143     int idx = pr_buf->th_doacross_buf_idx - 1;
4144     dispatch_shared_info_t *sh_buf =
4145         &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4146     KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4147                      (kmp_int64)&sh_buf->doacross_num_done);
4148     KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4149     KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4150     __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4151     sh_buf->doacross_flags = NULL;
4152     sh_buf->doacross_num_done = 0;
4153     sh_buf->doacross_buf_idx +=
4154         __kmp_dispatch_num_buffers; // free buffer for future re-use
4155   }
4156   // free private resources (need to keep buffer index forever)
4157   pr_buf->th_doacross_flags = NULL;
4158   __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4159   pr_buf->th_doacross_info = NULL;
4160   KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4161 }
4162 #endif
4163 
4164 #if OMP_50_ENABLED
4165 int __kmpc_get_target_offload(void) {
4166   if (!__kmp_init_serial) {
4167     __kmp_serial_initialize();
4168   }
4169   return __kmp_target_offload;
4170 }
4171 #endif // OMP_50_ENABLED
4172 
4173 // end of file //
4174