/* Copyright (C) 2015-2021 Free Software Foundation, Inc.
   Contributed by Alexander Monakov <amonakov@ispras.ru>

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This is an NVPTX specific implementation of a barrier synchronization
   mechanism for libgomp.  This type is private to the library.  This
   implementation uses atomic instructions and the bar.sync instruction.  */

#include <limits.h>
#include "libgomp.h"

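/* Complete a barrier wait begun with gomp_barrier_wait_start.  The last
   thread to arrive (BAR_WAS_LAST) resets the arrival counter and publishes
   the next generation with a release store; all participants then meet at
   hardware barrier 1.  The count operand of bar.sync is given in hardware
   threads, and each team member occupies a full 32-lane warp on this
   target, hence 32 * bar->total.  */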
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      bar->awaited = bar->total;
      __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
			MEMMODEL_RELEASE);
    }
  if (bar->total > 1)
    asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}

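/* Simple (non-task-aware) barrier: combine the arrival phase with the
   completion phase above.  */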
void
gomp_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

/* Like gomp_barrier_wait, except that if the encountering thread
   is not the last one to hit the barrier, it returns immediately.
   The intended usage is that a thread which intends to gomp_barrier_destroy
   this barrier calls gomp_barrier_wait, while all other threads
   call gomp_barrier_wait_last.  When gomp_barrier_wait returns,
   the barrier can be safely destroyed.  */

void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
  /* Deferring to gomp_barrier_wait does not use the optimization opportunity
     allowed by the interface contract for all-but-last participants.  The
     original implementation in config/linux/bar.c handles this better.  */
  gomp_barrier_wait (bar);
}

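/* There is no futex-style sleeping on this target; "waking" the team is
   simply another rendezvous at the hardware barrier, and COUNT is ignored
   because every team member participates.  */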
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
  if (bar->total > 1)
    asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}

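/* Task-aware team barrier.  The last thread to arrive either runs pending
   tasks or advances the generation and releases the team; the remaining
   threads loop on the hardware barrier, executing tasks flagged by
   BAR_TASK_PENDING, until the generation count has advanced by BAR_INCR.  */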
void
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  state &= ~BAR_WAS_LAST;
	}
      else
	{
	  state &= ~BAR_CANCELLED;
	  state += BAR_INCR - BAR_WAS_LAST;
	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
	  if (bar->total > 1)
	    asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
	  return;
	}
    }

  generation = state;
  state &= ~BAR_CANCELLED;
  do
    {
      if (bar->total > 1)
	asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
	}
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);
}

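/* Standard team barrier entry point.  */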
void
gomp_team_barrier_wait (gomp_barrier_t *bar)
{
  gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

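/* Final team barrier: in addition to the normal completion, the last
   thread to arrive restores AWAITED_FINAL so that counter is ready for
   reuse.  */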
void
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
{
  gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    bar->awaited_final = bar->total;
  gomp_team_barrier_wait_end (bar, state);
}

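/* Cancellable variant of gomp_team_barrier_wait_end.  Returns true if the
   barrier was cancelled (BAR_CANCELLED observed) and false if the wait
   completed normally.  */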
bool
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
				   gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      /* BAR_CANCELLED should never be set in state here, because
	 cancellation means that at least one of the threads has been
	 cancelled, thus on a cancellable barrier we should never see
	 all of the threads arrive.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  state &= ~BAR_WAS_LAST;
	}
      else
	{
	  state += BAR_INCR - BAR_WAS_LAST;
	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
	  if (bar->total > 1)
	    asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
	  return false;
	}
    }

  if (__builtin_expect (state & BAR_CANCELLED, 0))
    return true;

  generation = state;
  do
    {
      if (bar->total > 1)
	asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_CANCELLED, 0))
	return true;
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
	}
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);

  return false;
}

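/* Cancellable team barrier entry point; the return value propagates the
   cancellation status from gomp_team_barrier_wait_cancel_end.  */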
bool
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
{
  return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
}

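/* Cancel the team barrier: set BAR_CANCELLED in the generation word under
   the task lock and wake the waiters so cancellable waits can return.  */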
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
  gomp_mutex_lock (&team->task_lock);
  if (team->barrier.generation & BAR_CANCELLED)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  team->barrier.generation |= BAR_CANCELLED;
  gomp_mutex_unlock (&team->task_lock);
  gomp_team_barrier_wake (&team->barrier, INT_MAX);
}