/* Copyright (C) 2015-2020 Free Software Foundation, Inc.
   Contributed by Alexander Monakov <amonakov@ispras.ru>

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This is an NVPTX specific implementation of a barrier synchronization
   mechanism for libgomp.  This type is private to the library.  This
   implementation uses atomic instructions and the bar.sync instruction.  */
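
/* All of the waiting below is done with the PTX bar.sync instruction on
   named barrier 1.  Its second operand is a count of hardware threads and
   must be a multiple of the warp size (32); the value 32 * bar->total is
   used on the assumption that each OpenMP thread in the team occupies a
   full 32-lane warp.  */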

#include <limits.h>
#include "libgomp.h"

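/* Complete a simple barrier wait begun by gomp_barrier_wait_start.  The
   last thread to arrive resets the arrival counter and publishes a new
   generation; every thread then meets at the hardware barrier.  */
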
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      bar->awaited = bar->total;
      __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
			MEMMODEL_RELEASE);
    }
  asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}

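/* Enter and complete a simple barrier wait in one call.  */
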
void
gomp_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

/* Like gomp_barrier_wait, except that if the encountering thread
   is not the last one to hit the barrier, it returns immediately.
   The intended usage is that a thread which intends to gomp_barrier_destroy
   this barrier calls gomp_barrier_wait, while all other threads
   call gomp_barrier_wait_last.  When gomp_barrier_wait returns,
   the barrier can be safely destroyed.  */
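
/* A minimal sketch of that protocol (hypothetical caller code, not part
   of libgomp):

       // every participant except the one that will destroy the barrier:
       gomp_barrier_wait_last (&bar);

       // the destroying thread:
       gomp_barrier_wait (&bar);
       gomp_barrier_destroy (&bar);  */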

void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
  /* Deferring to gomp_barrier_wait does not use the optimization opportunity
     allowed by the interface contract for all-but-last participants.  The
     original implementation in config/linux/bar.c handles this better.  */
  gomp_barrier_wait (bar);
}

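/* Wake threads blocked in the team barrier.  COUNT is ignored: bar.sync
   offers no selective wake, so the waking thread simply joins the hardware
   barrier that the waiters are parked in, releasing all of them.  */
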
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
  asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
}

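/* Complete a team barrier wait, running queued tasks as needed.  The last
   thread to arrive either starts task processing (if tasks are pending) or
   advances the generation and releases the team; every other thread loops
   on the hardware barrier until the generation moves past the one it
   entered with.  */
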
void
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  state &= ~BAR_WAS_LAST;
	}
      else
	{
	  state &= ~BAR_CANCELLED;
	  state += BAR_INCR - BAR_WAS_LAST;
	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
	  asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
	  return;
	}
    }

  generation = state;
  state &= ~BAR_CANCELLED;
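  /* Not released yet: repeatedly meet at the hardware barrier and re-read
     the generation, running queued tasks whenever BAR_TASK_PENDING is set,
     until the generation has advanced.  */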
  do
    {
      asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
	}
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);
}

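/* Enter and complete a team barrier wait in one call.  */
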
void
gomp_team_barrier_wait (gomp_barrier_t *bar)
{
  gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

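/* Variant of gomp_team_barrier_wait used for the final barrier in a team;
   the last thread to arrive also resets the separate AWAITED_FINAL
   counter.  */
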
void
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
{
  gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    bar->awaited_final = bar->total;
  gomp_team_barrier_wait_end (bar, state);
}

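/* Cancellable variant of gomp_team_barrier_wait_end.  Returns true if the
   barrier was cancelled before it completed, false otherwise.  */
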
bool
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
				   gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      /* BAR_CANCELLED should never be set in state here, because
	 cancellation means that at least one of the threads has been
	 cancelled, thus on a cancellable barrier we should never see
	 all threads arrive.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  state &= ~BAR_WAS_LAST;
	}
      else
	{
	  state += BAR_INCR - BAR_WAS_LAST;
	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELEASE);
	  asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
	  return false;
	}
    }

  if (__builtin_expect (state & BAR_CANCELLED, 0))
    return true;

  generation = state;
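  /* As in gomp_team_barrier_wait_end, wait for the generation to advance,
     but return true as soon as cancellation is observed.  */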
  do
    {
      asm ("bar.sync 1, %0;" : : "r" (32 * bar->total));
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_CANCELLED, 0))
	return true;
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
	}
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);

  return false;
}

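/* Enter and complete a cancellable team barrier wait in one call; returns
   true if the barrier was cancelled.  */
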
bool
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
{
  return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
}

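/* Mark the team barrier as cancelled and wake all threads waiting in it.
   The BAR_CANCELLED flag is set while holding the task lock.  */
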
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
  gomp_mutex_lock (&team->task_lock);
  if (team->barrier.generation & BAR_CANCELLED)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  team->barrier.generation |= BAR_CANCELLED;
  gomp_mutex_unlock (&team->task_lock);
  gomp_team_barrier_wake (&team->barrier, INT_MAX);
}