1 /* Copyright (C) 2017-2020 Free Software Foundation, Inc.
2    Contributed by Mentor Embedded.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file handles maintenance of threads on AMD GCN.  */
27 
28 #include "libgomp.h"
29 #include <stdlib.h>
30 #include <string.h>
31 
32 static void gomp_thread_start (struct gomp_thread_pool *);
33 
34 /* This externally visible function handles target region entry.  It
35    sets up a per-team thread pool and transfers control by returning to
36    the kernel in the master thread or gomp_thread_start in other threads.
37 
38    The name of this function is part of the interface with the compiler: for
39    each OpenMP kernel the compiler configures the stack, then calls here.
40 
41    Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue.  */
42 
void
gomp_gcn_enter_kernel (void)
{
  /* Workitem position in dimension 1 identifies this thread within the
     team; thread 0 acts as the master and performs all setup.  */
  int threadid = __builtin_gcn_dim_pos (1);

  if (threadid == 0)
    {
      int numthreads = __builtin_gcn_dim_size (1);
      int teamid = __builtin_gcn_dim_pos(0);

      /* Set up the global state.
	 Every team will do this, but that should be harmless.  */
      /* NOTE(review): nthreads_var is hard-coded to 16 while
	 thread_limit_var tracks the actual launch size — confirm the 16
	 is an intentional default rather than a leftover tuning value.  */
      gomp_global_icv.nthreads_var = 16;
      gomp_global_icv.thread_limit_var = numthreads;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      /* Initialize the team arena for optimized memory allocation.
         The arena has been allocated on the host side, and the address
         passed in via the kernargs.  Each team takes a small slice of it.
	 NOTE(review): assumes kernargs slot 4 holds the arena base, per
	 the host-side launch ABI — confirm against the GCN plugin.  */
      register void **kernargs asm("s8");
      void *team_arena = (kernargs[4] + TEAM_ARENA_SIZE*teamid);
      /* The three arena bookkeeping pointers live at fixed LDS addresses
	 so every thread in the team can locate them.  */
      void * __lds *arena_start = (void * __lds *)TEAM_ARENA_START;
      void * __lds *arena_free = (void * __lds *)TEAM_ARENA_FREE;
      void * __lds *arena_end = (void * __lds *)TEAM_ARENA_END;
      *arena_start = team_arena;
      *arena_free = team_arena;
      *arena_end = team_arena + TEAM_ARENA_SIZE;

      /* Allocate and initialize the team-local-storage data.  */
      struct gomp_thread *thrs = team_malloc_cleared (sizeof (*thrs)
						      * numthreads);
      set_gcn_thrs (thrs);

      /* Allocate and initialize a pool of threads in the team.
         The threads are already running, of course, we just need to manage
         the communication between them.  */
      struct gomp_thread_pool *pool = team_malloc (sizeof (*pool));
      pool->threads = team_malloc (sizeof (void *) * numthreads);
      for (int tid = 0; tid < numthreads; tid++)
	pool->threads[tid] = &thrs[tid];
      pool->threads_size = numthreads;
      pool->threads_used = numthreads;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, numthreads);
      thrs->thread_pool = pool;

      /* Make the pool setup visible to the other threads before they
	 proceed past their matching barrier below.  */
      asm ("s_barrier" ::: "memory");
      return;  /* Return to kernel.  */
    }
  else
    {
      /* Wait for thread 0 to finish initializing the pool, then enter
	 the idle loop.  */
      asm ("s_barrier" ::: "memory");
      gomp_thread_start (gcn_thrs ()[0].thread_pool);
      /* gomp_thread_start does not return.  */
    }
}
101 
/* Kernel epilogue: tear down the thread pool, then return the
   team-local-storage block allocated in gomp_gcn_enter_kernel.  */

void
gomp_gcn_exit_kernel (void)
{
  struct gomp_thread *thrs = gcn_thrs ();

  gomp_free_thread (thrs);
  team_free (thrs);
}
108 
109 /* This function contains the idle loop in which a thread waits
110    to be called up to become part of a team.  */
111 
112 static void
gomp_thread_start(struct gomp_thread_pool * pool)113 gomp_thread_start (struct gomp_thread_pool *pool)
114 {
115   struct gomp_thread *thr = gomp_thread ();
116 
117   gomp_sem_init (&thr->release, 0);
118   thr->thread_pool = pool;
119 
120   /* The loop exits only when "fn" is assigned "gomp_free_pool_helper",
121      which contains "s_endpgm", or an infinite no-op loop is
122      suspected (this happens when the thread master crashes).  */
123   int nul_limit = 99;
124   do
125     {
126       gomp_simple_barrier_wait (&pool->threads_dock);
127       if (!thr->fn)
128 	{
129 	  if (nul_limit-- > 0)
130 	    continue;
131 	  else
132 	    {
133 	      const char msg[] = ("team master not responding;"
134 				  " slave thread aborting");
135 	      write (2, msg, sizeof (msg)-1);
136 	      abort();
137 	    }
138 	}
139       thr->fn (thr->data);
140       thr->fn = NULL;
141 
142       struct gomp_task *task = thr->task;
143       gomp_team_barrier_wait_final (&thr->ts.team->barrier);
144       gomp_finish_task (task);
145     }
146   while (1);
147 }
148 
/* Launch a team.  Called from the master thread with FN/DATA describing
   the outlined parallel region, NTHREADS threads requested, and TEAM
   already allocated.  FLAGS is not used by this implementation.  */

void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team,
		 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  struct gomp_thread_pool *pool;
  unsigned long nthreads_var;

  thr = gomp_thread ();
  pool = thr->thread_pool;
  task = thr->task;
  /* ICVs come from the enclosing task when there is one, otherwise
     from the global defaults set up in gomp_gcn_enter_kernel.  */
  icv = task ? &task->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  /* Install the master thread as team member 0.  */
  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
  thr->ts.single_count = 0;
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  /* Save nthreads_var so it can be copied into each implicit task's
     ICVs after gomp_init_task runs (here and in the loop below).  */
  nthreads_var = icv->nthreads_var;
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].taskgroup = taskgroup;

  /* A team of one needs no worker threads.  */
  if (nthreads == 1)
    return;

  /* Release existing idle threads.  */
  for (unsigned i = 1; i < nthreads; ++i)
    {
      nthr = pool->threads[i];
      nthr->ts.team = team;
      nthr->ts.work_share = &team->work_shares[0];
      nthr->ts.last_work_share = NULL;
      nthr->ts.team_id = i;
      nthr->ts.level = team->prev_ts.level + 1;
      nthr->ts.active_level = thr->ts.active_level;
      nthr->ts.single_count = 0;
      nthr->ts.static_trip = 0;
      nthr->task = &team->implicit_task[i];
      gomp_init_task (nthr->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].taskgroup = taskgroup;
      /* A non-NULL fn is what wakes a thread out of the idle loop in
	 gomp_thread_start.  */
      nthr->fn = fn;
      nthr->data = data;
      team->ordered_release[i] = &nthr->release;
    }

  /* Meeting the dock barrier releases the idle threads, which will now
     find work to run in thr->fn.  */
  gomp_simple_barrier_wait (&pool->threads_dock);
}
213 
214 #include "../../team.c"
215