1 //===------------- task.h - NVPTX OpenMP tasks support ----------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Task implementation support.
10 //
//  An explicit task is laid out in memory as
//    the omptarget_nvptx task descriptor, followed by
//    the kmp_task
//
//  where kmp_task is
//    - kmp_TaskDescr    <- task pointer
//        shared -> X
//        routine
//        part_id
//        descr
//    - private (of the size given by the task_alloc call). Accessed by
//      task+sizeof(kmp_TaskDescr)
//        * private data *
//    - shared: X. Accessed by the shared ptr in kmp_TaskDescr
//        * pointer table to shared variables *
//    - end
27 //
28 //===----------------------------------------------------------------------===//
29 #pragma omp declare target
30 
31 #include "common/omptarget.h"
32 
__kmpc_omp_task_alloc(kmp_Ident * loc,uint32_t global_tid,int32_t flag,size_t sizeOfTaskInclPrivate,size_t sizeOfSharedTable,kmp_TaskFctPtr taskSub)33 EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
34     kmp_Ident *loc,     // unused
35     uint32_t global_tid, // unused
36     int32_t flag, // unused (because in our impl, all are immediately exec
37     size_t sizeOfTaskInclPrivate, size_t sizeOfSharedTable,
38     kmp_TaskFctPtr taskSub) {
39   PRINT(LD_IO,
40         "call __kmpc_omp_task_alloc(size priv&struct %lld, shared %lld, "
41         "fct 0x%llx)\n",
42         (long long)sizeOfTaskInclPrivate, (long long)sizeOfSharedTable,
43         (unsigned long long)taskSub);
44   // want task+priv to be a multiple of 8 bytes
45   size_t padForTaskInclPriv = PadBytes(sizeOfTaskInclPrivate, sizeof(void *));
46   sizeOfTaskInclPrivate += padForTaskInclPriv;
47   size_t kmpSize = sizeOfTaskInclPrivate + sizeOfSharedTable;
48   ASSERT(LT_FUSSY, sizeof(omptarget_nvptx_TaskDescr) % sizeof(void *) == 0,
49          "need task descr of size %d to be a multiple of %d\n",
50          (int)sizeof(omptarget_nvptx_TaskDescr), (int)sizeof(void *));
51   size_t totSize = sizeof(omptarget_nvptx_TaskDescr) + kmpSize;
52   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
53       (omptarget_nvptx_ExplicitTaskDescr *)SafeMalloc(
54           totSize, "explicit task descriptor");
55   kmp_TaskDescr *newKmpTaskDescr = &newExplicitTaskDescr->kmpTaskDescr;
56   ASSERT0(LT_FUSSY,
57           (uint64_t)newKmpTaskDescr ==
58               (uint64_t)ADD_BYTES(newExplicitTaskDescr,
59                                   sizeof(omptarget_nvptx_TaskDescr)),
60           "bad size assumptions");
61   // init kmp_TaskDescr
62   newKmpTaskDescr->sharedPointerTable =
63       (void *)((char *)newKmpTaskDescr + sizeOfTaskInclPrivate);
64   newKmpTaskDescr->sub = taskSub;
65   newKmpTaskDescr->destructors = NULL;
66   PRINT(LD_TASK, "return with task descr kmp: 0x%llx, omptarget-nvptx 0x%llx\n",
67         (unsigned long long)newKmpTaskDescr,
68         (unsigned long long)newExplicitTaskDescr);
69 
70   return newKmpTaskDescr;
71 }
72 
__kmpc_omp_task(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr)73 EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
74                                kmp_TaskDescr *newKmpTaskDescr) {
75   return __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0,
76                                    0);
77 }
78 
__kmpc_omp_task_with_deps(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)79 EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
80                                          kmp_TaskDescr *newKmpTaskDescr,
81                                          int32_t depNum, void *depList,
82                                          int32_t noAliasDepNum,
83                                          void *noAliasDepList) {
84   PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n",
85         P64(newKmpTaskDescr));
86   ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
87           "Runtime must be initialized.");
88   // 1. get explicit task descr from kmp task descr
89   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
90       (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
91           newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
92   ASSERT0(LT_FUSSY, &newExplicitTaskDescr->kmpTaskDescr == newKmpTaskDescr,
93           "bad assumptions");
94   omptarget_nvptx_TaskDescr *newTaskDescr = &newExplicitTaskDescr->taskDescr;
95   ASSERT0(LT_FUSSY, (uint64_t)newTaskDescr == (uint64_t)newExplicitTaskDescr,
96           "bad assumptions");
97 
98   // 2. push new context: update new task descriptor
99   int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
100   omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
101   newTaskDescr->CopyForExplicitTask(parentTaskDescr);
102   // set new task descriptor as top
103   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, newTaskDescr);
104 
105   // 3. call sub
106   PRINT(LD_TASK, "call task sub 0x%llx(task descr 0x%llx)\n",
107         (unsigned long long)newKmpTaskDescr->sub,
108         (unsigned long long)newKmpTaskDescr);
109   newKmpTaskDescr->sub(0, newKmpTaskDescr);
110   PRINT(LD_TASK, "return from call task sub 0x%llx()\n",
111         (unsigned long long)newKmpTaskDescr->sub);
112 
113   // 4. pop context
114   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
115                                                              parentTaskDescr);
116   // 5. free
117   SafeFree(newExplicitTaskDescr, "explicit task descriptor");
118   return 0;
119 }
120 
__kmpc_omp_task_begin_if0(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr)121 EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
122                                       kmp_TaskDescr *newKmpTaskDescr) {
123   PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
124         (unsigned long long)newKmpTaskDescr);
125   ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
126           "Runtime must be initialized.");
127   // 1. get explicit task descr from kmp task descr
128   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
129       (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
130           newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
131   ASSERT0(LT_FUSSY, &newExplicitTaskDescr->kmpTaskDescr == newKmpTaskDescr,
132           "bad assumptions");
133   omptarget_nvptx_TaskDescr *newTaskDescr = &newExplicitTaskDescr->taskDescr;
134   ASSERT0(LT_FUSSY, (uint64_t)newTaskDescr == (uint64_t)newExplicitTaskDescr,
135           "bad assumptions");
136 
137   // 2. push new context: update new task descriptor
138   int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
139   omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
140   newTaskDescr->CopyForExplicitTask(parentTaskDescr);
141   // set new task descriptor as top
142   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, newTaskDescr);
143   // 3... noting to call... is inline
144   // 4 & 5 ... done in complete
145 }
146 
__kmpc_omp_task_complete_if0(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr)147 EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
148                                          kmp_TaskDescr *newKmpTaskDescr) {
149   PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
150         (unsigned long long)newKmpTaskDescr);
151   ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
152           "Runtime must be initialized.");
153   // 1. get explicit task descr from kmp task descr
154   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
155       (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
156           newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
157   ASSERT0(LT_FUSSY, &newExplicitTaskDescr->kmpTaskDescr == newKmpTaskDescr,
158           "bad assumptions");
159   omptarget_nvptx_TaskDescr *newTaskDescr = &newExplicitTaskDescr->taskDescr;
160   ASSERT0(LT_FUSSY, (uint64_t)newTaskDescr == (uint64_t)newExplicitTaskDescr,
161           "bad assumptions");
162   // 2. get parent
163   omptarget_nvptx_TaskDescr *parentTaskDescr = newTaskDescr->GetPrevTaskDescr();
164   // 3... noting to call... is inline
165   // 4. pop context
166   int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
167   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
168                                                              parentTaskDescr);
169   // 5. free
170   SafeFree(newExplicitTaskDescr, "explicit task descriptor");
171 }
172 
__kmpc_omp_wait_deps(kmp_Ident * loc,uint32_t global_tid,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)173 EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
174                                  int32_t depNum, void *depList,
175                                  int32_t noAliasDepNum, void *noAliasDepList) {
176   PRINT0(LD_IO, "call to __kmpc_omp_wait_deps(..)\n");
177   // nothing to do as all our tasks are executed as final
178 }
179 
__kmpc_taskgroup(kmp_Ident * loc,uint32_t global_tid)180 EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
181   PRINT0(LD_IO, "call to __kmpc_taskgroup(..)\n");
182   // nothing to do as all our tasks are executed as final
183 }
184 
__kmpc_end_taskgroup(kmp_Ident * loc,uint32_t global_tid)185 EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
186   PRINT0(LD_IO, "call to __kmpc_end_taskgroup(..)\n");
187   // nothing to do as all our tasks are executed as final
188 }
189 
__kmpc_omp_taskyield(kmp_Ident * loc,uint32_t global_tid,int end_part)190 EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
191                                     int end_part) {
192   PRINT0(LD_IO, "call to __kmpc_taskyield()\n");
193   // do nothing: tasks are executed immediately, no yielding allowed
194   return 0;
195 }
196 
__kmpc_omp_taskwait(kmp_Ident * loc,uint32_t global_tid)197 EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid) {
198   PRINT0(LD_IO, "call to __kmpc_taskwait()\n");
199   // nothing to do as all our tasks are executed as final
200   return 0;
201 }
202 
__kmpc_taskloop(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr,int if_val,uint64_t * lb,uint64_t * ub,int64_t st,int nogroup,int32_t sched,uint64_t grainsize,void * task_dup)203 EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
204                             kmp_TaskDescr *newKmpTaskDescr, int if_val,
205                             uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
206                             int32_t sched, uint64_t grainsize, void *task_dup) {
207 
208   // skip task entirely if empty iteration space
209   if (*lb > *ub)
210     return;
211 
212   // the compiler has already stored lb and ub in the kmp_TaskDescr structure
213   // as we are using a single task to execute the entire loop, we can leave
214   // the initial task_t untouched
215 
216   __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0, 0);
217 }
218 
219 #pragma omp end declare target
220