1 //===------------- task.h - NVPTX OpenMP tasks support ----------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Task implementation support.
10 //
// explicit task structure uses
//   omptarget_nvptx task
//   kmp_task
//
// where kmp_task is
//   - klegacy_TaskDescr    <- task pointer
//       shared -> X
//       routine
//       part_id
//       descr
//   - private (of size given by task_alloc call). Accessed by
//     task+sizeof(klegacy_TaskDescr)
//       * private data *
//   - shared: X. Accessed by shared ptr in klegacy_TaskDescr
//       * pointer table to shared variables *
//   - end
27 //
28 //===----------------------------------------------------------------------===//
29 #pragma omp declare target
30
31 #include "common/omptarget.h"
32
__kmpc_omp_task_alloc(kmp_Ident * loc,uint32_t global_tid,int32_t flag,size_t sizeOfTaskInclPrivate,size_t sizeOfSharedTable,kmp_TaskFctPtr taskSub)33 EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
34 kmp_Ident *loc, // unused
35 uint32_t global_tid, // unused
36 int32_t flag, // unused (because in our impl, all are immediately exec
37 size_t sizeOfTaskInclPrivate, size_t sizeOfSharedTable,
38 kmp_TaskFctPtr taskSub) {
39 PRINT(LD_IO,
40 "call __kmpc_omp_task_alloc(size priv&struct %lld, shared %lld, "
41 "fct 0x%llx)\n",
42 (long long)sizeOfTaskInclPrivate, (long long)sizeOfSharedTable,
43 (unsigned long long)taskSub);
44 // want task+priv to be a multiple of 8 bytes
45 size_t padForTaskInclPriv = PadBytes(sizeOfTaskInclPrivate, sizeof(void *));
46 sizeOfTaskInclPrivate += padForTaskInclPriv;
47 size_t kmpSize = sizeOfTaskInclPrivate + sizeOfSharedTable;
48 ASSERT(LT_FUSSY, sizeof(omptarget_nvptx_TaskDescr) % sizeof(void *) == 0,
49 "need task descr of size %d to be a multiple of %d\n",
50 (int)sizeof(omptarget_nvptx_TaskDescr), (int)sizeof(void *));
51 size_t totSize = sizeof(omptarget_nvptx_TaskDescr) + kmpSize;
52 omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
53 (omptarget_nvptx_ExplicitTaskDescr *)SafeMalloc(
54 totSize, "explicit task descriptor");
55 kmp_TaskDescr *newKmpTaskDescr = &newExplicitTaskDescr->kmpTaskDescr;
56 ASSERT0(LT_FUSSY,
57 (uint64_t)newKmpTaskDescr ==
58 (uint64_t)ADD_BYTES(newExplicitTaskDescr,
59 sizeof(omptarget_nvptx_TaskDescr)),
60 "bad size assumptions");
61 // init kmp_TaskDescr
62 newKmpTaskDescr->sharedPointerTable =
63 (void *)((char *)newKmpTaskDescr + sizeOfTaskInclPrivate);
64 newKmpTaskDescr->sub = taskSub;
65 newKmpTaskDescr->destructors = NULL;
66 PRINT(LD_TASK, "return with task descr kmp: 0x%llx, omptarget-nvptx 0x%llx\n",
67 (unsigned long long)newKmpTaskDescr,
68 (unsigned long long)newExplicitTaskDescr);
69
70 return newKmpTaskDescr;
71 }
72
__kmpc_omp_task(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr)73 EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
74 kmp_TaskDescr *newKmpTaskDescr) {
75 return __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0,
76 0);
77 }
78
__kmpc_omp_task_with_deps(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)79 EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
80 kmp_TaskDescr *newKmpTaskDescr,
81 int32_t depNum, void *depList,
82 int32_t noAliasDepNum,
83 void *noAliasDepList) {
84 PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n",
85 P64(newKmpTaskDescr));
86 ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
87 "Runtime must be initialized.");
88 // 1. get explicit task descr from kmp task descr
89 omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
90 (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
91 newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
92 ASSERT0(LT_FUSSY, &newExplicitTaskDescr->kmpTaskDescr == newKmpTaskDescr,
93 "bad assumptions");
94 omptarget_nvptx_TaskDescr *newTaskDescr = &newExplicitTaskDescr->taskDescr;
95 ASSERT0(LT_FUSSY, (uint64_t)newTaskDescr == (uint64_t)newExplicitTaskDescr,
96 "bad assumptions");
97
98 // 2. push new context: update new task descriptor
99 int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
100 omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
101 newTaskDescr->CopyForExplicitTask(parentTaskDescr);
102 // set new task descriptor as top
103 omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, newTaskDescr);
104
105 // 3. call sub
106 PRINT(LD_TASK, "call task sub 0x%llx(task descr 0x%llx)\n",
107 (unsigned long long)newKmpTaskDescr->sub,
108 (unsigned long long)newKmpTaskDescr);
109 newKmpTaskDescr->sub(0, newKmpTaskDescr);
110 PRINT(LD_TASK, "return from call task sub 0x%llx()\n",
111 (unsigned long long)newKmpTaskDescr->sub);
112
113 // 4. pop context
114 omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
115 parentTaskDescr);
116 // 5. free
117 SafeFree(newExplicitTaskDescr, "explicit task descriptor");
118 return 0;
119 }
120
__kmpc_omp_task_begin_if0(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr)121 EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
122 kmp_TaskDescr *newKmpTaskDescr) {
123 PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
124 (unsigned long long)newKmpTaskDescr);
125 ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
126 "Runtime must be initialized.");
127 // 1. get explicit task descr from kmp task descr
128 omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
129 (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
130 newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
131 ASSERT0(LT_FUSSY, &newExplicitTaskDescr->kmpTaskDescr == newKmpTaskDescr,
132 "bad assumptions");
133 omptarget_nvptx_TaskDescr *newTaskDescr = &newExplicitTaskDescr->taskDescr;
134 ASSERT0(LT_FUSSY, (uint64_t)newTaskDescr == (uint64_t)newExplicitTaskDescr,
135 "bad assumptions");
136
137 // 2. push new context: update new task descriptor
138 int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
139 omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
140 newTaskDescr->CopyForExplicitTask(parentTaskDescr);
141 // set new task descriptor as top
142 omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, newTaskDescr);
143 // 3... noting to call... is inline
144 // 4 & 5 ... done in complete
145 }
146
__kmpc_omp_task_complete_if0(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr)147 EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
148 kmp_TaskDescr *newKmpTaskDescr) {
149 PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
150 (unsigned long long)newKmpTaskDescr);
151 ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
152 "Runtime must be initialized.");
153 // 1. get explicit task descr from kmp task descr
154 omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
155 (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
156 newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
157 ASSERT0(LT_FUSSY, &newExplicitTaskDescr->kmpTaskDescr == newKmpTaskDescr,
158 "bad assumptions");
159 omptarget_nvptx_TaskDescr *newTaskDescr = &newExplicitTaskDescr->taskDescr;
160 ASSERT0(LT_FUSSY, (uint64_t)newTaskDescr == (uint64_t)newExplicitTaskDescr,
161 "bad assumptions");
162 // 2. get parent
163 omptarget_nvptx_TaskDescr *parentTaskDescr = newTaskDescr->GetPrevTaskDescr();
164 // 3... noting to call... is inline
165 // 4. pop context
166 int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
167 omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
168 parentTaskDescr);
169 // 5. free
170 SafeFree(newExplicitTaskDescr, "explicit task descriptor");
171 }
172
__kmpc_omp_wait_deps(kmp_Ident * loc,uint32_t global_tid,int32_t depNum,void * depList,int32_t noAliasDepNum,void * noAliasDepList)173 EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
174 int32_t depNum, void *depList,
175 int32_t noAliasDepNum, void *noAliasDepList) {
176 PRINT0(LD_IO, "call to __kmpc_omp_wait_deps(..)\n");
177 // nothing to do as all our tasks are executed as final
178 }
179
__kmpc_taskgroup(kmp_Ident * loc,uint32_t global_tid)180 EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
181 PRINT0(LD_IO, "call to __kmpc_taskgroup(..)\n");
182 // nothing to do as all our tasks are executed as final
183 }
184
__kmpc_end_taskgroup(kmp_Ident * loc,uint32_t global_tid)185 EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
186 PRINT0(LD_IO, "call to __kmpc_end_taskgroup(..)\n");
187 // nothing to do as all our tasks are executed as final
188 }
189
__kmpc_omp_taskyield(kmp_Ident * loc,uint32_t global_tid,int end_part)190 EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
191 int end_part) {
192 PRINT0(LD_IO, "call to __kmpc_taskyield()\n");
193 // do nothing: tasks are executed immediately, no yielding allowed
194 return 0;
195 }
196
__kmpc_omp_taskwait(kmp_Ident * loc,uint32_t global_tid)197 EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid) {
198 PRINT0(LD_IO, "call to __kmpc_taskwait()\n");
199 // nothing to do as all our tasks are executed as final
200 return 0;
201 }
202
__kmpc_taskloop(kmp_Ident * loc,uint32_t global_tid,kmp_TaskDescr * newKmpTaskDescr,int if_val,uint64_t * lb,uint64_t * ub,int64_t st,int nogroup,int32_t sched,uint64_t grainsize,void * task_dup)203 EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
204 kmp_TaskDescr *newKmpTaskDescr, int if_val,
205 uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
206 int32_t sched, uint64_t grainsize, void *task_dup) {
207
208 // skip task entirely if empty iteration space
209 if (*lb > *ub)
210 return;
211
212 // the compiler has already stored lb and ub in the kmp_TaskDescr structure
213 // as we are using a single task to execute the entire loop, we can leave
214 // the initial task_t untouched
215
216 __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0, 0);
217 }
218
219 #pragma omp end declare target
220