// WorkGroup.cpp (Oclgrind)
// Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith,
// University of Bristol. All rights reserved.
//
// This program is provided under a three-clause BSD license. For full
// license terms please see the LICENSE file distributed with this
// source code.

#include "common.h"

#include <sstream>
#include <vector>

#include "llvm/IR/Module.h"

#include "Context.h"
#include "Kernel.h"
#include "KernelInvocation.h"
#include "Memory.h"
#include "WorkGroup.h"
#include "WorkItem.h"

using namespace oclgrind;
using namespace std;
24
WorkGroup(const KernelInvocation * kernelInvocation,Size3 wgid)25 WorkGroup::WorkGroup(const KernelInvocation* kernelInvocation, Size3 wgid)
26 : WorkGroup(kernelInvocation, wgid, kernelInvocation->getLocalSize())
27 {
28 }
29
WorkGroup(const KernelInvocation * kernelInvocation,Size3 wgid,Size3 size)30 WorkGroup::WorkGroup(const KernelInvocation* kernelInvocation, Size3 wgid,
31 Size3 size)
32 : m_context(kernelInvocation->getContext())
33 {
34 m_groupID = wgid;
35 m_groupSize = size;
36
37 m_groupIndex =
38 (m_groupID.x +
39 (m_groupID.y + m_groupID.z * (kernelInvocation->getNumGroups().y) *
40 kernelInvocation->getNumGroups().x));
41
42 // Allocate local memory
43 m_localMemory =
44 new Memory(AddrSpaceLocal, sizeof(size_t) == 8 ? 16 : 8, m_context);
45 const Kernel* kernel = kernelInvocation->getKernel();
46 for (auto value = kernel->values_begin(); value != kernel->values_end();
47 value++)
48 {
49 const llvm::Type* type = value->first->getType();
50 if (type->isPointerTy() && type->getPointerAddressSpace() == AddrSpaceLocal)
51 {
52 size_t ptr = m_localMemory->allocateBuffer(value->second.size);
53 m_localAddresses[value->first] = ptr;
54 }
55 }
56
57 // Initialise work-items
58 for (size_t k = 0; k < m_groupSize.z; k++)
59 {
60 for (size_t j = 0; j < m_groupSize.y; j++)
61 {
62 for (size_t i = 0; i < m_groupSize.x; i++)
63 {
64 WorkItem* workItem =
65 new WorkItem(kernelInvocation, this, Size3(i, j, k));
66 m_workItems.push_back(workItem);
67 m_running.insert(workItem);
68 }
69 }
70 }
71
72 m_nextEvent = 1;
73 m_barrier = NULL;
74 }
75
~WorkGroup()76 WorkGroup::~WorkGroup()
77 {
78 // Delete work-items
79 for (unsigned i = 0; i < m_workItems.size(); i++)
80 {
81 delete m_workItems[i];
82 }
83
84 delete m_localMemory;
85 }
86
async_copy(const WorkItem * workItem,const llvm::Instruction * instruction,AsyncCopyType type,size_t dest,size_t src,size_t size,size_t num,size_t srcStride,size_t destStride,size_t event)87 size_t WorkGroup::async_copy(const WorkItem* workItem,
88 const llvm::Instruction* instruction,
89 AsyncCopyType type, size_t dest, size_t src,
90 size_t size, size_t num, size_t srcStride,
91 size_t destStride, size_t event)
92 {
93 AsyncCopy copy = {instruction, type, dest, src,
94 size, num, srcStride, destStride,
95
96 event};
97
98 // Check if copy has already been registered by another work-item
99 list<pair<AsyncCopy, set<const WorkItem*>>>::iterator itr;
100 for (itr = m_asyncCopies.begin(); itr != m_asyncCopies.end(); itr++)
101 {
102 if (itr->second.count(workItem))
103 {
104 continue;
105 }
106
107 // Check for divergence
108 if ((itr->first.instruction->getDebugLoc() !=
109 copy.instruction->getDebugLoc()) ||
110 (itr->first.type != copy.type) || (itr->first.dest != copy.dest) ||
111 (itr->first.src != copy.src) || (itr->first.size != copy.size) ||
112 (itr->first.num != copy.num) ||
113 (itr->first.srcStride != copy.srcStride) ||
114 (itr->first.destStride != copy.destStride))
115 {
116 Context::Message msg(ERROR, m_context);
117 msg << "Work-group divergence detected (async copy)" << endl
118 << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl
119 << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
120 << endl
121 << "Work-item: " << msg.CURRENT_ENTITY << endl
122 << msg.CURRENT_LOCATION << endl
123 << "dest=0x" << hex << copy.dest << ", "
124 << "src=0x" << hex << copy.src << endl
125 << "elem_size=" << dec << copy.size << ", "
126 << "num_elems=" << dec << copy.num << ", "
127 << "src_stride=" << dec << copy.srcStride << ", "
128 << "dest_stride=" << dec << copy.destStride << endl
129 << endl
130 << "Previous work-items executed:" << endl
131 << itr->first.instruction << endl
132 << "dest=0x" << hex << itr->first.dest << ", "
133 << "src=0x" << hex << itr->first.src << endl
134 << "elem_size=" << dec << itr->first.size << ", "
135 << "num_elems=" << dec << itr->first.num << ", "
136 << "src_stride=" << dec << itr->first.srcStride << ", "
137 << "dest_stride=" << dec << itr->first.destStride << endl;
138 msg.send();
139 }
140
141 itr->second.insert(workItem);
142 return itr->first.event;
143 }
144
145 // Create new event if necessary
146 if (copy.event == 0)
147 {
148 copy.event = m_nextEvent++;
149 }
150
151 // Register new copy and event
152 m_asyncCopies.push_back(make_pair(copy, set<const WorkItem*>()));
153 m_asyncCopies.back().second.insert(workItem);
154 if (!m_events.count(event))
155 {
156 m_events[copy.event] = list<AsyncCopy>();
157 }
158 m_events[copy.event].push_back(copy);
159
160 return copy.event;
161 }
162
clearBarrier()163 void WorkGroup::clearBarrier()
164 {
165 assert(m_barrier);
166
167 // Check for divergence
168 if (m_barrier->workItems.size() != m_workItems.size())
169 {
170 Context::Message msg(ERROR, m_context);
171 msg << "Work-group divergence detected (barrier)" << endl
172 << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl
173 << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
174 << "Only " << dec << m_barrier->workItems.size() << " out of "
175 << m_workItems.size() << " work-items executed barrier" << endl
176 << m_barrier->instruction << endl;
177 msg.send();
178 }
179
180 // Move work-items to running state
181 set<WorkItem*>::iterator itr;
182 for (itr = m_barrier->workItems.begin(); itr != m_barrier->workItems.end();
183 itr++)
184 {
185 (*itr)->clearBarrier();
186 m_running.insert(*itr);
187 }
188 m_barrier->workItems.clear();
189
190 // Deal with events
191 while (!m_barrier->events.empty())
192 {
193 size_t event = m_barrier->events.front();
194
195 // Perform copy
196 list<AsyncCopy> copies = m_events[event];
197 list<AsyncCopy>::iterator itr;
198 for (itr = copies.begin(); itr != copies.end(); itr++)
199 {
200 Memory *destMem, *srcMem;
201 if (itr->type == GLOBAL_TO_LOCAL)
202 {
203 destMem = m_localMemory;
204 srcMem = m_context->getGlobalMemory();
205 }
206 else
207 {
208 destMem = m_context->getGlobalMemory();
209 srcMem = m_localMemory;
210 }
211
212 size_t src = itr->src;
213 size_t dest = itr->dest;
214 unsigned char* buffer = new unsigned char[itr->size];
215 for (unsigned i = 0; i < itr->num; i++)
216 {
217 srcMem->load(buffer, src, itr->size);
218 destMem->store(buffer, dest, itr->size);
219 src += itr->srcStride * itr->size;
220 dest += itr->destStride * itr->size;
221 }
222 delete[] buffer;
223 }
224 m_events.erase(event);
225
226 // Remove copies from list for this event
227 list<pair<AsyncCopy, set<const WorkItem*>>>::iterator cItr;
228 for (cItr = m_asyncCopies.begin(); cItr != m_asyncCopies.end();)
229 {
230 if (cItr->first.event == event)
231 {
232 // Check that all work-items registered the copy
233 if (cItr->second.size() != m_workItems.size())
234 {
235 Context::Message msg(ERROR, m_context);
236 msg << "Work-group divergence detected (async copy)" << endl
237 << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl
238 << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
239 << "Only " << dec << cItr->second.size() << " out of "
240 << m_workItems.size() << " work-items executed copy" << endl
241 << cItr->first.instruction << endl;
242 msg.send();
243 }
244
245 cItr = m_asyncCopies.erase(cItr);
246 }
247 else
248 {
249 cItr++;
250 }
251 }
252
253 m_barrier->events.remove(event);
254 }
255
256 m_context->notifyWorkGroupBarrier(this, m_barrier->fence);
257
258 delete m_barrier;
259 m_barrier = NULL;
260 }
261
getCurrentBarrier() const262 const llvm::Instruction* WorkGroup::getCurrentBarrier() const
263 {
264 return m_barrier ? m_barrier->instruction : NULL;
265 }
266
getGroupID() const267 Size3 WorkGroup::getGroupID() const
268 {
269 return m_groupID;
270 }
271
getGroupIndex() const272 size_t WorkGroup::getGroupIndex() const
273 {
274 return m_groupIndex;
275 }
276
getGroupSize() const277 Size3 WorkGroup::getGroupSize() const
278 {
279 return m_groupSize;
280 }
281
getLocalMemory() const282 Memory* WorkGroup::getLocalMemory() const
283 {
284 return m_localMemory;
285 }
286
getLocalMemoryAddress(const llvm::Value * value) const287 size_t WorkGroup::getLocalMemoryAddress(const llvm::Value* value) const
288 {
289 return m_localAddresses.at(value);
290 }
291
getNextWorkItem() const292 WorkItem* WorkGroup::getNextWorkItem() const
293 {
294 if (m_running.empty())
295 {
296 return NULL;
297 }
298 return *m_running.begin();
299 }
300
getWorkItem(Size3 localID) const301 WorkItem* WorkGroup::getWorkItem(Size3 localID) const
302 {
303 return m_workItems[localID.x +
304 (localID.y + localID.z * m_groupSize.y) * m_groupSize.x];
305 }
306
hasBarrier() const307 bool WorkGroup::hasBarrier() const
308 {
309 return m_barrier;
310 }
311
notifyBarrier(WorkItem * workItem,const llvm::Instruction * instruction,uint64_t fence,list<size_t> events)312 void WorkGroup::notifyBarrier(WorkItem* workItem,
313 const llvm::Instruction* instruction,
314 uint64_t fence, list<size_t> events)
315 {
316 if (!m_barrier)
317 {
318 // Create new barrier
319 m_barrier = new Barrier;
320 m_barrier->instruction = instruction;
321 m_barrier->fence = fence;
322
323 m_barrier->events = events;
324
325 // Check for invalid events
326 list<size_t>::iterator itr;
327 for (itr = events.begin(); itr != events.end(); itr++)
328 {
329 if (!m_events.count(*itr))
330 {
331 m_context->logError("Invalid wait event");
332 }
333 }
334 }
335 else
336 {
337 // Check for divergence
338 bool divergence = false;
339 if (instruction->getDebugLoc() != m_barrier->instruction->getDebugLoc() ||
340 fence != m_barrier->fence || events.size() != m_barrier->events.size())
341 {
342 divergence = true;
343 }
344
345 // Check events are all the same
346 int divergentEventIndex = -1;
347 size_t newEvent = -1;
348 size_t oldEvent = -1;
349 if (!divergence)
350 {
351 int i = 0;
352 list<size_t>::iterator cItr = events.begin();
353 list<size_t>::iterator pItr = m_barrier->events.begin();
354 for (; cItr != events.end(); cItr++, pItr++, i++)
355 {
356 if (*cItr != *pItr)
357 {
358 divergence = true;
359
360 divergentEventIndex = i;
361 newEvent = *cItr;
362 oldEvent = *pItr;
363
364 break;
365 }
366 }
367 }
368
369 if (divergence)
370 {
371 Context::Message msg(ERROR, m_context);
372 msg << "Work-group divergence detected (barrier)" << endl
373 << msg.INDENT << "Kernel: " << msg.CURRENT_KERNEL << endl
374 << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
375 << endl
376 << "Work-item: " << msg.CURRENT_ENTITY << endl
377 << msg.CURRENT_LOCATION << endl
378 << "fence=0x" << hex << fence << ", "
379 << "num_events=" << dec << events.size() << endl;
380 if (divergentEventIndex >= 0)
381 {
382 msg << "events[" << dec << divergentEventIndex << "]=" << newEvent
383 << endl;
384 }
385 msg << endl
386 << "Previous work-items executed:" << endl
387 << m_barrier->instruction << endl
388 << "fence=0x" << hex << m_barrier->fence << ", "
389 << "num_events=" << dec << m_barrier->events.size() << endl;
390 if (divergentEventIndex >= 0)
391 {
392 msg << "events[" << dec << divergentEventIndex << "]=" << oldEvent
393 << endl;
394 }
395 msg.send();
396 }
397 }
398
399 m_running.erase(workItem);
400 m_barrier->workItems.insert(workItem);
401 }
402
notifyFinished(WorkItem * workItem)403 void WorkGroup::notifyFinished(WorkItem* workItem)
404 {
405 m_running.erase(workItem);
406
407 // Check if work-group finished without waiting for all events
408 if (m_running.empty() && !m_barrier && !m_events.empty())
409 {
410 m_context->logError("Work-item finished without waiting for events");
411 }
412 }
413
operator ()(const WorkItem * lhs,const WorkItem * rhs) const414 bool WorkGroup::WorkItemCmp::operator()(const WorkItem* lhs,
415 const WorkItem* rhs) const
416 {
417 Size3 lgid = lhs->getGlobalID();
418 Size3 rgid = rhs->getGlobalID();
419 if (lgid.z != rgid.z)
420 {
421 return lgid.z < rgid.z;
422 }
423 if (lgid.y != rgid.y)
424 {
425 return lgid.y < rgid.y;
426 }
427 return lgid.x < rgid.x;
428 }
429