1 // WorkGroup.cpp (Oclgrind)
2 // Copyright (c) 2013-2019, James Price and Simon McIntosh-Smith,
3 // University of Bristol. All rights reserved.
4 //
5 // This program is provided under a three-clause BSD license. For full
6 // license terms please see the LICENSE file distributed with this
7 // source code.
8 
9 #include "common.h"
10 
11 #include <sstream>
12 
13 #include "llvm/IR/Module.h"
14 
15 #include "Context.h"
16 #include "Kernel.h"
17 #include "KernelInvocation.h"
18 #include "Memory.h"
19 #include "WorkGroup.h"
20 #include "WorkItem.h"
21 
22 using namespace oclgrind;
23 using namespace std;
24 
WorkGroup(const KernelInvocation * kernelInvocation,Size3 wgid)25 WorkGroup::WorkGroup(const KernelInvocation* kernelInvocation, Size3 wgid)
26     : WorkGroup(kernelInvocation, wgid, kernelInvocation->getLocalSize())
27 {
28 }
29 
WorkGroup(const KernelInvocation * kernelInvocation,Size3 wgid,Size3 size)30 WorkGroup::WorkGroup(const KernelInvocation* kernelInvocation, Size3 wgid,
31                      Size3 size)
32     : m_context(kernelInvocation->getContext())
33 {
34   m_groupID = wgid;
35   m_groupSize = size;
36 
37   m_groupIndex =
38     (m_groupID.x +
39      (m_groupID.y + m_groupID.z * (kernelInvocation->getNumGroups().y) *
40                       kernelInvocation->getNumGroups().x));
41 
42   // Allocate local memory
43   m_localMemory =
44     new Memory(AddrSpaceLocal, sizeof(size_t) == 8 ? 16 : 8, m_context);
45   const Kernel* kernel = kernelInvocation->getKernel();
46   for (auto value = kernel->values_begin(); value != kernel->values_end();
47        value++)
48   {
49     const llvm::Type* type = value->first->getType();
50     if (type->isPointerTy() && type->getPointerAddressSpace() == AddrSpaceLocal)
51     {
52       size_t ptr = m_localMemory->allocateBuffer(value->second.size);
53       m_localAddresses[value->first] = ptr;
54     }
55   }
56 
57   // Initialise work-items
58   for (size_t k = 0; k < m_groupSize.z; k++)
59   {
60     for (size_t j = 0; j < m_groupSize.y; j++)
61     {
62       for (size_t i = 0; i < m_groupSize.x; i++)
63       {
64         WorkItem* workItem =
65           new WorkItem(kernelInvocation, this, Size3(i, j, k));
66         m_workItems.push_back(workItem);
67         m_running.insert(workItem);
68       }
69     }
70   }
71 
72   m_nextEvent = 1;
73   m_barrier = NULL;
74 }
75 
~WorkGroup()76 WorkGroup::~WorkGroup()
77 {
78   // Delete work-items
79   for (unsigned i = 0; i < m_workItems.size(); i++)
80   {
81     delete m_workItems[i];
82   }
83 
84   delete m_localMemory;
85 }
86 
async_copy(const WorkItem * workItem,const llvm::Instruction * instruction,AsyncCopyType type,size_t dest,size_t src,size_t size,size_t num,size_t srcStride,size_t destStride,size_t event)87 size_t WorkGroup::async_copy(const WorkItem* workItem,
88                              const llvm::Instruction* instruction,
89                              AsyncCopyType type, size_t dest, size_t src,
90                              size_t size, size_t num, size_t srcStride,
91                              size_t destStride, size_t event)
92 {
93   AsyncCopy copy = {instruction, type, dest,      src,
94                     size,        num,  srcStride, destStride,
95 
96                     event};
97 
98   // Check if copy has already been registered by another work-item
99   list<pair<AsyncCopy, set<const WorkItem*>>>::iterator itr;
100   for (itr = m_asyncCopies.begin(); itr != m_asyncCopies.end(); itr++)
101   {
102     if (itr->second.count(workItem))
103     {
104       continue;
105     }
106 
107     // Check for divergence
108     if ((itr->first.instruction->getDebugLoc() !=
109          copy.instruction->getDebugLoc()) ||
110         (itr->first.type != copy.type) || (itr->first.dest != copy.dest) ||
111         (itr->first.src != copy.src) || (itr->first.size != copy.size) ||
112         (itr->first.num != copy.num) ||
113         (itr->first.srcStride != copy.srcStride) ||
114         (itr->first.destStride != copy.destStride))
115     {
116       Context::Message msg(ERROR, m_context);
117       msg << "Work-group divergence detected (async copy)" << endl
118           << msg.INDENT << "Kernel:     " << msg.CURRENT_KERNEL << endl
119           << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
120           << endl
121           << "Work-item:  " << msg.CURRENT_ENTITY << endl
122           << msg.CURRENT_LOCATION << endl
123           << "dest=0x" << hex << copy.dest << ", "
124           << "src=0x" << hex << copy.src << endl
125           << "elem_size=" << dec << copy.size << ", "
126           << "num_elems=" << dec << copy.num << ", "
127           << "src_stride=" << dec << copy.srcStride << ", "
128           << "dest_stride=" << dec << copy.destStride << endl
129           << endl
130           << "Previous work-items executed:" << endl
131           << itr->first.instruction << endl
132           << "dest=0x" << hex << itr->first.dest << ", "
133           << "src=0x" << hex << itr->first.src << endl
134           << "elem_size=" << dec << itr->first.size << ", "
135           << "num_elems=" << dec << itr->first.num << ", "
136           << "src_stride=" << dec << itr->first.srcStride << ", "
137           << "dest_stride=" << dec << itr->first.destStride << endl;
138       msg.send();
139     }
140 
141     itr->second.insert(workItem);
142     return itr->first.event;
143   }
144 
145   // Create new event if necessary
146   if (copy.event == 0)
147   {
148     copy.event = m_nextEvent++;
149   }
150 
151   // Register new copy and event
152   m_asyncCopies.push_back(make_pair(copy, set<const WorkItem*>()));
153   m_asyncCopies.back().second.insert(workItem);
154   if (!m_events.count(event))
155   {
156     m_events[copy.event] = list<AsyncCopy>();
157   }
158   m_events[copy.event].push_back(copy);
159 
160   return copy.event;
161 }
162 
clearBarrier()163 void WorkGroup::clearBarrier()
164 {
165   assert(m_barrier);
166 
167   // Check for divergence
168   if (m_barrier->workItems.size() != m_workItems.size())
169   {
170     Context::Message msg(ERROR, m_context);
171     msg << "Work-group divergence detected (barrier)" << endl
172         << msg.INDENT << "Kernel:     " << msg.CURRENT_KERNEL << endl
173         << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
174         << "Only " << dec << m_barrier->workItems.size() << " out of "
175         << m_workItems.size() << " work-items executed barrier" << endl
176         << m_barrier->instruction << endl;
177     msg.send();
178   }
179 
180   // Move work-items to running state
181   set<WorkItem*>::iterator itr;
182   for (itr = m_barrier->workItems.begin(); itr != m_barrier->workItems.end();
183        itr++)
184   {
185     (*itr)->clearBarrier();
186     m_running.insert(*itr);
187   }
188   m_barrier->workItems.clear();
189 
190   // Deal with events
191   while (!m_barrier->events.empty())
192   {
193     size_t event = m_barrier->events.front();
194 
195     // Perform copy
196     list<AsyncCopy> copies = m_events[event];
197     list<AsyncCopy>::iterator itr;
198     for (itr = copies.begin(); itr != copies.end(); itr++)
199     {
200       Memory *destMem, *srcMem;
201       if (itr->type == GLOBAL_TO_LOCAL)
202       {
203         destMem = m_localMemory;
204         srcMem = m_context->getGlobalMemory();
205       }
206       else
207       {
208         destMem = m_context->getGlobalMemory();
209         srcMem = m_localMemory;
210       }
211 
212       size_t src = itr->src;
213       size_t dest = itr->dest;
214       unsigned char* buffer = new unsigned char[itr->size];
215       for (unsigned i = 0; i < itr->num; i++)
216       {
217         srcMem->load(buffer, src, itr->size);
218         destMem->store(buffer, dest, itr->size);
219         src += itr->srcStride * itr->size;
220         dest += itr->destStride * itr->size;
221       }
222       delete[] buffer;
223     }
224     m_events.erase(event);
225 
226     // Remove copies from list for this event
227     list<pair<AsyncCopy, set<const WorkItem*>>>::iterator cItr;
228     for (cItr = m_asyncCopies.begin(); cItr != m_asyncCopies.end();)
229     {
230       if (cItr->first.event == event)
231       {
232         // Check that all work-items registered the copy
233         if (cItr->second.size() != m_workItems.size())
234         {
235           Context::Message msg(ERROR, m_context);
236           msg << "Work-group divergence detected (async copy)" << endl
237               << msg.INDENT << "Kernel:     " << msg.CURRENT_KERNEL << endl
238               << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
239               << "Only " << dec << cItr->second.size() << " out of "
240               << m_workItems.size() << " work-items executed copy" << endl
241               << cItr->first.instruction << endl;
242           msg.send();
243         }
244 
245         cItr = m_asyncCopies.erase(cItr);
246       }
247       else
248       {
249         cItr++;
250       }
251     }
252 
253     m_barrier->events.remove(event);
254   }
255 
256   m_context->notifyWorkGroupBarrier(this, m_barrier->fence);
257 
258   delete m_barrier;
259   m_barrier = NULL;
260 }
261 
getCurrentBarrier() const262 const llvm::Instruction* WorkGroup::getCurrentBarrier() const
263 {
264   return m_barrier ? m_barrier->instruction : NULL;
265 }
266 
getGroupID() const267 Size3 WorkGroup::getGroupID() const
268 {
269   return m_groupID;
270 }
271 
getGroupIndex() const272 size_t WorkGroup::getGroupIndex() const
273 {
274   return m_groupIndex;
275 }
276 
getGroupSize() const277 Size3 WorkGroup::getGroupSize() const
278 {
279   return m_groupSize;
280 }
281 
getLocalMemory() const282 Memory* WorkGroup::getLocalMemory() const
283 {
284   return m_localMemory;
285 }
286 
getLocalMemoryAddress(const llvm::Value * value) const287 size_t WorkGroup::getLocalMemoryAddress(const llvm::Value* value) const
288 {
289   return m_localAddresses.at(value);
290 }
291 
getNextWorkItem() const292 WorkItem* WorkGroup::getNextWorkItem() const
293 {
294   if (m_running.empty())
295   {
296     return NULL;
297   }
298   return *m_running.begin();
299 }
300 
getWorkItem(Size3 localID) const301 WorkItem* WorkGroup::getWorkItem(Size3 localID) const
302 {
303   return m_workItems[localID.x +
304                      (localID.y + localID.z * m_groupSize.y) * m_groupSize.x];
305 }
306 
hasBarrier() const307 bool WorkGroup::hasBarrier() const
308 {
309   return m_barrier;
310 }
311 
notifyBarrier(WorkItem * workItem,const llvm::Instruction * instruction,uint64_t fence,list<size_t> events)312 void WorkGroup::notifyBarrier(WorkItem* workItem,
313                               const llvm::Instruction* instruction,
314                               uint64_t fence, list<size_t> events)
315 {
316   if (!m_barrier)
317   {
318     // Create new barrier
319     m_barrier = new Barrier;
320     m_barrier->instruction = instruction;
321     m_barrier->fence = fence;
322 
323     m_barrier->events = events;
324 
325     // Check for invalid events
326     list<size_t>::iterator itr;
327     for (itr = events.begin(); itr != events.end(); itr++)
328     {
329       if (!m_events.count(*itr))
330       {
331         m_context->logError("Invalid wait event");
332       }
333     }
334   }
335   else
336   {
337     // Check for divergence
338     bool divergence = false;
339     if (instruction->getDebugLoc() != m_barrier->instruction->getDebugLoc() ||
340         fence != m_barrier->fence || events.size() != m_barrier->events.size())
341     {
342       divergence = true;
343     }
344 
345     // Check events are all the same
346     int divergentEventIndex = -1;
347     size_t newEvent = -1;
348     size_t oldEvent = -1;
349     if (!divergence)
350     {
351       int i = 0;
352       list<size_t>::iterator cItr = events.begin();
353       list<size_t>::iterator pItr = m_barrier->events.begin();
354       for (; cItr != events.end(); cItr++, pItr++, i++)
355       {
356         if (*cItr != *pItr)
357         {
358           divergence = true;
359 
360           divergentEventIndex = i;
361           newEvent = *cItr;
362           oldEvent = *pItr;
363 
364           break;
365         }
366       }
367     }
368 
369     if (divergence)
370     {
371       Context::Message msg(ERROR, m_context);
372       msg << "Work-group divergence detected (barrier)" << endl
373           << msg.INDENT << "Kernel:     " << msg.CURRENT_KERNEL << endl
374           << "Work-group: " << msg.CURRENT_WORK_GROUP << endl
375           << endl
376           << "Work-item:  " << msg.CURRENT_ENTITY << endl
377           << msg.CURRENT_LOCATION << endl
378           << "fence=0x" << hex << fence << ", "
379           << "num_events=" << dec << events.size() << endl;
380       if (divergentEventIndex >= 0)
381       {
382         msg << "events[" << dec << divergentEventIndex << "]=" << newEvent
383             << endl;
384       }
385       msg << endl
386           << "Previous work-items executed:" << endl
387           << m_barrier->instruction << endl
388           << "fence=0x" << hex << m_barrier->fence << ", "
389           << "num_events=" << dec << m_barrier->events.size() << endl;
390       if (divergentEventIndex >= 0)
391       {
392         msg << "events[" << dec << divergentEventIndex << "]=" << oldEvent
393             << endl;
394       }
395       msg.send();
396     }
397   }
398 
399   m_running.erase(workItem);
400   m_barrier->workItems.insert(workItem);
401 }
402 
notifyFinished(WorkItem * workItem)403 void WorkGroup::notifyFinished(WorkItem* workItem)
404 {
405   m_running.erase(workItem);
406 
407   // Check if work-group finished without waiting for all events
408   if (m_running.empty() && !m_barrier && !m_events.empty())
409   {
410     m_context->logError("Work-item finished without waiting for events");
411   }
412 }
413 
operator ()(const WorkItem * lhs,const WorkItem * rhs) const414 bool WorkGroup::WorkItemCmp::operator()(const WorkItem* lhs,
415                                         const WorkItem* rhs) const
416 {
417   Size3 lgid = lhs->getGlobalID();
418   Size3 rgid = rhs->getGlobalID();
419   if (lgid.z != rgid.z)
420   {
421     return lgid.z < rgid.z;
422   }
423   if (lgid.y != rgid.y)
424   {
425     return lgid.y < rgid.y;
426   }
427   return lgid.x < rgid.x;
428 }
429