1 #include <libgeodecomp/config.h>
2 #ifdef LIBGEODECOMP_WITH_OPENCL
3 
4 #ifndef LIBGEODECOMP_PARALLELIZATION_HIPARSIMULATOR_OPENCLSTEPPER_H
5 #define LIBGEODECOMP_PARALLELIZATION_HIPARSIMULATOR_OPENCLSTEPPER_H
6 
7 #ifndef __CL_ENABLE_EXCEPTIONS
8 #define __CL_ENABLE_EXCEPTIONS
9 #endif
10 
11 #include <boost/shared_ptr.hpp>
12 #include <CL/cl.h>
13 #include <CL/cl.hpp>
14 
15 #include <libgeodecomp/parallelization/hiparsimulator/stepper.h>
16 
17 namespace LibGeoDecomp {
18 namespace HiParSimulator {
19 
20 template<typename CELL_TYPE>
21 class OpenCLStepper : public Stepper<CELL_TYPE>
22 {
23     friend class OpenCLStepperTest;
24 public:
25     typedef typename APITraits::SelectTopology<CELL_TYPE>::Value Topology;
26     const static int DIM = Topology::DIM;
27 
28     typedef class Stepper<CELL_TYPE> ParentType;
29     typedef typename ParentType::GridType GridType;
30     typedef PartitionManager<Topology> PartitionManagerType;
31 
32     using Stepper<CELL_TYPE>::initializer;
33     using Stepper<CELL_TYPE>::partitionManager;
34 
35     inline OpenCLStepper(
36         const std::string& cellSourceFile,
37         boost::shared_ptr<PartitionManagerType> partitionManager,
38         boost::shared_ptr<Initializer<CELL_TYPE> > initializer,
39         const int platformID=0,
40         const int deviceID=0) :
ParentType(partitionManager,initializer)41         ParentType(partitionManager, initializer)
42     {
43 //         std::vector<cl::Platform> platforms;
44 //         cl::Platform::get(&platforms);
45 //         std::vector<cl::Device> devices;
46 //         platforms[platformID].getDevices(CL_DEVICE_TYPE_ALL, &devices);
47 //         cl::Device usedDevice = devices[deviceID];
48 //         context = cl::Context(devices);
49 //         cmdQueue = cl::CommandQueue(context, usedDevice);
50 
51 //         std::string clSourceString =
52 // "#if defined(cl_khr_fp64)\n"
53 // "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
54 // "#elif defined(cl_amd_fp64)\n"
55 // "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
56 // "#endif\n"
57 // "\n"
58 // "#include \"" + cellSourceFile + "\"\n"
59 // "\n"
60 // #include <libgeodecomp/parallelization/hiparsimulator/escapedopenclkernel.h>
61 //             ;
62 
63 //         cl::Program::Sources clSource(
64 //             1,
65 //             std::make_pair(clSourceString.c_str(),
66 //                            clSourceString.size()));
67 //         cl::Program clProgram(context, clSource);
68 
69 //         try {
70 //             clProgram.build(devices);
71 //         } catch (...) {
72 //             // Normally we don't catch exceptions, but in this case
73 //             // printing the build log (which might get lost otherwise)
74 //             // is valuable for the user who needs to debug his code.
75 //             std::cerr << "Build Log: "
76 //                       << clProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(usedDevice) << "\n";
77 //             throw;
78 //         }
79 
80         // kernel = cl::Kernel(clProgram, "execute");
81 
82         // fixme:
83         // curStep = initializer().startStep();
84         // curNanoStep = 0;
85         // initGrids();
86     }
87 
currentStep()88     inline virtual std::pair<std::size_t, std::size_t> currentStep() const
89     {
90         return std::make_pair(curStep, curNanoStep);
91     }
92 
update(std::size_t nanoSteps)93     inline virtual void update(std::size_t nanoSteps)
94     {
95         // // fixme: implement me (later)
96         // try {
97         //     cl::Buffer startCoordsBuffer, endCoordsBuffer;
98 
99         //     Coord<DIM> c = initializer->gridDimensions();
100         //     int zDim = c.z();
101         //     int yDim = c.y();
102         //     int xDim = c.x();
103 
104         //     int actualX = xDim;
105         //     int actualY = yDim;
106 
107         //     std::vector<int> startCoords;
108         //     std::vector<int> endCoords;
109 
110         //     genThreadCoords(
111         //         &startCoords,
112         //         &endCoords,
113         //         0,
114         //         0,
115         //         0,
116         //         xDim,
117         //         yDim,
118         //         zDim,
119         //         actualX,
120         //         actualY,
121         //         zDim,
122         //         1);
123 
124         //     startCoordsBuffer = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, startCoords.size()*sizeof(int), &startCoords[0]);
125         //     endCoordsBuffer = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, endCoords.size()*sizeof(int), &endCoords[0]);
126 
127         //     cl::NDRange global(actualX, actualY, zDim);
128         //     //fixme: local range could be chosen dynamically
129         //     cl::NDRange local(16, 16, 1);
130 
131         //     // disabling dead code, as Jochen will deliver the new code soon
132         //     // cl::KernelFunctor livingKernel = kernel.bind(cmdQueue, global, local);
133         //     // livingKernel(inputDeviceGrid, outputDeviceGrid, zDim, yDim, xDim,
134         //     //              1, 0, 0, 0,
135         //     //              startCoordsBuffer, endCoordsBuffer, actualX, actualY);
136         //     // livingKernel.getError();
137         //     // cmdQueue.finish();
138 
139 
140         // } catch (cl::Error& err) {
141         //     std::cerr << "OpenCL error: " << err.what() << ", " << oclStrerror(err.err()) << std::endl;
142         //     throw err;
143         // } catch (...) {
144         //     throw;
145         // }
146     }
147 
grid()148     inline virtual const GridType& grid() const
149     {
150         // cmdQueue.enqueueReadBuffer(
151         //     outputDeviceGrid, true, 0,
152         //     hostGrid->getDimensions().prod() * sizeof(CELL_TYPE), hostGrid->baseAddress());
153         return *hostGrid;
154     }
155 
156 private:
157     int curStep;
158     int curNanoStep;
159     boost::shared_ptr<GridType> hostGrid;
160 
161     cl::Buffer inputDeviceGrid;
162     cl::Buffer outputDeviceGrid;
163     cl::Context context;
164     cl::CommandQueue cmdQueue;
165     cl::Kernel kernel;
166 
genThreadCoords(std::vector<int> * startCoords,std::vector<int> * endCoords,const int & offset_x,const int & offset_y,const int & offset_z,const int & active_x,const int & active_y,const int & active_z,const int & actual_x,const int & actual_y,const int & actual_z,const int & planes)167     inline void genThreadCoords(std::vector<int> *startCoords,
168 			 std::vector<int> *endCoords,
169 			 const int& offset_x,
170 			 const int& offset_y,
171 			 const int& offset_z,
172 			 const int& active_x,
173 			 const int& active_y,
174 			 const int& active_z,
175 			 const int& actual_x,
176 			 const int& actual_y,
177 			 const int& actual_z,
178 			 const int& planes)
179     {
180       int maxX = active_x;
181       int maxY = active_y;
182       int maxZ = ceil(1.0 * actual_z/planes);
183       int numThreads = actual_x * actual_y * maxZ;
184       startCoords->resize(numThreads);
185       endCoords->resize(numThreads);
186 
187       for (int z = 0; z < maxZ; ++z) {
188         int startZ = offset_z + z * planes;
189         int endZ = std::min(offset_z + active_z,
190                             startZ + planes);
191 
192         for (int y = 0; y < actual_y; ++y) {
193 	  for (int x = 0; x < actual_x; ++x) {
194 	    int threadID = (z * actual_x * actual_y) + (y * actual_x) + x;
195 	    int myEndZ = endZ;
196 	    if (x >= maxX || y >= maxY)
197 	      myEndZ = startZ;
198 
199 	    (*startCoords)[threadID] = startZ;
200 	    (*endCoords)[threadID] = myEndZ;
201 	  }
202         }
203       }
204     }
205 
initGrids()206     inline void initGrids()
207     {
208         const CoordBox<DIM>& gridBox =
209             partitionManager->ownRegion().boundingBox();
210         hostGrid.reset(new GridType(gridBox, CELL_TYPE()));
211         initializer->grid(&*hostGrid);
212 
213         // inputDeviceGrid = cl::Buffer(
214         //     context,
215         //     CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
216         //     hostGrid->getDimensions().prod() * sizeof(CELL_TYPE),
217         //     hostGrid->baseAddress());
218 	// std::vector<CELL_TYPE> zeroMem(hostGrid->getDimensions().prod(), 0);
219 	// outputDeviceGrid = cl::Buffer(
220         //     context,
221         //     CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
222         //     hostGrid->getDimensions().prod() * sizeof(CELL_TYPE),
223         //     &zeroMem[0]);
224     }
225 
oclStrerror(int nr)226     inline std::string oclStrerror (int nr) {
227       switch (nr) {
228       case 0:
229 	return "CL_SUCCESS";
230       case -1:
231 	return "CL_DEVICE_NOT_FOUND";
232       case -2:
233 	return "CL_DEVICE_NOT_AVAILABLE";
234       case -3:
235 	return "CL_COMPILER_NOT_AVAILABLE";
236       case -4:
237 	return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
238       case -5:
239 	return "CL_OUT_OF_RESOURCES";
240       case -6:
241 	return "CL_OUT_OF_HOST_MEMORY";
242       case -7:
243 	return "CL_PROFILING_INFO_NOT_AVAILABLE";
244       case -8:
245 	return "CL_MEM_COPY_OVERLAP";
246       case -9:
247 	return "CL_IMAGE_FORMAT_MISMATCH";
248       case -10:
249 	return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
250       case -11:
251 	return "CL_BUILD_PROGRAM_FAILURE";
252       case -12:
253 	return "CL_MAP_FAILURE";
254       case -13:
255 	return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
256       case -14:
257 	return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
258       case -30:
259 	return "CL_INVALID_VALUE";
260       case -31:
261 	return "CL_INVALID_DEVICE_TYPE";
262       case -32:
263 	return "CL_INVALID_PLATFORM";
264       case -33:
265 	return "CL_INVALID_DEVICE";
266       case -34:
267 	return "CL_INVALID_CONTEXT";
268       case -35:
269 	return "CL_INVALID_QUEUE_PROPERTIES";
270       case -36:
271 	return "CL_INVALID_COMMAND_QUEUE";
272       case -37:
273 	return "CL_INVALID_HOST_PTR";
274       case -38:
275 	return "CL_INVALID_MEM_OBJECT";
276       case -39:
277 	return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
278       case -40:
279 	return "CL_INVALID_IMAGE_SIZE";
280       case -41:
281 	return "CL_INVALID_SAMPLER";
282       case -42:
283 	return "CL_INVALID_BINARY";
284       case -43:
285 	return "CL_INVALID_BUILD_OPTIONS";
286       case -44:
287 	return "CL_INVALID_PROGRAM";
288       case -45:
289 	return "CL_INVALID_PROGRAM_EXECUTABLE";
290       case -46:
291 	return "CL_INVALID_KERNEL_NAME";
292       case -47:
293 	return "CL_INVALID_KERNEL_DEFINITION";
294       case -48:
295 	return "CL_INVALID_KERNEL";
296       case -49:
297 	return "CL_INVALID_ARG_INDEX";
298       case -50:
299 	return "CL_INVALID_ARG_VALUE";
300       case -51:
301 	return "CL_INVALID_ARG_SIZE";
302       case -52:
303 	return "CL_INVALID_KERNEL_ARGS";
304       case -53:
305 	return "CL_INVALID_WORK_DIMENSION";
306       case -54:
307 	return "CL_INVALID_WORK_GROUP_SIZE";
308       case -55:
309 	return "CL_INVALID_WORK_ITEM_SIZE";
310       case -56:
311 	return "CL_INVALID_GLOBAL_OFFSET";
312       case -57:
313 	return "CL_INVALID_EVENT_WAIT_LIST";
314       case -58:
315 	return "CL_INVALID_EVENT";
316       case -59:
317 	return "CL_INVALID_OPERATION";
318       case -60:
319 	return "CL_INVALID_GL_OBJECT";
320       case -61:
321 	return "CL_INVALID_BUFFER_SIZE";
322       case -62:
323 	return "CL_INVALID_MIP_LEVEL";
324       case -63:
325 	return "CL_INVALID_GLOBAL_WORK_SIZE";
326       case -64:
327 	return "CL_INVALID_PROPERTY";
328       }
329       return "nothing found";
330     }
331 };
332 
333 }
334 }
335 
336 #endif
337 #endif
338