1 #include <libgeodecomp/config.h> 2 #ifdef LIBGEODECOMP_WITH_OPENCL 3 4 #ifndef LIBGEODECOMP_PARALLELIZATION_HIPARSIMULATOR_OPENCLSTEPPER_H 5 #define LIBGEODECOMP_PARALLELIZATION_HIPARSIMULATOR_OPENCLSTEPPER_H 6 7 #ifndef __CL_ENABLE_EXCEPTIONS 8 #define __CL_ENABLE_EXCEPTIONS 9 #endif 10 11 #include <boost/shared_ptr.hpp> 12 #include <CL/cl.h> 13 #include <CL/cl.hpp> 14 15 #include <libgeodecomp/parallelization/hiparsimulator/stepper.h> 16 17 namespace LibGeoDecomp { 18 namespace HiParSimulator { 19 20 template<typename CELL_TYPE> 21 class OpenCLStepper : public Stepper<CELL_TYPE> 22 { 23 friend class OpenCLStepperTest; 24 public: 25 typedef typename APITraits::SelectTopology<CELL_TYPE>::Value Topology; 26 const static int DIM = Topology::DIM; 27 28 typedef class Stepper<CELL_TYPE> ParentType; 29 typedef typename ParentType::GridType GridType; 30 typedef PartitionManager<Topology> PartitionManagerType; 31 32 using Stepper<CELL_TYPE>::initializer; 33 using Stepper<CELL_TYPE>::partitionManager; 34 35 inline OpenCLStepper( 36 const std::string& cellSourceFile, 37 boost::shared_ptr<PartitionManagerType> partitionManager, 38 boost::shared_ptr<Initializer<CELL_TYPE> > initializer, 39 const int platformID=0, 40 const int deviceID=0) : ParentType(partitionManager,initializer)41 ParentType(partitionManager, initializer) 42 { 43 // std::vector<cl::Platform> platforms; 44 // cl::Platform::get(&platforms); 45 // std::vector<cl::Device> devices; 46 // platforms[platformID].getDevices(CL_DEVICE_TYPE_ALL, &devices); 47 // cl::Device usedDevice = devices[deviceID]; 48 // context = cl::Context(devices); 49 // cmdQueue = cl::CommandQueue(context, usedDevice); 50 51 // std::string clSourceString = 52 // "#if defined(cl_khr_fp64)\n" 53 // "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" 54 // "#elif defined(cl_amd_fp64)\n" 55 // "#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n" 56 // "#endif\n" 57 // "\n" 58 // "#include \"" + cellSourceFile + "\"\n" 59 // "\n" 60 // #include <libgeodecomp/parallelization/hiparsimulator/escapedopenclkernel.h> 61 // ; 62 63 // cl::Program::Sources clSource( 64 // 1, 65 // std::make_pair(clSourceString.c_str(), 66 // clSourceString.size())); 67 // cl::Program clProgram(context, clSource); 68 69 // try { 70 // clProgram.build(devices); 71 // } catch (...) { 72 // // Normally we don't catch exceptions, but in this case 73 // // printing the build log (which might get lost otherwise) 74 // // is valuable for the user who needs to debug his code. 75 // std::cerr << "Build Log: " 76 // << clProgram.getBuildInfo<CL_PROGRAM_BUILD_LOG>(usedDevice) << "\n"; 77 // throw; 78 // } 79 80 // kernel = cl::Kernel(clProgram, "execute"); 81 82 // fixme: 83 // curStep = initializer().startStep(); 84 // curNanoStep = 0; 85 // initGrids(); 86 } 87 currentStep()88 inline virtual std::pair<std::size_t, std::size_t> currentStep() const 89 { 90 return std::make_pair(curStep, curNanoStep); 91 } 92 update(std::size_t nanoSteps)93 inline virtual void update(std::size_t nanoSteps) 94 { 95 // // fixme: implement me (later) 96 // try { 97 // cl::Buffer startCoordsBuffer, endCoordsBuffer; 98 99 // Coord<DIM> c = initializer->gridDimensions(); 100 // int zDim = c.z(); 101 // int yDim = c.y(); 102 // int xDim = c.x(); 103 104 // int actualX = xDim; 105 // int actualY = yDim; 106 107 // std::vector<int> startCoords; 108 // std::vector<int> endCoords; 109 110 // genThreadCoords( 111 // &startCoords, 112 // &endCoords, 113 // 0, 114 // 0, 115 // 0, 116 // xDim, 117 // yDim, 118 // zDim, 119 // actualX, 120 // actualY, 121 // zDim, 122 // 1); 123 124 // startCoordsBuffer = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, startCoords.size()*sizeof(int), &startCoords[0]); 125 // endCoordsBuffer = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, endCoords.size()*sizeof(int), &endCoords[0]); 126 127 // cl::NDRange global(actualX, actualY, zDim); 128 // //fixme: local range could be chosen dynamically 129 // cl::NDRange local(16, 16, 1); 130 131 // // disabling dead code, as Jochen will deliver the new code soon 132 // // cl::KernelFunctor livingKernel = kernel.bind(cmdQueue, global, local); 133 // // livingKernel(inputDeviceGrid, outputDeviceGrid, zDim, yDim, xDim, 134 // // 1, 0, 0, 0, 135 // // startCoordsBuffer, endCoordsBuffer, actualX, actualY); 136 // // livingKernel.getError(); 137 // // cmdQueue.finish(); 138 139 140 // } catch (cl::Error& err) { 141 // std::cerr << "OpenCL error: " << err.what() << ", " << oclStrerror(err.err()) << std::endl; 142 // throw err; 143 // } catch (...) { 144 // throw; 145 // } 146 } 147 grid()148 inline virtual const GridType& grid() const 149 { 150 // cmdQueue.enqueueReadBuffer( 151 // outputDeviceGrid, true, 0, 152 // hostGrid->getDimensions().prod() * sizeof(CELL_TYPE), hostGrid->baseAddress()); 153 return *hostGrid; 154 } 155 156 private: 157 int curStep; 158 int curNanoStep; 159 boost::shared_ptr<GridType> hostGrid; 160 161 cl::Buffer inputDeviceGrid; 162 cl::Buffer outputDeviceGrid; 163 cl::Context context; 164 cl::CommandQueue cmdQueue; 165 cl::Kernel kernel; 166 genThreadCoords(std::vector<int> * startCoords,std::vector<int> * endCoords,const int & offset_x,const int & offset_y,const int & offset_z,const int & active_x,const int & active_y,const int & active_z,const int & actual_x,const int & actual_y,const int & actual_z,const int & planes)167 inline void genThreadCoords(std::vector<int> *startCoords, 168 std::vector<int> *endCoords, 169 const int& offset_x, 170 const int& offset_y, 171 const int& offset_z, 172 const int& active_x, 173 const int& active_y, 174 const int& active_z, 175 const int& actual_x, 176 const int& actual_y, 177 const int& actual_z, 178 const int& planes) 179 { 180 int maxX = active_x; 181 int maxY = active_y; 182 int maxZ = ceil(1.0 * actual_z/planes); 183 int numThreads = actual_x * actual_y * maxZ; 184 startCoords->resize(numThreads); 185 endCoords->resize(numThreads); 186 187 for (int z = 0; z < maxZ; ++z) { 188 int startZ = offset_z + z * planes; 189 int endZ = std::min(offset_z + active_z, 190 startZ + planes); 191 192 for (int y = 0; y < actual_y; ++y) { 193 for (int x = 0; x < actual_x; ++x) { 194 int threadID = (z * actual_x * actual_y) + (y * actual_x) + x; 195 int myEndZ = endZ; 196 if (x >= maxX || y >= maxY) 197 myEndZ = startZ; 198 199 (*startCoords)[threadID] = startZ; 200 (*endCoords)[threadID] = myEndZ; 201 } 202 } 203 } 204 } 205 initGrids()206 inline void initGrids() 207 { 208 const CoordBox<DIM>& gridBox = 209 partitionManager->ownRegion().boundingBox(); 210 hostGrid.reset(new GridType(gridBox, CELL_TYPE())); 211 initializer->grid(&*hostGrid); 212 213 // inputDeviceGrid = cl::Buffer( 214 // context, 215 // CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 216 // hostGrid->getDimensions().prod() * sizeof(CELL_TYPE), 217 // hostGrid->baseAddress()); 218 // std::vector<CELL_TYPE> zeroMem(hostGrid->getDimensions().prod(), 0); 219 // outputDeviceGrid = cl::Buffer( 220 // context, 221 // CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 222 // hostGrid->getDimensions().prod() * sizeof(CELL_TYPE), 223 // &zeroMem[0]); 224 } 225 oclStrerror(int nr)226 inline std::string oclStrerror (int nr) { 227 switch (nr) { 228 case 0: 229 return "CL_SUCCESS"; 230 case -1: 231 return "CL_DEVICE_NOT_FOUND"; 232 case -2: 233 return "CL_DEVICE_NOT_AVAILABLE"; 234 case -3: 235 return "CL_COMPILER_NOT_AVAILABLE"; 236 case -4: 237 return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; 238 case -5: 239 return "CL_OUT_OF_RESOURCES"; 240 case -6: 241 return "CL_OUT_OF_HOST_MEMORY"; 242 case -7: 243 return "CL_PROFILING_INFO_NOT_AVAILABLE"; 244 case -8: 245 return "CL_MEM_COPY_OVERLAP"; 246 case -9: 247 return "CL_IMAGE_FORMAT_MISMATCH"; 248 case -10: 249 return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; 250 case -11: 251 return "CL_BUILD_PROGRAM_FAILURE"; 252 case -12: 253 return "CL_MAP_FAILURE"; 254 case -13: 255 return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; 256 case -14: 257 return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; 258 case -30: 259 return "CL_INVALID_VALUE"; 260 case -31: 261 return "CL_INVALID_DEVICE_TYPE"; 262 case -32: 263 return "CL_INVALID_PLATFORM"; 264 case -33: 265 return "CL_INVALID_DEVICE"; 266 case -34: 267 return "CL_INVALID_CONTEXT"; 268 case -35: 269 return "CL_INVALID_QUEUE_PROPERTIES"; 270 case -36: 271 return "CL_INVALID_COMMAND_QUEUE"; 272 case -37: 273 return "CL_INVALID_HOST_PTR"; 274 case -38: 275 return "CL_INVALID_MEM_OBJECT"; 276 case -39: 277 return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; 278 case -40: 279 return "CL_INVALID_IMAGE_SIZE"; 280 case -41: 281 return "CL_INVALID_SAMPLER"; 282 case -42: 283 return "CL_INVALID_BINARY"; 284 case -43: 285 return "CL_INVALID_BUILD_OPTIONS"; 286 case -44: 287 return "CL_INVALID_PROGRAM"; 288 case -45: 289 return "CL_INVALID_PROGRAM_EXECUTABLE"; 290 case -46: 291 return "CL_INVALID_KERNEL_NAME"; 292 case -47: 293 return "CL_INVALID_KERNEL_DEFINITION"; 294 case -48: 295 return "CL_INVALID_KERNEL"; 296 case -49: 297 return "CL_INVALID_ARG_INDEX"; 298 case -50: 299 return "CL_INVALID_ARG_VALUE"; 300 case -51: 301 return "CL_INVALID_ARG_SIZE"; 302 case -52: 303 return "CL_INVALID_KERNEL_ARGS"; 304 case -53: 305 return "CL_INVALID_WORK_DIMENSION"; 306 case -54: 307 return "CL_INVALID_WORK_GROUP_SIZE"; 308 case -55: 309 return "CL_INVALID_WORK_ITEM_SIZE"; 310 case -56: 311 return "CL_INVALID_GLOBAL_OFFSET"; 312 case -57: 313 return "CL_INVALID_EVENT_WAIT_LIST"; 314 case -58: 315 return "CL_INVALID_EVENT"; 316 case -59: 317 return "CL_INVALID_OPERATION"; 318 case -60: 319 return "CL_INVALID_GL_OBJECT"; 320 case -61: 321 return "CL_INVALID_BUFFER_SIZE"; 322 case -62: 323 return "CL_INVALID_MIP_LEVEL"; 324 case -63: 325 return "CL_INVALID_GLOBAL_WORK_SIZE"; 326 case -64: 327 return "CL_INVALID_PROPERTY"; 328 } 329 return "nothing found"; 330 } 331 }; 332 333 } 334 } 335 336 #endif 337 #endif 338