1 #ifndef B3_OPENCL_ARRAY_H 2 #define B3_OPENCL_ARRAY_H 3 4 #include "Bullet3Common/b3AlignedObjectArray.h" 5 #include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" 6 7 template <typename T> 8 class b3OpenCLArray 9 { 10 size_t m_size; 11 size_t m_capacity; 12 cl_mem m_clBuffer; 13 14 cl_context m_clContext; 15 cl_command_queue m_commandQueue; 16 17 bool m_ownsMemory; 18 19 bool m_allowGrowingCapacity; 20 deallocate()21 void deallocate() 22 { 23 if (m_clBuffer && m_ownsMemory) 24 { 25 clReleaseMemObject(m_clBuffer); 26 } 27 m_clBuffer = 0; 28 m_capacity = 0; 29 } 30 31 b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src); 32 allocSize(size_t size)33 B3_FORCE_INLINE size_t allocSize(size_t size) 34 { 35 return (size ? size * 2 : 1); 36 } 37 38 public: 39 b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity = 0, bool allowGrowingCapacity = true) 40 : m_size(0), m_capacity(0), m_clBuffer(0), m_clContext(ctx), m_commandQueue(queue), m_ownsMemory(true), m_allowGrowingCapacity(true) 41 { 42 if (initialCapacity) 43 { 44 reserve(initialCapacity); 45 } 46 m_allowGrowingCapacity = allowGrowingCapacity; 47 } 48 49 ///this is an error-prone method with no error checking, be careful! setFromOpenCLBuffer(cl_mem buffer,size_t sizeInElements)50 void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements) 51 { 52 deallocate(); 53 m_ownsMemory = false; 54 m_allowGrowingCapacity = false; 55 m_clBuffer = buffer; 56 m_size = sizeInElements; 57 m_capacity = sizeInElements; 58 } 59 60 // we could enable this assignment, but need to make sure to avoid accidental deep copies 61 // b3OpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src) 62 // { 63 // copyFromArray(src); 64 // return *this; 65 // } 66 getBufferCL()67 cl_mem getBufferCL() const 68 { 69 return m_clBuffer; 70 } 71 ~b3OpenCLArray()72 virtual ~b3OpenCLArray() 73 { 74 deallocate(); 75 m_size = 0; 76 m_capacity = 0; 77 } 78 79 B3_FORCE_INLINE bool push_back(const T& _Val, bool waitForCompletion = true) 80 { 81 bool result = true; 82 size_t sz = size(); 83 if (sz == capacity()) 84 { 85 result = reserve(allocSize(size())); 86 } 87 copyFromHostPointer(&_Val, 1, sz, waitForCompletion); 88 m_size++; 89 return result; 90 } 91 forcedAt(size_t n)92 B3_FORCE_INLINE T forcedAt(size_t n) const 93 { 94 b3Assert(n >= 0); 95 b3Assert(n < capacity()); 96 T elem; 97 copyToHostPointer(&elem, 1, n, true); 98 return elem; 99 } 100 at(size_t n)101 B3_FORCE_INLINE T at(size_t n) const 102 { 103 b3Assert(n >= 0); 104 b3Assert(n < size()); 105 T elem; 106 copyToHostPointer(&elem, 1, n, true); 107 return elem; 108 } 109 110 B3_FORCE_INLINE bool resize(size_t newsize, bool copyOldContents = true) 111 { 112 bool result = true; 113 size_t curSize = size(); 114 115 if (newsize < curSize) 116 { 117 //leave the OpenCL memory for now 118 } 119 else 120 { 121 if (newsize > size()) 122 { 123 result = reserve(newsize, copyOldContents); 124 } 125 126 //leave new data uninitialized (init in debug mode?) 127 //for (size_t i=curSize;i<newsize;i++) ... 128 } 129 130 if (result) 131 { 132 m_size = newsize; 133 } 134 else 135 { 136 m_size = 0; 137 } 138 return result; 139 } 140 size()141 B3_FORCE_INLINE size_t size() const 142 { 143 return m_size; 144 } 145 capacity()146 B3_FORCE_INLINE size_t capacity() const 147 { 148 return m_capacity; 149 } 150 151 B3_FORCE_INLINE bool reserve(size_t _Count, bool copyOldContents = true) 152 { 153 bool result = true; 154 // determine new minimum length of allocated storage 155 if (capacity() < _Count) 156 { // not enough room, reallocate 157 158 if (m_allowGrowingCapacity) 159 { 160 cl_int ciErrNum; 161 //create a new OpenCL buffer 162 size_t memSizeInBytes = sizeof(T) * _Count; 163 cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum); 164 if (ciErrNum != CL_SUCCESS) 165 { 166 b3Error("OpenCL out-of-memory\n"); 167 _Count = 0; 168 result = false; 169 } 170 //#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS 171 #ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS 172 unsigned char* src = (unsigned char*)malloc(memSizeInBytes); 173 for (size_t i = 0; i < memSizeInBytes; i++) 174 src[i] = 0xbb; 175 ciErrNum = clEnqueueWriteBuffer(m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0, 0, 0); 176 b3Assert(ciErrNum == CL_SUCCESS); 177 clFinish(m_commandQueue); 178 free(src); 179 #endif //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS 180 181 if (result) 182 { 183 if (copyOldContents) 184 copyToCL(buf, size()); 185 } 186 187 //deallocate the old buffer 188 deallocate(); 189 190 m_clBuffer = buf; 191 192 m_capacity = _Count; 193 } 194 else 195 { 196 //fail: assert and 197 b3Assert(0); 198 deallocate(); 199 result = false; 200 } 201 } 202 return result; 203 } 204 205 void copyToCL(cl_mem destination, size_t numElements, size_t firstElem = 0, size_t dstOffsetInElems = 0) const 206 { 207 if (numElements <= 0) 208 return; 209 210 b3Assert(m_clBuffer); 211 b3Assert(destination); 212 213 //likely some error, destination is same as source 214 b3Assert(m_clBuffer != destination); 215 216 b3Assert((firstElem + numElements) <= m_size); 217 218 cl_int status = 0; 219 220 b3Assert(numElements > 0); 221 b3Assert(numElements <= m_size); 222 223 size_t srcOffsetBytes = sizeof(T) * firstElem; 224 size_t dstOffsetInBytes = sizeof(T) * dstOffsetInElems; 225 226 status = clEnqueueCopyBuffer(m_commandQueue, m_clBuffer, destination, 227 srcOffsetBytes, dstOffsetInBytes, sizeof(T) * numElements, 0, 0, 0); 228 229 b3Assert(status == CL_SUCCESS); 230 } 231 232 void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion = true) 233 { 234 size_t newSize = srcArray.size(); 235 236 bool copyOldContents = false; 237 resize(newSize, copyOldContents); 238 if (newSize) 239 copyFromHostPointer(&srcArray[0], newSize, 0, waitForCompletion); 240 } 241 242 void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem = 0, bool waitForCompletion = true) 243 { 244 b3Assert(numElems + destFirstElem <= capacity()); 245 246 if (numElems + destFirstElem) 247 { 248 cl_int status = 0; 249 size_t sizeInBytes = sizeof(T) * numElems; 250 status = clEnqueueWriteBuffer(m_commandQueue, m_clBuffer, 0, sizeof(T) * destFirstElem, sizeInBytes, 251 src, 0, 0, 0); 252 b3Assert(status == CL_SUCCESS); 253 if (waitForCompletion) 254 clFinish(m_commandQueue); 255 } 256 else 257 { 258 b3Error("copyFromHostPointer invalid range\n"); 259 } 260 } 261 262 void copyToHost(b3AlignedObjectArray<T>& destArray, bool waitForCompletion = true) const 263 { 264 destArray.resize(this->size()); 265 if (size()) 266 copyToHostPointer(&destArray[0], size(), 0, waitForCompletion); 267 } 268 269 void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem = 0, bool waitForCompletion = true) const 270 { 271 b3Assert(numElem + srcFirstElem <= capacity()); 272 273 if (numElem + srcFirstElem <= capacity()) 274 { 275 cl_int status = 0; 276 status = clEnqueueReadBuffer(m_commandQueue, m_clBuffer, 0, sizeof(T) * srcFirstElem, sizeof(T) * numElem, 277 destPtr, 0, 0, 0); 278 b3Assert(status == CL_SUCCESS); 279 280 if (waitForCompletion) 281 clFinish(m_commandQueue); 282 } 283 else 284 { 285 b3Error("copyToHostPointer invalid range\n"); 286 } 287 } 288 copyFromOpenCLArray(const b3OpenCLArray & src)289 void copyFromOpenCLArray(const b3OpenCLArray& src) 290 { 291 size_t newSize = src.size(); 292 resize(newSize); 293 if (size()) 294 { 295 src.copyToCL(m_clBuffer, size()); 296 } 297 } 298 }; 299 300 #endif //B3_OPENCL_ARRAY_H 301