1 #ifndef B3_OPENCL_ARRAY_H
2 #define B3_OPENCL_ARRAY_H
3 
4 #include "Bullet3Common/b3AlignedObjectArray.h"
5 #include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"
6 
7 template <typename T>
8 class b3OpenCLArray
9 {
10 	size_t m_size;
11 	size_t m_capacity;
12 	cl_mem m_clBuffer;
13 
14 	cl_context m_clContext;
15 	cl_command_queue m_commandQueue;
16 
17 	bool m_ownsMemory;
18 
19 	bool m_allowGrowingCapacity;
20 
deallocate()21 	void deallocate()
22 	{
23 		if (m_clBuffer && m_ownsMemory)
24 		{
25 			clReleaseMemObject(m_clBuffer);
26 		}
27 		m_clBuffer = 0;
28 		m_capacity = 0;
29 	}
30 
31 	b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src);
32 
allocSize(size_t size)33 	B3_FORCE_INLINE size_t allocSize(size_t size)
34 	{
35 		return (size ? size * 2 : 1);
36 	}
37 
38 public:
39 	b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity = 0, bool allowGrowingCapacity = true)
40 		: m_size(0), m_capacity(0), m_clBuffer(0), m_clContext(ctx), m_commandQueue(queue), m_ownsMemory(true), m_allowGrowingCapacity(true)
41 	{
42 		if (initialCapacity)
43 		{
44 			reserve(initialCapacity);
45 		}
46 		m_allowGrowingCapacity = allowGrowingCapacity;
47 	}
48 
49 	///this is an error-prone method with no error checking, be careful!
setFromOpenCLBuffer(cl_mem buffer,size_t sizeInElements)50 	void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements)
51 	{
52 		deallocate();
53 		m_ownsMemory = false;
54 		m_allowGrowingCapacity = false;
55 		m_clBuffer = buffer;
56 		m_size = sizeInElements;
57 		m_capacity = sizeInElements;
58 	}
59 
60 	// we could enable this assignment, but need to make sure to avoid accidental deep copies
61 	//	b3OpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src)
62 	//	{
63 	//		copyFromArray(src);
64 	//		return *this;
65 	//	}
66 
getBufferCL()67 	cl_mem getBufferCL() const
68 	{
69 		return m_clBuffer;
70 	}
71 
~b3OpenCLArray()72 	virtual ~b3OpenCLArray()
73 	{
74 		deallocate();
75 		m_size = 0;
76 		m_capacity = 0;
77 	}
78 
79 	B3_FORCE_INLINE bool push_back(const T& _Val, bool waitForCompletion = true)
80 	{
81 		bool result = true;
82 		size_t sz = size();
83 		if (sz == capacity())
84 		{
85 			result = reserve(allocSize(size()));
86 		}
87 		copyFromHostPointer(&_Val, 1, sz, waitForCompletion);
88 		m_size++;
89 		return result;
90 	}
91 
forcedAt(size_t n)92 	B3_FORCE_INLINE T forcedAt(size_t n) const
93 	{
94 		b3Assert(n >= 0);
95 		b3Assert(n < capacity());
96 		T elem;
97 		copyToHostPointer(&elem, 1, n, true);
98 		return elem;
99 	}
100 
at(size_t n)101 	B3_FORCE_INLINE T at(size_t n) const
102 	{
103 		b3Assert(n >= 0);
104 		b3Assert(n < size());
105 		T elem;
106 		copyToHostPointer(&elem, 1, n, true);
107 		return elem;
108 	}
109 
110 	B3_FORCE_INLINE bool resize(size_t newsize, bool copyOldContents = true)
111 	{
112 		bool result = true;
113 		size_t curSize = size();
114 
115 		if (newsize < curSize)
116 		{
117 			//leave the OpenCL memory for now
118 		}
119 		else
120 		{
121 			if (newsize > size())
122 			{
123 				result = reserve(newsize, copyOldContents);
124 			}
125 
126 			//leave new data uninitialized (init in debug mode?)
127 			//for (size_t i=curSize;i<newsize;i++) ...
128 		}
129 
130 		if (result)
131 		{
132 			m_size = newsize;
133 		}
134 		else
135 		{
136 			m_size = 0;
137 		}
138 		return result;
139 	}
140 
size()141 	B3_FORCE_INLINE size_t size() const
142 	{
143 		return m_size;
144 	}
145 
capacity()146 	B3_FORCE_INLINE size_t capacity() const
147 	{
148 		return m_capacity;
149 	}
150 
151 	B3_FORCE_INLINE bool reserve(size_t _Count, bool copyOldContents = true)
152 	{
153 		bool result = true;
154 		// determine new minimum length of allocated storage
155 		if (capacity() < _Count)
156 		{  // not enough room, reallocate
157 
158 			if (m_allowGrowingCapacity)
159 			{
160 				cl_int ciErrNum;
161 				//create a new OpenCL buffer
162 				size_t memSizeInBytes = sizeof(T) * _Count;
163 				cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum);
164 				if (ciErrNum != CL_SUCCESS)
165 				{
166 					b3Error("OpenCL out-of-memory\n");
167 					_Count = 0;
168 					result = false;
169 				}
170 //#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
171 #ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
172 				unsigned char* src = (unsigned char*)malloc(memSizeInBytes);
173 				for (size_t i = 0; i < memSizeInBytes; i++)
174 					src[i] = 0xbb;
175 				ciErrNum = clEnqueueWriteBuffer(m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0, 0, 0);
176 				b3Assert(ciErrNum == CL_SUCCESS);
177 				clFinish(m_commandQueue);
178 				free(src);
179 #endif  //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
180 
181 				if (result)
182 				{
183 					if (copyOldContents)
184 						copyToCL(buf, size());
185 				}
186 
187 				//deallocate the old buffer
188 				deallocate();
189 
190 				m_clBuffer = buf;
191 
192 				m_capacity = _Count;
193 			}
194 			else
195 			{
196 				//fail: assert and
197 				b3Assert(0);
198 				deallocate();
199 				result = false;
200 			}
201 		}
202 		return result;
203 	}
204 
205 	void copyToCL(cl_mem destination, size_t numElements, size_t firstElem = 0, size_t dstOffsetInElems = 0) const
206 	{
207 		if (numElements <= 0)
208 			return;
209 
210 		b3Assert(m_clBuffer);
211 		b3Assert(destination);
212 
213 		//likely some error, destination is same as source
214 		b3Assert(m_clBuffer != destination);
215 
216 		b3Assert((firstElem + numElements) <= m_size);
217 
218 		cl_int status = 0;
219 
220 		b3Assert(numElements > 0);
221 		b3Assert(numElements <= m_size);
222 
223 		size_t srcOffsetBytes = sizeof(T) * firstElem;
224 		size_t dstOffsetInBytes = sizeof(T) * dstOffsetInElems;
225 
226 		status = clEnqueueCopyBuffer(m_commandQueue, m_clBuffer, destination,
227 									 srcOffsetBytes, dstOffsetInBytes, sizeof(T) * numElements, 0, 0, 0);
228 
229 		b3Assert(status == CL_SUCCESS);
230 	}
231 
232 	void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion = true)
233 	{
234 		size_t newSize = srcArray.size();
235 
236 		bool copyOldContents = false;
237 		resize(newSize, copyOldContents);
238 		if (newSize)
239 			copyFromHostPointer(&srcArray[0], newSize, 0, waitForCompletion);
240 	}
241 
242 	void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem = 0, bool waitForCompletion = true)
243 	{
244 		b3Assert(numElems + destFirstElem <= capacity());
245 
246 		if (numElems + destFirstElem)
247 		{
248 			cl_int status = 0;
249 			size_t sizeInBytes = sizeof(T) * numElems;
250 			status = clEnqueueWriteBuffer(m_commandQueue, m_clBuffer, 0, sizeof(T) * destFirstElem, sizeInBytes,
251 										  src, 0, 0, 0);
252 			b3Assert(status == CL_SUCCESS);
253 			if (waitForCompletion)
254 				clFinish(m_commandQueue);
255 		}
256 		else
257 		{
258 			b3Error("copyFromHostPointer invalid range\n");
259 		}
260 	}
261 
262 	void copyToHost(b3AlignedObjectArray<T>& destArray, bool waitForCompletion = true) const
263 	{
264 		destArray.resize(this->size());
265 		if (size())
266 			copyToHostPointer(&destArray[0], size(), 0, waitForCompletion);
267 	}
268 
269 	void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem = 0, bool waitForCompletion = true) const
270 	{
271 		b3Assert(numElem + srcFirstElem <= capacity());
272 
273 		if (numElem + srcFirstElem <= capacity())
274 		{
275 			cl_int status = 0;
276 			status = clEnqueueReadBuffer(m_commandQueue, m_clBuffer, 0, sizeof(T) * srcFirstElem, sizeof(T) * numElem,
277 										 destPtr, 0, 0, 0);
278 			b3Assert(status == CL_SUCCESS);
279 
280 			if (waitForCompletion)
281 				clFinish(m_commandQueue);
282 		}
283 		else
284 		{
285 			b3Error("copyToHostPointer invalid range\n");
286 		}
287 	}
288 
copyFromOpenCLArray(const b3OpenCLArray & src)289 	void copyFromOpenCLArray(const b3OpenCLArray& src)
290 	{
291 		size_t newSize = src.size();
292 		resize(newSize);
293 		if (size())
294 		{
295 			src.copyToCL(m_clBuffer, size());
296 		}
297 	}
298 };
299 
300 #endif  //B3_OPENCL_ARRAY_H
301