1 
2 #ifndef B3_LAUNCHER_CL_H
3 #define B3_LAUNCHER_CL_H
4 
5 #include "b3BufferInfoCL.h"
6 #include "Bullet3Common/b3MinMax.h"
7 #include "b3OpenCLArray.h"
8 #include <stdio.h>
9 
10 #define B3_DEBUG_SERIALIZE_CL
11 
12 #ifdef _WIN32
13 #pragma warning(disable : 4996)
14 #endif
15 #define B3_CL_MAX_ARG_SIZE 16
// Record describing a single kernel argument, stored by b3LauncherCL in its
// m_kernelArguments array. The declaration is wrapped in
// B3_ATTRIBUTE_ALIGNED16; member order is left untouched because the record is
// copied around as a unit (NOTE(review): the serialized on-disk layout is
// presumably this struct verbatim -- confirm against serializeArguments()).
B3_ATTRIBUTE_ALIGNED16(struct) b3KernelArgData
{
	// 0 for arguments recorded by b3LauncherCL::setConst(); presumably non-zero
	// when the argument is an OpenCL buffer (set outside this header).
	int m_isBuffer;

	// Position of the argument in the kernel's argument list.
	int m_argIndex;

	// Size of the argument; setConst() stores sizeof(T) here.
	int m_argSizeInBytes;

	// Not used for data; by its name, filler placed ahead of the union.
	int m_unusedPadding;

	// Payload. The two members share storage, so only one is meaningful for a
	// given record.
	union
	{
		// Buffer handle (presumably the active member when m_isBuffer != 0).
		cl_mem m_clBuffer;

		// Raw bytes of a by-value argument, at most B3_CL_MAX_ARG_SIZE of them.
		unsigned char m_argData[B3_CL_MAX_ARG_SIZE];
	};
};
28 
29 class b3LauncherCL
30 {
31 	cl_command_queue m_commandQueue;
32 	cl_kernel m_kernel;
33 	int m_idx;
34 
35 	b3AlignedObjectArray<b3KernelArgData> m_kernelArguments;
36 	int m_serializationSizeInBytes;
37 	bool m_enableSerialization;
38 
39 	const char* m_name;
40 
41 public:
42 	b3AlignedObjectArray<b3OpenCLArray<unsigned char>*> m_arrays;
43 
44 	b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name);
45 
46 	virtual ~b3LauncherCL();
47 
48 	void setBuffer(cl_mem clBuffer);
49 
50 	void setBuffers(b3BufferInfoCL* buffInfo, int n);
51 
getSerializationBufferSize()52 	int getSerializationBufferSize() const
53 	{
54 		return m_serializationSizeInBytes;
55 	}
56 
57 	int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx);
58 
59 	inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx);
60 
61 	int serializeArguments(unsigned char* destBuffer, int destBufferCapacity);
62 
getNumArguments()63 	int getNumArguments() const
64 	{
65 		return m_kernelArguments.size();
66 	}
67 
getArgument(int index)68 	b3KernelArgData getArgument(int index)
69 	{
70 		return m_kernelArguments[index];
71 	}
72 
73 	void serializeToFile(const char* fileName, int numWorkItems);
74 
75 	template <typename T>
setConst(const T & consts)76 	inline void setConst(const T& consts)
77 	{
78 		int sz = sizeof(T);
79 		b3Assert(sz <= B3_CL_MAX_ARG_SIZE);
80 
81 		if (m_enableSerialization)
82 		{
83 			b3KernelArgData kernelArg;
84 			kernelArg.m_argIndex = m_idx;
85 			kernelArg.m_isBuffer = 0;
86 			T* destArg = (T*)kernelArg.m_argData;
87 			*destArg = consts;
88 			kernelArg.m_argSizeInBytes = sizeof(T);
89 			m_kernelArguments.push_back(kernelArg);
90 			m_serializationSizeInBytes += sizeof(b3KernelArgData);
91 		}
92 
93 		cl_int status = clSetKernelArg(m_kernel, m_idx++, sz, &consts);
94 		b3Assert(status == CL_SUCCESS);
95 	}
96 
97 	inline void launch1D(int numThreads, int localSize = 64)
98 	{
99 		launch2D(numThreads, 1, localSize, 1);
100 	}
101 
launch2D(int numThreadsX,int numThreadsY,int localSizeX,int localSizeY)102 	inline void launch2D(int numThreadsX, int numThreadsY, int localSizeX, int localSizeY)
103 	{
104 		size_t gRange[3] = {1, 1, 1};
105 		size_t lRange[3] = {1, 1, 1};
106 		lRange[0] = localSizeX;
107 		lRange[1] = localSizeY;
108 		gRange[0] = b3Max((size_t)1, (numThreadsX / lRange[0]) + (!(numThreadsX % lRange[0]) ? 0 : 1));
109 		gRange[0] *= lRange[0];
110 		gRange[1] = b3Max((size_t)1, (numThreadsY / lRange[1]) + (!(numThreadsY % lRange[1]) ? 0 : 1));
111 		gRange[1] *= lRange[1];
112 
113 		cl_int status = clEnqueueNDRangeKernel(m_commandQueue,
114 											   m_kernel, 2, NULL, gRange, lRange, 0, 0, 0);
115 		if (status != CL_SUCCESS)
116 		{
117 			printf("Error: OpenCL status = %d\n", status);
118 		}
119 		b3Assert(status == CL_SUCCESS);
120 	}
121 
enableSerialization(bool serialize)122 	void enableSerialization(bool serialize)
123 	{
124 		m_enableSerialization = serialize;
125 	}
126 };
127 
128 #endif  //B3_LAUNCHER_CL_H
129