1 /*
2 Copyright (c) 2012 Advanced Micro Devices, Inc.
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 //Originally written by Takahiro Harada
15 //Host-code rewritten by Erwin Coumans
16 
17 #define BOUNDSEARCH_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl"
18 #define KERNEL0 "SearchSortDataLowerKernel"
19 #define KERNEL1 "SearchSortDataUpperKernel"
20 #define KERNEL2 "SubtractKernel"
21 
22 #include "b3BoundSearchCL.h"
23 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
24 #include "b3LauncherCL.h"
25 #include "kernels/BoundSearchKernelsCL.h"
26 
b3BoundSearchCL(cl_context ctx,cl_device_id device,cl_command_queue queue,int maxSize)27 b3BoundSearchCL::b3BoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize)
28 	: m_context(ctx),
29 	  m_device(device),
30 	  m_queue(queue)
31 {
32 	const char* additionalMacros = "";
33 	//const char* srcFileNameForCaching="";
34 
35 	cl_int pErrNum;
36 	const char* kernelSource = boundSearchKernelsCL;
37 
38 	cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, kernelSource, &pErrNum, additionalMacros, BOUNDSEARCH_PATH);
39 	b3Assert(boundSearchProg);
40 
41 	m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg, additionalMacros);
42 	b3Assert(m_lowerSortDataKernel);
43 
44 	m_upperSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg, additionalMacros);
45 	b3Assert(m_upperSortDataKernel);
46 
47 	m_subtractKernel = 0;
48 
49 	if (maxSize)
50 	{
51 		m_subtractKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg, additionalMacros);
52 		b3Assert(m_subtractKernel);
53 	}
54 
55 	//m_constBuffer = new b3OpenCLArray<b3Int4>( device, 1, BufferBase::BUFFER_CONST );
56 
57 	m_lower = (maxSize == 0) ? 0 : new b3OpenCLArray<unsigned int>(ctx, queue, maxSize);
58 	m_upper = (maxSize == 0) ? 0 : new b3OpenCLArray<unsigned int>(ctx, queue, maxSize);
59 
60 	m_filler = new b3FillCL(ctx, device, queue);
61 }
62 
~b3BoundSearchCL()63 b3BoundSearchCL::~b3BoundSearchCL()
64 {
65 	delete m_lower;
66 	delete m_upper;
67 	delete m_filler;
68 
69 	clReleaseKernel(m_lowerSortDataKernel);
70 	clReleaseKernel(m_upperSortDataKernel);
71 	clReleaseKernel(m_subtractKernel);
72 }
73 
execute(b3OpenCLArray<b3SortData> & src,int nSrc,b3OpenCLArray<unsigned int> & dst,int nDst,Option option)74 void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option)
75 {
76 	b3Int4 constBuffer;
77 	constBuffer.x = nSrc;
78 	constBuffer.y = nDst;
79 
80 	if (option == BOUND_LOWER)
81 	{
82 		b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())};
83 
84 		b3LauncherCL launcher(m_queue, m_lowerSortDataKernel, "m_lowerSortDataKernel");
85 		launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
86 		launcher.setConst(nSrc);
87 		launcher.setConst(nDst);
88 
89 		launcher.launch1D(nSrc, 64);
90 	}
91 	else if (option == BOUND_UPPER)
92 	{
93 		b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())};
94 
95 		b3LauncherCL launcher(m_queue, m_upperSortDataKernel, "m_upperSortDataKernel");
96 		launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
97 		launcher.setConst(nSrc);
98 		launcher.setConst(nDst);
99 
100 		launcher.launch1D(nSrc, 64);
101 	}
102 	else if (option == COUNT)
103 	{
104 		b3Assert(m_lower);
105 		b3Assert(m_upper);
106 		b3Assert(m_lower->capacity() <= (int)nDst);
107 		b3Assert(m_upper->capacity() <= (int)nDst);
108 
109 		int zero = 0;
110 		m_filler->execute(*m_lower, zero, nDst);
111 		m_filler->execute(*m_upper, zero, nDst);
112 
113 		execute(src, nSrc, *m_lower, nDst, BOUND_LOWER);
114 		execute(src, nSrc, *m_upper, nDst, BOUND_UPPER);
115 
116 		{
117 			b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_upper->getBufferCL(), true), b3BufferInfoCL(m_lower->getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())};
118 
119 			b3LauncherCL launcher(m_queue, m_subtractKernel, "m_subtractKernel");
120 			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
121 			launcher.setConst(nSrc);
122 			launcher.setConst(nDst);
123 
124 			launcher.launch1D(nDst, 64);
125 		}
126 	}
127 	else
128 	{
129 		b3Assert(0);
130 	}
131 }
132 
executeHost(b3AlignedObjectArray<b3SortData> & src,int nSrc,b3AlignedObjectArray<unsigned int> & dst,int nDst,Option option)133 void b3BoundSearchCL::executeHost(b3AlignedObjectArray<b3SortData>& src, int nSrc,
134 								  b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option)
135 {
136 	for (int i = 0; i < nSrc - 1; i++)
137 		b3Assert(src[i].m_key <= src[i + 1].m_key);
138 
139 	b3SortData minData, zeroData, maxData;
140 	minData.m_key = -1;
141 	minData.m_value = -1;
142 	zeroData.m_key = 0;
143 	zeroData.m_value = 0;
144 	maxData.m_key = nDst;
145 	maxData.m_value = nDst;
146 
147 	if (option == BOUND_LOWER)
148 	{
149 		for (int i = 0; i < nSrc; i++)
150 		{
151 			b3SortData& iData = (i == 0) ? minData : src[i - 1];
152 			b3SortData& jData = (i == nSrc) ? maxData : src[i];
153 
154 			if (iData.m_key != jData.m_key)
155 			{
156 				int k = jData.m_key;
157 				{
158 					dst[k] = i;
159 				}
160 			}
161 		}
162 	}
163 	else if (option == BOUND_UPPER)
164 	{
165 		for (int i = 1; i < nSrc + 1; i++)
166 		{
167 			b3SortData& iData = src[i - 1];
168 			b3SortData& jData = (i == nSrc) ? maxData : src[i];
169 
170 			if (iData.m_key != jData.m_key)
171 			{
172 				int k = iData.m_key;
173 				{
174 					dst[k] = i;
175 				}
176 			}
177 		}
178 	}
179 	else if (option == COUNT)
180 	{
181 		b3AlignedObjectArray<unsigned int> lower;
182 		lower.resize(nDst);
183 		b3AlignedObjectArray<unsigned int> upper;
184 		upper.resize(nDst);
185 
186 		for (int i = 0; i < nDst; i++)
187 		{
188 			lower[i] = upper[i] = 0;
189 		}
190 
191 		executeHost(src, nSrc, lower, nDst, BOUND_LOWER);
192 		executeHost(src, nSrc, upper, nDst, BOUND_UPPER);
193 
194 		for (int i = 0; i < nDst; i++)
195 		{
196 			dst[i] = upper[i] - lower[i];
197 		}
198 	}
199 	else
200 	{
201 		b3Assert(0);
202 	}
203 }
204