1 /*
2 Copyright (c) 2013 Advanced Micro Devices, Inc.
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 //Originally written by Erwin Coumans
15 
16 #include "b3GpuRigidBodyPipeline.h"
17 #include "b3GpuRigidBodyPipelineInternalData.h"
18 #include "kernels/integrateKernel.h"
19 #include "kernels/updateAabbsKernel.h"
20 
21 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
22 #include "b3GpuNarrowPhase.h"
23 #include "Bullet3Geometry/b3AabbUtil.h"
24 #include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
25 #include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
26 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
27 #include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h"
28 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h"
29 #include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h"
30 
31 //#define TEST_OTHER_GPU_SOLVER
32 
33 #define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl"
34 #define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl"
35 
36 bool useBullet2CpuSolver = true;
37 
38 //choice of contact solver
39 bool gUseJacobi = false;
40 bool gUseDbvt = false;
41 bool gDumpContactStats = false;
42 bool gCalcWorldSpaceAabbOnCpu = false;
43 bool gUseCalculateOverlappingPairsHost = false;
44 bool gIntegrateOnCpu = false;
45 bool gClearPairsOnGpu = true;
46 
47 #define TEST_OTHER_GPU_SOLVER 1
48 #ifdef TEST_OTHER_GPU_SOLVER
49 #include "b3GpuJacobiContactSolver.h"
50 #endif  //TEST_OTHER_GPU_SOLVER
51 
52 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
53 #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
54 #include "Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h"
55 
56 #include "b3GpuPgsContactSolver.h"
57 #include "b3Solver.h"
58 
59 #include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"
60 #include "Bullet3OpenCL/Raycast/b3GpuRaycast.h"
61 
62 #include "Bullet3Dynamics/shared/b3IntegrateTransforms.h"
63 #include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
64 
b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device,cl_command_queue q,class b3GpuNarrowPhase * narrowphase,class b3GpuBroadphaseInterface * broadphaseSap,struct b3DynamicBvhBroadphase * broadphaseDbvt,const b3Config & config)65 b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx, cl_device_id device, cl_command_queue q, class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config)
66 {
67 	m_data = new b3GpuRigidBodyPipelineInternalData;
68 	m_data->m_constraintUid = 0;
69 	m_data->m_config = config;
70 	m_data->m_context = ctx;
71 	m_data->m_device = device;
72 	m_data->m_queue = q;
73 
74 	m_data->m_solver = new b3PgsJacobiSolver(true);                            //new b3PgsJacobiSolver(true);
75 	m_data->m_gpuSolver = new b3GpuPgsConstraintSolver(ctx, device, q, true);  //new b3PgsJacobiSolver(true);
76 
77 	m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx, q, config.m_maxConvexBodies);
78 	m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx, q, config.m_maxBroadphasePairs);
79 
80 	m_data->m_gpuConstraints = new b3OpenCLArray<b3GpuGenericConstraint>(ctx, q);
81 #ifdef TEST_OTHER_GPU_SOLVER
82 	m_data->m_solver3 = new b3GpuJacobiContactSolver(ctx, device, q, config.m_maxBroadphasePairs);
83 #endif  //	TEST_OTHER_GPU_SOLVER
84 
85 	m_data->m_solver2 = new b3GpuPgsContactSolver(ctx, device, q, config.m_maxBroadphasePairs);
86 
87 	m_data->m_raycaster = new b3GpuRaycast(ctx, device, q);
88 
89 	m_data->m_broadphaseDbvt = broadphaseDbvt;
90 	m_data->m_broadphaseSap = broadphaseSap;
91 	m_data->m_narrowphase = narrowphase;
92 	m_data->m_gravity.setValue(0.f, -9.8f, 0.f);
93 
94 	cl_int errNum = 0;
95 
96 	{
97 		cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, integrateKernelCL, &errNum, "", B3_RIGIDBODY_INTEGRATE_PATH);
98 		b3Assert(errNum == CL_SUCCESS);
99 		m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, integrateKernelCL, "integrateTransformsKernel", &errNum, prog);
100 		b3Assert(errNum == CL_SUCCESS);
101 		clReleaseProgram(prog);
102 	}
103 	{
104 		cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, &errNum, "", B3_RIGIDBODY_UPDATEAABB_PATH);
105 		b3Assert(errNum == CL_SUCCESS);
106 		m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "initializeGpuAabbsFull", &errNum, prog);
107 		b3Assert(errNum == CL_SUCCESS);
108 
109 		m_data->m_clearOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "clearOverlappingPairsKernel", &errNum, prog);
110 		b3Assert(errNum == CL_SUCCESS);
111 
112 		clReleaseProgram(prog);
113 	}
114 }
115 
~b3GpuRigidBodyPipeline()116 b3GpuRigidBodyPipeline::~b3GpuRigidBodyPipeline()
117 {
118 	if (m_data->m_integrateTransformsKernel)
119 		clReleaseKernel(m_data->m_integrateTransformsKernel);
120 
121 	if (m_data->m_updateAabbsKernel)
122 		clReleaseKernel(m_data->m_updateAabbsKernel);
123 
124 	if (m_data->m_clearOverlappingPairsKernel)
125 		clReleaseKernel(m_data->m_clearOverlappingPairsKernel);
126 	delete m_data->m_raycaster;
127 	delete m_data->m_solver;
128 	delete m_data->m_allAabbsGPU;
129 	delete m_data->m_gpuConstraints;
130 	delete m_data->m_overlappingPairsGPU;
131 
132 #ifdef TEST_OTHER_GPU_SOLVER
133 	delete m_data->m_solver3;
134 #endif  //TEST_OTHER_GPU_SOLVER
135 
136 	delete m_data->m_solver2;
137 
138 	delete m_data;
139 }
140 
reset()141 void b3GpuRigidBodyPipeline::reset()
142 {
143 	m_data->m_gpuConstraints->resize(0);
144 	m_data->m_cpuConstraints.resize(0);
145 	m_data->m_allAabbsGPU->resize(0);
146 	m_data->m_allAabbsCPU.resize(0);
147 }
148 
addConstraint(b3TypedConstraint * constraint)149 void b3GpuRigidBodyPipeline::addConstraint(b3TypedConstraint* constraint)
150 {
151 	m_data->m_joints.push_back(constraint);
152 }
153 
removeConstraint(b3TypedConstraint * constraint)154 void b3GpuRigidBodyPipeline::removeConstraint(b3TypedConstraint* constraint)
155 {
156 	m_data->m_joints.remove(constraint);
157 }
158 
removeConstraintByUid(int uid)159 void b3GpuRigidBodyPipeline::removeConstraintByUid(int uid)
160 {
161 	m_data->m_gpuSolver->recomputeBatches();
162 	//slow linear search
163 	m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints);
164 	//remove
165 	for (int i = 0; i < m_data->m_cpuConstraints.size(); i++)
166 	{
167 		if (m_data->m_cpuConstraints[i].m_uid == uid)
168 		{
169 			//m_data->m_cpuConstraints.remove(m_data->m_cpuConstraints[i]);
170 			m_data->m_cpuConstraints.swap(i, m_data->m_cpuConstraints.size() - 1);
171 			m_data->m_cpuConstraints.pop_back();
172 
173 			break;
174 		}
175 	}
176 
177 	if (m_data->m_cpuConstraints.size())
178 	{
179 		m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints);
180 	}
181 	else
182 	{
183 		m_data->m_gpuConstraints->resize(0);
184 	}
185 }
createPoint2PointConstraint(int bodyA,int bodyB,const float * pivotInA,const float * pivotInB,float breakingThreshold)186 int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold)
187 {
188 	m_data->m_gpuSolver->recomputeBatches();
189 	b3GpuGenericConstraint c;
190 	c.m_uid = m_data->m_constraintUid;
191 	m_data->m_constraintUid++;
192 	c.m_flags = B3_CONSTRAINT_FLAG_ENABLED;
193 	c.m_rbA = bodyA;
194 	c.m_rbB = bodyB;
195 	c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]);
196 	c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]);
197 	c.m_breakingImpulseThreshold = breakingThreshold;
198 	c.m_constraintType = B3_GPU_POINT2POINT_CONSTRAINT_TYPE;
199 	m_data->m_cpuConstraints.push_back(c);
200 	return c.m_uid;
201 }
createFixedConstraint(int bodyA,int bodyB,const float * pivotInA,const float * pivotInB,const float * relTargetAB,float breakingThreshold)202 int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold)
203 {
204 	m_data->m_gpuSolver->recomputeBatches();
205 	b3GpuGenericConstraint c;
206 	c.m_uid = m_data->m_constraintUid;
207 	m_data->m_constraintUid++;
208 	c.m_flags = B3_CONSTRAINT_FLAG_ENABLED;
209 	c.m_rbA = bodyA;
210 	c.m_rbB = bodyB;
211 	c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]);
212 	c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]);
213 	c.m_relTargetAB.setValue(relTargetAB[0], relTargetAB[1], relTargetAB[2], relTargetAB[3]);
214 	c.m_breakingImpulseThreshold = breakingThreshold;
215 	c.m_constraintType = B3_GPU_FIXED_CONSTRAINT_TYPE;
216 
217 	m_data->m_cpuConstraints.push_back(c);
218 	return c.m_uid;
219 }
220 
stepSimulation(float deltaTime)221 void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
222 {
223 	//update worldspace AABBs from local AABB/worldtransform
224 	{
225 		B3_PROFILE("setupGpuAabbs");
226 		setupGpuAabbsFull();
227 	}
228 
229 	int numPairs = 0;
230 
231 	//compute overlapping pairs
232 	{
233 		if (gUseDbvt)
234 		{
235 			{
236 				B3_PROFILE("setAabb");
237 				m_data->m_allAabbsGPU->copyToHost(m_data->m_allAabbsCPU);
238 				for (int i = 0; i < m_data->m_allAabbsCPU.size(); i++)
239 				{
240 					b3Vector3 aabbMin = b3MakeVector3(m_data->m_allAabbsCPU[i].m_min[0], m_data->m_allAabbsCPU[i].m_min[1], m_data->m_allAabbsCPU[i].m_min[2]);
241 					b3Vector3 aabbMax = b3MakeVector3(m_data->m_allAabbsCPU[i].m_max[0], m_data->m_allAabbsCPU[i].m_max[1], m_data->m_allAabbsCPU[i].m_max[2]);
242 					m_data->m_broadphaseDbvt->setAabb(i, aabbMin, aabbMax, 0);
243 				}
244 			}
245 
246 			{
247 				B3_PROFILE("calculateOverlappingPairs");
248 				m_data->m_broadphaseDbvt->calculateOverlappingPairs();
249 			}
250 			numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs();
251 		}
252 		else
253 		{
254 			if (gUseCalculateOverlappingPairsHost)
255 			{
256 				m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs);
257 			}
258 			else
259 			{
260 				m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs);
261 			}
262 			numPairs = m_data->m_broadphaseSap->getNumOverlap();
263 		}
264 	}
265 
266 	//compute contact points
267 	//	printf("numPairs=%d\n",numPairs);
268 
269 	int numContacts = 0;
270 
271 	int numBodies = m_data->m_narrowphase->getNumRigidBodies();
272 
273 	if (numPairs)
274 	{
275 		cl_mem pairs = 0;
276 		cl_mem aabbsWS = 0;
277 		if (gUseDbvt)
278 		{
279 			B3_PROFILE("m_overlappingPairsGPU->copyFromHost");
280 			m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
281 			pairs = m_data->m_overlappingPairsGPU->getBufferCL();
282 			aabbsWS = m_data->m_allAabbsGPU->getBufferCL();
283 		}
284 		else
285 		{
286 			pairs = m_data->m_broadphaseSap->getOverlappingPairBuffer();
287 			aabbsWS = m_data->m_broadphaseSap->getAabbBufferWS();
288 		}
289 
290 		m_data->m_overlappingPairsGPU->resize(numPairs);
291 
292 		//mark the contacts for each pair as 'unused'
293 		if (numPairs)
294 		{
295 			b3OpenCLArray<b3BroadphasePair> gpuPairs(this->m_data->m_context, m_data->m_queue);
296 			gpuPairs.setFromOpenCLBuffer(pairs, numPairs);
297 
298 			if (gClearPairsOnGpu)
299 			{
300 				//b3AlignedObjectArray<b3BroadphasePair> hostPairs;//just for debugging
301 				//gpuPairs.copyToHost(hostPairs);
302 
303 				b3LauncherCL launcher(m_data->m_queue, m_data->m_clearOverlappingPairsKernel, "clearOverlappingPairsKernel");
304 				launcher.setBuffer(pairs);
305 				launcher.setConst(numPairs);
306 				launcher.launch1D(numPairs);
307 
308 				//gpuPairs.copyToHost(hostPairs);
309 			}
310 			else
311 			{
312 				b3AlignedObjectArray<b3BroadphasePair> hostPairs;
313 				gpuPairs.copyToHost(hostPairs);
314 
315 				for (int i = 0; i < hostPairs.size(); i++)
316 				{
317 					hostPairs[i].z = 0xffffffff;
318 				}
319 
320 				gpuPairs.copyFromHost(hostPairs);
321 			}
322 		}
323 
324 		m_data->m_narrowphase->computeContacts(pairs, numPairs, aabbsWS, numBodies);
325 		numContacts = m_data->m_narrowphase->getNumContactsGpu();
326 
327 		if (gUseDbvt)
328 		{
329 			///store the cached information (contact locations in the 'z' component)
330 			B3_PROFILE("m_overlappingPairsGPU->copyToHost");
331 			m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
332 		}
333 		if (gDumpContactStats && numContacts)
334 		{
335 			m_data->m_narrowphase->getContactsGpu();
336 
337 			printf("numContacts = %d\n", numContacts);
338 
339 			int totalPoints = 0;
340 			const b3Contact4* contacts = m_data->m_narrowphase->getContactsCPU();
341 
342 			for (int i = 0; i < numContacts; i++)
343 			{
344 				totalPoints += contacts->getNPoints();
345 			}
346 			printf("totalPoints=%d\n", totalPoints);
347 		}
348 	}
349 
350 	//convert contact points to contact constraints
351 
352 	//solve constraints
353 
354 	b3OpenCLArray<b3RigidBodyData> gpuBodies(m_data->m_context, m_data->m_queue, 0, true);
355 	gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(), m_data->m_narrowphase->getNumRigidBodies());
356 	b3OpenCLArray<b3InertiaData> gpuInertias(m_data->m_context, m_data->m_queue, 0, true);
357 	gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(), m_data->m_narrowphase->getNumRigidBodies());
358 	b3OpenCLArray<b3Contact4> gpuContacts(m_data->m_context, m_data->m_queue, 0, true);
359 	gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(), m_data->m_narrowphase->getNumContactsGpu());
360 
361 	int numJoints = m_data->m_joints.size() ? m_data->m_joints.size() : m_data->m_cpuConstraints.size();
362 	if (useBullet2CpuSolver && numJoints)
363 	{
364 		//	b3AlignedObjectArray<b3Contact4> hostContacts;
365 		//gpuContacts.copyToHost(hostContacts);
366 		{
367 			bool useGpu = m_data->m_joints.size() == 0;
368 
369 			//			b3Contact4* contacts = numContacts? &hostContacts[0]: 0;
370 			//m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints);
371 			if (useGpu)
372 			{
373 				m_data->m_gpuSolver->solveJoints(m_data->m_narrowphase->getNumRigidBodies(), &gpuBodies, &gpuInertias, numJoints, m_data->m_gpuConstraints);
374 			}
375 			else
376 			{
377 				b3AlignedObjectArray<b3RigidBodyData> hostBodies;
378 				gpuBodies.copyToHost(hostBodies);
379 				b3AlignedObjectArray<b3InertiaData> hostInertias;
380 				gpuInertias.copyToHost(hostInertias);
381 
382 				b3TypedConstraint** joints = numJoints ? &m_data->m_joints[0] : 0;
383 				m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(), &hostBodies[0], &hostInertias[0], 0, 0, numJoints, joints);
384 				gpuBodies.copyFromHost(hostBodies);
385 			}
386 		}
387 	}
388 
389 	if (numContacts)
390 	{
391 #ifdef TEST_OTHER_GPU_SOLVER
392 
393 		if (gUseJacobi)
394 		{
395 			bool useGpu = true;
396 			if (useGpu)
397 			{
398 				bool forceHost = false;
399 				if (forceHost)
400 				{
401 					b3AlignedObjectArray<b3RigidBodyData> hostBodies;
402 					b3AlignedObjectArray<b3InertiaData> hostInertias;
403 					b3AlignedObjectArray<b3Contact4> hostContacts;
404 
405 					{
406 						B3_PROFILE("copyToHost");
407 						gpuBodies.copyToHost(hostBodies);
408 						gpuInertias.copyToHost(hostInertias);
409 						gpuContacts.copyToHost(hostContacts);
410 					}
411 
412 					{
413 						b3JacobiSolverInfo solverInfo;
414 						m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(), &hostContacts[0], hostContacts.size(), solverInfo);
415 					}
416 					{
417 						B3_PROFILE("copyFromHost");
418 						gpuBodies.copyFromHost(hostBodies);
419 					}
420 				}
421 				else
422 
423 				{
424 					int static0Index = m_data->m_narrowphase->getStatic0Index();
425 					b3JacobiSolverInfo solverInfo;
426 					//m_data->m_solver3->solveContacts(    >solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo);
427 					//m_data->m_solver3->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
428 					m_data->m_solver3->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index);
429 				}
430 			}
431 			else
432 			{
433 				b3AlignedObjectArray<b3RigidBodyData> hostBodies;
434 				gpuBodies.copyToHost(hostBodies);
435 				b3AlignedObjectArray<b3InertiaData> hostInertias;
436 				gpuInertias.copyToHost(hostInertias);
437 				b3AlignedObjectArray<b3Contact4> hostContacts;
438 				gpuContacts.copyToHost(hostContacts);
439 				{
440 					//m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
441 				}
442 				gpuBodies.copyFromHost(hostBodies);
443 			}
444 		}
445 		else
446 #endif  //TEST_OTHER_GPU_SOLVER
447 		{
448 			int static0Index = m_data->m_narrowphase->getStatic0Index();
449 			m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index);
450 
451 			//m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL());
452 
453 			/*m_data->m_solver3->solveContactConstraintHost(
454 			(b3OpenCLArray<RigidBodyBase::Body>*)&gpuBodies,
455 			(b3OpenCLArray<RigidBodyBase::Inertia>*)&gpuInertias,
456 			(b3OpenCLArray<Constraint4>*) &gpuContacts,
457 			0,numContacts,256);
458 			*/
459 		}
460 	}
461 
462 	integrate(deltaTime);
463 }
464 
integrate(float timeStep)465 void b3GpuRigidBodyPipeline::integrate(float timeStep)
466 {
467 	//integrate
468 	int numBodies = m_data->m_narrowphase->getNumRigidBodies();
469 	float angularDamp = 0.99f;
470 
471 	if (gIntegrateOnCpu)
472 	{
473 		if (numBodies)
474 		{
475 			b3GpuNarrowPhaseInternalData* npData = m_data->m_narrowphase->getInternalData();
476 			npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU);
477 
478 			b3RigidBodyData_t* bodies = &npData->m_bodyBufferCPU->at(0);
479 
480 			for (int nodeID = 0; nodeID < numBodies; nodeID++)
481 			{
482 				integrateSingleTransform(bodies, nodeID, timeStep, angularDamp, m_data->m_gravity);
483 			}
484 			npData->m_bodyBufferGPU->copyFromHost(*npData->m_bodyBufferCPU);
485 		}
486 	}
487 	else
488 	{
489 		b3LauncherCL launcher(m_data->m_queue, m_data->m_integrateTransformsKernel, "m_integrateTransformsKernel");
490 		launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu());
491 
492 		launcher.setConst(numBodies);
493 		launcher.setConst(timeStep);
494 		launcher.setConst(angularDamp);
495 		launcher.setConst(m_data->m_gravity);
496 		launcher.launch1D(numBodies);
497 	}
498 }
499 
setupGpuAabbsFull()500 void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
501 {
502 	cl_int ciErrNum = 0;
503 
504 	int numBodies = m_data->m_narrowphase->getNumRigidBodies();
505 	if (!numBodies)
506 		return;
507 
508 	if (gCalcWorldSpaceAabbOnCpu)
509 	{
510 		if (numBodies)
511 		{
512 			if (gUseDbvt)
513 			{
514 				m_data->m_allAabbsCPU.resize(numBodies);
515 				m_data->m_narrowphase->readbackAllBodiesToCpu();
516 				for (int i = 0; i < numBodies; i++)
517 				{
518 					b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_allAabbsCPU[0]);
519 				}
520 				m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
521 			}
522 			else
523 			{
524 				m_data->m_broadphaseSap->getAllAabbsCPU().resize(numBodies);
525 				m_data->m_narrowphase->readbackAllBodiesToCpu();
526 				for (int i = 0; i < numBodies; i++)
527 				{
528 					b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_broadphaseSap->getAllAabbsCPU()[0]);
529 				}
530 				m_data->m_broadphaseSap->getAllAabbsGPU().copyFromHost(m_data->m_broadphaseSap->getAllAabbsCPU());
531 				//m_data->m_broadphaseSap->writeAabbsToGpu();
532 			}
533 		}
534 	}
535 	else
536 	{
537 		//__kernel void initializeGpuAabbsFull(  const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB)
538 		b3LauncherCL launcher(m_data->m_queue, m_data->m_updateAabbsKernel, "m_updateAabbsKernel");
539 		launcher.setConst(numBodies);
540 		cl_mem bodies = m_data->m_narrowphase->getBodiesGpu();
541 		launcher.setBuffer(bodies);
542 		cl_mem collidables = m_data->m_narrowphase->getCollidablesGpu();
543 		launcher.setBuffer(collidables);
544 		cl_mem localAabbs = m_data->m_narrowphase->getAabbLocalSpaceBufferGpu();
545 		launcher.setBuffer(localAabbs);
546 
547 		cl_mem worldAabbs = 0;
548 		if (gUseDbvt)
549 		{
550 			worldAabbs = m_data->m_allAabbsGPU->getBufferCL();
551 		}
552 		else
553 		{
554 			worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS();
555 		}
556 		launcher.setBuffer(worldAabbs);
557 		launcher.launch1D(numBodies);
558 
559 		oclCHECKERROR(ciErrNum, CL_SUCCESS);
560 	}
561 
562 	/*
563 	b3AlignedObjectArray<b3SapAabb> aabbs;
564 	m_data->m_broadphaseSap->m_allAabbsGPU.copyToHost(aabbs);
565 
566 	printf("numAabbs = %d\n",  aabbs.size());
567 
568 	for (int i=0;i<aabbs.size();i++)
569 	{
570 		printf("aabb[%d].m_min=%f,%f,%f,%d\n",i,aabbs[i].m_minVec[0],aabbs[i].m_minVec[1],aabbs[i].m_minVec[2],aabbs[i].m_minIndices[3]);
571 		printf("aabb[%d].m_max=%f,%f,%f,%d\n",i,aabbs[i].m_maxVec[0],aabbs[i].m_maxVec[1],aabbs[i].m_maxVec[2],aabbs[i].m_signedMaxIndices[3]);
572 
573 	};
574 	*/
575 }
576 
getBodyBuffer()577 cl_mem b3GpuRigidBodyPipeline::getBodyBuffer()
578 {
579 	return m_data->m_narrowphase->getBodiesGpu();
580 }
581 
getNumBodies() const582 int b3GpuRigidBodyPipeline::getNumBodies() const
583 {
584 	return m_data->m_narrowphase->getNumRigidBodies();
585 }
586 
setGravity(const float * grav)587 void b3GpuRigidBodyPipeline::setGravity(const float* grav)
588 {
589 	m_data->m_gravity.setValue(grav[0], grav[1], grav[2]);
590 }
591 
copyConstraintsToHost()592 void b3GpuRigidBodyPipeline::copyConstraintsToHost()
593 {
594 	m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints);
595 }
596 
writeAllInstancesToGpu()597 void b3GpuRigidBodyPipeline::writeAllInstancesToGpu()
598 {
599 	m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
600 	m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints);
601 }
602 
registerPhysicsInstance(float mass,const float * position,const float * orientation,int collidableIndex,int userIndex,bool writeInstanceToGpu)603 int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex, bool writeInstanceToGpu)
604 {
605 	b3Vector3 aabbMin = b3MakeVector3(0, 0, 0), aabbMax = b3MakeVector3(0, 0, 0);
606 
607 	if (collidableIndex >= 0)
608 	{
609 		b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex);
610 		b3Vector3 localAabbMin = b3MakeVector3(localAabb.m_min[0], localAabb.m_min[1], localAabb.m_min[2]);
611 		b3Vector3 localAabbMax = b3MakeVector3(localAabb.m_max[0], localAabb.m_max[1], localAabb.m_max[2]);
612 
613 		b3Scalar margin = 0.01f;
614 		b3Transform t;
615 		t.setIdentity();
616 		t.setOrigin(b3MakeVector3(position[0], position[1], position[2]));
617 		t.setRotation(b3Quaternion(orientation[0], orientation[1], orientation[2], orientation[3]));
618 		b3TransformAabb(localAabbMin, localAabbMax, margin, t, aabbMin, aabbMax);
619 	}
620 	else
621 	{
622 		b3Error("registerPhysicsInstance using invalid collidableIndex\n");
623 		return -1;
624 	}
625 
626 	bool writeToGpu = false;
627 	int bodyIndex = m_data->m_narrowphase->getNumRigidBodies();
628 	bodyIndex = m_data->m_narrowphase->registerRigidBody(collidableIndex, mass, position, orientation, &aabbMin.getX(), &aabbMax.getX(), writeToGpu);
629 
630 	if (bodyIndex >= 0)
631 	{
632 		if (gUseDbvt)
633 		{
634 			m_data->m_broadphaseDbvt->createProxy(aabbMin, aabbMax, bodyIndex, 0, 1, 1);
635 			b3SapAabb aabb;
636 			for (int i = 0; i < 3; i++)
637 			{
638 				aabb.m_min[i] = aabbMin[i];
639 				aabb.m_max[i] = aabbMax[i];
640 				aabb.m_minIndices[3] = bodyIndex;
641 			}
642 			m_data->m_allAabbsCPU.push_back(aabb);
643 			if (writeInstanceToGpu)
644 			{
645 				m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
646 			}
647 		}
648 		else
649 		{
650 			if (mass)
651 			{
652 				m_data->m_broadphaseSap->createProxy(aabbMin, aabbMax, bodyIndex, 1, 1);  //m_dispatcher);
653 			}
654 			else
655 			{
656 				m_data->m_broadphaseSap->createLargeProxy(aabbMin, aabbMax, bodyIndex, 1, 1);  //m_dispatcher);
657 			}
658 		}
659 	}
660 
661 	/*
662 	if (mass>0.f)
663 		m_numDynamicPhysicsInstances++;
664 
665 	m_numPhysicsInstances++;
666 	*/
667 
668 	return bodyIndex;
669 }
670 
castRays(const b3AlignedObjectArray<b3RayInfo> & rays,b3AlignedObjectArray<b3RayHit> & hitResults)671 void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults)
672 {
673 	this->m_data->m_raycaster->castRays(rays, hitResults,
674 										getNumBodies(), this->m_data->m_narrowphase->getBodiesCpu(),
675 										m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(),
676 										m_data->m_narrowphase->getInternalData(), m_data->m_broadphaseSap);
677 }
678