1 /*
2 Copyright (c) 2013 Advanced Micro Devices, Inc.
3
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 //Originally written by Erwin Coumans
15
16 #include "b3GpuRigidBodyPipeline.h"
17 #include "b3GpuRigidBodyPipelineInternalData.h"
18 #include "kernels/integrateKernel.h"
19 #include "kernels/updateAabbsKernel.h"
20
21 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
22 #include "b3GpuNarrowPhase.h"
23 #include "Bullet3Geometry/b3AabbUtil.h"
24 #include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
25 #include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
26 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
27 #include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h"
28 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h"
29 #include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h"
30
31 //#define TEST_OTHER_GPU_SOLVER
32
33 #define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl"
34 #define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl"
35
36 bool useBullet2CpuSolver = true;
37
38 //choice of contact solver
39 bool gUseJacobi = false;
40 bool gUseDbvt = false;
41 bool gDumpContactStats = false;
42 bool gCalcWorldSpaceAabbOnCpu = false;
43 bool gUseCalculateOverlappingPairsHost = false;
44 bool gIntegrateOnCpu = false;
45 bool gClearPairsOnGpu = true;
46
47 #define TEST_OTHER_GPU_SOLVER 1
48 #ifdef TEST_OTHER_GPU_SOLVER
49 #include "b3GpuJacobiContactSolver.h"
50 #endif //TEST_OTHER_GPU_SOLVER
51
52 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
53 #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
54 #include "Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h"
55
56 #include "b3GpuPgsContactSolver.h"
57 #include "b3Solver.h"
58
59 #include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"
60 #include "Bullet3OpenCL/Raycast/b3GpuRaycast.h"
61
62 #include "Bullet3Dynamics/shared/b3IntegrateTransforms.h"
63 #include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
64
b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device,cl_command_queue q,class b3GpuNarrowPhase * narrowphase,class b3GpuBroadphaseInterface * broadphaseSap,struct b3DynamicBvhBroadphase * broadphaseDbvt,const b3Config & config)65 b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx, cl_device_id device, cl_command_queue q, class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config)
66 {
67 m_data = new b3GpuRigidBodyPipelineInternalData;
68 m_data->m_constraintUid = 0;
69 m_data->m_config = config;
70 m_data->m_context = ctx;
71 m_data->m_device = device;
72 m_data->m_queue = q;
73
74 m_data->m_solver = new b3PgsJacobiSolver(true); //new b3PgsJacobiSolver(true);
75 m_data->m_gpuSolver = new b3GpuPgsConstraintSolver(ctx, device, q, true); //new b3PgsJacobiSolver(true);
76
77 m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx, q, config.m_maxConvexBodies);
78 m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx, q, config.m_maxBroadphasePairs);
79
80 m_data->m_gpuConstraints = new b3OpenCLArray<b3GpuGenericConstraint>(ctx, q);
81 #ifdef TEST_OTHER_GPU_SOLVER
82 m_data->m_solver3 = new b3GpuJacobiContactSolver(ctx, device, q, config.m_maxBroadphasePairs);
83 #endif // TEST_OTHER_GPU_SOLVER
84
85 m_data->m_solver2 = new b3GpuPgsContactSolver(ctx, device, q, config.m_maxBroadphasePairs);
86
87 m_data->m_raycaster = new b3GpuRaycast(ctx, device, q);
88
89 m_data->m_broadphaseDbvt = broadphaseDbvt;
90 m_data->m_broadphaseSap = broadphaseSap;
91 m_data->m_narrowphase = narrowphase;
92 m_data->m_gravity.setValue(0.f, -9.8f, 0.f);
93
94 cl_int errNum = 0;
95
96 {
97 cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, integrateKernelCL, &errNum, "", B3_RIGIDBODY_INTEGRATE_PATH);
98 b3Assert(errNum == CL_SUCCESS);
99 m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, integrateKernelCL, "integrateTransformsKernel", &errNum, prog);
100 b3Assert(errNum == CL_SUCCESS);
101 clReleaseProgram(prog);
102 }
103 {
104 cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, &errNum, "", B3_RIGIDBODY_UPDATEAABB_PATH);
105 b3Assert(errNum == CL_SUCCESS);
106 m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "initializeGpuAabbsFull", &errNum, prog);
107 b3Assert(errNum == CL_SUCCESS);
108
109 m_data->m_clearOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "clearOverlappingPairsKernel", &errNum, prog);
110 b3Assert(errNum == CL_SUCCESS);
111
112 clReleaseProgram(prog);
113 }
114 }
115
~b3GpuRigidBodyPipeline()116 b3GpuRigidBodyPipeline::~b3GpuRigidBodyPipeline()
117 {
118 if (m_data->m_integrateTransformsKernel)
119 clReleaseKernel(m_data->m_integrateTransformsKernel);
120
121 if (m_data->m_updateAabbsKernel)
122 clReleaseKernel(m_data->m_updateAabbsKernel);
123
124 if (m_data->m_clearOverlappingPairsKernel)
125 clReleaseKernel(m_data->m_clearOverlappingPairsKernel);
126 delete m_data->m_raycaster;
127 delete m_data->m_solver;
128 delete m_data->m_allAabbsGPU;
129 delete m_data->m_gpuConstraints;
130 delete m_data->m_overlappingPairsGPU;
131
132 #ifdef TEST_OTHER_GPU_SOLVER
133 delete m_data->m_solver3;
134 #endif //TEST_OTHER_GPU_SOLVER
135
136 delete m_data->m_solver2;
137
138 delete m_data;
139 }
140
reset()141 void b3GpuRigidBodyPipeline::reset()
142 {
143 m_data->m_gpuConstraints->resize(0);
144 m_data->m_cpuConstraints.resize(0);
145 m_data->m_allAabbsGPU->resize(0);
146 m_data->m_allAabbsCPU.resize(0);
147 }
148
addConstraint(b3TypedConstraint * constraint)149 void b3GpuRigidBodyPipeline::addConstraint(b3TypedConstraint* constraint)
150 {
151 m_data->m_joints.push_back(constraint);
152 }
153
removeConstraint(b3TypedConstraint * constraint)154 void b3GpuRigidBodyPipeline::removeConstraint(b3TypedConstraint* constraint)
155 {
156 m_data->m_joints.remove(constraint);
157 }
158
removeConstraintByUid(int uid)159 void b3GpuRigidBodyPipeline::removeConstraintByUid(int uid)
160 {
161 m_data->m_gpuSolver->recomputeBatches();
162 //slow linear search
163 m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints);
164 //remove
165 for (int i = 0; i < m_data->m_cpuConstraints.size(); i++)
166 {
167 if (m_data->m_cpuConstraints[i].m_uid == uid)
168 {
169 //m_data->m_cpuConstraints.remove(m_data->m_cpuConstraints[i]);
170 m_data->m_cpuConstraints.swap(i, m_data->m_cpuConstraints.size() - 1);
171 m_data->m_cpuConstraints.pop_back();
172
173 break;
174 }
175 }
176
177 if (m_data->m_cpuConstraints.size())
178 {
179 m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints);
180 }
181 else
182 {
183 m_data->m_gpuConstraints->resize(0);
184 }
185 }
createPoint2PointConstraint(int bodyA,int bodyB,const float * pivotInA,const float * pivotInB,float breakingThreshold)186 int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold)
187 {
188 m_data->m_gpuSolver->recomputeBatches();
189 b3GpuGenericConstraint c;
190 c.m_uid = m_data->m_constraintUid;
191 m_data->m_constraintUid++;
192 c.m_flags = B3_CONSTRAINT_FLAG_ENABLED;
193 c.m_rbA = bodyA;
194 c.m_rbB = bodyB;
195 c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]);
196 c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]);
197 c.m_breakingImpulseThreshold = breakingThreshold;
198 c.m_constraintType = B3_GPU_POINT2POINT_CONSTRAINT_TYPE;
199 m_data->m_cpuConstraints.push_back(c);
200 return c.m_uid;
201 }
createFixedConstraint(int bodyA,int bodyB,const float * pivotInA,const float * pivotInB,const float * relTargetAB,float breakingThreshold)202 int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold)
203 {
204 m_data->m_gpuSolver->recomputeBatches();
205 b3GpuGenericConstraint c;
206 c.m_uid = m_data->m_constraintUid;
207 m_data->m_constraintUid++;
208 c.m_flags = B3_CONSTRAINT_FLAG_ENABLED;
209 c.m_rbA = bodyA;
210 c.m_rbB = bodyB;
211 c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]);
212 c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]);
213 c.m_relTargetAB.setValue(relTargetAB[0], relTargetAB[1], relTargetAB[2], relTargetAB[3]);
214 c.m_breakingImpulseThreshold = breakingThreshold;
215 c.m_constraintType = B3_GPU_FIXED_CONSTRAINT_TYPE;
216
217 m_data->m_cpuConstraints.push_back(c);
218 return c.m_uid;
219 }
220
stepSimulation(float deltaTime)221 void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
222 {
223 //update worldspace AABBs from local AABB/worldtransform
224 {
225 B3_PROFILE("setupGpuAabbs");
226 setupGpuAabbsFull();
227 }
228
229 int numPairs = 0;
230
231 //compute overlapping pairs
232 {
233 if (gUseDbvt)
234 {
235 {
236 B3_PROFILE("setAabb");
237 m_data->m_allAabbsGPU->copyToHost(m_data->m_allAabbsCPU);
238 for (int i = 0; i < m_data->m_allAabbsCPU.size(); i++)
239 {
240 b3Vector3 aabbMin = b3MakeVector3(m_data->m_allAabbsCPU[i].m_min[0], m_data->m_allAabbsCPU[i].m_min[1], m_data->m_allAabbsCPU[i].m_min[2]);
241 b3Vector3 aabbMax = b3MakeVector3(m_data->m_allAabbsCPU[i].m_max[0], m_data->m_allAabbsCPU[i].m_max[1], m_data->m_allAabbsCPU[i].m_max[2]);
242 m_data->m_broadphaseDbvt->setAabb(i, aabbMin, aabbMax, 0);
243 }
244 }
245
246 {
247 B3_PROFILE("calculateOverlappingPairs");
248 m_data->m_broadphaseDbvt->calculateOverlappingPairs();
249 }
250 numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs();
251 }
252 else
253 {
254 if (gUseCalculateOverlappingPairsHost)
255 {
256 m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs);
257 }
258 else
259 {
260 m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs);
261 }
262 numPairs = m_data->m_broadphaseSap->getNumOverlap();
263 }
264 }
265
266 //compute contact points
267 // printf("numPairs=%d\n",numPairs);
268
269 int numContacts = 0;
270
271 int numBodies = m_data->m_narrowphase->getNumRigidBodies();
272
273 if (numPairs)
274 {
275 cl_mem pairs = 0;
276 cl_mem aabbsWS = 0;
277 if (gUseDbvt)
278 {
279 B3_PROFILE("m_overlappingPairsGPU->copyFromHost");
280 m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
281 pairs = m_data->m_overlappingPairsGPU->getBufferCL();
282 aabbsWS = m_data->m_allAabbsGPU->getBufferCL();
283 }
284 else
285 {
286 pairs = m_data->m_broadphaseSap->getOverlappingPairBuffer();
287 aabbsWS = m_data->m_broadphaseSap->getAabbBufferWS();
288 }
289
290 m_data->m_overlappingPairsGPU->resize(numPairs);
291
292 //mark the contacts for each pair as 'unused'
293 if (numPairs)
294 {
295 b3OpenCLArray<b3BroadphasePair> gpuPairs(this->m_data->m_context, m_data->m_queue);
296 gpuPairs.setFromOpenCLBuffer(pairs, numPairs);
297
298 if (gClearPairsOnGpu)
299 {
300 //b3AlignedObjectArray<b3BroadphasePair> hostPairs;//just for debugging
301 //gpuPairs.copyToHost(hostPairs);
302
303 b3LauncherCL launcher(m_data->m_queue, m_data->m_clearOverlappingPairsKernel, "clearOverlappingPairsKernel");
304 launcher.setBuffer(pairs);
305 launcher.setConst(numPairs);
306 launcher.launch1D(numPairs);
307
308 //gpuPairs.copyToHost(hostPairs);
309 }
310 else
311 {
312 b3AlignedObjectArray<b3BroadphasePair> hostPairs;
313 gpuPairs.copyToHost(hostPairs);
314
315 for (int i = 0; i < hostPairs.size(); i++)
316 {
317 hostPairs[i].z = 0xffffffff;
318 }
319
320 gpuPairs.copyFromHost(hostPairs);
321 }
322 }
323
324 m_data->m_narrowphase->computeContacts(pairs, numPairs, aabbsWS, numBodies);
325 numContacts = m_data->m_narrowphase->getNumContactsGpu();
326
327 if (gUseDbvt)
328 {
329 ///store the cached information (contact locations in the 'z' component)
330 B3_PROFILE("m_overlappingPairsGPU->copyToHost");
331 m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
332 }
333 if (gDumpContactStats && numContacts)
334 {
335 m_data->m_narrowphase->getContactsGpu();
336
337 printf("numContacts = %d\n", numContacts);
338
339 int totalPoints = 0;
340 const b3Contact4* contacts = m_data->m_narrowphase->getContactsCPU();
341
342 for (int i = 0; i < numContacts; i++)
343 {
344 totalPoints += contacts->getNPoints();
345 }
346 printf("totalPoints=%d\n", totalPoints);
347 }
348 }
349
350 //convert contact points to contact constraints
351
352 //solve constraints
353
354 b3OpenCLArray<b3RigidBodyData> gpuBodies(m_data->m_context, m_data->m_queue, 0, true);
355 gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(), m_data->m_narrowphase->getNumRigidBodies());
356 b3OpenCLArray<b3InertiaData> gpuInertias(m_data->m_context, m_data->m_queue, 0, true);
357 gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(), m_data->m_narrowphase->getNumRigidBodies());
358 b3OpenCLArray<b3Contact4> gpuContacts(m_data->m_context, m_data->m_queue, 0, true);
359 gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(), m_data->m_narrowphase->getNumContactsGpu());
360
361 int numJoints = m_data->m_joints.size() ? m_data->m_joints.size() : m_data->m_cpuConstraints.size();
362 if (useBullet2CpuSolver && numJoints)
363 {
364 // b3AlignedObjectArray<b3Contact4> hostContacts;
365 //gpuContacts.copyToHost(hostContacts);
366 {
367 bool useGpu = m_data->m_joints.size() == 0;
368
369 // b3Contact4* contacts = numContacts? &hostContacts[0]: 0;
370 //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints);
371 if (useGpu)
372 {
373 m_data->m_gpuSolver->solveJoints(m_data->m_narrowphase->getNumRigidBodies(), &gpuBodies, &gpuInertias, numJoints, m_data->m_gpuConstraints);
374 }
375 else
376 {
377 b3AlignedObjectArray<b3RigidBodyData> hostBodies;
378 gpuBodies.copyToHost(hostBodies);
379 b3AlignedObjectArray<b3InertiaData> hostInertias;
380 gpuInertias.copyToHost(hostInertias);
381
382 b3TypedConstraint** joints = numJoints ? &m_data->m_joints[0] : 0;
383 m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(), &hostBodies[0], &hostInertias[0], 0, 0, numJoints, joints);
384 gpuBodies.copyFromHost(hostBodies);
385 }
386 }
387 }
388
389 if (numContacts)
390 {
391 #ifdef TEST_OTHER_GPU_SOLVER
392
393 if (gUseJacobi)
394 {
395 bool useGpu = true;
396 if (useGpu)
397 {
398 bool forceHost = false;
399 if (forceHost)
400 {
401 b3AlignedObjectArray<b3RigidBodyData> hostBodies;
402 b3AlignedObjectArray<b3InertiaData> hostInertias;
403 b3AlignedObjectArray<b3Contact4> hostContacts;
404
405 {
406 B3_PROFILE("copyToHost");
407 gpuBodies.copyToHost(hostBodies);
408 gpuInertias.copyToHost(hostInertias);
409 gpuContacts.copyToHost(hostContacts);
410 }
411
412 {
413 b3JacobiSolverInfo solverInfo;
414 m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(), &hostContacts[0], hostContacts.size(), solverInfo);
415 }
416 {
417 B3_PROFILE("copyFromHost");
418 gpuBodies.copyFromHost(hostBodies);
419 }
420 }
421 else
422
423 {
424 int static0Index = m_data->m_narrowphase->getStatic0Index();
425 b3JacobiSolverInfo solverInfo;
426 //m_data->m_solver3->solveContacts( >solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo);
427 //m_data->m_solver3->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
428 m_data->m_solver3->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index);
429 }
430 }
431 else
432 {
433 b3AlignedObjectArray<b3RigidBodyData> hostBodies;
434 gpuBodies.copyToHost(hostBodies);
435 b3AlignedObjectArray<b3InertiaData> hostInertias;
436 gpuInertias.copyToHost(hostInertias);
437 b3AlignedObjectArray<b3Contact4> hostContacts;
438 gpuContacts.copyToHost(hostContacts);
439 {
440 //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
441 }
442 gpuBodies.copyFromHost(hostBodies);
443 }
444 }
445 else
446 #endif //TEST_OTHER_GPU_SOLVER
447 {
448 int static0Index = m_data->m_narrowphase->getStatic0Index();
449 m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index);
450
451 //m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL());
452
453 /*m_data->m_solver3->solveContactConstraintHost(
454 (b3OpenCLArray<RigidBodyBase::Body>*)&gpuBodies,
455 (b3OpenCLArray<RigidBodyBase::Inertia>*)&gpuInertias,
456 (b3OpenCLArray<Constraint4>*) &gpuContacts,
457 0,numContacts,256);
458 */
459 }
460 }
461
462 integrate(deltaTime);
463 }
464
integrate(float timeStep)465 void b3GpuRigidBodyPipeline::integrate(float timeStep)
466 {
467 //integrate
468 int numBodies = m_data->m_narrowphase->getNumRigidBodies();
469 float angularDamp = 0.99f;
470
471 if (gIntegrateOnCpu)
472 {
473 if (numBodies)
474 {
475 b3GpuNarrowPhaseInternalData* npData = m_data->m_narrowphase->getInternalData();
476 npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU);
477
478 b3RigidBodyData_t* bodies = &npData->m_bodyBufferCPU->at(0);
479
480 for (int nodeID = 0; nodeID < numBodies; nodeID++)
481 {
482 integrateSingleTransform(bodies, nodeID, timeStep, angularDamp, m_data->m_gravity);
483 }
484 npData->m_bodyBufferGPU->copyFromHost(*npData->m_bodyBufferCPU);
485 }
486 }
487 else
488 {
489 b3LauncherCL launcher(m_data->m_queue, m_data->m_integrateTransformsKernel, "m_integrateTransformsKernel");
490 launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu());
491
492 launcher.setConst(numBodies);
493 launcher.setConst(timeStep);
494 launcher.setConst(angularDamp);
495 launcher.setConst(m_data->m_gravity);
496 launcher.launch1D(numBodies);
497 }
498 }
499
setupGpuAabbsFull()500 void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
501 {
502 cl_int ciErrNum = 0;
503
504 int numBodies = m_data->m_narrowphase->getNumRigidBodies();
505 if (!numBodies)
506 return;
507
508 if (gCalcWorldSpaceAabbOnCpu)
509 {
510 if (numBodies)
511 {
512 if (gUseDbvt)
513 {
514 m_data->m_allAabbsCPU.resize(numBodies);
515 m_data->m_narrowphase->readbackAllBodiesToCpu();
516 for (int i = 0; i < numBodies; i++)
517 {
518 b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_allAabbsCPU[0]);
519 }
520 m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
521 }
522 else
523 {
524 m_data->m_broadphaseSap->getAllAabbsCPU().resize(numBodies);
525 m_data->m_narrowphase->readbackAllBodiesToCpu();
526 for (int i = 0; i < numBodies; i++)
527 {
528 b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_broadphaseSap->getAllAabbsCPU()[0]);
529 }
530 m_data->m_broadphaseSap->getAllAabbsGPU().copyFromHost(m_data->m_broadphaseSap->getAllAabbsCPU());
531 //m_data->m_broadphaseSap->writeAabbsToGpu();
532 }
533 }
534 }
535 else
536 {
537 //__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB)
538 b3LauncherCL launcher(m_data->m_queue, m_data->m_updateAabbsKernel, "m_updateAabbsKernel");
539 launcher.setConst(numBodies);
540 cl_mem bodies = m_data->m_narrowphase->getBodiesGpu();
541 launcher.setBuffer(bodies);
542 cl_mem collidables = m_data->m_narrowphase->getCollidablesGpu();
543 launcher.setBuffer(collidables);
544 cl_mem localAabbs = m_data->m_narrowphase->getAabbLocalSpaceBufferGpu();
545 launcher.setBuffer(localAabbs);
546
547 cl_mem worldAabbs = 0;
548 if (gUseDbvt)
549 {
550 worldAabbs = m_data->m_allAabbsGPU->getBufferCL();
551 }
552 else
553 {
554 worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS();
555 }
556 launcher.setBuffer(worldAabbs);
557 launcher.launch1D(numBodies);
558
559 oclCHECKERROR(ciErrNum, CL_SUCCESS);
560 }
561
562 /*
563 b3AlignedObjectArray<b3SapAabb> aabbs;
564 m_data->m_broadphaseSap->m_allAabbsGPU.copyToHost(aabbs);
565
566 printf("numAabbs = %d\n", aabbs.size());
567
568 for (int i=0;i<aabbs.size();i++)
569 {
570 printf("aabb[%d].m_min=%f,%f,%f,%d\n",i,aabbs[i].m_minVec[0],aabbs[i].m_minVec[1],aabbs[i].m_minVec[2],aabbs[i].m_minIndices[3]);
571 printf("aabb[%d].m_max=%f,%f,%f,%d\n",i,aabbs[i].m_maxVec[0],aabbs[i].m_maxVec[1],aabbs[i].m_maxVec[2],aabbs[i].m_signedMaxIndices[3]);
572
573 };
574 */
575 }
576
getBodyBuffer()577 cl_mem b3GpuRigidBodyPipeline::getBodyBuffer()
578 {
579 return m_data->m_narrowphase->getBodiesGpu();
580 }
581
getNumBodies() const582 int b3GpuRigidBodyPipeline::getNumBodies() const
583 {
584 return m_data->m_narrowphase->getNumRigidBodies();
585 }
586
setGravity(const float * grav)587 void b3GpuRigidBodyPipeline::setGravity(const float* grav)
588 {
589 m_data->m_gravity.setValue(grav[0], grav[1], grav[2]);
590 }
591
copyConstraintsToHost()592 void b3GpuRigidBodyPipeline::copyConstraintsToHost()
593 {
594 m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints);
595 }
596
writeAllInstancesToGpu()597 void b3GpuRigidBodyPipeline::writeAllInstancesToGpu()
598 {
599 m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
600 m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints);
601 }
602
registerPhysicsInstance(float mass,const float * position,const float * orientation,int collidableIndex,int userIndex,bool writeInstanceToGpu)603 int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex, bool writeInstanceToGpu)
604 {
605 b3Vector3 aabbMin = b3MakeVector3(0, 0, 0), aabbMax = b3MakeVector3(0, 0, 0);
606
607 if (collidableIndex >= 0)
608 {
609 b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex);
610 b3Vector3 localAabbMin = b3MakeVector3(localAabb.m_min[0], localAabb.m_min[1], localAabb.m_min[2]);
611 b3Vector3 localAabbMax = b3MakeVector3(localAabb.m_max[0], localAabb.m_max[1], localAabb.m_max[2]);
612
613 b3Scalar margin = 0.01f;
614 b3Transform t;
615 t.setIdentity();
616 t.setOrigin(b3MakeVector3(position[0], position[1], position[2]));
617 t.setRotation(b3Quaternion(orientation[0], orientation[1], orientation[2], orientation[3]));
618 b3TransformAabb(localAabbMin, localAabbMax, margin, t, aabbMin, aabbMax);
619 }
620 else
621 {
622 b3Error("registerPhysicsInstance using invalid collidableIndex\n");
623 return -1;
624 }
625
626 bool writeToGpu = false;
627 int bodyIndex = m_data->m_narrowphase->getNumRigidBodies();
628 bodyIndex = m_data->m_narrowphase->registerRigidBody(collidableIndex, mass, position, orientation, &aabbMin.getX(), &aabbMax.getX(), writeToGpu);
629
630 if (bodyIndex >= 0)
631 {
632 if (gUseDbvt)
633 {
634 m_data->m_broadphaseDbvt->createProxy(aabbMin, aabbMax, bodyIndex, 0, 1, 1);
635 b3SapAabb aabb;
636 for (int i = 0; i < 3; i++)
637 {
638 aabb.m_min[i] = aabbMin[i];
639 aabb.m_max[i] = aabbMax[i];
640 aabb.m_minIndices[3] = bodyIndex;
641 }
642 m_data->m_allAabbsCPU.push_back(aabb);
643 if (writeInstanceToGpu)
644 {
645 m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
646 }
647 }
648 else
649 {
650 if (mass)
651 {
652 m_data->m_broadphaseSap->createProxy(aabbMin, aabbMax, bodyIndex, 1, 1); //m_dispatcher);
653 }
654 else
655 {
656 m_data->m_broadphaseSap->createLargeProxy(aabbMin, aabbMax, bodyIndex, 1, 1); //m_dispatcher);
657 }
658 }
659 }
660
661 /*
662 if (mass>0.f)
663 m_numDynamicPhysicsInstances++;
664
665 m_numPhysicsInstances++;
666 */
667
668 return bodyIndex;
669 }
670
castRays(const b3AlignedObjectArray<b3RayInfo> & rays,b3AlignedObjectArray<b3RayHit> & hitResults)671 void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults)
672 {
673 this->m_data->m_raycaster->castRays(rays, hitResults,
674 getNumBodies(), this->m_data->m_narrowphase->getBodiesCpu(),
675 m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(),
676 m_data->m_narrowphase->getInternalData(), m_data->m_broadphaseSap);
677 }
678