1 /*
2 Bullet Continuous Collision Detection and Physics Library
3 Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org
4
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
10
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
14 */
15
16 bool findSeparatingAxisOnGpu = true;
17 bool splitSearchSepAxisConcave = false;
18 bool splitSearchSepAxisConvex = true;
19 bool useMprGpu = true; //use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling...
20 bool bvhTraversalKernelGPU = true;
21 bool findConcaveSeparatingAxisKernelGPU = true;
22 bool clipConcaveFacesAndFindContactsCPU = false; //false;//true;
23 bool clipConvexFacesAndFindContactsCPU = false; //false;//true;
24 bool reduceConcaveContactsOnGPU = true; //false;
25 bool reduceConvexContactsOnGPU = true; //false;
26 bool findConvexClippingFacesGPU = true;
27 bool useGjk = false; ///option for CPU/host testing, when findSeparatingAxisOnGpu = false
28 bool useGjkContacts = false; //////option for CPU/host testing when findSeparatingAxisOnGpu = false
29
30 static int myframecount = 0; ///for testing
31
32 ///This file was written by Erwin Coumans
33 ///Separating axis rest based on work from Pierre Terdiman, see
34 ///And contact clipping based on work from Simon Hobbs
35
36 //#define B3_DEBUG_SAT_FACE
37
38 //#define CHECK_ON_HOST
39
40 #ifdef CHECK_ON_HOST
41 //#define PERSISTENT_CONTACTS_HOST
42 #endif
43
44 int b3g_actualSATPairTests = 0;
45
46 #include "b3ConvexHullContact.h"
47 #include <string.h> //memcpy
48 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
49 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h"
50
51 #include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h"
52 #include "Bullet3Geometry/b3AabbUtil.h"
53
54 typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;
55
56 #include <float.h> //for FLT_MAX
57 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
58 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
59 //#include "AdlQuaternion.h"
60
61 #include "kernels/satKernels.h"
62 #include "kernels/mprKernels.h"
63
64 #include "kernels/satConcaveKernels.h"
65
66 #include "kernels/satClipHullContacts.h"
67 #include "kernels/bvhTraversal.h"
68 #include "kernels/primitiveContacts.h"
69
70 #include "Bullet3Geometry/b3AabbUtil.h"
71
72 #define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl"
73 #define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl"
74
75 #define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl"
76
77 #define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl"
78 #define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl"
79 #define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl"
80
81 #ifndef __global
82 #define __global
83 #endif
84
85 #ifndef __kernel
86 #define __kernel
87 #endif
88
89 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h"
90 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h"
91 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h"
92 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h"
93
94 #define dot3F4 b3Dot
95
GpuSatCollision(cl_context ctx,cl_device_id device,cl_command_queue q)96 GpuSatCollision::GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q)
97 : m_context(ctx),
98 m_device(device),
99 m_queue(q),
100
101 m_findSeparatingAxisKernel(0),
102 m_findSeparatingAxisVertexFaceKernel(0),
103 m_findSeparatingAxisEdgeEdgeKernel(0),
104 m_unitSphereDirections(m_context, m_queue),
105
106 m_totalContactsOut(m_context, m_queue),
107 m_sepNormals(m_context, m_queue),
108 m_dmins(m_context, m_queue),
109
110 m_hasSeparatingNormals(m_context, m_queue),
111 m_concaveSepNormals(m_context, m_queue),
112 m_concaveHasSeparatingNormals(m_context, m_queue),
113 m_numConcavePairsOut(m_context, m_queue),
114
115 m_gpuCompoundPairs(m_context, m_queue),
116
117 m_gpuCompoundSepNormals(m_context, m_queue),
118 m_gpuHasCompoundSepNormals(m_context, m_queue),
119
120 m_numCompoundPairsOut(m_context, m_queue)
121 {
122 m_totalContactsOut.push_back(0);
123
124 cl_int errNum = 0;
125
126 if (1)
127 {
128 const char* mprSrc = mprKernelsCL;
129
130 const char* srcConcave = satConcaveKernelsCL;
131 char flags[1024] = {0};
132 //#ifdef CL_PLATFORM_INTEL
133 // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl");
134 //#endif
135 m_mprPenetrationKernel = 0;
136 m_findSeparatingAxisUnitSphereKernel = 0;
137
138 if (useMprGpu)
139 {
140 cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, mprSrc, &errNum, flags, BT_NARROWPHASE_MPR_PATH);
141 b3Assert(errNum == CL_SUCCESS);
142
143 m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "mprPenetrationKernel", &errNum, mprProg);
144 b3Assert(m_mprPenetrationKernel);
145 b3Assert(errNum == CL_SUCCESS);
146
147 m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "findSeparatingAxisUnitSphereKernel", &errNum, mprProg);
148 b3Assert(m_findSeparatingAxisUnitSphereKernel);
149 b3Assert(errNum == CL_SUCCESS);
150
151 int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
152 m_unitSphereDirections.resize(numDirections);
153 m_unitSphereDirections.copyFromHostPointer(unitSphere162, numDirections, 0, true);
154 }
155
156 cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, satKernelsCL, &errNum, flags, BT_NARROWPHASE_SAT_PATH);
157 b3Assert(errNum == CL_SUCCESS);
158
159 cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcConcave, &errNum, flags, BT_NARROWPHASE_SAT_CONCAVE_PATH);
160 b3Assert(errNum == CL_SUCCESS);
161
162 m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisKernel", &errNum, satProg);
163 b3Assert(m_findSeparatingAxisKernel);
164 b3Assert(errNum == CL_SUCCESS);
165
166 m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisVertexFaceKernel", &errNum, satProg);
167 b3Assert(m_findSeparatingAxisVertexFaceKernel);
168
169 m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisEdgeEdgeKernel", &errNum, satProg);
170 b3Assert(m_findSeparatingAxisVertexFaceKernel);
171
172 m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findConcaveSeparatingAxisKernel", &errNum, satProg);
173 b3Assert(m_findConcaveSeparatingAxisKernel);
174 b3Assert(errNum == CL_SUCCESS);
175
176 m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisVertexFaceKernel", &errNum, satConcaveProg);
177 b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel);
178 b3Assert(errNum == CL_SUCCESS);
179
180 m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel", &errNum, satConcaveProg);
181 b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel);
182 b3Assert(errNum == CL_SUCCESS);
183
184 m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findCompoundPairsKernel", &errNum, satProg);
185 b3Assert(m_findCompoundPairsKernel);
186 b3Assert(errNum == CL_SUCCESS);
187 m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "processCompoundPairsKernel", &errNum, satProg);
188 b3Assert(m_processCompoundPairsKernel);
189 b3Assert(errNum == CL_SUCCESS);
190 }
191
192 if (1)
193 {
194 const char* srcClip = satClipKernelsCL;
195
196 char flags[1024] = {0};
197 //#ifdef CL_PLATFORM_INTEL
198 // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
199 //#endif
200
201 cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcClip, &errNum, flags, BT_NARROWPHASE_CLIPHULL_PATH);
202 b3Assert(errNum == CL_SUCCESS);
203
204 m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullKernel", &errNum, satClipContactsProg);
205 b3Assert(errNum == CL_SUCCESS);
206
207 m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipCompoundsHullHullKernel", &errNum, satClipContactsProg);
208 b3Assert(errNum == CL_SUCCESS);
209
210 m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "findClippingFacesKernel", &errNum, satClipContactsProg);
211 b3Assert(errNum == CL_SUCCESS);
212
213 m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipFacesAndFindContactsKernel", &errNum, satClipContactsProg);
214 b3Assert(errNum == CL_SUCCESS);
215
216 m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullConcaveConvexKernel", &errNum, satClipContactsProg);
217 b3Assert(errNum == CL_SUCCESS);
218
219 // m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
220 // b3Assert(errNum==CL_SUCCESS);
221
222 m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip,
223 "newContactReductionKernel", &errNum, satClipContactsProg);
224 b3Assert(errNum == CL_SUCCESS);
225 }
226 else
227 {
228 m_clipHullHullKernel = 0;
229 m_clipCompoundsHullHullKernel = 0;
230 m_findClippingFacesKernel = 0;
231 m_newContactReductionKernel = 0;
232 m_clipFacesAndFindContacts = 0;
233 m_clipHullHullConcaveConvexKernel = 0;
234 // m_extractManifoldAndAddContactKernel = 0;
235 }
236
237 if (1)
238 {
239 const char* srcBvh = bvhTraversalKernelCL;
240 cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcBvh, &errNum, "", BT_NARROWPHASE_BVH_TRAVERSAL_PATH);
241 b3Assert(errNum == CL_SUCCESS);
242
243 m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcBvh, "bvhTraversalKernel", &errNum, bvhTraversalProg, "");
244 b3Assert(errNum == CL_SUCCESS);
245 }
246
247 {
248 const char* primitiveContactsSrc = primitiveContactsKernelsCL;
249 cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, primitiveContactsSrc, &errNum, "", BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH);
250 b3Assert(errNum == CL_SUCCESS);
251
252 m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "primitiveContactsKernel", &errNum, primitiveContactsProg, "");
253 b3Assert(errNum == CL_SUCCESS);
254
255 m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "findConcaveSphereContactsKernel", &errNum, primitiveContactsProg);
256 b3Assert(errNum == CL_SUCCESS);
257 b3Assert(m_findConcaveSphereContactsKernel);
258
259 m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "processCompoundPairsPrimitivesKernel", &errNum, primitiveContactsProg, "");
260 b3Assert(errNum == CL_SUCCESS);
261 b3Assert(m_processCompoundPairsPrimitivesKernel);
262 }
263 }
264
~GpuSatCollision()265 GpuSatCollision::~GpuSatCollision()
266 {
267 if (m_findSeparatingAxisVertexFaceKernel)
268 clReleaseKernel(m_findSeparatingAxisVertexFaceKernel);
269
270 if (m_findSeparatingAxisEdgeEdgeKernel)
271 clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel);
272
273 if (m_findSeparatingAxisUnitSphereKernel)
274 clReleaseKernel(m_findSeparatingAxisUnitSphereKernel);
275
276 if (m_mprPenetrationKernel)
277 clReleaseKernel(m_mprPenetrationKernel);
278
279 if (m_findSeparatingAxisKernel)
280 clReleaseKernel(m_findSeparatingAxisKernel);
281
282 if (m_findConcaveSeparatingAxisVertexFaceKernel)
283 clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel);
284
285 if (m_findConcaveSeparatingAxisEdgeEdgeKernel)
286 clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel);
287
288 if (m_findConcaveSeparatingAxisKernel)
289 clReleaseKernel(m_findConcaveSeparatingAxisKernel);
290
291 if (m_findCompoundPairsKernel)
292 clReleaseKernel(m_findCompoundPairsKernel);
293
294 if (m_processCompoundPairsKernel)
295 clReleaseKernel(m_processCompoundPairsKernel);
296
297 if (m_findClippingFacesKernel)
298 clReleaseKernel(m_findClippingFacesKernel);
299
300 if (m_clipFacesAndFindContacts)
301 clReleaseKernel(m_clipFacesAndFindContacts);
302 if (m_newContactReductionKernel)
303 clReleaseKernel(m_newContactReductionKernel);
304 if (m_primitiveContactsKernel)
305 clReleaseKernel(m_primitiveContactsKernel);
306
307 if (m_findConcaveSphereContactsKernel)
308 clReleaseKernel(m_findConcaveSphereContactsKernel);
309
310 if (m_processCompoundPairsPrimitivesKernel)
311 clReleaseKernel(m_processCompoundPairsPrimitivesKernel);
312
313 if (m_clipHullHullKernel)
314 clReleaseKernel(m_clipHullHullKernel);
315 if (m_clipCompoundsHullHullKernel)
316 clReleaseKernel(m_clipCompoundsHullHullKernel);
317
318 if (m_clipHullHullConcaveConvexKernel)
319 clReleaseKernel(m_clipHullHullConcaveConvexKernel);
320 // if (m_extractManifoldAndAddContactKernel)
321 // clReleaseKernel(m_extractManifoldAndAddContactKernel);
322
323 if (m_bvhTraversalKernel)
324 clReleaseKernel(m_bvhTraversalKernel);
325 }
326
327 struct MyTriangleCallback : public b3NodeOverlapCallback
328 {
329 int m_bodyIndexA;
330 int m_bodyIndexB;
331
processNodeMyTriangleCallback332 virtual void processNode(int subPart, int triangleIndex)
333 {
334 printf("bodyIndexA %d, bodyIndexB %d\n", m_bodyIndexA, m_bodyIndexB);
335 printf("triangleIndex %d\n", triangleIndex);
336 }
337 };
338
339 #define float4 b3Vector3
340 #define make_float4(x, y, z, w) b3MakeVector3(x, y, z, w)
341
signedDistanceFromPointToPlane(const float4 & point,const float4 & planeEqn,float4 * closestPointOnFace)342 float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace)
343 {
344 float4 n = planeEqn;
345 n[3] = 0.f;
346 float dist = dot3F4(n, point) + planeEqn[3];
347 *closestPointOnFace = point - dist * n;
348 return dist;
349 }
350
351 #define cross3(a, b) (a.cross(b))
transform(const b3Vector3 * v,const b3Vector3 * pos,const b3Quaternion * orn)352 b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn)
353 {
354 b3Transform tr;
355 tr.setIdentity();
356 tr.setOrigin(*pos);
357 tr.setRotation(*orn);
358 b3Vector3 res = tr(*v);
359 return res;
360 }
361
IsPointInPolygon(const float4 & p,const b3GpuFace * face,const float4 * baseVertex,const int * convexIndices,float4 * out)362 inline bool IsPointInPolygon(const float4& p,
363 const b3GpuFace* face,
364 const float4* baseVertex,
365 const int* convexIndices,
366 float4* out)
367 {
368 float4 a;
369 float4 b;
370 float4 ab;
371 float4 ap;
372 float4 v;
373
374 float4 plane = b3MakeVector3(face->m_plane.x, face->m_plane.y, face->m_plane.z, 0.f);
375
376 if (face->m_numIndices < 2)
377 return false;
378
379 float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices - 1]];
380 b = v0;
381
382 for (unsigned i = 0; i != face->m_numIndices; ++i)
383 {
384 a = b;
385 float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];
386 b = vi;
387 ab = b - a;
388 ap = p - a;
389 v = cross3(ab, plane);
390
391 if (b3Dot(ap, v) > 0.f)
392 {
393 float ab_m2 = b3Dot(ab, ab);
394 float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f;
395 if (rt <= 0.f)
396 {
397 *out = a;
398 }
399 else if (rt >= 1.f)
400 {
401 *out = b;
402 }
403 else
404 {
405 float s = 1.f - rt;
406 out[0].x = s * a.x + rt * b.x;
407 out[0].y = s * a.y + rt * b.y;
408 out[0].z = s * a.z + rt * b.z;
409 }
410 return false;
411 }
412 }
413 return true;
414 }
415
416 #define normalize3(a) (a.normalize())
417
extractManifoldSequentialGlobal(const float4 * p,int nPoints,const float4 & nearNormal,b3Int4 * contactIdx)418 int extractManifoldSequentialGlobal(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)
419 {
420 if (nPoints == 0)
421 return 0;
422
423 if (nPoints <= 4)
424 return nPoints;
425
426 if (nPoints > 64)
427 nPoints = 64;
428
429 float4 center = b3MakeVector3(0, 0, 0, 0);
430 {
431 for (int i = 0; i < nPoints; i++)
432 center += p[i];
433 center /= (float)nPoints;
434 }
435
436 // sample 4 directions
437
438 float4 aVector = p[0] - center;
439 float4 u = cross3(nearNormal, aVector);
440 float4 v = cross3(nearNormal, u);
441 u = normalize3(u);
442 v = normalize3(v);
443
444 //keep point with deepest penetration
445 float minW = FLT_MAX;
446
447 int minIndex = -1;
448
449 float4 maxDots;
450 maxDots.x = FLT_MIN;
451 maxDots.y = FLT_MIN;
452 maxDots.z = FLT_MIN;
453 maxDots.w = FLT_MIN;
454
455 // idx, distance
456 for (int ie = 0; ie < nPoints; ie++)
457 {
458 if (p[ie].w < minW)
459 {
460 minW = p[ie].w;
461 minIndex = ie;
462 }
463 float f;
464 float4 r = p[ie] - center;
465 f = dot3F4(u, r);
466 if (f < maxDots.x)
467 {
468 maxDots.x = f;
469 contactIdx[0].x = ie;
470 }
471
472 f = dot3F4(-u, r);
473 if (f < maxDots.y)
474 {
475 maxDots.y = f;
476 contactIdx[0].y = ie;
477 }
478
479 f = dot3F4(v, r);
480 if (f < maxDots.z)
481 {
482 maxDots.z = f;
483 contactIdx[0].z = ie;
484 }
485
486 f = dot3F4(-v, r);
487 if (f < maxDots.w)
488 {
489 maxDots.w = f;
490 contactIdx[0].w = ie;
491 }
492 }
493
494 if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
495 {
496 //replace the first contact with minimum (todo: replace contact with least penetration)
497 contactIdx[0].x = minIndex;
498 }
499
500 return 4;
501 }
502
503 #define MAX_VERTS 1024
504
project(const b3ConvexPolyhedronData & hull,const float4 & pos,const b3Quaternion & orn,const float4 & dir,const b3AlignedObjectArray<b3Vector3> & vertices,b3Scalar & min,b3Scalar & max)505 inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max)
506 {
507 min = FLT_MAX;
508 max = -FLT_MAX;
509 int numVerts = hull.m_numVertices;
510
511 const float4 localDir = b3QuatRotate(orn.inverse(), dir);
512
513 b3Scalar offset = dot3F4(pos, dir);
514
515 for (int i = 0; i < numVerts; i++)
516 {
517 //b3Vector3 pt = trans * vertices[m_vertexOffset+i];
518 //b3Scalar dp = pt.dot(dir);
519 //b3Vector3 vertex = vertices[hull.m_vertexOffset+i];
520 b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset + i], localDir);
521 //b3Assert(dp==dpL);
522 if (dp < min) min = dp;
523 if (dp > max) max = dp;
524 }
525 if (min > max)
526 {
527 b3Scalar tmp = min;
528 min = max;
529 max = tmp;
530 }
531 min += offset;
532 max += offset;
533 }
534
TestSepAxis(const b3ConvexPolyhedronData & hullA,const b3ConvexPolyhedronData & hullB,const float4 & posA,const b3Quaternion & ornA,const float4 & posB,const b3Quaternion & ornB,const float4 & sep_axis,const b3AlignedObjectArray<b3Vector3> & verticesA,const b3AlignedObjectArray<b3Vector3> & verticesB,b3Scalar & depth)535 static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
536 const float4& posA, const b3Quaternion& ornA,
537 const float4& posB, const b3Quaternion& ornB,
538 const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB, b3Scalar& depth)
539 {
540 b3Scalar Min0, Max0;
541 b3Scalar Min1, Max1;
542 project(hullA, posA, ornA, sep_axis, verticesA, Min0, Max0);
543 project(hullB, posB, ornB, sep_axis, verticesB, Min1, Max1);
544
545 if (Max0 < Min1 || Max1 < Min0)
546 return false;
547
548 b3Scalar d0 = Max0 - Min1;
549 assert(d0 >= 0.0f);
550 b3Scalar d1 = Max1 - Min0;
551 assert(d1 >= 0.0f);
552 depth = d0 < d1 ? d0 : d1;
553 return true;
554 }
555
IsAlmostZero(const b3Vector3 & v)556 inline bool IsAlmostZero(const b3Vector3& v)
557 {
558 if (fabsf(v.x) > 1e-6 || fabsf(v.y) > 1e-6 || fabsf(v.z) > 1e-6) return false;
559 return true;
560 }
561
findSeparatingAxis(const b3ConvexPolyhedronData & hullA,const b3ConvexPolyhedronData & hullB,const float4 & posA1,const b3Quaternion & ornA,const float4 & posB1,const b3Quaternion & ornB,const b3AlignedObjectArray<b3Vector3> & verticesA,const b3AlignedObjectArray<b3Vector3> & uniqueEdgesA,const b3AlignedObjectArray<b3GpuFace> & facesA,const b3AlignedObjectArray<int> & indicesA,const b3AlignedObjectArray<b3Vector3> & verticesB,const b3AlignedObjectArray<b3Vector3> & uniqueEdgesB,const b3AlignedObjectArray<b3GpuFace> & facesB,const b3AlignedObjectArray<int> & indicesB,b3Vector3 & sep)562 static bool findSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
563 const float4& posA1,
564 const b3Quaternion& ornA,
565 const float4& posB1,
566 const b3Quaternion& ornB,
567 const b3AlignedObjectArray<b3Vector3>& verticesA,
568 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
569 const b3AlignedObjectArray<b3GpuFace>& facesA,
570 const b3AlignedObjectArray<int>& indicesA,
571 const b3AlignedObjectArray<b3Vector3>& verticesB,
572 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
573 const b3AlignedObjectArray<b3GpuFace>& facesB,
574 const b3AlignedObjectArray<int>& indicesB,
575
576 b3Vector3& sep)
577 {
578 B3_PROFILE("findSeparatingAxis");
579
580 b3g_actualSATPairTests++;
581 float4 posA = posA1;
582 posA.w = 0.f;
583 float4 posB = posB1;
584 posB.w = 0.f;
585 //#ifdef TEST_INTERNAL_OBJECTS
586 float4 c0local = (float4&)hullA.m_localCenter;
587 float4 c0 = transform(&c0local, &posA, &ornA);
588 float4 c1local = (float4&)hullB.m_localCenter;
589 float4 c1 = transform(&c1local, &posB, &ornB);
590 const float4 deltaC2 = c0 - c1;
591 //#endif
592
593 b3Scalar dmin = FLT_MAX;
594 int curPlaneTests = 0;
595
596 int numFacesA = hullA.m_numFaces;
597 // Test normals from hullA
598 for (int i = 0; i < numFacesA; i++)
599 {
600 const float4& normal = (float4&)facesA[hullA.m_faceOffset + i].m_plane;
601 float4 faceANormalWS = b3QuatRotate(ornA, normal);
602
603 if (dot3F4(deltaC2, faceANormalWS) < 0)
604 faceANormalWS *= -1.f;
605
606 curPlaneTests++;
607 #ifdef TEST_INTERNAL_OBJECTS
608 gExpectedNbTests++;
609 if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin))
610 continue;
611 gActualNbTests++;
612 #endif
613
614 b3Scalar d;
615 if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d))
616 return false;
617
618 if (d < dmin)
619 {
620 dmin = d;
621 sep = (b3Vector3&)faceANormalWS;
622 }
623 }
624
625 int numFacesB = hullB.m_numFaces;
626 // Test normals from hullB
627 for (int i = 0; i < numFacesB; i++)
628 {
629 float4 normal = (float4&)facesB[hullB.m_faceOffset + i].m_plane;
630 float4 WorldNormal = b3QuatRotate(ornB, normal);
631
632 if (dot3F4(deltaC2, WorldNormal) < 0)
633 {
634 WorldNormal *= -1.f;
635 }
636 curPlaneTests++;
637 #ifdef TEST_INTERNAL_OBJECTS
638 gExpectedNbTests++;
639 if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin))
640 continue;
641 gActualNbTests++;
642 #endif
643
644 b3Scalar d;
645 if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d))
646 return false;
647
648 if (d < dmin)
649 {
650 dmin = d;
651 sep = (b3Vector3&)WorldNormal;
652 }
653 }
654
655 int curEdgeEdge = 0;
656 // Test edges
657 for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++)
658 {
659 const float4& edge0 = (float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0];
660 float4 edge0World = b3QuatRotate(ornA, (float4&)edge0);
661
662 for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++)
663 {
664 const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1];
665 float4 edge1World = b3QuatRotate(ornB, (float4&)edge1);
666
667 float4 crossje = cross3(edge0World, edge1World);
668
669 curEdgeEdge++;
670 if (!IsAlmostZero((b3Vector3&)crossje))
671 {
672 crossje = normalize3(crossje);
673 if (dot3F4(deltaC2, crossje) < 0)
674 crossje *= -1.f;
675
676 #ifdef TEST_INTERNAL_OBJECTS
677 gExpectedNbTests++;
678 if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin))
679 continue;
680 gActualNbTests++;
681 #endif
682
683 b3Scalar dist;
684 if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist))
685 return false;
686
687 if (dist < dmin)
688 {
689 dmin = dist;
690 sep = (b3Vector3&)crossje;
691 }
692 }
693 }
694 }
695
696 if ((dot3F4(-deltaC2, (float4&)sep)) > 0.0f)
697 sep = -sep;
698
699 return true;
700 }
701
findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData * hullA,__global const b3ConvexPolyhedronData * hullB,const b3Float4 & posA1,const b3Quat & ornA,const b3Float4 & posB1,const b3Quat & ornB,const b3Float4 & DeltaC2,__global const b3AlignedObjectArray<float4> & vertices,__global const b3AlignedObjectArray<float4> & uniqueEdges,__global const b3AlignedObjectArray<b3GpuFace> & faces,__global const b3AlignedObjectArray<int> & indices,float4 * sep,float * dmin)702 bool findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
703 const b3Float4& posA1,
704 const b3Quat& ornA,
705 const b3Float4& posB1,
706 const b3Quat& ornB,
707 const b3Float4& DeltaC2,
708 __global const b3AlignedObjectArray<float4>& vertices,
709 __global const b3AlignedObjectArray<float4>& uniqueEdges,
710 __global const b3AlignedObjectArray<b3GpuFace>& faces,
711 __global const b3AlignedObjectArray<int>& indices,
712 float4* sep,
713 float* dmin)
714 {
715 // int i = get_global_id(0);
716
717 float4 posA = posA1;
718 posA.w = 0.f;
719 float4 posB = posB1;
720 posB.w = 0.f;
721
722 //int curPlaneTests=0;
723
724 int curEdgeEdge = 0;
725 // Test edges
726 for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++)
727 {
728 const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset + e0];
729 float4 edge0World = b3QuatRotate(ornA, edge0);
730
731 for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++)
732 {
733 const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset + e1];
734 float4 edge1World = b3QuatRotate(ornB, edge1);
735
736 float4 crossje = cross3(edge0World, edge1World);
737
738 curEdgeEdge++;
739 if (!IsAlmostZero(crossje))
740 {
741 crossje = normalize3(crossje);
742 if (dot3F4(DeltaC2, crossje) < 0)
743 crossje *= -1.f;
744
745 float dist;
746 bool result = true;
747 {
748 float Min0, Max0;
749 float Min1, Max1;
750 project(*hullA, posA, ornA, crossje, vertices, Min0, Max0);
751 project(*hullB, posB, ornB, crossje, vertices, Min1, Max1);
752
753 if (Max0 < Min1 || Max1 < Min0)
754 result = false;
755
756 float d0 = Max0 - Min1;
757 float d1 = Max1 - Min0;
758 dist = d0 < d1 ? d0 : d1;
759 result = true;
760 }
761
762 if (dist < *dmin)
763 {
764 *dmin = dist;
765 *sep = crossje;
766 }
767 }
768 }
769 }
770
771 if ((dot3F4(-DeltaC2, *sep)) > 0.0f)
772 {
773 *sep = -(*sep);
774 }
775 return true;
776 }
777
lerp3(const float4 & a,const float4 & b,float t)778 __inline float4 lerp3(const float4& a, const float4& b, float t)
779 {
780 return b3MakeVector3(a.x + (b.x - a.x) * t,
781 a.y + (b.y - a.y) * t,
782 a.z + (b.z - a.z) * t,
783 0.f);
784 }
785
786 // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
clipFace(const float4 * pVtxIn,int numVertsIn,float4 & planeNormalWS,float planeEqWS,float4 * ppVtxOut)787 int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS, float planeEqWS, float4* ppVtxOut)
788 {
789 int ve;
790 float ds, de;
791 int numVertsOut = 0;
792 if (numVertsIn < 2)
793 return 0;
794
795 float4 firstVertex = pVtxIn[numVertsIn - 1];
796 float4 endVertex = pVtxIn[0];
797
798 ds = dot3F4(planeNormalWS, firstVertex) + planeEqWS;
799
800 for (ve = 0; ve < numVertsIn; ve++)
801 {
802 endVertex = pVtxIn[ve];
803
804 de = dot3F4(planeNormalWS, endVertex) + planeEqWS;
805
806 if (ds < 0)
807 {
808 if (de < 0)
809 {
810 // Start < 0, end < 0, so output endVertex
811 ppVtxOut[numVertsOut++] = endVertex;
812 }
813 else
814 {
815 // Start < 0, end >= 0, so output intersection
816 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
817 }
818 }
819 else
820 {
821 if (de < 0)
822 {
823 // Start >= 0, end < 0 so output intersection and end
824 ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
825 ppVtxOut[numVertsOut++] = endVertex;
826 }
827 }
828 firstVertex = endVertex;
829 ds = de;
830 }
831 return numVertsOut;
832 }
833
clipFaceAgainstHull(const float4 & separatingNormal,const b3ConvexPolyhedronData * hullA,const float4 & posA,const b3Quaternion & ornA,float4 * worldVertsB1,int numWorldVertsB1,float4 * worldVertsB2,int capacityWorldVertsB2,const float minDist,float maxDist,const b3AlignedObjectArray<float4> & verticesA,const b3AlignedObjectArray<b3GpuFace> & facesA,const b3AlignedObjectArray<int> & indicesA,float4 * contactsOut,int contactCapacity)834 int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA,
835 const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1,
836 float4* worldVertsB2, int capacityWorldVertsB2,
837 const float minDist, float maxDist,
838 const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
839 //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB,
840 float4* contactsOut,
841 int contactCapacity)
842 {
843 int numContactsOut = 0;
844
845 float4* pVtxIn = worldVertsB1;
846 float4* pVtxOut = worldVertsB2;
847
848 int numVertsIn = numWorldVertsB1;
849 int numVertsOut = 0;
850
851 int closestFaceA = -1;
852 {
853 float dmin = FLT_MAX;
854 for (int face = 0; face < hullA->m_numFaces; face++)
855 {
856 const float4 Normal = b3MakeVector3(
857 facesA[hullA->m_faceOffset + face].m_plane.x,
858 facesA[hullA->m_faceOffset + face].m_plane.y,
859 facesA[hullA->m_faceOffset + face].m_plane.z, 0.f);
860 const float4 faceANormalWS = b3QuatRotate(ornA, Normal);
861
862 float d = dot3F4(faceANormalWS, separatingNormal);
863 if (d < dmin)
864 {
865 dmin = d;
866 closestFaceA = face;
867 }
868 }
869 }
870 if (closestFaceA < 0)
871 return numContactsOut;
872
873 b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA];
874
875 // clip polygon to back of planes of all faces of hull A that are adjacent to witness face
876 // int numContacts = numWorldVertsB1;
877 int numVerticesA = polyA.m_numIndices;
878 for (int e0 = 0; e0 < numVerticesA; e0++)
879 {
880 const float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]];
881 const float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]];
882 const float4 edge0 = a - b;
883 const float4 WorldEdge0 = b3QuatRotate(ornA, edge0);
884 float4 planeNormalA = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);
885 float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA);
886
887 float4 planeNormalWS1 = -cross3(WorldEdge0, worldPlaneAnormal1);
888 float4 worldA1 = transform(&a, &posA, &ornA);
889 float planeEqWS1 = -dot3F4(worldA1, planeNormalWS1);
890
891 float4 planeNormalWS = planeNormalWS1;
892 float planeEqWS = planeEqWS1;
893
894 //clip face
895 //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
896 numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut);
897
898 //btSwap(pVtxIn,pVtxOut);
899 float4* tmp = pVtxOut;
900 pVtxOut = pVtxIn;
901 pVtxIn = tmp;
902 numVertsIn = numVertsOut;
903 numVertsOut = 0;
904 }
905
906 // only keep points that are behind the witness face
907 {
908 float4 localPlaneNormal = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);
909 float localPlaneEq = polyA.m_plane.w;
910 float4 planeNormalWS = b3QuatRotate(ornA, localPlaneNormal);
911 float planeEqWS = localPlaneEq - dot3F4(planeNormalWS, posA);
912 for (int i = 0; i < numVertsIn; i++)
913 {
914 float depth = dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS;
915 if (depth <= minDist)
916 {
917 depth = minDist;
918 }
919 if (numContactsOut < contactCapacity)
920 {
921 if (depth <= maxDist)
922 {
923 float4 pointInWorld = pVtxIn[i];
924 //resultOut.addContactPoint(separatingNormal,point,depth);
925 contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth);
926 //printf("depth=%f\n",depth);
927 }
928 }
929 else
930 {
931 b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut, contactCapacity);
932 }
933 }
934 }
935
936 return numContactsOut;
937 }
938
clipHullAgainstHull(const float4 & separatingNormal,const b3ConvexPolyhedronData & hullA,const b3ConvexPolyhedronData & hullB,const float4 & posA,const b3Quaternion & ornA,const float4 & posB,const b3Quaternion & ornB,float4 * worldVertsB1,float4 * worldVertsB2,int capacityWorldVerts,const float minDist,float maxDist,const b3AlignedObjectArray<float4> & verticesA,const b3AlignedObjectArray<b3GpuFace> & facesA,const b3AlignedObjectArray<int> & indicesA,const b3AlignedObjectArray<float4> & verticesB,const b3AlignedObjectArray<b3GpuFace> & facesB,const b3AlignedObjectArray<int> & indicesB,float4 * contactsOut,int contactCapacity)939 static int clipHullAgainstHull(const float4& separatingNormal,
940 const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
941 const float4& posA, const b3Quaternion& ornA, const float4& posB, const b3Quaternion& ornB,
942 float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,
943 const float minDist, float maxDist,
944 const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
945 const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB,
946
947 float4* contactsOut,
948 int contactCapacity)
949 {
950 int numContactsOut = 0;
951 int numWorldVertsB1 = 0;
952
953 B3_PROFILE("clipHullAgainstHull");
954
955 // float curMaxDist=maxDist;
956 int closestFaceB = -1;
957 float dmax = -FLT_MAX;
958
959 {
960 //B3_PROFILE("closestFaceB");
961 if (hullB.m_numFaces != 1)
962 {
963 //printf("wtf\n");
964 }
965 static bool once = true;
966 //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z);
967
968 for (int face = 0; face < hullB.m_numFaces; face++)
969 {
970 #ifdef BT_DEBUG_SAT_FACE
971 if (once)
972 printf("face %d\n", face);
973 const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face];
974 if (once)
975 {
976 for (int i = 0; i < faceB->m_numIndices; i++)
977 {
978 float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]];
979 printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z);
980 }
981 }
982 #endif //BT_DEBUG_SAT_FACE \
983 //if (facesB[hullB.m_faceOffset+face].m_numIndices>2)
984 {
985 const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x,
986 facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f);
987 const float4 WorldNormal = b3QuatRotate(ornB, Normal);
988 #ifdef BT_DEBUG_SAT_FACE
989 if (once)
990 printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z);
991 #endif
992 float d = dot3F4(WorldNormal, separatingNormal);
993 if (d > dmax)
994 {
995 dmax = d;
996 closestFaceB = face;
997 }
998 }
999 }
1000 once = false;
1001 }
1002
1003 b3Assert(closestFaceB >= 0);
1004 {
1005 //B3_PROFILE("worldVertsB1");
1006 const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB];
1007 const int numVertices = polyB.m_numIndices;
1008 for (int e0 = 0; e0 < numVertices; e0++)
1009 {
1010 const float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]];
1011 worldVertsB1[numWorldVertsB1++] = transform(&b, &posB, &ornB);
1012 }
1013 }
1014
1015 if (closestFaceB >= 0)
1016 {
1017 //B3_PROFILE("clipFaceAgainstHull");
1018 numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA,
1019 posA, ornA,
1020 worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist,
1021 verticesA, facesA, indicesA,
1022 contactsOut, contactCapacity);
1023 }
1024
1025 return numContactsOut;
1026 }
1027
1028 #define PARALLEL_SUM(v, n) \
1029 for (int j = 1; j < n; j++) v[0] += v[j];
1030 #define PARALLEL_DO(execution, n) \
1031 for (int ie = 0; ie < n; ie++) \
1032 { \
1033 execution; \
1034 }
1035 #define REDUCE_MAX(v, n) \
1036 { \
1037 int i = 0; \
1038 for (int offset = 0; offset < n; offset++) v[i] = (v[i].y > v[i + offset].y) ? v[i] : v[i + offset]; \
1039 }
1040 #define REDUCE_MIN(v, n) \
1041 { \
1042 int i = 0; \
1043 for (int offset = 0; offset < n; offset++) v[i] = (v[i].y < v[i + offset].y) ? v[i] : v[i + offset]; \
1044 }
1045
extractManifold(const float4 * p,int nPoints,const float4 & nearNormal,b3Int4 * contactIdx)1046 int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)
1047 {
1048 if (nPoints == 0)
1049 return 0;
1050
1051 if (nPoints <= 4)
1052 return nPoints;
1053
1054 if (nPoints > 64)
1055 nPoints = 64;
1056
1057 float4 center = make_float4(0, 0, 0, 0);
1058 {
1059 for (int i = 0; i < nPoints; i++)
1060 center += p[i];
1061 center /= (float)nPoints;
1062 }
1063
1064 // sample 4 directions
1065
1066 float4 aVector = p[0] - center;
1067 float4 u = cross3(nearNormal, aVector);
1068 float4 v = cross3(nearNormal, u);
1069 u = normalize3(u);
1070 v = normalize3(v);
1071
1072 //keep point with deepest penetration
1073 float minW = FLT_MAX;
1074
1075 int minIndex = -1;
1076
1077 float4 maxDots;
1078 maxDots.x = FLT_MIN;
1079 maxDots.y = FLT_MIN;
1080 maxDots.z = FLT_MIN;
1081 maxDots.w = FLT_MIN;
1082
1083 // idx, distance
1084 for (int ie = 0; ie < nPoints; ie++)
1085 {
1086 if (p[ie].w < minW)
1087 {
1088 minW = p[ie].w;
1089 minIndex = ie;
1090 }
1091 float f;
1092 float4 r = p[ie] - center;
1093 f = dot3F4(u, r);
1094 if (f < maxDots.x)
1095 {
1096 maxDots.x = f;
1097 contactIdx[0].x = ie;
1098 }
1099
1100 f = dot3F4(-u, r);
1101 if (f < maxDots.y)
1102 {
1103 maxDots.y = f;
1104 contactIdx[0].y = ie;
1105 }
1106
1107 f = dot3F4(v, r);
1108 if (f < maxDots.z)
1109 {
1110 maxDots.z = f;
1111 contactIdx[0].z = ie;
1112 }
1113
1114 f = dot3F4(-v, r);
1115 if (f < maxDots.w)
1116 {
1117 maxDots.w = f;
1118 contactIdx[0].w = ie;
1119 }
1120 }
1121
1122 if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
1123 {
1124 //replace the first contact with minimum (todo: replace contact with least penetration)
1125 contactIdx[0].x = minIndex;
1126 }
1127
1128 return 4;
1129 }
1130
clipHullHullSingle(int bodyIndexA,int bodyIndexB,const float4 & posA,const b3Quaternion & ornA,const float4 & posB,const b3Quaternion & ornB,int collidableIndexA,int collidableIndexB,const b3AlignedObjectArray<b3RigidBodyData> * bodyBuf,b3AlignedObjectArray<b3Contact4> * globalContactOut,int & nContacts,const b3AlignedObjectArray<b3ConvexPolyhedronData> & hostConvexDataA,const b3AlignedObjectArray<b3ConvexPolyhedronData> & hostConvexDataB,const b3AlignedObjectArray<b3Vector3> & verticesA,const b3AlignedObjectArray<b3Vector3> & uniqueEdgesA,const b3AlignedObjectArray<b3GpuFace> & facesA,const b3AlignedObjectArray<int> & indicesA,const b3AlignedObjectArray<b3Vector3> & verticesB,const b3AlignedObjectArray<b3Vector3> & uniqueEdgesB,const b3AlignedObjectArray<b3GpuFace> & facesB,const b3AlignedObjectArray<int> & indicesB,const b3AlignedObjectArray<b3Collidable> & hostCollidablesA,const b3AlignedObjectArray<b3Collidable> & hostCollidablesB,const b3Vector3 & sepNormalWorldSpace,int maxContactCapacity)1131 int clipHullHullSingle(
1132 int bodyIndexA, int bodyIndexB,
1133 const float4& posA,
1134 const b3Quaternion& ornA,
1135 const float4& posB,
1136 const b3Quaternion& ornB,
1137
1138 int collidableIndexA, int collidableIndexB,
1139
1140 const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf,
1141 b3AlignedObjectArray<b3Contact4>* globalContactOut,
1142 int& nContacts,
1143
1144 const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA,
1145 const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB,
1146
1147 const b3AlignedObjectArray<b3Vector3>& verticesA,
1148 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
1149 const b3AlignedObjectArray<b3GpuFace>& facesA,
1150 const b3AlignedObjectArray<int>& indicesA,
1151
1152 const b3AlignedObjectArray<b3Vector3>& verticesB,
1153 const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
1154 const b3AlignedObjectArray<b3GpuFace>& facesB,
1155 const b3AlignedObjectArray<int>& indicesB,
1156
1157 const b3AlignedObjectArray<b3Collidable>& hostCollidablesA,
1158 const b3AlignedObjectArray<b3Collidable>& hostCollidablesB,
1159 const b3Vector3& sepNormalWorldSpace,
1160 int maxContactCapacity)
1161 {
1162 int contactIndex = -1;
1163 b3ConvexPolyhedronData hullA, hullB;
1164
1165 b3Collidable colA = hostCollidablesA[collidableIndexA];
1166 hullA = hostConvexDataA[colA.m_shapeIndex];
1167 //printf("numvertsA = %d\n",hullA.m_numVertices);
1168
1169 b3Collidable colB = hostCollidablesB[collidableIndexB];
1170 hullB = hostConvexDataB[colB.m_shapeIndex];
1171 //printf("numvertsB = %d\n",hullB.m_numVertices);
1172
1173 float4 contactsOut[MAX_VERTS];
1174 int localContactCapacity = MAX_VERTS;
1175
1176 #ifdef _WIN32
1177 b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x));
1178 b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x));
1179 #endif
1180
1181 {
1182 float4 worldVertsB1[MAX_VERTS];
1183 float4 worldVertsB2[MAX_VERTS];
1184 int capacityWorldVerts = MAX_VERTS;
1185
1186 float4 hostNormal = make_float4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f);
1187 int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex;
1188 int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex;
1189
1190 b3Scalar minDist = -1;
1191 b3Scalar maxDist = 0.;
1192
1193 b3Transform trA, trB;
1194 {
1195 //B3_PROFILE("transform computation");
1196 //trA.setIdentity();
1197 trA.setOrigin(b3MakeVector3(posA.x, posA.y, posA.z));
1198 trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w));
1199
1200 //trB.setIdentity();
1201 trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z));
1202 trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w));
1203 }
1204
1205 b3Quaternion trAorn = trA.getRotation();
1206 b3Quaternion trBorn = trB.getRotation();
1207
1208 int numContactsOut = clipHullAgainstHull(hostNormal,
1209 hostConvexDataA.at(shapeA),
1210 hostConvexDataB.at(shapeB),
1211 (float4&)trA.getOrigin(), (b3Quaternion&)trAorn,
1212 (float4&)trB.getOrigin(), (b3Quaternion&)trBorn,
1213 worldVertsB1, worldVertsB2, capacityWorldVerts,
1214 minDist, maxDist,
1215 verticesA, facesA, indicesA,
1216 verticesB, facesB, indicesB,
1217
1218 contactsOut, localContactCapacity);
1219
1220 if (numContactsOut > 0)
1221 {
1222 B3_PROFILE("overlap");
1223
1224 float4 normalOnSurfaceB = (float4&)hostNormal;
1225
1226 b3Int4 contactIdx;
1227 contactIdx.x = 0;
1228 contactIdx.y = 1;
1229 contactIdx.z = 2;
1230 contactIdx.w = 3;
1231
1232 int numPoints = 0;
1233
1234 {
1235 // B3_PROFILE("extractManifold");
1236 numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx);
1237 }
1238
1239 b3Assert(numPoints);
1240
1241 if (nContacts < maxContactCapacity)
1242 {
1243 contactIndex = nContacts;
1244 globalContactOut->expand();
1245 b3Contact4& contact = globalContactOut->at(nContacts);
1246 contact.m_batchIdx = 0; //i;
1247 contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA;
1248 contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;
1249
1250 contact.m_frictionCoeffCmp = 45874;
1251 contact.m_restituitionCoeffCmp = 0;
1252
1253 // float distance = 0.f;
1254 for (int p = 0; p < numPoints; p++)
1255 {
1256 contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]]; //check if it is actually on B
1257 contact.m_worldNormalOnB = normalOnSurfaceB;
1258 }
1259 //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints);
1260 contact.m_worldNormalOnB.w = (b3Scalar)numPoints;
1261 nContacts++;
1262 }
1263 else
1264 {
1265 b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity);
1266 }
1267 }
1268 }
1269 return contactIndex;
1270 }
1271
computeContactPlaneConvex(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3RigidBodyData * rigidBodies,const b3Collidable * collidables,const b3ConvexPolyhedronData * convexShapes,const b3Vector3 * convexVertices,const int * convexIndices,const b3GpuFace * faces,b3Contact4 * globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity)1272 void computeContactPlaneConvex(int pairIndex,
1273 int bodyIndexA, int bodyIndexB,
1274 int collidableIndexA, int collidableIndexB,
1275 const b3RigidBodyData* rigidBodies,
1276 const b3Collidable* collidables,
1277 const b3ConvexPolyhedronData* convexShapes,
1278 const b3Vector3* convexVertices,
1279 const int* convexIndices,
1280 const b3GpuFace* faces,
1281 b3Contact4* globalContactsOut,
1282 int& nGlobalContactsOut,
1283 int maxContactCapacity)
1284 {
1285 int shapeIndex = collidables[collidableIndexB].m_shapeIndex;
1286 const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex];
1287
1288 b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;
1289 b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
1290 b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
1291 b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
1292
1293 // int numContactsOut = 0;
1294 // int numWorldVertsB1= 0;
1295
1296 b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;
1297 b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z);
1298 b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);
1299 float planeConstant = planeEq.w;
1300 b3Transform convexWorldTransform;
1301 convexWorldTransform.setIdentity();
1302 convexWorldTransform.setOrigin(posB);
1303 convexWorldTransform.setRotation(ornB);
1304 b3Transform planeTransform;
1305 planeTransform.setIdentity();
1306 planeTransform.setOrigin(posA);
1307 planeTransform.setRotation(ornA);
1308
1309 b3Transform planeInConvex;
1310 planeInConvex = convexWorldTransform.inverse() * planeTransform;
1311 b3Transform convexInPlane;
1312 convexInPlane = planeTransform.inverse() * convexWorldTransform;
1313
1314 b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;
1315 float maxDot = -1e30;
1316 int hitVertex = -1;
1317 b3Vector3 hitVtx;
1318
1319 #define MAX_PLANE_CONVEX_POINTS 64
1320
1321 b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
1322 int numPoints = 0;
1323
1324 b3Int4 contactIdx;
1325 contactIdx.s[0] = 0;
1326 contactIdx.s[1] = 1;
1327 contactIdx.s[2] = 2;
1328 contactIdx.s[3] = 3;
1329
1330 for (int i = 0; i < hullB->m_numVertices; i++)
1331 {
1332 b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];
1333 float curDot = vtx.dot(planeNormalInConvex);
1334
1335 if (curDot > maxDot)
1336 {
1337 hitVertex = i;
1338 maxDot = curDot;
1339 hitVtx = vtx;
1340 //make sure the deepest points is always included
1341 if (numPoints == MAX_PLANE_CONVEX_POINTS)
1342 numPoints--;
1343 }
1344
1345 if (numPoints < MAX_PLANE_CONVEX_POINTS)
1346 {
1347 b3Vector3 vtxWorld = convexWorldTransform * vtx;
1348 b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld;
1349 float dist = planeNormal.dot(vtxInPlane) - planeConstant;
1350 if (dist < 0.f)
1351 {
1352 vtxWorld.w = dist;
1353 contactPoints[numPoints] = vtxWorld;
1354 numPoints++;
1355 }
1356 }
1357 }
1358
1359 int numReducedPoints = 0;
1360
1361 numReducedPoints = numPoints;
1362
1363 if (numPoints > 4)
1364 {
1365 numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);
1366 }
1367 int dstIdx;
1368 // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
1369
1370 if (numReducedPoints > 0)
1371 {
1372 if (nGlobalContactsOut < maxContactCapacity)
1373 {
1374 dstIdx = nGlobalContactsOut;
1375 nGlobalContactsOut++;
1376
1377 b3Contact4* c = &globalContactsOut[dstIdx];
1378 c->m_worldNormalOnB = -planeNormalWorld;
1379 c->setFrictionCoeff(0.7);
1380 c->setRestituitionCoeff(0.f);
1381
1382 c->m_batchIdx = pairIndex;
1383 c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
1384 c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
1385 for (int i = 0; i < numReducedPoints; i++)
1386 {
1387 b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];
1388 c->m_worldPosB[i] = pOnB1;
1389 }
1390 c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints;
1391 } //if (dstIdx < numPairs)
1392 }
1393
1394 // printf("computeContactPlaneConvex\n");
1395 }
1396
MyUnQuantize(const unsigned short * vecIn,const b3Vector3 & quantization,const b3Vector3 & bvhAabbMin)1397 B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin)
1398 {
1399 b3Vector3 vecOut;
1400 vecOut.setValue(
1401 (b3Scalar)(vecIn[0]) / (quantization.x),
1402 (b3Scalar)(vecIn[1]) / (quantization.y),
1403 (b3Scalar)(vecIn[2]) / (quantization.z));
1404 vecOut += bvhAabbMin;
1405 return vecOut;
1406 }
1407
traverseTreeTree()1408 void traverseTreeTree()
1409 {
1410 }
1411
1412 #include "Bullet3Common/shared/b3Mat3x3.h"
1413
1414 int numAabbChecks = 0;
1415 int maxNumAabbChecks = 0;
1416 int maxDepth = 0;
1417
1418 // work-in-progress
findCompoundPairsKernel(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,__global const b3RigidBodyData * rigidBodies,__global const b3Collidable * collidables,__global const b3ConvexPolyhedronData * convexShapes,__global const b3AlignedObjectArray<b3Float4> & vertices,__global const b3AlignedObjectArray<b3Aabb> & aabbsWorldSpace,__global const b3AlignedObjectArray<b3Aabb> & aabbsLocalSpace,__global const b3GpuChildShape * gpuChildShapes,__global b3Int4 * gpuCompoundPairsOut,__global int * numCompoundPairsOut,int maxNumCompoundPairsCapacity,b3AlignedObjectArray<b3QuantizedBvhNode> & treeNodesCPU,b3AlignedObjectArray<b3BvhSubtreeInfo> & subTreesCPU,b3AlignedObjectArray<b3BvhInfo> & bvhInfoCPU)1419 __kernel void findCompoundPairsKernel(
1420 int pairIndex,
1421 int bodyIndexA,
1422 int bodyIndexB,
1423 int collidableIndexA,
1424 int collidableIndexB,
1425 __global const b3RigidBodyData* rigidBodies,
1426 __global const b3Collidable* collidables,
1427 __global const b3ConvexPolyhedronData* convexShapes,
1428 __global const b3AlignedObjectArray<b3Float4>& vertices,
1429 __global const b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace,
1430 __global const b3AlignedObjectArray<b3Aabb>& aabbsLocalSpace,
1431 __global const b3GpuChildShape* gpuChildShapes,
1432 __global b3Int4* gpuCompoundPairsOut,
1433 __global int* numCompoundPairsOut,
1434 int maxNumCompoundPairsCapacity,
1435 b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
1436 b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
1437 b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)
1438 {
1439 numAabbChecks = 0;
1440 maxNumAabbChecks = 0;
1441 // int i = pairIndex;
1442 {
1443 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1444 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1445
1446 //once the broadphase avoids static-static pairs, we can remove this test
1447 if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0))
1448 {
1449 return;
1450 }
1451
1452 if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1453 {
1454 int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;
1455 int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;
1456 int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees;
1457 int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset;
1458 int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset;
1459
1460 int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees;
1461
1462 float4 posA = rigidBodies[bodyIndexA].m_pos;
1463 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1464
1465 b3Transform transA;
1466 transA.setIdentity();
1467 transA.setOrigin(posA);
1468 transA.setRotation(ornA);
1469
1470 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1471 float4 posB = rigidBodies[bodyIndexB].m_pos;
1472
1473 b3Transform transB;
1474 transB.setIdentity();
1475 transB.setOrigin(posB);
1476 transB.setRotation(ornB);
1477
1478 for (int p = 0; p < numSubTreesA; p++)
1479 {
1480 b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA + p];
1481 //bvhInfoCPU[bvhA].m_quantization
1482 b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1483 b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1484
1485 b3Vector3 aabbAMinOut, aabbAMaxOut;
1486 float margin = 0.f;
1487 b3TransformAabb2(treeAminLocal, treeAmaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1488
1489 for (int q = 0; q < numSubTreesB; q++)
1490 {
1491 b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB + q];
1492
1493 b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1494 b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1495
1496 b3Vector3 aabbBMinOut, aabbBMaxOut;
1497 float margin = 0.f;
1498 b3TransformAabb2(treeBminLocal, treeBmaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1499
1500 numAabbChecks = 0;
1501 bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1502 if (aabbOverlap)
1503 {
1504 int startNodeIndexA = subtreeA.m_rootNodeIndex + bvhInfoCPU[bvhA].m_nodeOffset;
1505 // int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;
1506
1507 int startNodeIndexB = subtreeB.m_rootNodeIndex + bvhInfoCPU[bvhB].m_nodeOffset;
1508 // int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;
1509
1510 b3AlignedObjectArray<b3Int2> nodeStack;
1511 b3Int2 node0;
1512 node0.x = startNodeIndexA;
1513 node0.y = startNodeIndexB;
1514
1515 int maxStackDepth = 1024;
1516 nodeStack.resize(maxStackDepth);
1517 int depth = 0;
1518 nodeStack[depth++] = node0;
1519
1520 do
1521 {
1522 if (depth > maxDepth)
1523 {
1524 maxDepth = depth;
1525 printf("maxDepth=%d\n", maxDepth);
1526 }
1527 b3Int2 node = nodeStack[--depth];
1528
1529 b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1530 b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1531
1532 b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1533 b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1534
1535 float margin = 0.f;
1536 b3Vector3 aabbAMinOut, aabbAMaxOut;
1537 b3TransformAabb2(aMinLocal, aMaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1538
1539 b3Vector3 aabbBMinOut, aabbBMaxOut;
1540 b3TransformAabb2(bMinLocal, bMaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1541
1542 numAabbChecks++;
1543 bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1544 if (nodeOverlap)
1545 {
1546 bool isLeafA = treeNodesCPU[node.x].isLeafNode();
1547 bool isLeafB = treeNodesCPU[node.y].isLeafNode();
1548 bool isInternalA = !isLeafA;
1549 bool isInternalB = !isLeafB;
1550
1551 //fail, even though it might hit two leaf nodes
1552 if (depth + 4 > maxStackDepth && !(isLeafA && isLeafB))
1553 {
1554 b3Error("Error: traversal exceeded maxStackDepth\n");
1555 continue;
1556 }
1557
1558 if (isInternalA)
1559 {
1560 int nodeAleftChild = node.x + 1;
1561 bool isNodeALeftChildLeaf = treeNodesCPU[node.x + 1].isLeafNode();
1562 int nodeArightChild = isNodeALeftChildLeaf ? node.x + 2 : node.x + 1 + treeNodesCPU[node.x + 1].getEscapeIndex();
1563
1564 if (isInternalB)
1565 {
1566 int nodeBleftChild = node.y + 1;
1567 bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode();
1568 int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex();
1569
1570 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);
1571 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);
1572 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);
1573 nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);
1574 }
1575 else
1576 {
1577 nodeStack[depth++] = b3MakeInt2(nodeAleftChild, node.y);
1578 nodeStack[depth++] = b3MakeInt2(nodeArightChild, node.y);
1579 }
1580 }
1581 else
1582 {
1583 if (isInternalB)
1584 {
1585 int nodeBleftChild = node.y + 1;
1586 bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode();
1587 int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex();
1588 nodeStack[depth++] = b3MakeInt2(node.x, nodeBleftChild);
1589 nodeStack[depth++] = b3MakeInt2(node.x, nodeBrightChild);
1590 }
1591 else
1592 {
1593 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1594 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1595 {
1596 int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex();
1597 int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex();
1598 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);
1599 }
1600 }
1601 }
1602 }
1603 } while (depth);
1604 maxNumAabbChecks = b3Max(numAabbChecks, maxNumAabbChecks);
1605 }
1606 }
1607 }
1608
1609 return;
1610 }
1611
1612 if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) || (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1613 {
1614 if (collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
1615 {
1616 int numChildrenA = collidables[collidableIndexA].m_numChildShapes;
1617 for (int c = 0; c < numChildrenA; c++)
1618 {
1619 int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex + c;
1620 int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1621
1622 float4 posA = rigidBodies[bodyIndexA].m_pos;
1623 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1624 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1625 b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1626 float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1627 b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1628
1629 b3Aabb aabbA = aabbsLocalSpace[childColIndexA];
1630
1631 b3Transform transA;
1632 transA.setIdentity();
1633 transA.setOrigin(newPosA);
1634 transA.setRotation(newOrnA);
1635 b3Scalar margin = 0.0f;
1636
1637 b3Vector3 aabbAMinOut, aabbAMaxOut;
1638
1639 b3TransformAabb2((const b3Float4&)aabbA.m_min, (const b3Float4&)aabbA.m_max, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1640
1641 if (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
1642 {
1643 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1644 for (int b = 0; b < numChildrenB; b++)
1645 {
1646 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
1647 int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1648 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1649 float4 posB = rigidBodies[bodyIndexB].m_pos;
1650 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1651 b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1652 float4 newPosB = transform(&childPosB, &posB, &ornB);
1653 b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1654
1655 b3Aabb aabbB = aabbsLocalSpace[childColIndexB];
1656
1657 b3Transform transB;
1658 transB.setIdentity();
1659 transB.setOrigin(newPosB);
1660 transB.setRotation(newOrnB);
1661
1662 b3Vector3 aabbBMinOut, aabbBMaxOut;
1663 b3TransformAabb2((const b3Float4&)aabbB.m_min, (const b3Float4&)aabbB.m_max, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1664
1665 numAabbChecks++;
1666 bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1667 if (aabbOverlap)
1668 {
1669 /*
1670 int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1671 float dmin = FLT_MAX;
1672 float4 posA = newPosA;
1673 posA.w = 0.f;
1674 float4 posB = newPosB;
1675 posB.w = 0.f;
1676 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1677 b3Quat ornA = newOrnA;
1678 float4 c0 = transform(&c0local, &posA, &ornA);
1679 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1680 b3Quat ornB =newOrnB;
1681 float4 c1 = transform(&c1local,&posB,&ornB);
1682 const float4 DeltaC2 = c0 - c1;
1683 */
1684 { //
1685 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1686 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1687 {
1688 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);
1689 }
1690 } //
1691 } //fi(1)
1692 } //for (int b=0
1693 } //if (collidables[collidableIndexB].
1694 else //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1695 {
1696 if (1)
1697 {
1698 // int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1699 // float dmin = FLT_MAX;
1700 float4 posA = newPosA;
1701 posA.w = 0.f;
1702 float4 posB = rigidBodies[bodyIndexB].m_pos;
1703 posB.w = 0.f;
1704 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1705 b3Quat ornA = newOrnA;
1706 float4 c0;
1707 c0 = transform(&c0local, &posA, &ornA);
1708 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1709 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1710 float4 c1;
1711 c1 = transform(&c1local, &posB, &ornB);
1712 // const float4 DeltaC2 = c0 - c1;
1713
1714 {
1715 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1716 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1717 {
1718 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, -1);
1719 } //if (compoundPairIdx<maxNumCompoundPairsCapacity)
1720 } //
1721 } //fi (1)
1722 } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1723 } //for (int b=0;b<numChildrenB;b++)
1724 return;
1725 } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1726 if ((collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1727 {
1728 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1729 for (int b = 0; b < numChildrenB; b++)
1730 {
1731 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
1732 int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1733 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1734 float4 posB = rigidBodies[bodyIndexB].m_pos;
1735 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1736 b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1737 float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1738 b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1739
1740 int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1741
1742 //////////////////////////////////////
1743
1744 if (1)
1745 {
1746 // int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1747 // float dmin = FLT_MAX;
1748 float4 posA = rigidBodies[bodyIndexA].m_pos;
1749 posA.w = 0.f;
1750 float4 posB = newPosB;
1751 posB.w = 0.f;
1752 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1753 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1754 float4 c0;
1755 c0 = transform(&c0local, &posA, &ornA);
1756 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1757 b3Quat ornB = newOrnB;
1758 float4 c1;
1759 c1 = transform(&c1local, &posB, &ornB);
1760 // const float4 DeltaC2 = c0 - c1;
1761 { //
1762 int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1763 if (compoundPairIdx < maxNumCompoundPairsCapacity)
1764 {
1765 gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, -1, childShapeIndexB);
1766 } //fi (compoundPairIdx<maxNumCompoundPairsCapacity)
1767 } //
1768 } //fi (1)
1769 } //for (int b=0;b<numChildrenB;b++)
1770 return;
1771 } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1772 return;
1773 } //fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1774 } //i<numPairs
1775 }
1776
processCompoundPairsKernel(__global const b3Int4 * gpuCompoundPairs,__global const b3RigidBodyData * rigidBodies,__global const b3Collidable * collidables,__global const b3ConvexPolyhedronData * convexShapes,__global const b3AlignedObjectArray<b3Float4> & vertices,__global const b3AlignedObjectArray<b3Float4> & uniqueEdges,__global const b3AlignedObjectArray<b3GpuFace> & faces,__global const b3AlignedObjectArray<int> & indices,__global b3Aabb * aabbs,__global const b3GpuChildShape * gpuChildShapes,__global b3AlignedObjectArray<b3Float4> & gpuCompoundSepNormalsOut,__global b3AlignedObjectArray<int> & gpuHasCompoundSepNormalsOut,int numCompoundPairs,int i)1777 __kernel void processCompoundPairsKernel(__global const b3Int4* gpuCompoundPairs,
1778 __global const b3RigidBodyData* rigidBodies,
1779 __global const b3Collidable* collidables,
1780 __global const b3ConvexPolyhedronData* convexShapes,
1781 __global const b3AlignedObjectArray<b3Float4>& vertices,
1782 __global const b3AlignedObjectArray<b3Float4>& uniqueEdges,
1783 __global const b3AlignedObjectArray<b3GpuFace>& faces,
1784 __global const b3AlignedObjectArray<int>& indices,
1785 __global b3Aabb* aabbs,
1786 __global const b3GpuChildShape* gpuChildShapes,
1787 __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut,
1788 __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut,
1789 int numCompoundPairs,
1790 int i)
1791 {
1792 // int i = get_global_id(0);
1793 if (i < numCompoundPairs)
1794 {
1795 int bodyIndexA = gpuCompoundPairs[i].x;
1796 int bodyIndexB = gpuCompoundPairs[i].y;
1797
1798 int childShapeIndexA = gpuCompoundPairs[i].z;
1799 int childShapeIndexB = gpuCompoundPairs[i].w;
1800
1801 int collidableIndexA = -1;
1802 int collidableIndexB = -1;
1803
1804 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1805 float4 posA = rigidBodies[bodyIndexA].m_pos;
1806
1807 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1808 float4 posB = rigidBodies[bodyIndexB].m_pos;
1809
1810 if (childShapeIndexA >= 0)
1811 {
1812 collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1813 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1814 b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1815 float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1816 b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1817 posA = newPosA;
1818 ornA = newOrnA;
1819 }
1820 else
1821 {
1822 collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1823 }
1824
1825 if (childShapeIndexB >= 0)
1826 {
1827 collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1828 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1829 b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1830 float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1831 b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1832 posB = newPosB;
1833 ornB = newOrnB;
1834 }
1835 else
1836 {
1837 collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1838 }
1839
1840 gpuHasCompoundSepNormalsOut[i] = 0;
1841
1842 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1843 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1844
1845 int shapeTypeA = collidables[collidableIndexA].m_shapeType;
1846 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
1847
1848 if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))
1849 {
1850 return;
1851 }
1852
1853 int hasSeparatingAxis = 5;
1854
1855 // int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1856 float dmin = FLT_MAX;
1857 posA.w = 0.f;
1858 posB.w = 0.f;
1859 float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1860 float4 c0 = transform(&c0local, &posA, &ornA);
1861 float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1862 float4 c1 = transform(&c1local, &posB, &ornB);
1863 const float4 DeltaC2 = c0 - c1;
1864 float4 sepNormal = make_float4(1, 0, 0, 0);
1865 // bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
1866 bool sepA = findSeparatingAxis(convexShapes[shapeIndexA], convexShapes[shapeIndexB], posA, ornA, posB, ornB, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin);
1867
1868 hasSeparatingAxis = 4;
1869 if (!sepA)
1870 {
1871 hasSeparatingAxis = 0;
1872 }
1873 else
1874 {
1875 bool sepB = findSeparatingAxis(convexShapes[shapeIndexB], convexShapes[shapeIndexA], posB, ornB, posA, ornA, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin);
1876
1877 if (!sepB)
1878 {
1879 hasSeparatingAxis = 0;
1880 }
1881 else //(!sepB)
1882 {
1883 bool sepEE = findSeparatingAxisEdgeEdge(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB], posA, ornA, posB, ornB, DeltaC2, vertices, uniqueEdges, faces, indices, &sepNormal, &dmin);
1884 if (sepEE)
1885 {
1886 gpuCompoundSepNormalsOut[i] = sepNormal; //fastNormalize4(sepNormal);
1887 gpuHasCompoundSepNormalsOut[i] = 1;
1888 } //sepEE
1889 } //(!sepB)
1890 } //(!sepA)
1891 }
1892 }
1893
clipCompoundsHullHullKernel(__global const b3Int4 * gpuCompoundPairs,__global const b3RigidBodyData * rigidBodies,__global const b3Collidable * collidables,__global const b3ConvexPolyhedronData * convexShapes,__global const b3AlignedObjectArray<b3Float4> & vertices,__global const b3AlignedObjectArray<b3Float4> & uniqueEdges,__global const b3AlignedObjectArray<b3GpuFace> & faces,__global const b3AlignedObjectArray<int> & indices,__global const b3GpuChildShape * gpuChildShapes,__global const b3AlignedObjectArray<b3Float4> & gpuCompoundSepNormalsOut,__global const b3AlignedObjectArray<int> & gpuHasCompoundSepNormalsOut,__global struct b3Contact4Data * globalContactsOut,int * nGlobalContactsOut,int numCompoundPairs,int maxContactCapacity,int i)1894 __kernel void clipCompoundsHullHullKernel(__global const b3Int4* gpuCompoundPairs,
1895 __global const b3RigidBodyData* rigidBodies,
1896 __global const b3Collidable* collidables,
1897 __global const b3ConvexPolyhedronData* convexShapes,
1898 __global const b3AlignedObjectArray<b3Float4>& vertices,
1899 __global const b3AlignedObjectArray<b3Float4>& uniqueEdges,
1900 __global const b3AlignedObjectArray<b3GpuFace>& faces,
1901 __global const b3AlignedObjectArray<int>& indices,
1902 __global const b3GpuChildShape* gpuChildShapes,
1903 __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut,
1904 __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut,
1905 __global struct b3Contact4Data* globalContactsOut,
1906 int* nGlobalContactsOut,
1907 int numCompoundPairs, int maxContactCapacity, int i)
1908 {
1909 // int i = get_global_id(0);
1910 int pairIndex = i;
1911
1912 float4 worldVertsB1[64];
1913 float4 worldVertsB2[64];
1914 int capacityWorldVerts = 64;
1915
1916 float4 localContactsOut[64];
1917 int localContactCapacity = 64;
1918
1919 float minDist = -1e30f;
1920 float maxDist = 0.0f;
1921
1922 if (i < numCompoundPairs)
1923 {
1924 if (gpuHasCompoundSepNormalsOut[i])
1925 {
1926 int bodyIndexA = gpuCompoundPairs[i].x;
1927 int bodyIndexB = gpuCompoundPairs[i].y;
1928
1929 int childShapeIndexA = gpuCompoundPairs[i].z;
1930 int childShapeIndexB = gpuCompoundPairs[i].w;
1931
1932 int collidableIndexA = -1;
1933 int collidableIndexB = -1;
1934
1935 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1936 float4 posA = rigidBodies[bodyIndexA].m_pos;
1937
1938 b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1939 float4 posB = rigidBodies[bodyIndexB].m_pos;
1940
1941 if (childShapeIndexA >= 0)
1942 {
1943 collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1944 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1945 b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1946 float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1947 b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1948 posA = newPosA;
1949 ornA = newOrnA;
1950 }
1951 else
1952 {
1953 collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1954 }
1955
1956 if (childShapeIndexB >= 0)
1957 {
1958 collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1959 float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1960 b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1961 float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1962 b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1963 posB = newPosB;
1964 ornB = newOrnB;
1965 }
1966 else
1967 {
1968 collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1969 }
1970
1971 int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1972 int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1973
1974 int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],
1975 convexShapes[shapeIndexA], convexShapes[shapeIndexB],
1976 posA, ornA,
1977 posB, ornB,
1978 worldVertsB1, worldVertsB2, capacityWorldVerts,
1979 minDist, maxDist,
1980 vertices, faces, indices,
1981 vertices, faces, indices,
1982 localContactsOut, localContactCapacity);
1983
1984 if (numLocalContactsOut > 0)
1985 {
1986 float4 normal = -gpuCompoundSepNormalsOut[i];
1987 int nPoints = numLocalContactsOut;
1988 float4* pointsIn = localContactsOut;
1989 b3Int4 contactIdx; // = {-1,-1,-1,-1};
1990
1991 contactIdx.s[0] = 0;
1992 contactIdx.s[1] = 1;
1993 contactIdx.s[2] = 2;
1994 contactIdx.s[3] = 3;
1995
1996 int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);
1997
1998 int dstIdx;
1999 dstIdx = b3AtomicInc(nGlobalContactsOut);
2000 if ((dstIdx + nReducedContacts) < maxContactCapacity)
2001 {
2002 __global struct b3Contact4Data* c = globalContactsOut + dstIdx;
2003 c->m_worldNormalOnB = -normal;
2004 c->m_restituitionCoeffCmp = (0.f * 0xffff);
2005 c->m_frictionCoeffCmp = (0.7f * 0xffff);
2006 c->m_batchIdx = pairIndex;
2007 int bodyA = gpuCompoundPairs[pairIndex].x;
2008 int bodyB = gpuCompoundPairs[pairIndex].y;
2009 c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA;
2010 c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? -bodyB : bodyB;
2011 c->m_childIndexA = childShapeIndexA;
2012 c->m_childIndexB = childShapeIndexB;
2013 for (int i = 0; i < nReducedContacts; i++)
2014 {
2015 c->m_worldPosB[i] = pointsIn[contactIdx.s[i]];
2016 }
2017 b3Contact4Data_setNumPoints(c, nReducedContacts);
2018 }
2019
2020 } // if (numContactsOut>0)
2021 } // if (gpuHasCompoundSepNormalsOut[i])
2022 } // if (i<numCompoundPairs)
2023 }
2024
computeContactCompoundCompound(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3RigidBodyData * rigidBodies,const b3Collidable * collidables,const b3ConvexPolyhedronData * convexShapes,const b3GpuChildShape * cpuChildShapes,const b3AlignedObjectArray<b3Aabb> & hostAabbsWorldSpace,const b3AlignedObjectArray<b3Aabb> & hostAabbsLocalSpace,const b3AlignedObjectArray<b3Vector3> & convexVertices,const b3AlignedObjectArray<b3Vector3> & hostUniqueEdges,const b3AlignedObjectArray<int> & convexIndices,const b3AlignedObjectArray<b3GpuFace> & faces,b3Contact4 * globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity,b3AlignedObjectArray<b3QuantizedBvhNode> & treeNodesCPU,b3AlignedObjectArray<b3BvhSubtreeInfo> & subTreesCPU,b3AlignedObjectArray<b3BvhInfo> & bvhInfoCPU)2025 void computeContactCompoundCompound(int pairIndex,
2026 int bodyIndexA, int bodyIndexB,
2027 int collidableIndexA, int collidableIndexB,
2028 const b3RigidBodyData* rigidBodies,
2029 const b3Collidable* collidables,
2030 const b3ConvexPolyhedronData* convexShapes,
2031 const b3GpuChildShape* cpuChildShapes,
2032 const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace,
2033 const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace,
2034
2035 const b3AlignedObjectArray<b3Vector3>& convexVertices,
2036 const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges,
2037 const b3AlignedObjectArray<int>& convexIndices,
2038 const b3AlignedObjectArray<b3GpuFace>& faces,
2039
2040 b3Contact4* globalContactsOut,
2041 int& nGlobalContactsOut,
2042 int maxContactCapacity,
2043 b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
2044 b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
2045 b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)
2046 {
2047 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
2048 b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
2049
2050 b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut;
2051 int numCompoundPairsOut = 0;
2052 int maxNumCompoundPairsCapacity = 8192; //1024;
2053 cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity);
2054
2055 // work-in-progress
2056 findCompoundPairsKernel(
2057 pairIndex,
2058 bodyIndexA, bodyIndexB,
2059 collidableIndexA, collidableIndexB,
2060 rigidBodies,
2061 collidables,
2062 convexShapes,
2063 convexVertices,
2064 hostAabbsWorldSpace,
2065 hostAabbsLocalSpace,
2066 cpuChildShapes,
2067 &cpuCompoundPairsOut[0],
2068 &numCompoundPairsOut,
2069 maxNumCompoundPairsCapacity,
2070 treeNodesCPU,
2071 subTreesCPU,
2072 bvhInfoCPU);
2073
2074 printf("maxNumAabbChecks=%d\n", maxNumAabbChecks);
2075 if (numCompoundPairsOut > maxNumCompoundPairsCapacity)
2076 {
2077 b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n", maxNumCompoundPairsCapacity);
2078 numCompoundPairsOut = maxNumCompoundPairsCapacity;
2079 }
2080 b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut;
2081 b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut;
2082 cpuCompoundSepNormalsOut.resize(numCompoundPairsOut);
2083 cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut);
2084
2085 for (int i = 0; i < numCompoundPairsOut; i++)
2086 {
2087 processCompoundPairsKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, 0, cpuChildShapes,
2088 cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, numCompoundPairsOut, i);
2089 }
2090
2091 for (int i = 0; i < numCompoundPairsOut; i++)
2092 {
2093 clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, cpuChildShapes,
2094 cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, globalContactsOut, &nGlobalContactsOut, numCompoundPairsOut, maxContactCapacity, i);
2095 }
2096 /*
2097 int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
2098
2099 float4 posA = rigidBodies[bodyIndexA].m_pos;
2100 b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
2101 float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
2102 b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
2103 float4 newPosA = b3QuatRotate(ornA,childPosA)+posA;
2104 b3Quat newOrnA = b3QuatMul(ornA,childOrnA);
2105
2106 int shapeIndexA = collidables[childColIndexA].m_shapeIndex;
2107
2108
2109 bool foundSepAxis = findSeparatingAxis(hullA,hullB,
2110 posA,
2111 ornA,
2112 posB,
2113 ornB,
2114
2115 convexVertices,uniqueEdges,faces,convexIndices,
2116 convexVertices,uniqueEdges,faces,convexIndices,
2117
2118 sepNormalWorldSpace
2119 );
2120 */
2121
2122 /*
2123 if (foundSepAxis)
2124 {
2125
2126
2127 contactIndex = clipHullHullSingle(
2128 bodyIndexA, bodyIndexB,
2129 posA,ornA,
2130 posB,ornB,
2131 collidableIndexA, collidableIndexB,
2132 &rigidBodies,
2133 &globalContactsOut,
2134 nGlobalContactsOut,
2135
2136 convexShapes,
2137 convexShapes,
2138
2139 convexVertices,
2140 uniqueEdges,
2141 faces,
2142 convexIndices,
2143
2144 convexVertices,
2145 uniqueEdges,
2146 faces,
2147 convexIndices,
2148
2149 collidables,
2150 collidables,
2151 sepNormalWorldSpace,
2152 maxContactCapacity);
2153
2154 }
2155 */
2156
2157 // return contactIndex;
2158
2159 /*
2160
2161 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
2162 for (int c=0;c<numChildrenB;c++)
2163 {
2164 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c;
2165 int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex;
2166
2167 float4 rootPosB = rigidBodies[bodyIndexB].m_pos;
2168 b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat;
2169 b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition;
2170 b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation;
2171 float4 posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB;
2172 b3Quaternion ornB = b3QuatMul(rootOrnB,childOrnB);//b3QuatMul(ornB,childOrnB);
2173
2174 int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
2175
2176 const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB];
2177
2178 }
2179 */
2180 }
2181
computeContactPlaneCompound(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3RigidBodyData * rigidBodies,const b3Collidable * collidables,const b3ConvexPolyhedronData * convexShapes,const b3GpuChildShape * cpuChildShapes,const b3Vector3 * convexVertices,const int * convexIndices,const b3GpuFace * faces,b3Contact4 * globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity)2182 void computeContactPlaneCompound(int pairIndex,
2183 int bodyIndexA, int bodyIndexB,
2184 int collidableIndexA, int collidableIndexB,
2185 const b3RigidBodyData* rigidBodies,
2186 const b3Collidable* collidables,
2187 const b3ConvexPolyhedronData* convexShapes,
2188 const b3GpuChildShape* cpuChildShapes,
2189 const b3Vector3* convexVertices,
2190 const int* convexIndices,
2191 const b3GpuFace* faces,
2192
2193 b3Contact4* globalContactsOut,
2194 int& nGlobalContactsOut,
2195 int maxContactCapacity)
2196 {
2197 int shapeTypeB = collidables[collidableIndexB].m_shapeType;
2198 b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
2199
2200 int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
2201 for (int c = 0; c < numChildrenB; c++)
2202 {
2203 int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + c;
2204 int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex;
2205
2206 float4 rootPosB = rigidBodies[bodyIndexB].m_pos;
2207 b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat;
2208 b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition;
2209 b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation;
2210 float4 posB = b3QuatRotate(rootOrnB, childPosB) + rootPosB;
2211 b3Quaternion ornB = rootOrnB * childOrnB; //b3QuatMul(ornB,childOrnB);
2212
2213 int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
2214
2215 const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB];
2216
2217 b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
2218 b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
2219
2220 // int numContactsOut = 0;
2221 // int numWorldVertsB1= 0;
2222
2223 b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;
2224 b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z);
2225 b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);
2226 float planeConstant = planeEq.w;
2227 b3Transform convexWorldTransform;
2228 convexWorldTransform.setIdentity();
2229 convexWorldTransform.setOrigin(posB);
2230 convexWorldTransform.setRotation(ornB);
2231 b3Transform planeTransform;
2232 planeTransform.setIdentity();
2233 planeTransform.setOrigin(posA);
2234 planeTransform.setRotation(ornA);
2235
2236 b3Transform planeInConvex;
2237 planeInConvex = convexWorldTransform.inverse() * planeTransform;
2238 b3Transform convexInPlane;
2239 convexInPlane = planeTransform.inverse() * convexWorldTransform;
2240
2241 b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;
2242 float maxDot = -1e30;
2243 int hitVertex = -1;
2244 b3Vector3 hitVtx;
2245
2246 #define MAX_PLANE_CONVEX_POINTS 64
2247
2248 b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
2249 int numPoints = 0;
2250
2251 b3Int4 contactIdx;
2252 contactIdx.s[0] = 0;
2253 contactIdx.s[1] = 1;
2254 contactIdx.s[2] = 2;
2255 contactIdx.s[3] = 3;
2256
2257 for (int i = 0; i < hullB->m_numVertices; i++)
2258 {
2259 b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];
2260 float curDot = vtx.dot(planeNormalInConvex);
2261
2262 if (curDot > maxDot)
2263 {
2264 hitVertex = i;
2265 maxDot = curDot;
2266 hitVtx = vtx;
2267 //make sure the deepest points is always included
2268 if (numPoints == MAX_PLANE_CONVEX_POINTS)
2269 numPoints--;
2270 }
2271
2272 if (numPoints < MAX_PLANE_CONVEX_POINTS)
2273 {
2274 b3Vector3 vtxWorld = convexWorldTransform * vtx;
2275 b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld;
2276 float dist = planeNormal.dot(vtxInPlane) - planeConstant;
2277 if (dist < 0.f)
2278 {
2279 vtxWorld.w = dist;
2280 contactPoints[numPoints] = vtxWorld;
2281 numPoints++;
2282 }
2283 }
2284 }
2285
2286 int numReducedPoints = 0;
2287
2288 numReducedPoints = numPoints;
2289
2290 if (numPoints > 4)
2291 {
2292 numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);
2293 }
2294 int dstIdx;
2295 // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
2296
2297 if (numReducedPoints > 0)
2298 {
2299 if (nGlobalContactsOut < maxContactCapacity)
2300 {
2301 dstIdx = nGlobalContactsOut;
2302 nGlobalContactsOut++;
2303
2304 b3Contact4* c = &globalContactsOut[dstIdx];
2305 c->m_worldNormalOnB = -planeNormalWorld;
2306 c->setFrictionCoeff(0.7);
2307 c->setRestituitionCoeff(0.f);
2308
2309 c->m_batchIdx = pairIndex;
2310 c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
2311 c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
2312 for (int i = 0; i < numReducedPoints; i++)
2313 {
2314 b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];
2315 c->m_worldPosB[i] = pOnB1;
2316 }
2317 c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints;
2318 } //if (dstIdx < numPairs)
2319 }
2320 }
2321 }
2322
computeContactSphereConvex(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3RigidBodyData * rigidBodies,const b3Collidable * collidables,const b3ConvexPolyhedronData * convexShapes,const b3Vector3 * convexVertices,const int * convexIndices,const b3GpuFace * faces,b3Contact4 * globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity)2323 void computeContactSphereConvex(int pairIndex,
2324 int bodyIndexA, int bodyIndexB,
2325 int collidableIndexA, int collidableIndexB,
2326 const b3RigidBodyData* rigidBodies,
2327 const b3Collidable* collidables,
2328 const b3ConvexPolyhedronData* convexShapes,
2329 const b3Vector3* convexVertices,
2330 const int* convexIndices,
2331 const b3GpuFace* faces,
2332 b3Contact4* globalContactsOut,
2333 int& nGlobalContactsOut,
2334 int maxContactCapacity)
2335 {
2336 float radius = collidables[collidableIndexA].m_radius;
2337 float4 spherePos1 = rigidBodies[bodyIndexA].m_pos;
2338 b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat;
2339
2340 float4 pos = rigidBodies[bodyIndexB].m_pos;
2341
2342 b3Quaternion quat = rigidBodies[bodyIndexB].m_quat;
2343
2344 b3Transform tr;
2345 tr.setIdentity();
2346 tr.setOrigin(pos);
2347 tr.setRotation(quat);
2348 b3Transform trInv = tr.inverse();
2349
2350 float4 spherePos = trInv(spherePos1);
2351
2352 int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx;
2353 int shapeIndex = collidables[collidableIndex].m_shapeIndex;
2354 int numFaces = convexShapes[shapeIndex].m_numFaces;
2355 float4 closestPnt = b3MakeVector3(0, 0, 0, 0);
2356 // float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0);
2357 float minDist = -1000000.f; // TODO: What is the largest/smallest float?
2358 bool bCollide = true;
2359 int region = -1;
2360 float4 localHitNormal;
2361 for (int f = 0; f < numFaces; f++)
2362 {
2363 b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f];
2364 float4 planeEqn;
2365 float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f);
2366 float4 n1 = localPlaneNormal; //quatRotate(quat,localPlaneNormal);
2367 planeEqn = n1;
2368 planeEqn[3] = face.m_plane.w;
2369
2370 float4 pntReturn;
2371 float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);
2372
2373 if (dist > radius)
2374 {
2375 bCollide = false;
2376 break;
2377 }
2378
2379 if (dist > 0)
2380 {
2381 //might hit an edge or vertex
2382 b3Vector3 out;
2383
2384 bool isInPoly = IsPointInPolygon(spherePos,
2385 &face,
2386 &convexVertices[convexShapes[shapeIndex].m_vertexOffset],
2387 convexIndices,
2388 &out);
2389 if (isInPoly)
2390 {
2391 if (dist > minDist)
2392 {
2393 minDist = dist;
2394 closestPnt = pntReturn;
2395 localHitNormal = planeEqn;
2396 region = 1;
2397 }
2398 }
2399 else
2400 {
2401 b3Vector3 tmp = spherePos - out;
2402 b3Scalar l2 = tmp.length2();
2403 if (l2 < radius * radius)
2404 {
2405 dist = b3Sqrt(l2);
2406 if (dist > minDist)
2407 {
2408 minDist = dist;
2409 closestPnt = out;
2410 localHitNormal = tmp / dist;
2411 region = 2;
2412 }
2413 }
2414 else
2415 {
2416 bCollide = false;
2417 break;
2418 }
2419 }
2420 }
2421 else
2422 {
2423 if (dist > minDist)
2424 {
2425 minDist = dist;
2426 closestPnt = pntReturn;
2427 localHitNormal = planeEqn;
2428 region = 3;
2429 }
2430 }
2431 }
2432 static int numChecks = 0;
2433 numChecks++;
2434
2435 if (bCollide && minDist > -10000)
2436 {
2437 float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal; //-hitNormalWorld;
2438 float4 pOnB1 = tr(closestPnt);
2439 //printf("dist ,%f,",minDist);
2440 float actualDepth = minDist - radius;
2441 if (actualDepth < 0)
2442 {
2443 //printf("actualDepth = ,%f,", actualDepth);
2444 //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z);
2445 //printf("region=,%d,\n", region);
2446 pOnB1[3] = actualDepth;
2447
2448 int dstIdx;
2449 // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
2450
2451 if (nGlobalContactsOut < maxContactCapacity)
2452 {
2453 dstIdx = nGlobalContactsOut;
2454 nGlobalContactsOut++;
2455
2456 b3Contact4* c = &globalContactsOut[dstIdx];
2457 c->m_worldNormalOnB = normalOnSurfaceB1;
2458 c->setFrictionCoeff(0.7);
2459 c->setRestituitionCoeff(0.f);
2460
2461 c->m_batchIdx = pairIndex;
2462 c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
2463 c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
2464 c->m_worldPosB[0] = pOnB1;
2465 int numPoints = 1;
2466 c->m_worldNormalOnB.w = (b3Scalar)numPoints;
2467 } //if (dstIdx < numPairs)
2468 }
2469 } //if (hasCollision)
2470 }
2471
computeContactConvexConvex2(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3AlignedObjectArray<b3RigidBodyData> & rigidBodies,const b3AlignedObjectArray<b3Collidable> & collidables,const b3AlignedObjectArray<b3ConvexPolyhedronData> & convexShapes,const b3AlignedObjectArray<b3Vector3> & convexVertices,const b3AlignedObjectArray<b3Vector3> & uniqueEdges,const b3AlignedObjectArray<int> & convexIndices,const b3AlignedObjectArray<b3GpuFace> & faces,b3AlignedObjectArray<b3Contact4> & globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity,const b3AlignedObjectArray<b3Contact4> & oldContacts)2472 int computeContactConvexConvex2(
2473 int pairIndex,
2474 int bodyIndexA, int bodyIndexB,
2475 int collidableIndexA, int collidableIndexB,
2476 const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies,
2477 const b3AlignedObjectArray<b3Collidable>& collidables,
2478 const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes,
2479 const b3AlignedObjectArray<b3Vector3>& convexVertices,
2480 const b3AlignedObjectArray<b3Vector3>& uniqueEdges,
2481 const b3AlignedObjectArray<int>& convexIndices,
2482 const b3AlignedObjectArray<b3GpuFace>& faces,
2483 b3AlignedObjectArray<b3Contact4>& globalContactsOut,
2484 int& nGlobalContactsOut,
2485 int maxContactCapacity,
2486 const b3AlignedObjectArray<b3Contact4>& oldContacts)
2487 {
2488 int contactIndex = -1;
2489 b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
2490 b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
2491 b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;
2492 b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
2493
2494 b3ConvexPolyhedronData hullA, hullB;
2495
2496 b3Vector3 sepNormalWorldSpace;
2497
2498 b3Collidable colA = collidables[collidableIndexA];
2499 hullA = convexShapes[colA.m_shapeIndex];
2500 //printf("numvertsA = %d\n",hullA.m_numVertices);
2501
2502 b3Collidable colB = collidables[collidableIndexB];
2503 hullB = convexShapes[colB.m_shapeIndex];
2504 //printf("numvertsB = %d\n",hullB.m_numVertices);
2505
2506 // int contactCapacity = MAX_VERTS;
2507 //int numContactsOut=0;
2508
2509 #ifdef _WIN32
2510 b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x));
2511 b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x));
2512 #endif
2513
2514 bool foundSepAxis = findSeparatingAxis(hullA, hullB,
2515 posA,
2516 ornA,
2517 posB,
2518 ornB,
2519
2520 convexVertices, uniqueEdges, faces, convexIndices,
2521 convexVertices, uniqueEdges, faces, convexIndices,
2522
2523 sepNormalWorldSpace);
2524
2525 if (foundSepAxis)
2526 {
2527 contactIndex = clipHullHullSingle(
2528 bodyIndexA, bodyIndexB,
2529 posA, ornA,
2530 posB, ornB,
2531 collidableIndexA, collidableIndexB,
2532 &rigidBodies,
2533 &globalContactsOut,
2534 nGlobalContactsOut,
2535
2536 convexShapes,
2537 convexShapes,
2538
2539 convexVertices,
2540 uniqueEdges,
2541 faces,
2542 convexIndices,
2543
2544 convexVertices,
2545 uniqueEdges,
2546 faces,
2547 convexIndices,
2548
2549 collidables,
2550 collidables,
2551 sepNormalWorldSpace,
2552 maxContactCapacity);
2553 }
2554
2555 return contactIndex;
2556 }
2557
computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4> * pairs,int nPairs,const b3OpenCLArray<b3RigidBodyData> * bodyBuf,b3OpenCLArray<b3Contact4> * contactOut,int & nContacts,const b3OpenCLArray<b3Contact4> * oldContacts,int maxContactCapacity,int compoundPairCapacity,const b3OpenCLArray<b3ConvexPolyhedronData> & convexData,const b3OpenCLArray<b3Vector3> & gpuVertices,const b3OpenCLArray<b3Vector3> & gpuUniqueEdges,const b3OpenCLArray<b3GpuFace> & gpuFaces,const b3OpenCLArray<int> & gpuIndices,const b3OpenCLArray<b3Collidable> & gpuCollidables,const b3OpenCLArray<b3GpuChildShape> & gpuChildShapes,const b3OpenCLArray<b3Aabb> & clAabbsWorldSpace,const b3OpenCLArray<b3Aabb> & clAabbsLocalSpace,b3OpenCLArray<b3Vector3> & worldVertsB1GPU,b3OpenCLArray<b3Int4> & clippingFacesOutGPU,b3OpenCLArray<b3Vector3> & worldNormalsAGPU,b3OpenCLArray<b3Vector3> & worldVertsA1GPU,b3OpenCLArray<b3Vector3> & worldVertsB2GPU,b3AlignedObjectArray<class b3OptimizedBvh * > & bvhDataUnused,b3OpenCLArray<b3QuantizedBvhNode> * treeNodesGPU,b3OpenCLArray<b3BvhSubtreeInfo> * subTreesGPU,b3OpenCLArray<b3BvhInfo> * bvhInfo,int numObjects,int maxTriConvexPairCapacity,b3OpenCLArray<b3Int4> & triangleConvexPairsOut,int & numTriConvexPairsOut)2558 void GpuSatCollision::computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4>* pairs, int nPairs,
2559 const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
2560 b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
2561 const b3OpenCLArray<b3Contact4>* oldContacts,
2562 int maxContactCapacity,
2563 int compoundPairCapacity,
2564 const b3OpenCLArray<b3ConvexPolyhedronData>& convexData,
2565 const b3OpenCLArray<b3Vector3>& gpuVertices,
2566 const b3OpenCLArray<b3Vector3>& gpuUniqueEdges,
2567 const b3OpenCLArray<b3GpuFace>& gpuFaces,
2568 const b3OpenCLArray<int>& gpuIndices,
2569 const b3OpenCLArray<b3Collidable>& gpuCollidables,
2570 const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
2571
2572 const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace,
2573 const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace,
2574
2575 b3OpenCLArray<b3Vector3>& worldVertsB1GPU,
2576 b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
2577 b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
2578 b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
2579 b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
2580 b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused,
2581 b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU,
2582 b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU,
2583 b3OpenCLArray<b3BvhInfo>* bvhInfo,
2584
2585 int numObjects,
2586 int maxTriConvexPairCapacity,
2587 b3OpenCLArray<b3Int4>& triangleConvexPairsOut,
2588 int& numTriConvexPairsOut)
2589 {
2590 myframecount++;
2591
2592 if (!nPairs)
2593 return;
2594
2595 #ifdef CHECK_ON_HOST
2596
2597 b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
2598 treeNodesGPU->copyToHost(treeNodesCPU);
2599
2600 b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
2601 subTreesGPU->copyToHost(subTreesCPU);
2602
2603 b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
2604 bvhInfo->copyToHost(bvhInfoCPU);
2605
2606 b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
2607 clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
2608
2609 b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace;
2610 clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace);
2611
2612 b3AlignedObjectArray<b3Int4> hostPairs;
2613 pairs->copyToHost(hostPairs);
2614
2615 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
2616 bodyBuf->copyToHost(hostBodyBuf);
2617
2618 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
2619 convexData.copyToHost(hostConvexData);
2620
2621 b3AlignedObjectArray<b3Vector3> hostVertices;
2622 gpuVertices.copyToHost(hostVertices);
2623
2624 b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
2625 gpuUniqueEdges.copyToHost(hostUniqueEdges);
2626 b3AlignedObjectArray<b3GpuFace> hostFaces;
2627 gpuFaces.copyToHost(hostFaces);
2628 b3AlignedObjectArray<int> hostIndices;
2629 gpuIndices.copyToHost(hostIndices);
2630 b3AlignedObjectArray<b3Collidable> hostCollidables;
2631 gpuCollidables.copyToHost(hostCollidables);
2632
2633 b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
2634 gpuChildShapes.copyToHost(cpuChildShapes);
2635
2636 b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs;
2637
2638 b3AlignedObjectArray<b3Contact4> hostContacts;
2639 if (nContacts)
2640 {
2641 contactOut->copyToHost(hostContacts);
2642 }
2643
2644 b3AlignedObjectArray<b3Contact4> oldHostContacts;
2645
2646 if (oldContacts->size())
2647 {
2648 oldContacts->copyToHost(oldHostContacts);
2649 }
2650
2651 hostContacts.resize(maxContactCapacity);
2652
2653 for (int i = 0; i < nPairs; i++)
2654 {
2655 int bodyIndexA = hostPairs[i].x;
2656 int bodyIndexB = hostPairs[i].y;
2657 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
2658 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
2659
2660 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&
2661 hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2662 {
2663 computeContactSphereConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2664 &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2665 }
2666
2667 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2668 hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)
2669 {
2670 computeContactSphereConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2671 &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2672 //printf("convex-sphere\n");
2673 }
2674
2675 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2676 hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)
2677 {
2678 computeContactPlaneConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2679 &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2680 // printf("convex-plane\n");
2681 }
2682
2683 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&
2684 hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2685 {
2686 computeContactPlaneConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2687 &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2688 // printf("plane-convex\n");
2689 }
2690
2691 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
2692 hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
2693 {
2694 computeContactCompoundCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2695 &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], hostAabbsWorldSpace, hostAabbsLocalSpace, hostVertices, hostUniqueEdges, hostIndices, hostFaces, &hostContacts[0],
2696 nContacts, maxContactCapacity, treeNodesCPU, subTreesCPU, bvhInfoCPU);
2697 // printf("convex-plane\n");
2698 }
2699
2700 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
2701 hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)
2702 {
2703 computeContactPlaneCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2704 &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2705 // printf("convex-plane\n");
2706 }
2707
2708 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&
2709 hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
2710 {
2711 computeContactPlaneCompound(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2712 &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2713 // printf("plane-convex\n");
2714 }
2715
2716 if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2717 hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2718 {
2719 //printf("hostPairs[i].z=%d\n",hostPairs[i].z);
2720 int contactIndex = computeContactConvexConvex2(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, hostBodyBuf, hostCollidables, hostConvexData, hostVertices, hostUniqueEdges, hostIndices, hostFaces, hostContacts, nContacts, maxContactCapacity, oldHostContacts);
2721 //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
2722
2723 if (contactIndex >= 0)
2724 {
2725 // printf("convex convex contactIndex = %d\n",contactIndex);
2726 hostPairs[i].z = contactIndex;
2727 }
2728 // printf("plane-convex\n");
2729 }
2730 }
2731
2732 if (hostPairs.size())
2733 {
2734 pairs->copyFromHost(hostPairs);
2735 }
2736
2737 hostContacts.resize(nContacts);
2738 if (nContacts)
2739 {
2740 contactOut->copyFromHost(hostContacts);
2741 }
2742 else
2743 {
2744 contactOut->resize(0);
2745 }
2746
2747 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
2748 //printf("(HOST) nContacts = %d\n",nContacts);
2749
2750 #else
2751
2752 {
2753 if (nPairs)
2754 {
2755 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
2756
2757 B3_PROFILE("primitiveContactsKernel");
2758 b3BufferInfoCL bInfo[] = {
2759 b3BufferInfoCL(pairs->getBufferCL(), true),
2760 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2761 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2762 b3BufferInfoCL(convexData.getBufferCL(), true),
2763 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2764 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2765 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2766 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2767 b3BufferInfoCL(contactOut->getBufferCL()),
2768 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
2769
2770 b3LauncherCL launcher(m_queue, m_primitiveContactsKernel, "m_primitiveContactsKernel");
2771 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2772 launcher.setConst(nPairs);
2773 launcher.setConst(maxContactCapacity);
2774 int num = nPairs;
2775 launcher.launch1D(num);
2776 clFinish(m_queue);
2777
2778 nContacts = m_totalContactsOut.at(0);
2779 contactOut->resize(nContacts);
2780 }
2781 }
2782
2783 #endif //CHECK_ON_HOST
2784
2785 B3_PROFILE("computeConvexConvexContactsGPUSAT");
2786 // printf("nContacts = %d\n",nContacts);
2787
2788 m_sepNormals.resize(nPairs);
2789 m_hasSeparatingNormals.resize(nPairs);
2790
2791 int concaveCapacity = maxTriConvexPairCapacity;
2792 m_concaveSepNormals.resize(concaveCapacity);
2793 m_concaveHasSeparatingNormals.resize(concaveCapacity);
2794 m_numConcavePairsOut.resize(0);
2795 m_numConcavePairsOut.push_back(0);
2796
2797 m_gpuCompoundPairs.resize(compoundPairCapacity);
2798
2799 m_gpuCompoundSepNormals.resize(compoundPairCapacity);
2800
2801 m_gpuHasCompoundSepNormals.resize(compoundPairCapacity);
2802
2803 m_numCompoundPairsOut.resize(0);
2804 m_numCompoundPairsOut.push_back(0);
2805
2806 int numCompoundPairs = 0;
2807
2808 int numConcavePairs = 0;
2809
2810 {
2811 clFinish(m_queue);
2812 if (findSeparatingAxisOnGpu)
2813 {
2814 m_dmins.resize(nPairs);
2815 if (splitSearchSepAxisConvex)
2816 {
2817 if (useMprGpu)
2818 {
2819 nContacts = m_totalContactsOut.at(0);
2820 {
2821 B3_PROFILE("mprPenetrationKernel");
2822 b3BufferInfoCL bInfo[] = {
2823 b3BufferInfoCL(pairs->getBufferCL(), true),
2824 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2825 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2826 b3BufferInfoCL(convexData.getBufferCL(), true),
2827 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2828 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2829 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2830 b3BufferInfoCL(contactOut->getBufferCL()),
2831 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
2832
2833 b3LauncherCL launcher(m_queue, m_mprPenetrationKernel, "mprPenetrationKernel");
2834 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2835
2836 launcher.setConst(maxContactCapacity);
2837 launcher.setConst(nPairs);
2838
2839 int num = nPairs;
2840 launcher.launch1D(num);
2841 clFinish(m_queue);
2842 /*
2843 b3AlignedObjectArray<int>hostHasSepAxis;
2844 m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
2845 b3AlignedObjectArray<b3Vector3>hostSepAxis;
2846 m_sepNormals.copyToHost(hostSepAxis);
2847 */
2848 nContacts = m_totalContactsOut.at(0);
2849 contactOut->resize(nContacts);
2850 // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts);
2851 if (nContacts > maxContactCapacity)
2852 {
2853 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
2854 nContacts = maxContactCapacity;
2855 }
2856 }
2857 }
2858
2859 if (1)
2860 {
2861 if (1)
2862 {
2863 {
2864 B3_PROFILE("findSeparatingAxisVertexFaceKernel");
2865 b3BufferInfoCL bInfo[] = {
2866 b3BufferInfoCL(pairs->getBufferCL(), true),
2867 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2868 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2869 b3BufferInfoCL(convexData.getBufferCL(), true),
2870 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2871 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2872 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2873 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2874 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2875 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2876 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2877 b3BufferInfoCL(m_dmins.getBufferCL())};
2878
2879 b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel, "findSeparatingAxisVertexFaceKernel");
2880 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2881 launcher.setConst(nPairs);
2882
2883 int num = nPairs;
2884 launcher.launch1D(num);
2885 clFinish(m_queue);
2886 }
2887
2888 int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
2889
2890 {
2891 B3_PROFILE("findSeparatingAxisEdgeEdgeKernel");
2892 b3BufferInfoCL bInfo[] = {
2893 b3BufferInfoCL(pairs->getBufferCL(), true),
2894 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2895 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2896 b3BufferInfoCL(convexData.getBufferCL(), true),
2897 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2898 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2899 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2900 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2901 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2902 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2903 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2904 b3BufferInfoCL(m_dmins.getBufferCL()),
2905 b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true)
2906
2907 };
2908
2909 b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel, "findSeparatingAxisEdgeEdgeKernel");
2910 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2911 launcher.setConst(numDirections);
2912 launcher.setConst(nPairs);
2913 int num = nPairs;
2914 launcher.launch1D(num);
2915 clFinish(m_queue);
2916 }
2917 }
2918 if (useMprGpu)
2919 {
2920 B3_PROFILE("findSeparatingAxisUnitSphereKernel");
2921 b3BufferInfoCL bInfo[] = {
2922 b3BufferInfoCL(pairs->getBufferCL(), true),
2923 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2924 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2925 b3BufferInfoCL(convexData.getBufferCL(), true),
2926 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2927 b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true),
2928 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2929 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2930 b3BufferInfoCL(m_dmins.getBufferCL())};
2931
2932 b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel, "findSeparatingAxisUnitSphereKernel");
2933 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2934 int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
2935 launcher.setConst(numDirections);
2936
2937 launcher.setConst(nPairs);
2938
2939 int num = nPairs;
2940 launcher.launch1D(num);
2941 clFinish(m_queue);
2942 }
2943 }
2944 }
2945 else
2946 {
2947 B3_PROFILE("findSeparatingAxisKernel");
2948 b3BufferInfoCL bInfo[] = {
2949 b3BufferInfoCL(pairs->getBufferCL(), true),
2950 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2951 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2952 b3BufferInfoCL(convexData.getBufferCL(), true),
2953 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2954 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2955 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2956 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2957 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2958 b3BufferInfoCL(m_sepNormals.getBufferCL()),
2959 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL())};
2960
2961 b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel, "m_findSeparatingAxisKernel");
2962 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2963 launcher.setConst(nPairs);
2964
2965 int num = nPairs;
2966 launcher.launch1D(num);
2967 clFinish(m_queue);
2968 }
2969 }
2970 else
2971 {
2972 B3_PROFILE("findSeparatingAxisKernel CPU");
2973
2974 b3AlignedObjectArray<b3Int4> hostPairs;
2975 pairs->copyToHost(hostPairs);
2976 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
2977 bodyBuf->copyToHost(hostBodyBuf);
2978
2979 b3AlignedObjectArray<b3Collidable> hostCollidables;
2980 gpuCollidables.copyToHost(hostCollidables);
2981
2982 b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
2983 gpuChildShapes.copyToHost(cpuChildShapes);
2984
2985 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData;
2986 convexData.copyToHost(hostConvexShapeData);
2987
2988 b3AlignedObjectArray<b3Vector3> hostVertices;
2989 gpuVertices.copyToHost(hostVertices);
2990
2991 b3AlignedObjectArray<int> hostHasSepAxis;
2992 hostHasSepAxis.resize(nPairs);
2993 b3AlignedObjectArray<b3Vector3> hostSepAxis;
2994 hostSepAxis.resize(nPairs);
2995
2996 b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
2997 gpuUniqueEdges.copyToHost(hostUniqueEdges);
2998 b3AlignedObjectArray<b3GpuFace> hostFaces;
2999 gpuFaces.copyToHost(hostFaces);
3000
3001 b3AlignedObjectArray<int> hostIndices;
3002 gpuIndices.copyToHost(hostIndices);
3003
3004 b3AlignedObjectArray<b3Contact4> hostContacts;
3005 if (nContacts)
3006 {
3007 contactOut->copyToHost(hostContacts);
3008 }
3009 hostContacts.resize(maxContactCapacity);
3010 int nGlobalContactsOut = nContacts;
3011
3012 for (int i = 0; i < nPairs; i++)
3013 {
3014 int bodyIndexA = hostPairs[i].x;
3015 int bodyIndexB = hostPairs[i].y;
3016 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
3017 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
3018
3019 int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex;
3020 int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex;
3021
3022 hostHasSepAxis[i] = 0;
3023
3024 //once the broadphase avoids static-static pairs, we can remove this test
3025 if ((hostBodyBuf[bodyIndexA].m_invMass == 0) && (hostBodyBuf[bodyIndexB].m_invMass == 0))
3026 {
3027 continue;
3028 }
3029
3030 if ((hostCollidables[collidableIndexA].m_shapeType != SHAPE_CONVEX_HULL) || (hostCollidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL))
3031 {
3032 continue;
3033 }
3034
3035 float dmin = FLT_MAX;
3036
3037 b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA];
3038 b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB];
3039 b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos;
3040 b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos;
3041 b3Quaternion ornA = hostBodyBuf[bodyIndexA].m_quat;
3042 b3Quaternion ornB = hostBodyBuf[bodyIndexB].m_quat;
3043
3044 if (useGjk)
3045 {
3046 //first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR
3047 {
3048 b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;
3049 b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);
3050 b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter;
3051 b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);
3052 b3Vector3 DeltaC2 = c0 - c1;
3053
3054 b3Vector3 sepAxis;
3055
3056 bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3057 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3058 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3059 &sepAxis, &dmin);
3060
3061 if (hasSepAxisA)
3062 {
3063 bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2,
3064 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3065 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3066 &sepAxis, &dmin);
3067 if (hasSepAxisB)
3068 {
3069 bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3070 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3071 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3072 &sepAxis, &dmin, false);
3073
3074 if (hasEdgeEdge)
3075 {
3076 hostHasSepAxis[i] = 1;
3077 hostSepAxis[i] = sepAxis;
3078 hostSepAxis[i].w = dmin;
3079 }
3080 }
3081 }
3082 }
3083
3084 if (hostHasSepAxis[i])
3085 {
3086 int pairIndex = i;
3087
3088 bool useMpr = true;
3089 if (useMpr)
3090 {
3091 int res = 0;
3092 float depth = 0.f;
3093 b3Vector3 sepAxis2 = b3MakeVector3(1, 0, 0);
3094 b3Vector3 resultPointOnBWorld = b3MakeVector3(0, 0, 0);
3095
3096 float depthOut;
3097 b3Vector3 dirOut;
3098 b3Vector3 posOut;
3099
3100 //res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut);
3101 res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB, &hostBodyBuf[0], &hostConvexShapeData[0], &hostCollidables[0], &hostVertices[0], &hostSepAxis[0], &hostHasSepAxis[0], &depthOut, &dirOut, &posOut);
3102 depth = depthOut;
3103 sepAxis2 = b3MakeVector3(-dirOut.x, -dirOut.y, -dirOut.z);
3104 resultPointOnBWorld = posOut;
3105 //hostHasSepAxis[i] = 0;
3106
3107 if (res == 0)
3108 {
3109 //add point?
3110 //printf("depth = %f\n",depth);
3111 //printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]);
3112 //qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]);
3113
3114 float dist = 0.f;
3115
3116 const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex];
3117 const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex];
3118
3119 if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))
3120 {
3121 if (depth > dist)
3122 {
3123 float diff = depth - dist;
3124
3125 static float maxdiff = 0.f;
3126 if (maxdiff < diff)
3127 {
3128 maxdiff = diff;
3129 printf("maxdiff = %20.10f\n", maxdiff);
3130 }
3131 }
3132 }
3133 if (depth > dmin)
3134 {
3135 b3Vector3 oldAxis = hostSepAxis[i];
3136 depth = dmin;
3137 sepAxis2 = oldAxis;
3138 }
3139
3140 if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))
3141 {
3142 if (depth > dist)
3143 {
3144 float diff = depth - dist;
3145 //printf("?diff = %f\n",diff );
3146 static float maxdiff = 0.f;
3147 if (maxdiff < diff)
3148 {
3149 maxdiff = diff;
3150 printf("maxdiff = %20.10f\n", maxdiff);
3151 }
3152 }
3153 //this is used for SAT
3154 //hostHasSepAxis[i] = 1;
3155 //hostSepAxis[i] = sepAxis2;
3156
3157 //add contact point
3158
3159 //int contactIndex = nGlobalContactsOut;
3160 b3Contact4& newContact = hostContacts.at(nGlobalContactsOut);
3161 nGlobalContactsOut++;
3162 newContact.m_batchIdx = 0; //i;
3163 newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA;
3164 newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;
3165
3166 newContact.m_frictionCoeffCmp = 45874;
3167 newContact.m_restituitionCoeffCmp = 0;
3168
3169 static float maxDepth = 0.f;
3170
3171 if (depth > maxDepth)
3172 {
3173 maxDepth = depth;
3174 printf("MPR maxdepth = %f\n", maxDepth);
3175 }
3176
3177 resultPointOnBWorld.w = -depth;
3178 newContact.m_worldPosB[0] = resultPointOnBWorld;
3179 //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2;
3180 newContact.m_worldNormalOnB = sepAxis2;
3181 newContact.m_worldNormalOnB.w = (b3Scalar)1;
3182 }
3183 else
3184 {
3185 printf("rejected\n");
3186 }
3187 }
3188 }
3189 else
3190 {
3191 //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
3192 b3AlignedObjectArray<b3Contact4> oldHostContacts;
3193 int result;
3194 result = computeContactConvexConvex2( //hostPairs,
3195 pairIndex,
3196 bodyIndexA, bodyIndexB,
3197 collidableIndexA, collidableIndexB,
3198 hostBodyBuf,
3199 hostCollidables,
3200 hostConvexShapeData,
3201 hostVertices,
3202 hostUniqueEdges,
3203 hostIndices,
3204 hostFaces,
3205 hostContacts,
3206 nGlobalContactsOut,
3207 maxContactCapacity,
3208 oldHostContacts
3209 //hostHasSepAxis,
3210 //hostSepAxis
3211
3212 );
3213 } //mpr
3214 } //hostHasSepAxis[i] = 1;
3215 }
3216 else
3217 {
3218 b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;
3219 b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);
3220 b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter;
3221 b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);
3222 b3Vector3 DeltaC2 = c0 - c1;
3223
3224 b3Vector3 sepAxis;
3225
3226 bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3227 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3228 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3229 &sepAxis, &dmin);
3230
3231 if (hasSepAxisA)
3232 {
3233 bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2,
3234 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3235 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3236 &sepAxis, &dmin);
3237 if (hasSepAxisB)
3238 {
3239 bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3240 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3241 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3242 &sepAxis, &dmin, true);
3243
3244 if (hasEdgeEdge)
3245 {
3246 hostHasSepAxis[i] = 1;
3247 hostSepAxis[i] = sepAxis;
3248 }
3249 }
3250 }
3251 }
3252 }
3253
3254 if (useGjkContacts) //nGlobalContactsOut>0)
3255 {
3256 //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut);
3257 nContacts = nGlobalContactsOut;
3258 contactOut->copyFromHost(hostContacts);
3259
3260 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3261 }
3262
3263 m_hasSeparatingNormals.copyFromHost(hostHasSepAxis);
3264 m_sepNormals.copyFromHost(hostSepAxis);
3265
3266 /*
3267 //double-check results from GPU (comment-out the 'else' so both paths are executed
3268 b3AlignedObjectArray<int> checkHasSepAxis;
3269 m_hasSeparatingNormals.copyToHost(checkHasSepAxis);
3270 static int frameCount = 0;
3271 frameCount++;
3272 for (int i=0;i<nPairs;i++)
3273 {
3274 if (hostHasSepAxis[i] != checkHasSepAxis[i])
3275 {
3276 printf("at frameCount %d hostHasSepAxis[%d] = %d but checkHasSepAxis[i] = %d\n",
3277 frameCount,i,hostHasSepAxis[i],checkHasSepAxis[i]);
3278 }
3279 }
3280 //m_hasSeparatingNormals.copyFromHost(hostHasSepAxis);
3281 // m_sepNormals.copyFromHost(hostSepAxis);
3282 */
3283 }
3284
3285 numCompoundPairs = m_numCompoundPairsOut.at(0);
3286 bool useGpuFindCompoundPairs = true;
3287 if (useGpuFindCompoundPairs)
3288 {
3289 B3_PROFILE("findCompoundPairsKernel");
3290 b3BufferInfoCL bInfo[] =
3291 {
3292 b3BufferInfoCL(pairs->getBufferCL(), true),
3293 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3294 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3295 b3BufferInfoCL(convexData.getBufferCL(), true),
3296 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3297 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3298 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3299 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3300 b3BufferInfoCL(clAabbsLocalSpace.getBufferCL(), true),
3301 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3302 b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL()),
3303 b3BufferInfoCL(m_numCompoundPairsOut.getBufferCL()),
3304 b3BufferInfoCL(subTreesGPU->getBufferCL()),
3305 b3BufferInfoCL(treeNodesGPU->getBufferCL()),
3306 b3BufferInfoCL(bvhInfo->getBufferCL())};
3307
3308 b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel, "m_findCompoundPairsKernel");
3309 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3310 launcher.setConst(nPairs);
3311 launcher.setConst(compoundPairCapacity);
3312
3313 int num = nPairs;
3314 launcher.launch1D(num);
3315 clFinish(m_queue);
3316
3317 numCompoundPairs = m_numCompoundPairsOut.at(0);
3318 //printf("numCompoundPairs =%d\n",numCompoundPairs );
3319 if (numCompoundPairs)
3320 {
3321 //printf("numCompoundPairs=%d\n",numCompoundPairs);
3322 }
3323 }
3324 else
3325 {
3326 b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
3327 treeNodesGPU->copyToHost(treeNodesCPU);
3328
3329 b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
3330 subTreesGPU->copyToHost(subTreesCPU);
3331
3332 b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
3333 bvhInfo->copyToHost(bvhInfoCPU);
3334
3335 b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3336 clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3337
3338 b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace;
3339 clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace);
3340
3341 b3AlignedObjectArray<b3Int4> hostPairs;
3342 pairs->copyToHost(hostPairs);
3343
3344 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3345 bodyBuf->copyToHost(hostBodyBuf);
3346
3347 b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut;
3348 cpuCompoundPairsOut.resize(compoundPairCapacity);
3349
3350 b3AlignedObjectArray<b3Collidable> hostCollidables;
3351 gpuCollidables.copyToHost(hostCollidables);
3352
3353 b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
3354 gpuChildShapes.copyToHost(cpuChildShapes);
3355
3356 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
3357 convexData.copyToHost(hostConvexData);
3358
3359 b3AlignedObjectArray<b3Vector3> hostVertices;
3360 gpuVertices.copyToHost(hostVertices);
3361
3362 for (int pairIndex = 0; pairIndex < nPairs; pairIndex++)
3363 {
3364 int bodyIndexA = hostPairs[pairIndex].x;
3365 int bodyIndexB = hostPairs[pairIndex].y;
3366 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
3367 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
3368 if (cpuChildShapes.size())
3369 {
3370 findCompoundPairsKernel(
3371 pairIndex,
3372 bodyIndexA,
3373 bodyIndexB,
3374 collidableIndexA,
3375 collidableIndexB,
3376 &hostBodyBuf[0],
3377 &hostCollidables[0],
3378 &hostConvexData[0],
3379 hostVertices,
3380 hostAabbsWorldSpace,
3381 hostAabbsLocalSpace,
3382 &cpuChildShapes[0],
3383 &cpuCompoundPairsOut[0],
3384 &numCompoundPairs,
3385 compoundPairCapacity,
3386 treeNodesCPU,
3387 subTreesCPU,
3388 bvhInfoCPU);
3389 }
3390 }
3391
3392 m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs, 1, 0, true);
3393 if (numCompoundPairs)
3394 {
3395 b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0];
3396 m_gpuCompoundPairs.copyFromHostPointer(ptr, numCompoundPairs, 0, true);
3397 }
3398 //cpuCompoundPairsOut
3399 }
3400 if (numCompoundPairs)
3401 {
3402 printf("numCompoundPairs=%d\n", numCompoundPairs);
3403 }
3404
3405 if (numCompoundPairs > compoundPairCapacity)
3406 {
3407 b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity);
3408 numCompoundPairs = compoundPairCapacity;
3409 }
3410
3411 m_gpuCompoundPairs.resize(numCompoundPairs);
3412 m_gpuHasCompoundSepNormals.resize(numCompoundPairs);
3413 m_gpuCompoundSepNormals.resize(numCompoundPairs);
3414
3415 if (numCompoundPairs)
3416 {
3417 B3_PROFILE("processCompoundPairsPrimitivesKernel");
3418 b3BufferInfoCL bInfo[] =
3419 {
3420 b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
3421 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3422 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3423 b3BufferInfoCL(convexData.getBufferCL(), true),
3424 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3425 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3426 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3427 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3428 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3429 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3430 b3BufferInfoCL(contactOut->getBufferCL()),
3431 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3432
3433 b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel, "m_processCompoundPairsPrimitivesKernel");
3434 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3435 launcher.setConst(numCompoundPairs);
3436 launcher.setConst(maxContactCapacity);
3437
3438 int num = numCompoundPairs;
3439 launcher.launch1D(num);
3440 clFinish(m_queue);
3441 nContacts = m_totalContactsOut.at(0);
3442 //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts);
3443 if (nContacts > maxContactCapacity)
3444 {
3445 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
3446 nContacts = maxContactCapacity;
3447 }
3448 }
3449
3450 if (numCompoundPairs)
3451 {
3452 B3_PROFILE("processCompoundPairsKernel");
3453 b3BufferInfoCL bInfo[] =
3454 {
3455 b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
3456 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3457 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3458 b3BufferInfoCL(convexData.getBufferCL(), true),
3459 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3460 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3461 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3462 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3463 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3464 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3465 b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL()),
3466 b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL())};
3467
3468 b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel, "m_processCompoundPairsKernel");
3469 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3470 launcher.setConst(numCompoundPairs);
3471
3472 int num = numCompoundPairs;
3473 launcher.launch1D(num);
3474 clFinish(m_queue);
3475 }
3476
3477 //printf("numConcave = %d\n",numConcave);
3478
3479 // printf("hostNormals.size()=%d\n",hostNormals.size());
3480 //int numPairs = pairCount.at(0);
3481 }
3482 int vertexFaceCapacity = 64;
3483
3484 {
3485 //now perform the tree query on GPU
3486
3487 if (treeNodesGPU->size() && treeNodesGPU->size())
3488 {
3489 if (bvhTraversalKernelGPU)
3490 {
3491 B3_PROFILE("m_bvhTraversalKernel");
3492
3493 numConcavePairs = m_numConcavePairsOut.at(0);
3494
3495 b3LauncherCL launcher(m_queue, m_bvhTraversalKernel, "m_bvhTraversalKernel");
3496 launcher.setBuffer(pairs->getBufferCL());
3497 launcher.setBuffer(bodyBuf->getBufferCL());
3498 launcher.setBuffer(gpuCollidables.getBufferCL());
3499 launcher.setBuffer(clAabbsWorldSpace.getBufferCL());
3500 launcher.setBuffer(triangleConvexPairsOut.getBufferCL());
3501 launcher.setBuffer(m_numConcavePairsOut.getBufferCL());
3502 launcher.setBuffer(subTreesGPU->getBufferCL());
3503 launcher.setBuffer(treeNodesGPU->getBufferCL());
3504 launcher.setBuffer(bvhInfo->getBufferCL());
3505
3506 launcher.setConst(nPairs);
3507 launcher.setConst(maxTriConvexPairCapacity);
3508 int num = nPairs;
3509 launcher.launch1D(num);
3510 clFinish(m_queue);
3511 numConcavePairs = m_numConcavePairsOut.at(0);
3512 }
3513 else
3514 {
3515 b3AlignedObjectArray<b3Int4> hostPairs;
3516 pairs->copyToHost(hostPairs);
3517 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3518 bodyBuf->copyToHost(hostBodyBuf);
3519 b3AlignedObjectArray<b3Collidable> hostCollidables;
3520 gpuCollidables.copyToHost(hostCollidables);
3521 b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3522 clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3523
3524 //int maxTriConvexPairCapacity,
3525 b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3526 triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
3527
3528 //int numTriConvexPairsOutHost=0;
3529 numConcavePairs = 0;
3530 //m_numConcavePairsOut
3531
3532 b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
3533 treeNodesGPU->copyToHost(treeNodesCPU);
3534 b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
3535 subTreesGPU->copyToHost(subTreesCPU);
3536 b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
3537 bvhInfo->copyToHost(bvhInfoCPU);
3538 //compute it...
3539
3540 volatile int hostNumConcavePairsOut = 0;
3541
3542 //
3543 for (int i = 0; i < nPairs; i++)
3544 {
3545 b3BvhTraversal(&hostPairs.at(0),
3546 &hostBodyBuf.at(0),
3547 &hostCollidables.at(0),
3548 &hostAabbsWorldSpace.at(0),
3549 &triangleConvexPairsOutHost.at(0),
3550 &hostNumConcavePairsOut,
3551 &subTreesCPU.at(0),
3552 &treeNodesCPU.at(0),
3553 &bvhInfoCPU.at(0),
3554 nPairs,
3555 maxTriConvexPairCapacity,
3556 i);
3557 }
3558 numConcavePairs = hostNumConcavePairsOut;
3559
3560 if (hostNumConcavePairsOut)
3561 {
3562 triangleConvexPairsOutHost.resize(hostNumConcavePairsOut);
3563 triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost);
3564 }
3565 //
3566
3567 m_numConcavePairsOut.resize(0);
3568 m_numConcavePairsOut.push_back(numConcavePairs);
3569 }
3570
3571 //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity);
3572
3573 if (numConcavePairs > maxTriConvexPairCapacity)
3574 {
3575 static int exceeded_maxTriConvexPairCapacity_count = 0;
3576 b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n",
3577 numConcavePairs, maxTriConvexPairCapacity, exceeded_maxTriConvexPairCapacity_count++);
3578 numConcavePairs = maxTriConvexPairCapacity;
3579 }
3580 triangleConvexPairsOut.resize(numConcavePairs);
3581
3582 if (numConcavePairs)
3583 {
3584 clippingFacesOutGPU.resize(numConcavePairs);
3585 worldNormalsAGPU.resize(numConcavePairs);
3586 worldVertsA1GPU.resize(vertexFaceCapacity * (numConcavePairs));
3587 worldVertsB1GPU.resize(vertexFaceCapacity * (numConcavePairs));
3588
3589 if (findConcaveSeparatingAxisKernelGPU)
3590 {
3591 /*
3592 m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
3593 clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3594 worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
3595 worldNormalsAGPU.copyFromHost(worldNormalsACPU);
3596 worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
3597 */
3598
3599 //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut)
3600 if (splitSearchSepAxisConcave)
3601 {
3602 //printf("numConcavePairs = %d\n",numConcavePairs);
3603 m_dmins.resize(numConcavePairs);
3604 {
3605 B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel");
3606 b3BufferInfoCL bInfo[] = {
3607 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3608 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3609 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3610 b3BufferInfoCL(convexData.getBufferCL(), true),
3611 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3612 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3613 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3614 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3615 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3616 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3617 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3618 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3619 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3620 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3621 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3622 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3623 b3BufferInfoCL(m_dmins.getBufferCL())};
3624
3625 b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel, "m_findConcaveSeparatingAxisVertexFaceKernel");
3626 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3627 launcher.setConst(vertexFaceCapacity);
3628 launcher.setConst(numConcavePairs);
3629
3630 int num = numConcavePairs;
3631 launcher.launch1D(num);
3632 clFinish(m_queue);
3633 }
3634 // numConcavePairs = 0;
3635 if (1)
3636 {
3637 B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel");
3638 b3BufferInfoCL bInfo[] = {
3639 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3640 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3641 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3642 b3BufferInfoCL(convexData.getBufferCL(), true),
3643 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3644 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3645 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3646 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3647 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3648 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3649 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3650 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3651 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3652 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3653 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3654 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3655 b3BufferInfoCL(m_dmins.getBufferCL())};
3656
3657 b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel, "m_findConcaveSeparatingAxisEdgeEdgeKernel");
3658 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3659 launcher.setConst(vertexFaceCapacity);
3660 launcher.setConst(numConcavePairs);
3661
3662 int num = numConcavePairs;
3663 launcher.launch1D(num);
3664 clFinish(m_queue);
3665 }
3666
3667 // numConcavePairs = 0;
3668 }
3669 else
3670 {
3671 B3_PROFILE("findConcaveSeparatingAxisKernel");
3672 b3BufferInfoCL bInfo[] = {
3673 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3674 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3675 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3676 b3BufferInfoCL(convexData.getBufferCL(), true),
3677 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3678 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3679 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3680 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3681 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3682 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3683 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3684 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3685 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3686 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3687 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3688 b3BufferInfoCL(worldVertsB1GPU.getBufferCL())};
3689
3690 b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel, "m_findConcaveSeparatingAxisKernel");
3691 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3692 launcher.setConst(vertexFaceCapacity);
3693 launcher.setConst(numConcavePairs);
3694
3695 int num = numConcavePairs;
3696 launcher.launch1D(num);
3697 clFinish(m_queue);
3698 }
3699 }
3700 else
3701 {
3702 b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3703 b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
3704 b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
3705 b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
3706 b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3707
3708 b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3709 triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
3710 //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
3711 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3712 bodyBuf->copyToHost(hostBodyBuf);
3713 b3AlignedObjectArray<b3Collidable> hostCollidables;
3714 gpuCollidables.copyToHost(hostCollidables);
3715 b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3716 clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3717
3718 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
3719 convexData.copyToHost(hostConvexData);
3720
3721 b3AlignedObjectArray<b3Vector3> hostVertices;
3722 gpuVertices.copyToHost(hostVertices);
3723
3724 b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
3725 gpuUniqueEdges.copyToHost(hostUniqueEdges);
3726 b3AlignedObjectArray<b3GpuFace> hostFaces;
3727 gpuFaces.copyToHost(hostFaces);
3728 b3AlignedObjectArray<int> hostIndices;
3729 gpuIndices.copyToHost(hostIndices);
3730 b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
3731 gpuChildShapes.copyToHost(cpuChildShapes);
3732
3733 b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3734 m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3735 concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size());
3736
3737 b3GpuChildShape* childShapePointerCPU = 0;
3738 if (cpuChildShapes.size())
3739 childShapePointerCPU = &cpuChildShapes.at(0);
3740
3741 clippingFacesOutCPU.resize(clippingFacesOutGPU.size());
3742 worldVertsA1CPU.resize(worldVertsA1GPU.size());
3743 worldNormalsACPU.resize(worldNormalsAGPU.size());
3744 worldVertsB1CPU.resize(worldVertsB1GPU.size());
3745
3746 for (int i = 0; i < numConcavePairs; i++)
3747 {
3748 b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0),
3749 &hostBodyBuf.at(0),
3750 &hostCollidables.at(0),
3751 &hostConvexData.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0),
3752 &hostFaces.at(0), &hostIndices.at(0), childShapePointerCPU,
3753 &hostAabbsWorldSpace.at(0),
3754 &concaveSepNormalsHost.at(0),
3755 &clippingFacesOutCPU.at(0),
3756 &worldVertsA1CPU.at(0),
3757 &worldNormalsACPU.at(0),
3758 &worldVertsB1CPU.at(0),
3759 &concaveHasSeparatingNormalsCPU.at(0),
3760 vertexFaceCapacity,
3761 numConcavePairs, i);
3762 };
3763
3764 m_concaveSepNormals.copyFromHost(concaveSepNormalsHost);
3765 m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
3766 clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3767 worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
3768 worldNormalsAGPU.copyFromHost(worldNormalsACPU);
3769 worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
3770 }
3771 // b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals;
3772 // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals);
3773 // b3AlignedObjectArray<b3Int4> cpuConcavePairs;
3774 // triangleConvexPairsOut.copyToHost(cpuConcavePairs);
3775 }
3776 }
3777 }
3778
3779 if (numConcavePairs)
3780 {
3781 if (numConcavePairs)
3782 {
3783 B3_PROFILE("findConcaveSphereContactsKernel");
3784 nContacts = m_totalContactsOut.at(0);
3785 // printf("nContacts1 = %d\n",nContacts);
3786 b3BufferInfoCL bInfo[] = {
3787 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3788 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3789 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3790 b3BufferInfoCL(convexData.getBufferCL(), true),
3791 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3792 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3793 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3794 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3795 b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3796 b3BufferInfoCL(contactOut->getBufferCL()),
3797 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3798
3799 b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel, "m_findConcaveSphereContactsKernel");
3800 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3801
3802 launcher.setConst(numConcavePairs);
3803 launcher.setConst(maxContactCapacity);
3804
3805 int num = numConcavePairs;
3806 launcher.launch1D(num);
3807 clFinish(m_queue);
3808 nContacts = m_totalContactsOut.at(0);
3809 //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts);
3810
3811 //printf("nContacts2 = %d\n",nContacts);
3812
3813 if (nContacts >= maxContactCapacity)
3814 {
3815 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
3816 nContacts = maxContactCapacity;
3817 }
3818 }
3819 }
3820
3821 #ifdef __APPLE__
3822 bool contactClippingOnGpu = true;
3823 #else
3824 bool contactClippingOnGpu = true;
3825 #endif
3826
3827 if (contactClippingOnGpu)
3828 {
3829 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3830 // printf("nContacts3 = %d\n",nContacts);
3831
3832 //B3_PROFILE("clipHullHullKernel");
3833
3834 bool breakupConcaveConvexKernel = true;
3835
3836 #ifdef __APPLE__
3837 //actually, some Apple OpenCL platform/device combinations work fine...
3838 breakupConcaveConvexKernel = true;
3839 #endif
3840 //concave-convex contact clipping
3841 if (numConcavePairs)
3842 {
3843 // printf("numConcavePairs = %d\n", numConcavePairs);
3844 // nContacts = m_totalContactsOut.at(0);
3845 // printf("nContacts before = %d\n", nContacts);
3846
3847 if (breakupConcaveConvexKernel)
3848 {
3849 worldVertsB2GPU.resize(vertexFaceCapacity * numConcavePairs);
3850
3851 //clipFacesAndFindContacts
3852
3853 if (clipConcaveFacesAndFindContactsCPU)
3854 {
3855 b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3856 b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
3857 b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
3858 b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
3859
3860 clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
3861 worldVertsA1GPU.copyToHost(worldVertsA1CPU);
3862 worldNormalsAGPU.copyToHost(worldNormalsACPU);
3863 worldVertsB1GPU.copyToHost(worldVertsB1CPU);
3864
3865 b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3866 m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);
3867
3868 b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3869 m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3870
3871 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
3872 worldVertsB2CPU.resize(worldVertsB2GPU.size());
3873
3874 for (int i = 0; i < numConcavePairs; i++)
3875 {
3876 clipFacesAndFindContactsKernel(&concaveSepNormalsHost.at(0),
3877 &concaveHasSeparatingNormalsCPU.at(0),
3878 &clippingFacesOutCPU.at(0),
3879 &worldVertsA1CPU.at(0),
3880 &worldNormalsACPU.at(0),
3881 &worldVertsB1CPU.at(0),
3882 &worldVertsB2CPU.at(0),
3883 vertexFaceCapacity,
3884 i);
3885 }
3886
3887 clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3888 worldVertsB2GPU.copyFromHost(worldVertsB2CPU);
3889 }
3890 else
3891 {
3892 if (1)
3893 {
3894 B3_PROFILE("clipFacesAndFindContacts");
3895 //nContacts = m_totalContactsOut.at(0);
3896 //int h = m_hasSeparatingNormals.at(0);
3897 //int4 p = clippingFacesOutGPU.at(0);
3898 b3BufferInfoCL bInfo[] = {
3899 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3900 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3901 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3902 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3903 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3904 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3905 b3BufferInfoCL(worldVertsB2GPU.getBufferCL())};
3906 b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts");
3907 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3908 launcher.setConst(vertexFaceCapacity);
3909
3910 launcher.setConst(numConcavePairs);
3911 int debugMode = 0;
3912 launcher.setConst(debugMode);
3913 int num = numConcavePairs;
3914 launcher.launch1D(num);
3915 clFinish(m_queue);
3916 //int bla = m_totalContactsOut.at(0);
3917 }
3918 }
3919 //contactReduction
3920 {
3921 int newContactCapacity = nContacts + numConcavePairs;
3922 contactOut->reserve(newContactCapacity);
3923 if (reduceConcaveContactsOnGPU)
3924 {
3925 // printf("newReservation = %d\n",newReservation);
3926 {
3927 B3_PROFILE("newContactReductionKernel");
3928 b3BufferInfoCL bInfo[] =
3929 {
3930 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true),
3931 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3932 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3933 b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3934 b3BufferInfoCL(contactOut->getBufferCL()),
3935 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3936 b3BufferInfoCL(worldVertsB2GPU.getBufferCL()),
3937 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3938
3939 b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel");
3940 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3941 launcher.setConst(vertexFaceCapacity);
3942 launcher.setConst(newContactCapacity);
3943 launcher.setConst(numConcavePairs);
3944 int num = numConcavePairs;
3945
3946 launcher.launch1D(num);
3947 }
3948 nContacts = m_totalContactsOut.at(0);
3949 contactOut->resize(nContacts);
3950
3951 //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
3952 }
3953 else
3954 {
3955 volatile int nGlobalContactsOut = nContacts;
3956 b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3957 triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
3958 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3959 bodyBuf->copyToHost(hostBodyBuf);
3960
3961 b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3962 m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);
3963
3964 b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3965 m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3966
3967 b3AlignedObjectArray<b3Contact4> hostContacts;
3968 if (nContacts)
3969 {
3970 contactOut->copyToHost(hostContacts);
3971 }
3972 hostContacts.resize(newContactCapacity);
3973
3974 b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3975 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
3976
3977 clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
3978 worldVertsB2GPU.copyToHost(worldVertsB2CPU);
3979
3980 for (int i = 0; i < numConcavePairs; i++)
3981 {
3982 b3NewContactReductionKernel(&triangleConvexPairsOutHost.at(0),
3983 &hostBodyBuf.at(0),
3984 &concaveSepNormalsHost.at(0),
3985 &concaveHasSeparatingNormalsCPU.at(0),
3986 &hostContacts.at(0),
3987 &clippingFacesOutCPU.at(0),
3988 &worldVertsB2CPU.at(0),
3989 &nGlobalContactsOut,
3990 vertexFaceCapacity,
3991 newContactCapacity,
3992 numConcavePairs,
3993 i);
3994 }
3995
3996 nContacts = nGlobalContactsOut;
3997 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3998 // nContacts = m_totalContactsOut.at(0);
3999 //contactOut->resize(nContacts);
4000 hostContacts.resize(nContacts);
4001 //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
4002 contactOut->copyFromHost(hostContacts);
4003 }
4004 }
4005 //re-use?
4006 }
4007 else
4008 {
4009 B3_PROFILE("clipHullHullConcaveConvexKernel");
4010 nContacts = m_totalContactsOut.at(0);
4011 int newContactCapacity = contactOut->capacity();
4012
4013 //printf("contactOut5 = %d\n",nContacts);
4014 b3BufferInfoCL bInfo[] = {
4015 b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true),
4016 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4017 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4018 b3BufferInfoCL(convexData.getBufferCL(), true),
4019 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4020 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4021 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4022 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4023 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
4024 b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
4025 b3BufferInfoCL(contactOut->getBufferCL()),
4026 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4027 b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel, "m_clipHullHullConcaveConvexKernel");
4028 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4029 launcher.setConst(newContactCapacity);
4030 launcher.setConst(numConcavePairs);
4031 int num = numConcavePairs;
4032 launcher.launch1D(num);
4033 clFinish(m_queue);
4034 nContacts = m_totalContactsOut.at(0);
4035 contactOut->resize(nContacts);
4036 //printf("contactOut6 = %d\n",nContacts);
4037 b3AlignedObjectArray<b3Contact4> cpuContacts;
4038 contactOut->copyToHost(cpuContacts);
4039 }
4040 // printf("nContacts after = %d\n", nContacts);
4041 } //numConcavePairs
4042
4043 //convex-convex contact clipping
4044
4045 bool breakupKernel = false;
4046
4047 #ifdef __APPLE__
4048 breakupKernel = true;
4049 #endif
4050
4051 #ifdef CHECK_ON_HOST
4052 bool computeConvexConvex = false;
4053 #else
4054 bool computeConvexConvex = true;
4055 #endif //CHECK_ON_HOST
4056 if (computeConvexConvex)
4057 {
4058 B3_PROFILE("clipHullHullKernel");
4059 if (breakupKernel)
4060 {
4061 worldVertsB1GPU.resize(vertexFaceCapacity * nPairs);
4062 clippingFacesOutGPU.resize(nPairs);
4063 worldNormalsAGPU.resize(nPairs);
4064 worldVertsA1GPU.resize(vertexFaceCapacity * nPairs);
4065 worldVertsB2GPU.resize(vertexFaceCapacity * nPairs);
4066
4067 if (findConvexClippingFacesGPU)
4068 {
4069 B3_PROFILE("findClippingFacesKernel");
4070 b3BufferInfoCL bInfo[] = {
4071 b3BufferInfoCL(pairs->getBufferCL(), true),
4072 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4073 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4074 b3BufferInfoCL(convexData.getBufferCL(), true),
4075 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4076 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4077 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4078 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4079 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4080 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4081 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4082 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
4083 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
4084 b3BufferInfoCL(worldVertsB1GPU.getBufferCL())};
4085
4086 b3LauncherCL launcher(m_queue, m_findClippingFacesKernel, "m_findClippingFacesKernel");
4087 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4088 launcher.setConst(vertexFaceCapacity);
4089 launcher.setConst(nPairs);
4090 int num = nPairs;
4091 launcher.launch1D(num);
4092 clFinish(m_queue);
4093 }
4094 else
4095 {
4096 float minDist = -1e30f;
4097 float maxDist = 0.02f;
4098
4099 b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
4100 convexData.copyToHost(hostConvexData);
4101 b3AlignedObjectArray<b3Collidable> hostCollidables;
4102 gpuCollidables.copyToHost(hostCollidables);
4103
4104 b3AlignedObjectArray<int> hostHasSepNormals;
4105 m_hasSeparatingNormals.copyToHost(hostHasSepNormals);
4106 b3AlignedObjectArray<b3Vector3> cpuSepNormals;
4107 m_sepNormals.copyToHost(cpuSepNormals);
4108
4109 b3AlignedObjectArray<b3Int4> hostPairs;
4110 pairs->copyToHost(hostPairs);
4111 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
4112 bodyBuf->copyToHost(hostBodyBuf);
4113
4114 //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs);
4115 b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
4116 worldVertsB1GPU.copyToHost(worldVertsB1CPU);
4117
4118 b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
4119 clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
4120
4121 b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
4122 worldNormalsACPU.resize(nPairs);
4123
4124 b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
4125 worldVertsA1CPU.resize(worldVertsA1GPU.size());
4126
4127 b3AlignedObjectArray<b3Vector3> hostVertices;
4128 gpuVertices.copyToHost(hostVertices);
4129 b3AlignedObjectArray<b3GpuFace> hostFaces;
4130 gpuFaces.copyToHost(hostFaces);
4131 b3AlignedObjectArray<int> hostIndices;
4132 gpuIndices.copyToHost(hostIndices);
4133
4134 for (int i = 0; i < nPairs; i++)
4135 {
4136 int bodyIndexA = hostPairs[i].x;
4137 int bodyIndexB = hostPairs[i].y;
4138
4139 int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
4140 int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
4141
4142 int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex;
4143 int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex;
4144
4145 if (hostHasSepNormals[i])
4146 {
4147 b3FindClippingFaces(cpuSepNormals[i],
4148 &hostConvexData[shapeIndexA],
4149 &hostConvexData[shapeIndexB],
4150 hostBodyBuf[bodyIndexA].m_pos, hostBodyBuf[bodyIndexA].m_quat,
4151 hostBodyBuf[bodyIndexB].m_pos, hostBodyBuf[bodyIndexB].m_quat,
4152 &worldVertsA1CPU.at(0), &worldNormalsACPU.at(0),
4153 &worldVertsB1CPU.at(0),
4154 vertexFaceCapacity, minDist, maxDist,
4155 &hostVertices.at(0), &hostFaces.at(0),
4156 &hostIndices.at(0),
4157 &hostVertices.at(0), &hostFaces.at(0),
4158 &hostIndices.at(0), &clippingFacesOutCPU.at(0), i);
4159 }
4160 }
4161
4162 clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
4163 worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
4164 worldNormalsAGPU.copyFromHost(worldNormalsACPU);
4165 worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
4166 }
4167
4168 ///clip face B against face A, reduce contacts and append them to a global contact array
4169 if (1)
4170 {
4171 if (clipConvexFacesAndFindContactsCPU)
4172 {
4173 //b3AlignedObjectArray<b3Int4> hostPairs;
4174 //pairs->copyToHost(hostPairs);
4175
4176 b3AlignedObjectArray<b3Vector3> hostSepNormals;
4177 m_sepNormals.copyToHost(hostSepNormals);
4178 b3AlignedObjectArray<int> hostHasSepAxis;
4179 m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
4180
4181 b3AlignedObjectArray<b3Int4> hostClippingFaces;
4182 clippingFacesOutGPU.copyToHost(hostClippingFaces);
4183 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
4184 worldVertsB2CPU.resize(vertexFaceCapacity * nPairs);
4185
4186 b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
4187 worldVertsA1GPU.copyToHost(worldVertsA1CPU);
4188 b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
4189 worldNormalsAGPU.copyToHost(worldNormalsACPU);
4190
4191 b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
4192 worldVertsB1GPU.copyToHost(worldVertsB1CPU);
4193
4194 /*
4195 __global const b3Float4* separatingNormals,
4196 __global const int* hasSeparatingAxis,
4197 __global b3Int4* clippingFacesOut,
4198 __global b3Float4* worldVertsA1,
4199 __global b3Float4* worldNormalsA1,
4200 __global b3Float4* worldVertsB1,
4201 __global b3Float4* worldVertsB2,
4202 int vertexFaceCapacity,
4203 int pairIndex
4204 */
4205 for (int i = 0; i < nPairs; i++)
4206 {
4207 clipFacesAndFindContactsKernel(
4208 &hostSepNormals.at(0),
4209 &hostHasSepAxis.at(0),
4210 &hostClippingFaces.at(0),
4211 &worldVertsA1CPU.at(0),
4212 &worldNormalsACPU.at(0),
4213 &worldVertsB1CPU.at(0),
4214 &worldVertsB2CPU.at(0),
4215
4216 vertexFaceCapacity,
4217 i);
4218 }
4219
4220 clippingFacesOutGPU.copyFromHost(hostClippingFaces);
4221 worldVertsB2GPU.copyFromHost(worldVertsB2CPU);
4222 }
4223 else
4224 {
4225 B3_PROFILE("clipFacesAndFindContacts");
4226 //nContacts = m_totalContactsOut.at(0);
4227 //int h = m_hasSeparatingNormals.at(0);
4228 //int4 p = clippingFacesOutGPU.at(0);
4229 b3BufferInfoCL bInfo[] = {
4230 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4231 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4232 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4233 b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
4234 b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
4235 b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
4236 b3BufferInfoCL(worldVertsB2GPU.getBufferCL())};
4237
4238 b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts");
4239 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4240 launcher.setConst(vertexFaceCapacity);
4241
4242 launcher.setConst(nPairs);
4243 int debugMode = 0;
4244 launcher.setConst(debugMode);
4245 int num = nPairs;
4246 launcher.launch1D(num);
4247 clFinish(m_queue);
4248 }
4249
4250 {
4251 nContacts = m_totalContactsOut.at(0);
4252 //printf("nContacts = %d\n",nContacts);
4253
4254 int newContactCapacity = nContacts + nPairs;
4255 contactOut->reserve(newContactCapacity);
4256
4257 if (reduceConvexContactsOnGPU)
4258 {
4259 {
4260 B3_PROFILE("newContactReductionKernel");
4261 b3BufferInfoCL bInfo[] =
4262 {
4263 b3BufferInfoCL(pairs->getBufferCL(), true),
4264 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4265 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4266 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4267 b3BufferInfoCL(contactOut->getBufferCL()),
4268 b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4269 b3BufferInfoCL(worldVertsB2GPU.getBufferCL()),
4270 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4271
4272 b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel");
4273 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4274 launcher.setConst(vertexFaceCapacity);
4275 launcher.setConst(newContactCapacity);
4276 launcher.setConst(nPairs);
4277 int num = nPairs;
4278
4279 launcher.launch1D(num);
4280 }
4281 nContacts = m_totalContactsOut.at(0);
4282 contactOut->resize(nContacts);
4283 }
4284 else
4285 {
4286 volatile int nGlobalContactsOut = nContacts;
4287 b3AlignedObjectArray<b3Int4> hostPairs;
4288 pairs->copyToHost(hostPairs);
4289 b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
4290 bodyBuf->copyToHost(hostBodyBuf);
4291 b3AlignedObjectArray<b3Vector3> hostSepNormals;
4292 m_sepNormals.copyToHost(hostSepNormals);
4293 b3AlignedObjectArray<int> hostHasSepAxis;
4294 m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
4295 b3AlignedObjectArray<b3Contact4> hostContactsOut;
4296 contactOut->copyToHost(hostContactsOut);
4297 hostContactsOut.resize(newContactCapacity);
4298
4299 b3AlignedObjectArray<b3Int4> hostClippingFaces;
4300 clippingFacesOutGPU.copyToHost(hostClippingFaces);
4301 b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
4302 worldVertsB2GPU.copyToHost(worldVertsB2CPU);
4303
4304 for (int i = 0; i < nPairs; i++)
4305 {
4306 b3NewContactReductionKernel(&hostPairs.at(0),
4307 &hostBodyBuf.at(0),
4308 &hostSepNormals.at(0),
4309 &hostHasSepAxis.at(0),
4310 &hostContactsOut.at(0),
4311 &hostClippingFaces.at(0),
4312 &worldVertsB2CPU.at(0),
4313 &nGlobalContactsOut,
4314 vertexFaceCapacity,
4315 newContactCapacity,
4316 nPairs,
4317 i);
4318 }
4319
4320 nContacts = nGlobalContactsOut;
4321 m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
4322 hostContactsOut.resize(nContacts);
4323 //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
4324 contactOut->copyFromHost(hostContactsOut);
4325 }
4326 // b3Contact4 pt = contactOut->at(0);
4327 // printf("nContacts = %d\n",nContacts);
4328 }
4329 }
4330 }
4331 else //breakupKernel
4332 {
4333 if (nPairs)
4334 {
4335 b3BufferInfoCL bInfo[] = {
4336 b3BufferInfoCL(pairs->getBufferCL(), true),
4337 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4338 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4339 b3BufferInfoCL(convexData.getBufferCL(), true),
4340 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4341 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4342 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4343 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4344 b3BufferInfoCL(m_sepNormals.getBufferCL()),
4345 b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4346 b3BufferInfoCL(contactOut->getBufferCL()),
4347 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4348 b3LauncherCL launcher(m_queue, m_clipHullHullKernel, "m_clipHullHullKernel");
4349 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4350 launcher.setConst(nPairs);
4351 launcher.setConst(maxContactCapacity);
4352
4353 int num = nPairs;
4354 launcher.launch1D(num);
4355 clFinish(m_queue);
4356
4357 nContacts = m_totalContactsOut.at(0);
4358 if (nContacts >= maxContactCapacity)
4359 {
4360 b3Error("Exceeded contact capacity (%d/%d)\n", nContacts, maxContactCapacity);
4361 nContacts = maxContactCapacity;
4362 }
4363 contactOut->resize(nContacts);
4364 }
4365 }
4366
4367 int nCompoundsPairs = m_gpuCompoundPairs.size();
4368
4369 if (nCompoundsPairs)
4370 {
4371 b3BufferInfoCL bInfo[] = {
4372 b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
4373 b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4374 b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4375 b3BufferInfoCL(convexData.getBufferCL(), true),
4376 b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4377 b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4378 b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4379 b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4380 b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
4381 b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL(), true),
4382 b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL(), true),
4383 b3BufferInfoCL(contactOut->getBufferCL()),
4384 b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4385 b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel, "m_clipCompoundsHullHullKernel");
4386 launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4387 launcher.setConst(nCompoundsPairs);
4388 launcher.setConst(maxContactCapacity);
4389
4390 int num = nCompoundsPairs;
4391 launcher.launch1D(num);
4392 clFinish(m_queue);
4393
4394 nContacts = m_totalContactsOut.at(0);
4395 if (nContacts > maxContactCapacity)
4396 {
4397 b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
4398 nContacts = maxContactCapacity;
4399 }
4400 contactOut->resize(nContacts);
4401 } //if nCompoundsPairs
4402 }
4403 } //contactClippingOnGpu
4404
4405 //printf("nContacts end = %d\n",nContacts);
4406
4407 //printf("frameCount = %d\n",frameCount++);
4408 }
4409