1 /*
2 Bullet Continuous Collision Detection and Physics Library
3 Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
4 
5 This software is provided 'as-is', without any express or implied warranty.
6 In no event will the authors be held liable for any damages arising from the use of this software.
7 Permission is granted to anyone to use this software for any purpose,
8 including commercial applications, and to alter it and redistribute it freely,
9 subject to the following restrictions:
10 
11 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
12 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 3. This notice may not be removed or altered from any source distribution.
14 */
15 
16 bool findSeparatingAxisOnGpu = true;
17 bool splitSearchSepAxisConcave = false;
18 bool splitSearchSepAxisConvex = true;
19 bool useMprGpu = true;  //use mpr for edge-edge  (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling...
20 bool bvhTraversalKernelGPU = true;
21 bool findConcaveSeparatingAxisKernelGPU = true;
22 bool clipConcaveFacesAndFindContactsCPU = false;  //false;//true;
23 bool clipConvexFacesAndFindContactsCPU = false;   //false;//true;
24 bool reduceConcaveContactsOnGPU = true;           //false;
25 bool reduceConvexContactsOnGPU = true;            //false;
26 bool findConvexClippingFacesGPU = true;
27 bool useGjk = false;          ///option for CPU/host testing, when findSeparatingAxisOnGpu = false
28 bool useGjkContacts = false;  //////option for CPU/host testing when findSeparatingAxisOnGpu = false
29 
30 static int myframecount = 0;  ///for testing
31 
32 ///This file was written by Erwin Coumans
33 ///Separating axis rest based on work from Pierre Terdiman, see
34 ///And contact clipping based on work from Simon Hobbs
35 
36 //#define B3_DEBUG_SAT_FACE
37 
38 //#define CHECK_ON_HOST
39 
40 #ifdef CHECK_ON_HOST
41 //#define PERSISTENT_CONTACTS_HOST
42 #endif
43 
44 int b3g_actualSATPairTests = 0;
45 
46 #include "b3ConvexHullContact.h"
47 #include <string.h>  //memcpy
48 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"
49 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h"
50 
51 #include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h"
52 #include "Bullet3Geometry/b3AabbUtil.h"
53 
54 typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;
55 
56 #include <float.h>  //for FLT_MAX
57 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
58 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
59 //#include "AdlQuaternion.h"
60 
61 #include "kernels/satKernels.h"
62 #include "kernels/mprKernels.h"
63 
64 #include "kernels/satConcaveKernels.h"
65 
66 #include "kernels/satClipHullContacts.h"
67 #include "kernels/bvhTraversal.h"
68 #include "kernels/primitiveContacts.h"
69 
70 #include "Bullet3Geometry/b3AabbUtil.h"
71 
72 #define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl"
73 #define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl"
74 
75 #define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl"
76 
77 #define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl"
78 #define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl"
79 #define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl"
80 
81 #ifndef __global
82 #define __global
83 #endif
84 
85 #ifndef __kernel
86 #define __kernel
87 #endif
88 
89 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h"
90 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h"
91 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h"
92 #include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h"
93 
94 #define dot3F4 b3Dot
95 
GpuSatCollision(cl_context ctx,cl_device_id device,cl_command_queue q)96 GpuSatCollision::GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q)
97 	: m_context(ctx),
98 	  m_device(device),
99 	  m_queue(q),
100 
101 	  m_findSeparatingAxisKernel(0),
102 	  m_findSeparatingAxisVertexFaceKernel(0),
103 	  m_findSeparatingAxisEdgeEdgeKernel(0),
104 	  m_unitSphereDirections(m_context, m_queue),
105 
106 	  m_totalContactsOut(m_context, m_queue),
107 	  m_sepNormals(m_context, m_queue),
108 	  m_dmins(m_context, m_queue),
109 
110 	  m_hasSeparatingNormals(m_context, m_queue),
111 	  m_concaveSepNormals(m_context, m_queue),
112 	  m_concaveHasSeparatingNormals(m_context, m_queue),
113 	  m_numConcavePairsOut(m_context, m_queue),
114 
115 	  m_gpuCompoundPairs(m_context, m_queue),
116 
117 	  m_gpuCompoundSepNormals(m_context, m_queue),
118 	  m_gpuHasCompoundSepNormals(m_context, m_queue),
119 
120 	  m_numCompoundPairsOut(m_context, m_queue)
121 {
122 	m_totalContactsOut.push_back(0);
123 
124 	cl_int errNum = 0;
125 
126 	if (1)
127 	{
128 		const char* mprSrc = mprKernelsCL;
129 
130 		const char* srcConcave = satConcaveKernelsCL;
131 		char flags[1024] = {0};
132 		//#ifdef CL_PLATFORM_INTEL
133 		//		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl");
134 		//#endif
135 		m_mprPenetrationKernel = 0;
136 		m_findSeparatingAxisUnitSphereKernel = 0;
137 
138 		if (useMprGpu)
139 		{
140 			cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, mprSrc, &errNum, flags, BT_NARROWPHASE_MPR_PATH);
141 			b3Assert(errNum == CL_SUCCESS);
142 
143 			m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "mprPenetrationKernel", &errNum, mprProg);
144 			b3Assert(m_mprPenetrationKernel);
145 			b3Assert(errNum == CL_SUCCESS);
146 
147 			m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "findSeparatingAxisUnitSphereKernel", &errNum, mprProg);
148 			b3Assert(m_findSeparatingAxisUnitSphereKernel);
149 			b3Assert(errNum == CL_SUCCESS);
150 
151 			int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
152 			m_unitSphereDirections.resize(numDirections);
153 			m_unitSphereDirections.copyFromHostPointer(unitSphere162, numDirections, 0, true);
154 		}
155 
156 		cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, satKernelsCL, &errNum, flags, BT_NARROWPHASE_SAT_PATH);
157 		b3Assert(errNum == CL_SUCCESS);
158 
159 		cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcConcave, &errNum, flags, BT_NARROWPHASE_SAT_CONCAVE_PATH);
160 		b3Assert(errNum == CL_SUCCESS);
161 
162 		m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisKernel", &errNum, satProg);
163 		b3Assert(m_findSeparatingAxisKernel);
164 		b3Assert(errNum == CL_SUCCESS);
165 
166 		m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisVertexFaceKernel", &errNum, satProg);
167 		b3Assert(m_findSeparatingAxisVertexFaceKernel);
168 
169 		m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisEdgeEdgeKernel", &errNum, satProg);
170 		b3Assert(m_findSeparatingAxisVertexFaceKernel);
171 
172 		m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findConcaveSeparatingAxisKernel", &errNum, satProg);
173 		b3Assert(m_findConcaveSeparatingAxisKernel);
174 		b3Assert(errNum == CL_SUCCESS);
175 
176 		m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisVertexFaceKernel", &errNum, satConcaveProg);
177 		b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel);
178 		b3Assert(errNum == CL_SUCCESS);
179 
180 		m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel", &errNum, satConcaveProg);
181 		b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel);
182 		b3Assert(errNum == CL_SUCCESS);
183 
184 		m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findCompoundPairsKernel", &errNum, satProg);
185 		b3Assert(m_findCompoundPairsKernel);
186 		b3Assert(errNum == CL_SUCCESS);
187 		m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "processCompoundPairsKernel", &errNum, satProg);
188 		b3Assert(m_processCompoundPairsKernel);
189 		b3Assert(errNum == CL_SUCCESS);
190 	}
191 
192 	if (1)
193 	{
194 		const char* srcClip = satClipKernelsCL;
195 
196 		char flags[1024] = {0};
197 		//#ifdef CL_PLATFORM_INTEL
198 		//		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
199 		//#endif
200 
201 		cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcClip, &errNum, flags, BT_NARROWPHASE_CLIPHULL_PATH);
202 		b3Assert(errNum == CL_SUCCESS);
203 
204 		m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullKernel", &errNum, satClipContactsProg);
205 		b3Assert(errNum == CL_SUCCESS);
206 
207 		m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipCompoundsHullHullKernel", &errNum, satClipContactsProg);
208 		b3Assert(errNum == CL_SUCCESS);
209 
210 		m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "findClippingFacesKernel", &errNum, satClipContactsProg);
211 		b3Assert(errNum == CL_SUCCESS);
212 
213 		m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipFacesAndFindContactsKernel", &errNum, satClipContactsProg);
214 		b3Assert(errNum == CL_SUCCESS);
215 
216 		m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullConcaveConvexKernel", &errNum, satClipContactsProg);
217 		b3Assert(errNum == CL_SUCCESS);
218 
219 		//		m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
220 		//	b3Assert(errNum==CL_SUCCESS);
221 
222 		m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip,
223 																			   "newContactReductionKernel", &errNum, satClipContactsProg);
224 		b3Assert(errNum == CL_SUCCESS);
225 	}
226 	else
227 	{
228 		m_clipHullHullKernel = 0;
229 		m_clipCompoundsHullHullKernel = 0;
230 		m_findClippingFacesKernel = 0;
231 		m_newContactReductionKernel = 0;
232 		m_clipFacesAndFindContacts = 0;
233 		m_clipHullHullConcaveConvexKernel = 0;
234 		//		m_extractManifoldAndAddContactKernel = 0;
235 	}
236 
237 	if (1)
238 	{
239 		const char* srcBvh = bvhTraversalKernelCL;
240 		cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcBvh, &errNum, "", BT_NARROWPHASE_BVH_TRAVERSAL_PATH);
241 		b3Assert(errNum == CL_SUCCESS);
242 
243 		m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcBvh, "bvhTraversalKernel", &errNum, bvhTraversalProg, "");
244 		b3Assert(errNum == CL_SUCCESS);
245 	}
246 
247 	{
248 		const char* primitiveContactsSrc = primitiveContactsKernelsCL;
249 		cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, primitiveContactsSrc, &errNum, "", BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH);
250 		b3Assert(errNum == CL_SUCCESS);
251 
252 		m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "primitiveContactsKernel", &errNum, primitiveContactsProg, "");
253 		b3Assert(errNum == CL_SUCCESS);
254 
255 		m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "findConcaveSphereContactsKernel", &errNum, primitiveContactsProg);
256 		b3Assert(errNum == CL_SUCCESS);
257 		b3Assert(m_findConcaveSphereContactsKernel);
258 
259 		m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "processCompoundPairsPrimitivesKernel", &errNum, primitiveContactsProg, "");
260 		b3Assert(errNum == CL_SUCCESS);
261 		b3Assert(m_processCompoundPairsPrimitivesKernel);
262 	}
263 }
264 
~GpuSatCollision()265 GpuSatCollision::~GpuSatCollision()
266 {
267 	if (m_findSeparatingAxisVertexFaceKernel)
268 		clReleaseKernel(m_findSeparatingAxisVertexFaceKernel);
269 
270 	if (m_findSeparatingAxisEdgeEdgeKernel)
271 		clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel);
272 
273 	if (m_findSeparatingAxisUnitSphereKernel)
274 		clReleaseKernel(m_findSeparatingAxisUnitSphereKernel);
275 
276 	if (m_mprPenetrationKernel)
277 		clReleaseKernel(m_mprPenetrationKernel);
278 
279 	if (m_findSeparatingAxisKernel)
280 		clReleaseKernel(m_findSeparatingAxisKernel);
281 
282 	if (m_findConcaveSeparatingAxisVertexFaceKernel)
283 		clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel);
284 
285 	if (m_findConcaveSeparatingAxisEdgeEdgeKernel)
286 		clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel);
287 
288 	if (m_findConcaveSeparatingAxisKernel)
289 		clReleaseKernel(m_findConcaveSeparatingAxisKernel);
290 
291 	if (m_findCompoundPairsKernel)
292 		clReleaseKernel(m_findCompoundPairsKernel);
293 
294 	if (m_processCompoundPairsKernel)
295 		clReleaseKernel(m_processCompoundPairsKernel);
296 
297 	if (m_findClippingFacesKernel)
298 		clReleaseKernel(m_findClippingFacesKernel);
299 
300 	if (m_clipFacesAndFindContacts)
301 		clReleaseKernel(m_clipFacesAndFindContacts);
302 	if (m_newContactReductionKernel)
303 		clReleaseKernel(m_newContactReductionKernel);
304 	if (m_primitiveContactsKernel)
305 		clReleaseKernel(m_primitiveContactsKernel);
306 
307 	if (m_findConcaveSphereContactsKernel)
308 		clReleaseKernel(m_findConcaveSphereContactsKernel);
309 
310 	if (m_processCompoundPairsPrimitivesKernel)
311 		clReleaseKernel(m_processCompoundPairsPrimitivesKernel);
312 
313 	if (m_clipHullHullKernel)
314 		clReleaseKernel(m_clipHullHullKernel);
315 	if (m_clipCompoundsHullHullKernel)
316 		clReleaseKernel(m_clipCompoundsHullHullKernel);
317 
318 	if (m_clipHullHullConcaveConvexKernel)
319 		clReleaseKernel(m_clipHullHullConcaveConvexKernel);
320 	//	if (m_extractManifoldAndAddContactKernel)
321 	//	clReleaseKernel(m_extractManifoldAndAddContactKernel);
322 
323 	if (m_bvhTraversalKernel)
324 		clReleaseKernel(m_bvhTraversalKernel);
325 }
326 
327 struct MyTriangleCallback : public b3NodeOverlapCallback
328 {
329 	int m_bodyIndexA;
330 	int m_bodyIndexB;
331 
processNodeMyTriangleCallback332 	virtual void processNode(int subPart, int triangleIndex)
333 	{
334 		printf("bodyIndexA %d, bodyIndexB %d\n", m_bodyIndexA, m_bodyIndexB);
335 		printf("triangleIndex %d\n", triangleIndex);
336 	}
337 };
338 
339 #define float4 b3Vector3
340 #define make_float4(x, y, z, w) b3MakeVector3(x, y, z, w)
341 
signedDistanceFromPointToPlane(const float4 & point,const float4 & planeEqn,float4 * closestPointOnFace)342 float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace)
343 {
344 	float4 n = planeEqn;
345 	n[3] = 0.f;
346 	float dist = dot3F4(n, point) + planeEqn[3];
347 	*closestPointOnFace = point - dist * n;
348 	return dist;
349 }
350 
351 #define cross3(a, b) (a.cross(b))
transform(const b3Vector3 * v,const b3Vector3 * pos,const b3Quaternion * orn)352 b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn)
353 {
354 	b3Transform tr;
355 	tr.setIdentity();
356 	tr.setOrigin(*pos);
357 	tr.setRotation(*orn);
358 	b3Vector3 res = tr(*v);
359 	return res;
360 }
361 
IsPointInPolygon(const float4 & p,const b3GpuFace * face,const float4 * baseVertex,const int * convexIndices,float4 * out)362 inline bool IsPointInPolygon(const float4& p,
363 							 const b3GpuFace* face,
364 							 const float4* baseVertex,
365 							 const int* convexIndices,
366 							 float4* out)
367 {
368 	float4 a;
369 	float4 b;
370 	float4 ab;
371 	float4 ap;
372 	float4 v;
373 
374 	float4 plane = b3MakeVector3(face->m_plane.x, face->m_plane.y, face->m_plane.z, 0.f);
375 
376 	if (face->m_numIndices < 2)
377 		return false;
378 
379 	float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices - 1]];
380 	b = v0;
381 
382 	for (unsigned i = 0; i != face->m_numIndices; ++i)
383 	{
384 		a = b;
385 		float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];
386 		b = vi;
387 		ab = b - a;
388 		ap = p - a;
389 		v = cross3(ab, plane);
390 
391 		if (b3Dot(ap, v) > 0.f)
392 		{
393 			float ab_m2 = b3Dot(ab, ab);
394 			float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f;
395 			if (rt <= 0.f)
396 			{
397 				*out = a;
398 			}
399 			else if (rt >= 1.f)
400 			{
401 				*out = b;
402 			}
403 			else
404 			{
405 				float s = 1.f - rt;
406 				out[0].x = s * a.x + rt * b.x;
407 				out[0].y = s * a.y + rt * b.y;
408 				out[0].z = s * a.z + rt * b.z;
409 			}
410 			return false;
411 		}
412 	}
413 	return true;
414 }
415 
416 #define normalize3(a) (a.normalize())
417 
extractManifoldSequentialGlobal(const float4 * p,int nPoints,const float4 & nearNormal,b3Int4 * contactIdx)418 int extractManifoldSequentialGlobal(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)
419 {
420 	if (nPoints == 0)
421 		return 0;
422 
423 	if (nPoints <= 4)
424 		return nPoints;
425 
426 	if (nPoints > 64)
427 		nPoints = 64;
428 
429 	float4 center = b3MakeVector3(0, 0, 0, 0);
430 	{
431 		for (int i = 0; i < nPoints; i++)
432 			center += p[i];
433 		center /= (float)nPoints;
434 	}
435 
436 	//	sample 4 directions
437 
438 	float4 aVector = p[0] - center;
439 	float4 u = cross3(nearNormal, aVector);
440 	float4 v = cross3(nearNormal, u);
441 	u = normalize3(u);
442 	v = normalize3(v);
443 
444 	//keep point with deepest penetration
445 	float minW = FLT_MAX;
446 
447 	int minIndex = -1;
448 
449 	float4 maxDots;
450 	maxDots.x = FLT_MIN;
451 	maxDots.y = FLT_MIN;
452 	maxDots.z = FLT_MIN;
453 	maxDots.w = FLT_MIN;
454 
455 	//	idx, distance
456 	for (int ie = 0; ie < nPoints; ie++)
457 	{
458 		if (p[ie].w < minW)
459 		{
460 			minW = p[ie].w;
461 			minIndex = ie;
462 		}
463 		float f;
464 		float4 r = p[ie] - center;
465 		f = dot3F4(u, r);
466 		if (f < maxDots.x)
467 		{
468 			maxDots.x = f;
469 			contactIdx[0].x = ie;
470 		}
471 
472 		f = dot3F4(-u, r);
473 		if (f < maxDots.y)
474 		{
475 			maxDots.y = f;
476 			contactIdx[0].y = ie;
477 		}
478 
479 		f = dot3F4(v, r);
480 		if (f < maxDots.z)
481 		{
482 			maxDots.z = f;
483 			contactIdx[0].z = ie;
484 		}
485 
486 		f = dot3F4(-v, r);
487 		if (f < maxDots.w)
488 		{
489 			maxDots.w = f;
490 			contactIdx[0].w = ie;
491 		}
492 	}
493 
494 	if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
495 	{
496 		//replace the first contact with minimum (todo: replace contact with least penetration)
497 		contactIdx[0].x = minIndex;
498 	}
499 
500 	return 4;
501 }
502 
503 #define MAX_VERTS 1024
504 
project(const b3ConvexPolyhedronData & hull,const float4 & pos,const b3Quaternion & orn,const float4 & dir,const b3AlignedObjectArray<b3Vector3> & vertices,b3Scalar & min,b3Scalar & max)505 inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max)
506 {
507 	min = FLT_MAX;
508 	max = -FLT_MAX;
509 	int numVerts = hull.m_numVertices;
510 
511 	const float4 localDir = b3QuatRotate(orn.inverse(), dir);
512 
513 	b3Scalar offset = dot3F4(pos, dir);
514 
515 	for (int i = 0; i < numVerts; i++)
516 	{
517 		//b3Vector3 pt = trans * vertices[m_vertexOffset+i];
518 		//b3Scalar dp = pt.dot(dir);
519 		//b3Vector3 vertex = vertices[hull.m_vertexOffset+i];
520 		b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset + i], localDir);
521 		//b3Assert(dp==dpL);
522 		if (dp < min) min = dp;
523 		if (dp > max) max = dp;
524 	}
525 	if (min > max)
526 	{
527 		b3Scalar tmp = min;
528 		min = max;
529 		max = tmp;
530 	}
531 	min += offset;
532 	max += offset;
533 }
534 
TestSepAxis(const b3ConvexPolyhedronData & hullA,const b3ConvexPolyhedronData & hullB,const float4 & posA,const b3Quaternion & ornA,const float4 & posB,const b3Quaternion & ornB,const float4 & sep_axis,const b3AlignedObjectArray<b3Vector3> & verticesA,const b3AlignedObjectArray<b3Vector3> & verticesB,b3Scalar & depth)535 static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
536 						const float4& posA, const b3Quaternion& ornA,
537 						const float4& posB, const b3Quaternion& ornB,
538 						const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB, b3Scalar& depth)
539 {
540 	b3Scalar Min0, Max0;
541 	b3Scalar Min1, Max1;
542 	project(hullA, posA, ornA, sep_axis, verticesA, Min0, Max0);
543 	project(hullB, posB, ornB, sep_axis, verticesB, Min1, Max1);
544 
545 	if (Max0 < Min1 || Max1 < Min0)
546 		return false;
547 
548 	b3Scalar d0 = Max0 - Min1;
549 	assert(d0 >= 0.0f);
550 	b3Scalar d1 = Max1 - Min0;
551 	assert(d1 >= 0.0f);
552 	depth = d0 < d1 ? d0 : d1;
553 	return true;
554 }
555 
IsAlmostZero(const b3Vector3 & v)556 inline bool IsAlmostZero(const b3Vector3& v)
557 {
558 	if (fabsf(v.x) > 1e-6 || fabsf(v.y) > 1e-6 || fabsf(v.z) > 1e-6) return false;
559 	return true;
560 }
561 
findSeparatingAxis(const b3ConvexPolyhedronData & hullA,const b3ConvexPolyhedronData & hullB,const float4 & posA1,const b3Quaternion & ornA,const float4 & posB1,const b3Quaternion & ornB,const b3AlignedObjectArray<b3Vector3> & verticesA,const b3AlignedObjectArray<b3Vector3> & uniqueEdgesA,const b3AlignedObjectArray<b3GpuFace> & facesA,const b3AlignedObjectArray<int> & indicesA,const b3AlignedObjectArray<b3Vector3> & verticesB,const b3AlignedObjectArray<b3Vector3> & uniqueEdgesB,const b3AlignedObjectArray<b3GpuFace> & facesB,const b3AlignedObjectArray<int> & indicesB,b3Vector3 & sep)562 static bool findSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
563 							   const float4& posA1,
564 							   const b3Quaternion& ornA,
565 							   const float4& posB1,
566 							   const b3Quaternion& ornB,
567 							   const b3AlignedObjectArray<b3Vector3>& verticesA,
568 							   const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
569 							   const b3AlignedObjectArray<b3GpuFace>& facesA,
570 							   const b3AlignedObjectArray<int>& indicesA,
571 							   const b3AlignedObjectArray<b3Vector3>& verticesB,
572 							   const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
573 							   const b3AlignedObjectArray<b3GpuFace>& facesB,
574 							   const b3AlignedObjectArray<int>& indicesB,
575 
576 							   b3Vector3& sep)
577 {
578 	B3_PROFILE("findSeparatingAxis");
579 
580 	b3g_actualSATPairTests++;
581 	float4 posA = posA1;
582 	posA.w = 0.f;
583 	float4 posB = posB1;
584 	posB.w = 0.f;
585 	//#ifdef TEST_INTERNAL_OBJECTS
586 	float4 c0local = (float4&)hullA.m_localCenter;
587 	float4 c0 = transform(&c0local, &posA, &ornA);
588 	float4 c1local = (float4&)hullB.m_localCenter;
589 	float4 c1 = transform(&c1local, &posB, &ornB);
590 	const float4 deltaC2 = c0 - c1;
591 	//#endif
592 
593 	b3Scalar dmin = FLT_MAX;
594 	int curPlaneTests = 0;
595 
596 	int numFacesA = hullA.m_numFaces;
597 	// Test normals from hullA
598 	for (int i = 0; i < numFacesA; i++)
599 	{
600 		const float4& normal = (float4&)facesA[hullA.m_faceOffset + i].m_plane;
601 		float4 faceANormalWS = b3QuatRotate(ornA, normal);
602 
603 		if (dot3F4(deltaC2, faceANormalWS) < 0)
604 			faceANormalWS *= -1.f;
605 
606 		curPlaneTests++;
607 #ifdef TEST_INTERNAL_OBJECTS
608 		gExpectedNbTests++;
609 		if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin))
610 			continue;
611 		gActualNbTests++;
612 #endif
613 
614 		b3Scalar d;
615 		if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d))
616 			return false;
617 
618 		if (d < dmin)
619 		{
620 			dmin = d;
621 			sep = (b3Vector3&)faceANormalWS;
622 		}
623 	}
624 
625 	int numFacesB = hullB.m_numFaces;
626 	// Test normals from hullB
627 	for (int i = 0; i < numFacesB; i++)
628 	{
629 		float4 normal = (float4&)facesB[hullB.m_faceOffset + i].m_plane;
630 		float4 WorldNormal = b3QuatRotate(ornB, normal);
631 
632 		if (dot3F4(deltaC2, WorldNormal) < 0)
633 		{
634 			WorldNormal *= -1.f;
635 		}
636 		curPlaneTests++;
637 #ifdef TEST_INTERNAL_OBJECTS
638 		gExpectedNbTests++;
639 		if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin))
640 			continue;
641 		gActualNbTests++;
642 #endif
643 
644 		b3Scalar d;
645 		if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d))
646 			return false;
647 
648 		if (d < dmin)
649 		{
650 			dmin = d;
651 			sep = (b3Vector3&)WorldNormal;
652 		}
653 	}
654 
655 	int curEdgeEdge = 0;
656 	// Test edges
657 	for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++)
658 	{
659 		const float4& edge0 = (float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0];
660 		float4 edge0World = b3QuatRotate(ornA, (float4&)edge0);
661 
662 		for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++)
663 		{
664 			const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1];
665 			float4 edge1World = b3QuatRotate(ornB, (float4&)edge1);
666 
667 			float4 crossje = cross3(edge0World, edge1World);
668 
669 			curEdgeEdge++;
670 			if (!IsAlmostZero((b3Vector3&)crossje))
671 			{
672 				crossje = normalize3(crossje);
673 				if (dot3F4(deltaC2, crossje) < 0)
674 					crossje *= -1.f;
675 
676 #ifdef TEST_INTERNAL_OBJECTS
677 				gExpectedNbTests++;
678 				if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin))
679 					continue;
680 				gActualNbTests++;
681 #endif
682 
683 				b3Scalar dist;
684 				if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist))
685 					return false;
686 
687 				if (dist < dmin)
688 				{
689 					dmin = dist;
690 					sep = (b3Vector3&)crossje;
691 				}
692 			}
693 		}
694 	}
695 
696 	if ((dot3F4(-deltaC2, (float4&)sep)) > 0.0f)
697 		sep = -sep;
698 
699 	return true;
700 }
701 
findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData * hullA,__global const b3ConvexPolyhedronData * hullB,const b3Float4 & posA1,const b3Quat & ornA,const b3Float4 & posB1,const b3Quat & ornB,const b3Float4 & DeltaC2,__global const b3AlignedObjectArray<float4> & vertices,__global const b3AlignedObjectArray<float4> & uniqueEdges,__global const b3AlignedObjectArray<b3GpuFace> & faces,__global const b3AlignedObjectArray<int> & indices,float4 * sep,float * dmin)702 bool findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,
703 								const b3Float4& posA1,
704 								const b3Quat& ornA,
705 								const b3Float4& posB1,
706 								const b3Quat& ornB,
707 								const b3Float4& DeltaC2,
708 								__global const b3AlignedObjectArray<float4>& vertices,
709 								__global const b3AlignedObjectArray<float4>& uniqueEdges,
710 								__global const b3AlignedObjectArray<b3GpuFace>& faces,
711 								__global const b3AlignedObjectArray<int>& indices,
712 								float4* sep,
713 								float* dmin)
714 {
715 	//	int i = get_global_id(0);
716 
717 	float4 posA = posA1;
718 	posA.w = 0.f;
719 	float4 posB = posB1;
720 	posB.w = 0.f;
721 
722 	//int curPlaneTests=0;
723 
724 	int curEdgeEdge = 0;
725 	// Test edges
726 	for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++)
727 	{
728 		const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset + e0];
729 		float4 edge0World = b3QuatRotate(ornA, edge0);
730 
731 		for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++)
732 		{
733 			const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset + e1];
734 			float4 edge1World = b3QuatRotate(ornB, edge1);
735 
736 			float4 crossje = cross3(edge0World, edge1World);
737 
738 			curEdgeEdge++;
739 			if (!IsAlmostZero(crossje))
740 			{
741 				crossje = normalize3(crossje);
742 				if (dot3F4(DeltaC2, crossje) < 0)
743 					crossje *= -1.f;
744 
745 				float dist;
746 				bool result = true;
747 				{
748 					float Min0, Max0;
749 					float Min1, Max1;
750 					project(*hullA, posA, ornA, crossje, vertices, Min0, Max0);
751 					project(*hullB, posB, ornB, crossje, vertices, Min1, Max1);
752 
753 					if (Max0 < Min1 || Max1 < Min0)
754 						result = false;
755 
756 					float d0 = Max0 - Min1;
757 					float d1 = Max1 - Min0;
758 					dist = d0 < d1 ? d0 : d1;
759 					result = true;
760 				}
761 
762 				if (dist < *dmin)
763 				{
764 					*dmin = dist;
765 					*sep = crossje;
766 				}
767 			}
768 		}
769 	}
770 
771 	if ((dot3F4(-DeltaC2, *sep)) > 0.0f)
772 	{
773 		*sep = -(*sep);
774 	}
775 	return true;
776 }
777 
lerp3(const float4 & a,const float4 & b,float t)778 __inline float4 lerp3(const float4& a, const float4& b, float t)
779 {
780 	return b3MakeVector3(a.x + (b.x - a.x) * t,
781 						 a.y + (b.y - a.y) * t,
782 						 a.z + (b.z - a.z) * t,
783 						 0.f);
784 }
785 
786 // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut
clipFace(const float4 * pVtxIn,int numVertsIn,float4 & planeNormalWS,float planeEqWS,float4 * ppVtxOut)787 int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS, float planeEqWS, float4* ppVtxOut)
788 {
789 	int ve;
790 	float ds, de;
791 	int numVertsOut = 0;
792 	if (numVertsIn < 2)
793 		return 0;
794 
795 	float4 firstVertex = pVtxIn[numVertsIn - 1];
796 	float4 endVertex = pVtxIn[0];
797 
798 	ds = dot3F4(planeNormalWS, firstVertex) + planeEqWS;
799 
800 	for (ve = 0; ve < numVertsIn; ve++)
801 	{
802 		endVertex = pVtxIn[ve];
803 
804 		de = dot3F4(planeNormalWS, endVertex) + planeEqWS;
805 
806 		if (ds < 0)
807 		{
808 			if (de < 0)
809 			{
810 				// Start < 0, end < 0, so output endVertex
811 				ppVtxOut[numVertsOut++] = endVertex;
812 			}
813 			else
814 			{
815 				// Start < 0, end >= 0, so output intersection
816 				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
817 			}
818 		}
819 		else
820 		{
821 			if (de < 0)
822 			{
823 				// Start >= 0, end < 0 so output intersection and end
824 				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));
825 				ppVtxOut[numVertsOut++] = endVertex;
826 			}
827 		}
828 		firstVertex = endVertex;
829 		ds = de;
830 	}
831 	return numVertsOut;
832 }
833 
clipFaceAgainstHull(const float4 & separatingNormal,const b3ConvexPolyhedronData * hullA,const float4 & posA,const b3Quaternion & ornA,float4 * worldVertsB1,int numWorldVertsB1,float4 * worldVertsB2,int capacityWorldVertsB2,const float minDist,float maxDist,const b3AlignedObjectArray<float4> & verticesA,const b3AlignedObjectArray<b3GpuFace> & facesA,const b3AlignedObjectArray<int> & indicesA,float4 * contactsOut,int contactCapacity)834 int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA,
835 						const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1,
836 						float4* worldVertsB2, int capacityWorldVertsB2,
837 						const float minDist, float maxDist,
838 						const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
839 						//const float4* verticesB,	const b3GpuFace* facesB,	const int* indicesB,
840 						float4* contactsOut,
841 						int contactCapacity)
842 {
843 	int numContactsOut = 0;
844 
845 	float4* pVtxIn = worldVertsB1;
846 	float4* pVtxOut = worldVertsB2;
847 
848 	int numVertsIn = numWorldVertsB1;
849 	int numVertsOut = 0;
850 
851 	int closestFaceA = -1;
852 	{
853 		float dmin = FLT_MAX;
854 		for (int face = 0; face < hullA->m_numFaces; face++)
855 		{
856 			const float4 Normal = b3MakeVector3(
857 				facesA[hullA->m_faceOffset + face].m_plane.x,
858 				facesA[hullA->m_faceOffset + face].m_plane.y,
859 				facesA[hullA->m_faceOffset + face].m_plane.z, 0.f);
860 			const float4 faceANormalWS = b3QuatRotate(ornA, Normal);
861 
862 			float d = dot3F4(faceANormalWS, separatingNormal);
863 			if (d < dmin)
864 			{
865 				dmin = d;
866 				closestFaceA = face;
867 			}
868 		}
869 	}
870 	if (closestFaceA < 0)
871 		return numContactsOut;
872 
873 	b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA];
874 
875 	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face
876 	//	int numContacts = numWorldVertsB1;
877 	int numVerticesA = polyA.m_numIndices;
878 	for (int e0 = 0; e0 < numVerticesA; e0++)
879 	{
880 		const float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]];
881 		const float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]];
882 		const float4 edge0 = a - b;
883 		const float4 WorldEdge0 = b3QuatRotate(ornA, edge0);
884 		float4 planeNormalA = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);
885 		float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA);
886 
887 		float4 planeNormalWS1 = -cross3(WorldEdge0, worldPlaneAnormal1);
888 		float4 worldA1 = transform(&a, &posA, &ornA);
889 		float planeEqWS1 = -dot3F4(worldA1, planeNormalWS1);
890 
891 		float4 planeNormalWS = planeNormalWS1;
892 		float planeEqWS = planeEqWS1;
893 
894 		//clip face
895 		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
896 		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut);
897 
898 		//btSwap(pVtxIn,pVtxOut);
899 		float4* tmp = pVtxOut;
900 		pVtxOut = pVtxIn;
901 		pVtxIn = tmp;
902 		numVertsIn = numVertsOut;
903 		numVertsOut = 0;
904 	}
905 
906 	// only keep points that are behind the witness face
907 	{
908 		float4 localPlaneNormal = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);
909 		float localPlaneEq = polyA.m_plane.w;
910 		float4 planeNormalWS = b3QuatRotate(ornA, localPlaneNormal);
911 		float planeEqWS = localPlaneEq - dot3F4(planeNormalWS, posA);
912 		for (int i = 0; i < numVertsIn; i++)
913 		{
914 			float depth = dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS;
915 			if (depth <= minDist)
916 			{
917 				depth = minDist;
918 			}
919 			if (numContactsOut < contactCapacity)
920 			{
921 				if (depth <= maxDist)
922 				{
923 					float4 pointInWorld = pVtxIn[i];
924 					//resultOut.addContactPoint(separatingNormal,point,depth);
925 					contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth);
926 					//printf("depth=%f\n",depth);
927 				}
928 			}
929 			else
930 			{
931 				b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut, contactCapacity);
932 			}
933 		}
934 	}
935 
936 	return numContactsOut;
937 }
938 
clipHullAgainstHull(const float4 & separatingNormal,const b3ConvexPolyhedronData & hullA,const b3ConvexPolyhedronData & hullB,const float4 & posA,const b3Quaternion & ornA,const float4 & posB,const b3Quaternion & ornB,float4 * worldVertsB1,float4 * worldVertsB2,int capacityWorldVerts,const float minDist,float maxDist,const b3AlignedObjectArray<float4> & verticesA,const b3AlignedObjectArray<b3GpuFace> & facesA,const b3AlignedObjectArray<int> & indicesA,const b3AlignedObjectArray<float4> & verticesB,const b3AlignedObjectArray<b3GpuFace> & facesB,const b3AlignedObjectArray<int> & indicesB,float4 * contactsOut,int contactCapacity)939 static int clipHullAgainstHull(const float4& separatingNormal,
940 							   const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,
941 							   const float4& posA, const b3Quaternion& ornA, const float4& posB, const b3Quaternion& ornB,
942 							   float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,
943 							   const float minDist, float maxDist,
944 							   const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA,
945 							   const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB,
946 
947 							   float4* contactsOut,
948 							   int contactCapacity)
949 {
950 	int numContactsOut = 0;
951 	int numWorldVertsB1 = 0;
952 
953 	B3_PROFILE("clipHullAgainstHull");
954 
955 	//	float curMaxDist=maxDist;
956 	int closestFaceB = -1;
957 	float dmax = -FLT_MAX;
958 
959 	{
960 		//B3_PROFILE("closestFaceB");
961 		if (hullB.m_numFaces != 1)
962 		{
963 			//printf("wtf\n");
964 		}
965 		static bool once = true;
966 		//printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z);
967 
968 		for (int face = 0; face < hullB.m_numFaces; face++)
969 		{
970 #ifdef BT_DEBUG_SAT_FACE
971 			if (once)
972 				printf("face %d\n", face);
973 			const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face];
974 			if (once)
975 			{
976 				for (int i = 0; i < faceB->m_numIndices; i++)
977 				{
978 					float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]];
979 					printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z);
980 				}
981 			}
982 #endif  //BT_DEBUG_SAT_FACE \
983 	//if (facesB[hullB.m_faceOffset+face].m_numIndices>2)
984 			{
985 				const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x,
986 													facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f);
987 				const float4 WorldNormal = b3QuatRotate(ornB, Normal);
988 #ifdef BT_DEBUG_SAT_FACE
989 				if (once)
990 					printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z);
991 #endif
992 				float d = dot3F4(WorldNormal, separatingNormal);
993 				if (d > dmax)
994 				{
995 					dmax = d;
996 					closestFaceB = face;
997 				}
998 			}
999 		}
1000 		once = false;
1001 	}
1002 
1003 	b3Assert(closestFaceB >= 0);
1004 	{
1005 		//B3_PROFILE("worldVertsB1");
1006 		const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB];
1007 		const int numVertices = polyB.m_numIndices;
1008 		for (int e0 = 0; e0 < numVertices; e0++)
1009 		{
1010 			const float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]];
1011 			worldVertsB1[numWorldVertsB1++] = transform(&b, &posB, &ornB);
1012 		}
1013 	}
1014 
1015 	if (closestFaceB >= 0)
1016 	{
1017 		//B3_PROFILE("clipFaceAgainstHull");
1018 		numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA,
1019 											 posA, ornA,
1020 											 worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist,
1021 											 verticesA, facesA, indicesA,
1022 											 contactsOut, contactCapacity);
1023 	}
1024 
1025 	return numContactsOut;
1026 }
1027 
1028 #define PARALLEL_SUM(v, n) \
1029 	for (int j = 1; j < n; j++) v[0] += v[j];
1030 #define PARALLEL_DO(execution, n)  \
1031 	for (int ie = 0; ie < n; ie++) \
1032 	{                              \
1033 		execution;                 \
1034 	}
1035 #define REDUCE_MAX(v, n)                                                                                     \
1036 	{                                                                                                        \
1037 		int i = 0;                                                                                           \
1038 		for (int offset = 0; offset < n; offset++) v[i] = (v[i].y > v[i + offset].y) ? v[i] : v[i + offset]; \
1039 	}
1040 #define REDUCE_MIN(v, n)                                                                                     \
1041 	{                                                                                                        \
1042 		int i = 0;                                                                                           \
1043 		for (int offset = 0; offset < n; offset++) v[i] = (v[i].y < v[i + offset].y) ? v[i] : v[i + offset]; \
1044 	}
1045 
extractManifold(const float4 * p,int nPoints,const float4 & nearNormal,b3Int4 * contactIdx)1046 int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)
1047 {
1048 	if (nPoints == 0)
1049 		return 0;
1050 
1051 	if (nPoints <= 4)
1052 		return nPoints;
1053 
1054 	if (nPoints > 64)
1055 		nPoints = 64;
1056 
1057 	float4 center = make_float4(0, 0, 0, 0);
1058 	{
1059 		for (int i = 0; i < nPoints; i++)
1060 			center += p[i];
1061 		center /= (float)nPoints;
1062 	}
1063 
1064 	//	sample 4 directions
1065 
1066 	float4 aVector = p[0] - center;
1067 	float4 u = cross3(nearNormal, aVector);
1068 	float4 v = cross3(nearNormal, u);
1069 	u = normalize3(u);
1070 	v = normalize3(v);
1071 
1072 	//keep point with deepest penetration
1073 	float minW = FLT_MAX;
1074 
1075 	int minIndex = -1;
1076 
1077 	float4 maxDots;
1078 	maxDots.x = FLT_MIN;
1079 	maxDots.y = FLT_MIN;
1080 	maxDots.z = FLT_MIN;
1081 	maxDots.w = FLT_MIN;
1082 
1083 	//	idx, distance
1084 	for (int ie = 0; ie < nPoints; ie++)
1085 	{
1086 		if (p[ie].w < minW)
1087 		{
1088 			minW = p[ie].w;
1089 			minIndex = ie;
1090 		}
1091 		float f;
1092 		float4 r = p[ie] - center;
1093 		f = dot3F4(u, r);
1094 		if (f < maxDots.x)
1095 		{
1096 			maxDots.x = f;
1097 			contactIdx[0].x = ie;
1098 		}
1099 
1100 		f = dot3F4(-u, r);
1101 		if (f < maxDots.y)
1102 		{
1103 			maxDots.y = f;
1104 			contactIdx[0].y = ie;
1105 		}
1106 
1107 		f = dot3F4(v, r);
1108 		if (f < maxDots.z)
1109 		{
1110 			maxDots.z = f;
1111 			contactIdx[0].z = ie;
1112 		}
1113 
1114 		f = dot3F4(-v, r);
1115 		if (f < maxDots.w)
1116 		{
1117 			maxDots.w = f;
1118 			contactIdx[0].w = ie;
1119 		}
1120 	}
1121 
1122 	if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)
1123 	{
1124 		//replace the first contact with minimum (todo: replace contact with least penetration)
1125 		contactIdx[0].x = minIndex;
1126 	}
1127 
1128 	return 4;
1129 }
1130 
clipHullHullSingle(int bodyIndexA,int bodyIndexB,const float4 & posA,const b3Quaternion & ornA,const float4 & posB,const b3Quaternion & ornB,int collidableIndexA,int collidableIndexB,const b3AlignedObjectArray<b3RigidBodyData> * bodyBuf,b3AlignedObjectArray<b3Contact4> * globalContactOut,int & nContacts,const b3AlignedObjectArray<b3ConvexPolyhedronData> & hostConvexDataA,const b3AlignedObjectArray<b3ConvexPolyhedronData> & hostConvexDataB,const b3AlignedObjectArray<b3Vector3> & verticesA,const b3AlignedObjectArray<b3Vector3> & uniqueEdgesA,const b3AlignedObjectArray<b3GpuFace> & facesA,const b3AlignedObjectArray<int> & indicesA,const b3AlignedObjectArray<b3Vector3> & verticesB,const b3AlignedObjectArray<b3Vector3> & uniqueEdgesB,const b3AlignedObjectArray<b3GpuFace> & facesB,const b3AlignedObjectArray<int> & indicesB,const b3AlignedObjectArray<b3Collidable> & hostCollidablesA,const b3AlignedObjectArray<b3Collidable> & hostCollidablesB,const b3Vector3 & sepNormalWorldSpace,int maxContactCapacity)1131 int clipHullHullSingle(
1132 	int bodyIndexA, int bodyIndexB,
1133 	const float4& posA,
1134 	const b3Quaternion& ornA,
1135 	const float4& posB,
1136 	const b3Quaternion& ornB,
1137 
1138 	int collidableIndexA, int collidableIndexB,
1139 
1140 	const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf,
1141 	b3AlignedObjectArray<b3Contact4>* globalContactOut,
1142 	int& nContacts,
1143 
1144 	const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA,
1145 	const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB,
1146 
1147 	const b3AlignedObjectArray<b3Vector3>& verticesA,
1148 	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,
1149 	const b3AlignedObjectArray<b3GpuFace>& facesA,
1150 	const b3AlignedObjectArray<int>& indicesA,
1151 
1152 	const b3AlignedObjectArray<b3Vector3>& verticesB,
1153 	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,
1154 	const b3AlignedObjectArray<b3GpuFace>& facesB,
1155 	const b3AlignedObjectArray<int>& indicesB,
1156 
1157 	const b3AlignedObjectArray<b3Collidable>& hostCollidablesA,
1158 	const b3AlignedObjectArray<b3Collidable>& hostCollidablesB,
1159 	const b3Vector3& sepNormalWorldSpace,
1160 	int maxContactCapacity)
1161 {
1162 	int contactIndex = -1;
1163 	b3ConvexPolyhedronData hullA, hullB;
1164 
1165 	b3Collidable colA = hostCollidablesA[collidableIndexA];
1166 	hullA = hostConvexDataA[colA.m_shapeIndex];
1167 	//printf("numvertsA = %d\n",hullA.m_numVertices);
1168 
1169 	b3Collidable colB = hostCollidablesB[collidableIndexB];
1170 	hullB = hostConvexDataB[colB.m_shapeIndex];
1171 	//printf("numvertsB = %d\n",hullB.m_numVertices);
1172 
1173 	float4 contactsOut[MAX_VERTS];
1174 	int localContactCapacity = MAX_VERTS;
1175 
1176 #ifdef _WIN32
1177 	b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x));
1178 	b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x));
1179 #endif
1180 
1181 	{
1182 		float4 worldVertsB1[MAX_VERTS];
1183 		float4 worldVertsB2[MAX_VERTS];
1184 		int capacityWorldVerts = MAX_VERTS;
1185 
1186 		float4 hostNormal = make_float4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f);
1187 		int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex;
1188 		int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex;
1189 
1190 		b3Scalar minDist = -1;
1191 		b3Scalar maxDist = 0.;
1192 
1193 		b3Transform trA, trB;
1194 		{
1195 			//B3_PROFILE("transform computation");
1196 			//trA.setIdentity();
1197 			trA.setOrigin(b3MakeVector3(posA.x, posA.y, posA.z));
1198 			trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w));
1199 
1200 			//trB.setIdentity();
1201 			trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z));
1202 			trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w));
1203 		}
1204 
1205 		b3Quaternion trAorn = trA.getRotation();
1206 		b3Quaternion trBorn = trB.getRotation();
1207 
1208 		int numContactsOut = clipHullAgainstHull(hostNormal,
1209 												 hostConvexDataA.at(shapeA),
1210 												 hostConvexDataB.at(shapeB),
1211 												 (float4&)trA.getOrigin(), (b3Quaternion&)trAorn,
1212 												 (float4&)trB.getOrigin(), (b3Quaternion&)trBorn,
1213 												 worldVertsB1, worldVertsB2, capacityWorldVerts,
1214 												 minDist, maxDist,
1215 												 verticesA, facesA, indicesA,
1216 												 verticesB, facesB, indicesB,
1217 
1218 												 contactsOut, localContactCapacity);
1219 
1220 		if (numContactsOut > 0)
1221 		{
1222 			B3_PROFILE("overlap");
1223 
1224 			float4 normalOnSurfaceB = (float4&)hostNormal;
1225 
1226 			b3Int4 contactIdx;
1227 			contactIdx.x = 0;
1228 			contactIdx.y = 1;
1229 			contactIdx.z = 2;
1230 			contactIdx.w = 3;
1231 
1232 			int numPoints = 0;
1233 
1234 			{
1235 				//	B3_PROFILE("extractManifold");
1236 				numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx);
1237 			}
1238 
1239 			b3Assert(numPoints);
1240 
1241 			if (nContacts < maxContactCapacity)
1242 			{
1243 				contactIndex = nContacts;
1244 				globalContactOut->expand();
1245 				b3Contact4& contact = globalContactOut->at(nContacts);
1246 				contact.m_batchIdx = 0;  //i;
1247 				contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA;
1248 				contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;
1249 
1250 				contact.m_frictionCoeffCmp = 45874;
1251 				contact.m_restituitionCoeffCmp = 0;
1252 
1253 				//			float distance = 0.f;
1254 				for (int p = 0; p < numPoints; p++)
1255 				{
1256 					contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];  //check if it is actually on B
1257 					contact.m_worldNormalOnB = normalOnSurfaceB;
1258 				}
1259 				//printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints);
1260 				contact.m_worldNormalOnB.w = (b3Scalar)numPoints;
1261 				nContacts++;
1262 			}
1263 			else
1264 			{
1265 				b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity);
1266 			}
1267 		}
1268 	}
1269 	return contactIndex;
1270 }
1271 
computeContactPlaneConvex(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3RigidBodyData * rigidBodies,const b3Collidable * collidables,const b3ConvexPolyhedronData * convexShapes,const b3Vector3 * convexVertices,const int * convexIndices,const b3GpuFace * faces,b3Contact4 * globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity)1272 void computeContactPlaneConvex(int pairIndex,
1273 							   int bodyIndexA, int bodyIndexB,
1274 							   int collidableIndexA, int collidableIndexB,
1275 							   const b3RigidBodyData* rigidBodies,
1276 							   const b3Collidable* collidables,
1277 							   const b3ConvexPolyhedronData* convexShapes,
1278 							   const b3Vector3* convexVertices,
1279 							   const int* convexIndices,
1280 							   const b3GpuFace* faces,
1281 							   b3Contact4* globalContactsOut,
1282 							   int& nGlobalContactsOut,
1283 							   int maxContactCapacity)
1284 {
1285 	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;
1286 	const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex];
1287 
1288 	b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;
1289 	b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
1290 	b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
1291 	b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
1292 
1293 	//	int numContactsOut = 0;
1294 	//	int numWorldVertsB1= 0;
1295 
1296 	b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;
1297 	b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z);
1298 	b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);
1299 	float planeConstant = planeEq.w;
1300 	b3Transform convexWorldTransform;
1301 	convexWorldTransform.setIdentity();
1302 	convexWorldTransform.setOrigin(posB);
1303 	convexWorldTransform.setRotation(ornB);
1304 	b3Transform planeTransform;
1305 	planeTransform.setIdentity();
1306 	planeTransform.setOrigin(posA);
1307 	planeTransform.setRotation(ornA);
1308 
1309 	b3Transform planeInConvex;
1310 	planeInConvex = convexWorldTransform.inverse() * planeTransform;
1311 	b3Transform convexInPlane;
1312 	convexInPlane = planeTransform.inverse() * convexWorldTransform;
1313 
1314 	b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;
1315 	float maxDot = -1e30;
1316 	int hitVertex = -1;
1317 	b3Vector3 hitVtx;
1318 
1319 #define MAX_PLANE_CONVEX_POINTS 64
1320 
1321 	b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
1322 	int numPoints = 0;
1323 
1324 	b3Int4 contactIdx;
1325 	contactIdx.s[0] = 0;
1326 	contactIdx.s[1] = 1;
1327 	contactIdx.s[2] = 2;
1328 	contactIdx.s[3] = 3;
1329 
1330 	for (int i = 0; i < hullB->m_numVertices; i++)
1331 	{
1332 		b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];
1333 		float curDot = vtx.dot(planeNormalInConvex);
1334 
1335 		if (curDot > maxDot)
1336 		{
1337 			hitVertex = i;
1338 			maxDot = curDot;
1339 			hitVtx = vtx;
1340 			//make sure the deepest points is always included
1341 			if (numPoints == MAX_PLANE_CONVEX_POINTS)
1342 				numPoints--;
1343 		}
1344 
1345 		if (numPoints < MAX_PLANE_CONVEX_POINTS)
1346 		{
1347 			b3Vector3 vtxWorld = convexWorldTransform * vtx;
1348 			b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld;
1349 			float dist = planeNormal.dot(vtxInPlane) - planeConstant;
1350 			if (dist < 0.f)
1351 			{
1352 				vtxWorld.w = dist;
1353 				contactPoints[numPoints] = vtxWorld;
1354 				numPoints++;
1355 			}
1356 		}
1357 	}
1358 
1359 	int numReducedPoints = 0;
1360 
1361 	numReducedPoints = numPoints;
1362 
1363 	if (numPoints > 4)
1364 	{
1365 		numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);
1366 	}
1367 	int dstIdx;
1368 	//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
1369 
1370 	if (numReducedPoints > 0)
1371 	{
1372 		if (nGlobalContactsOut < maxContactCapacity)
1373 		{
1374 			dstIdx = nGlobalContactsOut;
1375 			nGlobalContactsOut++;
1376 
1377 			b3Contact4* c = &globalContactsOut[dstIdx];
1378 			c->m_worldNormalOnB = -planeNormalWorld;
1379 			c->setFrictionCoeff(0.7);
1380 			c->setRestituitionCoeff(0.f);
1381 
1382 			c->m_batchIdx = pairIndex;
1383 			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
1384 			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
1385 			for (int i = 0; i < numReducedPoints; i++)
1386 			{
1387 				b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];
1388 				c->m_worldPosB[i] = pOnB1;
1389 			}
1390 			c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints;
1391 		}  //if (dstIdx < numPairs)
1392 	}
1393 
1394 	//	printf("computeContactPlaneConvex\n");
1395 }
1396 
MyUnQuantize(const unsigned short * vecIn,const b3Vector3 & quantization,const b3Vector3 & bvhAabbMin)1397 B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin)
1398 {
1399 	b3Vector3 vecOut;
1400 	vecOut.setValue(
1401 		(b3Scalar)(vecIn[0]) / (quantization.x),
1402 		(b3Scalar)(vecIn[1]) / (quantization.y),
1403 		(b3Scalar)(vecIn[2]) / (quantization.z));
1404 	vecOut += bvhAabbMin;
1405 	return vecOut;
1406 }
1407 
traverseTreeTree()1408 void traverseTreeTree()
1409 {
1410 }
1411 
1412 #include "Bullet3Common/shared/b3Mat3x3.h"
1413 
1414 int numAabbChecks = 0;
1415 int maxNumAabbChecks = 0;
1416 int maxDepth = 0;
1417 
1418 // work-in-progress
findCompoundPairsKernel(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,__global const b3RigidBodyData * rigidBodies,__global const b3Collidable * collidables,__global const b3ConvexPolyhedronData * convexShapes,__global const b3AlignedObjectArray<b3Float4> & vertices,__global const b3AlignedObjectArray<b3Aabb> & aabbsWorldSpace,__global const b3AlignedObjectArray<b3Aabb> & aabbsLocalSpace,__global const b3GpuChildShape * gpuChildShapes,__global b3Int4 * gpuCompoundPairsOut,__global int * numCompoundPairsOut,int maxNumCompoundPairsCapacity,b3AlignedObjectArray<b3QuantizedBvhNode> & treeNodesCPU,b3AlignedObjectArray<b3BvhSubtreeInfo> & subTreesCPU,b3AlignedObjectArray<b3BvhInfo> & bvhInfoCPU)1419 __kernel void findCompoundPairsKernel(
1420 	int pairIndex,
1421 	int bodyIndexA,
1422 	int bodyIndexB,
1423 	int collidableIndexA,
1424 	int collidableIndexB,
1425 	__global const b3RigidBodyData* rigidBodies,
1426 	__global const b3Collidable* collidables,
1427 	__global const b3ConvexPolyhedronData* convexShapes,
1428 	__global const b3AlignedObjectArray<b3Float4>& vertices,
1429 	__global const b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace,
1430 	__global const b3AlignedObjectArray<b3Aabb>& aabbsLocalSpace,
1431 	__global const b3GpuChildShape* gpuChildShapes,
1432 	__global b3Int4* gpuCompoundPairsOut,
1433 	__global int* numCompoundPairsOut,
1434 	int maxNumCompoundPairsCapacity,
1435 	b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
1436 	b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
1437 	b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)
1438 {
1439 	numAabbChecks = 0;
1440 	maxNumAabbChecks = 0;
1441 	//	int i = pairIndex;
1442 	{
1443 		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1444 		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1445 
1446 		//once the broadphase avoids static-static pairs, we can remove this test
1447 		if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0))
1448 		{
1449 			return;
1450 		}
1451 
1452 		if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1453 		{
1454 			int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;
1455 			int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;
1456 			int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees;
1457 			int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset;
1458 			int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset;
1459 
1460 			int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees;
1461 
1462 			float4 posA = rigidBodies[bodyIndexA].m_pos;
1463 			b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1464 
1465 			b3Transform transA;
1466 			transA.setIdentity();
1467 			transA.setOrigin(posA);
1468 			transA.setRotation(ornA);
1469 
1470 			b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1471 			float4 posB = rigidBodies[bodyIndexB].m_pos;
1472 
1473 			b3Transform transB;
1474 			transB.setIdentity();
1475 			transB.setOrigin(posB);
1476 			transB.setRotation(ornB);
1477 
1478 			for (int p = 0; p < numSubTreesA; p++)
1479 			{
1480 				b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA + p];
1481 				//bvhInfoCPU[bvhA].m_quantization
1482 				b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1483 				b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1484 
1485 				b3Vector3 aabbAMinOut, aabbAMaxOut;
1486 				float margin = 0.f;
1487 				b3TransformAabb2(treeAminLocal, treeAmaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1488 
1489 				for (int q = 0; q < numSubTreesB; q++)
1490 				{
1491 					b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB + q];
1492 
1493 					b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1494 					b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1495 
1496 					b3Vector3 aabbBMinOut, aabbBMaxOut;
1497 					float margin = 0.f;
1498 					b3TransformAabb2(treeBminLocal, treeBmaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1499 
1500 					numAabbChecks = 0;
1501 					bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1502 					if (aabbOverlap)
1503 					{
1504 						int startNodeIndexA = subtreeA.m_rootNodeIndex + bvhInfoCPU[bvhA].m_nodeOffset;
1505 						//				int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;
1506 
1507 						int startNodeIndexB = subtreeB.m_rootNodeIndex + bvhInfoCPU[bvhB].m_nodeOffset;
1508 						//				int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;
1509 
1510 						b3AlignedObjectArray<b3Int2> nodeStack;
1511 						b3Int2 node0;
1512 						node0.x = startNodeIndexA;
1513 						node0.y = startNodeIndexB;
1514 
1515 						int maxStackDepth = 1024;
1516 						nodeStack.resize(maxStackDepth);
1517 						int depth = 0;
1518 						nodeStack[depth++] = node0;
1519 
1520 						do
1521 						{
1522 							if (depth > maxDepth)
1523 							{
1524 								maxDepth = depth;
1525 								printf("maxDepth=%d\n", maxDepth);
1526 							}
1527 							b3Int2 node = nodeStack[--depth];
1528 
1529 							b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1530 							b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin);
1531 
1532 							b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1533 							b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin);
1534 
1535 							float margin = 0.f;
1536 							b3Vector3 aabbAMinOut, aabbAMaxOut;
1537 							b3TransformAabb2(aMinLocal, aMaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1538 
1539 							b3Vector3 aabbBMinOut, aabbBMaxOut;
1540 							b3TransformAabb2(bMinLocal, bMaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1541 
1542 							numAabbChecks++;
1543 							bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1544 							if (nodeOverlap)
1545 							{
1546 								bool isLeafA = treeNodesCPU[node.x].isLeafNode();
1547 								bool isLeafB = treeNodesCPU[node.y].isLeafNode();
1548 								bool isInternalA = !isLeafA;
1549 								bool isInternalB = !isLeafB;
1550 
1551 								//fail, even though it might hit two leaf nodes
1552 								if (depth + 4 > maxStackDepth && !(isLeafA && isLeafB))
1553 								{
1554 									b3Error("Error: traversal exceeded maxStackDepth\n");
1555 									continue;
1556 								}
1557 
1558 								if (isInternalA)
1559 								{
1560 									int nodeAleftChild = node.x + 1;
1561 									bool isNodeALeftChildLeaf = treeNodesCPU[node.x + 1].isLeafNode();
1562 									int nodeArightChild = isNodeALeftChildLeaf ? node.x + 2 : node.x + 1 + treeNodesCPU[node.x + 1].getEscapeIndex();
1563 
1564 									if (isInternalB)
1565 									{
1566 										int nodeBleftChild = node.y + 1;
1567 										bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode();
1568 										int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex();
1569 
1570 										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);
1571 										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);
1572 										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);
1573 										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);
1574 									}
1575 									else
1576 									{
1577 										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, node.y);
1578 										nodeStack[depth++] = b3MakeInt2(nodeArightChild, node.y);
1579 									}
1580 								}
1581 								else
1582 								{
1583 									if (isInternalB)
1584 									{
1585 										int nodeBleftChild = node.y + 1;
1586 										bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode();
1587 										int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex();
1588 										nodeStack[depth++] = b3MakeInt2(node.x, nodeBleftChild);
1589 										nodeStack[depth++] = b3MakeInt2(node.x, nodeBrightChild);
1590 									}
1591 									else
1592 									{
1593 										int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1594 										if (compoundPairIdx < maxNumCompoundPairsCapacity)
1595 										{
1596 											int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex();
1597 											int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex();
1598 											gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);
1599 										}
1600 									}
1601 								}
1602 							}
1603 						} while (depth);
1604 						maxNumAabbChecks = b3Max(numAabbChecks, maxNumAabbChecks);
1605 					}
1606 				}
1607 			}
1608 
1609 			return;
1610 		}
1611 
1612 		if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) || (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1613 		{
1614 			if (collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
1615 			{
1616 				int numChildrenA = collidables[collidableIndexA].m_numChildShapes;
1617 				for (int c = 0; c < numChildrenA; c++)
1618 				{
1619 					int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex + c;
1620 					int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1621 
1622 					float4 posA = rigidBodies[bodyIndexA].m_pos;
1623 					b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1624 					float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1625 					b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1626 					float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1627 					b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1628 
1629 					b3Aabb aabbA = aabbsLocalSpace[childColIndexA];
1630 
1631 					b3Transform transA;
1632 					transA.setIdentity();
1633 					transA.setOrigin(newPosA);
1634 					transA.setRotation(newOrnA);
1635 					b3Scalar margin = 0.0f;
1636 
1637 					b3Vector3 aabbAMinOut, aabbAMaxOut;
1638 
1639 					b3TransformAabb2((const b3Float4&)aabbA.m_min, (const b3Float4&)aabbA.m_max, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut);
1640 
1641 					if (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
1642 					{
1643 						int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1644 						for (int b = 0; b < numChildrenB; b++)
1645 						{
1646 							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
1647 							int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1648 							b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1649 							float4 posB = rigidBodies[bodyIndexB].m_pos;
1650 							float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1651 							b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1652 							float4 newPosB = transform(&childPosB, &posB, &ornB);
1653 							b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1654 
1655 							b3Aabb aabbB = aabbsLocalSpace[childColIndexB];
1656 
1657 							b3Transform transB;
1658 							transB.setIdentity();
1659 							transB.setOrigin(newPosB);
1660 							transB.setRotation(newOrnB);
1661 
1662 							b3Vector3 aabbBMinOut, aabbBMaxOut;
1663 							b3TransformAabb2((const b3Float4&)aabbB.m_min, (const b3Float4&)aabbB.m_max, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);
1664 
1665 							numAabbChecks++;
1666 							bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);
1667 							if (aabbOverlap)
1668 							{
1669 								/*
1670 								int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1671 								float dmin = FLT_MAX;
1672 								float4 posA = newPosA;
1673 								posA.w = 0.f;
1674 								float4 posB = newPosB;
1675 								posB.w = 0.f;
1676 								float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1677 								b3Quat ornA = newOrnA;
1678 								float4 c0 = transform(&c0local, &posA, &ornA);
1679 								float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1680 								b3Quat ornB =newOrnB;
1681 								float4 c1 = transform(&c1local,&posB,&ornB);
1682 								const float4 DeltaC2 = c0 - c1;
1683 								*/
1684 								{  //
1685 									int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1686 									if (compoundPairIdx < maxNumCompoundPairsCapacity)
1687 									{
1688 										gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);
1689 									}
1690 								}  //
1691 							}      //fi(1)
1692 						}          //for (int b=0
1693 					}              //if (collidables[collidableIndexB].
1694 					else           //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1695 					{
1696 						if (1)
1697 						{
1698 							//	int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1699 							//	float dmin = FLT_MAX;
1700 							float4 posA = newPosA;
1701 							posA.w = 0.f;
1702 							float4 posB = rigidBodies[bodyIndexB].m_pos;
1703 							posB.w = 0.f;
1704 							float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1705 							b3Quat ornA = newOrnA;
1706 							float4 c0;
1707 							c0 = transform(&c0local, &posA, &ornA);
1708 							float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1709 							b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1710 							float4 c1;
1711 							c1 = transform(&c1local, &posB, &ornB);
1712 							//	const float4 DeltaC2 = c0 - c1;
1713 
1714 							{
1715 								int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1716 								if (compoundPairIdx < maxNumCompoundPairsCapacity)
1717 								{
1718 									gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, -1);
1719 								}  //if (compoundPairIdx<maxNumCompoundPairsCapacity)
1720 							}      //
1721 						}          //fi (1)
1722 					}              //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1723 				}                  //for (int b=0;b<numChildrenB;b++)
1724 				return;
1725 			}  //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1726 			if ((collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))
1727 			{
1728 				int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1729 				for (int b = 0; b < numChildrenB; b++)
1730 				{
1731 					int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
1732 					int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1733 					b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1734 					float4 posB = rigidBodies[bodyIndexB].m_pos;
1735 					float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1736 					b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1737 					float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1738 					b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1739 
1740 					int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1741 
1742 					//////////////////////////////////////
1743 
1744 					if (1)
1745 					{
1746 						//	int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1747 						//	float dmin = FLT_MAX;
1748 						float4 posA = rigidBodies[bodyIndexA].m_pos;
1749 						posA.w = 0.f;
1750 						float4 posB = newPosB;
1751 						posB.w = 0.f;
1752 						float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1753 						b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1754 						float4 c0;
1755 						c0 = transform(&c0local, &posA, &ornA);
1756 						float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1757 						b3Quat ornB = newOrnB;
1758 						float4 c1;
1759 						c1 = transform(&c1local, &posB, &ornB);
1760 						//	const float4 DeltaC2 = c0 - c1;
1761 						{  //
1762 							int compoundPairIdx = b3AtomicInc(numCompoundPairsOut);
1763 							if (compoundPairIdx < maxNumCompoundPairsCapacity)
1764 							{
1765 								gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, -1, childShapeIndexB);
1766 							}  //fi (compoundPairIdx<maxNumCompoundPairsCapacity)
1767 						}      //
1768 					}          //fi (1)
1769 				}              //for (int b=0;b<numChildrenB;b++)
1770 				return;
1771 			}  //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1772 			return;
1773 		}  //fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1774 	}      //i<numPairs
1775 }
1776 
processCompoundPairsKernel(__global const b3Int4 * gpuCompoundPairs,__global const b3RigidBodyData * rigidBodies,__global const b3Collidable * collidables,__global const b3ConvexPolyhedronData * convexShapes,__global const b3AlignedObjectArray<b3Float4> & vertices,__global const b3AlignedObjectArray<b3Float4> & uniqueEdges,__global const b3AlignedObjectArray<b3GpuFace> & faces,__global const b3AlignedObjectArray<int> & indices,__global b3Aabb * aabbs,__global const b3GpuChildShape * gpuChildShapes,__global b3AlignedObjectArray<b3Float4> & gpuCompoundSepNormalsOut,__global b3AlignedObjectArray<int> & gpuHasCompoundSepNormalsOut,int numCompoundPairs,int i)1777 __kernel void processCompoundPairsKernel(__global const b3Int4* gpuCompoundPairs,
1778 										 __global const b3RigidBodyData* rigidBodies,
1779 										 __global const b3Collidable* collidables,
1780 										 __global const b3ConvexPolyhedronData* convexShapes,
1781 										 __global const b3AlignedObjectArray<b3Float4>& vertices,
1782 										 __global const b3AlignedObjectArray<b3Float4>& uniqueEdges,
1783 										 __global const b3AlignedObjectArray<b3GpuFace>& faces,
1784 										 __global const b3AlignedObjectArray<int>& indices,
1785 										 __global b3Aabb* aabbs,
1786 										 __global const b3GpuChildShape* gpuChildShapes,
1787 										 __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut,
1788 										 __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut,
1789 										 int numCompoundPairs,
1790 										 int i)
1791 {
1792 	//	int i = get_global_id(0);
1793 	if (i < numCompoundPairs)
1794 	{
1795 		int bodyIndexA = gpuCompoundPairs[i].x;
1796 		int bodyIndexB = gpuCompoundPairs[i].y;
1797 
1798 		int childShapeIndexA = gpuCompoundPairs[i].z;
1799 		int childShapeIndexB = gpuCompoundPairs[i].w;
1800 
1801 		int collidableIndexA = -1;
1802 		int collidableIndexB = -1;
1803 
1804 		b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1805 		float4 posA = rigidBodies[bodyIndexA].m_pos;
1806 
1807 		b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1808 		float4 posB = rigidBodies[bodyIndexB].m_pos;
1809 
1810 		if (childShapeIndexA >= 0)
1811 		{
1812 			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1813 			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1814 			b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1815 			float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1816 			b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1817 			posA = newPosA;
1818 			ornA = newOrnA;
1819 		}
1820 		else
1821 		{
1822 			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1823 		}
1824 
1825 		if (childShapeIndexB >= 0)
1826 		{
1827 			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1828 			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1829 			b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1830 			float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1831 			b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1832 			posB = newPosB;
1833 			ornB = newOrnB;
1834 		}
1835 		else
1836 		{
1837 			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1838 		}
1839 
1840 		gpuHasCompoundSepNormalsOut[i] = 0;
1841 
1842 		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1843 		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1844 
1845 		int shapeTypeA = collidables[collidableIndexA].m_shapeType;
1846 		int shapeTypeB = collidables[collidableIndexB].m_shapeType;
1847 
1848 		if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))
1849 		{
1850 			return;
1851 		}
1852 
1853 		int hasSeparatingAxis = 5;
1854 
1855 		//	int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1856 		float dmin = FLT_MAX;
1857 		posA.w = 0.f;
1858 		posB.w = 0.f;
1859 		float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1860 		float4 c0 = transform(&c0local, &posA, &ornA);
1861 		float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1862 		float4 c1 = transform(&c1local, &posB, &ornB);
1863 		const float4 DeltaC2 = c0 - c1;
1864 		float4 sepNormal = make_float4(1, 0, 0, 0);
1865 		//		bool sepA = findSeparatingAxis(	convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);
1866 		bool sepA = findSeparatingAxis(convexShapes[shapeIndexA], convexShapes[shapeIndexB], posA, ornA, posB, ornB, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal);  //,&dmin);
1867 
1868 		hasSeparatingAxis = 4;
1869 		if (!sepA)
1870 		{
1871 			hasSeparatingAxis = 0;
1872 		}
1873 		else
1874 		{
1875 			bool sepB = findSeparatingAxis(convexShapes[shapeIndexB], convexShapes[shapeIndexA], posB, ornB, posA, ornA, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal);  //,&dmin);
1876 
1877 			if (!sepB)
1878 			{
1879 				hasSeparatingAxis = 0;
1880 			}
1881 			else  //(!sepB)
1882 			{
1883 				bool sepEE = findSeparatingAxisEdgeEdge(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB], posA, ornA, posB, ornB, DeltaC2, vertices, uniqueEdges, faces, indices, &sepNormal, &dmin);
1884 				if (sepEE)
1885 				{
1886 					gpuCompoundSepNormalsOut[i] = sepNormal;  //fastNormalize4(sepNormal);
1887 					gpuHasCompoundSepNormalsOut[i] = 1;
1888 				}  //sepEE
1889 			}      //(!sepB)
1890 		}          //(!sepA)
1891 	}
1892 }
1893 
clipCompoundsHullHullKernel(__global const b3Int4 * gpuCompoundPairs,__global const b3RigidBodyData * rigidBodies,__global const b3Collidable * collidables,__global const b3ConvexPolyhedronData * convexShapes,__global const b3AlignedObjectArray<b3Float4> & vertices,__global const b3AlignedObjectArray<b3Float4> & uniqueEdges,__global const b3AlignedObjectArray<b3GpuFace> & faces,__global const b3AlignedObjectArray<int> & indices,__global const b3GpuChildShape * gpuChildShapes,__global const b3AlignedObjectArray<b3Float4> & gpuCompoundSepNormalsOut,__global const b3AlignedObjectArray<int> & gpuHasCompoundSepNormalsOut,__global struct b3Contact4Data * globalContactsOut,int * nGlobalContactsOut,int numCompoundPairs,int maxContactCapacity,int i)1894 __kernel void clipCompoundsHullHullKernel(__global const b3Int4* gpuCompoundPairs,
1895 										  __global const b3RigidBodyData* rigidBodies,
1896 										  __global const b3Collidable* collidables,
1897 										  __global const b3ConvexPolyhedronData* convexShapes,
1898 										  __global const b3AlignedObjectArray<b3Float4>& vertices,
1899 										  __global const b3AlignedObjectArray<b3Float4>& uniqueEdges,
1900 										  __global const b3AlignedObjectArray<b3GpuFace>& faces,
1901 										  __global const b3AlignedObjectArray<int>& indices,
1902 										  __global const b3GpuChildShape* gpuChildShapes,
1903 										  __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut,
1904 										  __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut,
1905 										  __global struct b3Contact4Data* globalContactsOut,
1906 										  int* nGlobalContactsOut,
1907 										  int numCompoundPairs, int maxContactCapacity, int i)
1908 {
1909 	//	int i = get_global_id(0);
1910 	int pairIndex = i;
1911 
1912 	float4 worldVertsB1[64];
1913 	float4 worldVertsB2[64];
1914 	int capacityWorldVerts = 64;
1915 
1916 	float4 localContactsOut[64];
1917 	int localContactCapacity = 64;
1918 
1919 	float minDist = -1e30f;
1920 	float maxDist = 0.0f;
1921 
1922 	if (i < numCompoundPairs)
1923 	{
1924 		if (gpuHasCompoundSepNormalsOut[i])
1925 		{
1926 			int bodyIndexA = gpuCompoundPairs[i].x;
1927 			int bodyIndexB = gpuCompoundPairs[i].y;
1928 
1929 			int childShapeIndexA = gpuCompoundPairs[i].z;
1930 			int childShapeIndexB = gpuCompoundPairs[i].w;
1931 
1932 			int collidableIndexA = -1;
1933 			int collidableIndexB = -1;
1934 
1935 			b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1936 			float4 posA = rigidBodies[bodyIndexA].m_pos;
1937 
1938 			b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1939 			float4 posB = rigidBodies[bodyIndexB].m_pos;
1940 
1941 			if (childShapeIndexA >= 0)
1942 			{
1943 				collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1944 				float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1945 				b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1946 				float4 newPosA = b3QuatRotate(ornA, childPosA) + posA;
1947 				b3Quat newOrnA = b3QuatMul(ornA, childOrnA);
1948 				posA = newPosA;
1949 				ornA = newOrnA;
1950 			}
1951 			else
1952 			{
1953 				collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1954 			}
1955 
1956 			if (childShapeIndexB >= 0)
1957 			{
1958 				collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1959 				float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1960 				b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1961 				float4 newPosB = b3QuatRotate(ornB, childPosB) + posB;
1962 				b3Quat newOrnB = b3QuatMul(ornB, childOrnB);
1963 				posB = newPosB;
1964 				ornB = newOrnB;
1965 			}
1966 			else
1967 			{
1968 				collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1969 			}
1970 
1971 			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1972 			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1973 
1974 			int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],
1975 														  convexShapes[shapeIndexA], convexShapes[shapeIndexB],
1976 														  posA, ornA,
1977 														  posB, ornB,
1978 														  worldVertsB1, worldVertsB2, capacityWorldVerts,
1979 														  minDist, maxDist,
1980 														  vertices, faces, indices,
1981 														  vertices, faces, indices,
1982 														  localContactsOut, localContactCapacity);
1983 
1984 			if (numLocalContactsOut > 0)
1985 			{
1986 				float4 normal = -gpuCompoundSepNormalsOut[i];
1987 				int nPoints = numLocalContactsOut;
1988 				float4* pointsIn = localContactsOut;
1989 				b3Int4 contactIdx;  // = {-1,-1,-1,-1};
1990 
1991 				contactIdx.s[0] = 0;
1992 				contactIdx.s[1] = 1;
1993 				contactIdx.s[2] = 2;
1994 				contactIdx.s[3] = 3;
1995 
1996 				int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);
1997 
1998 				int dstIdx;
1999 				dstIdx = b3AtomicInc(nGlobalContactsOut);
2000 				if ((dstIdx + nReducedContacts) < maxContactCapacity)
2001 				{
2002 					__global struct b3Contact4Data* c = globalContactsOut + dstIdx;
2003 					c->m_worldNormalOnB = -normal;
2004 					c->m_restituitionCoeffCmp = (0.f * 0xffff);
2005 					c->m_frictionCoeffCmp = (0.7f * 0xffff);
2006 					c->m_batchIdx = pairIndex;
2007 					int bodyA = gpuCompoundPairs[pairIndex].x;
2008 					int bodyB = gpuCompoundPairs[pairIndex].y;
2009 					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA;
2010 					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? -bodyB : bodyB;
2011 					c->m_childIndexA = childShapeIndexA;
2012 					c->m_childIndexB = childShapeIndexB;
2013 					for (int i = 0; i < nReducedContacts; i++)
2014 					{
2015 						c->m_worldPosB[i] = pointsIn[contactIdx.s[i]];
2016 					}
2017 					b3Contact4Data_setNumPoints(c, nReducedContacts);
2018 				}
2019 
2020 			}  //		if (numContactsOut>0)
2021 		}      //		if (gpuHasCompoundSepNormalsOut[i])
2022 	}          //	if (i<numCompoundPairs)
2023 }
2024 
computeContactCompoundCompound(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3RigidBodyData * rigidBodies,const b3Collidable * collidables,const b3ConvexPolyhedronData * convexShapes,const b3GpuChildShape * cpuChildShapes,const b3AlignedObjectArray<b3Aabb> & hostAabbsWorldSpace,const b3AlignedObjectArray<b3Aabb> & hostAabbsLocalSpace,const b3AlignedObjectArray<b3Vector3> & convexVertices,const b3AlignedObjectArray<b3Vector3> & hostUniqueEdges,const b3AlignedObjectArray<int> & convexIndices,const b3AlignedObjectArray<b3GpuFace> & faces,b3Contact4 * globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity,b3AlignedObjectArray<b3QuantizedBvhNode> & treeNodesCPU,b3AlignedObjectArray<b3BvhSubtreeInfo> & subTreesCPU,b3AlignedObjectArray<b3BvhInfo> & bvhInfoCPU)2025 void computeContactCompoundCompound(int pairIndex,
2026 									int bodyIndexA, int bodyIndexB,
2027 									int collidableIndexA, int collidableIndexB,
2028 									const b3RigidBodyData* rigidBodies,
2029 									const b3Collidable* collidables,
2030 									const b3ConvexPolyhedronData* convexShapes,
2031 									const b3GpuChildShape* cpuChildShapes,
2032 									const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace,
2033 									const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace,
2034 
2035 									const b3AlignedObjectArray<b3Vector3>& convexVertices,
2036 									const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges,
2037 									const b3AlignedObjectArray<int>& convexIndices,
2038 									const b3AlignedObjectArray<b3GpuFace>& faces,
2039 
2040 									b3Contact4* globalContactsOut,
2041 									int& nGlobalContactsOut,
2042 									int maxContactCapacity,
2043 									b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU,
2044 									b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU,
2045 									b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)
2046 {
2047 	int shapeTypeB = collidables[collidableIndexB].m_shapeType;
2048 	b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
2049 
2050 	b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut;
2051 	int numCompoundPairsOut = 0;
2052 	int maxNumCompoundPairsCapacity = 8192;  //1024;
2053 	cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity);
2054 
2055 	// work-in-progress
2056 	findCompoundPairsKernel(
2057 		pairIndex,
2058 		bodyIndexA, bodyIndexB,
2059 		collidableIndexA, collidableIndexB,
2060 		rigidBodies,
2061 		collidables,
2062 		convexShapes,
2063 		convexVertices,
2064 		hostAabbsWorldSpace,
2065 		hostAabbsLocalSpace,
2066 		cpuChildShapes,
2067 		&cpuCompoundPairsOut[0],
2068 		&numCompoundPairsOut,
2069 		maxNumCompoundPairsCapacity,
2070 		treeNodesCPU,
2071 		subTreesCPU,
2072 		bvhInfoCPU);
2073 
2074 	printf("maxNumAabbChecks=%d\n", maxNumAabbChecks);
2075 	if (numCompoundPairsOut > maxNumCompoundPairsCapacity)
2076 	{
2077 		b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n", maxNumCompoundPairsCapacity);
2078 		numCompoundPairsOut = maxNumCompoundPairsCapacity;
2079 	}
2080 	b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut;
2081 	b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut;
2082 	cpuCompoundSepNormalsOut.resize(numCompoundPairsOut);
2083 	cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut);
2084 
2085 	for (int i = 0; i < numCompoundPairsOut; i++)
2086 	{
2087 		processCompoundPairsKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, 0, cpuChildShapes,
2088 								   cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, numCompoundPairsOut, i);
2089 	}
2090 
2091 	for (int i = 0; i < numCompoundPairsOut; i++)
2092 	{
2093 		clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, cpuChildShapes,
2094 									cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, globalContactsOut, &nGlobalContactsOut, numCompoundPairsOut, maxContactCapacity, i);
2095 	}
2096 	/*
2097 		int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
2098 
2099 					float4 posA = rigidBodies[bodyIndexA].m_pos;
2100 					b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
2101 					float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
2102 					b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
2103 					float4 newPosA = b3QuatRotate(ornA,childPosA)+posA;
2104 					b3Quat newOrnA = b3QuatMul(ornA,childOrnA);
2105 
2106 					int shapeIndexA = collidables[childColIndexA].m_shapeIndex;
2107 
2108 
2109 			bool foundSepAxis = findSeparatingAxis(hullA,hullB,
2110 							posA,
2111 							ornA,
2112 							posB,
2113 							ornB,
2114 
2115 							convexVertices,uniqueEdges,faces,convexIndices,
2116 							convexVertices,uniqueEdges,faces,convexIndices,
2117 
2118 							sepNormalWorldSpace
2119 							);
2120 							*/
2121 
2122 	/*
2123 	if (foundSepAxis)
2124 	{
2125 
2126 
2127 		contactIndex = clipHullHullSingle(
2128 			bodyIndexA, bodyIndexB,
2129 						   posA,ornA,
2130 						   posB,ornB,
2131 			collidableIndexA, collidableIndexB,
2132 			&rigidBodies,
2133 			&globalContactsOut,
2134 			nGlobalContactsOut,
2135 
2136 			convexShapes,
2137 			convexShapes,
2138 
2139 			convexVertices,
2140 			uniqueEdges,
2141 			faces,
2142 			convexIndices,
2143 
2144 			convexVertices,
2145 			uniqueEdges,
2146 			faces,
2147 			convexIndices,
2148 
2149 			collidables,
2150 			collidables,
2151 			sepNormalWorldSpace,
2152 			maxContactCapacity);
2153 
2154 	}
2155 	*/
2156 
2157 	//	return contactIndex;
2158 
2159 	/*
2160 
2161 	int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
2162 	for (int c=0;c<numChildrenB;c++)
2163 	{
2164 		int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c;
2165 		int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex;
2166 
2167 		float4 rootPosB = rigidBodies[bodyIndexB].m_pos;
2168 		b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat;
2169 		b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition;
2170 		b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation;
2171 		float4  posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB;
2172 		b3Quaternion ornB = b3QuatMul(rootOrnB,childOrnB);//b3QuatMul(ornB,childOrnB);
2173 
2174 		int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
2175 
2176 		const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB];
2177 
2178 	}
2179 	*/
2180 }
2181 
computeContactPlaneCompound(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3RigidBodyData * rigidBodies,const b3Collidable * collidables,const b3ConvexPolyhedronData * convexShapes,const b3GpuChildShape * cpuChildShapes,const b3Vector3 * convexVertices,const int * convexIndices,const b3GpuFace * faces,b3Contact4 * globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity)2182 void computeContactPlaneCompound(int pairIndex,
2183 								 int bodyIndexA, int bodyIndexB,
2184 								 int collidableIndexA, int collidableIndexB,
2185 								 const b3RigidBodyData* rigidBodies,
2186 								 const b3Collidable* collidables,
2187 								 const b3ConvexPolyhedronData* convexShapes,
2188 								 const b3GpuChildShape* cpuChildShapes,
2189 								 const b3Vector3* convexVertices,
2190 								 const int* convexIndices,
2191 								 const b3GpuFace* faces,
2192 
2193 								 b3Contact4* globalContactsOut,
2194 								 int& nGlobalContactsOut,
2195 								 int maxContactCapacity)
2196 {
2197 	int shapeTypeB = collidables[collidableIndexB].m_shapeType;
2198 	b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
2199 
2200 	int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
2201 	for (int c = 0; c < numChildrenB; c++)
2202 	{
2203 		int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + c;
2204 		int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex;
2205 
2206 		float4 rootPosB = rigidBodies[bodyIndexB].m_pos;
2207 		b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat;
2208 		b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition;
2209 		b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation;
2210 		float4 posB = b3QuatRotate(rootOrnB, childPosB) + rootPosB;
2211 		b3Quaternion ornB = rootOrnB * childOrnB;  //b3QuatMul(ornB,childOrnB);
2212 
2213 		int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
2214 
2215 		const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB];
2216 
2217 		b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
2218 		b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
2219 
2220 		//	int numContactsOut = 0;
2221 		//	int numWorldVertsB1= 0;
2222 
2223 		b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;
2224 		b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z);
2225 		b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);
2226 		float planeConstant = planeEq.w;
2227 		b3Transform convexWorldTransform;
2228 		convexWorldTransform.setIdentity();
2229 		convexWorldTransform.setOrigin(posB);
2230 		convexWorldTransform.setRotation(ornB);
2231 		b3Transform planeTransform;
2232 		planeTransform.setIdentity();
2233 		planeTransform.setOrigin(posA);
2234 		planeTransform.setRotation(ornA);
2235 
2236 		b3Transform planeInConvex;
2237 		planeInConvex = convexWorldTransform.inverse() * planeTransform;
2238 		b3Transform convexInPlane;
2239 		convexInPlane = planeTransform.inverse() * convexWorldTransform;
2240 
2241 		b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;
2242 		float maxDot = -1e30;
2243 		int hitVertex = -1;
2244 		b3Vector3 hitVtx;
2245 
2246 #define MAX_PLANE_CONVEX_POINTS 64
2247 
2248 		b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];
2249 		int numPoints = 0;
2250 
2251 		b3Int4 contactIdx;
2252 		contactIdx.s[0] = 0;
2253 		contactIdx.s[1] = 1;
2254 		contactIdx.s[2] = 2;
2255 		contactIdx.s[3] = 3;
2256 
2257 		for (int i = 0; i < hullB->m_numVertices; i++)
2258 		{
2259 			b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];
2260 			float curDot = vtx.dot(planeNormalInConvex);
2261 
2262 			if (curDot > maxDot)
2263 			{
2264 				hitVertex = i;
2265 				maxDot = curDot;
2266 				hitVtx = vtx;
2267 				//make sure the deepest points is always included
2268 				if (numPoints == MAX_PLANE_CONVEX_POINTS)
2269 					numPoints--;
2270 			}
2271 
2272 			if (numPoints < MAX_PLANE_CONVEX_POINTS)
2273 			{
2274 				b3Vector3 vtxWorld = convexWorldTransform * vtx;
2275 				b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld;
2276 				float dist = planeNormal.dot(vtxInPlane) - planeConstant;
2277 				if (dist < 0.f)
2278 				{
2279 					vtxWorld.w = dist;
2280 					contactPoints[numPoints] = vtxWorld;
2281 					numPoints++;
2282 				}
2283 			}
2284 		}
2285 
2286 		int numReducedPoints = 0;
2287 
2288 		numReducedPoints = numPoints;
2289 
2290 		if (numPoints > 4)
2291 		{
2292 			numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);
2293 		}
2294 		int dstIdx;
2295 		//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
2296 
2297 		if (numReducedPoints > 0)
2298 		{
2299 			if (nGlobalContactsOut < maxContactCapacity)
2300 			{
2301 				dstIdx = nGlobalContactsOut;
2302 				nGlobalContactsOut++;
2303 
2304 				b3Contact4* c = &globalContactsOut[dstIdx];
2305 				c->m_worldNormalOnB = -planeNormalWorld;
2306 				c->setFrictionCoeff(0.7);
2307 				c->setRestituitionCoeff(0.f);
2308 
2309 				c->m_batchIdx = pairIndex;
2310 				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
2311 				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
2312 				for (int i = 0; i < numReducedPoints; i++)
2313 				{
2314 					b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];
2315 					c->m_worldPosB[i] = pOnB1;
2316 				}
2317 				c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints;
2318 			}  //if (dstIdx < numPairs)
2319 		}
2320 	}
2321 }
2322 
computeContactSphereConvex(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3RigidBodyData * rigidBodies,const b3Collidable * collidables,const b3ConvexPolyhedronData * convexShapes,const b3Vector3 * convexVertices,const int * convexIndices,const b3GpuFace * faces,b3Contact4 * globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity)2323 void computeContactSphereConvex(int pairIndex,
2324 								int bodyIndexA, int bodyIndexB,
2325 								int collidableIndexA, int collidableIndexB,
2326 								const b3RigidBodyData* rigidBodies,
2327 								const b3Collidable* collidables,
2328 								const b3ConvexPolyhedronData* convexShapes,
2329 								const b3Vector3* convexVertices,
2330 								const int* convexIndices,
2331 								const b3GpuFace* faces,
2332 								b3Contact4* globalContactsOut,
2333 								int& nGlobalContactsOut,
2334 								int maxContactCapacity)
2335 {
2336 	float radius = collidables[collidableIndexA].m_radius;
2337 	float4 spherePos1 = rigidBodies[bodyIndexA].m_pos;
2338 	b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat;
2339 
2340 	float4 pos = rigidBodies[bodyIndexB].m_pos;
2341 
2342 	b3Quaternion quat = rigidBodies[bodyIndexB].m_quat;
2343 
2344 	b3Transform tr;
2345 	tr.setIdentity();
2346 	tr.setOrigin(pos);
2347 	tr.setRotation(quat);
2348 	b3Transform trInv = tr.inverse();
2349 
2350 	float4 spherePos = trInv(spherePos1);
2351 
2352 	int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx;
2353 	int shapeIndex = collidables[collidableIndex].m_shapeIndex;
2354 	int numFaces = convexShapes[shapeIndex].m_numFaces;
2355 	float4 closestPnt = b3MakeVector3(0, 0, 0, 0);
2356 	//	float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0);
2357 	float minDist = -1000000.f;  // TODO: What is the largest/smallest float?
2358 	bool bCollide = true;
2359 	int region = -1;
2360 	float4 localHitNormal;
2361 	for (int f = 0; f < numFaces; f++)
2362 	{
2363 		b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f];
2364 		float4 planeEqn;
2365 		float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f);
2366 		float4 n1 = localPlaneNormal;  //quatRotate(quat,localPlaneNormal);
2367 		planeEqn = n1;
2368 		planeEqn[3] = face.m_plane.w;
2369 
2370 		float4 pntReturn;
2371 		float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);
2372 
2373 		if (dist > radius)
2374 		{
2375 			bCollide = false;
2376 			break;
2377 		}
2378 
2379 		if (dist > 0)
2380 		{
2381 			//might hit an edge or vertex
2382 			b3Vector3 out;
2383 
2384 			bool isInPoly = IsPointInPolygon(spherePos,
2385 											 &face,
2386 											 &convexVertices[convexShapes[shapeIndex].m_vertexOffset],
2387 											 convexIndices,
2388 											 &out);
2389 			if (isInPoly)
2390 			{
2391 				if (dist > minDist)
2392 				{
2393 					minDist = dist;
2394 					closestPnt = pntReturn;
2395 					localHitNormal = planeEqn;
2396 					region = 1;
2397 				}
2398 			}
2399 			else
2400 			{
2401 				b3Vector3 tmp = spherePos - out;
2402 				b3Scalar l2 = tmp.length2();
2403 				if (l2 < radius * radius)
2404 				{
2405 					dist = b3Sqrt(l2);
2406 					if (dist > minDist)
2407 					{
2408 						minDist = dist;
2409 						closestPnt = out;
2410 						localHitNormal = tmp / dist;
2411 						region = 2;
2412 					}
2413 				}
2414 				else
2415 				{
2416 					bCollide = false;
2417 					break;
2418 				}
2419 			}
2420 		}
2421 		else
2422 		{
2423 			if (dist > minDist)
2424 			{
2425 				minDist = dist;
2426 				closestPnt = pntReturn;
2427 				localHitNormal = planeEqn;
2428 				region = 3;
2429 			}
2430 		}
2431 	}
2432 	static int numChecks = 0;
2433 	numChecks++;
2434 
2435 	if (bCollide && minDist > -10000)
2436 	{
2437 		float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal;  //-hitNormalWorld;
2438 		float4 pOnB1 = tr(closestPnt);
2439 		//printf("dist ,%f,",minDist);
2440 		float actualDepth = minDist - radius;
2441 		if (actualDepth < 0)
2442 		{
2443 			//printf("actualDepth = ,%f,", actualDepth);
2444 			//printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z);
2445 			//printf("region=,%d,\n", region);
2446 			pOnB1[3] = actualDepth;
2447 
2448 			int dstIdx;
2449 			//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx );
2450 
2451 			if (nGlobalContactsOut < maxContactCapacity)
2452 			{
2453 				dstIdx = nGlobalContactsOut;
2454 				nGlobalContactsOut++;
2455 
2456 				b3Contact4* c = &globalContactsOut[dstIdx];
2457 				c->m_worldNormalOnB = normalOnSurfaceB1;
2458 				c->setFrictionCoeff(0.7);
2459 				c->setRestituitionCoeff(0.f);
2460 
2461 				c->m_batchIdx = pairIndex;
2462 				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA;
2463 				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB;
2464 				c->m_worldPosB[0] = pOnB1;
2465 				int numPoints = 1;
2466 				c->m_worldNormalOnB.w = (b3Scalar)numPoints;
2467 			}  //if (dstIdx < numPairs)
2468 		}
2469 	}  //if (hasCollision)
2470 }
2471 
computeContactConvexConvex2(int pairIndex,int bodyIndexA,int bodyIndexB,int collidableIndexA,int collidableIndexB,const b3AlignedObjectArray<b3RigidBodyData> & rigidBodies,const b3AlignedObjectArray<b3Collidable> & collidables,const b3AlignedObjectArray<b3ConvexPolyhedronData> & convexShapes,const b3AlignedObjectArray<b3Vector3> & convexVertices,const b3AlignedObjectArray<b3Vector3> & uniqueEdges,const b3AlignedObjectArray<int> & convexIndices,const b3AlignedObjectArray<b3GpuFace> & faces,b3AlignedObjectArray<b3Contact4> & globalContactsOut,int & nGlobalContactsOut,int maxContactCapacity,const b3AlignedObjectArray<b3Contact4> & oldContacts)2472 int computeContactConvexConvex2(
2473 	int pairIndex,
2474 	int bodyIndexA, int bodyIndexB,
2475 	int collidableIndexA, int collidableIndexB,
2476 	const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies,
2477 	const b3AlignedObjectArray<b3Collidable>& collidables,
2478 	const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes,
2479 	const b3AlignedObjectArray<b3Vector3>& convexVertices,
2480 	const b3AlignedObjectArray<b3Vector3>& uniqueEdges,
2481 	const b3AlignedObjectArray<int>& convexIndices,
2482 	const b3AlignedObjectArray<b3GpuFace>& faces,
2483 	b3AlignedObjectArray<b3Contact4>& globalContactsOut,
2484 	int& nGlobalContactsOut,
2485 	int maxContactCapacity,
2486 	const b3AlignedObjectArray<b3Contact4>& oldContacts)
2487 {
2488 	int contactIndex = -1;
2489 	b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;
2490 	b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;
2491 	b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;
2492 	b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;
2493 
2494 	b3ConvexPolyhedronData hullA, hullB;
2495 
2496 	b3Vector3 sepNormalWorldSpace;
2497 
2498 	b3Collidable colA = collidables[collidableIndexA];
2499 	hullA = convexShapes[colA.m_shapeIndex];
2500 	//printf("numvertsA = %d\n",hullA.m_numVertices);
2501 
2502 	b3Collidable colB = collidables[collidableIndexB];
2503 	hullB = convexShapes[colB.m_shapeIndex];
2504 	//printf("numvertsB = %d\n",hullB.m_numVertices);
2505 
2506 	//	int contactCapacity = MAX_VERTS;
2507 	//int numContactsOut=0;
2508 
2509 #ifdef _WIN32
2510 	b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x));
2511 	b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x));
2512 #endif
2513 
2514 	bool foundSepAxis = findSeparatingAxis(hullA, hullB,
2515 										   posA,
2516 										   ornA,
2517 										   posB,
2518 										   ornB,
2519 
2520 										   convexVertices, uniqueEdges, faces, convexIndices,
2521 										   convexVertices, uniqueEdges, faces, convexIndices,
2522 
2523 										   sepNormalWorldSpace);
2524 
2525 	if (foundSepAxis)
2526 	{
2527 		contactIndex = clipHullHullSingle(
2528 			bodyIndexA, bodyIndexB,
2529 			posA, ornA,
2530 			posB, ornB,
2531 			collidableIndexA, collidableIndexB,
2532 			&rigidBodies,
2533 			&globalContactsOut,
2534 			nGlobalContactsOut,
2535 
2536 			convexShapes,
2537 			convexShapes,
2538 
2539 			convexVertices,
2540 			uniqueEdges,
2541 			faces,
2542 			convexIndices,
2543 
2544 			convexVertices,
2545 			uniqueEdges,
2546 			faces,
2547 			convexIndices,
2548 
2549 			collidables,
2550 			collidables,
2551 			sepNormalWorldSpace,
2552 			maxContactCapacity);
2553 	}
2554 
2555 	return contactIndex;
2556 }
2557 
computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4> * pairs,int nPairs,const b3OpenCLArray<b3RigidBodyData> * bodyBuf,b3OpenCLArray<b3Contact4> * contactOut,int & nContacts,const b3OpenCLArray<b3Contact4> * oldContacts,int maxContactCapacity,int compoundPairCapacity,const b3OpenCLArray<b3ConvexPolyhedronData> & convexData,const b3OpenCLArray<b3Vector3> & gpuVertices,const b3OpenCLArray<b3Vector3> & gpuUniqueEdges,const b3OpenCLArray<b3GpuFace> & gpuFaces,const b3OpenCLArray<int> & gpuIndices,const b3OpenCLArray<b3Collidable> & gpuCollidables,const b3OpenCLArray<b3GpuChildShape> & gpuChildShapes,const b3OpenCLArray<b3Aabb> & clAabbsWorldSpace,const b3OpenCLArray<b3Aabb> & clAabbsLocalSpace,b3OpenCLArray<b3Vector3> & worldVertsB1GPU,b3OpenCLArray<b3Int4> & clippingFacesOutGPU,b3OpenCLArray<b3Vector3> & worldNormalsAGPU,b3OpenCLArray<b3Vector3> & worldVertsA1GPU,b3OpenCLArray<b3Vector3> & worldVertsB2GPU,b3AlignedObjectArray<class b3OptimizedBvh * > & bvhDataUnused,b3OpenCLArray<b3QuantizedBvhNode> * treeNodesGPU,b3OpenCLArray<b3BvhSubtreeInfo> * subTreesGPU,b3OpenCLArray<b3BvhInfo> * bvhInfo,int numObjects,int maxTriConvexPairCapacity,b3OpenCLArray<b3Int4> & triangleConvexPairsOut,int & numTriConvexPairsOut)2558 void GpuSatCollision::computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4>* pairs, int nPairs,
2559 														const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
2560 														b3OpenCLArray<b3Contact4>* contactOut, int& nContacts,
2561 														const b3OpenCLArray<b3Contact4>* oldContacts,
2562 														int maxContactCapacity,
2563 														int compoundPairCapacity,
2564 														const b3OpenCLArray<b3ConvexPolyhedronData>& convexData,
2565 														const b3OpenCLArray<b3Vector3>& gpuVertices,
2566 														const b3OpenCLArray<b3Vector3>& gpuUniqueEdges,
2567 														const b3OpenCLArray<b3GpuFace>& gpuFaces,
2568 														const b3OpenCLArray<int>& gpuIndices,
2569 														const b3OpenCLArray<b3Collidable>& gpuCollidables,
2570 														const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes,
2571 
2572 														const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace,
2573 														const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace,
2574 
2575 														b3OpenCLArray<b3Vector3>& worldVertsB1GPU,
2576 														b3OpenCLArray<b3Int4>& clippingFacesOutGPU,
2577 														b3OpenCLArray<b3Vector3>& worldNormalsAGPU,
2578 														b3OpenCLArray<b3Vector3>& worldVertsA1GPU,
2579 														b3OpenCLArray<b3Vector3>& worldVertsB2GPU,
2580 														b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused,
2581 														b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU,
2582 														b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU,
2583 														b3OpenCLArray<b3BvhInfo>* bvhInfo,
2584 
2585 														int numObjects,
2586 														int maxTriConvexPairCapacity,
2587 														b3OpenCLArray<b3Int4>& triangleConvexPairsOut,
2588 														int& numTriConvexPairsOut)
2589 {
2590 	myframecount++;
2591 
2592 	if (!nPairs)
2593 		return;
2594 
2595 #ifdef CHECK_ON_HOST
2596 
2597 	b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
2598 	treeNodesGPU->copyToHost(treeNodesCPU);
2599 
2600 	b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
2601 	subTreesGPU->copyToHost(subTreesCPU);
2602 
2603 	b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
2604 	bvhInfo->copyToHost(bvhInfoCPU);
2605 
2606 	b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
2607 	clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
2608 
2609 	b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace;
2610 	clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace);
2611 
2612 	b3AlignedObjectArray<b3Int4> hostPairs;
2613 	pairs->copyToHost(hostPairs);
2614 
2615 	b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
2616 	bodyBuf->copyToHost(hostBodyBuf);
2617 
2618 	b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
2619 	convexData.copyToHost(hostConvexData);
2620 
2621 	b3AlignedObjectArray<b3Vector3> hostVertices;
2622 	gpuVertices.copyToHost(hostVertices);
2623 
2624 	b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
2625 	gpuUniqueEdges.copyToHost(hostUniqueEdges);
2626 	b3AlignedObjectArray<b3GpuFace> hostFaces;
2627 	gpuFaces.copyToHost(hostFaces);
2628 	b3AlignedObjectArray<int> hostIndices;
2629 	gpuIndices.copyToHost(hostIndices);
2630 	b3AlignedObjectArray<b3Collidable> hostCollidables;
2631 	gpuCollidables.copyToHost(hostCollidables);
2632 
2633 	b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
2634 	gpuChildShapes.copyToHost(cpuChildShapes);
2635 
2636 	b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs;
2637 
2638 	b3AlignedObjectArray<b3Contact4> hostContacts;
2639 	if (nContacts)
2640 	{
2641 		contactOut->copyToHost(hostContacts);
2642 	}
2643 
2644 	b3AlignedObjectArray<b3Contact4> oldHostContacts;
2645 
2646 	if (oldContacts->size())
2647 	{
2648 		oldContacts->copyToHost(oldHostContacts);
2649 	}
2650 
2651 	hostContacts.resize(maxContactCapacity);
2652 
2653 	for (int i = 0; i < nPairs; i++)
2654 	{
2655 		int bodyIndexA = hostPairs[i].x;
2656 		int bodyIndexB = hostPairs[i].y;
2657 		int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
2658 		int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
2659 
2660 		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&
2661 			hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2662 		{
2663 			computeContactSphereConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2664 									   &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2665 		}
2666 
2667 		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2668 			hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)
2669 		{
2670 			computeContactSphereConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2671 									   &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2672 			//printf("convex-sphere\n");
2673 		}
2674 
2675 		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2676 			hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)
2677 		{
2678 			computeContactPlaneConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2679 									  &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2680 			//			printf("convex-plane\n");
2681 		}
2682 
2683 		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&
2684 			hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2685 		{
2686 			computeContactPlaneConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2687 									  &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2688 			//			printf("plane-convex\n");
2689 		}
2690 
2691 		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
2692 			hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
2693 		{
2694 			computeContactCompoundCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2695 										   &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], hostAabbsWorldSpace, hostAabbsLocalSpace, hostVertices, hostUniqueEdges, hostIndices, hostFaces, &hostContacts[0],
2696 										   nContacts, maxContactCapacity, treeNodesCPU, subTreesCPU, bvhInfoCPU);
2697 			//			printf("convex-plane\n");
2698 		}
2699 
2700 		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
2701 			hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)
2702 		{
2703 			computeContactPlaneCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0],
2704 										&hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2705 			//			printf("convex-plane\n");
2706 		}
2707 
2708 		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&
2709 			hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
2710 		{
2711 			computeContactPlaneCompound(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0],
2712 										&hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);
2713 			//			printf("plane-convex\n");
2714 		}
2715 
2716 		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
2717 			hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
2718 		{
2719 			//printf("hostPairs[i].z=%d\n",hostPairs[i].z);
2720 			int contactIndex = computeContactConvexConvex2(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, hostBodyBuf, hostCollidables, hostConvexData, hostVertices, hostUniqueEdges, hostIndices, hostFaces, hostContacts, nContacts, maxContactCapacity, oldHostContacts);
2721 			//int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
2722 
2723 			if (contactIndex >= 0)
2724 			{
2725 				//				printf("convex convex contactIndex = %d\n",contactIndex);
2726 				hostPairs[i].z = contactIndex;
2727 			}
2728 			//			printf("plane-convex\n");
2729 		}
2730 	}
2731 
2732 	if (hostPairs.size())
2733 	{
2734 		pairs->copyFromHost(hostPairs);
2735 	}
2736 
2737 	hostContacts.resize(nContacts);
2738 	if (nContacts)
2739 	{
2740 		contactOut->copyFromHost(hostContacts);
2741 	}
2742 	else
2743 	{
2744 		contactOut->resize(0);
2745 	}
2746 
2747 	m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
2748 	//printf("(HOST) nContacts = %d\n",nContacts);
2749 
2750 #else
2751 
2752 	{
2753 		if (nPairs)
2754 		{
2755 			m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
2756 
2757 			B3_PROFILE("primitiveContactsKernel");
2758 			b3BufferInfoCL bInfo[] = {
2759 				b3BufferInfoCL(pairs->getBufferCL(), true),
2760 				b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2761 				b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2762 				b3BufferInfoCL(convexData.getBufferCL(), true),
2763 				b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2764 				b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2765 				b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2766 				b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2767 				b3BufferInfoCL(contactOut->getBufferCL()),
2768 				b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
2769 
2770 			b3LauncherCL launcher(m_queue, m_primitiveContactsKernel, "m_primitiveContactsKernel");
2771 			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2772 			launcher.setConst(nPairs);
2773 			launcher.setConst(maxContactCapacity);
2774 			int num = nPairs;
2775 			launcher.launch1D(num);
2776 			clFinish(m_queue);
2777 
2778 			nContacts = m_totalContactsOut.at(0);
2779 			contactOut->resize(nContacts);
2780 		}
2781 	}
2782 
2783 #endif  //CHECK_ON_HOST
2784 
2785 	B3_PROFILE("computeConvexConvexContactsGPUSAT");
2786 	// printf("nContacts = %d\n",nContacts);
2787 
2788 	m_sepNormals.resize(nPairs);
2789 	m_hasSeparatingNormals.resize(nPairs);
2790 
2791 	int concaveCapacity = maxTriConvexPairCapacity;
2792 	m_concaveSepNormals.resize(concaveCapacity);
2793 	m_concaveHasSeparatingNormals.resize(concaveCapacity);
2794 	m_numConcavePairsOut.resize(0);
2795 	m_numConcavePairsOut.push_back(0);
2796 
2797 	m_gpuCompoundPairs.resize(compoundPairCapacity);
2798 
2799 	m_gpuCompoundSepNormals.resize(compoundPairCapacity);
2800 
2801 	m_gpuHasCompoundSepNormals.resize(compoundPairCapacity);
2802 
2803 	m_numCompoundPairsOut.resize(0);
2804 	m_numCompoundPairsOut.push_back(0);
2805 
2806 	int numCompoundPairs = 0;
2807 
2808 	int numConcavePairs = 0;
2809 
2810 	{
2811 		clFinish(m_queue);
2812 		if (findSeparatingAxisOnGpu)
2813 		{
2814 			m_dmins.resize(nPairs);
2815 			if (splitSearchSepAxisConvex)
2816 			{
2817 				if (useMprGpu)
2818 				{
2819 					nContacts = m_totalContactsOut.at(0);
2820 					{
2821 						B3_PROFILE("mprPenetrationKernel");
2822 						b3BufferInfoCL bInfo[] = {
2823 							b3BufferInfoCL(pairs->getBufferCL(), true),
2824 							b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2825 							b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2826 							b3BufferInfoCL(convexData.getBufferCL(), true),
2827 							b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2828 							b3BufferInfoCL(m_sepNormals.getBufferCL()),
2829 							b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2830 							b3BufferInfoCL(contactOut->getBufferCL()),
2831 							b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
2832 
2833 						b3LauncherCL launcher(m_queue, m_mprPenetrationKernel, "mprPenetrationKernel");
2834 						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2835 
2836 						launcher.setConst(maxContactCapacity);
2837 						launcher.setConst(nPairs);
2838 
2839 						int num = nPairs;
2840 						launcher.launch1D(num);
2841 						clFinish(m_queue);
2842 						/*
2843 						b3AlignedObjectArray<int>hostHasSepAxis;
2844 						m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
2845 						b3AlignedObjectArray<b3Vector3>hostSepAxis;
2846 						m_sepNormals.copyToHost(hostSepAxis);
2847 						*/
2848 						nContacts = m_totalContactsOut.at(0);
2849 						contactOut->resize(nContacts);
2850 						//	printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts);
2851 						if (nContacts > maxContactCapacity)
2852 						{
2853 							b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
2854 							nContacts = maxContactCapacity;
2855 						}
2856 					}
2857 				}
2858 
2859 				if (1)
2860 				{
2861 					if (1)
2862 					{
2863 						{
2864 							B3_PROFILE("findSeparatingAxisVertexFaceKernel");
2865 							b3BufferInfoCL bInfo[] = {
2866 								b3BufferInfoCL(pairs->getBufferCL(), true),
2867 								b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2868 								b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2869 								b3BufferInfoCL(convexData.getBufferCL(), true),
2870 								b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2871 								b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2872 								b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2873 								b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2874 								b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2875 								b3BufferInfoCL(m_sepNormals.getBufferCL()),
2876 								b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2877 								b3BufferInfoCL(m_dmins.getBufferCL())};
2878 
2879 							b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel, "findSeparatingAxisVertexFaceKernel");
2880 							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2881 							launcher.setConst(nPairs);
2882 
2883 							int num = nPairs;
2884 							launcher.launch1D(num);
2885 							clFinish(m_queue);
2886 						}
2887 
2888 						int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
2889 
2890 						{
2891 							B3_PROFILE("findSeparatingAxisEdgeEdgeKernel");
2892 							b3BufferInfoCL bInfo[] = {
2893 								b3BufferInfoCL(pairs->getBufferCL(), true),
2894 								b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2895 								b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2896 								b3BufferInfoCL(convexData.getBufferCL(), true),
2897 								b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2898 								b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2899 								b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2900 								b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2901 								b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2902 								b3BufferInfoCL(m_sepNormals.getBufferCL()),
2903 								b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2904 								b3BufferInfoCL(m_dmins.getBufferCL()),
2905 								b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true)
2906 
2907 							};
2908 
2909 							b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel, "findSeparatingAxisEdgeEdgeKernel");
2910 							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2911 							launcher.setConst(numDirections);
2912 							launcher.setConst(nPairs);
2913 							int num = nPairs;
2914 							launcher.launch1D(num);
2915 							clFinish(m_queue);
2916 						}
2917 					}
2918 					if (useMprGpu)
2919 					{
2920 						B3_PROFILE("findSeparatingAxisUnitSphereKernel");
2921 						b3BufferInfoCL bInfo[] = {
2922 							b3BufferInfoCL(pairs->getBufferCL(), true),
2923 							b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2924 							b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2925 							b3BufferInfoCL(convexData.getBufferCL(), true),
2926 							b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2927 							b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true),
2928 							b3BufferInfoCL(m_sepNormals.getBufferCL()),
2929 							b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
2930 							b3BufferInfoCL(m_dmins.getBufferCL())};
2931 
2932 						b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel, "findSeparatingAxisUnitSphereKernel");
2933 						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2934 						int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);
2935 						launcher.setConst(numDirections);
2936 
2937 						launcher.setConst(nPairs);
2938 
2939 						int num = nPairs;
2940 						launcher.launch1D(num);
2941 						clFinish(m_queue);
2942 					}
2943 				}
2944 			}
2945 			else
2946 			{
2947 				B3_PROFILE("findSeparatingAxisKernel");
2948 				b3BufferInfoCL bInfo[] = {
2949 					b3BufferInfoCL(pairs->getBufferCL(), true),
2950 					b3BufferInfoCL(bodyBuf->getBufferCL(), true),
2951 					b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
2952 					b3BufferInfoCL(convexData.getBufferCL(), true),
2953 					b3BufferInfoCL(gpuVertices.getBufferCL(), true),
2954 					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
2955 					b3BufferInfoCL(gpuFaces.getBufferCL(), true),
2956 					b3BufferInfoCL(gpuIndices.getBufferCL(), true),
2957 					b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
2958 					b3BufferInfoCL(m_sepNormals.getBufferCL()),
2959 					b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL())};
2960 
2961 				b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel, "m_findSeparatingAxisKernel");
2962 				launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
2963 				launcher.setConst(nPairs);
2964 
2965 				int num = nPairs;
2966 				launcher.launch1D(num);
2967 				clFinish(m_queue);
2968 			}
2969 		}
2970 		else
2971 		{
2972 			B3_PROFILE("findSeparatingAxisKernel CPU");
2973 
2974 			b3AlignedObjectArray<b3Int4> hostPairs;
2975 			pairs->copyToHost(hostPairs);
2976 			b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
2977 			bodyBuf->copyToHost(hostBodyBuf);
2978 
2979 			b3AlignedObjectArray<b3Collidable> hostCollidables;
2980 			gpuCollidables.copyToHost(hostCollidables);
2981 
2982 			b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
2983 			gpuChildShapes.copyToHost(cpuChildShapes);
2984 
2985 			b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData;
2986 			convexData.copyToHost(hostConvexShapeData);
2987 
2988 			b3AlignedObjectArray<b3Vector3> hostVertices;
2989 			gpuVertices.copyToHost(hostVertices);
2990 
2991 			b3AlignedObjectArray<int> hostHasSepAxis;
2992 			hostHasSepAxis.resize(nPairs);
2993 			b3AlignedObjectArray<b3Vector3> hostSepAxis;
2994 			hostSepAxis.resize(nPairs);
2995 
2996 			b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
2997 			gpuUniqueEdges.copyToHost(hostUniqueEdges);
2998 			b3AlignedObjectArray<b3GpuFace> hostFaces;
2999 			gpuFaces.copyToHost(hostFaces);
3000 
3001 			b3AlignedObjectArray<int> hostIndices;
3002 			gpuIndices.copyToHost(hostIndices);
3003 
3004 			b3AlignedObjectArray<b3Contact4> hostContacts;
3005 			if (nContacts)
3006 			{
3007 				contactOut->copyToHost(hostContacts);
3008 			}
3009 			hostContacts.resize(maxContactCapacity);
3010 			int nGlobalContactsOut = nContacts;
3011 
3012 			for (int i = 0; i < nPairs; i++)
3013 			{
3014 				int bodyIndexA = hostPairs[i].x;
3015 				int bodyIndexB = hostPairs[i].y;
3016 				int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
3017 				int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
3018 
3019 				int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex;
3020 				int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex;
3021 
3022 				hostHasSepAxis[i] = 0;
3023 
3024 				//once the broadphase avoids static-static pairs, we can remove this test
3025 				if ((hostBodyBuf[bodyIndexA].m_invMass == 0) && (hostBodyBuf[bodyIndexB].m_invMass == 0))
3026 				{
3027 					continue;
3028 				}
3029 
3030 				if ((hostCollidables[collidableIndexA].m_shapeType != SHAPE_CONVEX_HULL) || (hostCollidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL))
3031 				{
3032 					continue;
3033 				}
3034 
3035 				float dmin = FLT_MAX;
3036 
3037 				b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA];
3038 				b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB];
3039 				b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos;
3040 				b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos;
3041 				b3Quaternion ornA = hostBodyBuf[bodyIndexA].m_quat;
3042 				b3Quaternion ornB = hostBodyBuf[bodyIndexB].m_quat;
3043 
3044 				if (useGjk)
3045 				{
3046 					//first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR
3047 					{
3048 						b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;
3049 						b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);
3050 						b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter;
3051 						b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);
3052 						b3Vector3 DeltaC2 = c0 - c1;
3053 
3054 						b3Vector3 sepAxis;
3055 
3056 						bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3057 																&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3058 																&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3059 																&sepAxis, &dmin);
3060 
3061 						if (hasSepAxisA)
3062 						{
3063 							bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2,
3064 																	&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3065 																	&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3066 																	&sepAxis, &dmin);
3067 							if (hasSepAxisB)
3068 							{
3069 								bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3070 																				&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3071 																				&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3072 																				&sepAxis, &dmin, false);
3073 
3074 								if (hasEdgeEdge)
3075 								{
3076 									hostHasSepAxis[i] = 1;
3077 									hostSepAxis[i] = sepAxis;
3078 									hostSepAxis[i].w = dmin;
3079 								}
3080 							}
3081 						}
3082 					}
3083 
3084 					if (hostHasSepAxis[i])
3085 					{
3086 						int pairIndex = i;
3087 
3088 						bool useMpr = true;
3089 						if (useMpr)
3090 						{
3091 							int res = 0;
3092 							float depth = 0.f;
3093 							b3Vector3 sepAxis2 = b3MakeVector3(1, 0, 0);
3094 							b3Vector3 resultPointOnBWorld = b3MakeVector3(0, 0, 0);
3095 
3096 							float depthOut;
3097 							b3Vector3 dirOut;
3098 							b3Vector3 posOut;
3099 
3100 							//res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut);
3101 							res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB, &hostBodyBuf[0], &hostConvexShapeData[0], &hostCollidables[0], &hostVertices[0], &hostSepAxis[0], &hostHasSepAxis[0], &depthOut, &dirOut, &posOut);
3102 							depth = depthOut;
3103 							sepAxis2 = b3MakeVector3(-dirOut.x, -dirOut.y, -dirOut.z);
3104 							resultPointOnBWorld = posOut;
3105 							//hostHasSepAxis[i] = 0;
3106 
3107 							if (res == 0)
3108 							{
3109 								//add point?
3110 								//printf("depth = %f\n",depth);
3111 								//printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]);
3112 								//qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]);
3113 
3114 								float dist = 0.f;
3115 
3116 								const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex];
3117 								const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex];
3118 
3119 								if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))
3120 								{
3121 									if (depth > dist)
3122 									{
3123 										float diff = depth - dist;
3124 
3125 										static float maxdiff = 0.f;
3126 										if (maxdiff < diff)
3127 										{
3128 											maxdiff = diff;
3129 											printf("maxdiff = %20.10f\n", maxdiff);
3130 										}
3131 									}
3132 								}
3133 								if (depth > dmin)
3134 								{
3135 									b3Vector3 oldAxis = hostSepAxis[i];
3136 									depth = dmin;
3137 									sepAxis2 = oldAxis;
3138 								}
3139 
3140 								if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))
3141 								{
3142 									if (depth > dist)
3143 									{
3144 										float diff = depth - dist;
3145 										//printf("?diff  = %f\n",diff );
3146 										static float maxdiff = 0.f;
3147 										if (maxdiff < diff)
3148 										{
3149 											maxdiff = diff;
3150 											printf("maxdiff = %20.10f\n", maxdiff);
3151 										}
3152 									}
3153 									//this is used for SAT
3154 									//hostHasSepAxis[i] = 1;
3155 									//hostSepAxis[i] = sepAxis2;
3156 
3157 									//add contact point
3158 
3159 									//int contactIndex = nGlobalContactsOut;
3160 									b3Contact4& newContact = hostContacts.at(nGlobalContactsOut);
3161 									nGlobalContactsOut++;
3162 									newContact.m_batchIdx = 0;  //i;
3163 									newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA;
3164 									newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;
3165 
3166 									newContact.m_frictionCoeffCmp = 45874;
3167 									newContact.m_restituitionCoeffCmp = 0;
3168 
3169 									static float maxDepth = 0.f;
3170 
3171 									if (depth > maxDepth)
3172 									{
3173 										maxDepth = depth;
3174 										printf("MPR maxdepth = %f\n", maxDepth);
3175 									}
3176 
3177 									resultPointOnBWorld.w = -depth;
3178 									newContact.m_worldPosB[0] = resultPointOnBWorld;
3179 									//b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2;
3180 									newContact.m_worldNormalOnB = sepAxis2;
3181 									newContact.m_worldNormalOnB.w = (b3Scalar)1;
3182 								}
3183 								else
3184 								{
3185 									printf("rejected\n");
3186 								}
3187 							}
3188 						}
3189 						else
3190 						{
3191 							//int contactIndex = computeContactConvexConvex2(           i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
3192 							b3AlignedObjectArray<b3Contact4> oldHostContacts;
3193 							int result;
3194 							result = computeContactConvexConvex2(  //hostPairs,
3195 								pairIndex,
3196 								bodyIndexA, bodyIndexB,
3197 								collidableIndexA, collidableIndexB,
3198 								hostBodyBuf,
3199 								hostCollidables,
3200 								hostConvexShapeData,
3201 								hostVertices,
3202 								hostUniqueEdges,
3203 								hostIndices,
3204 								hostFaces,
3205 								hostContacts,
3206 								nGlobalContactsOut,
3207 								maxContactCapacity,
3208 								oldHostContacts
3209 								//hostHasSepAxis,
3210 								//hostSepAxis
3211 
3212 							);
3213 						}  //mpr
3214 					}      //hostHasSepAxis[i] = 1;
3215 				}
3216 				else
3217 				{
3218 					b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;
3219 					b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);
3220 					b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter;
3221 					b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);
3222 					b3Vector3 DeltaC2 = c0 - c1;
3223 
3224 					b3Vector3 sepAxis;
3225 
3226 					bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3227 															&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3228 															&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3229 															&sepAxis, &dmin);
3230 
3231 					if (hasSepAxisA)
3232 					{
3233 						bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2,
3234 																&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3235 																&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3236 																&sepAxis, &dmin);
3237 						if (hasSepAxisB)
3238 						{
3239 							bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2,
3240 																			&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3241 																			&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0),
3242 																			&sepAxis, &dmin, true);
3243 
3244 							if (hasEdgeEdge)
3245 							{
3246 								hostHasSepAxis[i] = 1;
3247 								hostSepAxis[i] = sepAxis;
3248 							}
3249 						}
3250 					}
3251 				}
3252 			}
3253 
3254 			if (useGjkContacts)  //nGlobalContactsOut>0)
3255 			{
3256 				//printf("nGlobalContactsOut=%d\n",nGlobalContactsOut);
3257 				nContacts = nGlobalContactsOut;
3258 				contactOut->copyFromHost(hostContacts);
3259 
3260 				m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3261 			}
3262 
3263 			m_hasSeparatingNormals.copyFromHost(hostHasSepAxis);
3264 			m_sepNormals.copyFromHost(hostSepAxis);
3265 
3266 			/*
3267              //double-check results from GPU (comment-out the 'else' so both paths are executed
3268             b3AlignedObjectArray<int> checkHasSepAxis;
3269             m_hasSeparatingNormals.copyToHost(checkHasSepAxis);
3270             static int frameCount = 0;
3271             frameCount++;
3272             for (int i=0;i<nPairs;i++)
3273             {
3274                 if (hostHasSepAxis[i] != checkHasSepAxis[i])
3275                 {
3276                     printf("at frameCount %d hostHasSepAxis[%d] = %d but checkHasSepAxis[i] = %d\n",
3277                            frameCount,i,hostHasSepAxis[i],checkHasSepAxis[i]);
3278                 }
3279             }
3280             //m_hasSeparatingNormals.copyFromHost(hostHasSepAxis);
3281             //    m_sepNormals.copyFromHost(hostSepAxis);
3282             */
3283 		}
3284 
3285 		numCompoundPairs = m_numCompoundPairsOut.at(0);
3286 		bool useGpuFindCompoundPairs = true;
3287 		if (useGpuFindCompoundPairs)
3288 		{
3289 			B3_PROFILE("findCompoundPairsKernel");
3290 			b3BufferInfoCL bInfo[] =
3291 				{
3292 					b3BufferInfoCL(pairs->getBufferCL(), true),
3293 					b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3294 					b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3295 					b3BufferInfoCL(convexData.getBufferCL(), true),
3296 					b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3297 					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3298 					b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3299 					b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3300 					b3BufferInfoCL(clAabbsLocalSpace.getBufferCL(), true),
3301 					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3302 					b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL()),
3303 					b3BufferInfoCL(m_numCompoundPairsOut.getBufferCL()),
3304 					b3BufferInfoCL(subTreesGPU->getBufferCL()),
3305 					b3BufferInfoCL(treeNodesGPU->getBufferCL()),
3306 					b3BufferInfoCL(bvhInfo->getBufferCL())};
3307 
3308 			b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel, "m_findCompoundPairsKernel");
3309 			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3310 			launcher.setConst(nPairs);
3311 			launcher.setConst(compoundPairCapacity);
3312 
3313 			int num = nPairs;
3314 			launcher.launch1D(num);
3315 			clFinish(m_queue);
3316 
3317 			numCompoundPairs = m_numCompoundPairsOut.at(0);
3318 			//printf("numCompoundPairs =%d\n",numCompoundPairs );
3319 			if (numCompoundPairs)
3320 			{
3321 				//printf("numCompoundPairs=%d\n",numCompoundPairs);
3322 			}
3323 		}
3324 		else
3325 		{
3326 			b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
3327 			treeNodesGPU->copyToHost(treeNodesCPU);
3328 
3329 			b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
3330 			subTreesGPU->copyToHost(subTreesCPU);
3331 
3332 			b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
3333 			bvhInfo->copyToHost(bvhInfoCPU);
3334 
3335 			b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3336 			clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3337 
3338 			b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace;
3339 			clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace);
3340 
3341 			b3AlignedObjectArray<b3Int4> hostPairs;
3342 			pairs->copyToHost(hostPairs);
3343 
3344 			b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3345 			bodyBuf->copyToHost(hostBodyBuf);
3346 
3347 			b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut;
3348 			cpuCompoundPairsOut.resize(compoundPairCapacity);
3349 
3350 			b3AlignedObjectArray<b3Collidable> hostCollidables;
3351 			gpuCollidables.copyToHost(hostCollidables);
3352 
3353 			b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
3354 			gpuChildShapes.copyToHost(cpuChildShapes);
3355 
3356 			b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
3357 			convexData.copyToHost(hostConvexData);
3358 
3359 			b3AlignedObjectArray<b3Vector3> hostVertices;
3360 			gpuVertices.copyToHost(hostVertices);
3361 
3362 			for (int pairIndex = 0; pairIndex < nPairs; pairIndex++)
3363 			{
3364 				int bodyIndexA = hostPairs[pairIndex].x;
3365 				int bodyIndexB = hostPairs[pairIndex].y;
3366 				int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
3367 				int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
3368 				if (cpuChildShapes.size())
3369 				{
3370 					findCompoundPairsKernel(
3371 						pairIndex,
3372 						bodyIndexA,
3373 						bodyIndexB,
3374 						collidableIndexA,
3375 						collidableIndexB,
3376 						&hostBodyBuf[0],
3377 						&hostCollidables[0],
3378 						&hostConvexData[0],
3379 						hostVertices,
3380 						hostAabbsWorldSpace,
3381 						hostAabbsLocalSpace,
3382 						&cpuChildShapes[0],
3383 						&cpuCompoundPairsOut[0],
3384 						&numCompoundPairs,
3385 						compoundPairCapacity,
3386 						treeNodesCPU,
3387 						subTreesCPU,
3388 						bvhInfoCPU);
3389 				}
3390 			}
3391 
3392 			m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs, 1, 0, true);
3393 			if (numCompoundPairs)
3394 			{
3395 				b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0];
3396 				m_gpuCompoundPairs.copyFromHostPointer(ptr, numCompoundPairs, 0, true);
3397 			}
3398 			//cpuCompoundPairsOut
3399 		}
3400 		if (numCompoundPairs)
3401 		{
3402 			printf("numCompoundPairs=%d\n", numCompoundPairs);
3403 		}
3404 
3405 		if (numCompoundPairs > compoundPairCapacity)
3406 		{
3407 			b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity);
3408 			numCompoundPairs = compoundPairCapacity;
3409 		}
3410 
3411 		m_gpuCompoundPairs.resize(numCompoundPairs);
3412 		m_gpuHasCompoundSepNormals.resize(numCompoundPairs);
3413 		m_gpuCompoundSepNormals.resize(numCompoundPairs);
3414 
3415 		if (numCompoundPairs)
3416 		{
3417 			B3_PROFILE("processCompoundPairsPrimitivesKernel");
3418 			b3BufferInfoCL bInfo[] =
3419 				{
3420 					b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
3421 					b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3422 					b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3423 					b3BufferInfoCL(convexData.getBufferCL(), true),
3424 					b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3425 					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3426 					b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3427 					b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3428 					b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3429 					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3430 					b3BufferInfoCL(contactOut->getBufferCL()),
3431 					b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3432 
3433 			b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel, "m_processCompoundPairsPrimitivesKernel");
3434 			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3435 			launcher.setConst(numCompoundPairs);
3436 			launcher.setConst(maxContactCapacity);
3437 
3438 			int num = numCompoundPairs;
3439 			launcher.launch1D(num);
3440 			clFinish(m_queue);
3441 			nContacts = m_totalContactsOut.at(0);
3442 			//printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts);
3443 			if (nContacts > maxContactCapacity)
3444 			{
3445 				b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
3446 				nContacts = maxContactCapacity;
3447 			}
3448 		}
3449 
3450 		if (numCompoundPairs)
3451 		{
3452 			B3_PROFILE("processCompoundPairsKernel");
3453 			b3BufferInfoCL bInfo[] =
3454 				{
3455 					b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
3456 					b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3457 					b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3458 					b3BufferInfoCL(convexData.getBufferCL(), true),
3459 					b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3460 					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3461 					b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3462 					b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3463 					b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3464 					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3465 					b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL()),
3466 					b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL())};
3467 
3468 			b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel, "m_processCompoundPairsKernel");
3469 			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3470 			launcher.setConst(numCompoundPairs);
3471 
3472 			int num = numCompoundPairs;
3473 			launcher.launch1D(num);
3474 			clFinish(m_queue);
3475 		}
3476 
3477 		//printf("numConcave  = %d\n",numConcave);
3478 
3479 		//		printf("hostNormals.size()=%d\n",hostNormals.size());
3480 		//int numPairs = pairCount.at(0);
3481 	}
3482 	int vertexFaceCapacity = 64;
3483 
3484 	{
3485 		//now perform the tree query on GPU
3486 
3487 		if (treeNodesGPU->size() && treeNodesGPU->size())
3488 		{
3489 			if (bvhTraversalKernelGPU)
3490 			{
3491 				B3_PROFILE("m_bvhTraversalKernel");
3492 
3493 				numConcavePairs = m_numConcavePairsOut.at(0);
3494 
3495 				b3LauncherCL launcher(m_queue, m_bvhTraversalKernel, "m_bvhTraversalKernel");
3496 				launcher.setBuffer(pairs->getBufferCL());
3497 				launcher.setBuffer(bodyBuf->getBufferCL());
3498 				launcher.setBuffer(gpuCollidables.getBufferCL());
3499 				launcher.setBuffer(clAabbsWorldSpace.getBufferCL());
3500 				launcher.setBuffer(triangleConvexPairsOut.getBufferCL());
3501 				launcher.setBuffer(m_numConcavePairsOut.getBufferCL());
3502 				launcher.setBuffer(subTreesGPU->getBufferCL());
3503 				launcher.setBuffer(treeNodesGPU->getBufferCL());
3504 				launcher.setBuffer(bvhInfo->getBufferCL());
3505 
3506 				launcher.setConst(nPairs);
3507 				launcher.setConst(maxTriConvexPairCapacity);
3508 				int num = nPairs;
3509 				launcher.launch1D(num);
3510 				clFinish(m_queue);
3511 				numConcavePairs = m_numConcavePairsOut.at(0);
3512 			}
3513 			else
3514 			{
3515 				b3AlignedObjectArray<b3Int4> hostPairs;
3516 				pairs->copyToHost(hostPairs);
3517 				b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3518 				bodyBuf->copyToHost(hostBodyBuf);
3519 				b3AlignedObjectArray<b3Collidable> hostCollidables;
3520 				gpuCollidables.copyToHost(hostCollidables);
3521 				b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3522 				clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3523 
3524 				//int maxTriConvexPairCapacity,
3525 				b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3526 				triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
3527 
3528 				//int numTriConvexPairsOutHost=0;
3529 				numConcavePairs = 0;
3530 				//m_numConcavePairsOut
3531 
3532 				b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
3533 				treeNodesGPU->copyToHost(treeNodesCPU);
3534 				b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
3535 				subTreesGPU->copyToHost(subTreesCPU);
3536 				b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
3537 				bvhInfo->copyToHost(bvhInfoCPU);
3538 				//compute it...
3539 
3540 				volatile int hostNumConcavePairsOut = 0;
3541 
3542 				//
3543 				for (int i = 0; i < nPairs; i++)
3544 				{
3545 					b3BvhTraversal(&hostPairs.at(0),
3546 								   &hostBodyBuf.at(0),
3547 								   &hostCollidables.at(0),
3548 								   &hostAabbsWorldSpace.at(0),
3549 								   &triangleConvexPairsOutHost.at(0),
3550 								   &hostNumConcavePairsOut,
3551 								   &subTreesCPU.at(0),
3552 								   &treeNodesCPU.at(0),
3553 								   &bvhInfoCPU.at(0),
3554 								   nPairs,
3555 								   maxTriConvexPairCapacity,
3556 								   i);
3557 				}
3558 				numConcavePairs = hostNumConcavePairsOut;
3559 
3560 				if (hostNumConcavePairsOut)
3561 				{
3562 					triangleConvexPairsOutHost.resize(hostNumConcavePairsOut);
3563 					triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost);
3564 				}
3565 				//
3566 
3567 				m_numConcavePairsOut.resize(0);
3568 				m_numConcavePairsOut.push_back(numConcavePairs);
3569 			}
3570 
3571 			//printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity);
3572 
3573 			if (numConcavePairs > maxTriConvexPairCapacity)
3574 			{
3575 				static int exceeded_maxTriConvexPairCapacity_count = 0;
3576 				b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n",
3577 						numConcavePairs, maxTriConvexPairCapacity, exceeded_maxTriConvexPairCapacity_count++);
3578 				numConcavePairs = maxTriConvexPairCapacity;
3579 			}
3580 			triangleConvexPairsOut.resize(numConcavePairs);
3581 
3582 			if (numConcavePairs)
3583 			{
3584 				clippingFacesOutGPU.resize(numConcavePairs);
3585 				worldNormalsAGPU.resize(numConcavePairs);
3586 				worldVertsA1GPU.resize(vertexFaceCapacity * (numConcavePairs));
3587 				worldVertsB1GPU.resize(vertexFaceCapacity * (numConcavePairs));
3588 
3589 				if (findConcaveSeparatingAxisKernelGPU)
3590 				{
3591 					/*
3592 					m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
3593 						clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3594 						worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
3595 						worldNormalsAGPU.copyFromHost(worldNormalsACPU);
3596 						worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
3597 					*/
3598 
3599 					//now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut)
3600 					if (splitSearchSepAxisConcave)
3601 					{
3602 						//printf("numConcavePairs = %d\n",numConcavePairs);
3603 						m_dmins.resize(numConcavePairs);
3604 						{
3605 							B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel");
3606 							b3BufferInfoCL bInfo[] = {
3607 								b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3608 								b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3609 								b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3610 								b3BufferInfoCL(convexData.getBufferCL(), true),
3611 								b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3612 								b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3613 								b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3614 								b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3615 								b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3616 								b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3617 								b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3618 								b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3619 								b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3620 								b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3621 								b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3622 								b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3623 								b3BufferInfoCL(m_dmins.getBufferCL())};
3624 
3625 							b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel, "m_findConcaveSeparatingAxisVertexFaceKernel");
3626 							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3627 							launcher.setConst(vertexFaceCapacity);
3628 							launcher.setConst(numConcavePairs);
3629 
3630 							int num = numConcavePairs;
3631 							launcher.launch1D(num);
3632 							clFinish(m_queue);
3633 						}
3634 						//                        numConcavePairs = 0;
3635 						if (1)
3636 						{
3637 							B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel");
3638 							b3BufferInfoCL bInfo[] = {
3639 								b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3640 								b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3641 								b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3642 								b3BufferInfoCL(convexData.getBufferCL(), true),
3643 								b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3644 								b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3645 								b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3646 								b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3647 								b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3648 								b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3649 								b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3650 								b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3651 								b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3652 								b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3653 								b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3654 								b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3655 								b3BufferInfoCL(m_dmins.getBufferCL())};
3656 
3657 							b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel, "m_findConcaveSeparatingAxisEdgeEdgeKernel");
3658 							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3659 							launcher.setConst(vertexFaceCapacity);
3660 							launcher.setConst(numConcavePairs);
3661 
3662 							int num = numConcavePairs;
3663 							launcher.launch1D(num);
3664 							clFinish(m_queue);
3665 						}
3666 
3667 						// numConcavePairs = 0;
3668 					}
3669 					else
3670 					{
3671 						B3_PROFILE("findConcaveSeparatingAxisKernel");
3672 						b3BufferInfoCL bInfo[] = {
3673 							b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3674 							b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3675 							b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3676 							b3BufferInfoCL(convexData.getBufferCL(), true),
3677 							b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3678 							b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3679 							b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3680 							b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3681 							b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
3682 							b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3683 							b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3684 							b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3685 							b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3686 							b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3687 							b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3688 							b3BufferInfoCL(worldVertsB1GPU.getBufferCL())};
3689 
3690 						b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel, "m_findConcaveSeparatingAxisKernel");
3691 						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3692 						launcher.setConst(vertexFaceCapacity);
3693 						launcher.setConst(numConcavePairs);
3694 
3695 						int num = numConcavePairs;
3696 						launcher.launch1D(num);
3697 						clFinish(m_queue);
3698 					}
3699 				}
3700 				else
3701 				{
3702 					b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3703 					b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
3704 					b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
3705 					b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
3706 					b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3707 
3708 					b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3709 					triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
3710 					//triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
3711 					b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3712 					bodyBuf->copyToHost(hostBodyBuf);
3713 					b3AlignedObjectArray<b3Collidable> hostCollidables;
3714 					gpuCollidables.copyToHost(hostCollidables);
3715 					b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
3716 					clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
3717 
3718 					b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
3719 					convexData.copyToHost(hostConvexData);
3720 
3721 					b3AlignedObjectArray<b3Vector3> hostVertices;
3722 					gpuVertices.copyToHost(hostVertices);
3723 
3724 					b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
3725 					gpuUniqueEdges.copyToHost(hostUniqueEdges);
3726 					b3AlignedObjectArray<b3GpuFace> hostFaces;
3727 					gpuFaces.copyToHost(hostFaces);
3728 					b3AlignedObjectArray<int> hostIndices;
3729 					gpuIndices.copyToHost(hostIndices);
3730 					b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
3731 					gpuChildShapes.copyToHost(cpuChildShapes);
3732 
3733 					b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3734 					m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3735 					concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size());
3736 
3737 					b3GpuChildShape* childShapePointerCPU = 0;
3738 					if (cpuChildShapes.size())
3739 						childShapePointerCPU = &cpuChildShapes.at(0);
3740 
3741 					clippingFacesOutCPU.resize(clippingFacesOutGPU.size());
3742 					worldVertsA1CPU.resize(worldVertsA1GPU.size());
3743 					worldNormalsACPU.resize(worldNormalsAGPU.size());
3744 					worldVertsB1CPU.resize(worldVertsB1GPU.size());
3745 
3746 					for (int i = 0; i < numConcavePairs; i++)
3747 					{
3748 						b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0),
3749 														  &hostBodyBuf.at(0),
3750 														  &hostCollidables.at(0),
3751 														  &hostConvexData.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0),
3752 														  &hostFaces.at(0), &hostIndices.at(0), childShapePointerCPU,
3753 														  &hostAabbsWorldSpace.at(0),
3754 														  &concaveSepNormalsHost.at(0),
3755 														  &clippingFacesOutCPU.at(0),
3756 														  &worldVertsA1CPU.at(0),
3757 														  &worldNormalsACPU.at(0),
3758 														  &worldVertsB1CPU.at(0),
3759 														  &concaveHasSeparatingNormalsCPU.at(0),
3760 														  vertexFaceCapacity,
3761 														  numConcavePairs, i);
3762 					};
3763 
3764 					m_concaveSepNormals.copyFromHost(concaveSepNormalsHost);
3765 					m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
3766 					clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3767 					worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
3768 					worldNormalsAGPU.copyFromHost(worldNormalsACPU);
3769 					worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
3770 				}
3771 				//							b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals;
3772 				//						m_concaveSepNormals.copyToHost(cpuCompoundSepNormals);
3773 				//					b3AlignedObjectArray<b3Int4> cpuConcavePairs;
3774 				//				triangleConvexPairsOut.copyToHost(cpuConcavePairs);
3775 			}
3776 		}
3777 	}
3778 
3779 	if (numConcavePairs)
3780 	{
3781 		if (numConcavePairs)
3782 		{
3783 			B3_PROFILE("findConcaveSphereContactsKernel");
3784 			nContacts = m_totalContactsOut.at(0);
3785 			//				printf("nContacts1 = %d\n",nContacts);
3786 			b3BufferInfoCL bInfo[] = {
3787 				b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()),
3788 				b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3789 				b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
3790 				b3BufferInfoCL(convexData.getBufferCL(), true),
3791 				b3BufferInfoCL(gpuVertices.getBufferCL(), true),
3792 				b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
3793 				b3BufferInfoCL(gpuFaces.getBufferCL(), true),
3794 				b3BufferInfoCL(gpuIndices.getBufferCL(), true),
3795 				b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true),
3796 				b3BufferInfoCL(contactOut->getBufferCL()),
3797 				b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3798 
3799 			b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel, "m_findConcaveSphereContactsKernel");
3800 			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3801 
3802 			launcher.setConst(numConcavePairs);
3803 			launcher.setConst(maxContactCapacity);
3804 
3805 			int num = numConcavePairs;
3806 			launcher.launch1D(num);
3807 			clFinish(m_queue);
3808 			nContacts = m_totalContactsOut.at(0);
3809 			//printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts);
3810 
3811 			//printf("nContacts2 = %d\n",nContacts);
3812 
3813 			if (nContacts >= maxContactCapacity)
3814 			{
3815 				b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
3816 				nContacts = maxContactCapacity;
3817 			}
3818 		}
3819 	}
3820 
3821 #ifdef __APPLE__
3822 	bool contactClippingOnGpu = true;
3823 #else
3824 	bool contactClippingOnGpu = true;
3825 #endif
3826 
3827 	if (contactClippingOnGpu)
3828 	{
3829 		m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3830 		//		printf("nContacts3 = %d\n",nContacts);
3831 
3832 		//B3_PROFILE("clipHullHullKernel");
3833 
3834 		bool breakupConcaveConvexKernel = true;
3835 
3836 #ifdef __APPLE__
3837 		//actually, some Apple OpenCL platform/device combinations work fine...
3838 		breakupConcaveConvexKernel = true;
3839 #endif
3840 		//concave-convex contact clipping
3841 		if (numConcavePairs)
3842 		{
3843 			//			printf("numConcavePairs = %d\n", numConcavePairs);
3844 			//		nContacts = m_totalContactsOut.at(0);
3845 			//	printf("nContacts before = %d\n", nContacts);
3846 
3847 			if (breakupConcaveConvexKernel)
3848 			{
3849 				worldVertsB2GPU.resize(vertexFaceCapacity * numConcavePairs);
3850 
3851 				//clipFacesAndFindContacts
3852 
3853 				if (clipConcaveFacesAndFindContactsCPU)
3854 				{
3855 					b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3856 					b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
3857 					b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
3858 					b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
3859 
3860 					clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
3861 					worldVertsA1GPU.copyToHost(worldVertsA1CPU);
3862 					worldNormalsAGPU.copyToHost(worldNormalsACPU);
3863 					worldVertsB1GPU.copyToHost(worldVertsB1CPU);
3864 
3865 					b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3866 					m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);
3867 
3868 					b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3869 					m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3870 
3871 					b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
3872 					worldVertsB2CPU.resize(worldVertsB2GPU.size());
3873 
3874 					for (int i = 0; i < numConcavePairs; i++)
3875 					{
3876 						clipFacesAndFindContactsKernel(&concaveSepNormalsHost.at(0),
3877 													   &concaveHasSeparatingNormalsCPU.at(0),
3878 													   &clippingFacesOutCPU.at(0),
3879 													   &worldVertsA1CPU.at(0),
3880 													   &worldNormalsACPU.at(0),
3881 													   &worldVertsB1CPU.at(0),
3882 													   &worldVertsB2CPU.at(0),
3883 													   vertexFaceCapacity,
3884 													   i);
3885 					}
3886 
3887 					clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
3888 					worldVertsB2GPU.copyFromHost(worldVertsB2CPU);
3889 				}
3890 				else
3891 				{
3892 					if (1)
3893 					{
3894 						B3_PROFILE("clipFacesAndFindContacts");
3895 						//nContacts = m_totalContactsOut.at(0);
3896 						//int h = m_hasSeparatingNormals.at(0);
3897 						//int4 p = clippingFacesOutGPU.at(0);
3898 						b3BufferInfoCL bInfo[] = {
3899 							b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3900 							b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3901 							b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3902 							b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
3903 							b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
3904 							b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
3905 							b3BufferInfoCL(worldVertsB2GPU.getBufferCL())};
3906 						b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts");
3907 						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3908 						launcher.setConst(vertexFaceCapacity);
3909 
3910 						launcher.setConst(numConcavePairs);
3911 						int debugMode = 0;
3912 						launcher.setConst(debugMode);
3913 						int num = numConcavePairs;
3914 						launcher.launch1D(num);
3915 						clFinish(m_queue);
3916 						//int bla = m_totalContactsOut.at(0);
3917 					}
3918 				}
3919 				//contactReduction
3920 				{
3921 					int newContactCapacity = nContacts + numConcavePairs;
3922 					contactOut->reserve(newContactCapacity);
3923 					if (reduceConcaveContactsOnGPU)
3924 					{
3925 						//						printf("newReservation = %d\n",newReservation);
3926 						{
3927 							B3_PROFILE("newContactReductionKernel");
3928 							b3BufferInfoCL bInfo[] =
3929 								{
3930 									b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true),
3931 									b3BufferInfoCL(bodyBuf->getBufferCL(), true),
3932 									b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
3933 									b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()),
3934 									b3BufferInfoCL(contactOut->getBufferCL()),
3935 									b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
3936 									b3BufferInfoCL(worldVertsB2GPU.getBufferCL()),
3937 									b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
3938 
3939 							b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel");
3940 							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
3941 							launcher.setConst(vertexFaceCapacity);
3942 							launcher.setConst(newContactCapacity);
3943 							launcher.setConst(numConcavePairs);
3944 							int num = numConcavePairs;
3945 
3946 							launcher.launch1D(num);
3947 						}
3948 						nContacts = m_totalContactsOut.at(0);
3949 						contactOut->resize(nContacts);
3950 
3951 						//printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
3952 					}
3953 					else
3954 					{
3955 						volatile int nGlobalContactsOut = nContacts;
3956 						b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
3957 						triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
3958 						b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
3959 						bodyBuf->copyToHost(hostBodyBuf);
3960 
3961 						b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;
3962 						m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);
3963 
3964 						b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
3965 						m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
3966 
3967 						b3AlignedObjectArray<b3Contact4> hostContacts;
3968 						if (nContacts)
3969 						{
3970 							contactOut->copyToHost(hostContacts);
3971 						}
3972 						hostContacts.resize(newContactCapacity);
3973 
3974 						b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
3975 						b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
3976 
3977 						clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
3978 						worldVertsB2GPU.copyToHost(worldVertsB2CPU);
3979 
3980 						for (int i = 0; i < numConcavePairs; i++)
3981 						{
3982 							b3NewContactReductionKernel(&triangleConvexPairsOutHost.at(0),
3983 														&hostBodyBuf.at(0),
3984 														&concaveSepNormalsHost.at(0),
3985 														&concaveHasSeparatingNormalsCPU.at(0),
3986 														&hostContacts.at(0),
3987 														&clippingFacesOutCPU.at(0),
3988 														&worldVertsB2CPU.at(0),
3989 														&nGlobalContactsOut,
3990 														vertexFaceCapacity,
3991 														newContactCapacity,
3992 														numConcavePairs,
3993 														i);
3994 						}
3995 
3996 						nContacts = nGlobalContactsOut;
3997 						m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
3998 						//						nContacts = m_totalContactsOut.at(0);
3999 						//contactOut->resize(nContacts);
4000 						hostContacts.resize(nContacts);
4001 						//printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
4002 						contactOut->copyFromHost(hostContacts);
4003 					}
4004 				}
4005 				//re-use?
4006 			}
4007 			else
4008 			{
4009 				B3_PROFILE("clipHullHullConcaveConvexKernel");
4010 				nContacts = m_totalContactsOut.at(0);
4011 				int newContactCapacity = contactOut->capacity();
4012 
4013 				//printf("contactOut5 = %d\n",nContacts);
4014 				b3BufferInfoCL bInfo[] = {
4015 					b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true),
4016 					b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4017 					b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4018 					b3BufferInfoCL(convexData.getBufferCL(), true),
4019 					b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4020 					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4021 					b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4022 					b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4023 					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
4024 					b3BufferInfoCL(m_concaveSepNormals.getBufferCL()),
4025 					b3BufferInfoCL(contactOut->getBufferCL()),
4026 					b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4027 				b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel, "m_clipHullHullConcaveConvexKernel");
4028 				launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4029 				launcher.setConst(newContactCapacity);
4030 				launcher.setConst(numConcavePairs);
4031 				int num = numConcavePairs;
4032 				launcher.launch1D(num);
4033 				clFinish(m_queue);
4034 				nContacts = m_totalContactsOut.at(0);
4035 				contactOut->resize(nContacts);
4036 				//printf("contactOut6 = %d\n",nContacts);
4037 				b3AlignedObjectArray<b3Contact4> cpuContacts;
4038 				contactOut->copyToHost(cpuContacts);
4039 			}
4040 			//			printf("nContacts after = %d\n", nContacts);
4041 		}  //numConcavePairs
4042 
4043 		//convex-convex contact clipping
4044 
4045 		bool breakupKernel = false;
4046 
4047 #ifdef __APPLE__
4048 		breakupKernel = true;
4049 #endif
4050 
4051 #ifdef CHECK_ON_HOST
4052 		bool computeConvexConvex = false;
4053 #else
4054 		bool computeConvexConvex = true;
4055 #endif  //CHECK_ON_HOST
4056 		if (computeConvexConvex)
4057 		{
4058 			B3_PROFILE("clipHullHullKernel");
4059 			if (breakupKernel)
4060 			{
4061 				worldVertsB1GPU.resize(vertexFaceCapacity * nPairs);
4062 				clippingFacesOutGPU.resize(nPairs);
4063 				worldNormalsAGPU.resize(nPairs);
4064 				worldVertsA1GPU.resize(vertexFaceCapacity * nPairs);
4065 				worldVertsB2GPU.resize(vertexFaceCapacity * nPairs);
4066 
4067 				if (findConvexClippingFacesGPU)
4068 				{
4069 					B3_PROFILE("findClippingFacesKernel");
4070 					b3BufferInfoCL bInfo[] = {
4071 						b3BufferInfoCL(pairs->getBufferCL(), true),
4072 						b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4073 						b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4074 						b3BufferInfoCL(convexData.getBufferCL(), true),
4075 						b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4076 						b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4077 						b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4078 						b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4079 						b3BufferInfoCL(m_sepNormals.getBufferCL()),
4080 						b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4081 						b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4082 						b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
4083 						b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
4084 						b3BufferInfoCL(worldVertsB1GPU.getBufferCL())};
4085 
4086 					b3LauncherCL launcher(m_queue, m_findClippingFacesKernel, "m_findClippingFacesKernel");
4087 					launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4088 					launcher.setConst(vertexFaceCapacity);
4089 					launcher.setConst(nPairs);
4090 					int num = nPairs;
4091 					launcher.launch1D(num);
4092 					clFinish(m_queue);
4093 				}
4094 				else
4095 				{
4096 					float minDist = -1e30f;
4097 					float maxDist = 0.02f;
4098 
4099 					b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;
4100 					convexData.copyToHost(hostConvexData);
4101 					b3AlignedObjectArray<b3Collidable> hostCollidables;
4102 					gpuCollidables.copyToHost(hostCollidables);
4103 
4104 					b3AlignedObjectArray<int> hostHasSepNormals;
4105 					m_hasSeparatingNormals.copyToHost(hostHasSepNormals);
4106 					b3AlignedObjectArray<b3Vector3> cpuSepNormals;
4107 					m_sepNormals.copyToHost(cpuSepNormals);
4108 
4109 					b3AlignedObjectArray<b3Int4> hostPairs;
4110 					pairs->copyToHost(hostPairs);
4111 					b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
4112 					bodyBuf->copyToHost(hostBodyBuf);
4113 
4114 					//worldVertsB1GPU.resize(vertexFaceCapacity*nPairs);
4115 					b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
4116 					worldVertsB1GPU.copyToHost(worldVertsB1CPU);
4117 
4118 					b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
4119 					clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);
4120 
4121 					b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
4122 					worldNormalsACPU.resize(nPairs);
4123 
4124 					b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
4125 					worldVertsA1CPU.resize(worldVertsA1GPU.size());
4126 
4127 					b3AlignedObjectArray<b3Vector3> hostVertices;
4128 					gpuVertices.copyToHost(hostVertices);
4129 					b3AlignedObjectArray<b3GpuFace> hostFaces;
4130 					gpuFaces.copyToHost(hostFaces);
4131 					b3AlignedObjectArray<int> hostIndices;
4132 					gpuIndices.copyToHost(hostIndices);
4133 
4134 					for (int i = 0; i < nPairs; i++)
4135 					{
4136 						int bodyIndexA = hostPairs[i].x;
4137 						int bodyIndexB = hostPairs[i].y;
4138 
4139 						int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx;
4140 						int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;
4141 
4142 						int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex;
4143 						int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex;
4144 
4145 						if (hostHasSepNormals[i])
4146 						{
4147 							b3FindClippingFaces(cpuSepNormals[i],
4148 												&hostConvexData[shapeIndexA],
4149 												&hostConvexData[shapeIndexB],
4150 												hostBodyBuf[bodyIndexA].m_pos, hostBodyBuf[bodyIndexA].m_quat,
4151 												hostBodyBuf[bodyIndexB].m_pos, hostBodyBuf[bodyIndexB].m_quat,
4152 												&worldVertsA1CPU.at(0), &worldNormalsACPU.at(0),
4153 												&worldVertsB1CPU.at(0),
4154 												vertexFaceCapacity, minDist, maxDist,
4155 												&hostVertices.at(0), &hostFaces.at(0),
4156 												&hostIndices.at(0),
4157 												&hostVertices.at(0), &hostFaces.at(0),
4158 												&hostIndices.at(0), &clippingFacesOutCPU.at(0), i);
4159 						}
4160 					}
4161 
4162 					clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
4163 					worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
4164 					worldNormalsAGPU.copyFromHost(worldNormalsACPU);
4165 					worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
4166 				}
4167 
4168 				///clip face B against face A, reduce contacts and append them to a global contact array
4169 				if (1)
4170 				{
4171 					if (clipConvexFacesAndFindContactsCPU)
4172 					{
4173 						//b3AlignedObjectArray<b3Int4> hostPairs;
4174 						//pairs->copyToHost(hostPairs);
4175 
4176 						b3AlignedObjectArray<b3Vector3> hostSepNormals;
4177 						m_sepNormals.copyToHost(hostSepNormals);
4178 						b3AlignedObjectArray<int> hostHasSepAxis;
4179 						m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
4180 
4181 						b3AlignedObjectArray<b3Int4> hostClippingFaces;
4182 						clippingFacesOutGPU.copyToHost(hostClippingFaces);
4183 						b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
4184 						worldVertsB2CPU.resize(vertexFaceCapacity * nPairs);
4185 
4186 						b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
4187 						worldVertsA1GPU.copyToHost(worldVertsA1CPU);
4188 						b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
4189 						worldNormalsAGPU.copyToHost(worldNormalsACPU);
4190 
4191 						b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
4192 						worldVertsB1GPU.copyToHost(worldVertsB1CPU);
4193 
4194 						/*
4195 					  __global const b3Float4* separatingNormals,
4196                                                    __global const int* hasSeparatingAxis,
4197                                                    __global b3Int4* clippingFacesOut,
4198                                                    __global b3Float4* worldVertsA1,
4199                                                    __global b3Float4* worldNormalsA1,
4200                                                    __global b3Float4* worldVertsB1,
4201                                                    __global b3Float4* worldVertsB2,
4202                                                     int vertexFaceCapacity,
4203 															int pairIndex
4204 					*/
4205 						for (int i = 0; i < nPairs; i++)
4206 						{
4207 							clipFacesAndFindContactsKernel(
4208 								&hostSepNormals.at(0),
4209 								&hostHasSepAxis.at(0),
4210 								&hostClippingFaces.at(0),
4211 								&worldVertsA1CPU.at(0),
4212 								&worldNormalsACPU.at(0),
4213 								&worldVertsB1CPU.at(0),
4214 								&worldVertsB2CPU.at(0),
4215 
4216 								vertexFaceCapacity,
4217 								i);
4218 						}
4219 
4220 						clippingFacesOutGPU.copyFromHost(hostClippingFaces);
4221 						worldVertsB2GPU.copyFromHost(worldVertsB2CPU);
4222 					}
4223 					else
4224 					{
4225 						B3_PROFILE("clipFacesAndFindContacts");
4226 						//nContacts = m_totalContactsOut.at(0);
4227 						//int h = m_hasSeparatingNormals.at(0);
4228 						//int4 p = clippingFacesOutGPU.at(0);
4229 						b3BufferInfoCL bInfo[] = {
4230 							b3BufferInfoCL(m_sepNormals.getBufferCL()),
4231 							b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4232 							b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4233 							b3BufferInfoCL(worldVertsA1GPU.getBufferCL()),
4234 							b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
4235 							b3BufferInfoCL(worldVertsB1GPU.getBufferCL()),
4236 							b3BufferInfoCL(worldVertsB2GPU.getBufferCL())};
4237 
4238 						b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts");
4239 						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4240 						launcher.setConst(vertexFaceCapacity);
4241 
4242 						launcher.setConst(nPairs);
4243 						int debugMode = 0;
4244 						launcher.setConst(debugMode);
4245 						int num = nPairs;
4246 						launcher.launch1D(num);
4247 						clFinish(m_queue);
4248 					}
4249 
4250 					{
4251 						nContacts = m_totalContactsOut.at(0);
4252 						//printf("nContacts = %d\n",nContacts);
4253 
4254 						int newContactCapacity = nContacts + nPairs;
4255 						contactOut->reserve(newContactCapacity);
4256 
4257 						if (reduceConvexContactsOnGPU)
4258 						{
4259 							{
4260 								B3_PROFILE("newContactReductionKernel");
4261 								b3BufferInfoCL bInfo[] =
4262 									{
4263 										b3BufferInfoCL(pairs->getBufferCL(), true),
4264 										b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4265 										b3BufferInfoCL(m_sepNormals.getBufferCL()),
4266 										b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4267 										b3BufferInfoCL(contactOut->getBufferCL()),
4268 										b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()),
4269 										b3BufferInfoCL(worldVertsB2GPU.getBufferCL()),
4270 										b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4271 
4272 								b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel");
4273 								launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4274 								launcher.setConst(vertexFaceCapacity);
4275 								launcher.setConst(newContactCapacity);
4276 								launcher.setConst(nPairs);
4277 								int num = nPairs;
4278 
4279 								launcher.launch1D(num);
4280 							}
4281 							nContacts = m_totalContactsOut.at(0);
4282 							contactOut->resize(nContacts);
4283 						}
4284 						else
4285 						{
4286 							volatile int nGlobalContactsOut = nContacts;
4287 							b3AlignedObjectArray<b3Int4> hostPairs;
4288 							pairs->copyToHost(hostPairs);
4289 							b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;
4290 							bodyBuf->copyToHost(hostBodyBuf);
4291 							b3AlignedObjectArray<b3Vector3> hostSepNormals;
4292 							m_sepNormals.copyToHost(hostSepNormals);
4293 							b3AlignedObjectArray<int> hostHasSepAxis;
4294 							m_hasSeparatingNormals.copyToHost(hostHasSepAxis);
4295 							b3AlignedObjectArray<b3Contact4> hostContactsOut;
4296 							contactOut->copyToHost(hostContactsOut);
4297 							hostContactsOut.resize(newContactCapacity);
4298 
4299 							b3AlignedObjectArray<b3Int4> hostClippingFaces;
4300 							clippingFacesOutGPU.copyToHost(hostClippingFaces);
4301 							b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;
4302 							worldVertsB2GPU.copyToHost(worldVertsB2CPU);
4303 
4304 							for (int i = 0; i < nPairs; i++)
4305 							{
4306 								b3NewContactReductionKernel(&hostPairs.at(0),
4307 															&hostBodyBuf.at(0),
4308 															&hostSepNormals.at(0),
4309 															&hostHasSepAxis.at(0),
4310 															&hostContactsOut.at(0),
4311 															&hostClippingFaces.at(0),
4312 															&worldVertsB2CPU.at(0),
4313 															&nGlobalContactsOut,
4314 															vertexFaceCapacity,
4315 															newContactCapacity,
4316 															nPairs,
4317 															i);
4318 							}
4319 
4320 							nContacts = nGlobalContactsOut;
4321 							m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);
4322 							hostContactsOut.resize(nContacts);
4323 							//printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);
4324 							contactOut->copyFromHost(hostContactsOut);
4325 						}
4326 						//                    b3Contact4 pt = contactOut->at(0);
4327 						//                  printf("nContacts = %d\n",nContacts);
4328 					}
4329 				}
4330 			}
4331 			else  //breakupKernel
4332 			{
4333 				if (nPairs)
4334 				{
4335 					b3BufferInfoCL bInfo[] = {
4336 						b3BufferInfoCL(pairs->getBufferCL(), true),
4337 						b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4338 						b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4339 						b3BufferInfoCL(convexData.getBufferCL(), true),
4340 						b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4341 						b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4342 						b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4343 						b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4344 						b3BufferInfoCL(m_sepNormals.getBufferCL()),
4345 						b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()),
4346 						b3BufferInfoCL(contactOut->getBufferCL()),
4347 						b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4348 					b3LauncherCL launcher(m_queue, m_clipHullHullKernel, "m_clipHullHullKernel");
4349 					launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4350 					launcher.setConst(nPairs);
4351 					launcher.setConst(maxContactCapacity);
4352 
4353 					int num = nPairs;
4354 					launcher.launch1D(num);
4355 					clFinish(m_queue);
4356 
4357 					nContacts = m_totalContactsOut.at(0);
4358 					if (nContacts >= maxContactCapacity)
4359 					{
4360 						b3Error("Exceeded contact capacity (%d/%d)\n", nContacts, maxContactCapacity);
4361 						nContacts = maxContactCapacity;
4362 					}
4363 					contactOut->resize(nContacts);
4364 				}
4365 			}
4366 
4367 			int nCompoundsPairs = m_gpuCompoundPairs.size();
4368 
4369 			if (nCompoundsPairs)
4370 			{
4371 				b3BufferInfoCL bInfo[] = {
4372 					b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true),
4373 					b3BufferInfoCL(bodyBuf->getBufferCL(), true),
4374 					b3BufferInfoCL(gpuCollidables.getBufferCL(), true),
4375 					b3BufferInfoCL(convexData.getBufferCL(), true),
4376 					b3BufferInfoCL(gpuVertices.getBufferCL(), true),
4377 					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true),
4378 					b3BufferInfoCL(gpuFaces.getBufferCL(), true),
4379 					b3BufferInfoCL(gpuIndices.getBufferCL(), true),
4380 					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true),
4381 					b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL(), true),
4382 					b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL(), true),
4383 					b3BufferInfoCL(contactOut->getBufferCL()),
4384 					b3BufferInfoCL(m_totalContactsOut.getBufferCL())};
4385 				b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel, "m_clipCompoundsHullHullKernel");
4386 				launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
4387 				launcher.setConst(nCompoundsPairs);
4388 				launcher.setConst(maxContactCapacity);
4389 
4390 				int num = nCompoundsPairs;
4391 				launcher.launch1D(num);
4392 				clFinish(m_queue);
4393 
4394 				nContacts = m_totalContactsOut.at(0);
4395 				if (nContacts > maxContactCapacity)
4396 				{
4397 					b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);
4398 					nContacts = maxContactCapacity;
4399 				}
4400 				contactOut->resize(nContacts);
4401 			}  //if nCompoundsPairs
4402 		}
4403 	}  //contactClippingOnGpu
4404 
4405 	//printf("nContacts end = %d\n",nContacts);
4406 
4407 	//printf("frameCount = %d\n",frameCount++);
4408 }
4409