1//keep this enum in sync with the CPU version (in btCollidable.h)
2//written by Erwin Coumans
3
4
//Shape-type tags. The kernels in this file compare SHAPE_CONVEX_HULL and
//SHAPE_COMPOUND_OF_CONVEX_HULLS against btCollidableGpu::m_shapeType.
#define SHAPE_CONVEX_HULL 3
#define SHAPE_CONCAVE_TRIMESH 5
//NOTE(review): not referenced in the visible part of this file; presumably the
//number of faces used when a triangle is handled as a convex shape -- confirm.
#define TRIANGLE_NUM_CONVEX_FACES 5
#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6

//Capacity of the fixed-size node-pair stack declared in findCompoundPairsKernel.
#define B3_MAX_STACK_DEPTH 256


//Shorthand used for BodyData::m_collidableIdx.
typedef unsigned int u32;
14
///keep this in sync with btCollidable.h
///Per-collidable shape descriptor. Both unions are anonymous, so their members
///are accessed directly on the struct; members of the same union share storage.
typedef struct
{
	union {
		//read by findCompoundPairsKernel as the child count of a SHAPE_COMPOUND_OF_CONVEX_HULLS collidable
		int m_numChildShapes;
		int m_bvhIndex;
	};
	union
	{
		float m_radius;
		//used as an index into the b3BvhInfo array when both collidables of a pair are compounds
		int	m_compoundBvhIndex;
	};

	//one of the SHAPE_* tags
	int m_shapeType;
	//convex hull: index into the ConvexPolyhedronCL array;
	//compound: index of the first child in the btGpuChildShape array
	int m_shapeIndex;

} btCollidableGpu;
32
//getTriangleIndex/getTriangleIndexGlobal keep only the low
//(31 - MAX_NUM_PARTS_IN_BITS) bits of a node's stored index.
#define MAX_NUM_PARTS_IN_BITS 10

///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.
///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).
typedef struct
{
	//12 bytes
	//quantized box corners; MyUnQuantize/MyUnQuantizeGlobal convert them back to floats
	unsigned short int	m_quantizedAabbMin[3];
	unsigned short int	m_quantizedAabbMax[3];
	//4 bytes
	//>= 0: leaf node, value holds the leaf index (see getTriangleIndex)
	//<  0: internal node, the negated value is the escape index (see getEscapeIndex)
	int	m_escapeIndexOrTriangleIndex;
} b3QuantizedBvhNode;
45
//Per-BVH header: dequantization parameters plus the location of this BVH's
//nodes and subtree headers inside the shared arrays passed to the kernels.
typedef struct
{
	//added to the scaled quantized coordinates when un-quantizing
	float4		m_aabbMin;
	float4		m_aabbMax;
	//per-axis divisor applied to quantized coordinates when un-quantizing
	float4		m_quantization;
	int			m_numNodes;
	//number of b3BvhSubtreeInfo entries belonging to this BVH
	int			m_numSubTrees;
	//added to b3BvhSubtreeInfo::m_rootNodeIndex to index the shared node array
	int			m_nodeOffset;
	//index of this BVH's first entry in the shared subtree array
	int			m_subTreeOffset;

} b3BvhInfo;
57
58
//Returns the node's stored index with its upper bits cleared: only the low
//(31 - MAX_NUM_PARTS_IN_BITS) bits of m_escapeIndexOrTriangleIndex are kept.
int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)
{
	//all ones, shifted left so that exactly the bits to keep are zero
	const unsigned int upperBits = (~0u) << (31 - MAX_NUM_PARTS_IN_BITS);
	return rootNode->m_escapeIndexOrTriangleIndex & ~upperBits;
}
66
//Same as getTriangleIndex, for a node that lives in __global memory: returns
//the low (31 - MAX_NUM_PARTS_IN_BITS) bits of m_escapeIndexOrTriangleIndex.
int	getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
{
	//all ones, shifted left so that exactly the bits to keep are zero
	const unsigned int upperBits = (~0u) << (31 - MAX_NUM_PARTS_IN_BITS);
	return rootNode->m_escapeIndexOrTriangleIndex & ~upperBits;
}
74
//Returns 1 for a leaf node (stored index >= 0) and 0 for an internal node
//(stored value negative, i.e. a negated escape index).
int isLeafNode(const b3QuantizedBvhNode* rootNode)
{
	const int stored = rootNode->m_escapeIndexOrTriangleIndex;
	if (stored < 0)
		return 0;
	return 1;
}
80
//__global-memory variant of isLeafNode: 1 when the stored index is >= 0
//(leaf), 0 when it is negative (internal node).
int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)
{
	const int stored = rootNode->m_escapeIndexOrTriangleIndex;
	if (stored < 0)
		return 0;
	return 1;
}
86
//Returns the negation of the stored value. Internal nodes store their escape
//index negated, so for them this yields a positive escape index.
int getEscapeIndex(const b3QuantizedBvhNode* rootNode)
{
	const int stored = rootNode->m_escapeIndexOrTriangleIndex;
	return -stored;
}
91
//__global-memory variant of getEscapeIndex: returns the negation of the
//stored value (internal nodes store their escape index negated).
int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
{
	const int stored = rootNode->m_escapeIndexOrTriangleIndex;
	return -stored;
}
96
97
//Header of one subtree of a quantized BVH: its quantized bounding box and the
//range of nodes it covers.
typedef struct
{
	//12 bytes
	//quantized box corners, un-quantized with the owning b3BvhInfo's parameters
	unsigned short int	m_quantizedAabbMin[3];
	unsigned short int	m_quantizedAabbMax[3];
	//4 bytes, points to the root of the subtree
	//(relative to the owning BVH: b3BvhInfo::m_nodeOffset is added before indexing nodes)
	int			m_rootNodeIndex;
	//4 bytes
	//number of nodes in the subtree; root index + size gives the end node index
	int			m_subtreeSize;
	int			m_padding[3];
} b3BvhSubtreeInfo;
109
110
111
112
113
114
115
//One child of a compound shape. The kernels combine the child transform with
//the parent body transform as
//  worldPos = qtRotate(bodyOrn, m_childPosition) + bodyPos
//  worldOrn = qtMul(bodyOrn, m_childOrientation)
typedef struct
{
	float4	m_childPosition;
	float4	m_childOrientation;
	//index into the btCollidableGpu array describing this child's shape
	int m_shapeIndex;
	int m_unused0;
	int m_unused1;
	int m_unused2;
} btGpuChildShape;
125
126
//Rigid body state as read by the kernels in this file.
typedef struct
{
	//body position; the kernels zero .w before using it in projections
	float4 m_pos;
	//body orientation quaternion (x,y,z,w)
	float4 m_quat;
	float4 m_linVel;
	float4 m_angVel;

	//index into the btCollidableGpu array
	u32 m_collidableIdx;
	//findCompoundPairsKernel skips a pair when both bodies have m_invMass == 0
	float m_invMass;
	float m_restituitionCoeff;
	float m_frictionCoeff;
} BodyData;
139
140
//Convex hull descriptor. The offset/count pairs select this hull's slice of
//the shared vertex, face and unique-edge arrays passed to the kernels.
typedef struct
{
	//hull centre in local space; transformed to world space to build the centre-to-centre vector
	float4		m_localCenter;
	float4		m_extents;
	float4		mC;
	float4		mE;

	float			m_radius;
	//first face of this hull in the btGpuFace array
	int	m_faceOffset;
	int m_numFaces;
	int	m_numVertices;

	//first vertex of this hull in the vertex array
	int m_vertexOffset;
	//first entry of this hull in the unique-edge array
	int	m_uniqueEdgesOffset;
	int	m_numUniqueEdges;
	int m_unused;
} ConvexPolyhedronCL;
158
//Axis-aligned bounding box. Each corner is an anonymous union, so the same 16
//bytes can be accessed as a float4, as four floats, or as four ints.
typedef struct
{
	union
	{
		float4	m_min;
		float   m_minElems[4];
		int			m_minIndices[4];
	};
	union
	{
		float4	m_max;
		float   m_maxElems[4];
		int			m_maxIndices[4];
	};
} btAabbCL;
174
175#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
176#include "Bullet3Common/shared/b3Int2.h"
177
178
179
//One face of a convex hull.
typedef struct
{
	//the findSeparatingAxis* functions rotate this into world space and use it as the face normal
	//(qtRotate zeroes .w before rotating)
	float4 m_plane;
	//NOTE(review): presumably a slice of the shared index array holding this
	//face's vertex indices; not read in the visible part of the file -- confirm.
	int m_indexOffset;
	int m_numIndices;
} btGpuFace;
186
//make_float4(a,b,c,d) expands to the vector literal (float4)(a,b,c,d)
#define make_float4 (float4)
188
189
//Cross product of the xyz parts of a and b.
//The built-in cross() on float4 operands ignores .w and returns .w = 0.
__inline
float4 cross3(float4 a, float4 b)
{
	const float4 product = cross(a,b);
	return product;
}
207
//Three-component dot product: .w of both operands is replaced by zero before
//the four-component dot() is evaluated.
__inline
float dot3F4(float4 a, float4 b)
{
	return dot(make_float4(a.xyz,0.f), make_float4(b.xyz,0.f));
}
215
//Normalizes the xyz part of v with fast_normalize(); .w is discarded (set to
//zero) before normalizing.
__inline
float4 fastNormalize4(float4 v)
{
	const float4 xyzOnly = make_float4(v.xyz,0.f);
	return fast_normalize(xyzOnly);
}
222
223
224///////////////////////////////////////
225//	Quaternion
226///////////////////////////////////////
227
//Quaternions are stored in a float4 as (x,y,z,w): xyz is the vector part and
//w the scalar part (see qtMul and qtInvert).
typedef float4 Quaternion;

//Forward declarations; the definitions follow below.

//Quaternion product a*b.
__inline
Quaternion qtMul(Quaternion a, Quaternion b);

__inline
Quaternion qtNormalize(Quaternion in);

//Computes q * (vec.xyz,0) * qtInvert(q).
__inline
float4 qtRotate(Quaternion q, float4 vec);

//Negates the vector part of q and keeps w.
__inline
Quaternion qtInvert(Quaternion q);
241
242
243
244
//Quaternion product a*b for quaternions stored as (x,y,z,w):
//  vector part = cross(a.xyz,b.xyz) + a.w*b.xyz + b.w*a.xyz
//  scalar part = a.w*b.w - dot(a.xyz,b.xyz)
__inline
Quaternion qtMul(Quaternion a, Quaternion b)
{
	Quaternion product = cross3( a, b );
	product += a.w*b+b.w*a;
	//the sum above also wrote a value into .w; replace it with the real scalar part
	product.w = a.w*b.w - dot3F4(a, b);
	return product;
}
255
//Returns `in` scaled to unit length over all four components (x,y,z,w).
//Uses fast_normalize(), i.e. the reduced-precision built-in.
//
//This must not go through fastNormalize4(): that helper zeroes .w before
//normalizing, which would throw away the scalar part of the quaternion and
//turn e.g. the identity quaternion (0,0,0,1) into a zero-length vector.
__inline
Quaternion qtNormalize(Quaternion in)
{
	return fast_normalize(in);
}
//Rotates vec by q: evaluates q * (vec.xyz,0) * qtInvert(q).
//The incoming vec.w is ignored.
__inline
float4 qtRotate(Quaternion q, float4 vec)
{
	float4 pureVec = vec;
	pureVec.w = 0.f;
	const Quaternion conjugate = qtInvert( q );
	return qtMul(qtMul(q,pureVec),conjugate);
}
272
//Returns (-q.xyz, q.w), i.e. the conjugate of q. No division by the squared
//length is done, so this is the inverse only for unit-length quaternions.
__inline
Quaternion qtInvert(Quaternion q)
{
	Quaternion conjugate = q;
	conjugate.xyz = -q.xyz;
	return conjugate;
}
278
//Rotates vec by qtInvert(q), i.e. applies the opposite rotation of qtRotate(q, .).
__inline
float4 qtInvRotate(const Quaternion q, float4 vec)
{
	const Quaternion inverse = qtInvert( q );
	return qtRotate( inverse, vec );
}
284
//Applies a rigid transform to *p: rotates it by *orientation, then adds
//*translation. All three arguments are read-only.
__inline
float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)
{
	const float4 rotated = qtRotate( *orientation, *p );
	return rotated + (*translation);
}
290
291
292
//Returns the xyz part of a normalized via fastNormalize4(); the result is
//built from (a.x, a.y, a.z, 0), so a.w does not contribute.
__inline
float4 normalize3(const float4 a)
{
	return fastNormalize4( make_float4(a.x, a.y, a.z, 0.f) );
}
299
//Projects every vertex of a hull held in private memory onto the axis *dir and
//returns the covered interval in min[0] / max[0].
//  hull     - supplies m_numVertices and m_vertexOffset into `vertices`
//  pos, orn - hull position and orientation
//  dir      - projection axis in the same space as pos
//The axis is rotated into hull space with qtInvRotate(orn, .) so the vertices
//can be used untransformed; dot(pos,*dir) is then added to both bounds.
//With zero vertices the bounds come out as -FLT_MAX / FLT_MAX plus the offset.
inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn,
const float4* dir, const float4* vertices, float* min, float* max)
{
	const int vertexCount = hull->m_numVertices;
	const int firstVertex = hull->m_vertexOffset;

	const float4 localDir = qtInvRotate(orn,*dir);
	const float offset = dot(pos,*dir);

	float lo = FLT_MAX;
	float hi = -FLT_MAX;
	for (int v = 0; v < vertexCount; ++v)
	{
		const float dp = dot(vertices[firstVertex+v],localDir);
		lo = (dp < lo) ? dp : lo;
		hi = (dp > hi) ? dp : hi;
	}

	//only reachable when no vertex updated the bounds
	if (lo > hi)
	{
		const float swapped = lo;
		lo = hi;
		hi = swapped;
	}

	min[0] = lo + offset;
	max[0] = hi + offset;
}
326
//Projects every vertex of a hull stored in __global memory onto the axis *dir
//and returns the covered interval in min[0] / max[0].
//  hull     - supplies m_numVertices and m_vertexOffset into `vertices`
//  pos, orn - hull position and orientation
//  dir      - projection axis in the same space as pos
//The axis is rotated into hull space with qtInvRotate(orn, .) so the vertices
//can be used untransformed; dot(pos,*dir) is then added to both bounds.
//With zero vertices the bounds come out as -FLT_MAX / FLT_MAX plus the offset.
inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn,
const float4* dir, __global const float4* vertices, float* min, float* max)
{
	const int vertexCount = hull->m_numVertices;
	const int firstVertex = hull->m_vertexOffset;

	const float4 localDir = qtInvRotate(orn,*dir);
	const float offset = dot(pos,*dir);

	float lo = FLT_MAX;
	float hi = -FLT_MAX;
	for (int v = 0; v < vertexCount; ++v)
	{
		const float dp = dot(vertices[firstVertex+v],localDir);
		lo = (dp < lo) ? dp : lo;
		hi = (dp > hi) ? dp : hi;
	}

	//only reachable when no vertex updated the bounds
	if (lo > hi)
	{
		const float swapped = lo;
		lo = hi;
		hi = swapped;
	}

	min[0] = lo + offset;
	max[0] = hi + offset;
}
353
//Tests one candidate axis between a private-memory hull (A) and a
//__global-memory hull (B).
//Returns false when the projections of A and B onto *sep_axis do not overlap
//(*depth is left untouched in that case); otherwise stores the smaller of the
//two overlap amounts in *depth and returns true.
inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
	const float4 posA,const float4 ornA,
	const float4 posB,const float4 ornB,
	float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)
{
	float minA,maxA;
	projectLocal(hullA,posA,ornA,sep_axis,verticesA, &minA, &maxA);

	float minB,maxB;
	project(hullB,posB,ornB, sep_axis,verticesB, &minB, &maxB);

	const bool separated = (maxA<minB) || (maxB<minA);
	if (separated)
		return false;

	const float overlapAB = maxA - minB;
	const float overlapBA = maxB - minA;
	if (overlapAB < overlapBA)
		*depth = overlapAB;
	else
		*depth = overlapBA;
	return true;
}
372
373
374
375
//True when |x|, |y| and |z| of v are all at most 1e-6; v.w is not examined.
inline bool IsAlmostZero(const float4 v)
{
	const float eps = 1e-6f;
	const bool xLarge = fabs(v.x)>eps;
	const bool yLarge = fabs(v.y)>eps;
	const bool zLarge = fabs(v.z)>eps;
	return !(xLarge || yLarge || zLarge);
}
382
383
384
//Face-normal stage of the separating-axis test, using the faces of hullA,
//where hullA and its arrays are in private memory and hullB is in __global
//memory.
//Each face normal of hullA is rotated by ornA, flipped if needed so that it
//does not point against DeltaC2, and tested with TestSepAxisLocalA.
//  sep, dmin - in/out: *dmin must hold the smallest overlap found so far (the
//              caller initialises it) and *sep the matching axis; both are
//              updated whenever a smaller overlap is found.
//Returns false as soon as one axis separates the hulls, true otherwise. Before
//returning true, *sep is negated when dot3F4(-DeltaC2,*sep) is positive.
//uniqueEdgesA, indicesA, uniqueEdgesB, facesB and indicesB are not used here.
bool findSeparatingAxisLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
	const float4 posA1,
	const float4 ornA,
	const float4 posB1,
	const float4 ornB,
	const float4 DeltaC2,

	const float4* verticesA,
	const float4* uniqueEdgesA,
	const btGpuFace* facesA,
	const int*  indicesA,

	__global const float4* verticesB,
	__global const float4* uniqueEdgesB,
	__global const btGpuFace* facesB,
	__global const int*  indicesB,
	float4* sep,
	float* dmin)
{
	//positions are used in 4-component dot products, so .w must not contribute
	float4 originA = posA1;
	originA.w = 0.f;
	float4 originB = posB1;
	originB.w = 0.f;

	const int faceCount = hullA->m_numFaces;
	for (int f = 0; f < faceCount; ++f)
	{
		const float4 localPlane = facesA[hullA->m_faceOffset+f].m_plane;
		float4 axisWS = qtRotate(ornA,localPlane);
		if (dot3F4(DeltaC2,axisWS)<0)
			axisWS *= -1.f;

		float overlap;
		if (!TestSepAxisLocalA( hullA, hullB, originA,ornA,originB,ornB,&axisWS, verticesA, verticesB,&overlap))
			return false;

		if (overlap < *dmin)
		{
			*dmin = overlap;
			*sep = axisWS;
		}
	}

	if (dot3F4(-DeltaC2,*sep) > 0.0f)
		*sep = -(*sep);

	return true;
}
437
//Face-normal stage of the separating-axis test, using the faces of hullA,
//where hullA and its arrays are in __global memory and hullB is in private
//memory.
//Each face normal of hullA is rotated by ornA, flipped if needed so that it
//does not point against DeltaC2, and tested with TestSepAxisLocalA. That
//helper expects the private-memory hull first, so hullB/posB/ornB/verticesB
//are passed in the "A" slots and hullA's data in the "B" slots.
//  sep, dmin - in/out: *dmin must hold the smallest overlap found so far (the
//              caller initialises it) and *sep the matching axis; both are
//              updated whenever a smaller overlap is found.
//Returns false as soon as one axis separates the hulls, true otherwise. Before
//returning true, *sep is negated when dot3F4(-DeltaC2,*sep) is positive.
//uniqueEdgesA, indicesA, uniqueEdgesB, facesB and indicesB are not used here.
bool findSeparatingAxisLocalB(	__global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB,
	const float4 posA1,
	const float4 ornA,
	const float4 posB1,
	const float4 ornB,
	const float4 DeltaC2,
	__global const float4* verticesA,
	__global const float4* uniqueEdgesA,
	__global const btGpuFace* facesA,
	__global const int*  indicesA,
	const float4* verticesB,
	const float4* uniqueEdgesB,
	const btGpuFace* facesB,
	const int*  indicesB,
	float4* sep,
	float* dmin)
{
	//positions are used in 4-component dot products, so .w must not contribute
	float4 originA = posA1;
	originA.w = 0.f;
	float4 originB = posB1;
	originB.w = 0.f;

	const int faceCount = hullA->m_numFaces;
	for (int f = 0; f < faceCount; ++f)
	{
		const float4 localPlane = facesA[hullA->m_faceOffset+f].m_plane;
		float4 axisWS = qtRotate(ornA,localPlane);
		if (dot3F4(DeltaC2,axisWS)<0)
			axisWS *= -1.f;

		float overlap;
		if (!TestSepAxisLocalA( hullB, hullA, originB,ornB,originA,ornA, &axisWS, verticesB,verticesA, &overlap))
			return false;

		if (overlap < *dmin)
		{
			*dmin = overlap;
			*sep = axisWS;
		}
	}

	if (dot3F4(-DeltaC2,*sep) > 0.0f)
		*sep = -(*sep);

	return true;
}
488
489
490
//Edge-edge stage of the separating-axis test for a hull held in private
//memory (hullA) against a hull in __global memory (hullB).
//For every pair (unique edge of A, unique edge of B) the world-space cross
//product of the two edge directions is used as a candidate axis; nearly
//parallel pairs (cross product almost zero) are skipped.
//  posA1/ornA, posB1/ornB - transforms of the two hulls (.w of the positions is ignored)
//  DeltaC2                - centre-to-centre vector; each axis is flipped so
//                           that it does not point against it
//  sep, dmin              - in/out: *dmin must hold the smallest overlap found
//                           so far (the caller initialises it) and *sep the
//                           matching axis; both are updated whenever a
//                           smaller overlap is found
//Returns false as soon as one axis separates the hulls, true otherwise. Before
//returning true, *sep is negated when dot3F4(-DeltaC2,*sep) is positive.
//facesA, indicesA, facesB and indicesB are not used here.
//
//A separated axis now ends the search with `false`, exactly as
//findSeparatingAxisEdgeEdge does. Previously the result flag was overwritten
//with `true` and never read, so separation was never reported and a negative
//"overlap" could end up in *dmin.
bool findSeparatingAxisEdgeEdgeLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
	const float4 posA1,
	const float4 ornA,
	const float4 posB1,
	const float4 ornB,
	const float4 DeltaC2,
	const float4* verticesA,
	const float4* uniqueEdgesA,
	const btGpuFace* facesA,
	const int*  indicesA,
	__global const float4* verticesB,
	__global const float4* uniqueEdgesB,
	__global const btGpuFace* facesB,
	__global const int*  indicesB,
		float4* sep,
	float* dmin)
{
	//positions are used in 4-component dot products, so .w must not contribute
	float4 posA = posA1;
	posA.w = 0.f;
	float4 posB = posB1;
	posB.w = 0.f;

	// Test edges
	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)
	{
		const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];
		float4 edge0World = qtRotate(ornA,edge0);

		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)
		{
			const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];
			float4 edge1World = qtRotate(ornB,edge1);

			float4 crossje = cross3(edge0World,edge1World);

			//(nearly) parallel edges give no usable axis
			if(IsAlmostZero(crossje))
				continue;

			crossje = normalize3(crossje);
			if (dot3F4(DeltaC2,crossje)<0)
				crossje *= -1.f;

			float Min0,Max0;
			float Min1,Max1;
			projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);
			project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);

			//disjoint projections: crossje is a separating axis
			if(Max0<Min1 || Max1<Min0)
				return false;

			float d0 = Max0 - Min1;
			float d1 = Max1 - Min0;
			float dist = d0<d1 ? d0:d1;

			if(dist<*dmin)
			{
				*dmin = dist;
				*sep = crossje;
			}
		}
	}

	if((dot3F4(-DeltaC2,*sep))>0.0f)
	{
		*sep = -(*sep);
	}
	return true;
}
575
576
//Tests one candidate axis between two hulls that both live in __global memory
//and share the same vertex array.
//Returns false when the projections of A and B onto *sep_axis do not overlap
//(*depth is left untouched in that case); otherwise stores the smaller of the
//two overlap amounts in *depth and returns true.
inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
	const float4 posA,const float4 ornA,
	const float4 posB,const float4 ornB,
	float4* sep_axis, __global const float4* vertices,float* depth)
{
	float minA,maxA;
	project(hullA,posA,ornA,sep_axis,vertices, &minA, &maxA);

	float minB,maxB;
	project(hullB,posB,ornB, sep_axis,vertices, &minB, &maxB);

	const bool separated = (maxA<minB) || (maxB<minA);
	if (separated)
		return false;

	const float overlapAB = maxA - minB;
	const float overlapBA = maxB - minA;
	if (overlapAB < overlapBA)
		*depth = overlapAB;
	else
		*depth = overlapBA;
	return true;
}
595
596
//Face-normal stage of the separating-axis test, using the faces of hullA;
//both hulls and all arrays are in __global memory.
//Each face normal of hullA is rotated by ornA, flipped if needed so that it
//does not point against DeltaC2, and tested with TestSepAxis.
//  sep, dmin - in/out: *dmin must hold the smallest overlap found so far (the
//              caller initialises it) and *sep the matching axis; both are
//              updated whenever a smaller overlap is found.
//Returns false as soon as one axis separates the hulls, true otherwise. Before
//returning true, *sep is negated when dot3F4(-DeltaC2,*sep) is positive.
//uniqueEdges and indices are not used here.
bool findSeparatingAxis(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
	const float4 posA1,
	const float4 ornA,
	const float4 posB1,
	const float4 ornB,
	const float4 DeltaC2,
	__global const float4* vertices,
	__global const float4* uniqueEdges,
	__global const btGpuFace* faces,
	__global const int*  indices,
	float4* sep,
	float* dmin)
{
	//positions are used in 4-component dot products, so .w must not contribute
	float4 originA = posA1;
	originA.w = 0.f;
	float4 originB = posB1;
	originB.w = 0.f;

	const int faceCount = hullA->m_numFaces;
	for (int f = 0; f < faceCount; ++f)
	{
		const float4 localPlane = faces[hullA->m_faceOffset+f].m_plane;
		float4 axisWS = qtRotate(ornA,localPlane);
		if (dot3F4(DeltaC2,axisWS)<0)
			axisWS *= -1.f;

		float overlap;
		if (!TestSepAxis( hullA, hullB, originA,ornA,originB,ornB,&axisWS, vertices,&overlap))
			return false;

		if (overlap < *dmin)
		{
			*dmin = overlap;
			*sep = axisWS;
		}
	}

	if (dot3F4(-DeltaC2,*sep) > 0.0f)
		*sep = -(*sep);

	return true;
}
652
653
654
655
//Separating-axis test over a caller-supplied list of candidate directions;
//both hulls and all arrays are in __global memory.
//  unitSphereDirections / numUnitSphereDirections - the axes to try
//  sep, dmin - in/out: *dmin must hold the smallest overlap found so far (the
//              caller initialises it) and *sep the matching axis; both are
//              updated whenever a smaller overlap is found.
//Returns false as soon as one axis separates the hulls, true otherwise. Before
//returning true, *sep is negated when dot3F4(-DeltaC2,*sep) is positive.
bool findSeparatingAxisUnitSphere(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
	const float4 posA1,
	const float4 ornA,
	const float4 posB1,
	const float4 ornB,
	const float4 DeltaC2,
	__global const float4* vertices,
	__global const float4* unitSphereDirections,
	int numUnitSphereDirections,
	float4* sep,
	float* dmin)
{
	//positions are used in 4-component dot products, so .w must not contribute
	float4 originA = posA1;
	originA.w = 0.f;
	float4 originB = posB1;
	originB.w = 0.f;

	for (int d = 0; d < numUnitSphereDirections; ++d)
	{
		float4 axis = unitSphereDirections[d];

		//NOTE(review): this flips when the axis points ALONG DeltaC2 (>0); the
		//other findSeparatingAxis* functions flip when it points against it (<0).
		//Kept as in the original -- confirm whether the difference is intended.
		if (dot3F4(DeltaC2,axis)>0)
			axis *= -1.f;

		float minA,maxA;
		float minB,maxB;
		project(hullA,originA,ornA,&axis,vertices, &minA, &maxA);
		project(hullB,originB,ornB,&axis,vertices, &minB, &maxB);

		if (maxA<minB || maxB<minA)
			return false;

		const float overlapAB = maxA - minB;
		const float overlapBA = maxB - minA;
		const float overlap = overlapAB<overlapBA ? overlapAB : overlapBA;

		if (overlap < *dmin)
		{
			*dmin = overlap;
			*sep = axis;
		}
	}

	if (dot3F4(-DeltaC2,*sep) > 0.0f)
		*sep = -(*sep);

	return true;
}
717
718
//Edge-edge stage of the separating-axis test; both hulls and all arrays are
//in __global memory.
//For every pair (unique edge of A, unique edge of B) the world-space cross
//product of the two edge directions is used as a candidate axis; pairs whose
//cross product is almost zero are skipped.
//  sep, dmin - in/out: *dmin must hold the smallest overlap found so far (the
//              caller initialises it) and *sep the matching axis; both are
//              updated whenever a smaller overlap is found.
//Returns false as soon as one axis separates the hulls, true otherwise. Before
//returning true, *sep is negated when dot3F4(-DeltaC2,*sep) is positive.
//faces and indices are not used here.
bool findSeparatingAxisEdgeEdge(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB,
	const float4 posA1,
	const float4 ornA,
	const float4 posB1,
	const float4 ornB,
	const float4 DeltaC2,
	__global const float4* vertices,
	__global const float4* uniqueEdges,
	__global const btGpuFace* faces,
	__global const int*  indices,
	float4* sep,
	float* dmin)
{
	//positions are used in 4-component dot products, so .w must not contribute
	float4 originA = posA1;
	originA.w = 0.f;
	float4 originB = posB1;
	originB.w = 0.f;

	for (int ea = 0; ea < hullA->m_numUniqueEdges; ++ea)
	{
		const float4 edgeALocal = uniqueEdges[hullA->m_uniqueEdgesOffset+ea];
		const float4 edgeAWorld = qtRotate(ornA,edgeALocal);

		for (int eb = 0; eb < hullB->m_numUniqueEdges; ++eb)
		{
			const float4 edgeBLocal = uniqueEdges[hullB->m_uniqueEdgesOffset+eb];
			const float4 edgeBWorld = qtRotate(ornB,edgeBLocal);

			float4 axis = cross3(edgeAWorld,edgeBWorld);

			//(nearly) parallel edges give no usable axis
			if (IsAlmostZero(axis))
				continue;

			axis = normalize3(axis);
			if (dot3F4(DeltaC2,axis)<0)
				axis *= -1.f;

			float minA,maxA;
			float minB,maxB;
			project(hullA,originA,ornA,&axis,vertices, &minA, &maxA);
			project(hullB,originB,ornB,&axis,vertices, &minB, &maxB);

			if (maxA<minB || maxB<minA)
				return false;

			const float overlapAB = maxA - minB;
			const float overlapBA = maxB - minA;
			const float overlap = overlapAB<overlapBA ? overlapAB : overlapBA;

			if (overlap < *dmin)
			{
				*dmin = overlap;
				*sep = axis;
			}
		}
	}

	if (dot3F4(-DeltaC2,*sep) > 0.0f)
		*sep = -(*sep);

	return true;
}
799
800
801// work-in-progress
//Runs the separating-axis test for one entry of gpuCompoundPairs per
//work-item (work-items with id >= numCompoundPairs do nothing).
//Each pair is an int4: .x/.y are body indices, .z/.w are indices into
//gpuChildShapes. A negative child index means "use the body's own collidable
//and transform"; otherwise the child transform is composed with the body's.
//Output for pair i:
//  gpuHasCompoundSepNormalsOut[i] - set to 0 first; set to 1 only when both
//      shapes are SHAPE_CONVEX_HULL and none of the three SAT stages (faces
//      of A, faces of B, edge-edge) finds a separating axis
//  gpuCompoundSepNormalsOut[i]    - written only in that case, with the axis
//      of smallest overlap
//aabbs is not used by this kernel.
__kernel void   processCompoundPairsKernel( __global const int4* gpuCompoundPairs,
																					__global const BodyData* rigidBodies,
																					__global const btCollidableGpu* collidables,
																					__global const ConvexPolyhedronCL* convexShapes,
																					__global const float4* vertices,
																					__global const float4* uniqueEdges,
																					__global const btGpuFace* faces,
																					__global const int* indices,
																					__global btAabbCL* aabbs,
																					__global const btGpuChildShape* gpuChildShapes,
																					__global volatile float4* gpuCompoundSepNormalsOut,
																					__global volatile int* gpuHasCompoundSepNormalsOut,
																					int numCompoundPairs
																					)
{
	const int pairIdx = get_global_id(0);
	if (pairIdx >= numCompoundPairs)
		return;

	const int4 pair = gpuCompoundPairs[pairIdx];
	const int bodyIndexA = pair.x;
	const int bodyIndexB = pair.y;
	const int childShapeIndexA = pair.z;
	const int childShapeIndexB = pair.w;

	float4 ornA = rigidBodies[bodyIndexA].m_quat;
	float4 posA = rigidBodies[bodyIndexA].m_pos;
	float4 ornB = rigidBodies[bodyIndexB].m_quat;
	float4 posB = rigidBodies[bodyIndexB].m_pos;

	//resolve the collidable (and, for a child shape, its world transform) of side A
	int collidableIndexA;
	if (childShapeIndexA < 0)
	{
		collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
	}
	else
	{
		collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
		const float4 childPos = gpuChildShapes[childShapeIndexA].m_childPosition;
		const float4 childOrn = gpuChildShapes[childShapeIndexA].m_childOrientation;
		//the position must be computed with the body orientation, before ornA is replaced
		const float4 worldPos = qtRotate(ornA,childPos)+posA;
		const float4 worldOrn = qtMul(ornA,childOrn);
		posA = worldPos;
		ornA = worldOrn;
	}

	//same for side B
	int collidableIndexB;
	if (childShapeIndexB < 0)
	{
		collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
	}
	else
	{
		collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
		const float4 childPos = gpuChildShapes[childShapeIndexB].m_childPosition;
		const float4 childOrn = gpuChildShapes[childShapeIndexB].m_childOrientation;
		const float4 worldPos = qtRotate(ornB,childPos)+posB;
		const float4 worldOrn = qtMul(ornB,childOrn);
		posB = worldPos;
		ornB = worldOrn;
	}

	gpuHasCompoundSepNormalsOut[pairIdx] = 0;

	const int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
	const int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
	const int shapeTypeA = collidables[collidableIndexA].m_shapeType;
	const int shapeTypeB = collidables[collidableIndexB].m_shapeType;

	//only convex hull vs convex hull is handled here
	if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))
		return;

	__global const ConvexPolyhedronCL* hullA = &convexShapes[shapeIndexA];
	__global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndexB];

	float dmin = FLT_MAX;
	posA.w = 0.f;
	posB.w = 0.f;

	//vector between the world-space hull centres (B towards A)
	const float4 centerALocal = hullA->m_localCenter;
	const float4 centerA = transform(&centerALocal, &posA, &ornA);
	const float4 centerBLocal = hullB->m_localCenter;
	const float4 centerB = transform(&centerBLocal,&posB,&ornB);
	const float4 DeltaC2 = centerA - centerB;

	float4 sepNormal = make_float4(1,0,0,0);

	//each stage returns false as soon as it finds a separating axis
	if (!findSeparatingAxis(hullA, hullB,posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin))
		return;

	if (!findSeparatingAxis(hullB,hullA,posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin))
		return;

	if (!findSeparatingAxisEdgeEdge(hullA, hullB,posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin))
		return;

	gpuCompoundSepNormalsOut[pairIdx] = sepNormal;
	gpuHasCompoundSepNormalsOut[pairIdx] = 1;
}
917
918
//Converts three quantized coordinates back to floats: each vecIn[k] is
//divided by the matching component of `quantization`, the fourth component is
//set to 0, and bvhAabbMin (all four components) is added to the result.
inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)
{
		const float scaledX = (float)(vecIn[0]) / (quantization.x);
		const float scaledY = (float)(vecIn[1]) / (quantization.y);
		const float scaledZ = (float)(vecIn[2]) / (quantization.z);

		b3Float4 result = b3MakeFloat4(scaledX, scaledY, scaledZ, 0.f);
		result += bvhAabbMin;
		return result;
}
931
//__global-memory variant of MyUnQuantize: each vecIn[k] is divided by the
//matching component of `quantization`, the fourth component is set to 0, and
//bvhAabbMin (all four components) is added to the result.
inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)
{
		const float scaledX = (float)(vecIn[0]) / (quantization.x);
		const float scaledY = (float)(vecIn[1]) / (quantization.y);
		const float scaledZ = (float)(vecIn[2]) / (quantization.z);

		b3Float4 result = b3MakeFloat4(scaledX, scaledY, scaledZ, 0.f);
		result += bvhAabbMin;
		return result;
}
944
945
946// work-in-progress
947__kernel void   findCompoundPairsKernel( __global const int4* pairs,
948	__global const BodyData* rigidBodies,
949	__global const btCollidableGpu* collidables,
950	__global const ConvexPolyhedronCL* convexShapes,
951	__global const float4* vertices,
952	__global const float4* uniqueEdges,
953	__global const btGpuFace* faces,
954	__global const int* indices,
955	__global b3Aabb_t* aabbLocalSpace,
956	__global const btGpuChildShape* gpuChildShapes,
957	__global volatile int4* gpuCompoundPairsOut,
958	__global volatile int* numCompoundPairsOut,
959	__global const b3BvhSubtreeInfo* subtrees,
960	__global const b3QuantizedBvhNode* quantizedNodes,
961	__global const b3BvhInfo* bvhInfos,
962	int numPairs,
963	int maxNumCompoundPairsCapacity
964	)
965{
966
967	int i = get_global_id(0);
968
969	if (i<numPairs)
970	{
971		int bodyIndexA = pairs[i].x;
972		int bodyIndexB = pairs[i].y;
973
974		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
975		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
976
977		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
978		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
979
980
981		//once the broadphase avoids static-static pairs, we can remove this test
982		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
983		{
984			return;
985		}
986
987		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
988		{
989			int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;
990			int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;
991			int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;
992			int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;
993			int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;
994
995
996			int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;
997
998			float4 posA = rigidBodies[bodyIndexA].m_pos;
999			b3Quat ornA = rigidBodies[bodyIndexA].m_quat;
1000
1001			b3Quat ornB = rigidBodies[bodyIndexB].m_quat;
1002			float4 posB = rigidBodies[bodyIndexB].m_pos;
1003
1004
1005			for (int p=0;p<numSubTreesA;p++)
1006			{
1007				b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];
1008				//bvhInfos[bvhA].m_quantization
1009				b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1010				b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1011
1012				b3Float4 aabbAMinOut,aabbAMaxOut;
1013				float margin=0.f;
1014				b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);
1015
1016				for (int q=0;q<numSubTreesB;q++)
1017				{
1018					b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];
1019
1020					b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1021					b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1022
1023					b3Float4 aabbBMinOut,aabbBMaxOut;
1024					float margin=0.f;
1025					b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);
1026
1027
1028
1029					bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
1030					if (aabbOverlap)
1031					{
1032
1033						int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;
1034						int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;
1035
1036						int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;
1037						int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;
1038
1039
1040						b3Int2 nodeStack[B3_MAX_STACK_DEPTH];
1041						b3Int2 node0;
1042						node0.x = startNodeIndexA;
1043						node0.y = startNodeIndexB;
1044						int maxStackDepth = B3_MAX_STACK_DEPTH;
1045						int depth=0;
1046						nodeStack[depth++]=node0;
1047
1048						do
1049						{
1050							b3Int2 node = nodeStack[--depth];
1051
1052							b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1053							b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);
1054
1055							b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1056							b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);
1057
1058							float margin=0.f;
1059							b3Float4 aabbAMinOut,aabbAMaxOut;
1060							b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);
1061
1062							b3Float4 aabbBMinOut,aabbBMaxOut;
1063							b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);
1064
1065
1066							bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);
1067							if (nodeOverlap)
1068							{
1069								bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);
1070								bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]);
1071								bool isInternalA = !isLeafA;
1072								bool isInternalB = !isLeafB;
1073
1074								//fail, even though it might hit two leaf nodes
1075								if (depth+4>maxStackDepth && !(isLeafA && isLeafB))
1076								{
1077									//printf("Error: traversal exceeded maxStackDepth");
1078									continue;
1079								}
1080
1081								if(isInternalA)
1082								{
1083									int nodeAleftChild = node.x+1;
1084									bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);
1085									int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);
1086
1087									if(isInternalB)
1088									{
1089										int nodeBleftChild = node.y+1;
1090										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);
1091										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);
1092
1093										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);
1094										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);
1095										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);
1096										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);
1097									}
1098									else
1099									{
1100										nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);
1101										nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);
1102									}
1103								}
1104								else
1105								{
1106									if(isInternalB)
1107									{
1108										int nodeBleftChild = node.y+1;
1109										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);
1110										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);
1111										nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);
1112										nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);
1113									}
1114									else
1115									{
1116										int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1117										if (compoundPairIdx<maxNumCompoundPairsCapacity)
1118										{
1119											int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);
1120											int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);
1121											gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);
1122										}
1123									}
1124								}
1125							}
1126						} while (depth);
1127					}
1128				}
1129			}
1130
1131			return;
1132		}
1133
1134
1135
1136
1137
1138		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1139		{
1140
1141			if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1142			{
1143
1144				int numChildrenA = collidables[collidableIndexA].m_numChildShapes;
1145				for (int c=0;c<numChildrenA;c++)
1146				{
1147					int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;
1148					int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;
1149
1150					float4 posA = rigidBodies[bodyIndexA].m_pos;
1151					float4 ornA = rigidBodies[bodyIndexA].m_quat;
1152					float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;
1153					float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;
1154					float4 newPosA = qtRotate(ornA,childPosA)+posA;
1155					float4 newOrnA = qtMul(ornA,childOrnA);
1156
1157					int shapeIndexA = collidables[childColIndexA].m_shapeIndex;
1158					b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];
1159					float margin = 0.f;
1160
1161					b3Float4 aabbAMinWS;
1162					b3Float4 aabbAMaxWS;
1163
1164					b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,
1165						newPosA,
1166						newOrnA,
1167						&aabbAMinWS,&aabbAMaxWS);
1168
1169
1170					if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1171					{
1172						int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1173						for (int b=0;b<numChildrenB;b++)
1174						{
1175							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;
1176							int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1177							float4 ornB = rigidBodies[bodyIndexB].m_quat;
1178							float4 posB = rigidBodies[bodyIndexB].m_pos;
1179							float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1180							float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1181							float4 newPosB = transform(&childPosB,&posB,&ornB);
1182							float4 newOrnB = qtMul(ornB,childOrnB);
1183
1184							int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1185							b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];
1186
1187							b3Float4 aabbBMinWS;
1188							b3Float4 aabbBMaxWS;
1189
1190							b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,
1191								newPosB,
1192								newOrnB,
1193								&aabbBMinWS,&aabbBMaxWS);
1194
1195
1196
1197							bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);
1198							if (aabbOverlap)
1199							{
1200								int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1201								float dmin = FLT_MAX;
1202								float4 posA = newPosA;
1203								posA.w = 0.f;
1204								float4 posB = newPosB;
1205								posB.w = 0.f;
1206								float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1207								float4 ornA = newOrnA;
1208								float4 c0 = transform(&c0local, &posA, &ornA);
1209								float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1210								float4 ornB =newOrnB;
1211								float4 c1 = transform(&c1local,&posB,&ornB);
1212								const float4 DeltaC2 = c0 - c1;
1213
1214								{//
1215									int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1216									if (compoundPairIdx<maxNumCompoundPairsCapacity)
1217									{
1218										gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);
1219									}
1220								}//
1221							}//fi(1)
1222						} //for (int b=0
1223					}//if (collidables[collidableIndexB].
1224					else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1225					{
1226						if (1)
1227						{
1228							int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1229							float dmin = FLT_MAX;
1230							float4 posA = newPosA;
1231							posA.w = 0.f;
1232							float4 posB = rigidBodies[bodyIndexB].m_pos;
1233							posB.w = 0.f;
1234							float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1235							float4 ornA = newOrnA;
1236							float4 c0 = transform(&c0local, &posA, &ornA);
1237							float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1238							float4 ornB = rigidBodies[bodyIndexB].m_quat;
1239							float4 c1 = transform(&c1local,&posB,&ornB);
1240							const float4 DeltaC2 = c0 - c1;
1241
1242							{
1243								int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1244								if (compoundPairIdx<maxNumCompoundPairsCapacity)
1245								{
1246									gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1);
1247								}//if (compoundPairIdx<maxNumCompoundPairsCapacity)
1248							}//
1249						}//fi (1)
1250					}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1251				}//for (int b=0;b<numChildrenB;b++)
1252				return;
1253			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1254			if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)
1255				&& (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1256			{
1257				int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
1258				for (int b=0;b<numChildrenB;b++)
1259				{
1260					int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;
1261					int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1262					float4 ornB = rigidBodies[bodyIndexB].m_quat;
1263					float4 posB = rigidBodies[bodyIndexB].m_pos;
1264					float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1265					float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1266					float4 newPosB = qtRotate(ornB,childPosB)+posB;
1267					float4 newOrnB = qtMul(ornB,childOrnB);
1268
1269					int shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1270
1271
1272					//////////////////////////////////////
1273
1274					if (1)
1275					{
1276						int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1277						float dmin = FLT_MAX;
1278						float4 posA = rigidBodies[bodyIndexA].m_pos;
1279						posA.w = 0.f;
1280						float4 posB = newPosB;
1281						posB.w = 0.f;
1282						float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1283						float4 ornA = rigidBodies[bodyIndexA].m_quat;
1284						float4 c0 = transform(&c0local, &posA, &ornA);
1285						float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1286						float4 ornB =newOrnB;
1287						float4 c1 = transform(&c1local,&posB,&ornB);
1288						const float4 DeltaC2 = c0 - c1;
1289						{//
1290							int compoundPairIdx = atomic_inc(numCompoundPairsOut);
1291							if (compoundPairIdx<maxNumCompoundPairsCapacity)
1292							{
1293								gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB);
1294							}//fi (compoundPairIdx<maxNumCompoundPairsCapacity)
1295						}//
1296					}//fi (1)
1297				}//for (int b=0;b<numChildrenB;b++)
1298				return;
1299			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1300			return;
1301		}//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))
1302	}//i<numPairs
1303}
1304
1305// work-in-progress
1306__kernel void   findSeparatingAxisKernel( __global const int4* pairs,
1307																					__global const BodyData* rigidBodies,
1308																					__global const btCollidableGpu* collidables,
1309																					__global const ConvexPolyhedronCL* convexShapes,
1310																					__global const float4* vertices,
1311																					__global const float4* uniqueEdges,
1312																					__global const btGpuFace* faces,
1313																					__global const int* indices,
1314																					__global btAabbCL* aabbs,
1315																					__global volatile float4* separatingNormals,
1316																					__global volatile int* hasSeparatingAxis,
1317																					int numPairs
1318																					)
1319{
1320
1321	int i = get_global_id(0);
1322
1323	if (i<numPairs)
1324	{
1325
1326
1327		int bodyIndexA = pairs[i].x;
1328		int bodyIndexB = pairs[i].y;
1329
1330		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1331		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1332
1333		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1334		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1335
1336
1337		//once the broadphase avoids static-static pairs, we can remove this test
1338		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
1339		{
1340			hasSeparatingAxis[i] = 0;
1341			return;
1342		}
1343
1344
1345		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))
1346		{
1347			hasSeparatingAxis[i] = 0;
1348			return;
1349		}
1350
1351		if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))
1352		{
1353			hasSeparatingAxis[i] = 0;
1354			return;
1355		}
1356
1357		int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1358
1359		float dmin = FLT_MAX;
1360
1361		float4 posA = rigidBodies[bodyIndexA].m_pos;
1362		posA.w = 0.f;
1363		float4 posB = rigidBodies[bodyIndexB].m_pos;
1364		posB.w = 0.f;
1365		float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1366		float4 ornA = rigidBodies[bodyIndexA].m_quat;
1367		float4 c0 = transform(&c0local, &posA, &ornA);
1368		float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1369		float4 ornB =rigidBodies[bodyIndexB].m_quat;
1370		float4 c1 = transform(&c1local,&posB,&ornB);
1371		const float4 DeltaC2 = c0 - c1;
1372		float4 sepNormal;
1373
1374		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1375																								posB,ornB,
1376																								DeltaC2,
1377																								vertices,uniqueEdges,faces,
1378																								indices,&sepNormal,&dmin);
1379		hasSeparatingAxis[i] = 4;
1380		if (!sepA)
1381		{
1382			hasSeparatingAxis[i] = 0;
1383		} else
1384		{
1385			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,
1386																									posA,ornA,
1387																									DeltaC2,
1388																									vertices,uniqueEdges,faces,
1389																									indices,&sepNormal,&dmin);
1390
1391			if (!sepB)
1392			{
1393				hasSeparatingAxis[i] = 0;
1394			} else
1395			{
1396				bool sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1397																									posB,ornB,
1398																									DeltaC2,
1399																									vertices,uniqueEdges,faces,
1400																									indices,&sepNormal,&dmin);
1401				if (!sepEE)
1402				{
1403					hasSeparatingAxis[i] = 0;
1404				} else
1405				{
1406					hasSeparatingAxis[i] = 1;
1407					separatingNormals[i] = sepNormal;
1408				}
1409			}
1410		}
1411
1412	}
1413
1414}
1415
1416
1417__kernel void   findSeparatingAxisVertexFaceKernel( __global const int4* pairs,
1418																					__global const BodyData* rigidBodies,
1419																					__global const btCollidableGpu* collidables,
1420																					__global const ConvexPolyhedronCL* convexShapes,
1421																					__global const float4* vertices,
1422																					__global const float4* uniqueEdges,
1423																					__global const btGpuFace* faces,
1424																					__global const int* indices,
1425																					__global btAabbCL* aabbs,
1426																					__global volatile float4* separatingNormals,
1427																					__global volatile int* hasSeparatingAxis,
1428																					__global  float* dmins,
1429																					int numPairs
1430																					)
1431{
1432
1433	int i = get_global_id(0);
1434
1435	if (i<numPairs)
1436	{
1437
1438
1439		int bodyIndexA = pairs[i].x;
1440		int bodyIndexB = pairs[i].y;
1441
1442		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1443		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1444
1445		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1446		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1447
1448		hasSeparatingAxis[i] = 0;
1449
1450		//once the broadphase avoids static-static pairs, we can remove this test
1451		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))
1452		{
1453			return;
1454		}
1455
1456
1457		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))
1458		{
1459			return;
1460		}
1461
1462
1463		int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1464
1465		float dmin = FLT_MAX;
1466
1467		dmins[i] = dmin;
1468
1469		float4 posA = rigidBodies[bodyIndexA].m_pos;
1470		posA.w = 0.f;
1471		float4 posB = rigidBodies[bodyIndexB].m_pos;
1472		posB.w = 0.f;
1473		float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1474		float4 ornA = rigidBodies[bodyIndexA].m_quat;
1475		float4 c0 = transform(&c0local, &posA, &ornA);
1476		float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1477		float4 ornB =rigidBodies[bodyIndexB].m_quat;
1478		float4 c1 = transform(&c1local,&posB,&ornB);
1479		const float4 DeltaC2 = c0 - c1;
1480		float4 sepNormal;
1481
1482		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1483																								posB,ornB,
1484																								DeltaC2,
1485																								vertices,uniqueEdges,faces,
1486																								indices,&sepNormal,&dmin);
1487		hasSeparatingAxis[i] = 4;
1488		if (!sepA)
1489		{
1490			hasSeparatingAxis[i] = 0;
1491		} else
1492		{
1493			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,
1494																									posA,ornA,
1495																									DeltaC2,
1496																									vertices,uniqueEdges,faces,
1497																									indices,&sepNormal,&dmin);
1498
1499			if (sepB)
1500			{
1501				dmins[i] = dmin;
1502				hasSeparatingAxis[i] = 1;
1503				separatingNormals[i] = sepNormal;
1504			}
1505		}
1506
1507	}
1508
1509}
1510
1511
1512__kernel void   findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs,
1513																					__global const BodyData* rigidBodies,
1514																					__global const btCollidableGpu* collidables,
1515																					__global const ConvexPolyhedronCL* convexShapes,
1516																					__global const float4* vertices,
1517																					__global const float4* uniqueEdges,
1518																					__global const btGpuFace* faces,
1519																					__global const int* indices,
1520																					__global btAabbCL* aabbs,
1521																					__global  float4* separatingNormals,
1522																					__global  int* hasSeparatingAxis,
1523																					__global  float* dmins,
1524																					__global const float4* unitSphereDirections,
1525																					int numUnitSphereDirections,
1526																					int numPairs
1527																					)
1528{
1529
1530	int i = get_global_id(0);
1531
1532	if (i<numPairs)
1533	{
1534
1535		if (hasSeparatingAxis[i])
1536		{
1537
1538			int bodyIndexA = pairs[i].x;
1539			int bodyIndexB = pairs[i].y;
1540
1541			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1542			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1543
1544			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1545			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1546
1547
1548			int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1549
1550			float dmin = dmins[i];
1551
1552			float4 posA = rigidBodies[bodyIndexA].m_pos;
1553			posA.w = 0.f;
1554			float4 posB = rigidBodies[bodyIndexB].m_pos;
1555			posB.w = 0.f;
1556			float4 c0local = convexShapes[shapeIndexA].m_localCenter;
1557			float4 ornA = rigidBodies[bodyIndexA].m_quat;
1558			float4 c0 = transform(&c0local, &posA, &ornA);
1559			float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1560			float4 ornB =rigidBodies[bodyIndexB].m_quat;
1561			float4 c1 = transform(&c1local,&posB,&ornB);
1562			const float4 DeltaC2 = c0 - c1;
1563			float4 sepNormal = separatingNormals[i];
1564
1565
1566
1567			bool sepEE = false;
1568			int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;
1569			if (numEdgeEdgeDirections<=numUnitSphereDirections)
1570			{
1571				sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1572																									posB,ornB,
1573																									DeltaC2,
1574																									vertices,uniqueEdges,faces,
1575																									indices,&sepNormal,&dmin);
1576
1577					if (!sepEE)
1578					{
1579						hasSeparatingAxis[i] = 0;
1580					} else
1581					{
1582						hasSeparatingAxis[i] = 1;
1583						separatingNormals[i] = sepNormal;
1584					}
1585			}
1586			/*
1587			///else case is a separate kernel, to make Mac OSX OpenCL compiler happy
1588			else
1589			{
1590				sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,
1591																									posB,ornB,
1592																									DeltaC2,
1593																									vertices,unitSphereDirections,numUnitSphereDirections,
1594																									&sepNormal,&dmin);
1595					if (!sepEE)
1596					{
1597						hasSeparatingAxis[i] = 0;
1598					} else
1599					{
1600						hasSeparatingAxis[i] = 1;
1601						separatingNormals[i] = sepNormal;
1602					}
1603			}
1604			*/
1605		}		//if (hasSeparatingAxis[i])
1606	}//(i<numPairs)
1607}
1608
1609
1610
1611
1612
1613inline int	findClippingFaces(const float4 separatingNormal,
1614                      const ConvexPolyhedronCL* hullA,
1615					  __global const ConvexPolyhedronCL* hullB,
1616                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,
1617                       __global float4* worldVertsA1,
1618                      __global float4* worldNormalsA1,
1619                      __global float4* worldVertsB1,
1620                      int capacityWorldVerts,
1621                      const float minDist, float maxDist,
1622					  const float4* verticesA,
1623                      const btGpuFace* facesA,
1624                      const int* indicesA,
1625					  __global const float4* verticesB,
1626                      __global const btGpuFace* facesB,
1627                      __global const int* indicesB,
1628                      __global int4* clippingFaces, int pairIndex)
1629{
1630	int numContactsOut = 0;
1631	int numWorldVertsB1= 0;
1632
1633
1634	int closestFaceB=0;
1635	float dmax = -FLT_MAX;
1636
1637	{
1638		for(int face=0;face<hullB->m_numFaces;face++)
1639		{
1640			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,
1641                                              facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);
1642			const float4 WorldNormal = qtRotate(ornB, Normal);
1643			float d = dot3F4(WorldNormal,separatingNormal);
1644			if (d > dmax)
1645			{
1646				dmax = d;
1647				closestFaceB = face;
1648			}
1649		}
1650	}
1651
1652	{
1653		const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];
1654		int numVertices = polyB.m_numIndices;
1655        if (numVertices>capacityWorldVerts)
1656            numVertices = capacityWorldVerts;
1657
1658		for(int e0=0;e0<numVertices;e0++)
1659		{
1660            if (e0<capacityWorldVerts)
1661            {
1662                const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
1663                worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);
1664            }
1665		}
1666	}
1667
1668    int closestFaceA=0;
1669	{
1670		float dmin = FLT_MAX;
1671		for(int face=0;face<hullA->m_numFaces;face++)
1672		{
1673			const float4 Normal = make_float4(
1674                                              facesA[hullA->m_faceOffset+face].m_plane.x,
1675                                              facesA[hullA->m_faceOffset+face].m_plane.y,
1676                                              facesA[hullA->m_faceOffset+face].m_plane.z,
1677                                              0.f);
1678			const float4 faceANormalWS = qtRotate(ornA,Normal);
1679
1680			float d = dot3F4(faceANormalWS,separatingNormal);
1681			if (d < dmin)
1682			{
1683				dmin = d;
1684				closestFaceA = face;
1685                worldNormalsA1[pairIndex] = faceANormalWS;
1686			}
1687		}
1688	}
1689
1690    int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;
1691    if (numVerticesA>capacityWorldVerts)
1692       numVerticesA = capacityWorldVerts;
1693
1694	for(int e0=0;e0<numVerticesA;e0++)
1695	{
1696        if (e0<capacityWorldVerts)
1697        {
1698            const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];
1699            worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);
1700        }
1701    }
1702
1703    clippingFaces[pairIndex].x = closestFaceA;
1704    clippingFaces[pairIndex].y = closestFaceB;
1705    clippingFaces[pairIndex].z = numVerticesA;
1706    clippingFaces[pairIndex].w = numWorldVertsB1;
1707
1708
1709	return numContactsOut;
1710}
1711
1712
1713
1714
1715// work-in-progress
1716__kernel void   findConcaveSeparatingAxisKernel( __global int4* concavePairs,
1717																					__global const BodyData* rigidBodies,
1718																					__global const btCollidableGpu* collidables,
1719																					__global const ConvexPolyhedronCL* convexShapes,
1720																					__global const float4* vertices,
1721																					__global const float4* uniqueEdges,
1722																					__global const btGpuFace* faces,
1723																					__global const int* indices,
1724																					__global const btGpuChildShape* gpuChildShapes,
1725																					__global btAabbCL* aabbs,
1726																					__global float4* concaveSeparatingNormalsOut,
1727																					__global int* concaveHasSeparatingNormals,
1728																					__global int4* clippingFacesOut,
1729																					__global float4* worldVertsA1GPU,
1730																					__global float4*  worldNormalsAGPU,
1731																					__global float4* worldVertsB1GPU,
1732																					int vertexFaceCapacity,
1733																					int numConcavePairs
1734																					)
1735{
1736
1737	int i = get_global_id(0);
1738	if (i>=numConcavePairs)
1739		return;
1740
1741	concaveHasSeparatingNormals[i] = 0;
1742
1743	int pairIdx = i;
1744
1745	int bodyIndexA = concavePairs[i].x;
1746	int bodyIndexB = concavePairs[i].y;
1747
1748	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
1749	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;
1750
1751	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;
1752	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;
1753
1754	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&
1755		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)
1756	{
1757		concavePairs[pairIdx].w = -1;
1758		return;
1759	}
1760
1761
1762
1763	int numFacesA = convexShapes[shapeIndexA].m_numFaces;
1764	int numActualConcaveConvexTests = 0;
1765
1766	int f = concavePairs[i].z;
1767
1768	bool overlap = false;
1769
1770	ConvexPolyhedronCL convexPolyhedronA;
1771
1772	//add 3 vertices of the triangle
1773	convexPolyhedronA.m_numVertices = 3;
1774	convexPolyhedronA.m_vertexOffset = 0;
1775	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);
1776
1777	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];
1778	float4 triMinAabb, triMaxAabb;
1779	btAabbCL triAabb;
1780	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);
1781	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);
1782
1783	float4 verticesA[3];
1784	for (int i=0;i<3;i++)
1785	{
1786		int index = indices[face.m_indexOffset+i];
1787		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
1788		verticesA[i] = vert;
1789		localCenter += vert;
1790
1791		triAabb.m_min = min(triAabb.m_min,vert);
1792		triAabb.m_max = max(triAabb.m_max,vert);
1793
1794	}
1795
1796	overlap = true;
1797	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;
1798	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;
1799	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;
1800
1801	if (overlap)
1802	{
1803		float dmin = FLT_MAX;
1804		int hasSeparatingAxis=5;
1805		float4 sepAxis=make_float4(1,2,3,4);
1806
1807		int localCC=0;
1808		numActualConcaveConvexTests++;
1809
1810		//a triangle has 3 unique edges
1811		convexPolyhedronA.m_numUniqueEdges = 3;
1812		convexPolyhedronA.m_uniqueEdgesOffset = 0;
1813		float4 uniqueEdgesA[3];
1814
1815		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);
1816		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);
1817		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);
1818
1819
1820		convexPolyhedronA.m_faceOffset = 0;
1821
1822		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);
1823
1824		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];
1825		int indicesA[3+3+2+2+2];
1826		int curUsedIndices=0;
1827		int fidx=0;
1828
1829		//front size of triangle
1830		{
1831			facesA[fidx].m_indexOffset=curUsedIndices;
1832			indicesA[0] = 0;
1833			indicesA[1] = 1;
1834			indicesA[2] = 2;
1835			curUsedIndices+=3;
1836			float c = face.m_plane.w;
1837			facesA[fidx].m_plane.x = normal.x;
1838			facesA[fidx].m_plane.y = normal.y;
1839			facesA[fidx].m_plane.z = normal.z;
1840			facesA[fidx].m_plane.w = c;
1841			facesA[fidx].m_numIndices=3;
1842		}
1843		fidx++;
1844		//back size of triangle
1845		{
1846			facesA[fidx].m_indexOffset=curUsedIndices;
1847			indicesA[3]=2;
1848			indicesA[4]=1;
1849			indicesA[5]=0;
1850			curUsedIndices+=3;
1851			float c = dot(normal,verticesA[0]);
1852			float c1 = -face.m_plane.w;
1853			facesA[fidx].m_plane.x = -normal.x;
1854			facesA[fidx].m_plane.y = -normal.y;
1855			facesA[fidx].m_plane.z = -normal.z;
1856			facesA[fidx].m_plane.w = c;
1857			facesA[fidx].m_numIndices=3;
1858		}
1859		fidx++;
1860
1861		bool addEdgePlanes = true;
1862		if (addEdgePlanes)
1863		{
1864			int numVertices=3;
1865			int prevVertex = numVertices-1;
1866			for (int i=0;i<numVertices;i++)
1867			{
1868				float4 v0 = verticesA[i];
1869				float4 v1 = verticesA[prevVertex];
1870
1871				float4 edgeNormal = normalize(cross(normal,v1-v0));
1872				float c = -dot(edgeNormal,v0);
1873
1874				facesA[fidx].m_numIndices = 2;
1875				facesA[fidx].m_indexOffset=curUsedIndices;
1876				indicesA[curUsedIndices++]=i;
1877				indicesA[curUsedIndices++]=prevVertex;
1878
1879				facesA[fidx].m_plane.x = edgeNormal.x;
1880				facesA[fidx].m_plane.y = edgeNormal.y;
1881				facesA[fidx].m_plane.z = edgeNormal.z;
1882				facesA[fidx].m_plane.w = c;
1883				fidx++;
1884				prevVertex = i;
1885			}
1886		}
1887		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;
1888		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);
1889
1890
1891		float4 posA = rigidBodies[bodyIndexA].m_pos;
1892		posA.w = 0.f;
1893		float4 posB = rigidBodies[bodyIndexB].m_pos;
1894		posB.w = 0.f;
1895
1896		float4 ornA = rigidBodies[bodyIndexA].m_quat;
1897		float4 ornB =rigidBodies[bodyIndexB].m_quat;
1898
1899
1900
1901
1902		///////////////////
1903		///compound shape support
1904
1905		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)
1906		{
1907			int compoundChild = concavePairs[pairIdx].w;
1908			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;
1909			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;
1910			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;
1911			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;
1912			float4 newPosB = transform(&childPosB,&posB,&ornB);
1913			float4 newOrnB = qtMul(ornB,childOrnB);
1914			posB = newPosB;
1915			ornB = newOrnB;
1916			shapeIndexB = collidables[childColIndexB].m_shapeIndex;
1917		}
1918		//////////////////
1919
1920		float4 c0local = convexPolyhedronA.m_localCenter;
1921		float4 c0 = transform(&c0local, &posA, &ornA);
1922		float4 c1local = convexShapes[shapeIndexB].m_localCenter;
1923		float4 c1 = transform(&c1local,&posB,&ornB);
1924		const float4 DeltaC2 = c0 - c1;
1925
1926
1927		bool sepA = findSeparatingAxisLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],
1928												posA,ornA,
1929												posB,ornB,
1930												DeltaC2,
1931												verticesA,uniqueEdgesA,facesA,indicesA,
1932												vertices,uniqueEdges,faces,indices,
1933												&sepAxis,&dmin);
1934		hasSeparatingAxis = 4;
1935		if (!sepA)
1936		{
1937			hasSeparatingAxis = 0;
1938		} else
1939		{
1940			bool sepB = findSeparatingAxisLocalB(	&convexShapes[shapeIndexB],&convexPolyhedronA,
1941												posB,ornB,
1942												posA,ornA,
1943												DeltaC2,
1944												vertices,uniqueEdges,faces,indices,
1945												verticesA,uniqueEdgesA,facesA,indicesA,
1946												&sepAxis,&dmin);
1947
1948			if (!sepB)
1949			{
1950				hasSeparatingAxis = 0;
1951			} else
1952			{
1953				bool sepEE = findSeparatingAxisEdgeEdgeLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],
1954															posA,ornA,
1955															posB,ornB,
1956															DeltaC2,
1957															verticesA,uniqueEdgesA,facesA,indicesA,
1958															vertices,uniqueEdges,faces,indices,
1959															&sepAxis,&dmin);
1960
1961				if (!sepEE)
1962				{
1963					hasSeparatingAxis = 0;
1964				} else
1965				{
1966					hasSeparatingAxis = 1;
1967				}
1968			}
1969		}
1970
1971		if (hasSeparatingAxis)
1972		{
1973			sepAxis.w = dmin;
1974			concaveSeparatingNormalsOut[pairIdx]=sepAxis;
1975			concaveHasSeparatingNormals[i]=1;
1976
1977
1978			float minDist = -1e30f;
1979			float maxDist = 0.02f;
1980
1981
1982
1983			findClippingFaces(sepAxis,
1984                     &convexPolyhedronA,
1985					 &convexShapes[shapeIndexB],
1986					 posA,ornA,
1987					 posB,ornB,
1988                      worldVertsA1GPU,
1989                      worldNormalsAGPU,
1990                      worldVertsB1GPU,
1991					  vertexFaceCapacity,
1992                      minDist, maxDist,
1993                      verticesA,
1994                      facesA,
1995                      indicesA,
1996 					  vertices,
1997                      faces,
1998                      indices,
1999                      clippingFacesOut, pairIdx);
2000
2001
2002		} else
2003		{
2004			//mark this pair as in-active
2005			concavePairs[pairIdx].w = -1;
2006		}
2007	}
2008	else
2009	{
2010		//mark this pair as in-active
2011		concavePairs[pairIdx].w = -1;
2012	}
2013
2014	concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts
2015}
2016
2017
2018
2019