1/*
2Copyright (c) 2013 Advanced Micro Devices, Inc.
3
4This software is provided 'as-is', without any express or implied warranty.
5In no event will the authors be held liable for any damages arising from the use of this software.
6Permission is granted to anyone to use this software for any purpose,
7including commercial applications, and to alter it and redistribute it freely,
8subject to the following restrictions:
9
101. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
112. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
123. This notice may not be removed or altered from any source distribution.
13*/
14//Originally written by Erwin Coumans
15
16#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h"
17
18#pragma OPENCL EXTENSION cl_amd_printf : enable
19#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
20#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
21#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
22#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable
23
24
25#ifdef cl_ext_atomic_counters_32
26#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable
27#else
28#define counter32_t volatile global int*
29#endif
30
31typedef unsigned int u32;
32typedef unsigned short u16;
33typedef unsigned char u8;
34
35#define GET_GROUP_IDX get_group_id(0)
36#define GET_LOCAL_IDX get_local_id(0)
37#define GET_GLOBAL_IDX get_global_id(0)
38#define GET_GROUP_SIZE get_local_size(0)
39#define GET_NUM_GROUPS get_num_groups(0)
40#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)
41#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)
42#define AtomInc(x) atom_inc(&(x))
43#define AtomInc1(x, out) out = atom_inc(&(x))
44#define AppendInc(x, out) out = atomic_inc(x)
45#define AtomAdd(x, value) atom_add(&(x), value)
46#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )
47#define AtomXhg(x, value) atom_xchg ( &(x), value )
48
49
50#define SELECT_UINT4( b, a, condition ) select( b,a,condition )
51
52#define make_float4 (float4)
53#define make_float2 (float2)
54#define make_uint4 (uint4)
55#define make_int4 (int4)
56#define make_uint2 (uint2)
57#define make_int2 (int2)
58
59
60#define max2 max
61#define min2 min
62
63
64///////////////////////////////////////
65//	Vector
66///////////////////////////////////////
67__inline
68float fastDiv(float numerator, float denominator)
69{
70	return native_divide(numerator, denominator);
71//	return numerator/denominator;
72}
73
74__inline
75float4 fastDiv4(float4 numerator, float4 denominator)
76{
77	return native_divide(numerator, denominator);
78}
79
80__inline
81float fastSqrtf(float f2)
82{
83	return native_sqrt(f2);
84//	return sqrt(f2);
85}
86
87__inline
88float fastRSqrt(float f2)
89{
90	return native_rsqrt(f2);
91}
92
93__inline
94float fastLength4(float4 v)
95{
96	return fast_length(v);
97}
98
99__inline
100float4 fastNormalize4(float4 v)
101{
102	return fast_normalize(v);
103}
104
105
106__inline
107float sqrtf(float a)
108{
109//	return sqrt(a);
110	return native_sqrt(a);
111}
112
113__inline
114float4 cross3(float4 a1, float4 b1)
115{
116
117	float4 	a=make_float4(a1.xyz,0.f);
118	float4 	b=make_float4(b1.xyz,0.f);
119	//float4 	a=a1;
120	//float4 	b=b1;
121	return cross(a,b);
122}
123
124__inline
125float dot3F4(float4 a, float4 b)
126{
127	float4 a1 = make_float4(a.xyz,0.f);
128	float4 b1 = make_float4(b.xyz,0.f);
129	return dot(a1, b1);
130}
131
132__inline
133float length3(const float4 a)
134{
135	return sqrtf(dot3F4(a,a));
136}
137
138__inline
139float dot4(const float4 a, const float4 b)
140{
141	return dot( a, b );
142}
143
144//	for height
145__inline
146float dot3w1(const float4 point, const float4 eqn)
147{
148	return dot3F4(point,eqn) + eqn.w;
149}
150
151__inline
152float4 normalize3(const float4 a)
153{
154	float4 n = make_float4(a.x, a.y, a.z, 0.f);
155	return fastNormalize4( n );
156//	float length = sqrtf(dot3F4(a, a));
157//	return 1.f/length * a;
158}
159
160__inline
161float4 normalize4(const float4 a)
162{
163	float length = sqrtf(dot4(a, a));
164	return 1.f/length * a;
165}
166
167__inline
168float4 createEquation(const float4 a, const float4 b, const float4 c)
169{
170	float4 eqn;
171	float4 ab = b-a;
172	float4 ac = c-a;
173	eqn = normalize3( cross3(ab, ac) );
174	eqn.w = -dot3F4(eqn,a);
175	return eqn;
176}
177
178///////////////////////////////////////
179//	Matrix3x3
180///////////////////////////////////////
181
182typedef struct
183{
184	float4 m_row[3];
185}Matrix3x3;
186
187__inline
188Matrix3x3 mtZero();
189
190__inline
191Matrix3x3 mtIdentity();
192
193__inline
194Matrix3x3 mtTranspose(Matrix3x3 m);
195
196__inline
197Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);
198
199__inline
200float4 mtMul1(Matrix3x3 a, float4 b);
201
202__inline
203float4 mtMul3(float4 a, Matrix3x3 b);
204
205__inline
206Matrix3x3 mtZero()
207{
208	Matrix3x3 m;
209	m.m_row[0] = (float4)(0.f);
210	m.m_row[1] = (float4)(0.f);
211	m.m_row[2] = (float4)(0.f);
212	return m;
213}
214
215__inline
216Matrix3x3 mtIdentity()
217{
218	Matrix3x3 m;
219	m.m_row[0] = (float4)(1,0,0,0);
220	m.m_row[1] = (float4)(0,1,0,0);
221	m.m_row[2] = (float4)(0,0,1,0);
222	return m;
223}
224
225__inline
226Matrix3x3 mtTranspose(Matrix3x3 m)
227{
228	Matrix3x3 out;
229	out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);
230	out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);
231	out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);
232	return out;
233}
234
235__inline
236Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)
237{
238	Matrix3x3 transB;
239	transB = mtTranspose( b );
240	Matrix3x3 ans;
241	//	why this doesn't run when 0ing in the for{}
242	a.m_row[0].w = 0.f;
243	a.m_row[1].w = 0.f;
244	a.m_row[2].w = 0.f;
245	for(int i=0; i<3; i++)
246	{
247//	a.m_row[i].w = 0.f;
248		ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);
249		ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);
250		ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);
251		ans.m_row[i].w = 0.f;
252	}
253	return ans;
254}
255
256__inline
257float4 mtMul1(Matrix3x3 a, float4 b)
258{
259	float4 ans;
260	ans.x = dot3F4( a.m_row[0], b );
261	ans.y = dot3F4( a.m_row[1], b );
262	ans.z = dot3F4( a.m_row[2], b );
263	ans.w = 0.f;
264	return ans;
265}
266
267__inline
268float4 mtMul3(float4 a, Matrix3x3 b)
269{
270	float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);
271	float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);
272	float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);
273
274	float4 ans;
275	ans.x = dot3F4( a, colx );
276	ans.y = dot3F4( a, coly );
277	ans.z = dot3F4( a, colz );
278	return ans;
279}
280
281///////////////////////////////////////
282//	Quaternion
283///////////////////////////////////////
284
285typedef float4 Quaternion;
286
287__inline
288Quaternion qtMul(Quaternion a, Quaternion b);
289
290__inline
291Quaternion qtNormalize(Quaternion in);
292
293__inline
294float4 qtRotate(Quaternion q, float4 vec);
295
296__inline
297Quaternion qtInvert(Quaternion q);
298
299
300
301
302
303__inline
304Quaternion qtMul(Quaternion a, Quaternion b)
305{
306	Quaternion ans;
307	ans = cross3( a, b );
308	ans += a.w*b+b.w*a;
309//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);
310	ans.w = a.w*b.w - dot3F4(a, b);
311	return ans;
312}
313
314__inline
315Quaternion qtNormalize(Quaternion in)
316{
317	return fastNormalize4(in);
318//	in /= length( in );
319//	return in;
320}
321__inline
322float4 qtRotate(Quaternion q, float4 vec)
323{
324	Quaternion qInv = qtInvert( q );
325	float4 vcpy = vec;
326	vcpy.w = 0.f;
327	float4 out = qtMul(qtMul(q,vcpy),qInv);
328	return out;
329}
330
331__inline
332Quaternion qtInvert(Quaternion q)
333{
334	return (Quaternion)(-q.xyz, q.w);
335}
336
337__inline
338float4 qtInvRotate(const Quaternion q, float4 vec)
339{
340	return qtRotate( qtInvert( q ), vec );
341}
342
343
344
345
346#define WG_SIZE 64
347
348typedef struct
349{
350	float4 m_pos;
351	Quaternion m_quat;
352	float4 m_linVel;
353	float4 m_angVel;
354
355	u32 m_shapeIdx;
356	float m_invMass;
357	float m_restituitionCoeff;
358	float m_frictionCoeff;
359} Body;
360
361
362
363typedef struct
364{
365	Matrix3x3 m_invInertia;
366	Matrix3x3 m_initInvInertia;
367} Shape;
368
369typedef struct
370{
371	float4 m_linear;
372	float4 m_worldPos[4];
373	float4 m_center;
374	float m_jacCoeffInv[4];
375	float m_b[4];
376	float m_appliedRambdaDt[4];
377
378	float m_fJacCoeffInv[2];
379	float m_fAppliedRambdaDt[2];
380
381	u32 m_bodyA;
382	u32 m_bodyB;
383	int m_batchIdx;
384	u32 m_paddings;
385} Constraint4;
386
387
388
389
390
391
392__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)
393{
394	int i = GET_GLOBAL_IDX;
395
396	if( i < numContactManifolds)
397	{
398		int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;
399		bool isFixedA = (pa <0) || (pa == fixedBodyIndex);
400		int bodyIndexA = abs(pa);
401		if (!isFixedA)
402		{
403			 AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x);
404		}
405		barrier(CLK_GLOBAL_MEM_FENCE);
406		int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;
407		bool isFixedB = (pb <0) || (pb == fixedBodyIndex);
408		int bodyIndexB = abs(pb);
409		if (!isFixedB)
410		{
411			AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y);
412		}
413	}
414}
415
416__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies)
417{
418	int i = GET_GLOBAL_IDX;
419
420	if( i < numSplitBodies)
421	{
422		linearVelocities[i] = make_float4(0);
423		angularVelocities[i] = make_float4(0);
424	}
425}
426
427
428__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,
429__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)
430{
431	int i = GET_GLOBAL_IDX;
432	if (i<numBodies)
433	{
434		if (gBodies[i].m_invMass)
435		{
436			int bodyOffset = offsetSplitBodies[i];
437			int count = bodyCount[i];
438			float factor = 1.f/((float)count);
439			float4 averageLinVel = make_float4(0.f);
440			float4 averageAngVel = make_float4(0.f);
441
442			for (int j=0;j<count;j++)
443			{
444				averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
445				averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
446			}
447
448			for (int j=0;j<count;j++)
449			{
450				deltaLinearVelocities[bodyOffset+j] = averageLinVel;
451				deltaAngularVelocities[bodyOffset+j] = averageAngVel;
452			}
453
454		}//bodies[i].m_invMass
455	}//i<numBodies
456}
457
458
459
460void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)
461{
462	*linear = make_float4(n.xyz,0.f);
463	*angular0 = cross3(r0, n);
464	*angular1 = -cross3(r1, n);
465}
466
467
468float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )
469{
470	return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);
471}
472
473
474float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,
475					float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB)
476{
477	//	linear0,1 are normlized
478	float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;
479	float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);
480	float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;
481	float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);
482	return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);
483}
484
485
486void btPlaneSpace1 (float4 n, float4* p, float4* q);
487 void btPlaneSpace1 (float4 n, float4* p, float4* q)
488{
489  if (fabs(n.z) > 0.70710678f) {
490    // choose p in y-z plane
491    float a = n.y*n.y + n.z*n.z;
492    float k = 1.f/sqrt(a);
493    p[0].x = 0;
494	p[0].y = -n.z*k;
495	p[0].z = n.y*k;
496    // set q = n x p
497    q[0].x = a*k;
498	q[0].y = -n.x*p[0].z;
499	q[0].z = n.x*p[0].y;
500  }
501  else {
502    // choose p in x-y plane
503    float a = n.x*n.x + n.y*n.y;
504    float k = 1.f/sqrt(a);
505    p[0].x = -n.y*k;
506	p[0].y = n.x*k;
507	p[0].z = 0;
508    // set q = n x p
509    q[0].x = -n.z*p[0].y;
510	q[0].y = n.z*p[0].x;
511	q[0].z = a*k;
512  }
513}
514
515
516
517
518
519void solveContact(__global Constraint4* cs,
520			float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,
521			float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,
522			float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB)
523{
524	float minRambdaDt = 0;
525	float maxRambdaDt = FLT_MAX;
526
527	for(int ic=0; ic<4; ic++)
528	{
529		if( cs->m_jacCoeffInv[ic] == 0.f ) continue;
530
531		float4 angular0, angular1, linear;
532		float4 r0 = cs->m_worldPos[ic] - posA;
533		float4 r1 = cs->m_worldPos[ic] - posB;
534		setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 );
535
536
537
538		float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1,
539			*linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];
540		rambdaDt *= cs->m_jacCoeffInv[ic];
541
542
543		{
544			float prevSum = cs->m_appliedRambdaDt[ic];
545			float updated = prevSum;
546			updated += rambdaDt;
547			updated = max2( updated, minRambdaDt );
548			updated = min2( updated, maxRambdaDt );
549			rambdaDt = updated - prevSum;
550			cs->m_appliedRambdaDt[ic] = updated;
551		}
552
553
554		float4 linImp0 = invMassA*linear*rambdaDt;
555		float4 linImp1 = invMassB*(-linear)*rambdaDt;
556		float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
557		float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;
558
559
560		if (invMassA)
561		{
562			*dLinVelA += linImp0;
563			*dAngVelA += angImp0;
564		}
565		if (invMassB)
566		{
567			*dLinVelB += linImp1;
568			*dAngVelB += angImp1;
569		}
570	}
571}
572
573
574//	solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);
575
576
577void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,
578__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,
579__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)
580{
581
582	//float frictionCoeff = ldsCs[0].m_linear.w;
583	int aIdx = ldsCs[0].m_bodyA;
584	int bIdx = ldsCs[0].m_bodyB;
585
586	float4 posA = gBodies[aIdx].m_pos;
587	float4 linVelA = gBodies[aIdx].m_linVel;
588	float4 angVelA = gBodies[aIdx].m_angVel;
589	float invMassA = gBodies[aIdx].m_invMass;
590	Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;
591
592	float4 posB = gBodies[bIdx].m_pos;
593	float4 linVelB = gBodies[bIdx].m_linVel;
594	float4 angVelB = gBodies[bIdx].m_angVel;
595	float invMassB = gBodies[bIdx].m_invMass;
596	Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;
597
598
599	float4 dLinVelA = make_float4(0,0,0,0);
600	float4 dAngVelA = make_float4(0,0,0,0);
601	float4 dLinVelB = make_float4(0,0,0,0);
602	float4 dAngVelB = make_float4(0,0,0,0);
603
604	int bodyOffsetA = offsetSplitBodies[aIdx];
605	int constraintOffsetA = contactConstraintOffsets[0].x;
606	int splitIndexA = bodyOffsetA+constraintOffsetA;
607
608	if (invMassA)
609	{
610		dLinVelA = deltaLinearVelocities[splitIndexA];
611		dAngVelA = deltaAngularVelocities[splitIndexA];
612	}
613
614	int bodyOffsetB = offsetSplitBodies[bIdx];
615	int constraintOffsetB = contactConstraintOffsets[0].y;
616	int splitIndexB= bodyOffsetB+constraintOffsetB;
617
618	if (invMassB)
619	{
620		dLinVelB = deltaLinearVelocities[splitIndexB];
621		dAngVelB = deltaAngularVelocities[splitIndexB];
622	}
623
624	solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,
625			posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB);
626
627	if (invMassA)
628	{
629		deltaLinearVelocities[splitIndexA] = dLinVelA;
630		deltaAngularVelocities[splitIndexA] = dAngVelA;
631	}
632	if (invMassB)
633	{
634		deltaLinearVelocities[splitIndexB] = dLinVelB;
635		deltaAngularVelocities[splitIndexB] = dAngVelB;
636	}
637
638}
639
640
641__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,
642__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,
643float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds
644)
645{
646	int i = GET_GLOBAL_IDX;
647	if (i<numManifolds)
648	{
649		solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);
650	}
651}
652
653
654
655
656void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,
657							__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,
658							__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)
659{
660	float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w;
661	int aIdx = ldsCs[0].m_bodyA;
662	int bIdx = ldsCs[0].m_bodyB;
663
664
665	float4 posA = gBodies[aIdx].m_pos;
666	float4 linVelA = gBodies[aIdx].m_linVel;
667	float4 angVelA = gBodies[aIdx].m_angVel;
668	float invMassA = gBodies[aIdx].m_invMass;
669	Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;
670
671	float4 posB = gBodies[bIdx].m_pos;
672	float4 linVelB = gBodies[bIdx].m_linVel;
673	float4 angVelB = gBodies[bIdx].m_angVel;
674	float invMassB = gBodies[bIdx].m_invMass;
675	Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;
676
677
678	float4 dLinVelA = make_float4(0,0,0,0);
679	float4 dAngVelA = make_float4(0,0,0,0);
680	float4 dLinVelB = make_float4(0,0,0,0);
681	float4 dAngVelB = make_float4(0,0,0,0);
682
683	int bodyOffsetA = offsetSplitBodies[aIdx];
684	int constraintOffsetA = contactConstraintOffsets[0].x;
685	int splitIndexA = bodyOffsetA+constraintOffsetA;
686
687	if (invMassA)
688	{
689		dLinVelA = deltaLinearVelocities[splitIndexA];
690		dAngVelA = deltaAngularVelocities[splitIndexA];
691	}
692
693	int bodyOffsetB = offsetSplitBodies[bIdx];
694	int constraintOffsetB = contactConstraintOffsets[0].y;
695	int splitIndexB= bodyOffsetB+constraintOffsetB;
696
697	if (invMassB)
698	{
699		dLinVelB = deltaLinearVelocities[splitIndexB];
700		dAngVelB = deltaAngularVelocities[splitIndexB];
701	}
702
703
704
705
706	{
707		float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
708		float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
709
710		float sum = 0;
711		for(int j=0; j<4; j++)
712		{
713			sum +=ldsCs[0].m_appliedRambdaDt[j];
714		}
715		frictionCoeff = 0.7f;
716		for(int j=0; j<4; j++)
717		{
718			maxRambdaDt[j] = frictionCoeff*sum;
719			minRambdaDt[j] = -maxRambdaDt[j];
720		}
721
722
723//		solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,
724//			posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );
725
726
727		{
728
729			__global Constraint4* cs = ldsCs;
730
731			if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;
732			const float4 center = cs->m_center;
733
734			float4 n = -cs->m_linear;
735
736			float4 tangent[2];
737			btPlaneSpace1(n,&tangent[0],&tangent[1]);
738			float4 angular0, angular1, linear;
739			float4 r0 = center - posA;
740			float4 r1 = center - posB;
741			for(int i=0; i<2; i++)
742			{
743				setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );
744				float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
745											linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB );
746				rambdaDt *= cs->m_fJacCoeffInv[i];
747
748				{
749					float prevSum = cs->m_fAppliedRambdaDt[i];
750					float updated = prevSum;
751					updated += rambdaDt;
752					updated = max2( updated, minRambdaDt[i] );
753					updated = min2( updated, maxRambdaDt[i] );
754					rambdaDt = updated - prevSum;
755					cs->m_fAppliedRambdaDt[i] = updated;
756				}
757
758				float4 linImp0 = invMassA*linear*rambdaDt;
759				float4 linImp1 = invMassB*(-linear)*rambdaDt;
760				float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;
761				float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;
762
763				dLinVelA += linImp0;
764				dAngVelA += angImp0;
765				dLinVelB += linImp1;
766				dAngVelB += angImp1;
767			}
768			{	//	angular damping for point constraint
769				float4 ab = normalize3( posB - posA );
770				float4 ac = normalize3( center - posA );
771				if( dot3F4( ab, ac ) > 0.95f  || (invMassA == 0.f || invMassB == 0.f))
772				{
773					float angNA = dot3F4( n, angVelA );
774					float angNB = dot3F4( n, angVelB );
775
776					dAngVelA -= (angNA*0.1f)*n;
777					dAngVelB -= (angNB*0.1f)*n;
778				}
779			}
780		}
781
782
783
784	}
785
786	if (invMassA)
787	{
788		deltaLinearVelocities[splitIndexA] = dLinVelA;
789		deltaAngularVelocities[splitIndexA] = dAngVelA;
790	}
791	if (invMassB)
792	{
793		deltaLinearVelocities[splitIndexB] = dLinVelB;
794		deltaAngularVelocities[splitIndexB] = dAngVelB;
795	}
796
797
798}
799
800
801__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,
802										__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,
803										__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,
804										float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds
805)
806{
807	int i = GET_GLOBAL_IDX;
808	if (i<numManifolds)
809	{
810		solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);
811	}
812}
813
814
815__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,
816									__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)
817{
818	int i = GET_GLOBAL_IDX;
819	if (i<numBodies)
820	{
821		if (gBodies[i].m_invMass)
822		{
823			int bodyOffset = offsetSplitBodies[i];
824			int count = bodyCount[i];
825			if (count)
826			{
827				gBodies[i].m_linVel += deltaLinearVelocities[bodyOffset];
828				gBodies[i].m_angVel += deltaAngularVelocities[bodyOffset];
829			}
830		}
831	}
832}
833
834
835
836void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,
837	const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB,
838	__global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,
839	Constraint4* dstC )
840{
841	dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
842	dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
843
844	float dtInv = 1.f/dt;
845	for(int ic=0; ic<4; ic++)
846	{
847		dstC->m_appliedRambdaDt[ic] = 0.f;
848	}
849	dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
850
851
852	dstC->m_linear = src->m_worldNormalOnB;
853	dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );
854	for(int ic=0; ic<4; ic++)
855	{
856		float4 r0 = src->m_worldPosB[ic] - posA;
857		float4 r1 = src->m_worldPosB[ic] - posB;
858
859		if( ic >= src->m_worldNormalOnB.w )//npoints
860		{
861			dstC->m_jacCoeffInv[ic] = 0.f;
862			continue;
863		}
864
865		float relVelN;
866		{
867			float4 linear, angular0, angular1;
868			setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);
869
870			dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
871				invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);
872
873			relVelN = calcRelVel(linear, -linear, angular0, angular1,
874				linVelA, angVelA, linVelB, angVelB);
875
876			float e = 0.f;//src->getRestituitionCoeff();
877			if( relVelN*relVelN < 0.004f ) e = 0.f;
878
879			dstC->m_b[ic] = e*relVelN;
880			//float penetration = src->m_worldPosB[ic].w;
881			dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;
882			dstC->m_appliedRambdaDt[ic] = 0.f;
883		}
884	}
885
886	if( src->m_worldNormalOnB.w > 0 )//npoints
887	{	//	prepare friction
888		float4 center = make_float4(0.f);
889		for(int i=0; i<src->m_worldNormalOnB.w; i++)
890			center += src->m_worldPosB[i];
891		center /= (float)src->m_worldNormalOnB.w;
892
893		float4 tangent[2];
894		btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]);
895
896		float4 r[2];
897		r[0] = center - posA;
898		r[1] = center - posB;
899
900		for(int i=0; i<2; i++)
901		{
902			float4 linear, angular0, angular1;
903			setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);
904
905			dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
906				invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);
907			dstC->m_fAppliedRambdaDt[i] = 0.f;
908		}
909		dstC->m_center = center;
910	}
911
912	for(int i=0; i<4; i++)
913	{
914		if( i<src->m_worldNormalOnB.w )
915		{
916			dstC->m_worldPos[i] = src->m_worldPosB[i];
917		}
918		else
919		{
920			dstC->m_worldPos[i] = make_float4(0.f);
921		}
922	}
923}
924
925
926__kernel
927__attribute__((reqd_work_group_size(WG_SIZE,1,1)))
928void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut,
929__global const unsigned int* bodyCount,
930int nContacts,
931float dt,
932float positionDrift,
933float positionConstraintCoeff
934)
935{
936	int gIdx = GET_GLOBAL_IDX;
937
938	if( gIdx < nContacts )
939	{
940		int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);
941		int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
942
943		float4 posA = gBodies[aIdx].m_pos;
944		float4 linVelA = gBodies[aIdx].m_linVel;
945		float4 angVelA = gBodies[aIdx].m_angVel;
946		float invMassA = gBodies[aIdx].m_invMass;
947		Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;
948
949		float4 posB = gBodies[bIdx].m_pos;
950		float4 linVelB = gBodies[bIdx].m_linVel;
951		float4 angVelB = gBodies[bIdx].m_angVel;
952		float invMassB = gBodies[bIdx].m_invMass;
953		Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;
954
955		Constraint4 cs;
956
957		float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1;
958		float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1;
959
960    	setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
961			&gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,
962			&cs  );
963
964		cs.m_batchIdx = gContact[gIdx].m_batchIdx;
965
966		gConstraintOut[gIdx] = cs;
967	}
968}