1/* 2Copyright (c) 2013 Advanced Micro Devices, Inc. 3 4This software is provided 'as-is', without any express or implied warranty. 5In no event will the authors be held liable for any damages arising from the use of this software. 6Permission is granted to anyone to use this software for any purpose, 7including commercial applications, and to alter it and redistribute it freely, 8subject to the following restrictions: 9 101. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 112. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 123. This notice may not be removed or altered from any source distribution. 13*/ 14//Originally written by Erwin Coumans 15 16#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" 17 18#pragma OPENCL EXTENSION cl_amd_printf : enable 19#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable 20#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable 21#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable 22#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable 23 24 25#ifdef cl_ext_atomic_counters_32 26#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable 27#else 28#define counter32_t volatile global int* 29#endif 30 31typedef unsigned int u32; 32typedef unsigned short u16; 33typedef unsigned char u8; 34 35#define GET_GROUP_IDX get_group_id(0) 36#define GET_LOCAL_IDX get_local_id(0) 37#define GET_GLOBAL_IDX get_global_id(0) 38#define GET_GROUP_SIZE get_local_size(0) 39#define GET_NUM_GROUPS get_num_groups(0) 40#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) 41#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) 42#define AtomInc(x) atom_inc(&(x)) 43#define AtomInc1(x, out) out = atom_inc(&(x)) 44#define AppendInc(x, out) out = atomic_inc(x) 45#define AtomAdd(x, value) atom_add(&(x), value) 46#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) 47#define AtomXhg(x, value) atom_xchg ( &(x), value ) 48 49 50#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) 51 52#define make_float4 (float4) 53#define make_float2 (float2) 54#define make_uint4 (uint4) 55#define make_int4 (int4) 56#define make_uint2 (uint2) 57#define make_int2 (int2) 58 59 60#define max2 max 61#define min2 min 62 63 64/////////////////////////////////////// 65// Vector 66/////////////////////////////////////// 67__inline 68float fastDiv(float numerator, float denominator) 69{ 70 return native_divide(numerator, denominator); 71// return numerator/denominator; 72} 73 74__inline 75float4 fastDiv4(float4 numerator, float4 denominator) 76{ 77 return native_divide(numerator, denominator); 78} 79 80__inline 81float fastSqrtf(float f2) 82{ 83 return native_sqrt(f2); 84// return sqrt(f2); 85} 86 87__inline 88float fastRSqrt(float f2) 89{ 90 return native_rsqrt(f2); 91} 92 93__inline 94float fastLength4(float4 v) 95{ 96 return fast_length(v); 97} 98 99__inline 100float4 fastNormalize4(float4 v) 101{ 102 return fast_normalize(v); 103} 104 105 106__inline 107float sqrtf(float a) 108{ 109// return sqrt(a); 110 return native_sqrt(a); 111} 112 113__inline 114float4 cross3(float4 a1, float4 b1) 115{ 116 117 float4 a=make_float4(a1.xyz,0.f); 118 float4 b=make_float4(b1.xyz,0.f); 119 //float4 a=a1; 120 //float4 b=b1; 121 return cross(a,b); 122} 123 124__inline 125float dot3F4(float4 a, float4 b) 126{ 127 float4 a1 = make_float4(a.xyz,0.f); 128 float4 b1 = make_float4(b.xyz,0.f); 129 return dot(a1, b1); 130} 131 132__inline 133float length3(const float4 a) 134{ 135 return sqrtf(dot3F4(a,a)); 136} 137 138__inline 139float dot4(const float4 a, const float4 b) 140{ 141 return dot( a, b ); 142} 143 144// for height 145__inline 146float dot3w1(const float4 point, const float4 eqn) 147{ 148 return dot3F4(point,eqn) + eqn.w; 149} 150 151__inline 152float4 normalize3(const float4 a) 153{ 154 float4 n = make_float4(a.x, a.y, a.z, 0.f); 155 return fastNormalize4( n ); 156// float length = sqrtf(dot3F4(a, a)); 157// return 1.f/length * a; 158} 159 160__inline 161float4 normalize4(const float4 a) 162{ 163 float length = sqrtf(dot4(a, a)); 164 return 1.f/length * a; 165} 166 167__inline 168float4 createEquation(const float4 a, const float4 b, const float4 c) 169{ 170 float4 eqn; 171 float4 ab = b-a; 172 float4 ac = c-a; 173 eqn = normalize3( cross3(ab, ac) ); 174 eqn.w = -dot3F4(eqn,a); 175 return eqn; 176} 177 178/////////////////////////////////////// 179// Matrix3x3 180/////////////////////////////////////// 181 182typedef struct 183{ 184 float4 m_row[3]; 185}Matrix3x3; 186 187__inline 188Matrix3x3 mtZero(); 189 190__inline 191Matrix3x3 mtIdentity(); 192 193__inline 194Matrix3x3 mtTranspose(Matrix3x3 m); 195 196__inline 197Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); 198 199__inline 200float4 mtMul1(Matrix3x3 a, float4 b); 201 202__inline 203float4 mtMul3(float4 a, Matrix3x3 b); 204 205__inline 206Matrix3x3 mtZero() 207{ 208 Matrix3x3 m; 209 m.m_row[0] = (float4)(0.f); 210 m.m_row[1] = (float4)(0.f); 211 m.m_row[2] = (float4)(0.f); 212 return m; 213} 214 215__inline 216Matrix3x3 mtIdentity() 217{ 218 Matrix3x3 m; 219 m.m_row[0] = (float4)(1,0,0,0); 220 m.m_row[1] = (float4)(0,1,0,0); 221 m.m_row[2] = (float4)(0,0,1,0); 222 return m; 223} 224 225__inline 226Matrix3x3 mtTranspose(Matrix3x3 m) 227{ 228 Matrix3x3 out; 229 out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); 230 out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); 231 out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); 232 return out; 233} 234 235__inline 236Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) 237{ 238 Matrix3x3 transB; 239 transB = mtTranspose( b ); 240 Matrix3x3 ans; 241 // why this doesn't run when 0ing in the for{} 242 a.m_row[0].w = 0.f; 243 a.m_row[1].w = 0.f; 244 a.m_row[2].w = 0.f; 245 for(int i=0; i<3; i++) 246 { 247// a.m_row[i].w = 0.f; 248 ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); 249 ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); 250 ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); 251 ans.m_row[i].w = 0.f; 252 } 253 return ans; 254} 255 256__inline 257float4 mtMul1(Matrix3x3 a, float4 b) 258{ 259 float4 ans; 260 ans.x = dot3F4( a.m_row[0], b ); 261 ans.y = dot3F4( a.m_row[1], b ); 262 ans.z = dot3F4( a.m_row[2], b ); 263 ans.w = 0.f; 264 return ans; 265} 266 267__inline 268float4 mtMul3(float4 a, Matrix3x3 b) 269{ 270 float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); 271 float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); 272 float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); 273 274 float4 ans; 275 ans.x = dot3F4( a, colx ); 276 ans.y = dot3F4( a, coly ); 277 ans.z = dot3F4( a, colz ); 278 return ans; 279} 280 281/////////////////////////////////////// 282// Quaternion 283/////////////////////////////////////// 284 285typedef float4 Quaternion; 286 287__inline 288Quaternion qtMul(Quaternion a, Quaternion b); 289 290__inline 291Quaternion qtNormalize(Quaternion in); 292 293__inline 294float4 qtRotate(Quaternion q, float4 vec); 295 296__inline 297Quaternion qtInvert(Quaternion q); 298 299 300 301 302 303__inline 304Quaternion qtMul(Quaternion a, Quaternion b) 305{ 306 Quaternion ans; 307 ans = cross3( a, b ); 308 ans += a.w*b+b.w*a; 309// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); 310 ans.w = a.w*b.w - dot3F4(a, b); 311 return ans; 312} 313 314__inline 315Quaternion qtNormalize(Quaternion in) 316{ 317 return fastNormalize4(in); 318// in /= length( in ); 319// return in; 320} 321__inline 322float4 qtRotate(Quaternion q, float4 vec) 323{ 324 Quaternion qInv = qtInvert( q ); 325 float4 vcpy = vec; 326 vcpy.w = 0.f; 327 float4 out = qtMul(qtMul(q,vcpy),qInv); 328 return out; 329} 330 331__inline 332Quaternion qtInvert(Quaternion q) 333{ 334 return (Quaternion)(-q.xyz, q.w); 335} 336 337__inline 338float4 qtInvRotate(const Quaternion q, float4 vec) 339{ 340 return qtRotate( qtInvert( q ), vec ); 341} 342 343 344 345 346#define WG_SIZE 64 347 348typedef struct 349{ 350 float4 m_pos; 351 Quaternion m_quat; 352 float4 m_linVel; 353 float4 m_angVel; 354 355 u32 m_shapeIdx; 356 float m_invMass; 357 float m_restituitionCoeff; 358 float m_frictionCoeff; 359} Body; 360 361 362 363typedef struct 364{ 365 Matrix3x3 m_invInertia; 366 Matrix3x3 m_initInvInertia; 367} Shape; 368 369typedef struct 370{ 371 float4 m_linear; 372 float4 m_worldPos[4]; 373 float4 m_center; 374 float m_jacCoeffInv[4]; 375 float m_b[4]; 376 float m_appliedRambdaDt[4]; 377 378 float m_fJacCoeffInv[2]; 379 float m_fAppliedRambdaDt[2]; 380 381 u32 m_bodyA; 382 u32 m_bodyB; 383 int m_batchIdx; 384 u32 m_paddings; 385} Constraint4; 386 387 388 389 390 391 392__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex) 393{ 394 int i = GET_GLOBAL_IDX; 395 396 if( i < numContactManifolds) 397 { 398 int pa = manifoldPtr[i].m_bodyAPtrAndSignBit; 399 bool isFixedA = (pa <0) || (pa == fixedBodyIndex); 400 int bodyIndexA = abs(pa); 401 if (!isFixedA) 402 { 403 AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x); 404 } 405 barrier(CLK_GLOBAL_MEM_FENCE); 406 int pb = manifoldPtr[i].m_bodyBPtrAndSignBit; 407 bool isFixedB = (pb <0) || (pb == fixedBodyIndex); 408 int bodyIndexB = abs(pb); 409 if (!isFixedB) 410 { 411 AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y); 412 } 413 } 414} 415 416__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies) 417{ 418 int i = GET_GLOBAL_IDX; 419 420 if( i < numSplitBodies) 421 { 422 linearVelocities[i] = make_float4(0); 423 angularVelocities[i] = make_float4(0); 424 } 425} 426 427 428__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, 429__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) 430{ 431 int i = GET_GLOBAL_IDX; 432 if (i<numBodies) 433 { 434 if (gBodies[i].m_invMass) 435 { 436 int bodyOffset = offsetSplitBodies[i]; 437 int count = bodyCount[i]; 438 float factor = 1.f/((float)count); 439 float4 averageLinVel = make_float4(0.f); 440 float4 averageAngVel = make_float4(0.f); 441 442 for (int j=0;j<count;j++) 443 { 444 averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor; 445 averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor; 446 } 447 448 for (int j=0;j<count;j++) 449 { 450 deltaLinearVelocities[bodyOffset+j] = averageLinVel; 451 deltaAngularVelocities[bodyOffset+j] = averageAngVel; 452 } 453 454 }//bodies[i].m_invMass 455 }//i<numBodies 456} 457 458 459 460void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) 461{ 462 *linear = make_float4(n.xyz,0.f); 463 *angular0 = cross3(r0, n); 464 *angular1 = -cross3(r1, n); 465} 466 467 468float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) 469{ 470 return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); 471} 472 473 474float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, 475 float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB) 476{ 477 // linear0,1 are normlized 478 float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; 479 float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); 480 float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; 481 float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); 482 return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB); 483} 484 485 486void btPlaneSpace1 (float4 n, float4* p, float4* q); 487 void btPlaneSpace1 (float4 n, float4* p, float4* q) 488{ 489 if (fabs(n.z) > 0.70710678f) { 490 // choose p in y-z plane 491 float a = n.y*n.y + n.z*n.z; 492 float k = 1.f/sqrt(a); 493 p[0].x = 0; 494 p[0].y = -n.z*k; 495 p[0].z = n.y*k; 496 // set q = n x p 497 q[0].x = a*k; 498 q[0].y = -n.x*p[0].z; 499 q[0].z = n.x*p[0].y; 500 } 501 else { 502 // choose p in x-y plane 503 float a = n.x*n.x + n.y*n.y; 504 float k = 1.f/sqrt(a); 505 p[0].x = -n.y*k; 506 p[0].y = n.x*k; 507 p[0].z = 0; 508 // set q = n x p 509 q[0].x = -n.z*p[0].y; 510 q[0].y = n.z*p[0].x; 511 q[0].z = a*k; 512 } 513} 514 515 516 517 518 519void solveContact(__global Constraint4* cs, 520 float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, 521 float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB, 522 float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB) 523{ 524 float minRambdaDt = 0; 525 float maxRambdaDt = FLT_MAX; 526 527 for(int ic=0; ic<4; ic++) 528 { 529 if( cs->m_jacCoeffInv[ic] == 0.f ) continue; 530 531 float4 angular0, angular1, linear; 532 float4 r0 = cs->m_worldPos[ic] - posA; 533 float4 r1 = cs->m_worldPos[ic] - posB; 534 setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); 535 536 537 538 float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, 539 *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; 540 rambdaDt *= cs->m_jacCoeffInv[ic]; 541 542 543 { 544 float prevSum = cs->m_appliedRambdaDt[ic]; 545 float updated = prevSum; 546 updated += rambdaDt; 547 updated = max2( updated, minRambdaDt ); 548 updated = min2( updated, maxRambdaDt ); 549 rambdaDt = updated - prevSum; 550 cs->m_appliedRambdaDt[ic] = updated; 551 } 552 553 554 float4 linImp0 = invMassA*linear*rambdaDt; 555 float4 linImp1 = invMassB*(-linear)*rambdaDt; 556 float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; 557 float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; 558 559 560 if (invMassA) 561 { 562 *dLinVelA += linImp0; 563 *dAngVelA += angImp0; 564 } 565 if (invMassB) 566 { 567 *dLinVelB += linImp1; 568 *dAngVelB += angImp1; 569 } 570 } 571} 572 573 574// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); 575 576 577void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, 578__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, 579__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) 580{ 581 582 //float frictionCoeff = ldsCs[0].m_linear.w; 583 int aIdx = ldsCs[0].m_bodyA; 584 int bIdx = ldsCs[0].m_bodyB; 585 586 float4 posA = gBodies[aIdx].m_pos; 587 float4 linVelA = gBodies[aIdx].m_linVel; 588 float4 angVelA = gBodies[aIdx].m_angVel; 589 float invMassA = gBodies[aIdx].m_invMass; 590 Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; 591 592 float4 posB = gBodies[bIdx].m_pos; 593 float4 linVelB = gBodies[bIdx].m_linVel; 594 float4 angVelB = gBodies[bIdx].m_angVel; 595 float invMassB = gBodies[bIdx].m_invMass; 596 Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; 597 598 599 float4 dLinVelA = make_float4(0,0,0,0); 600 float4 dAngVelA = make_float4(0,0,0,0); 601 float4 dLinVelB = make_float4(0,0,0,0); 602 float4 dAngVelB = make_float4(0,0,0,0); 603 604 int bodyOffsetA = offsetSplitBodies[aIdx]; 605 int constraintOffsetA = contactConstraintOffsets[0].x; 606 int splitIndexA = bodyOffsetA+constraintOffsetA; 607 608 if (invMassA) 609 { 610 dLinVelA = deltaLinearVelocities[splitIndexA]; 611 dAngVelA = deltaAngularVelocities[splitIndexA]; 612 } 613 614 int bodyOffsetB = offsetSplitBodies[bIdx]; 615 int constraintOffsetB = contactConstraintOffsets[0].y; 616 int splitIndexB= bodyOffsetB+constraintOffsetB; 617 618 if (invMassB) 619 { 620 dLinVelB = deltaLinearVelocities[splitIndexB]; 621 dAngVelB = deltaAngularVelocities[splitIndexB]; 622 } 623 624 solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, 625 posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB); 626 627 if (invMassA) 628 { 629 deltaLinearVelocities[splitIndexA] = dLinVelA; 630 deltaAngularVelocities[splitIndexA] = dAngVelA; 631 } 632 if (invMassB) 633 { 634 deltaLinearVelocities[splitIndexB] = dLinVelB; 635 deltaAngularVelocities[splitIndexB] = dAngVelB; 636 } 637 638} 639 640 641__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes , 642__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, 643float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds 644) 645{ 646 int i = GET_GLOBAL_IDX; 647 if (i<numManifolds) 648 { 649 solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); 650 } 651} 652 653 654 655 656void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, 657 __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, 658 __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) 659{ 660 float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w; 661 int aIdx = ldsCs[0].m_bodyA; 662 int bIdx = ldsCs[0].m_bodyB; 663 664 665 float4 posA = gBodies[aIdx].m_pos; 666 float4 linVelA = gBodies[aIdx].m_linVel; 667 float4 angVelA = gBodies[aIdx].m_angVel; 668 float invMassA = gBodies[aIdx].m_invMass; 669 Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; 670 671 float4 posB = gBodies[bIdx].m_pos; 672 float4 linVelB = gBodies[bIdx].m_linVel; 673 float4 angVelB = gBodies[bIdx].m_angVel; 674 float invMassB = gBodies[bIdx].m_invMass; 675 Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; 676 677 678 float4 dLinVelA = make_float4(0,0,0,0); 679 float4 dAngVelA = make_float4(0,0,0,0); 680 float4 dLinVelB = make_float4(0,0,0,0); 681 float4 dAngVelB = make_float4(0,0,0,0); 682 683 int bodyOffsetA = offsetSplitBodies[aIdx]; 684 int constraintOffsetA = contactConstraintOffsets[0].x; 685 int splitIndexA = bodyOffsetA+constraintOffsetA; 686 687 if (invMassA) 688 { 689 dLinVelA = deltaLinearVelocities[splitIndexA]; 690 dAngVelA = deltaAngularVelocities[splitIndexA]; 691 } 692 693 int bodyOffsetB = offsetSplitBodies[bIdx]; 694 int constraintOffsetB = contactConstraintOffsets[0].y; 695 int splitIndexB= bodyOffsetB+constraintOffsetB; 696 697 if (invMassB) 698 { 699 dLinVelB = deltaLinearVelocities[splitIndexB]; 700 dAngVelB = deltaAngularVelocities[splitIndexB]; 701 } 702 703 704 705 706 { 707 float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; 708 float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; 709 710 float sum = 0; 711 for(int j=0; j<4; j++) 712 { 713 sum +=ldsCs[0].m_appliedRambdaDt[j]; 714 } 715 frictionCoeff = 0.7f; 716 for(int j=0; j<4; j++) 717 { 718 maxRambdaDt[j] = frictionCoeff*sum; 719 minRambdaDt[j] = -maxRambdaDt[j]; 720 } 721 722 723// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, 724// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt ); 725 726 727 { 728 729 __global Constraint4* cs = ldsCs; 730 731 if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return; 732 const float4 center = cs->m_center; 733 734 float4 n = -cs->m_linear; 735 736 float4 tangent[2]; 737 btPlaneSpace1(n,&tangent[0],&tangent[1]); 738 float4 angular0, angular1, linear; 739 float4 r0 = center - posA; 740 float4 r1 = center - posB; 741 for(int i=0; i<2; i++) 742 { 743 setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); 744 float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, 745 linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB ); 746 rambdaDt *= cs->m_fJacCoeffInv[i]; 747 748 { 749 float prevSum = cs->m_fAppliedRambdaDt[i]; 750 float updated = prevSum; 751 updated += rambdaDt; 752 updated = max2( updated, minRambdaDt[i] ); 753 updated = min2( updated, maxRambdaDt[i] ); 754 rambdaDt = updated - prevSum; 755 cs->m_fAppliedRambdaDt[i] = updated; 756 } 757 758 float4 linImp0 = invMassA*linear*rambdaDt; 759 float4 linImp1 = invMassB*(-linear)*rambdaDt; 760 float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; 761 float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; 762 763 dLinVelA += linImp0; 764 dAngVelA += angImp0; 765 dLinVelB += linImp1; 766 dAngVelB += angImp1; 767 } 768 { // angular damping for point constraint 769 float4 ab = normalize3( posB - posA ); 770 float4 ac = normalize3( center - posA ); 771 if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) 772 { 773 float angNA = dot3F4( n, angVelA ); 774 float angNB = dot3F4( n, angVelB ); 775 776 dAngVelA -= (angNA*0.1f)*n; 777 dAngVelB -= (angNB*0.1f)*n; 778 } 779 } 780 } 781 782 783 784 } 785 786 if (invMassA) 787 { 788 deltaLinearVelocities[splitIndexA] = dLinVelA; 789 deltaAngularVelocities[splitIndexA] = dAngVelA; 790 } 791 if (invMassB) 792 { 793 deltaLinearVelocities[splitIndexB] = dLinVelB; 794 deltaAngularVelocities[splitIndexB] = dAngVelB; 795 } 796 797 798} 799 800 801__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes , 802 __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, 803 __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, 804 float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds 805) 806{ 807 int i = GET_GLOBAL_IDX; 808 if (i<numManifolds) 809 { 810 solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); 811 } 812} 813 814 815__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, 816 __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) 817{ 818 int i = GET_GLOBAL_IDX; 819 if (i<numBodies) 820 { 821 if (gBodies[i].m_invMass) 822 { 823 int bodyOffset = offsetSplitBodies[i]; 824 int count = bodyCount[i]; 825 if (count) 826 { 827 gBodies[i].m_linVel += deltaLinearVelocities[bodyOffset]; 828 gBodies[i].m_angVel += deltaAngularVelocities[bodyOffset]; 829 } 830 } 831 } 832} 833 834 835 836void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA, 837 const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, 838 __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB, 839 Constraint4* dstC ) 840{ 841 dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); 842 dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); 843 844 float dtInv = 1.f/dt; 845 for(int ic=0; ic<4; ic++) 846 { 847 dstC->m_appliedRambdaDt[ic] = 0.f; 848 } 849 dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; 850 851 852 dstC->m_linear = src->m_worldNormalOnB; 853 dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() ); 854 for(int ic=0; ic<4; ic++) 855 { 856 float4 r0 = src->m_worldPosB[ic] - posA; 857 float4 r1 = src->m_worldPosB[ic] - posB; 858 859 if( ic >= src->m_worldNormalOnB.w )//npoints 860 { 861 dstC->m_jacCoeffInv[ic] = 0.f; 862 continue; 863 } 864 865 float relVelN; 866 { 867 float4 linear, angular0, angular1; 868 setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1); 869 870 dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, 871 invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB); 872 873 relVelN = calcRelVel(linear, -linear, angular0, angular1, 874 linVelA, angVelA, linVelB, angVelB); 875 876 float e = 0.f;//src->getRestituitionCoeff(); 877 if( relVelN*relVelN < 0.004f ) e = 0.f; 878 879 dstC->m_b[ic] = e*relVelN; 880 //float penetration = src->m_worldPosB[ic].w; 881 dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv; 882 dstC->m_appliedRambdaDt[ic] = 0.f; 883 } 884 } 885 886 if( src->m_worldNormalOnB.w > 0 )//npoints 887 { // prepare friction 888 float4 center = make_float4(0.f); 889 for(int i=0; i<src->m_worldNormalOnB.w; i++) 890 center += src->m_worldPosB[i]; 891 center /= (float)src->m_worldNormalOnB.w; 892 893 float4 tangent[2]; 894 btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]); 895 896 float4 r[2]; 897 r[0] = center - posA; 898 r[1] = center - posB; 899 900 for(int i=0; i<2; i++) 901 { 902 float4 linear, angular0, angular1; 903 setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1); 904 905 dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, 906 invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB); 907 dstC->m_fAppliedRambdaDt[i] = 0.f; 908 } 909 dstC->m_center = center; 910 } 911 912 for(int i=0; i<4; i++) 913 { 914 if( i<src->m_worldNormalOnB.w ) 915 { 916 dstC->m_worldPos[i] = src->m_worldPosB[i]; 917 } 918 else 919 { 920 dstC->m_worldPos[i] = make_float4(0.f); 921 } 922 } 923} 924 925 926__kernel 927__attribute__((reqd_work_group_size(WG_SIZE,1,1))) 928void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, 929__global const unsigned int* bodyCount, 930int nContacts, 931float dt, 932float positionDrift, 933float positionConstraintCoeff 934) 935{ 936 int gIdx = GET_GLOBAL_IDX; 937 938 if( gIdx < nContacts ) 939 { 940 int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); 941 int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); 942 943 float4 posA = gBodies[aIdx].m_pos; 944 float4 linVelA = gBodies[aIdx].m_linVel; 945 float4 angVelA = gBodies[aIdx].m_angVel; 946 float invMassA = gBodies[aIdx].m_invMass; 947 Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; 948 949 float4 posB = gBodies[bIdx].m_pos; 950 float4 linVelB = gBodies[bIdx].m_linVel; 951 float4 angVelB = gBodies[bIdx].m_angVel; 952 float invMassB = gBodies[bIdx].m_invMass; 953 Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; 954 955 Constraint4 cs; 956 957 float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1; 958 float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1; 959 960 setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, 961 &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB, 962 &cs ); 963 964 cs.m_batchIdx = gContact[gIdx].m_batchIdx; 965 966 gConstraintOut[gIdx] = cs; 967 } 968}