1 2/* 3Copyright (c) 2012 Advanced Micro Devices, Inc. 4 5This software is provided 'as-is', without any express or implied warranty. 6In no event will the authors be held liable for any damages arising from the use of this software. 7Permission is granted to anyone to use this software for any purpose, 8including commercial applications, and to alter it and redistribute it freely, 9subject to the following restrictions: 10 111. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 122. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 133. This notice may not be removed or altered from any source distribution. 14*/ 15//Originally written by Takahiro Harada 16 17#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h" 18 19#pragma OPENCL EXTENSION cl_amd_printf : enable 20#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable 21#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable 22#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable 23#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable 24 25 26#ifdef cl_ext_atomic_counters_32 27#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable 28#else 29#define counter32_t volatile global int* 30#endif 31 32typedef unsigned int u32; 33typedef unsigned short u16; 34typedef unsigned char u8; 35 36#define GET_GROUP_IDX get_group_id(0) 37#define GET_LOCAL_IDX get_local_id(0) 38#define GET_GLOBAL_IDX get_global_id(0) 39#define GET_GROUP_SIZE get_local_size(0) 40#define GET_NUM_GROUPS get_num_groups(0) 41#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) 42#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) 43#define AtomInc(x) atom_inc(&(x)) 44#define AtomInc1(x, out) out = atom_inc(&(x)) 45#define AppendInc(x, out) out = atomic_inc(x) 46#define AtomAdd(x, value) atom_add(&(x), value) 47#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) 48#define AtomXhg(x, value) atom_xchg ( &(x), value ) 49 50 51#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) 52 53#define make_float4 (float4) 54#define make_float2 (float2) 55#define make_uint4 (uint4) 56#define make_int4 (int4) 57#define make_uint2 (uint2) 58#define make_int2 (int2) 59 60 61#define max2 max 62#define min2 min 63 64 65/////////////////////////////////////// 66// Vector 67/////////////////////////////////////// 68__inline 69float fastDiv(float numerator, float denominator) 70{ 71 return native_divide(numerator, denominator); 72// return numerator/denominator; 73} 74 75__inline 76float4 fastDiv4(float4 numerator, float4 denominator) 77{ 78 return native_divide(numerator, denominator); 79} 80 81__inline 82float fastSqrtf(float f2) 83{ 84 return native_sqrt(f2); 85// return sqrt(f2); 86} 87 88__inline 89float fastRSqrt(float f2) 90{ 91 return native_rsqrt(f2); 92} 93 94__inline 95float fastLength4(float4 v) 96{ 97 return fast_length(v); 98} 99 100__inline 101float4 fastNormalize4(float4 v) 102{ 103 return fast_normalize(v); 104} 105 106 107__inline 108float sqrtf(float a) 109{ 110// return sqrt(a); 111 return native_sqrt(a); 112} 113 114__inline 115float4 cross3(float4 a, float4 b) 116{ 117 return cross(a,b); 118} 119 120__inline 121float dot3F4(float4 a, float4 b) 122{ 123 float4 a1 = make_float4(a.xyz,0.f); 124 float4 b1 = make_float4(b.xyz,0.f); 125 return dot(a1, b1); 126} 127 128__inline 129float length3(const float4 a) 130{ 131 return sqrtf(dot3F4(a,a)); 132} 133 134__inline 135float dot4(const float4 a, const float4 b) 136{ 137 return dot( a, b ); 138} 139 140// for height 141__inline 142float dot3w1(const float4 point, const float4 eqn) 143{ 144 return dot3F4(point,eqn) + eqn.w; 145} 146 147__inline 148float4 normalize3(const float4 a) 149{ 150 float4 n = make_float4(a.x, a.y, a.z, 0.f); 151 return fastNormalize4( n ); 152// float length = sqrtf(dot3F4(a, a)); 153// return 1.f/length * a; 154} 155 156__inline 157float4 normalize4(const float4 a) 158{ 159 float length = sqrtf(dot4(a, a)); 160 return 1.f/length * a; 161} 162 163__inline 164float4 createEquation(const float4 a, const float4 b, const float4 c) 165{ 166 float4 eqn; 167 float4 ab = b-a; 168 float4 ac = c-a; 169 eqn = normalize3( cross3(ab, ac) ); 170 eqn.w = -dot3F4(eqn,a); 171 return eqn; 172} 173 174 175 176#define WG_SIZE 64 177 178 179 180 181 182 183 184typedef struct 185{ 186 int m_nConstraints; 187 int m_start; 188 int m_batchIdx; 189 int m_nSplit; 190// int m_paddings[1]; 191} ConstBuffer; 192 193typedef struct 194{ 195 int m_solveFriction; 196 int m_maxBatch; // long batch really kills the performance 197 int m_batchIdx; 198 int m_nSplit; 199// int m_paddings[1]; 200} ConstBufferBatchSolve; 201 202 203 204 205 206 207 208typedef struct 209{ 210 int m_valInt0; 211 int m_valInt1; 212 int m_valInt2; 213 int m_valInt3; 214 215 float m_val0; 216 float m_val1; 217 float m_val2; 218 float m_val3; 219} SolverDebugInfo; 220 221 222 223 224 225 226typedef struct 227{ 228 int m_nContacts; 229 float m_dt; 230 float m_positionDrift; 231 float m_positionConstraintCoeff; 232} ConstBufferCTC; 233 234__kernel 235__attribute__((reqd_work_group_size(WG_SIZE,1,1))) 236void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, 237int nContacts, 238float dt, 239float positionDrift, 240float positionConstraintCoeff 241) 242{ 243 int gIdx = GET_GLOBAL_IDX; 244 245 if( gIdx < nContacts ) 246 { 247 int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); 248 int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); 249 250 float4 posA = gBodies[aIdx].m_pos; 251 float4 linVelA = gBodies[aIdx].m_linVel; 252 float4 angVelA = gBodies[aIdx].m_angVel; 253 float invMassA = gBodies[aIdx].m_invMass; 254 b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia; 255 256 float4 posB = gBodies[bIdx].m_pos; 257 float4 linVelB = gBodies[bIdx].m_linVel; 258 float4 angVelB = gBodies[bIdx].m_angVel; 259 float invMassB = gBodies[bIdx].m_invMass; 260 b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia; 261 262 b3ContactConstraint4_t cs; 263 264 setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, 265 &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, 266 &cs ); 267 268 cs.m_batchIdx = gContact[gIdx].m_batchIdx; 269 270 gConstraintOut[gIdx] = cs; 271 } 272} 273 274 275 276 277 278