1
2 #include "b3GpuJacobiContactSolver.h"
3 #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
4 #include "Bullet3Common/b3AlignedObjectArray.h"
5 #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2
6 class b3Vector3;
7 #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
8 #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h"
9 #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
10 #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
11 #include "Bullet3OpenCL/RigidBody/kernels/solverUtils.h"
12 #include "Bullet3Common/b3Logging.h"
13 #include "b3GpuConstraint4.h"
14 #include "Bullet3Common/shared/b3Int2.h"
15 #include "Bullet3Common/shared/b3Int4.h"
16 #define SOLVER_UTILS_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl"
17
18 struct b3GpuJacobiSolverInternalData
19 {
20 //btRadixSort32CL* m_sort32;
21 //btBoundSearchCL* m_search;
22 b3PrefixScanCL* m_scan;
23
24 b3OpenCLArray<unsigned int>* m_bodyCount;
25 b3OpenCLArray<b3Int2>* m_contactConstraintOffsets;
26 b3OpenCLArray<unsigned int>* m_offsetSplitBodies;
27
28 b3OpenCLArray<b3Vector3>* m_deltaLinearVelocities;
29 b3OpenCLArray<b3Vector3>* m_deltaAngularVelocities;
30
31 b3AlignedObjectArray<b3Vector3> m_deltaLinearVelocitiesCPU;
32 b3AlignedObjectArray<b3Vector3> m_deltaAngularVelocitiesCPU;
33
34 b3OpenCLArray<b3GpuConstraint4>* m_contactConstraints;
35
36 b3FillCL* m_filler;
37
38 cl_kernel m_countBodiesKernel;
39 cl_kernel m_contactToConstraintSplitKernel;
40 cl_kernel m_clearVelocitiesKernel;
41 cl_kernel m_averageVelocitiesKernel;
42 cl_kernel m_updateBodyVelocitiesKernel;
43 cl_kernel m_solveContactKernel;
44 cl_kernel m_solveFrictionKernel;
45 };
46
b3GpuJacobiContactSolver(cl_context ctx,cl_device_id device,cl_command_queue queue,int pairCapacity)47 b3GpuJacobiContactSolver::b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity)
48 : m_context(ctx),
49 m_device(device),
50 m_queue(queue)
51 {
52 m_data = new b3GpuJacobiSolverInternalData;
53 m_data->m_scan = new b3PrefixScanCL(m_context, m_device, m_queue);
54 m_data->m_bodyCount = new b3OpenCLArray<unsigned int>(m_context, m_queue);
55 m_data->m_filler = new b3FillCL(m_context, m_device, m_queue);
56 m_data->m_contactConstraintOffsets = new b3OpenCLArray<b3Int2>(m_context, m_queue);
57 m_data->m_offsetSplitBodies = new b3OpenCLArray<unsigned int>(m_context, m_queue);
58 m_data->m_contactConstraints = new b3OpenCLArray<b3GpuConstraint4>(m_context, m_queue);
59 m_data->m_deltaLinearVelocities = new b3OpenCLArray<b3Vector3>(m_context, m_queue);
60 m_data->m_deltaAngularVelocities = new b3OpenCLArray<b3Vector3>(m_context, m_queue);
61
62 cl_int pErrNum;
63 const char* additionalMacros = "";
64 const char* solverUtilsSource = solverUtilsCL;
65 {
66 cl_program solverUtilsProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverUtilsSource, &pErrNum, additionalMacros, SOLVER_UTILS_KERNEL_PATH);
67 b3Assert(solverUtilsProg);
68 m_data->m_countBodiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "CountBodiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
69 b3Assert(m_data->m_countBodiesKernel);
70
71 m_data->m_contactToConstraintSplitKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ContactToConstraintSplitKernel", &pErrNum, solverUtilsProg, additionalMacros);
72 b3Assert(m_data->m_contactToConstraintSplitKernel);
73 m_data->m_clearVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
74 b3Assert(m_data->m_clearVelocitiesKernel);
75
76 m_data->m_averageVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "AverageVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
77 b3Assert(m_data->m_averageVelocitiesKernel);
78
79 m_data->m_updateBodyVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "UpdateBodyVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros);
80 b3Assert(m_data->m_updateBodyVelocitiesKernel);
81
82 m_data->m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros);
83 b3Assert(m_data->m_solveContactKernel);
84
85 m_data->m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveFrictionJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros);
86 b3Assert(m_data->m_solveFrictionKernel);
87 }
88 }
89
~b3GpuJacobiContactSolver()90 b3GpuJacobiContactSolver::~b3GpuJacobiContactSolver()
91 {
92 clReleaseKernel(m_data->m_solveContactKernel);
93 clReleaseKernel(m_data->m_solveFrictionKernel);
94 clReleaseKernel(m_data->m_countBodiesKernel);
95 clReleaseKernel(m_data->m_contactToConstraintSplitKernel);
96 clReleaseKernel(m_data->m_averageVelocitiesKernel);
97 clReleaseKernel(m_data->m_updateBodyVelocitiesKernel);
98 clReleaseKernel(m_data->m_clearVelocitiesKernel);
99
100 delete m_data->m_deltaLinearVelocities;
101 delete m_data->m_deltaAngularVelocities;
102 delete m_data->m_contactConstraints;
103 delete m_data->m_offsetSplitBodies;
104 delete m_data->m_contactConstraintOffsets;
105 delete m_data->m_bodyCount;
106 delete m_data->m_filler;
107 delete m_data->m_scan;
108 delete m_data;
109 }
110
make_float4(float v)111 b3Vector3 make_float4(float v)
112 {
113 return b3MakeVector3(v, v, v);
114 }
115
make_float4(float x,float y,float z,float w)116 b3Vector4 make_float4(float x, float y, float z, float w)
117 {
118 return b3MakeVector4(x, y, z, w);
119 }
120
calcRelVel(const b3Vector3 & l0,const b3Vector3 & l1,const b3Vector3 & a0,const b3Vector3 & a1,const b3Vector3 & linVel0,const b3Vector3 & angVel0,const b3Vector3 & linVel1,const b3Vector3 & angVel1)121 static inline float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1,
122 const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1)
123 {
124 return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1);
125 }
126
setLinearAndAngular(const b3Vector3 & n,const b3Vector3 & r0,const b3Vector3 & r1,b3Vector3 & linear,b3Vector3 & angular0,b3Vector3 & angular1)127 static inline void setLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1,
128 b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1)
129 {
130 linear = n;
131 angular0 = b3Cross(r0, n);
132 angular1 = -b3Cross(r1, n);
133 }
134
solveContact(b3GpuConstraint4 & cs,const b3Vector3 & posA,const b3Vector3 & linVelARO,const b3Vector3 & angVelARO,float invMassA,const b3Matrix3x3 & invInertiaA,const b3Vector3 & posB,const b3Vector3 & linVelBRO,const b3Vector3 & angVelBRO,float invMassB,const b3Matrix3x3 & invInertiaB,float maxRambdaDt[4],float minRambdaDt[4],b3Vector3 & dLinVelA,b3Vector3 & dAngVelA,b3Vector3 & dLinVelB,b3Vector3 & dAngVelB)135 static __inline void solveContact(b3GpuConstraint4& cs,
136 const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA,
137 const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB,
138 float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB)
139 {
140 for (int ic = 0; ic < 4; ic++)
141 {
142 // dont necessary because this makes change to 0
143 if (cs.m_jacCoeffInv[ic] == 0.f) continue;
144
145 {
146 b3Vector3 angular0, angular1, linear;
147 b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA;
148 b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB;
149 setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, linear, angular0, angular1);
150
151 float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1,
152 linVelARO + dLinVelA, angVelARO + dAngVelA, linVelBRO + dLinVelB, angVelBRO + dAngVelB) +
153 cs.m_b[ic];
154 rambdaDt *= cs.m_jacCoeffInv[ic];
155
156 {
157 float prevSum = cs.m_appliedRambdaDt[ic];
158 float updated = prevSum;
159 updated += rambdaDt;
160 updated = b3Max(updated, minRambdaDt[ic]);
161 updated = b3Min(updated, maxRambdaDt[ic]);
162 rambdaDt = updated - prevSum;
163 cs.m_appliedRambdaDt[ic] = updated;
164 }
165
166 b3Vector3 linImp0 = invMassA * linear * rambdaDt;
167 b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
168 b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
169 b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
170 #ifdef _WIN32
171 b3Assert(_finite(linImp0.getX()));
172 b3Assert(_finite(linImp1.getX()));
173 #endif
174
175 if (invMassA)
176 {
177 dLinVelA += linImp0;
178 dAngVelA += angImp0;
179 }
180 if (invMassB)
181 {
182 dLinVelB += linImp1;
183 dAngVelB += angImp1;
184 }
185 }
186 }
187 }
188
solveContact3(b3GpuConstraint4 * cs,b3Vector3 * posAPtr,b3Vector3 * linVelA,b3Vector3 * angVelA,float invMassA,const b3Matrix3x3 & invInertiaA,b3Vector3 * posBPtr,b3Vector3 * linVelB,b3Vector3 * angVelB,float invMassB,const b3Matrix3x3 & invInertiaB,b3Vector3 * dLinVelA,b3Vector3 * dAngVelA,b3Vector3 * dLinVelB,b3Vector3 * dAngVelB)189 void solveContact3(b3GpuConstraint4* cs,
190 b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
191 b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
192 b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB)
193 {
194 float minRambdaDt = 0;
195 float maxRambdaDt = FLT_MAX;
196
197 for (int ic = 0; ic < 4; ic++)
198 {
199 if (cs->m_jacCoeffInv[ic] == 0.f) continue;
200
201 b3Vector3 angular0, angular1, linear;
202 b3Vector3 r0 = cs->m_worldPos[ic] - *posAPtr;
203 b3Vector3 r1 = cs->m_worldPos[ic] - *posBPtr;
204 setLinearAndAngular(cs->m_linear, r0, r1, linear, angular0, angular1);
205
206 float rambdaDt = calcRelVel(cs->m_linear, -cs->m_linear, angular0, angular1,
207 *linVelA + *dLinVelA, *angVelA + *dAngVelA, *linVelB + *dLinVelB, *angVelB + *dAngVelB) +
208 cs->m_b[ic];
209 rambdaDt *= cs->m_jacCoeffInv[ic];
210
211 {
212 float prevSum = cs->m_appliedRambdaDt[ic];
213 float updated = prevSum;
214 updated += rambdaDt;
215 updated = b3Max(updated, minRambdaDt);
216 updated = b3Min(updated, maxRambdaDt);
217 rambdaDt = updated - prevSum;
218 cs->m_appliedRambdaDt[ic] = updated;
219 }
220
221 b3Vector3 linImp0 = invMassA * linear * rambdaDt;
222 b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
223 b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
224 b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
225
226 if (invMassA)
227 {
228 *dLinVelA += linImp0;
229 *dAngVelA += angImp0;
230 }
231 if (invMassB)
232 {
233 *dLinVelB += linImp1;
234 *dAngVelB += angImp1;
235 }
236 }
237 }
238
solveFriction(b3GpuConstraint4 & cs,const b3Vector3 & posA,const b3Vector3 & linVelARO,const b3Vector3 & angVelARO,float invMassA,const b3Matrix3x3 & invInertiaA,const b3Vector3 & posB,const b3Vector3 & linVelBRO,const b3Vector3 & angVelBRO,float invMassB,const b3Matrix3x3 & invInertiaB,float maxRambdaDt[4],float minRambdaDt[4],b3Vector3 & dLinVelA,b3Vector3 & dAngVelA,b3Vector3 & dLinVelB,b3Vector3 & dAngVelB)239 static inline void solveFriction(b3GpuConstraint4& cs,
240 const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA,
241 const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB,
242 float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB)
243 {
244 b3Vector3 linVelA = linVelARO + dLinVelA;
245 b3Vector3 linVelB = linVelBRO + dLinVelB;
246 b3Vector3 angVelA = angVelARO + dAngVelA;
247 b3Vector3 angVelB = angVelBRO + dAngVelB;
248
249 if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return;
250 const b3Vector3& center = (const b3Vector3&)cs.m_center;
251
252 b3Vector3 n = -(const b3Vector3&)cs.m_linear;
253
254 b3Vector3 tangent[2];
255 #if 1
256 b3PlaneSpace1(n, tangent[0], tangent[1]);
257 #else
258 b3Vector3 r = cs.m_worldPos[0] - center;
259 tangent[0] = cross3(n, r);
260 tangent[1] = cross3(tangent[0], n);
261 tangent[0] = normalize3(tangent[0]);
262 tangent[1] = normalize3(tangent[1]);
263 #endif
264
265 b3Vector3 angular0, angular1, linear;
266 b3Vector3 r0 = center - posA;
267 b3Vector3 r1 = center - posB;
268 for (int i = 0; i < 2; i++)
269 {
270 setLinearAndAngular(tangent[i], r0, r1, linear, angular0, angular1);
271 float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
272 linVelA, angVelA, linVelB, angVelB);
273 rambdaDt *= cs.m_fJacCoeffInv[i];
274
275 {
276 float prevSum = cs.m_fAppliedRambdaDt[i];
277 float updated = prevSum;
278 updated += rambdaDt;
279 updated = b3Max(updated, minRambdaDt[i]);
280 updated = b3Min(updated, maxRambdaDt[i]);
281 rambdaDt = updated - prevSum;
282 cs.m_fAppliedRambdaDt[i] = updated;
283 }
284
285 b3Vector3 linImp0 = invMassA * linear * rambdaDt;
286 b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
287 b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
288 b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
289 #ifdef _WIN32
290 b3Assert(_finite(linImp0.getX()));
291 b3Assert(_finite(linImp1.getX()));
292 #endif
293 if (invMassA)
294 {
295 dLinVelA += linImp0;
296 dAngVelA += angImp0;
297 }
298 if (invMassB)
299 {
300 dLinVelB += linImp1;
301 dAngVelB += angImp1;
302 }
303 }
304
305 { // angular damping for point constraint
306 b3Vector3 ab = (posB - posA).normalized();
307 b3Vector3 ac = (center - posA).normalized();
308 if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
309 {
310 float angNA = b3Dot(n, angVelA);
311 float angNB = b3Dot(n, angVelB);
312
313 if (invMassA)
314 dAngVelA -= (angNA * 0.1f) * n;
315 if (invMassB)
316 dAngVelB -= (angNB * 0.1f) * n;
317 }
318 }
319 }
320
calcJacCoeff(const b3Vector3 & linear0,const b3Vector3 & linear1,const b3Vector3 & angular0,const b3Vector3 & angular1,float invMass0,const b3Matrix3x3 * invInertia0,float invMass1,const b3Matrix3x3 * invInertia1,float countA,float countB)321 float calcJacCoeff(const b3Vector3& linear0, const b3Vector3& linear1, const b3Vector3& angular0, const b3Vector3& angular1,
322 float invMass0, const b3Matrix3x3* invInertia0, float invMass1, const b3Matrix3x3* invInertia1, float countA, float countB)
323 {
324 // linear0,1 are normlized
325 float jmj0 = invMass0; //dot3F4(linear0, linear0)*invMass0;
326
327 float jmj1 = b3Dot(mtMul3(angular0, *invInertia0), angular0);
328 float jmj2 = invMass1; //dot3F4(linear1, linear1)*invMass1;
329 float jmj3 = b3Dot(mtMul3(angular1, *invInertia1), angular1);
330 return -1.f / ((jmj0 + jmj1) * countA + (jmj2 + jmj3) * countB);
331 // return -1.f/((jmj0+jmj1)+(jmj2+jmj3));
332 }
333
setConstraint4(const b3Vector3 & posA,const b3Vector3 & linVelA,const b3Vector3 & angVelA,float invMassA,const b3Matrix3x3 & invInertiaA,const b3Vector3 & posB,const b3Vector3 & linVelB,const b3Vector3 & angVelB,float invMassB,const b3Matrix3x3 & invInertiaB,b3Contact4 * src,float dt,float positionDrift,float positionConstraintCoeff,float countA,float countB,b3GpuConstraint4 * dstC)334 void setConstraint4(const b3Vector3& posA, const b3Vector3& linVelA, const b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
335 const b3Vector3& posB, const b3Vector3& linVelB, const b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
336 b3Contact4* src, float dt, float positionDrift, float positionConstraintCoeff, float countA, float countB,
337 b3GpuConstraint4* dstC)
338 {
339 dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
340 dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
341
342 float dtInv = 1.f / dt;
343 for (int ic = 0; ic < 4; ic++)
344 {
345 dstC->m_appliedRambdaDt[ic] = 0.f;
346 }
347 dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
348
349 dstC->m_linear = src->m_worldNormalOnB;
350 dstC->m_linear[3] = 0.7f; //src->getFrictionCoeff() );
351 for (int ic = 0; ic < 4; ic++)
352 {
353 b3Vector3 r0 = src->m_worldPosB[ic] - posA;
354 b3Vector3 r1 = src->m_worldPosB[ic] - posB;
355
356 if (ic >= src->m_worldNormalOnB[3]) //npoints
357 {
358 dstC->m_jacCoeffInv[ic] = 0.f;
359 continue;
360 }
361
362 float relVelN;
363 {
364 b3Vector3 linear, angular0, angular1;
365 setLinearAndAngular(src->m_worldNormalOnB, r0, r1, linear, angular0, angular1);
366
367 dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
368 invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB);
369
370 relVelN = calcRelVel(linear, -linear, angular0, angular1,
371 linVelA, angVelA, linVelB, angVelB);
372
373 float e = 0.f; //src->getRestituitionCoeff();
374 if (relVelN * relVelN < 0.004f)
375 {
376 e = 0.f;
377 }
378
379 dstC->m_b[ic] = e * relVelN;
380 //float penetration = src->m_worldPos[ic].w;
381 dstC->m_b[ic] += (src->m_worldPosB[ic][3] + positionDrift) * positionConstraintCoeff * dtInv;
382 dstC->m_appliedRambdaDt[ic] = 0.f;
383 }
384 }
385
386 if (src->m_worldNormalOnB[3] > 0) //npoints
387 { // prepare friction
388 b3Vector3 center = make_float4(0.f);
389 for (int i = 0; i < src->m_worldNormalOnB[3]; i++)
390 center += src->m_worldPosB[i];
391 center /= (float)src->m_worldNormalOnB[3];
392
393 b3Vector3 tangent[2];
394 b3PlaneSpace1(src->m_worldNormalOnB, tangent[0], tangent[1]);
395
396 b3Vector3 r[2];
397 r[0] = center - posA;
398 r[1] = center - posB;
399
400 for (int i = 0; i < 2; i++)
401 {
402 b3Vector3 linear, angular0, angular1;
403 setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1);
404
405 dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
406 invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB);
407 dstC->m_fAppliedRambdaDt[i] = 0.f;
408 }
409 dstC->m_center = center;
410 }
411
412 for (int i = 0; i < 4; i++)
413 {
414 if (i < src->m_worldNormalOnB[3])
415 {
416 dstC->m_worldPos[i] = src->m_worldPosB[i];
417 }
418 else
419 {
420 dstC->m_worldPos[i] = make_float4(0.f);
421 }
422 }
423 }
424
ContactToConstraintKernel(b3Contact4 * gContact,b3RigidBodyData * gBodies,b3InertiaData * gShapes,b3GpuConstraint4 * gConstraintOut,int nContacts,float dt,float positionDrift,float positionConstraintCoeff,int gIdx,b3AlignedObjectArray<unsigned int> & bodyCount)425 void ContactToConstraintKernel(b3Contact4* gContact, b3RigidBodyData* gBodies, b3InertiaData* gShapes, b3GpuConstraint4* gConstraintOut, int nContacts,
426 float dt,
427 float positionDrift,
428 float positionConstraintCoeff, int gIdx, b3AlignedObjectArray<unsigned int>& bodyCount)
429 {
430 //int gIdx = 0;//GET_GLOBAL_IDX;
431
432 if (gIdx < nContacts)
433 {
434 int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);
435 int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
436
437 b3Vector3 posA = gBodies[aIdx].m_pos;
438 b3Vector3 linVelA = gBodies[aIdx].m_linVel;
439 b3Vector3 angVelA = gBodies[aIdx].m_angVel;
440 float invMassA = gBodies[aIdx].m_invMass;
441 b3Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertiaWorld; //.m_invInertia;
442
443 b3Vector3 posB = gBodies[bIdx].m_pos;
444 b3Vector3 linVelB = gBodies[bIdx].m_linVel;
445 b3Vector3 angVelB = gBodies[bIdx].m_angVel;
446 float invMassB = gBodies[bIdx].m_invMass;
447 b3Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld; //m_invInertia;
448
449 b3GpuConstraint4 cs;
450 float countA = invMassA ? (float)(bodyCount[aIdx]) : 1;
451 float countB = invMassB ? (float)(bodyCount[bIdx]) : 1;
452 setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
453 &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, countA, countB,
454 &cs);
455
456 cs.m_batchIdx = gContact[gIdx].m_batchIdx;
457
458 gConstraintOut[gIdx] = cs;
459 }
460 }
461
solveGroupHost(b3RigidBodyData * bodies,b3InertiaData * inertias,int numBodies,b3Contact4 * manifoldPtr,int numManifolds,const b3JacobiSolverInfo & solverInfo)462 void b3GpuJacobiContactSolver::solveGroupHost(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, const b3JacobiSolverInfo& solverInfo)
463 {
464 B3_PROFILE("b3GpuJacobiContactSolver::solveGroup");
465
466 b3AlignedObjectArray<unsigned int> bodyCount;
467 bodyCount.resize(numBodies);
468 for (int i = 0; i < numBodies; i++)
469 bodyCount[i] = 0;
470
471 b3AlignedObjectArray<b3Int2> contactConstraintOffsets;
472 contactConstraintOffsets.resize(numManifolds);
473
474 for (int i = 0; i < numManifolds; i++)
475 {
476 int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;
477 int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;
478
479 bool isFixedA = (pa < 0) || (pa == solverInfo.m_fixedBodyIndex);
480 bool isFixedB = (pb < 0) || (pb == solverInfo.m_fixedBodyIndex);
481
482 int bodyIndexA = manifoldPtr[i].getBodyA();
483 int bodyIndexB = manifoldPtr[i].getBodyB();
484
485 if (!isFixedA)
486 {
487 contactConstraintOffsets[i].x = bodyCount[bodyIndexA];
488 bodyCount[bodyIndexA]++;
489 }
490 if (!isFixedB)
491 {
492 contactConstraintOffsets[i].y = bodyCount[bodyIndexB];
493 bodyCount[bodyIndexB]++;
494 }
495 }
496
497 b3AlignedObjectArray<unsigned int> offsetSplitBodies;
498 offsetSplitBodies.resize(numBodies);
499 unsigned int totalNumSplitBodies;
500 m_data->m_scan->executeHost(bodyCount, offsetSplitBodies, numBodies, &totalNumSplitBodies);
501 int numlastBody = bodyCount[numBodies - 1];
502 totalNumSplitBodies += numlastBody;
503 printf("totalNumSplitBodies = %d\n", totalNumSplitBodies);
504
505 b3AlignedObjectArray<b3GpuConstraint4> contactConstraints;
506 contactConstraints.resize(numManifolds);
507
508 for (int i = 0; i < numManifolds; i++)
509 {
510 ContactToConstraintKernel(&manifoldPtr[0], bodies, inertias, &contactConstraints[0], numManifolds,
511 solverInfo.m_deltaTime,
512 solverInfo.m_positionDrift,
513 solverInfo.m_positionConstraintCoeff,
514 i, bodyCount);
515 }
516 int maxIter = solverInfo.m_numIterations;
517
518 b3AlignedObjectArray<b3Vector3> deltaLinearVelocities;
519 b3AlignedObjectArray<b3Vector3> deltaAngularVelocities;
520 deltaLinearVelocities.resize(totalNumSplitBodies);
521 deltaAngularVelocities.resize(totalNumSplitBodies);
522 for (unsigned int i = 0; i < totalNumSplitBodies; i++)
523 {
524 deltaLinearVelocities[i].setZero();
525 deltaAngularVelocities[i].setZero();
526 }
527
528 for (int iter = 0; iter < maxIter; iter++)
529 {
530 int i = 0;
531 for (i = 0; i < numManifolds; i++)
532 {
533 //float frictionCoeff = contactConstraints[i].getFrictionCoeff();
534 int aIdx = (int)contactConstraints[i].m_bodyA;
535 int bIdx = (int)contactConstraints[i].m_bodyB;
536 b3RigidBodyData& bodyA = bodies[aIdx];
537 b3RigidBodyData& bodyB = bodies[bIdx];
538
539 b3Vector3 zero = b3MakeVector3(0, 0, 0);
540
541 b3Vector3* dlvAPtr = &zero;
542 b3Vector3* davAPtr = &zero;
543 b3Vector3* dlvBPtr = &zero;
544 b3Vector3* davBPtr = &zero;
545
546 if (bodyA.m_invMass)
547 {
548 int bodyOffsetA = offsetSplitBodies[aIdx];
549 int constraintOffsetA = contactConstraintOffsets[i].x;
550 int splitIndexA = bodyOffsetA + constraintOffsetA;
551 dlvAPtr = &deltaLinearVelocities[splitIndexA];
552 davAPtr = &deltaAngularVelocities[splitIndexA];
553 }
554
555 if (bodyB.m_invMass)
556 {
557 int bodyOffsetB = offsetSplitBodies[bIdx];
558 int constraintOffsetB = contactConstraintOffsets[i].y;
559 int splitIndexB = bodyOffsetB + constraintOffsetB;
560 dlvBPtr = &deltaLinearVelocities[splitIndexB];
561 davBPtr = &deltaAngularVelocities[splitIndexB];
562 }
563
564 {
565 float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
566 float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
567
568 solveContact(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld,
569 (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld,
570 maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr);
571 }
572 }
573
574 //easy
575 for (int i = 0; i < numBodies; i++)
576 {
577 if (bodies[i].m_invMass)
578 {
579 int bodyOffset = offsetSplitBodies[i];
580 int count = bodyCount[i];
581 float factor = 1.f / float(count);
582 b3Vector3 averageLinVel;
583 averageLinVel.setZero();
584 b3Vector3 averageAngVel;
585 averageAngVel.setZero();
586 for (int j = 0; j < count; j++)
587 {
588 averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor;
589 averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor;
590 }
591 for (int j = 0; j < count; j++)
592 {
593 deltaLinearVelocities[bodyOffset + j] = averageLinVel;
594 deltaAngularVelocities[bodyOffset + j] = averageAngVel;
595 }
596 }
597 }
598 }
599 for (int iter = 0; iter < maxIter; iter++)
600 {
601 //int i=0;
602
603 //solve friction
604
605 for (int i = 0; i < numManifolds; i++)
606 {
607 float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
608 float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
609
610 float sum = 0;
611 for (int j = 0; j < 4; j++)
612 {
613 sum += contactConstraints[i].m_appliedRambdaDt[j];
614 }
615 float frictionCoeff = contactConstraints[i].getFrictionCoeff();
616 int aIdx = (int)contactConstraints[i].m_bodyA;
617 int bIdx = (int)contactConstraints[i].m_bodyB;
618 b3RigidBodyData& bodyA = bodies[aIdx];
619 b3RigidBodyData& bodyB = bodies[bIdx];
620
621 b3Vector3 zero = b3MakeVector3(0, 0, 0);
622
623 b3Vector3* dlvAPtr = &zero;
624 b3Vector3* davAPtr = &zero;
625 b3Vector3* dlvBPtr = &zero;
626 b3Vector3* davBPtr = &zero;
627
628 if (bodyA.m_invMass)
629 {
630 int bodyOffsetA = offsetSplitBodies[aIdx];
631 int constraintOffsetA = contactConstraintOffsets[i].x;
632 int splitIndexA = bodyOffsetA + constraintOffsetA;
633 dlvAPtr = &deltaLinearVelocities[splitIndexA];
634 davAPtr = &deltaAngularVelocities[splitIndexA];
635 }
636
637 if (bodyB.m_invMass)
638 {
639 int bodyOffsetB = offsetSplitBodies[bIdx];
640 int constraintOffsetB = contactConstraintOffsets[i].y;
641 int splitIndexB = bodyOffsetB + constraintOffsetB;
642 dlvBPtr = &deltaLinearVelocities[splitIndexB];
643 davBPtr = &deltaAngularVelocities[splitIndexB];
644 }
645
646 for (int j = 0; j < 4; j++)
647 {
648 maxRambdaDt[j] = frictionCoeff * sum;
649 minRambdaDt[j] = -maxRambdaDt[j];
650 }
651
652 solveFriction(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld,
653 (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld,
654 maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr);
655 }
656
657 //easy
658 for (int i = 0; i < numBodies; i++)
659 {
660 if (bodies[i].m_invMass)
661 {
662 int bodyOffset = offsetSplitBodies[i];
663 int count = bodyCount[i];
664 float factor = 1.f / float(count);
665 b3Vector3 averageLinVel;
666 averageLinVel.setZero();
667 b3Vector3 averageAngVel;
668 averageAngVel.setZero();
669 for (int j = 0; j < count; j++)
670 {
671 averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor;
672 averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor;
673 }
674 for (int j = 0; j < count; j++)
675 {
676 deltaLinearVelocities[bodyOffset + j] = averageLinVel;
677 deltaAngularVelocities[bodyOffset + j] = averageAngVel;
678 }
679 }
680 }
681 }
682
683 //easy
684 for (int i = 0; i < numBodies; i++)
685 {
686 if (bodies[i].m_invMass)
687 {
688 int bodyOffset = offsetSplitBodies[i];
689 int count = bodyCount[i];
690 if (count)
691 {
692 bodies[i].m_linVel += deltaLinearVelocities[bodyOffset];
693 bodies[i].m_angVel += deltaAngularVelocities[bodyOffset];
694 }
695 }
696 }
697 }
698
solveContacts(int numBodies,cl_mem bodyBuf,cl_mem inertiaBuf,int numContacts,cl_mem contactBuf,const struct b3Config & config,int static0Index)699 void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index)
700 //
701 //
702 //void b3GpuJacobiContactSolver::solveGroup(b3OpenCLArray<b3RigidBodyData>* bodies,b3OpenCLArray<b3InertiaData>* inertias,b3OpenCLArray<b3Contact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo)
703 {
704 b3JacobiSolverInfo solverInfo;
705 solverInfo.m_fixedBodyIndex = static0Index;
706
707 B3_PROFILE("b3GpuJacobiContactSolver::solveGroup");
708
709 //int numBodies = bodies->size();
710 int numManifolds = numContacts; //manifoldPtr->size();
711
712 {
713 B3_PROFILE("resize");
714 m_data->m_bodyCount->resize(numBodies);
715 }
716
717 unsigned int val = 0;
718 b3Int2 val2;
719 val2.x = 0;
720 val2.y = 0;
721
722 {
723 B3_PROFILE("m_filler");
724 m_data->m_contactConstraintOffsets->resize(numManifolds);
725 m_data->m_filler->execute(*m_data->m_bodyCount, val, numBodies);
726
727 m_data->m_filler->execute(*m_data->m_contactConstraintOffsets, val2, numManifolds);
728 }
729
730 {
731 B3_PROFILE("m_countBodiesKernel");
732 b3LauncherCL launcher(this->m_queue, m_data->m_countBodiesKernel, "m_countBodiesKernel");
733 launcher.setBuffer(contactBuf); //manifoldPtr->getBufferCL());
734 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
735 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
736 launcher.setConst(numManifolds);
737 launcher.setConst(solverInfo.m_fixedBodyIndex);
738 launcher.launch1D(numManifolds);
739 }
740 unsigned int totalNumSplitBodies = 0;
741 {
742 B3_PROFILE("m_scan->execute");
743
744 m_data->m_offsetSplitBodies->resize(numBodies);
745 m_data->m_scan->execute(*m_data->m_bodyCount, *m_data->m_offsetSplitBodies, numBodies, &totalNumSplitBodies);
746 totalNumSplitBodies += m_data->m_bodyCount->at(numBodies - 1);
747 }
748
749 {
750 B3_PROFILE("m_data->m_contactConstraints->resize");
751 //int numContacts = manifoldPtr->size();
752 m_data->m_contactConstraints->resize(numContacts);
753 }
754
755 {
756 B3_PROFILE("contactToConstraintSplitKernel");
757 b3LauncherCL launcher(m_queue, m_data->m_contactToConstraintSplitKernel, "m_contactToConstraintSplitKernel");
758 launcher.setBuffer(contactBuf);
759 launcher.setBuffer(bodyBuf);
760 launcher.setBuffer(inertiaBuf);
761 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
762 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
763 launcher.setConst(numContacts);
764 launcher.setConst(solverInfo.m_deltaTime);
765 launcher.setConst(solverInfo.m_positionDrift);
766 launcher.setConst(solverInfo.m_positionConstraintCoeff);
767 launcher.launch1D(numContacts, 64);
768 }
769
770 {
771 B3_PROFILE("m_data->m_deltaLinearVelocities->resize");
772 m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies);
773 m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies);
774 }
775
776 {
777 B3_PROFILE("m_clearVelocitiesKernel");
778 b3LauncherCL launch(m_queue, m_data->m_clearVelocitiesKernel, "m_clearVelocitiesKernel");
779 launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
780 launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
781 launch.setConst(totalNumSplitBodies);
782 launch.launch1D(totalNumSplitBodies);
783 clFinish(m_queue);
784 }
785
786 int maxIter = solverInfo.m_numIterations;
787
788 for (int iter = 0; iter < maxIter; iter++)
789 {
790 {
791 B3_PROFILE("m_solveContactKernel");
792 b3LauncherCL launcher(m_queue, m_data->m_solveContactKernel, "m_solveContactKernel");
793 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
794 launcher.setBuffer(bodyBuf);
795 launcher.setBuffer(inertiaBuf);
796 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
797 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
798 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
799 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
800 launcher.setConst(solverInfo.m_deltaTime);
801 launcher.setConst(solverInfo.m_positionDrift);
802 launcher.setConst(solverInfo.m_positionConstraintCoeff);
803 launcher.setConst(solverInfo.m_fixedBodyIndex);
804 launcher.setConst(numManifolds);
805
806 launcher.launch1D(numManifolds);
807 clFinish(m_queue);
808 }
809
810 {
811 B3_PROFILE("average velocities");
812 b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, "m_averageVelocitiesKernel");
813 launcher.setBuffer(bodyBuf);
814 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
815 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
816 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
817 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
818 launcher.setConst(numBodies);
819 launcher.launch1D(numBodies);
820 clFinish(m_queue);
821 }
822
823 {
824 B3_PROFILE("m_solveFrictionKernel");
825 b3LauncherCL launcher(m_queue, m_data->m_solveFrictionKernel, "m_solveFrictionKernel");
826 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
827 launcher.setBuffer(bodyBuf);
828 launcher.setBuffer(inertiaBuf);
829 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
830 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
831 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
832 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
833 launcher.setConst(solverInfo.m_deltaTime);
834 launcher.setConst(solverInfo.m_positionDrift);
835 launcher.setConst(solverInfo.m_positionConstraintCoeff);
836 launcher.setConst(solverInfo.m_fixedBodyIndex);
837 launcher.setConst(numManifolds);
838
839 launcher.launch1D(numManifolds);
840 clFinish(m_queue);
841 }
842
843 {
844 B3_PROFILE("average velocities");
845 b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, "m_averageVelocitiesKernel");
846 launcher.setBuffer(bodyBuf);
847 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
848 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
849 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
850 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
851 launcher.setConst(numBodies);
852 launcher.launch1D(numBodies);
853 clFinish(m_queue);
854 }
855 }
856
857 {
858 B3_PROFILE("update body velocities");
859 b3LauncherCL launcher(m_queue, m_data->m_updateBodyVelocitiesKernel, "m_updateBodyVelocitiesKernel");
860 launcher.setBuffer(bodyBuf);
861 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
862 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
863 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
864 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
865 launcher.setConst(numBodies);
866 launcher.launch1D(numBodies);
867 clFinish(m_queue);
868 }
869 }
870
871 #if 0
872
873 void b3GpuJacobiContactSolver::solveGroupMixed(b3OpenCLArray<b3RigidBodyData>* bodiesGPU,b3OpenCLArray<b3InertiaData>* inertiasGPU,b3OpenCLArray<b3Contact4>* manifoldPtrGPU,const btJacobiSolverInfo& solverInfo)
874 {
875
876 b3AlignedObjectArray<b3RigidBodyData> bodiesCPU;
877 bodiesGPU->copyToHost(bodiesCPU);
878 b3AlignedObjectArray<b3InertiaData> inertiasCPU;
879 inertiasGPU->copyToHost(inertiasCPU);
880 b3AlignedObjectArray<b3Contact4> manifoldPtrCPU;
881 manifoldPtrGPU->copyToHost(manifoldPtrCPU);
882
883 int numBodiesCPU = bodiesGPU->size();
884 int numManifoldsCPU = manifoldPtrGPU->size();
885 B3_PROFILE("b3GpuJacobiContactSolver::solveGroupMixed");
886
887 b3AlignedObjectArray<unsigned int> bodyCount;
888 bodyCount.resize(numBodiesCPU);
889 for (int i=0;i<numBodiesCPU;i++)
890 bodyCount[i] = 0;
891
892 b3AlignedObjectArray<b3Int2> contactConstraintOffsets;
893 contactConstraintOffsets.resize(numManifoldsCPU);
894
895
896 for (int i=0;i<numManifoldsCPU;i++)
897 {
898 int pa = manifoldPtrCPU[i].m_bodyAPtrAndSignBit;
899 int pb = manifoldPtrCPU[i].m_bodyBPtrAndSignBit;
900
901 bool isFixedA = (pa <0) || (pa == solverInfo.m_fixedBodyIndex);
902 bool isFixedB = (pb <0) || (pb == solverInfo.m_fixedBodyIndex);
903
904 int bodyIndexA = manifoldPtrCPU[i].getBodyA();
905 int bodyIndexB = manifoldPtrCPU[i].getBodyB();
906
907 if (!isFixedA)
908 {
909 contactConstraintOffsets[i].x = bodyCount[bodyIndexA];
910 bodyCount[bodyIndexA]++;
911 }
912 if (!isFixedB)
913 {
914 contactConstraintOffsets[i].y = bodyCount[bodyIndexB];
915 bodyCount[bodyIndexB]++;
916 }
917 }
918
919 b3AlignedObjectArray<unsigned int> offsetSplitBodies;
920 offsetSplitBodies.resize(numBodiesCPU);
921 unsigned int totalNumSplitBodiesCPU;
922 m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodiesCPU,&totalNumSplitBodiesCPU);
923 int numlastBody = bodyCount[numBodiesCPU-1];
924 totalNumSplitBodiesCPU += numlastBody;
925
926 int numBodies = bodiesGPU->size();
927 int numManifolds = manifoldPtrGPU->size();
928
929 m_data->m_bodyCount->resize(numBodies);
930
931 unsigned int val=0;
932 b3Int2 val2;
933 val2.x=0;
934 val2.y=0;
935
936 {
937 B3_PROFILE("m_filler");
938 m_data->m_contactConstraintOffsets->resize(numManifolds);
939 m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies);
940
941
942 m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds);
943 }
944
945 {
946 B3_PROFILE("m_countBodiesKernel");
947 b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel);
948 launcher.setBuffer(manifoldPtrGPU->getBufferCL());
949 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
950 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
951 launcher.setConst(numManifolds);
952 launcher.setConst(solverInfo.m_fixedBodyIndex);
953 launcher.launch1D(numManifolds);
954 }
955
956 unsigned int totalNumSplitBodies=0;
957 m_data->m_offsetSplitBodies->resize(numBodies);
958 m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies);
959 totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1);
960
961 if (totalNumSplitBodies != totalNumSplitBodiesCPU)
962 {
963 printf("error in totalNumSplitBodies!\n");
964 }
965
966 int numContacts = manifoldPtrGPU->size();
967 m_data->m_contactConstraints->resize(numContacts);
968
969
970 {
971 B3_PROFILE("contactToConstraintSplitKernel");
972 b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel);
973 launcher.setBuffer(manifoldPtrGPU->getBufferCL());
974 launcher.setBuffer(bodiesGPU->getBufferCL());
975 launcher.setBuffer(inertiasGPU->getBufferCL());
976 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
977 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
978 launcher.setConst(numContacts);
979 launcher.setConst(solverInfo.m_deltaTime);
980 launcher.setConst(solverInfo.m_positionDrift);
981 launcher.setConst(solverInfo.m_positionConstraintCoeff);
982 launcher.launch1D( numContacts, 64 );
983 clFinish(m_queue);
984 }
985
986
987
988 b3AlignedObjectArray<b3GpuConstraint4> contactConstraints;
989 contactConstraints.resize(numManifoldsCPU);
990
991 for (int i=0;i<numManifoldsCPU;i++)
992 {
993 ContactToConstraintKernel(&manifoldPtrCPU[0],&bodiesCPU[0],&inertiasCPU[0],&contactConstraints[0],numManifoldsCPU,
994 solverInfo.m_deltaTime,
995 solverInfo.m_positionDrift,
996 solverInfo.m_positionConstraintCoeff,
997 i, bodyCount);
998 }
999 int maxIter = solverInfo.m_numIterations;
1000
1001
1002 b3AlignedObjectArray<b3Vector3> deltaLinearVelocities;
1003 b3AlignedObjectArray<b3Vector3> deltaAngularVelocities;
1004 deltaLinearVelocities.resize(totalNumSplitBodiesCPU);
1005 deltaAngularVelocities.resize(totalNumSplitBodiesCPU);
1006 for (int i=0;i<totalNumSplitBodiesCPU;i++)
1007 {
1008 deltaLinearVelocities[i].setZero();
1009 deltaAngularVelocities[i].setZero();
1010 }
1011
1012 m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies);
1013 m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies);
1014
1015
1016
1017 {
1018 B3_PROFILE("m_clearVelocitiesKernel");
1019 b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel);
1020 launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1021 launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1022 launch.setConst(totalNumSplitBodies);
1023 launch.launch1D(totalNumSplitBodies);
1024 }
1025
1026
1027 ///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1028
1029
1030 m_data->m_contactConstraints->copyToHost(contactConstraints);
1031 m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies);
1032 m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets);
1033 m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
1034 m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
1035
1036 for (int iter = 0;iter<maxIter;iter++)
1037 {
1038
1039 {
1040 B3_PROFILE("m_solveContactKernel");
1041 b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel );
1042 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
1043 launcher.setBuffer(bodiesGPU->getBufferCL());
1044 launcher.setBuffer(inertiasGPU->getBufferCL());
1045 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
1046 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1047 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1048 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1049 launcher.setConst(solverInfo.m_deltaTime);
1050 launcher.setConst(solverInfo.m_positionDrift);
1051 launcher.setConst(solverInfo.m_positionConstraintCoeff);
1052 launcher.setConst(solverInfo.m_fixedBodyIndex);
1053 launcher.setConst(numManifolds);
1054
1055 launcher.launch1D(numManifolds);
1056 clFinish(m_queue);
1057 }
1058
1059
1060 int i=0;
1061 for( i=0; i<numManifoldsCPU; i++)
1062 {
1063
1064 float frictionCoeff = contactConstraints[i].getFrictionCoeff();
1065 int aIdx = (int)contactConstraints[i].m_bodyA;
1066 int bIdx = (int)contactConstraints[i].m_bodyB;
1067 b3RigidBodyData& bodyA = bodiesCPU[aIdx];
1068 b3RigidBodyData& bodyB = bodiesCPU[bIdx];
1069
1070 b3Vector3 zero(0,0,0);
1071
1072 b3Vector3* dlvAPtr=&zero;
1073 b3Vector3* davAPtr=&zero;
1074 b3Vector3* dlvBPtr=&zero;
1075 b3Vector3* davBPtr=&zero;
1076
1077 if (bodyA.m_invMass)
1078 {
1079 int bodyOffsetA = offsetSplitBodies[aIdx];
1080 int constraintOffsetA = contactConstraintOffsets[i].x;
1081 int splitIndexA = bodyOffsetA+constraintOffsetA;
1082 dlvAPtr = &deltaLinearVelocities[splitIndexA];
1083 davAPtr = &deltaAngularVelocities[splitIndexA];
1084 }
1085
1086 if (bodyB.m_invMass)
1087 {
1088 int bodyOffsetB = offsetSplitBodies[bIdx];
1089 int constraintOffsetB = contactConstraintOffsets[i].y;
1090 int splitIndexB= bodyOffsetB+constraintOffsetB;
1091 dlvBPtr =&deltaLinearVelocities[splitIndexB];
1092 davBPtr = &deltaAngularVelocities[splitIndexB];
1093 }
1094
1095
1096
1097 {
1098 float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
1099 float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
1100
1101 solveContact( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertiasCPU[aIdx].m_invInertiaWorld,
1102 (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
1103 maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr );
1104
1105
1106 }
1107 }
1108
1109
1110 {
1111 B3_PROFILE("average velocities");
1112 b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
1113 launcher.setBuffer(bodiesGPU->getBufferCL());
1114 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1115 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
1116 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1117 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1118 launcher.setConst(numBodies);
1119 launcher.launch1D(numBodies);
1120 clFinish(m_queue);
1121 }
1122
1123 //easy
1124 for (int i=0;i<numBodiesCPU;i++)
1125 {
1126 if (bodiesCPU[i].m_invMass)
1127 {
1128 int bodyOffset = offsetSplitBodies[i];
1129 int count = bodyCount[i];
1130 float factor = 1.f/float(count);
1131 b3Vector3 averageLinVel;
1132 averageLinVel.setZero();
1133 b3Vector3 averageAngVel;
1134 averageAngVel.setZero();
1135 for (int j=0;j<count;j++)
1136 {
1137 averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
1138 averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
1139 }
1140 for (int j=0;j<count;j++)
1141 {
1142 deltaLinearVelocities[bodyOffset+j] = averageLinVel;
1143 deltaAngularVelocities[bodyOffset+j] = averageAngVel;
1144 }
1145 }
1146 }
1147 // m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities);
1148 //m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities);
1149 m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
1150 m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
1151
1152 #if 0
1153
1154 {
1155 B3_PROFILE("m_solveFrictionKernel");
1156 b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel);
1157 launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
1158 launcher.setBuffer(bodiesGPU->getBufferCL());
1159 launcher.setBuffer(inertiasGPU->getBufferCL());
1160 launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
1161 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1162 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1163 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1164 launcher.setConst(solverInfo.m_deltaTime);
1165 launcher.setConst(solverInfo.m_positionDrift);
1166 launcher.setConst(solverInfo.m_positionConstraintCoeff);
1167 launcher.setConst(solverInfo.m_fixedBodyIndex);
1168 launcher.setConst(numManifolds);
1169
1170 launcher.launch1D(numManifolds);
1171 clFinish(m_queue);
1172 }
1173
1174 //solve friction
1175
1176 for(int i=0; i<numManifoldsCPU; i++)
1177 {
1178 float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
1179 float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
1180
1181 float sum = 0;
1182 for(int j=0; j<4; j++)
1183 {
1184 sum +=contactConstraints[i].m_appliedRambdaDt[j];
1185 }
1186 float frictionCoeff = contactConstraints[i].getFrictionCoeff();
1187 int aIdx = (int)contactConstraints[i].m_bodyA;
1188 int bIdx = (int)contactConstraints[i].m_bodyB;
1189 b3RigidBodyData& bodyA = bodiesCPU[aIdx];
1190 b3RigidBodyData& bodyB = bodiesCPU[bIdx];
1191
1192 b3Vector3 zero(0,0,0);
1193
1194 b3Vector3* dlvAPtr=&zero;
1195 b3Vector3* davAPtr=&zero;
1196 b3Vector3* dlvBPtr=&zero;
1197 b3Vector3* davBPtr=&zero;
1198
1199 if (bodyA.m_invMass)
1200 {
1201 int bodyOffsetA = offsetSplitBodies[aIdx];
1202 int constraintOffsetA = contactConstraintOffsets[i].x;
1203 int splitIndexA = bodyOffsetA+constraintOffsetA;
1204 dlvAPtr = &deltaLinearVelocities[splitIndexA];
1205 davAPtr = &deltaAngularVelocities[splitIndexA];
1206 }
1207
1208 if (bodyB.m_invMass)
1209 {
1210 int bodyOffsetB = offsetSplitBodies[bIdx];
1211 int constraintOffsetB = contactConstraintOffsets[i].y;
1212 int splitIndexB= bodyOffsetB+constraintOffsetB;
1213 dlvBPtr =&deltaLinearVelocities[splitIndexB];
1214 davBPtr = &deltaAngularVelocities[splitIndexB];
1215 }
1216
1217 for(int j=0; j<4; j++)
1218 {
1219 maxRambdaDt[j] = frictionCoeff*sum;
1220 minRambdaDt[j] = -maxRambdaDt[j];
1221 }
1222
1223 solveFriction( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,inertiasCPU[aIdx].m_invInertiaWorld,
1224 (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
1225 maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr);
1226
1227 }
1228
1229 {
1230 B3_PROFILE("average velocities");
1231 b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
1232 launcher.setBuffer(bodiesGPU->getBufferCL());
1233 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1234 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
1235 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1236 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1237 launcher.setConst(numBodies);
1238 launcher.launch1D(numBodies);
1239 clFinish(m_queue);
1240 }
1241
1242 //easy
1243 for (int i=0;i<numBodiesCPU;i++)
1244 {
1245 if (bodiesCPU[i].m_invMass)
1246 {
1247 int bodyOffset = offsetSplitBodies[i];
1248 int count = bodyCount[i];
1249 float factor = 1.f/float(count);
1250 b3Vector3 averageLinVel;
1251 averageLinVel.setZero();
1252 b3Vector3 averageAngVel;
1253 averageAngVel.setZero();
1254 for (int j=0;j<count;j++)
1255 {
1256 averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
1257 averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
1258 }
1259 for (int j=0;j<count;j++)
1260 {
1261 deltaLinearVelocities[bodyOffset+j] = averageLinVel;
1262 deltaAngularVelocities[bodyOffset+j] = averageAngVel;
1263 }
1264 }
1265 }
1266
1267 #endif
1268
1269 }
1270
1271 {
1272 B3_PROFILE("update body velocities");
1273 b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel);
1274 launcher.setBuffer(bodiesGPU->getBufferCL());
1275 launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
1276 launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
1277 launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
1278 launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
1279 launcher.setConst(numBodies);
1280 launcher.launch1D(numBodies);
1281 clFinish(m_queue);
1282 }
1283
1284
1285 //easy
1286 for (int i=0;i<numBodiesCPU;i++)
1287 {
1288 if (bodiesCPU[i].m_invMass)
1289 {
1290 int bodyOffset = offsetSplitBodies[i];
1291 int count = bodyCount[i];
1292 if (count)
1293 {
1294 bodiesCPU[i].m_linVel += deltaLinearVelocities[bodyOffset];
1295 bodiesCPU[i].m_angVel += deltaAngularVelocities[bodyOffset];
1296 }
1297 }
1298 }
1299
1300
1301 // bodiesGPU->copyFromHost(bodiesCPU);
1302
1303
1304 }
1305 #endif
1306