1 // =============================================================================
2 // PROJECT CHRONO - http://projectchrono.org
3 //
4 // Copyright (c) 2014 projectchrono.org
5 // All rights reserved.
6 //
7 // Use of this source code is governed by a BSD-style license that can be found
8 // in the LICENSE file at the top level of the distribution and at
9 // http://projectchrono.org/license-chrono.txt.
10 //
11 // =============================================================================
12 // Author: Milad Rakhsha
13 // =============================================================================
14 #include <thrust/extrema.h>
15 #include <thrust/sort.h>
16 #include "chrono_fsi/physics/ChFsiForceIISPH.cuh"
17 #include "chrono_fsi/physics/ChSphGeneral.cuh"
18 #define RESOLUTION_LENGTH_MULT_IISPH 2.0
19
20 //==========================================================================================================================================
21 namespace chrono {
22 namespace fsi {
23 // double precision atomic add function
datomicAdd(double * address,double val)24 __device__ inline double datomicAdd(double* address, double val) {
25 unsigned long long int* address_as_ull = (unsigned long long int*)address;
26
27 unsigned long long int old = *address_as_ull, assumed;
28
29 do {
30 assumed = old;
31 old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
32 } while (assumed != old);
33
34 return __longlong_as_double(old);
35 }
36
ChFsiForceIISPH(std::shared_ptr<ChBce> otherBceWorker,std::shared_ptr<SphMarkerDataD> otherSortedSphMarkersD,std::shared_ptr<ProximityDataD> otherMarkersProximityD,std::shared_ptr<FsiGeneralData> otherFsiGeneralData,std::shared_ptr<SimParams> otherParamsH,std::shared_ptr<NumberOfObjects> otherNumObjects)37 ChFsiForceIISPH::ChFsiForceIISPH(std::shared_ptr<ChBce> otherBceWorker,
38 std::shared_ptr<SphMarkerDataD> otherSortedSphMarkersD,
39 std::shared_ptr<ProximityDataD> otherMarkersProximityD,
40 std::shared_ptr<FsiGeneralData> otherFsiGeneralData,
41 std::shared_ptr<SimParams> otherParamsH,
42 std::shared_ptr<NumberOfObjects> otherNumObjects)
43 : ChFsiForce(otherBceWorker,
44 otherSortedSphMarkersD,
45 otherMarkersProximityD,
46 otherFsiGeneralData,
47 otherParamsH,
48 otherNumObjects) {}
49 //--------------------------------------------------------------------------------------------------------------------------------
~ChFsiForceIISPH()50 ChFsiForceIISPH::~ChFsiForceIISPH() {}
51 //--------------------------------------------------------------------------------------------------------------------------------
Finalize()52 void ChFsiForceIISPH::Finalize() {
53 ChFsiForce::Finalize();
54 cudaMemcpyToSymbolAsync(paramsD, paramsH.get(), sizeof(SimParams));
55 cudaMemcpyToSymbolAsync(numObjectsD, numObjectsH.get(), sizeof(NumberOfObjects));
56 cudaMemcpyFromSymbol(paramsH.get(), paramsD, sizeof(SimParams));
57 cudaDeviceSynchronize();
58 }
59 //--------------------------------------------------------------------------------------------------------------------------------
V_i_np__AND__d_ii_kernel(Real4 * sortedPosRad,Real3 * sortedVelMas,Real4 * sortedRhoPreMu,Real3 * d_ii,Real3 * V_i_np,Real * sumWij_inv,Real * G_tensor,uint * cellStart,uint * cellEnd,Real delta_t,const size_t numAllMarkers,volatile bool * isErrorD)60 __global__ void V_i_np__AND__d_ii_kernel(Real4* sortedPosRad, // input: sorted positions
61 Real3* sortedVelMas,
62 Real4* sortedRhoPreMu,
63 Real3* d_ii,
64 Real3* V_i_np,
65 Real* sumWij_inv,
66 Real* G_tensor,
67 uint* cellStart,
68 uint* cellEnd,
69 Real delta_t,
70 const size_t numAllMarkers,
71 volatile bool* isErrorD) {
72 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
73 if (i_idx >= numAllMarkers || sortedRhoPreMu[i_idx].w <= -2) {
74 return;
75 }
76 // sortedRhoPreMu[i_idx].x = sortedRhoPreMu[i_idx].x / sumWij_inv[i_idx];
77 Real h_i = sortedPosRad[i_idx].w;
78 Real m_i = h_i * h_i * h_i * paramsD.rho0;
79
80 Real mu_0 = paramsD.mu0;
81 Real epsilon = paramsD.epsMinMarkersDis;
82 Real dT = delta_t;
83 Real3 source_term = paramsD.gravity + paramsD.bodyForce3;
84 Real RHO_0 = paramsD.rho0;
85 if (sortedRhoPreMu[i_idx].x < EPSILON) {
86 printf("density is %f,ref density= %f\n", sortedRhoPreMu[i_idx].x, RHO_0);
87 }
88
89 Real3 posi = mR3(sortedPosRad[i_idx]);
90 Real3 Veli = sortedVelMas[i_idx];
91 Real Rhoi = sortedRhoPreMu[i_idx].x;
92 Real3 My_d_ii = mR3(0);
93 Real3 My_F_i_np = mR3(0);
94
95 // get address in grid
96 int3 gridPos = calcGridPos(posi);
97
98 for (int z = -1; z <= 1; z++) {
99 for (int y = -1; y <= 1; y++) {
100 for (int x = -1; x <= 1; x++) {
101 int3 neighbourPos = gridPos + mI3(x, y, z);
102 uint gridHash = calcGridHash(neighbourPos);
103 // get start of bucket for this cell
104 uint startIndex = cellStart[gridHash];
105 if (startIndex != 0xffffffff) { // cell is not empty
106 uint endIndex = cellEnd[gridHash];
107 for (uint j = startIndex; j < endIndex; j++) {
108 Real3 posj = mR3(sortedPosRad[j]);
109 Real3 rij = Distance(posi, posj);
110 Real d = length(rij);
111
112 if (d > RESOLUTION_LENGTH_MULT * h_i || sortedRhoPreMu[j].w <= -2 || i_idx == j)
113 continue;
114 Real3 eij = rij / d;
115
116 Real3 Velj = sortedVelMas[j];
117 Real Rhoj = sortedRhoPreMu[j].x;
118 Real h_j = sortedPosRad[j].w;
119
120 if (Rhoj == 0) {
121 printf("Bug F_i_np__AND__d_ii_kernel i=%d j=%d, hi=%f, hj=%f\n", i_idx, j, h_i, h_j);
122 }
123
124 Real m_j = h_j * h_j * h_j * paramsD.rho0;
125 Real h_ij = 0.5 * (h_j + h_i);
126 Real3 grad_ij = GradWh(rij, h_ij);
127 My_d_ii += m_j * (-(dT * dT) / (Rhoi * Rhoi)) * grad_ij;
128
129 Real Rho_bar = (Rhoj + Rhoi) * 0.5;
130 Real3 V_ij = (Veli - Velj);
131 // Real nu = mu_0 * paramsD.HSML * 320 / Rho_bar;
132 // Real3 muNumerator = nu * fmin(0.0, dot(rij, V_ij)) * grad_ij;
133 Real3 muNumerator = 2 * mu_0 * dot(rij, grad_ij) * V_ij;
134 Real muDenominator = (Rho_bar * Rho_bar) * (d * d + h_ij * h_ij * epsilon);
135 // if ((sortedRhoPreMu[i_idx].w < 0 && sortedRhoPreMu[j].w < 0))
136 // if (sortedRhoPreMu[i_idx].w < 0 || (sortedRhoPreMu[i_idx].w >= 0 &&
137 // sortedRhoPreMu[j].w < 0))
138 My_F_i_np += m_j * muNumerator / muDenominator;
139
140 Real Wd = W3h(d, h_ij);
141 My_F_i_np -= paramsD.kappa / m_i * m_j * Wd * rij;
142 }
143 }
144 }
145 }
146 }
147
148 // if (!paramsD.Conservative_Form)
149 // My_F_i_np = mu_0 * LaplacainVi;
150
151 My_F_i_np *= m_i;
152
153 My_F_i_np += m_i * source_term;
154 d_ii[i_idx] = My_d_ii;
155 V_i_np[i_idx] = (My_F_i_np * dT + Veli); // This does not contain m_0?
156 }
157 //--------------------------------------------------------------------------------------------------------------------------------
158 //--------------------------------------------------------------------------------------------------------------------------------
Rho_np_AND_a_ii_AND_sum_m_GradW(Real4 * sortedPosRad,Real4 * sortedRhoPreMu,Real * rho_np,Real * a_ii,Real * p_old,Real3 * V_np,Real3 * d_ii,Real3 * sum_m_GradW,uint * cellStart,uint * cellEnd,Real delta_t,const size_t numAllMarkers,volatile bool * isErrorD)159 __global__ void Rho_np_AND_a_ii_AND_sum_m_GradW(Real4* sortedPosRad,
160 Real4* sortedRhoPreMu,
161 Real* rho_np, // Write
162 Real* a_ii, // Write
163 Real* p_old, // Write
164 Real3* V_np, // Read
165 Real3* d_ii, // Read
166 Real3* sum_m_GradW,
167 uint* cellStart,
168 uint* cellEnd,
169 Real delta_t,
170 const size_t numAllMarkers,
171 volatile bool* isErrorD) {
172 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
173 if (i_idx >= numAllMarkers || sortedRhoPreMu[i_idx].w <= -2) {
174 return;
175 }
176
177 Real h_i = sortedPosRad[i_idx].w;
178 Real m_i = h_i * h_i * h_i * paramsD.rho0;
179
180 Real3 posi = mR3(sortedPosRad[i_idx]);
181 Real3 Veli_np = V_np[i_idx];
182 Real Rho_i = sortedRhoPreMu[i_idx].x;
183 Real3 my_d_ii = d_ii[i_idx];
184 Real rho_temp = 0;
185 Real my_a_ii = 0;
186 Real3 My_sum_m_gradW = mR3(0);
187 Real dT = delta_t;
188 // get address in gridj
189 int3 gridPos = calcGridPos(posi);
190
191 //
192 // examine neighbouring cells
193 for (int z = -1; z <= 1; z++) {
194 for (int y = -1; y <= 1; y++) {
195 for (int x = -1; x <= 1; x++) {
196 int3 neighbourPos = gridPos + mI3(x, y, z);
197 uint gridHash = calcGridHash(neighbourPos);
198 // get start of bucket for this cell
199 uint startIndex = cellStart[gridHash];
200 if (startIndex != 0xffffffff) { // cell is not empty
201 // iterate over particles in this cell
202 uint endIndex = cellEnd[gridHash];
203
204 for (uint j = startIndex; j < endIndex; j++) {
205 Real3 posj = mR3(sortedPosRad[j]);
206 Real3 dist3 = Distance(posi, posj);
207 Real d = length(dist3);
208 if (d > RESOLUTION_LENGTH_MULT * h_i || sortedRhoPreMu[j].w <= -2 || i_idx == j)
209 continue;
210 Real h_j = sortedPosRad[j].w;
211 Real m_j = h_j * h_j * h_j * paramsD.rho0;
212 Real h_ij = 0.5 * (h_j + h_i);
213 Real3 Velj_np = V_np[j];
214 Real3 grad_i_wij = GradWh(dist3, h_ij);
215 rho_temp += m_j * dot((Veli_np - Velj_np), grad_i_wij);
216 Real3 d_ji = m_i * (-(dT * dT) / (Rho_i * Rho_i)) * (-grad_i_wij);
217 my_a_ii += m_j * dot((my_d_ii - d_ji), grad_i_wij);
218 My_sum_m_gradW += m_j * grad_i_wij;
219 }
220 }
221 }
222 }
223 }
224 rho_np[i_idx] = dT * rho_temp + sortedRhoPreMu[i_idx].x;
225
226 // Note: a_ii can become zero and when this can cause divide by 0 issues for free particles
227 a_ii[i_idx] = abs(my_a_ii) > EPSILON ? my_a_ii : 1.0;
228 sum_m_GradW[i_idx] = My_sum_m_gradW;
229
230 p_old[i_idx] = sortedRhoPreMu[i_idx].y; // = 1000; // Note that this is outside of the for loop
231 }
232 //--------------------------------------------------------------------------------------------------------------------------------
233
Calc_dij_pj(Real3 * dij_pj,Real3 * F_p,Real3 * d_ii,Real4 * sortedPosRad,Real3 * sortedVelMas,Real4 * sortedRhoPreMu,Real * p_old,uint * cellStart,uint * cellEnd,Real delta_t,const size_t numAllMarkers,volatile bool * isErrorD)234 __global__ void Calc_dij_pj(Real3* dij_pj, // write
235 Real3* F_p, // Write
236 Real3* d_ii, // Read
237 Real4* sortedPosRad,
238 Real3* sortedVelMas,
239 Real4* sortedRhoPreMu,
240 Real* p_old,
241 uint* cellStart,
242 uint* cellEnd,
243 Real delta_t,
244 const size_t numAllMarkers,
245 volatile bool* isErrorD) {
246 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
247 if (i_idx >= numAllMarkers || sortedRhoPreMu[i_idx].w <= -2) {
248 return;
249 }
250 Real h_i = sortedPosRad[i_idx].w;
251 Real m_i = h_i * h_i * h_i * paramsD.rho0;
252
253 Real3 my_F_p = mR3(0);
254 Real p_i_old = p_old[i_idx];
255 Real3 pos_i = mR3(sortedPosRad[i_idx]);
256 Real Rho_i = sortedRhoPreMu[i_idx].x;
257 if (sortedRhoPreMu[i_idx].x < EPSILON) {
258 printf("(Calc_dij_pj) My density is %f in Calc_dij_pj\n", sortedRhoPreMu[i_idx].x);
259 }
260 Real dT = delta_t;
261
262 Real3 My_dij_pj = mR3(0);
263 int3 gridPos = calcGridPos(pos_i);
264 for (int z = -1; z <= 1; z++) {
265 for (int y = -1; y <= 1; y++) {
266 for (int x = -1; x <= 1; x++) {
267 int3 neighbourPos = gridPos + mI3(x, y, z);
268 uint gridHash = calcGridHash(neighbourPos);
269 // get start of bucket for this cell
270 uint startIndex = cellStart[gridHash];
271 if (startIndex != 0xffffffff) { // cell is not empty
272 // iterate over particles in this cell
273 uint endIndex = cellEnd[gridHash];
274
275 for (uint j = startIndex; j < endIndex; j++) {
276 Real3 pos_j = mR3(sortedPosRad[j]);
277 Real3 dist3 = Distance(pos_i, pos_j);
278 Real d = length(dist3);
279 ////CHECK THIS CONDITION!!!
280 if (d > RESOLUTION_LENGTH_MULT * h_i || sortedRhoPreMu[j].w <= -2 || i_idx == j)
281 continue;
282 Real h_j = sortedPosRad[j].w;
283 Real m_j = h_j * h_j * h_j * paramsD.rho0;
284 Real h_ij = 0.5 * (h_j + h_i);
285 Real3 grad_i_wij = GradWh(dist3, h_ij);
286 Real Rho_j = sortedRhoPreMu[j].x;
287 Real p_j_old = p_old[j];
288 My_dij_pj += m_j * (-(dT * dT) / (Rho_j * Rho_j)) * grad_i_wij * p_j_old;
289 my_F_p += m_j * ((p_i_old / (Rho_i * Rho_i)) + (p_j_old / (Rho_j * Rho_j))) * grad_i_wij;
290 }
291 }
292 }
293 }
294 }
295 dij_pj[i_idx] = My_dij_pj;
296 F_p[i_idx] = -m_i * my_F_p;
297 }
298
299 //--------------------------------------------------------------------------------------------------------------------------------
CalcNumber_Contacts(uint * numContacts,Real4 * sortedPosRad,Real4 * sortedRhoPreMu,uint * cellStart,uint * cellEnd,const size_t numAllMarkers,volatile bool * isErrorD)300 __global__ void CalcNumber_Contacts(uint* numContacts,
301 Real4* sortedPosRad,
302 Real4* sortedRhoPreMu,
303 uint* cellStart,
304 uint* cellEnd,
305 const size_t numAllMarkers,
306 volatile bool* isErrorD) {
307 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
308 if (i_idx >= numAllMarkers) {
309 return;
310 }
311 if (sortedRhoPreMu[i_idx].w <= -2) {
312 numContacts[i_idx] = 1;
313 return;
314 }
315
316 Real h_i = sortedPosRad[i_idx].w;
317 // Real m_i = h_i * h_i * h_i * paramsD.rho0;
318
319 int myType = sortedRhoPreMu[i_idx].w;
320 Real3 pos_i = mR3(sortedPosRad[i_idx]);
321
322 uint numCol[400];
323 int counter = 1;
324 numCol[0] = i_idx; // The first one is always the idx of the marker itself
325
326 int3 gridPos = calcGridPos(pos_i);
327 for (int z = -1; z <= 1; z++) {
328 for (int y = -1; y <= 1; y++) {
329 for (int x = -1; x <= 1; x++) {
330 int3 neighbourPos = gridPos + mI3(x, y, z);
331 uint gridHash = calcGridHash(neighbourPos);
332 // get start of bucket for this cell
333 uint startIndex = cellStart[gridHash];
334 if (startIndex != 0xffffffff) {
335 // iterate over particles in this cell
336 uint endIndex = cellEnd[gridHash];
337 for (uint j = startIndex; j < endIndex; j++) {
338 Real3 pos_j = mR3(sortedPosRad[j]);
339 Real3 dist3 = Distance(pos_i, pos_j);
340 Real d = length(dist3);
341 if (d > RESOLUTION_LENGTH_MULT * h_i || sortedRhoPreMu[j].w <= -2 || i_idx == j)
342 continue;
343 bool AlreadyHave = false;
344 for (uint findCol = 1; findCol <= counter; findCol++) {
345 if (numCol[findCol] == j) {
346 AlreadyHave = true;
347 continue;
348 }
349 }
350
351 // Room for improvment ...
352 if (!AlreadyHave) {
353 numCol[counter] = j;
354 counter++;
355 // Do not count BCE-BCE interactions...
356 if (myType >= 0 && sortedRhoPreMu[j].w >= 0 && paramsD.bceType == BceVersion::ADAMI)
357 counter--;
358 }
359
360 if (myType != -1) // For BCE no need to go deeper than this...
361 continue;
362
363 Real h_j = sortedPosRad[j].w;
364 int3 gridPosJ = calcGridPos(pos_j);
365
366 for (int zz = -1; zz <= 1; zz++) {
367 for (int yy = -1; yy <= 1; yy++) {
368 for (int xx = -1; xx <= 1; xx++) {
369 int3 neighbourPosJ = gridPosJ + mI3(xx, yy, zz);
370 uint gridHashJ = calcGridHash(neighbourPosJ);
371 uint startIndexJ = cellStart[gridHashJ];
372 if (startIndexJ != 0xffffffff) { // cell is not empty
373 uint endIndexJ = cellEnd[gridHashJ];
374 for (uint k = startIndexJ; k < endIndexJ; k++) {
375 Real3 pos_k = mR3(sortedPosRad[k]);
376 Real3 dist3jk = Distance(pos_j, pos_k);
377 Real djk = length(dist3jk);
378 if (djk > RESOLUTION_LENGTH_MULT * h_j || k == j || k == i_idx ||
379 sortedRhoPreMu[k].w <= -2)
380 continue;
381 bool AlreadyHave2 = false;
382 for (uint findCol = 1; findCol <= counter; findCol++) {
383 if (numCol[findCol] == k) {
384 AlreadyHave2 = true;
385 continue;
386 }
387 }
388 if (!AlreadyHave2) {
389 numCol[counter] = k;
390 counter++;
391 }
392 }
393 }
394 }
395 }
396 }
397
398 }
399 }
400 }
401 }
402 }
403
404 numContacts[i_idx] = counter + 10;
405 }
406
407 //--------------------------------------------------------------------------------------------------------------------------------
Calc_summGradW(Real3 * summGradW,Real4 * sortedPosRad,Real4 * sortedRhoPreMu,uint * cellStart,uint * cellEnd,const size_t numAllMarkers,volatile bool * isErrorD)408 __global__ void Calc_summGradW(Real3* summGradW, // write
409 Real4* sortedPosRad,
410 Real4* sortedRhoPreMu,
411 uint* cellStart,
412 uint* cellEnd,
413 const size_t numAllMarkers,
414 volatile bool* isErrorD) {
415 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
416 if (i_idx >= numAllMarkers) {
417 return;
418 }
419 if (sortedRhoPreMu[i_idx].w <= -2) {
420 return;
421 }
422 Real h_i = sortedPosRad[i_idx].w;
423 // Real m_i = h_i * h_i * h_i * paramsD.rho0;
424
425 Real3 pos_i = mR3(sortedPosRad[i_idx]);
426 Real3 My_summgradW = mR3(0);
427 // Real dT = paramsD.dT;
428 int3 gridPos = calcGridPos(pos_i);
429 for (int z = -1; z <= 1; z++) {
430 for (int y = -1; y <= 1; y++) {
431 for (int x = -1; x <= 1; x++) {
432 int3 neighbourPos = gridPos + mI3(x, y, z);
433 uint gridHash = calcGridHash(neighbourPos);
434 // get start of bucket for this cell
435 uint startIndex = cellStart[gridHash];
436 if (startIndex != 0xffffffff) { // cell is not empty
437 // iterate over particles in this cell
438 uint endIndex = cellEnd[gridHash];
439
440 for (uint j = startIndex; j < endIndex; j++) {
441 Real3 pos_j = mR3(sortedPosRad[j]);
442 Real3 dist3 = Distance(pos_i, pos_j);
443 Real d = length(dist3);
444 if (d > RESOLUTION_LENGTH_MULT * h_i || sortedRhoPreMu[j].w <= -2 || i_idx == j)
445 continue;
446 Real h_j = sortedPosRad[j].w;
447 Real m_j = h_j * h_j * h_j * paramsD.rho0;
448 Real h_ij = 0.5 * (h_j + h_i);
449 Real3 grad_i_wij = GradWh(dist3, h_ij);
450 My_summgradW += m_j * grad_i_wij;
451 }
452 }
453 }
454 }
455 }
456 summGradW[i_idx] = My_summgradW;
457 }
458
459 //--------------------------------------------------------------------------------------------------------------------------------
Calc_BC_aij_Bi(const uint i_idx,Real * csrValA,uint * csrColIndA,unsigned long int * GlobalcsrColIndA,uint * numContacts,Real * a_ii,Real * B_i,Real4 * sortedPosRad,Real3 * sortedVelMas,const Real4 * sortedRhoPreMu,Real3 * V_new,Real * p_old,Real3 * Normals,Real * G_i,Real * sumWij_inv,Real4 * qD,Real3 * rigidSPH_MeshPos_LRF_D,Real3 * posRigid_fsiBodies_D,Real4 * velMassRigid_fsiBodies_D,Real3 * omegaVelLRF_fsiBodies_D,Real3 * accRigid_fsiBodies_D,Real3 * omegaAccLRF_fsiBodies_D,uint * rigidIdentifierD,Real3 * pos_fsi_fea_D,Real3 * vel_fsi_fea_D,Real3 * acc_fsi_fea_D,uint * FlexIdentifierD,const int numFlex1D,uint2 * CableElementsNodes,uint4 * ShellelementsNodes,int4 updatePortion,uint * gridMarkerIndexD,const uint * cellStart,const uint * cellEnd,const size_t numAllMarkers,bool IsSPARSE)460 __device__ void Calc_BC_aij_Bi(const uint i_idx,
461 Real* csrValA,
462 uint* csrColIndA,
463 unsigned long int* GlobalcsrColIndA,
464 uint* numContacts,
465 // The above 4 vectors are used for CSR form.
466 Real* a_ii, // write
467 Real* B_i,
468 Real4* sortedPosRad,
469 Real3* sortedVelMas,
470 const Real4* sortedRhoPreMu,
471 Real3* V_new,
472 Real* p_old,
473 Real3* Normals,
474 Real* G_i,
475 Real* sumWij_inv,
476
477 Real4* qD,
478 Real3* rigidSPH_MeshPos_LRF_D,
479 Real3* posRigid_fsiBodies_D,
480 Real4* velMassRigid_fsiBodies_D,
481 Real3* omegaVelLRF_fsiBodies_D,
482 Real3* accRigid_fsiBodies_D,
483 Real3* omegaAccLRF_fsiBodies_D,
484 uint* rigidIdentifierD,
485
486 Real3* pos_fsi_fea_D,
487 Real3* vel_fsi_fea_D,
488 Real3* acc_fsi_fea_D,
489 uint* FlexIdentifierD,
490 const int numFlex1D,
491 uint2* CableElementsNodes,
492 uint4* ShellelementsNodes,
493
494 int4 updatePortion,
495 uint* gridMarkerIndexD,
496 const uint* cellStart,
497 const uint* cellEnd,
498 const size_t numAllMarkers,
499 bool IsSPARSE) {
500 uint csrStartIdx = numContacts[i_idx] + 1;
501 uint csrEndIdx = numContacts[i_idx + 1];
502
503 Real h_i = sortedPosRad[i_idx].w;
504 // Real m_i = h_i * h_i * h_i * paramsD.rho0;
505 Real3 my_normal = Normals[i_idx];
506
507 Real3 source_term = paramsD.gravity + paramsD.bodyForce3;
508 // if (bceIndex >= numObjectsD.numRigid_SphMarkers) {
509 // return;
510 // }
511
512 // int Original_idx = gridMarkerIndexD[i_idx];
513 Real3 myAcc = mR3(0.0);
514 Real3 V_prescribed = mR3(0.0);
515
516 // if (!(sortedRhoPreMu[i_idx].w >= 0 && sortedRhoPreMu[i_idx].w <= 3))
517 // printf("type of marker is %f\n", sortedRhoPreMu[i_idx].w);
518
519 BCE_Vel_Acc(i_idx, myAcc, V_prescribed, sortedPosRad, updatePortion, gridMarkerIndexD, qD, rigidSPH_MeshPos_LRF_D,
520 posRigid_fsiBodies_D, velMassRigid_fsiBodies_D, omegaVelLRF_fsiBodies_D, accRigid_fsiBodies_D,
521 omegaAccLRF_fsiBodies_D, rigidIdentifierD, pos_fsi_fea_D, vel_fsi_fea_D, acc_fsi_fea_D, FlexIdentifierD,
522 numFlex1D, CableElementsNodes, ShellelementsNodes);
523
524 for (int c = csrStartIdx; c < csrEndIdx; c++) {
525 csrValA[c] = 0;
526 csrColIndA[c] = i_idx;
527 GlobalcsrColIndA[c] = i_idx + numAllMarkers * i_idx;
528 }
529
530 // if ((csrEndIdx - csrStartIdx) != uint(0)) {
531 Real3 numeratorv = mR3(0);
532 Real denumenator = 0;
533 Real pRHS = 0;
534 // Real Rho_i = sortedRhoPreMu[i_idx].x;
535 Real3 pos_i = mR3(sortedPosRad[i_idx]);
536 // get address in grid
537 int3 gridPos = calcGridPos(pos_i);
538
539 uint counter = 0;
540 for (int z = -1; z <= 1; z++) {
541 for (int y = -1; y <= 1; y++) {
542 for (int x = -1; x <= 1; x++) {
543 int3 neighbourPos = gridPos + mI3(x, y, z);
544 uint gridHash = calcGridHash(neighbourPos);
545 // get start of bucket for this cell
546 uint startIndex = cellStart[gridHash];
547 if (startIndex != 0xffffffff) { // cell is not empty
548 uint endIndex = cellEnd[gridHash];
549 for (uint j = startIndex; j < endIndex; j++) {
550 Real3 pos_j = mR3(sortedPosRad[j]);
551 Real3 dist3 = Distance(pos_i, pos_j);
552 Real d = length(dist3);
553
554 if (d > RESOLUTION_LENGTH_MULT * h_i || j == i_idx)
555 continue;
556
557 Real h_j = sortedPosRad[j].w;
558 // Real m_j = h_j * h_j * h_j * paramsD.rho0;
559 // Real rhoj = sortedRhoPreMu[j].x;
560 Real h_ij = 0.5 * (h_j + h_i);
561 Real Wd = W3h(d, h_ij);
562 Real3 Vel_j = sortedVelMas[j];
563
564 if (paramsD.bceType != BceVersion::ADAMI) {
565 if (sortedRhoPreMu[j].w == -1.0 || dot(my_normal, mR3(pos_i - pos_j)) > 0) {
566 Real3 grad_i_wij = GradWh(dist3, h_ij);
567 csrValA[csrStartIdx - 1] += dot(grad_i_wij, my_normal);
568 csrValA[counter + csrStartIdx] = -dot(grad_i_wij, my_normal);
569 csrColIndA[counter + csrStartIdx] = j;
570 GlobalcsrColIndA[counter + csrStartIdx] = j + numAllMarkers * i_idx;
571 counter++;
572 if (sortedRhoPreMu[j].w != -1)
573 continue;
574 numeratorv += Vel_j * Wd;
575 denumenator += Wd;
576 }
577 } else {
578 if (sortedRhoPreMu[j].w != -1 || sortedRhoPreMu[j].w <= -2)
579 continue;
580 numeratorv += Vel_j * Wd;
581 denumenator += Wd;
582 pRHS += dot(source_term - myAcc, dist3) * sortedRhoPreMu[j].x * Wd;
583 csrValA[counter + csrStartIdx] = -Wd;
584 csrColIndA[counter + csrStartIdx] = j;
585 GlobalcsrColIndA[counter + csrStartIdx] = j + numAllMarkers * i_idx;
586 counter++;
587 }
588 }
589 }
590 }
591 }
592 }
593
594 if (abs(denumenator) < EPSILON) {
595 V_new[i_idx] = 2 * V_prescribed;
596 B_i[i_idx] = 0;
597 if (paramsD.bceType == BceVersion::ADAMI) {
598 csrValA[csrStartIdx - 1] = a_ii[i_idx];
599 csrColIndA[csrStartIdx - 1] = i_idx;
600 GlobalcsrColIndA[csrStartIdx - 1] = i_idx + numAllMarkers * i_idx;
601 }
602 } else {
603 Real Scaling = a_ii[i_idx] / denumenator;
604 V_new[i_idx] = 2 * V_prescribed - numeratorv / denumenator;
605
606 if (paramsD.bceType == BceVersion::ADAMI) {
607 B_i[i_idx] = pRHS;
608 csrValA[csrStartIdx - 1] = denumenator;
609 csrColIndA[csrStartIdx - 1] = i_idx;
610 GlobalcsrColIndA[csrStartIdx - 1] = i_idx + numAllMarkers * i_idx;
611
612 for (int i = csrStartIdx - 1; i < csrEndIdx; i++)
613 csrValA[i] *= Scaling;
614 B_i[i_idx] *= Scaling;
615 }
616 }
617
618 if (paramsD.bceType != BceVersion::ADAMI) {
619 Real Scaling = a_ii[i_idx];
620 if (abs(csrValA[csrStartIdx - 1]) > EPSILON) {
621 Scaling = a_ii[i_idx]; // csrValA[csrStartIdx - 1];
622 for (int count = csrStartIdx - 1; count < csrEndIdx; count++)
623 csrValA[count] *= Scaling;
624 } else {
625 clearRow(i_idx, csrStartIdx - 1, csrEndIdx, csrValA, B_i);
626 for (int count = csrStartIdx - 1; count < csrEndIdx; count++) {
627 int j = csrColIndA[counter];
628 Real3 pos_j = mR3(sortedPosRad[j]);
629 Real3 dist3 = Distance(pos_i, pos_j);
630 Real d = length(dist3);
631 if (d > RESOLUTION_LENGTH_MULT * h_i || j == i_idx) {
632 csrValA[count] = 0.0;
633 continue;
634 }
635 Real h_j = sortedPosRad[j].w;
636 Real h_ij = 0.5 * (h_j + h_i);
637 Real Wd = W3h(d, h_ij);
638 csrValA[count] = sumWij_inv[j] * Wd * Scaling;
639 }
640 csrValA[csrStartIdx - 1] -= 1.0 * Scaling;
641 }
642
643 B_i[i_idx] = 0.0 * Scaling;
644 }
645
646 sortedVelMas[i_idx] = V_new[i_idx];
647 } // namespace fsi
648 //--------------------------------------------------------------------------------------------------------------------------------
649
650 //--------------------------------------------------------------------------------------------------------------------------------
Calc_fluid_aij_Bi(const uint i_idx,Real * csrValA,uint * csrColIndA,unsigned long int * GlobalcsrColIndA,uint * numContacts,Real * B_i,Real3 * d_ii,Real * a_ii,Real * rho_np,Real3 * summGradW,Real4 * sortedPosRad,Real4 * sortedRhoPreMu,uint * cellStart,uint * cellEnd,Real delta_t,const int numAllMarkers,bool IsSPARSE)651 __device__ void Calc_fluid_aij_Bi(const uint i_idx,
652 Real* csrValA,
653 uint* csrColIndA,
654 unsigned long int* GlobalcsrColIndA,
655 uint* numContacts,
656 // The above 4 vectors are used for CSR form.
657 Real* B_i,
658 Real3* d_ii, // Read
659 Real* a_ii, // Read
660 Real* rho_np, // Read
661 Real3* summGradW,
662 Real4* sortedPosRad,
663 Real4* sortedRhoPreMu,
664 uint* cellStart,
665 uint* cellEnd,
666 Real delta_t,
667 const int numAllMarkers,
668 bool IsSPARSE) {
669 Real3 pos_i = mR3(sortedPosRad[i_idx]);
670 Real dT = delta_t;
671
672 int counter = 0; // There is always one non-zero at each row- The marker itself
673 B_i[i_idx] = paramsD.rho0 - rho_np[i_idx];
674
675 uint csrStartIdx = numContacts[i_idx] + 1; // Reserve the starting index for the A_ii
676 uint csrEndIdx = numContacts[i_idx + 1];
677
678 Real h_i = sortedPosRad[i_idx].w;
679 // Real m_i = h_i * h_i * h_i * paramsD.rho0;
680
681 // for (int c = csrStartIdx; c < csrEndIdx; c++) {
682 // csrValA[c] = a_ii[i_idx];
683 // csrColIndA[c] = i_idx;
684 // GlobalcsrColIndA[c] = i_idx + numAllMarkers * i_idx;
685 // }
686
687 int3 gridPos = calcGridPos(pos_i);
688 for (int z = -1; z <= 1; z++) {
689 for (int y = -1; y <= 1; y++) {
690 for (int x = -1; x <= 1; x++) {
691 int3 neighbourPos = gridPos + mI3(x, y, z);
692 uint gridHash = calcGridHash(neighbourPos);
693 // get start of bucket for this cell
694 uint startIndex = cellStart[gridHash];
695 if (startIndex != 0xffffffff) { // cell is not empty
696 // iterate over particles in this cell
697 uint endIndex = cellEnd[gridHash];
698 // Real Rho_i = sortedRhoPreMu[i_idx].x;
699
700 for (uint j = startIndex; j < endIndex; j++) {
701 Real3 pos_j = mR3(sortedPosRad[j]);
702 Real3 dist3 = Distance(pos_i, pos_j);
703 Real d = length(dist3);
704 if (d > RESOLUTION_LENGTH_MULT * h_i || sortedRhoPreMu[j].w <= -2 || i_idx == j)
705 continue;
706 Real h_j = sortedPosRad[j].w;
707 Real h_ij = 0.5 * (h_j + h_i);
708 Real3 grad_i_wij = GradWh(dist3, h_ij);
709 Real Rho_j = sortedRhoPreMu[j].x;
710 Real m_j = h_j * h_j * h_j * paramsD.rho0;
711 Real3 d_it = m_j * (-(dT * dT) / (Rho_j * Rho_j)) * grad_i_wij;
712 Real My_a_ij_1 = m_j * dot(d_it, summGradW[i_idx]);
713 Real My_a_ij_2 = m_j * dot(d_ii[j], grad_i_wij);
714 Real My_a_ij_12 = My_a_ij_1 - My_a_ij_2;
715 bool DONE1 = false;
716
717 for (uint findCol = csrStartIdx; findCol < csrEndIdx; findCol++) {
718 if (csrColIndA[findCol] == j) {
719 csrValA[findCol] += My_a_ij_12;
720 csrColIndA[findCol] = j;
721 GlobalcsrColIndA[findCol] = j + numAllMarkers * i_idx;
722 DONE1 = true;
723 continue;
724 }
725 }
726 if (!DONE1) {
727 csrValA[counter + csrStartIdx] += My_a_ij_12;
728 csrColIndA[counter + csrStartIdx] = j;
729 GlobalcsrColIndA[counter + csrStartIdx] = j + numAllMarkers * i_idx;
730 counter++;
731 }
732 int3 gridPosJ = calcGridPos(pos_j);
733 for (int zz = -1; zz <= 1; zz++) {
734 for (int yy = -1; yy <= 1; yy++) {
735 for (int xx = -1; xx <= 1; xx++) {
736 int3 neighbourPosJ = gridPosJ + mI3(xx, yy, zz);
737 uint gridHashJ = calcGridHash(neighbourPosJ);
738 uint startIndexJ = cellStart[gridHashJ];
739 if (startIndexJ != 0xffffffff) { // cell is not empty
740 uint endIndexJ = cellEnd[gridHashJ];
741 for (uint k = startIndexJ; k < endIndexJ; k++) {
742 Real3 pos_k = mR3(sortedPosRad[k]);
743 Real3 dist3jk = Distance(pos_j, pos_k);
744 Real djk = length(dist3jk);
745 if (djk > RESOLUTION_LENGTH_MULT_IISPH * h_j || k == j || k == i_idx ||
746 sortedRhoPreMu[k].w <= -2)
747 continue;
748 Real h_k = sortedPosRad[j].w;
749 Real h_jk = 0.5 * (h_j + h_k);
750 Real3 grad_j_wjk = GradWh(dist3jk, h_jk);
751 Real m_k = cube(sortedPosRad[k].w) * paramsD.rho0;
752 Real Rho_k = sortedRhoPreMu[k].x;
753 Real3 d_jk = m_k * (-(dT * dT) / (Rho_k * Rho_k)) * grad_j_wjk;
754 Real My_a_ij_3 = m_j * dot(d_jk, grad_i_wij);
755 bool DONE2 = false;
756
757 for (uint findCol = csrStartIdx; findCol < csrEndIdx; findCol++) {
758 if (csrColIndA[findCol] == k) {
759 csrValA[findCol] -= My_a_ij_3;
760 csrColIndA[findCol] = k;
761 GlobalcsrColIndA[findCol] = k + numAllMarkers * i_idx;
762 DONE2 = true;
763 continue;
764 }
765 }
766 if (!DONE2) {
767 csrValA[counter + csrStartIdx] -= My_a_ij_3;
768 csrColIndA[counter + csrStartIdx] = k;
769 GlobalcsrColIndA[counter + csrStartIdx] = k + numAllMarkers * i_idx;
770 counter++;
771 }
772 }
773 }
774 }
775 }
776 }
777
778 }
779 }
780 }
781 }
782 }
783
784 for (int myIdx = csrStartIdx; myIdx < csrEndIdx; myIdx++) {
785 if (csrColIndA[myIdx] == i_idx)
786 csrValA[myIdx] = a_ii[i_idx];
787 }
788
789 csrValA[csrStartIdx - 1] = a_ii[i_idx];
790 csrColIndA[csrStartIdx - 1] = i_idx;
791 GlobalcsrColIndA[csrStartIdx - 1] = i_idx + numAllMarkers * i_idx;
792
793 if (sortedRhoPreMu[i_idx].x < 0.999 * paramsD.rho0) {
794 csrValA[csrStartIdx - 1] = a_ii[i_idx];
795 for (int myIdx = csrStartIdx; myIdx < csrEndIdx; myIdx++) {
796 csrValA[myIdx] = 0.0;
797 B_i[i_idx] = 0.0;
798 }
799 }
800
801 Real RHS = B_i[i_idx];
802 B_i[i_idx] = RHS; // fminf(0.0, RHS);
803 }
804
805 //--------------------------------------------------------------------------------------------------------------------------------
FormAXB(Real * csrValA,uint * csrColIndA,unsigned long int * GlobalcsrColIndA,uint * numContacts,Real * a_ij,Real * B_i,Real3 * d_ii,Real * a_ii,Real3 * summGradW,Real4 * sortedPosRad,Real3 * sortedVelMas,Real4 * sortedRhoPreMu,Real3 * V_new,Real * p_old,Real3 * Normals,Real * G_i,Real * sumWij_inv,Real * rho_np,Real4 * qD,Real3 * rigidSPH_MeshPos_LRF_D,Real3 * posRigid_fsiBodies_D,Real4 * velMassRigid_fsiBodies_D,Real3 * omegaVelLRF_fsiBodies_D,Real3 * accRigid_fsiBodies_D,Real3 * omegaAccLRF_fsiBodies_D,uint * rigidIdentifierD,Real3 * pos_fsi_fea_D,Real3 * vel_fsi_fea_D,Real3 * acc_fsi_fea_D,uint * FlexIdentifierD,const int numFlex1D,uint2 * CableElementsNodes,uint4 * ShellelementsNodes,int4 updatePortion,uint * gridMarkerIndexD,uint * cellStart,uint * cellEnd,Real delta_t,const size_t numAllMarkers,bool IsSPARSE,volatile bool * isError)806 __global__ void FormAXB(Real* csrValA,
807 uint* csrColIndA,
808 unsigned long int* GlobalcsrColIndA,
809
810 uint* numContacts,
811 // The above 4 vectors are used for CSR form.
812 Real* a_ij, // write
813 Real* B_i, // write
814 Real3* d_ii, // Read
815 Real* a_ii, // Read
816 Real3* summGradW,
817 Real4* sortedPosRad,
818 Real3* sortedVelMas,
819 Real4* sortedRhoPreMu,
820 Real3* V_new,
821 Real* p_old,
822 Real3* Normals,
823 Real* G_i,
824 Real* sumWij_inv,
825 Real* rho_np,
826
827 Real4* qD,
828 Real3* rigidSPH_MeshPos_LRF_D,
829 Real3* posRigid_fsiBodies_D,
830 Real4* velMassRigid_fsiBodies_D,
831 Real3* omegaVelLRF_fsiBodies_D,
832 Real3* accRigid_fsiBodies_D,
833 Real3* omegaAccLRF_fsiBodies_D,
834 uint* rigidIdentifierD,
835
836 Real3* pos_fsi_fea_D,
837 Real3* vel_fsi_fea_D,
838 Real3* acc_fsi_fea_D,
839 uint* FlexIdentifierD,
840 const int numFlex1D,
841 uint2* CableElementsNodes,
842 uint4* ShellelementsNodes,
843
844 int4 updatePortion,
845 uint* gridMarkerIndexD,
846 uint* cellStart,
847 uint* cellEnd,
848 Real delta_t,
849 const size_t numAllMarkers,
850 bool IsSPARSE,
851 volatile bool* isError) {
852 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
853 if (i_idx >= numAllMarkers) {
854 return;
855 }
856
857 // Real m_0 = paramsD.markerMass;
858 // Real RHO_0 = paramsD.rho0;
859 // Real dT = paramsD.dT;
860 // Real3 gravity = paramsD.gravity;
861
862 int TYPE_OF_NARKER = sortedRhoPreMu[i_idx].w;
863
864 if (TYPE_OF_NARKER <= -2) {
865 B_i[i_idx] = 0;
866 uint csrStartIdx = numContacts[i_idx];
867 // This needs to be check to see if it messes up the condition number of the matrix
868 csrValA[csrStartIdx] = 1.0;
869 csrColIndA[csrStartIdx] = i_idx;
870 GlobalcsrColIndA[csrStartIdx] = i_idx + numAllMarkers * i_idx;
871 } else if (TYPE_OF_NARKER == -1) {
872 Calc_fluid_aij_Bi(i_idx, csrValA, csrColIndA, GlobalcsrColIndA, numContacts, B_i, d_ii, a_ii, rho_np, summGradW,
873 sortedPosRad, sortedRhoPreMu, cellStart, cellEnd, delta_t, numAllMarkers, true);
874
875 } else if (TYPE_OF_NARKER > -1)
876 Calc_BC_aij_Bi(i_idx, csrValA, csrColIndA, GlobalcsrColIndA, numContacts, a_ii, B_i, sortedPosRad, sortedVelMas,
877
878 sortedRhoPreMu, V_new, p_old, Normals, G_i, sumWij_inv,
879
880 qD, rigidSPH_MeshPos_LRF_D, posRigid_fsiBodies_D, velMassRigid_fsiBodies_D,
881 omegaVelLRF_fsiBodies_D, accRigid_fsiBodies_D, omegaAccLRF_fsiBodies_D, rigidIdentifierD,
882
883 pos_fsi_fea_D, vel_fsi_fea_D, acc_fsi_fea_D, FlexIdentifierD, numFlex1D, CableElementsNodes,
884 ShellelementsNodes,
885
886 updatePortion, gridMarkerIndexD, cellStart, cellEnd, numAllMarkers, true);
887 }
888
889 //--------------------------------------------------------------------------------------------------------------------------------
890 //--------------------------------------------------------------------------------------------------------------------------------
Calc_Pressure_AXB_USING_CSR(Real * csrValA,Real * a_ii,uint * csrColIndA,uint * numContacts,Real4 * sortedRhoPreMu,Real * sumWij_inv,Real3 * sortedVelMas,Real3 * V_new,Real * p_old,Real * B_i,Real * Residuals,const size_t numAllMarkers,volatile bool * isErrorD)891 __global__ void Calc_Pressure_AXB_USING_CSR(Real* csrValA,
892 Real* a_ii,
893 uint* csrColIndA,
894 uint* numContacts,
895 Real4* sortedRhoPreMu,
896 Real* sumWij_inv,
897 Real3* sortedVelMas,
898 Real3* V_new,
899 Real* p_old,
900 Real* B_i, // Read
901 Real* Residuals,
902 const size_t numAllMarkers,
903 volatile bool* isErrorD) {
904 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
905 if (i_idx >= numAllMarkers) {
906 return;
907 }
908 if (sortedRhoPreMu[i_idx].w <= -2) {
909 return;
910 }
911
912 // Real RHO_0 = paramsD.rho0;
913 // bool ClampPressure = paramsD.ClampPressure;
914 // Real Max_Pressure = paramsD.Max_Pressure;
915 uint startIdx = numContacts[i_idx] + 1; // numContacts[i_idx] is the diagonal itself
916 uint endIdx = numContacts[i_idx + 1];
917
918 Real aij_pj = 0;
919 // Real error = aij_pj + sortedRhoPreMu[i_idx].y * csrValA[startIdx - 1] - B_i[i_idx];
920
921 for (int myIdx = startIdx; myIdx < endIdx; myIdx++) {
922 if (csrColIndA[myIdx] != i_idx)
923 aij_pj += csrValA[myIdx] * p_old[csrColIndA[myIdx]];
924 }
925 Real RHS = B_i[i_idx];
926 Residuals[i_idx] = abs(RHS - aij_pj - p_old[i_idx] * csrValA[startIdx - 1]);
927 sortedRhoPreMu[i_idx].y = (RHS - aij_pj) / csrValA[startIdx - 1];
928
929 // if (paramsD.ClampPressure && sortedRhoPreMu[i_idx].y < 0)
930 // sortedRhoPreMu[i_idx].y = 0;
931 if (!isfinite(aij_pj)) {
932 printf("a_ij *p_j became Nan in Calc_Pressure_AXB_USING_CSR ");
933 }
934 }
935
936 //--------------------------------------------------------------------------------------------------------------------------------
Calc_Pressure(Real * a_ii,Real3 * d_ii,Real3 * dij_pj,Real * rho_np,Real * rho_p,Real * Residuals,Real3 * F_p,Real4 * sortedPosRad,Real3 * sortedVelMas,Real4 * sortedRhoPreMu,Real4 * qD,Real3 * rigidSPH_MeshPos_LRF_D,Real3 * posRigid_fsiBodies_D,Real4 * velMassRigid_fsiBodies_D,Real3 * omegaVelLRF_fsiBodies_D,Real3 * accRigid_fsiBodies_D,Real3 * omegaAccLRF_fsiBodies_D,uint * rigidIdentifierD,Real3 * pos_fsi_fea_D,Real3 * vel_fsi_fea_D,Real3 * acc_fsi_fea_D,uint * FlexIdentifierD,const int numFlex1D,uint2 * CableElementsNodes,uint4 * ShellelementsNodes,int4 updatePortion,uint * gridMarkerIndexD,Real * p_old,Real3 * V_new,uint * cellStart,uint * cellEnd,Real delta_t,const size_t numAllMarkers,volatile bool * isErrorD)937 __global__ void Calc_Pressure(Real* a_ii, // Read
938 Real3* d_ii, // Read
939 Real3* dij_pj, // Read
940 Real* rho_np, // Read
941 Real* rho_p, // Write
942 Real* Residuals,
943 Real3* F_p,
944 Real4* sortedPosRad,
945 Real3* sortedVelMas,
946 Real4* sortedRhoPreMu,
947
948 Real4* qD,
949 Real3* rigidSPH_MeshPos_LRF_D,
950 Real3* posRigid_fsiBodies_D,
951 Real4* velMassRigid_fsiBodies_D,
952 Real3* omegaVelLRF_fsiBodies_D,
953 Real3* accRigid_fsiBodies_D,
954 Real3* omegaAccLRF_fsiBodies_D,
955 uint* rigidIdentifierD,
956
957 Real3* pos_fsi_fea_D,
958 Real3* vel_fsi_fea_D,
959 Real3* acc_fsi_fea_D,
960 uint* FlexIdentifierD,
961 const int numFlex1D,
962 uint2* CableElementsNodes,
963 uint4* ShellelementsNodes,
964
965 int4 updatePortion,
966 uint* gridMarkerIndexD,
967
968 Real* p_old,
969 Real3* V_new,
970 uint* cellStart,
971 uint* cellEnd,
972 Real delta_t,
973
974 const size_t numAllMarkers,
975 volatile bool* isErrorD) {
976 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
977 if (i_idx >= numAllMarkers) {
978 return;
979 }
980 if (sortedRhoPreMu[i_idx].w <= -2) {
981 return;
982 }
983
984 Real h_i = sortedPosRad[i_idx].w;
985 Real m_i = h_i * h_i * h_i * paramsD.rho0;
986
987 Real RHO_0 = paramsD.rho0;
988 Real dT = delta_t;
989 Real3 source_term = paramsD.gravity + paramsD.bodyForce3;
990
991 if (sortedRhoPreMu[i_idx].x < EPSILON) {
992 printf("(Calc_Pressure)My density is %f in Calc_Pressure\n", sortedRhoPreMu[i_idx].x);
993 }
994 int myType = sortedRhoPreMu[i_idx].w;
995 Real Rho_i = sortedRhoPreMu[i_idx].x;
996 Real p_i = p_old[i_idx];
997 Real3 pos_i = mR3(sortedPosRad[i_idx]);
998 Real p_new = 0;
999 Real my_rho_p = 0;
1000 Real3 F_i_p = F_p[i_idx];
1001
1002 if (myType == -1) {
1003 if (Rho_i < 0.999 * RHO_0) {
1004 p_new = 0;
1005 Residuals[i_idx] = 0;
1006 } else {
1007 Real3 my_dij_pj = dij_pj[i_idx];
1008 Real sum_dij_pj = 0; // This is the first summation term in the expression for the pressure.
1009 Real sum_djj_pj = 0; // This is the second summation term in the expression for the pressure.
1010 Real sum_djk_pk = 0; // This is the last summation term in the expression for the pressure.
1011 int3 gridPosI = calcGridPos(pos_i);
1012 for (int z = -1; z <= 1; z++) {
1013 for (int y = -1; y <= 1; y++) {
1014 for (int x = -1; x <= 1; x++) {
1015 int3 neighbourPosI = gridPosI + mI3(x, y, z);
1016 uint gridHashI = calcGridHash(neighbourPosI);
1017 // get start of bucket for this cell
1018 uint startIndexI = cellStart[gridHashI];
1019 if (startIndexI != 0xffffffff) {
1020 uint endIndexI = cellEnd[gridHashI];
1021 for (uint j = startIndexI; j < endIndexI; j++) {
1022 Real3 pos_j = mR3(sortedPosRad[j]);
1023 Real3 dist3ij = Distance(pos_i, pos_j);
1024 Real dij = length(dist3ij);
1025 if (dij > RESOLUTION_LENGTH_MULT * paramsD.HSML || i_idx == j ||
1026 sortedRhoPreMu[j].w <= -2)
1027 continue;
1028 // Real Rho_j = sortedRhoPreMu[j].x;
1029 Real p_j_old = p_old[j];
1030 Real h_j = sortedPosRad[j].w;
1031 Real m_j = h_j * h_j * h_j * paramsD.rho0;
1032
1033 Real3 djj = d_ii[j];
1034 Real3 F_j_p = F_p[j];
1035
1036 Real h_ij = 0.5 * (h_j + h_i);
1037 Real3 grad_i_wij = GradWh(dist3ij, h_ij);
1038 Real3 d_ji = m_i * (-(dT * dT) / (Rho_i * Rho_i)) * (-grad_i_wij);
1039 Real3 djk_pk = dij_pj[j] - d_ji * p_i;
1040 sum_dij_pj += m_j * dot(my_dij_pj, grad_i_wij);
1041 sum_djj_pj += m_j * dot(djj, grad_i_wij) * p_j_old;
1042 sum_djk_pk += m_j * dot(djk_pk, grad_i_wij);
1043 my_rho_p += (dT * dT) * m_j * dot((F_i_p / m_i - F_j_p / m_j), grad_i_wij);
1044 }
1045 }
1046 }
1047 }
1048 }
1049
1050 // Real RHS = fminf(0.0, RHO_0 - rho_np[i_idx]);
1051 Real RHS = RHO_0 - rho_np[i_idx];
1052 Real aij_pj = +sum_dij_pj - sum_djj_pj - sum_djk_pk;
1053 p_new = (RHS - aij_pj) / a_ii[i_idx];
1054 Residuals[i_idx] = abs(RHS - aij_pj - p_old[i_idx] * a_ii[i_idx]);
1055 // sortedRhoPreMu[i_idx].x = aij_pj + p_new * a_ii[i_idx] + RHO_0 - RHS;
1056 }
1057 } else { // Do Adami BC
1058
1059 Real3 myAcc = mR3(0);
1060 Real3 V_prescribed = mR3(0);
1061 BCE_Vel_Acc(i_idx, myAcc, V_prescribed, sortedPosRad, updatePortion, gridMarkerIndexD, qD,
1062 rigidSPH_MeshPos_LRF_D, posRigid_fsiBodies_D, velMassRigid_fsiBodies_D, omegaVelLRF_fsiBodies_D,
1063 accRigid_fsiBodies_D, omegaAccLRF_fsiBodies_D, rigidIdentifierD, pos_fsi_fea_D, vel_fsi_fea_D,
1064 acc_fsi_fea_D, FlexIdentifierD, numFlex1D, CableElementsNodes, ShellelementsNodes);
1065
1066 Real3 numeratorv = mR3(0);
1067 Real denumenator = 0;
1068 Real numeratorp = 0;
1069 Real3 Vel_i;
1070
1071 // get address in grid
1072 int3 gridPos = calcGridPos(pos_i);
1073 for (int z = -1; z <= 1; z++) {
1074 for (int y = -1; y <= 1; y++) {
1075 for (int x = -1; x <= 1; x++) {
1076 int3 neighbourPos = gridPos + mI3(x, y, z);
1077 uint gridHash = calcGridHash(neighbourPos);
1078 // get start of bucket for this cell
1079 uint startIndex = cellStart[gridHash];
1080 if (startIndex != 0xffffffff) { // cell is not empty
1081 uint endIndex = cellEnd[gridHash];
1082 for (uint j = startIndex; j < endIndex; j++) {
1083 Real3 pos_j = mR3(sortedPosRad[j]);
1084 Real3 dist3 = Distance(pos_i, pos_j);
1085 Real d = length(dist3);
1086 if (d > RESOLUTION_LENGTH_MULT * paramsD.HSML || sortedRhoPreMu[j].w != -1)
1087 continue;
1088 // OLD VELOCITY IS SHOULD BE OBDATED NOT THE NEW ONE!!!!!
1089 Real3 Vel_j = sortedVelMas[j];
1090 Real p_j = p_old[j];
1091 Real3 F_j_p = F_p[j];
1092 Real h_j = sortedPosRad[j].w;
1093 Real m_j = h_j * h_j * h_j * paramsD.rho0;
1094 // Real rhoj = sortedRhoPreMu[j].x;
1095
1096 Real h_ij = 0.5 * (h_j + h_i);
1097 Real Wd = W3h(d, h_ij);
1098 numeratorv += Vel_j * Wd;
1099 numeratorp += p_j * Wd + dot(source_term - myAcc, dist3) * sortedRhoPreMu[j].x * Wd;
1100 denumenator += Wd;
1101 Real3 TobeUsed = (F_i_p / m_i - F_j_p / m_j);
1102 my_rho_p += (dT * dT) * m_j * dot(TobeUsed, GradWh(dist3, h_ij));
1103
1104 if (isnan(numeratorp))
1105 printf("Something is wrong here..., %f\n", numeratorp);
1106 }
1107 }
1108 }
1109 }
1110 }
1111 if (abs(denumenator) < EPSILON) {
1112 p_new = 0;
1113 Vel_i = 2 * V_prescribed;
1114 } else {
1115 Vel_i = 2 * V_prescribed - numeratorv / denumenator;
1116 p_new = numeratorp / denumenator;
1117 }
1118
1119 Residuals[i_idx] = abs(numeratorp - denumenator * p_old[i_idx]) * a_ii[i_idx];
1120 V_new[i_idx] = Vel_i;
1121 }
1122 // if (paramsD.ClampPressure && p_new < 0.0)
1123 // p_new = 0.0;
1124 rho_p[i_idx] = my_rho_p;
1125 sortedRhoPreMu[i_idx].y = p_new;
1126 }
1127
1128 //--------------------------------------------------------------------------------------------------------------------------------
Update_AND_Calc_Res(Real3 * sortedVelMas,Real4 * sortedRhoPreMu,Real * p_old,Real3 * V_new,Real * rho_p,Real * rho_np,Real * Residuals,const size_t numAllMarkers,const int Iteration,Real params_relaxation,bool IsSPARSE,volatile bool * isErrorD)1129 __global__ void Update_AND_Calc_Res(Real3* sortedVelMas,
1130 Real4* sortedRhoPreMu,
1131 Real* p_old,
1132 Real3* V_new,
1133 Real* rho_p,
1134 Real* rho_np,
1135 Real* Residuals,
1136 const size_t numAllMarkers,
1137 const int Iteration,
1138 Real params_relaxation,
1139 bool IsSPARSE,
1140 volatile bool* isErrorD) {
1141 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
1142
1143 if (i_idx >= numAllMarkers) {
1144 return;
1145 }
1146 if (sortedRhoPreMu[i_idx].w <= -2) {
1147 return;
1148 }
1149
1150 // p_i = (1 - relax) * p_old_i + relax * p_i;
1151
1152 sortedRhoPreMu[i_idx].y = (1 - params_relaxation) * p_old[i_idx] + params_relaxation * sortedRhoPreMu[i_idx].y;
1153 // if(!paramsD.USE_LinearSolver)
1154 // p_old[i_idx] = sortedRhoPreMu[i_idx].y;
1155 // if (paramsD.ClampPressure && sortedRhoPreMu[i_idx].y < 0)
1156 // sortedRhoPreMu[i_idx].y = 0;
1157 // Real AbsRes = abs(sortedRhoPreMu[i_idx].y - p_old[i_idx]);
1158
1159 // Real Updated_rho = rho_np[i_idx] + rho_p[i_idx];
1160 // Real rho_res = abs(1000 - sortedRhoPreMu[i_idx].x); // Hard-coded for now
1161 Real p_res = 0;
1162 // p_res = abs(sortedRhoPreMu[i_idx].y - p_old[i_idx]) / (abs(p_old[i_idx]) + 0.00001);
1163 p_res = abs(sortedRhoPreMu[i_idx].y - p_old[i_idx]);
1164 p_old[i_idx] = sortedRhoPreMu[i_idx].y;
1165
1166 Residuals[i_idx] = p_res;
1167 }
1168
1169 //--------------------------------------------------------------------------------------------------------------------------------
CalcForces(Real3 * new_vel,Real4 * derivVelRhoD,Real4 * sortedPosRad,Real3 * sortedVelMas,Real4 * sortedRhoPreMu,Real * sumWij_inv,Real * p_old,Real3 * r_shift,uint * cellStart,uint * cellEnd,Real delta_t,size_t numAllMarkers,volatile bool * isErrorD)1170 __global__ void CalcForces(Real3* new_vel, // Write
1171 Real4* derivVelRhoD,
1172 Real4* sortedPosRad, // Read
1173 Real3* sortedVelMas, // Read
1174 Real4* sortedRhoPreMu,
1175 Real* sumWij_inv,
1176 Real* p_old,
1177 Real3* r_shift,
1178 uint* cellStart,
1179 uint* cellEnd,
1180 Real delta_t,
1181
1182 size_t numAllMarkers,
1183 volatile bool* isErrorD) {
1184 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
1185 if (i_idx >= numAllMarkers) {
1186 return;
1187 }
1188 if (sortedRhoPreMu[i_idx].w <= -2) {
1189 sortedRhoPreMu[i_idx].x = 0;
1190 sortedRhoPreMu[i_idx].y = 0;
1191 sortedRhoPreMu[i_idx].z = 0;
1192 return;
1193 }
1194
1195 // if (sortedRhoPreMu[i_idx].w > -1) {
1196 // return;
1197 // }
1198
1199 Real mu_0 = paramsD.mu0;
1200 Real h_i = sortedPosRad[i_idx].w;
1201 Real m_i = h_i * h_i * h_i * paramsD.rho0;
1202
1203 Real dT = delta_t;
1204 Real3 source_term = paramsD.gravity + paramsD.bodyForce3;
1205 Real epsilon = paramsD.epsMinMarkersDis;
1206 Real3 posi = mR3(sortedPosRad[i_idx]);
1207 Real3 Veli = sortedVelMas[i_idx];
1208
1209 Real p_i;
1210 // if (sortedRhoPreMu[i_idx].w == -1)
1211 p_i = sortedRhoPreMu[i_idx].y;
1212 // else
1213 // p_i = p_old[i_idx];
1214
1215 Real rho_i = sortedRhoPreMu[i_idx].x;
1216 Real3 F_i_mu = mR3(0);
1217 Real3 F_i_surface_tension = mR3(0);
1218 Real3 F_i_p = mR3(0);
1219 if ((sortedRhoPreMu[i_idx].x > 3 * paramsD.rho0 || sortedRhoPreMu[i_idx].x < 0) && sortedRhoPreMu[i_idx].w < 0)
1220 printf("too large/small density marker %d, type=%f\n", i_idx, sortedRhoPreMu[i_idx].w);
1221
1222 Real r0 = 0;
1223 int Ni = 0;
1224 Real mi_bar = 0;
1225 Real3 inner_sum = mR3(0);
1226
1227 int3 gridPos = calcGridPos(posi);
1228
1229 // get address in grid
1230 for (int z = -1; z <= 1; z++) {
1231 for (int y = -1; y <= 1; y++) {
1232 for (int x = -1; x <= 1; x++) {
1233 int3 neighbourPos = gridPos + mI3(x, y, z);
1234 uint gridHash = calcGridHash(neighbourPos);
1235 // get start of bucket for this cell
1236 uint startIndex = cellStart[gridHash];
1237 uint endIndex = cellEnd[gridHash];
1238 for (uint j = startIndex; j < endIndex; j++) {
1239 Real3 posj = mR3(sortedPosRad[j]);
1240 Real3 rij = Distance(posi, posj);
1241 Real d = length(rij);
1242 if (d > RESOLUTION_LENGTH_MULT * h_i || sortedRhoPreMu[j].w <= -2 || i_idx == j)
1243 continue;
1244
1245 Real3 eij = rij / d;
1246 Real h_j = sortedPosRad[j].w;
1247 Real m_j = h_j * h_j * h_j * paramsD.rho0;
1248
1249 mi_bar += m_j;
1250 Ni++;
1251 r0 += d;
1252 inner_sum += m_j * rij / (d * d * d);
1253
1254 Real h_ij = 0.5 * (h_j + h_i);
1255 Real Wd = m_j * W3h(d, h_ij);
1256 Real3 grad_ij = GradWh(rij, h_ij);
1257
1258 Real3 Velj = sortedVelMas[j];
1259 Real p_j = sortedRhoPreMu[j].y;
1260 Real rho_j = sortedRhoPreMu[j].x;
1261
1262 Real3 V_ij = (Veli - Velj);
1263 // Only Consider (fluid-fluid + fluid-solid) or Solid-Fluid Interaction
1264 if (sortedRhoPreMu[i_idx].w < 0 || (sortedRhoPreMu[i_idx].w >= 0 && sortedRhoPreMu[j].w < 0))
1265 F_i_p += -m_j * ((p_i / (rho_i * rho_i)) + (p_j / (rho_j * rho_j))) * grad_ij;
1266
1267 Real Rho_bar = (rho_j + rho_i) * 0.5;
1268 // Real nu = mu_0 * paramsD.HSML * 320 / Rho_bar;
1269 // Real3 muNumerator = nu * fminf(0.0, dot(rij, V_ij)) * grad_ij;
1270 Real3 muNumerator = 2 * mu_0 * dot(rij, grad_ij) * V_ij;
1271 Real muDenominator = (Rho_bar * Rho_bar) * (d * d + paramsD.HSML * paramsD.HSML * epsilon);
1272 // Only Consider (fluid-fluid + fluid-solid) or Solid-Fluid Interaction
1273 if (sortedRhoPreMu[i_idx].w < 0 || (sortedRhoPreMu[i_idx].w >= 0 && sortedRhoPreMu[j].w < 0))
1274 // if ((sortedRhoPreMu[i_idx].w < 0 && sortedRhoPreMu[j].w < 0))
1275 F_i_mu += m_j * muNumerator / muDenominator;
1276 if (!isfinite(length(F_i_mu))) {
1277 printf("F_i_np in CalcForces returns Nan or Inf");
1278 }
1279 }
1280 }
1281 }
1282
1283 if (Ni != 0) {
1284 r0 /= Ni;
1285 mi_bar /= Ni;
1286 }
1287 if (mi_bar > EPSILON)
1288 r_shift[i_idx] = paramsD.beta_shifting * r0 * r0 * paramsD.v_Max * dT / mi_bar * inner_sum;
1289
1290 // Forces are per unit mass at this point.
1291 derivVelRhoD[i_idx] = mR4((F_i_p + F_i_mu) * m_i);
1292
1293 // Add the source_term only to the fluid markers
1294 if (sortedRhoPreMu[i_idx].w == -1) {
1295 derivVelRhoD[i_idx] = derivVelRhoD[i_idx] + mR4(source_term) * m_i;
1296 }
1297
1298 new_vel[i_idx] = Veli + dT * mR3(derivVelRhoD[i_idx]) / m_i + r_shift[i_idx] / dT;
1299
1300 if (!isfinite(length(new_vel[i_idx])) || !isfinite(length(derivVelRhoD[i_idx])) ||
1301 !isfinite(length(r_shift[i_idx])))
1302 printf("%d= new_vel=%.2f,derivVelRhoD=%.2f,r_shift=%.2f, F_i_p=%f, F_i_mu=%f\n", i_idx,
1303 length(new_vel[i_idx]), length(derivVelRhoD[i_idx]), length(r_shift[i_idx]), length(F_i_p),
1304 length(F_i_mu));
1305 }
1306 }
1307
1308 //--------------------------------------------------------------------------------------------------------------------------------
FinalizePressure(Real4 * sortedPosRad,Real4 * sortedRhoPreMu,Real * p_old,Real3 * F_p,uint * cellStart,uint * cellEnd,size_t numAllMarkers,Real p_shift,volatile bool * isErrorD)1309 __global__ void FinalizePressure(Real4* sortedPosRad, // Read
1310 Real4* sortedRhoPreMu,
1311 Real* p_old,
1312 Real3* F_p, // Write
1313 uint* cellStart,
1314 uint* cellEnd,
1315 size_t numAllMarkers,
1316 Real p_shift,
1317 volatile bool* isErrorD) {
1318 uint i_idx = blockIdx.x * blockDim.x + threadIdx.x;
1319 if (i_idx >= numAllMarkers) {
1320 return;
1321 }
1322 if (sortedRhoPreMu[i_idx].w <= -2) {
1323 return;
1324 }
1325 if (!(isfinite(sortedRhoPreMu[i_idx].x) && isfinite(sortedRhoPreMu[i_idx].y) && isfinite(sortedRhoPreMu[i_idx].z) &&
1326 isfinite(sortedRhoPreMu[i_idx].w))) {
1327 printf("rhoPreMu is NAN: thrown from FinalizePressure ! %f,%f,%f\\n", sortedRhoPreMu[i_idx].x,
1328 sortedRhoPreMu[i_idx].y, sortedRhoPreMu[i_idx].z);
1329 sortedRhoPreMu[i_idx].y = 0.0;
1330 }
1331 // if (p_shift < 0)
1332 sortedRhoPreMu[i_idx].y = p_old[i_idx] + ((paramsD.ClampPressure) ? paramsD.BASEPRES : 0.0); //- p_shift;
1333
1334 if (paramsD.ClampPressure && sortedRhoPreMu[i_idx].y < 0)
1335 sortedRhoPreMu[i_idx].y = 0;
1336
1337 // if (sortedRhoPreMu[i_idx].y < 0)
1338 // sortedRhoPreMu[i_idx].y = (p_old[i_idx] > 0) ? p_old[i_idx] : 0.0;
1339
1340 if (sortedRhoPreMu[i_idx].y > paramsD.Max_Pressure)
1341 sortedRhoPreMu[i_idx].y = paramsD.Max_Pressure;
1342 }
1343
1344 //--------------------------------------------------------------------------------------------------------------------------------
calcPressureIISPH(std::shared_ptr<FsiBodiesDataD> otherFsiBodiesD,thrust::device_vector<Real3> pos_fsi_fea_D,thrust::device_vector<Real3> vel_fsi_fea_D,thrust::device_vector<Real3> acc_fsi_fea_D,thrust::device_vector<Real> sumWij_inv,thrust::device_vector<Real> & p_old,thrust::device_vector<Real3> Normals,thrust::device_vector<Real> G_i,thrust::device_vector<Real> & Color)1345 void ChFsiForceIISPH::calcPressureIISPH(std::shared_ptr<FsiBodiesDataD> otherFsiBodiesD,
1346 thrust::device_vector<Real3> pos_fsi_fea_D,
1347 thrust::device_vector<Real3> vel_fsi_fea_D,
1348 thrust::device_vector<Real3> acc_fsi_fea_D,
1349 thrust::device_vector<Real> sumWij_inv,
1350 thrust::device_vector<Real>& p_old,
1351 thrust::device_vector<Real3> Normals,
1352 thrust::device_vector<Real> G_i,
1353 thrust::device_vector<Real>& Color) {
1354 // Real RES = paramsH->PPE_res;
1355
1356 PPE_SolutionType mySolutionType = paramsH->PPE_Solution_type;
1357 std::cout << "time step in calcPressureIISPH " << paramsH->dT << std::endl;
1358
1359 double total_step_timeClock = clock();
1360 bool *isErrorH, *isErrorD;
1361 isErrorH = (bool*)malloc(sizeof(bool));
1362 cudaMalloc((void**)&isErrorD, sizeof(bool));
1363 *isErrorH = false;
1364 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1365 //------------------------------------------------------------------------
1366 // thread per particle
1367 uint numThreads, numBlocks;
1368 size_t numAllMarkers = (int)numObjectsH->numAllMarkers;
1369 computeGridSize((uint)numAllMarkers, 256, numBlocks, numThreads);
1370
1371 *isErrorH = false;
1372 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1373
1374 thrust::device_vector<Real3> d_ii(numAllMarkers);
1375 thrust::device_vector<Real3> V_np(numAllMarkers);
1376 thrust::fill(d_ii.begin(), d_ii.end(), mR3(0.0));
1377 thrust::fill(V_np.begin(), V_np.end(), mR3(0.0));
1378 *isErrorH = false;
1379 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1380 V_i_np__AND__d_ii_kernel<<<numBlocks, numThreads>>>(
1381 mR4CAST(sortedSphMarkersD->posRadD), mR3CAST(sortedSphMarkersD->velMasD),
1382 mR4CAST(sortedSphMarkersD->rhoPresMuD), mR3CAST(d_ii), mR3CAST(V_np), R1CAST(sumWij_inv), R1CAST(G_i),
1383 U1CAST(markersProximityD->cellStartD), U1CAST(markersProximityD->cellEndD), paramsH->dT, numAllMarkers,
1384 isErrorD);
1385
1386 cudaDeviceSynchronize();
1387 cudaCheckError();
1388 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1389 if (*isErrorH == true) {
1390 throw std::runtime_error("Error! program crashed after F_i_np__AND__d_ii_kernel!\n");
1391 }
1392
1393 thrust::device_vector<Real> a_ii(numAllMarkers);
1394 thrust::device_vector<Real> rho_np(numAllMarkers);
1395 thrust::fill(a_ii.begin(), a_ii.end(), 0.0);
1396 thrust::fill(rho_np.begin(), rho_np.end(), 0.0);
1397 thrust::fill(p_old.begin(), p_old.end(), 0.0);
1398 thrust::device_vector<Real3> summGradW(numAllMarkers);
1399
1400 *isErrorH = false;
1401 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1402 Rho_np_AND_a_ii_AND_sum_m_GradW<<<numBlocks, numThreads>>>(
1403 mR4CAST(sortedSphMarkersD->posRadD), mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(rho_np), R1CAST(a_ii),
1404 R1CAST(p_old), mR3CAST(V_np), mR3CAST(d_ii), mR3CAST(summGradW), U1CAST(markersProximityD->cellStartD),
1405 U1CAST(markersProximityD->cellEndD), paramsH->dT, numAllMarkers, isErrorD);
1406
1407 cudaDeviceSynchronize();
1408 cudaCheckError();
1409 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1410 if (*isErrorH == true) {
1411 throw std::runtime_error("Error! program crashed after F_i_np__AND__d_ii_kernel!\n");
1412 }
1413
1414 thrust::device_vector<Real3> V_new(numAllMarkers);
1415 thrust::fill(V_new.begin(), V_new.end(), mR3(0.0));
1416 thrust::device_vector<Real> a_ij;
1417 thrust::device_vector<Real> B_i(numAllMarkers);
1418 thrust::device_vector<uint> csrColIndA;
1419 thrust::device_vector<uint> numContacts(numAllMarkers);
1420 thrust::device_vector<unsigned long int> GlobalcsrColIndA;
1421 thrust::device_vector<Real> csrValA;
1422
1423 double durationFormAXB;
1424
1425 int numFlexbodies = (int)numObjectsH->numFlexBodies1D + (int)numObjectsH->numFlexBodies2D;
1426
1427 int haveGhost = (numObjectsH->numGhostMarkers > 0) ? 1 : 0;
1428 int haveHelper = (numObjectsH->numHelperMarkers > 0) ? 1 : 0;
1429
1430 int4 updatePortion =
1431 mI4(fsiGeneralData->referenceArray[haveGhost + haveHelper + 0].y, // end of fluid
1432 fsiGeneralData->referenceArray[haveGhost + haveHelper + 1].y, // end of boundary
1433 fsiGeneralData->referenceArray[haveGhost + haveHelper + 1 + numObjectsH->numRigidBodies].y,
1434 fsiGeneralData->referenceArray[haveGhost + haveHelper + 1 + numObjectsH->numRigidBodies + numFlexbodies].y);
1435
1436 uint NNZ;
1437 if (mySolutionType == PPE_SolutionType::FORM_SPARSE_MATRIX) {
1438 thrust::fill(a_ij.begin(), a_ij.end(), 0.0);
1439 thrust::fill(B_i.begin(), B_i.end(), 0.0);
1440 // thrust::fill(summGradW.begin(), summGradW.end(), mR3(0.0));
1441 thrust::fill(numContacts.begin(), numContacts.end(), 0.0);
1442 //------------------------------------------------------------------------
1443 //------------- MatrixJacobi
1444 //------------------------------------------------------------------------
1445
1446 bool SPARSE_FLAG = true;
1447 double FormAXBClock = clock();
1448 thrust::device_vector<Real> Residuals(numAllMarkers);
1449 thrust::fill(Residuals.begin(), Residuals.end(), 1.0);
1450 thrust::device_vector<Real> rho_p(numAllMarkers);
1451 thrust::fill(rho_p.begin(), rho_p.end(), 0.0);
1452
1453 *isErrorH = false;
1454 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1455 CalcNumber_Contacts<<<numBlocks, numThreads>>>(
1456 U1CAST(numContacts), mR4CAST(sortedSphMarkersD->posRadD), mR4CAST(sortedSphMarkersD->rhoPresMuD),
1457 U1CAST(markersProximityD->cellStartD), U1CAST(markersProximityD->cellEndD), numAllMarkers, isErrorD);
1458
1459 cudaDeviceSynchronize();
1460 cudaCheckError();
1461 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1462 if (*isErrorH == true) {
1463 throw std::runtime_error("Error! program crashed after CalcNumber_Contacts!\n");
1464 }
1465 uint MAX_CONTACT = thrust::reduce(numContacts.begin(), numContacts.end(), 0, thrust::maximum<Real>());
1466 std::cout << "Max contact between SPH particles: " << MAX_CONTACT << std::endl;
1467
1468 uint LastVal = numContacts[numAllMarkers - 1];
1469 thrust::exclusive_scan(numContacts.begin(), numContacts.end(), numContacts.begin());
1470 numContacts.push_back(LastVal + numContacts[numAllMarkers - 1]);
1471 NNZ = numContacts[numAllMarkers];
1472
1473 csrValA.resize(NNZ);
1474 csrColIndA.resize(NNZ);
1475 GlobalcsrColIndA.resize(NNZ);
1476
1477 thrust::fill(csrValA.begin(), csrValA.end(), 0.0);
1478 thrust::fill(GlobalcsrColIndA.begin(), GlobalcsrColIndA.end(), 0.0);
1479 thrust::fill(csrColIndA.begin(), csrColIndA.end(), 0.0);
1480
1481 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1482
1483 std::cout << "updatePortion of BC: " << updatePortion.x << " " << updatePortion.y << " " << updatePortion.z
1484 << " " << updatePortion.w << "\n ";
1485
1486 FormAXB<<<numBlocks, numThreads>>>(
1487 R1CAST(csrValA), U1CAST(csrColIndA), LU1CAST(GlobalcsrColIndA), U1CAST(numContacts), R1CAST(a_ij),
1488 R1CAST(B_i), mR3CAST(d_ii), R1CAST(a_ii), mR3CAST(summGradW), mR4CAST(sortedSphMarkersD->posRadD),
1489 mR3CAST(sortedSphMarkersD->velMasD), mR4CAST(sortedSphMarkersD->rhoPresMuD), mR3CAST(V_new), R1CAST(p_old),
1490 mR3CAST(Normals), R1CAST(G_i), R1CAST(sumWij_inv), R1CAST(rho_np),
1491
1492 mR4CAST(otherFsiBodiesD->q_fsiBodies_D), mR3CAST(fsiGeneralData->rigidSPH_MeshPos_LRF_D),
1493 mR3CAST(otherFsiBodiesD->posRigid_fsiBodies_D), mR4CAST(otherFsiBodiesD->velMassRigid_fsiBodies_D),
1494 mR3CAST(otherFsiBodiesD->omegaVelLRF_fsiBodies_D), mR3CAST(otherFsiBodiesD->accRigid_fsiBodies_D),
1495 mR3CAST(otherFsiBodiesD->omegaAccLRF_fsiBodies_D), U1CAST(fsiGeneralData->rigidIdentifierD),
1496
1497 mR3CAST(pos_fsi_fea_D), mR3CAST(vel_fsi_fea_D), mR3CAST(acc_fsi_fea_D),
1498 U1CAST(fsiGeneralData->FlexIdentifierD), (int)numObjectsH->numFlexBodies1D,
1499 U2CAST(fsiGeneralData->CableElementsNodes), U4CAST(fsiGeneralData->ShellElementsNodes),
1500
1501 updatePortion, U1CAST(markersProximityD->gridMarkerIndexD), U1CAST(markersProximityD->cellStartD),
1502 U1CAST(markersProximityD->cellEndD), paramsH->dT, numAllMarkers, SPARSE_FLAG, isErrorD);
1503
1504 cudaDeviceSynchronize();
1505 cudaCheckError();
1506 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1507 if (*isErrorH == true) {
1508 throw std::runtime_error("Error! program crashed after F_i_np__AND__d_ii_kernel!\n");
1509 }
1510
1511 durationFormAXB = (clock() - FormAXBClock) / (double)CLOCKS_PER_SEC;
1512 }
1513 //------------------------------------------------------------------------
1514 //------------- Iterative loop
1515 //------------------------------------------------------------------------
1516 int Iteration = 0;
1517 Real MaxRes = 100;
1518 thrust::device_vector<Real> Residuals(numAllMarkers);
1519 thrust::fill(Residuals.begin(), Residuals.end(), 1.0);
1520 thrust::device_vector<Real3> dij_pj(numAllMarkers);
1521 thrust::fill(dij_pj.begin(), dij_pj.end(), mR3(0.0));
1522 thrust::device_vector<Real3> F_p(numAllMarkers);
1523 thrust::fill(F_p.begin(), F_p.end(), mR3(0.0));
1524 thrust::device_vector<Real> rho_p(numAllMarkers);
1525 thrust::fill(rho_p.begin(), rho_p.end(), 0.0);
1526
1527 double LinearSystemClock = clock();
1528
1529 myLinearSolver->SetVerbose(paramsH->Verbose_monitoring);
1530 myLinearSolver->SetAbsRes(paramsH->LinearSolver_Abs_Tol);
1531 myLinearSolver->SetRelRes(paramsH->LinearSolver_Rel_Tol);
1532 myLinearSolver->SetIterationLimit(paramsH->LinearSolver_Max_Iter);
1533
1534 if (paramsH->USE_LinearSolver) {
1535 if (paramsH->PPE_Solution_type != PPE_SolutionType::FORM_SPARSE_MATRIX) {
1536 printf(
1537 "You should paramsH->PPE_Solution_type == FORM_SPARSE_MATRIX in order to use the "
1538 "chrono_fsi linear "
1539 "solvers\n");
1540 exit(0);
1541 }
1542
1543 myLinearSolver->Solve((int)numAllMarkers, NNZ, R1CAST(csrValA), U1CAST(numContacts), U1CAST(csrColIndA),
1544 R1CAST(p_old), R1CAST(B_i));
1545 cudaCheckError();
1546 } else {
1547 while ((MaxRes > paramsH->LinearSolver_Abs_Tol || Iteration < 3) &&
1548 Iteration < paramsH->LinearSolver_Max_Iter) {
1549 *isErrorH = false;
1550 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1551 Initialize_Variables<<<numBlocks, numThreads>>>(mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(p_old),
1552 mR3CAST(sortedSphMarkersD->velMasD), mR3CAST(V_new),
1553 numAllMarkers, isErrorD);
1554 cudaDeviceSynchronize();
1555 cudaCheckError();
1556 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1557 if (*isErrorH == true) {
1558 throw std::runtime_error("Error! program crashed after Initialize_Variables!\n");
1559 }
1560
1561 if (mySolutionType == PPE_SolutionType::MATRIX_FREE) {
1562 *isErrorH = false;
1563 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1564 Calc_dij_pj<<<numBlocks, numThreads>>>(
1565 mR3CAST(dij_pj), mR3CAST(F_p), mR3CAST(d_ii), mR4CAST(sortedSphMarkersD->posRadD),
1566 mR3CAST(sortedSphMarkersD->velMasD), mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(p_old),
1567 U1CAST(markersProximityD->cellStartD), U1CAST(markersProximityD->cellEndD), paramsH->dT,
1568 numAllMarkers, isErrorD);
1569 cudaDeviceSynchronize();
1570 cudaCheckError();
1571 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1572 if (*isErrorH == true) {
1573 throw std::runtime_error("Error! program crashed after Calc_dij_pj!\n");
1574 }
1575
1576 *isErrorH = false;
1577 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1578 Calc_Pressure<<<numBlocks, numThreads>>>(
1579 R1CAST(a_ii), mR3CAST(d_ii), mR3CAST(dij_pj), R1CAST(rho_np), R1CAST(rho_p), R1CAST(Residuals),
1580 mR3CAST(F_p), mR4CAST(sortedSphMarkersD->posRadD), mR3CAST(sortedSphMarkersD->velMasD),
1581 mR4CAST(sortedSphMarkersD->rhoPresMuD),
1582
1583 mR4CAST(otherFsiBodiesD->q_fsiBodies_D), mR3CAST(fsiGeneralData->rigidSPH_MeshPos_LRF_D),
1584 mR3CAST(otherFsiBodiesD->posRigid_fsiBodies_D), mR4CAST(otherFsiBodiesD->velMassRigid_fsiBodies_D),
1585 mR3CAST(otherFsiBodiesD->omegaVelLRF_fsiBodies_D), mR3CAST(otherFsiBodiesD->accRigid_fsiBodies_D),
1586 mR3CAST(otherFsiBodiesD->omegaAccLRF_fsiBodies_D), U1CAST(fsiGeneralData->rigidIdentifierD),
1587
1588 mR3CAST(pos_fsi_fea_D), mR3CAST(vel_fsi_fea_D), mR3CAST(acc_fsi_fea_D),
1589 U1CAST(fsiGeneralData->FlexIdentifierD), (int)numObjectsH->numFlexBodies1D,
1590 U2CAST(fsiGeneralData->CableElementsNodes), U4CAST(fsiGeneralData->ShellElementsNodes),
1591 updatePortion, U1CAST(markersProximityD->gridMarkerIndexD), R1CAST(p_old), mR3CAST(V_new),
1592 U1CAST(markersProximityD->cellStartD), U1CAST(markersProximityD->cellEndD), paramsH->dT,
1593 numAllMarkers, isErrorD);
1594
1595 cudaDeviceSynchronize();
1596 cudaCheckError();
1597 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1598 if (*isErrorH == true) {
1599 throw std::runtime_error("Error! program crashed after Calc_Pressure!\n");
1600 }
1601 }
1602
1603 if (mySolutionType == PPE_SolutionType::FORM_SPARSE_MATRIX) {
1604 *isErrorH = false;
1605 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1606 Calc_Pressure_AXB_USING_CSR<<<numBlocks, numThreads>>>(
1607 R1CAST(csrValA), R1CAST(a_ii), U1CAST(csrColIndA), U1CAST(numContacts),
1608 mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(sumWij_inv), mR3CAST(sortedSphMarkersD->velMasD),
1609 mR3CAST(V_new), R1CAST(p_old), R1CAST(B_i), R1CAST(Residuals), numAllMarkers, isErrorD);
1610 cudaDeviceSynchronize();
1611 cudaCheckError();
1612 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1613 if (*isErrorH == true) {
1614 throw std::runtime_error("Error! program crashed after Iterative_pressure_update!\n");
1615 }
1616 }
1617 *isErrorH = false;
1618 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1619
1620 Update_AND_Calc_Res<<<numBlocks, numThreads>>>(
1621 mR3CAST(sortedSphMarkersD->velMasD), mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(p_old),
1622 mR3CAST(V_new), R1CAST(rho_p), R1CAST(rho_np), R1CAST(Residuals), numAllMarkers, Iteration,
1623 paramsH->PPE_relaxation, false, isErrorD);
1624 cudaDeviceSynchronize();
1625 cudaCheckError();
1626 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1627 if (*isErrorH == true) {
1628 throw std::runtime_error("Error! program crashed after Iterative_pressure_update!\n");
1629 }
1630
1631 Iteration++;
1632
1633 thrust::device_vector<Real>::iterator iter = thrust::max_element(Residuals.begin(), Residuals.end());
1634 auto position = iter - Residuals.begin();
1635 MaxRes = *iter;
1636
1637 // MaxRes =
1638 // thrust::reduce(Residuals.begin(), Residuals.end(), 0.0, thrust::plus<Real>()) /
1639 // numObjectsH->numAllMarkers;
1640
1641 // Real PMAX = thrust::reduce(p_old.begin(), p_old.end(), 0.0, thrust::maximum<Real>());
1642 // MaxRes = thrust::reduce(Residuals.begin(), Residuals.end(), 0.0, thrust::plus<Real>()) /
1643 // numObjectsH->numAllMarkers;
1644 // MaxRes = thrust::reduce(Residuals.begin(), Residuals.end(), 0.0, thrust::maximum<Real>());
1645 // Real R_np = thrust::reduce(rho_np.begin(), rho_np.end(), 0.0, thrust::plus<Real>()) /
1646 // rho_np.size();
1647 // Real R_p = thrust::reduce(rho_p.begin(), rho_p.end(), 0.0, thrust::plus<Real>()) /
1648 // rho_p.size();
1649 //
1650 if (paramsH->Verbose_monitoring)
1651 printf("Iter= %d, Res= %f\n", Iteration, MaxRes);
1652 }
1653 }
1654
1655 thrust::device_vector<Real>::iterator iter = thrust::min_element(p_old.begin(), p_old.end());
1656 auto position = iter - p_old.begin();
1657 Real shift_p = *iter;
1658 // Real shift_p = 0;
1659
1660 // This must be run if linear solver is used
1661 if (paramsH->USE_LinearSolver || paramsH->ClampPressure) {
1662 printf("Shifting pressure values by %f\n", -shift_p);
1663 *isErrorH = false;
1664 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1665 FinalizePressure<<<numBlocks, numThreads>>>(
1666 mR4CAST(sortedSphMarkersD->posRadD), mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(p_old), mR3CAST(F_p),
1667 U1CAST(markersProximityD->cellStartD), U1CAST(markersProximityD->cellEndD), numAllMarkers, shift_p,
1668 isErrorD);
1669 cudaDeviceSynchronize();
1670 cudaCheckError();
1671 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1672 if (*isErrorH == true) {
1673 throw std::runtime_error("Error! program crashed after FinalizePressure!\n");
1674 }
1675 }
1676 double durationLinearSystem = (clock() - LinearSystemClock) / (double)CLOCKS_PER_SEC;
1677 double durationtotal_step_time = (clock() - total_step_timeClock) / (double)CLOCKS_PER_SEC;
1678
1679 printf("---------------IISPH CLOCK-------------------\n");
1680 printf(" Total: %f \n FormAXB: %f\n Linear System: %f \n", durationtotal_step_time, durationFormAXB,
1681 durationLinearSystem);
1682 if (!paramsH->USE_LinearSolver)
1683 printf(" Iter (Jacobi+SOR)# = %d, to Res= %.3e \n", Iteration, MaxRes);
1684 if (paramsH->USE_LinearSolver)
1685 if (myLinearSolver->GetSolverStatus()) {
1686 std::cout << " Solver converged to " << myLinearSolver->GetResidual() << " tolerance";
1687 std::cout << " after " << myLinearSolver->GetNumIterations() << " iterations" << std::endl;
1688 } else {
1689 std::cout << "Failed to converge after " << myLinearSolver->GetIterationLimit() << " iterations";
1690 std::cout << " (" << myLinearSolver->GetResidual() << " final residual)" << std::endl;
1691 }
1692
1693 //------------------------------------------------------------------------
1694 //------------------------------------------------------------------------
1695 cudaFree(isErrorD);
1696 free(isErrorH);
1697 }
1698
ForceSPH(std::shared_ptr<SphMarkerDataD> otherSphMarkersD,std::shared_ptr<FsiBodiesDataD> otherFsiBodiesD,std::shared_ptr<FsiMeshDataD> otherFsiMeshD)1699 void ChFsiForceIISPH::ForceSPH(std::shared_ptr<SphMarkerDataD> otherSphMarkersD,
1700 std::shared_ptr<FsiBodiesDataD> otherFsiBodiesD,
1701 std::shared_ptr<FsiMeshDataD> otherFsiMeshD) {
1702 sphMarkersD = otherSphMarkersD;
1703 int numAllMarkers = (int)numObjectsH->numAllMarkers;
1704 int numHelperMarkers = (int)numObjectsH->numHelperMarkers;
1705 fsiCollisionSystem->ArrangeData(sphMarkersD);
1706
1707 thrust::device_vector<Real3>::iterator iter =
1708 thrust::max_element(sortedSphMarkersD->velMasD.begin(), sortedSphMarkersD->velMasD.end(), compare_Real3_mag());
1709 Real MaxVel = length(*iter);
1710
1711 if (paramsH->Adaptive_time_stepping) {
1712 Real dt_CFL = paramsH->Co_number * paramsH->HSML / MaxVel;
1713 Real dt_nu = 0.25 * paramsH->HSML * paramsH->HSML / (paramsH->mu0 / paramsH->rho0);
1714 Real dt_body = 0.25 * std::sqrt(paramsH->HSML / length(paramsH->bodyForce3 + paramsH->gravity));
1715 Real dt = std::fmin(dt_body, std::fmin(dt_CFL, dt_nu));
1716 if (dt / paramsH->dT_Max > 0.7 && dt / paramsH->dT_Max < 1)
1717 paramsH->dT = paramsH->dT_Max * 0.5;
1718 else
1719 paramsH->dT = std::fmin(dt, paramsH->dT_Max);
1720
1721 CopyParams_NumberOfObjects(paramsH, numObjectsH);
1722
1723 printf(" time step=%.3e, dt_Max=%.3e, dt_CFL=%.3e (CFL=%.2g), dt_nu=%.3e, dt_body=%.3e\n", paramsH->dT,
1724 paramsH->dT_Max, dt_CFL, paramsH->Co_number, dt_nu, dt_body);
1725 }
1726
1727 bool *isErrorH, *isErrorD, *isErrorD2;
1728
1729 isErrorH = (bool*)malloc(sizeof(bool));
1730 cudaMalloc((void**)&isErrorD, sizeof(bool));
1731 cudaMalloc((void**)&isErrorD2, sizeof(bool));
1732 *isErrorH = false;
1733 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1734 cudaMemcpy(isErrorD2, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1735
1736 uint numThreads, numBlocks;
1737 computeGridSize(numAllMarkers, 256, numBlocks, numThreads);
1738 printf("numBlocks: %d, numThreads: %d, numAllMarker:%d \n", numBlocks, numThreads, numAllMarkers);
1739
1740 thrust::device_vector<Real> Color(numAllMarkers);
1741 thrust::fill(Color.begin(), Color.end(), 1.0e10);
1742 thrust::device_vector<Real> _sumWij_inv(numAllMarkers);
1743 thrust::fill(_sumWij_inv.begin(), _sumWij_inv.end(), 0.0);
1744 thrust::device_vector<Real> G_i(numAllMarkers * 9);
1745 thrust::fill(G_i.begin(), G_i.end(), 0);
1746 *isErrorH = false;
1747
1748 cudaMemcpy(isErrorD, isErrorH, sizeof(bool), cudaMemcpyHostToDevice);
1749
1750 thrust::device_vector<uint> Contact_i(numAllMarkers);
1751 thrust::fill(Contact_i.begin(), Contact_i.end(), 0);
1752 calcRho_kernel<<<numBlocks, numThreads>>>(
1753 mR4CAST(sortedSphMarkersD->posRadD), mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(_sumWij_inv),
1754 U1CAST(markersProximityD->cellStartD), U1CAST(markersProximityD->cellEndD), U1CAST(Contact_i), numAllMarkers,
1755 isErrorD);
1756 cudaDeviceSynchronize();
1757 cudaCheckError();
1758 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1759 if (*isErrorH == true) {
1760 throw std::runtime_error("Error! program crashed after calcRho_kernel!\n");
1761 }
1762
1763 thrust::device_vector<Real3> Normals(numAllMarkers);
1764
1765 calcNormalizedRho_kernel<<<numBlocks, numThreads>>>(
1766 mR4CAST(sortedSphMarkersD->posRadD), mR3CAST(sortedSphMarkersD->velMasD),
1767 mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(_sumWij_inv), R1CAST(G_i), mR3CAST(Normals), R1CAST(Color),
1768 U1CAST(markersProximityD->cellStartD), U1CAST(markersProximityD->cellEndD), numAllMarkers, isErrorD);
1769 cudaDeviceSynchronize();
1770 cudaCheckError();
1771 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1772 if (*isErrorH == true) {
1773 throw std::runtime_error("Error! program crashed after calcNormalizedRho_kernel!\n");
1774 }
1775
1776 thrust::device_vector<Real> p_old(numAllMarkers, 0.0);
1777 calcPressureIISPH(otherFsiBodiesD, otherFsiMeshD->pos_fsi_fea_D, otherFsiMeshD->vel_fsi_fea_D,
1778 otherFsiMeshD->acc_fsi_fea_D, _sumWij_inv, p_old, Normals, G_i, Color);
1779
1780 //------------------------------------------------------------------------
1781 // thread per particle
1782 // std::cout << "dT in ForceSPH after calcPressure: " << paramsH->dT << "\n";
1783 double CalcForcesClock = clock();
1784
1785 thrust::fill(vel_vis_Sorted_D.begin(), vel_vis_Sorted_D.end(), mR3(0.0));
1786 thrust::fill(derivVelRhoD_Sorted_D.begin(), derivVelRhoD_Sorted_D.end(), mR4(0.0));
1787 thrust::fill(vel_XSPH_Sorted_D.begin(), vel_XSPH_Sorted_D.end(), mR3(0.0));
1788 thrust::device_vector<Real3> dr_shift(numAllMarkers);
1789 thrust::fill(dr_shift.begin(), dr_shift.end(), mR3(0.0));
1790
1791 thrust::device_vector<Real3> NEW_Vel(numAllMarkers, mR3(0.0));
1792
1793 CalcForces<<<numBlocks, numThreads>>>(mR3CAST(NEW_Vel), mR4CAST(derivVelRhoD_Sorted_D),
1794 mR4CAST(sortedSphMarkersD->posRadD), mR3CAST(sortedSphMarkersD->velMasD),
1795 mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(_sumWij_inv), R1CAST(p_old),
1796 mR3CAST(dr_shift), U1CAST(markersProximityD->cellStartD),
1797 U1CAST(markersProximityD->cellEndD), paramsH->dT, numAllMarkers, isErrorD);
1798 cudaDeviceSynchronize();
1799 cudaCheckError();
1800
1801 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1802 if (*isErrorH == true) {
1803 throw std::runtime_error("Error! program crashed in CalcForces!\n");
1804 }
1805 double calcforce = (clock() - CalcForcesClock) / (double)CLOCKS_PER_SEC;
1806 printf(" Force Computation: %f \n", calcforce);
1807 double UpdateClock = clock();
1808
1809 sortedSphMarkersD->velMasD = NEW_Vel;
1810 UpdateDensity<<<numBlocks, numThreads>>>(
1811 mR3CAST(vel_vis_Sorted_D), mR3CAST(vel_XSPH_Sorted_D), mR3CAST(sortedSphMarkersD->velMasD),
1812 mR4CAST(sortedSphMarkersD->posRadD), mR4CAST(sortedSphMarkersD->rhoPresMuD), R1CAST(_sumWij_inv),
1813 U1CAST(markersProximityD->cellStartD), U1CAST(markersProximityD->cellEndD), numAllMarkers, isErrorD);
1814
1815 cudaDeviceSynchronize();
1816 cudaCheckError();
1817
1818 cudaMemcpy(isErrorH, isErrorD, sizeof(bool), cudaMemcpyDeviceToHost);
1819 if (*isErrorH == true) {
1820 throw std::runtime_error("Error! program crashed in CalcForces!\n");
1821 }
1822 CopySortedToOriginal_NonInvasive_R3(fsiGeneralData->vel_XSPH_D, vel_XSPH_Sorted_D,
1823 markersProximityD->gridMarkerIndexD);
1824 CopySortedToOriginal_NonInvasive_R3(fsiGeneralData->vis_vel_SPH_D, vel_vis_Sorted_D,
1825 markersProximityD->gridMarkerIndexD);
1826 CopySortedToOriginal_NonInvasive_R3(sphMarkersD->velMasD, sortedSphMarkersD->velMasD,
1827 markersProximityD->gridMarkerIndexD);
1828 CopySortedToOriginal_NonInvasive_R4(sphMarkersD->posRadD, sortedSphMarkersD->posRadD,
1829 markersProximityD->gridMarkerIndexD);
1830
1831 CopySortedToOriginal_NonInvasive_R4(sphMarkersD->rhoPresMuD, sortedSphMarkersD->rhoPresMuD,
1832 markersProximityD->gridMarkerIndexD);
1833 CopySortedToOriginal_NonInvasive_R4(fsiGeneralData->derivVelRhoD, derivVelRhoD_Sorted_D,
1834 markersProximityD->gridMarkerIndexD);
1835 printf(" Update information: %f \n", (clock() - UpdateClock) / (double)CLOCKS_PER_SEC);
1836 printf("----------------------------------------------\n");
1837 }
1838
1839 } // namespace fsi
1840 } // namespace chrono
1841