1 #ifndef LIBFLATARRAY_EXAMPLES_SMOOTHED_PARTICLE_HYDRODYNAMICS_KERNELS_HPP
2 #define LIBFLATARRAY_EXAMPLES_SMOOTHED_PARTICLE_HYDRODYNAMICS_KERNELS_HPP
3
4 #define PI float(M_PI)
5
6 class InteractionBuffer
7 {
8 public:
9 inline
InteractionBuffer(float rho_i=0,float v_x_i=0,float v_y_i=0,float rho_j=0,float v_x_j=0,float v_y_j=0,float delta_x=0,float delta_y=0,float dist_squared=0,int i=0,int j=0)10 explicit InteractionBuffer(
11 float rho_i = 0,
12 float v_x_i = 0,
13 float v_y_i = 0,
14 float rho_j = 0,
15 float v_x_j = 0,
16 float v_y_j = 0,
17 float delta_x = 0,
18 float delta_y = 0,
19 float dist_squared = 0,
20 int i = 0,
21 int j = 0) :
22 rho_i(rho_i),
23 v_x_i(v_x_i),
24 v_y_i(v_y_i),
25 rho_j(rho_j),
26 v_x_j(v_x_j),
27 v_y_j(v_y_j),
28 delta_x(delta_x),
29 delta_y(delta_y),
30 dist_squared(dist_squared),
31 i(i),
32 j(j)
33 {}
34
35 float rho_i;
36 float v_x_i;
37 float v_y_i;
38 float rho_j;
39 float v_x_j;
40 float v_y_j;
41 float delta_x;
42 float delta_y;
43 float dist_squared;
44 int i;
45 int j;
46 };
47 LIBFLATARRAY_REGISTER_SOA(
48 InteractionBuffer,
49 ((float)(rho_i))
50 ((float)(v_x_i))
51 ((float)(v_y_i))
52 ((float)(rho_j))
53 ((float)(v_x_j))
54 ((float)(v_y_j))
55 ((float)(delta_x))
56 ((float)(delta_y))
57 ((float)(dist_squared))
58 ((int)(i))
59 ((int)(j)))
60
61 template<typename CELL, long DIM_X, long DIM_Y, long DIM_Z, long INDEX>
compute_density_lfa(int n,LibFlatArray::soa_accessor<CELL,DIM_X,DIM_Y,DIM_Z,INDEX> particles,float h,float mass)62 void compute_density_lfa(int n, LibFlatArray::soa_accessor<CELL, DIM_X, DIM_Y, DIM_Z, INDEX> particles, float h, float mass)
63 {
64 typedef LibFlatArray::soa_accessor<CELL, DIM_X, DIM_Y, DIM_Z, INDEX> soa_accessor;
65 typedef typename LibFlatArray::estimate_optimum_short_vec_type<float, soa_accessor>::VALUE FLOAT;
66
67 LibFlatArray::loop_peeler<FLOAT>(&particles.index(), n, [&particles, h, mass](auto my_float, long *i, long end) {
68 typedef decltype(my_float) FLOAT;
69 float h_squared = h * h;
70 FLOAT h_squared_vec(h_squared);
71
72 for (; particles.index() < end; particles += FLOAT::ARITY) {
73 &particles.rho() << 4.0f * mass / PI / h_squared_vec;
74 }
75 });
76
77 particles.index() = 0;
78
79 float h_squared = h * h;
80 float h_pow_8 = h_squared * h_squared * h_squared * h_squared;
81 float C = 4 * mass / M_PI / h_pow_8;
82
83 soa_accessor particles_i(particles.data(), 0);
84 soa_accessor particles_j(particles.data(), 0);
85
86 for (particles_i.index() = 0; particles_i.index() < n; ++particles_i) {
87 float pos_x_i = particles_i.pos_x();
88 float pos_y_i = particles_i.pos_y();
89
90 particles_j.index() = particles_i.index() + 1;
91
92 LibFlatArray::loop_peeler<FLOAT>(
93 &particles_j.index(), n,
94 [&particles_i, &particles_j, h, mass, pos_x_i, pos_y_i, C]
95 (auto my_float, long *x, int end)
96 {
97 typedef decltype(my_float) FLOAT;
98 float h_squared = h * h;
99 FLOAT h_squared_vec(h_squared);
100
101 for (; particles_j.index() < end;) {
102 FLOAT delta_x = FLOAT(pos_x_i) - &particles_j.pos_x();
103 FLOAT delta_y = FLOAT(pos_y_i) - &particles_j.pos_y();
104 FLOAT dist_squared = delta_x * delta_x + delta_y * delta_y;
105 FLOAT overlap = h_squared_vec - dist_squared;
106
107 if (LibFlatArray::any(overlap > FLOAT(0.0))) {
108 for (int e = 0; e < FLOAT::ARITY; ++e) {
109 float o = get(overlap, e);
110 if (o > 0) {
111 float rho_ij = C * o * o * o;
112 particles_i.rho() += rho_ij;
113 particles_j.rho() += rho_ij;
114 }
115 ++particles_j;
116 }
117 } else {
118 particles_j += FLOAT::ARITY;
119 }
120 }
121 });
122 }
123 }
124
125 template<int ARITY, typename SOA_ACCESSOR, typename SOA_ARRAY>
handle_interactions(SOA_ACCESSOR & particles,SOA_ARRAY & interaction_buf,const float h,const float rho0,const float C_0,const float C_p,const float C_v)126 void handle_interactions(SOA_ACCESSOR& particles, SOA_ARRAY& interaction_buf, const float h, const float rho0, const float C_0, const float C_p, const float C_v)
127 {
128 typedef LibFlatArray::short_vec<float, ARITY> FLOAT;
129 for (std::size_t f = 0; f < interaction_buf.size(); f += FLOAT::ARITY) {
130 FLOAT q = sqrt(FLOAT(&interaction_buf[f].dist_squared())) / h;
131
132 FLOAT u = 1.0f - q;
133 FLOAT w_0 = u * C_0 / &interaction_buf[f].rho_i() / &interaction_buf[f].rho_j();
134 FLOAT w_p = w_0 * C_p * (FLOAT(&interaction_buf[f].rho_i()) + &interaction_buf[f].rho_j() - 2 * rho0) * u / q;
135 FLOAT w_v = w_0 * C_v;
136 FLOAT delta_v_x = FLOAT(&interaction_buf[f].v_x_i()) - &interaction_buf[f].v_x_j();
137 FLOAT delta_v_y = FLOAT(&interaction_buf[f].v_y_i()) - &interaction_buf[f].v_y_j();
138
139 FLOAT add_x = w_p * &interaction_buf[f].delta_x() + w_v * delta_v_x;
140 FLOAT add_y = w_p * &interaction_buf[f].delta_y() + w_v * delta_v_y;
141 // scalar store to avoid simultaneous overwrites:
142 for (int i = 0; i < FLOAT::ARITY; ++i) {
143 float add_x_scalar = get(add_x, i);
144 float add_y_scalar = get(add_y, i);
145 (&particles.a_x())[interaction_buf[f + i].i()] += add_x_scalar;
146 (&particles.a_y())[interaction_buf[f + i].i()] += add_y_scalar;
147 (&particles.a_x())[interaction_buf[f + i].j()] -= add_x_scalar;
148 (&particles.a_y())[interaction_buf[f + i].j()] -= add_y_scalar;
149 }
150 }
151 interaction_buf.clear();
152 }
153
154 template<typename SOA_ACCESSOR>
compute_accel_lfa(int n,SOA_ACCESSOR particles,float mass,float g,float h,float k,float rho0,float mu)155 void compute_accel_lfa(
156 int n,
157 SOA_ACCESSOR particles,
158 float mass,
159 float g,
160 float h,
161 float k,
162 float rho0,
163 float mu)
164 {
165 typedef typename LibFlatArray::estimate_optimum_short_vec_type<float, SOA_ACCESSOR>::VALUE FLOAT;
166
167 const float h_squared = h * h;
168 const float C_0 = mass / M_PI / (h_squared * h_squared);
169 const float C_p = 15 * k;
170 const float C_v = -40 * mu;
171
172 // gravity:
173 LibFlatArray::loop_peeler<FLOAT>(
174 &particles.index(), long(n),
175 [&particles, g](auto float_var, long *x, long end) {
176
177 typedef decltype(float_var) FLOAT;
178
179 for (; particles.index() < end; particles += FLOAT::ARITY) {
180 &particles.a_x() << FLOAT(0.0f);
181 &particles.a_y() << FLOAT(-g);
182 }
183 });
184
185 particles.index() = 0;
186
187 typedef LibFlatArray::soa_array<InteractionBuffer, FLOAT::ARITY> soa_array;
188 // typedef LibFlatArray::soa_array<InteractionBuffer, 16> soa_array;
189 // std::cout << "buf: " << soa_array::SIZE << "\n";
190 soa_array interaction_buf;
191
192 SOA_ACCESSOR particles_i = particles;
193 SOA_ACCESSOR particles_j = particles;
194
195 // Now compute interaction forces
196 for (particles_i.index() = 0; particles_i.index() < n; ++particles_i) {
197 particles_j.index() = particles_i.index() + 1;
198
199 LibFlatArray::loop_peeler<FLOAT>(
200 &particles_j.index(), n,
201 [&particles, &particles_i, &particles_j, h, rho0, h_squared, C_0, C_p, C_v, &interaction_buf]
202 (auto my_float, long *x, long end) {
203
204 typedef decltype(my_float) FLOAT;
205 FLOAT pos_x_i = particles_i.pos_x();
206 FLOAT pos_y_i = particles_i.pos_y();
207
208 for (; particles_j.index() < end; particles_j += FLOAT::ARITY) {
209 FLOAT delta_x = pos_x_i - &particles_j.pos_x();
210 FLOAT delta_y = pos_y_i - &particles_j.pos_y();
211 FLOAT dist_squared = delta_x * delta_x + delta_y * delta_y;
212
213 if (LibFlatArray::any(dist_squared < FLOAT(h_squared))) {
214 for (int e = 0; e < FLOAT::ARITY; ++e, ++particles_j) {
215 if (get(dist_squared, e) < h_squared) {
216 interaction_buf << InteractionBuffer(
217 particles_i.rho(),
218 particles_i.v_x(),
219 particles_i.v_y(),
220
221 particles_j.rho(),
222 particles_j.v_x(),
223 particles_j.v_y(),
224
225 get(delta_x, e),
226 get(delta_y, e),
227 get(dist_squared, e),
228 particles_i.index(),
229 particles_j.index());
230 }
231 if (interaction_buf.size() == soa_array::SIZE) {
232 handle_interactions<soa_array::SIZE>(particles, interaction_buf, h, rho0, C_0, C_p, C_v);
233 }
234 }
235 particles_j += -FLOAT::ARITY;
236 }
237 }
238 });
239 }
240 handle_interactions<1>(particles, interaction_buf, h, rho0, C_0, C_p, C_v);
241 }
242
243 #endif
244