1 #ifndef LIBFLATARRAY_EXAMPLES_SMOOTHED_PARTICLE_HYDRODYNAMICS_KERNELS_HPP
2 #define LIBFLATARRAY_EXAMPLES_SMOOTHED_PARTICLE_HYDRODYNAMICS_KERNELS_HPP
3 
4 #define PI float(M_PI)
5 
6 class InteractionBuffer
7 {
8 public:
9     inline
InteractionBuffer(float rho_i=0,float v_x_i=0,float v_y_i=0,float rho_j=0,float v_x_j=0,float v_y_j=0,float delta_x=0,float delta_y=0,float dist_squared=0,int i=0,int j=0)10     explicit InteractionBuffer(
11         float rho_i = 0,
12         float v_x_i = 0,
13         float v_y_i = 0,
14         float rho_j = 0,
15         float v_x_j = 0,
16         float v_y_j = 0,
17         float delta_x = 0,
18         float delta_y = 0,
19         float dist_squared = 0,
20         int i = 0,
21         int j = 0) :
22         rho_i(rho_i),
23         v_x_i(v_x_i),
24         v_y_i(v_y_i),
25         rho_j(rho_j),
26         v_x_j(v_x_j),
27         v_y_j(v_y_j),
28         delta_x(delta_x),
29         delta_y(delta_y),
30         dist_squared(dist_squared),
31         i(i),
32         j(j)
33     {}
34 
35     float rho_i;
36     float v_x_i;
37     float v_y_i;
38     float rho_j;
39     float v_x_j;
40     float v_y_j;
41     float delta_x;
42     float delta_y;
43     float dist_squared;
44     int i;
45     int j;
46 };
47 LIBFLATARRAY_REGISTER_SOA(
48     InteractionBuffer,
49     ((float)(rho_i))
50     ((float)(v_x_i))
51     ((float)(v_y_i))
52     ((float)(rho_j))
53     ((float)(v_x_j))
54     ((float)(v_y_j))
55     ((float)(delta_x))
56     ((float)(delta_y))
57     ((float)(dist_squared))
58     ((int)(i))
59     ((int)(j)))
60 
61 template<typename CELL, long DIM_X, long DIM_Y, long DIM_Z, long INDEX>
compute_density_lfa(int n,LibFlatArray::soa_accessor<CELL,DIM_X,DIM_Y,DIM_Z,INDEX> particles,float h,float mass)62 void compute_density_lfa(int n, LibFlatArray::soa_accessor<CELL, DIM_X, DIM_Y, DIM_Z, INDEX> particles, float h, float mass)
63 {
64     typedef LibFlatArray::soa_accessor<CELL, DIM_X, DIM_Y, DIM_Z, INDEX> soa_accessor;
65     typedef typename LibFlatArray::estimate_optimum_short_vec_type<float, soa_accessor>::VALUE FLOAT;
66 
67     LibFlatArray::loop_peeler<FLOAT>(&particles.index(), n, [&particles, h, mass](auto my_float, long *i, long end) {
68             typedef decltype(my_float) FLOAT;
69             float h_squared = h * h;
70             FLOAT h_squared_vec(h_squared);
71 
72             for (; particles.index() < end; particles += FLOAT::ARITY) {
73                 &particles.rho() << 4.0f * mass / PI / h_squared_vec;
74             }
75         });
76 
77     particles.index() = 0;
78 
79     float h_squared = h * h;
80     float h_pow_8 = h_squared * h_squared * h_squared * h_squared;
81     float C = 4 * mass / M_PI / h_pow_8;
82 
83     soa_accessor particles_i(particles.data(), 0);
84     soa_accessor particles_j(particles.data(), 0);
85 
86     for (particles_i.index() = 0; particles_i.index() < n; ++particles_i) {
87         float pos_x_i = particles_i.pos_x();
88         float pos_y_i = particles_i.pos_y();
89 
90         particles_j.index() = particles_i.index() + 1;
91 
92         LibFlatArray::loop_peeler<FLOAT>(
93             &particles_j.index(), n,
94             [&particles_i, &particles_j, h, mass, pos_x_i, pos_y_i, C]
95             (auto my_float, long *x, int end)
96             {
97                 typedef decltype(my_float) FLOAT;
98                 float h_squared = h * h;
99                 FLOAT h_squared_vec(h_squared);
100 
101                 for (; particles_j.index() < end;) {
102                     FLOAT delta_x = FLOAT(pos_x_i) - &particles_j.pos_x();
103                     FLOAT delta_y = FLOAT(pos_y_i) - &particles_j.pos_y();
104                     FLOAT dist_squared = delta_x * delta_x + delta_y * delta_y;
105                     FLOAT overlap = h_squared_vec - dist_squared;
106 
107                     if (LibFlatArray::any(overlap > FLOAT(0.0))) {
108                         for (int e = 0; e < FLOAT::ARITY; ++e) {
109                             float o = get(overlap, e);
110                             if (o > 0) {
111                                 float rho_ij = C * o * o * o;
112                                 particles_i.rho() += rho_ij;
113                                 particles_j.rho() += rho_ij;
114                             }
115                             ++particles_j;
116                         }
117                     } else {
118                         particles_j += FLOAT::ARITY;
119                     }
120                 }
121             });
122     }
123 }
124 
125 template<int ARITY, typename SOA_ACCESSOR, typename SOA_ARRAY>
handle_interactions(SOA_ACCESSOR & particles,SOA_ARRAY & interaction_buf,const float h,const float rho0,const float C_0,const float C_p,const float C_v)126 void handle_interactions(SOA_ACCESSOR& particles, SOA_ARRAY& interaction_buf, const float h, const float rho0, const float C_0, const float C_p, const float C_v)
127 {
128     typedef LibFlatArray::short_vec<float, ARITY> FLOAT;
129     for (std::size_t f = 0; f < interaction_buf.size(); f += FLOAT::ARITY) {
130         FLOAT q = sqrt(FLOAT(&interaction_buf[f].dist_squared())) / h;
131 
132         FLOAT u = 1.0f - q;
133         FLOAT w_0 = u * C_0 / &interaction_buf[f].rho_i() / &interaction_buf[f].rho_j();
134         FLOAT w_p = w_0 * C_p * (FLOAT(&interaction_buf[f].rho_i()) + &interaction_buf[f].rho_j() - 2 * rho0) * u / q;
135         FLOAT w_v = w_0 * C_v;
136         FLOAT delta_v_x = FLOAT(&interaction_buf[f].v_x_i()) - &interaction_buf[f].v_x_j();
137         FLOAT delta_v_y = FLOAT(&interaction_buf[f].v_y_i()) - &interaction_buf[f].v_y_j();
138 
139         FLOAT add_x = w_p * &interaction_buf[f].delta_x() + w_v * delta_v_x;
140         FLOAT add_y = w_p * &interaction_buf[f].delta_y() + w_v * delta_v_y;
141         // scalar store to avoid simultaneous overwrites:
142         for (int i = 0; i < FLOAT::ARITY; ++i) {
143             float add_x_scalar = get(add_x, i);
144             float add_y_scalar = get(add_y, i);
145             (&particles.a_x())[interaction_buf[f + i].i()] += add_x_scalar;
146             (&particles.a_y())[interaction_buf[f + i].i()] += add_y_scalar;
147             (&particles.a_x())[interaction_buf[f + i].j()] -= add_x_scalar;
148             (&particles.a_y())[interaction_buf[f + i].j()] -= add_y_scalar;
149         }
150     }
151     interaction_buf.clear();
152 }
153 
154 template<typename SOA_ACCESSOR>
compute_accel_lfa(int n,SOA_ACCESSOR particles,float mass,float g,float h,float k,float rho0,float mu)155 void compute_accel_lfa(
156     int n,
157     SOA_ACCESSOR particles,
158     float mass,
159     float g,
160     float h,
161     float k,
162     float rho0,
163     float mu)
164 {
165     typedef typename LibFlatArray::estimate_optimum_short_vec_type<float, SOA_ACCESSOR>::VALUE FLOAT;
166 
167     const float h_squared = h * h;
168     const float C_0 = mass / M_PI / (h_squared * h_squared);
169     const float C_p = 15 * k;
170     const float C_v = -40 * mu;
171 
172     // gravity:
173     LibFlatArray::loop_peeler<FLOAT>(
174         &particles.index(), long(n),
175         [&particles, g](auto float_var, long *x, long end) {
176 
177             typedef decltype(float_var) FLOAT;
178 
179             for (; particles.index() < end; particles += FLOAT::ARITY) {
180                 &particles.a_x() << FLOAT(0.0f);
181                 &particles.a_y() << FLOAT(-g);
182             }
183         });
184 
185     particles.index() = 0;
186 
187     typedef LibFlatArray::soa_array<InteractionBuffer, FLOAT::ARITY> soa_array;
188     // typedef LibFlatArray::soa_array<InteractionBuffer, 16> soa_array;
189     // std::cout << "buf: " << soa_array::SIZE << "\n";
190     soa_array interaction_buf;
191 
192     SOA_ACCESSOR particles_i = particles;
193     SOA_ACCESSOR particles_j = particles;
194 
195     // Now compute interaction forces
196     for (particles_i.index() = 0; particles_i.index() < n; ++particles_i) {
197         particles_j.index() = particles_i.index() + 1;
198 
199         LibFlatArray::loop_peeler<FLOAT>(
200             &particles_j.index(), n,
201             [&particles, &particles_i, &particles_j, h, rho0, h_squared, C_0, C_p, C_v, &interaction_buf]
202             (auto my_float, long *x, long end) {
203 
204                 typedef decltype(my_float) FLOAT;
205                 FLOAT pos_x_i = particles_i.pos_x();
206                 FLOAT pos_y_i = particles_i.pos_y();
207 
208                 for (; particles_j.index() < end; particles_j += FLOAT::ARITY) {
209                     FLOAT delta_x = pos_x_i - &particles_j.pos_x();
210                     FLOAT delta_y = pos_y_i - &particles_j.pos_y();
211                     FLOAT dist_squared = delta_x * delta_x + delta_y * delta_y;
212 
213                     if (LibFlatArray::any(dist_squared < FLOAT(h_squared))) {
214                         for (int e = 0; e < FLOAT::ARITY; ++e, ++particles_j) {
215                             if (get(dist_squared, e) < h_squared) {
216                                 interaction_buf << InteractionBuffer(
217                                     particles_i.rho(),
218                                     particles_i.v_x(),
219                                     particles_i.v_y(),
220 
221                                     particles_j.rho(),
222                                     particles_j.v_x(),
223                                     particles_j.v_y(),
224 
225                                     get(delta_x, e),
226                                     get(delta_y, e),
227                                     get(dist_squared, e),
228                                     particles_i.index(),
229                                     particles_j.index());
230                             }
231                             if (interaction_buf.size() == soa_array::SIZE) {
232                                 handle_interactions<soa_array::SIZE>(particles, interaction_buf, h, rho0, C_0, C_p, C_v);
233                             }
234                         }
235                         particles_j += -FLOAT::ARITY;
236                     }
237                 }
238             });
239     }
240     handle_interactions<1>(particles, interaction_buf, h, rho0, C_0, C_p, C_v);
241 }
242 
243 #endif
244