1 /**
2 * Copyright 2016 Andreas Schäfer
3 *
4 * Distributed under the Boost Software License, Version 1.0. (See accompanying
5 * file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #include <libflatarray/soa_grid.hpp>
9 #include <map>
10
11 #include "test.hpp"
12
13 class ActiveElement
14 {
15 public:
16 __host__
17 __device__
ActiveElement()18 ActiveElement()
19 {
20 val += 100000;
21 }
22
23 __host__
24 __device__
~ActiveElement()25 ~ActiveElement()
26 {
27 val += 1000000;
28 }
29
operator ==(ActiveElement other) const30 inline bool operator==(ActiveElement other) const
31 {
32 return val == other.val;
33 }
34
35 int val;
36 };
37
38 class PassiveElement
39 {
40 public:
operator ==(PassiveElement other) const41 inline bool operator==(PassiveElement other) const
42 {
43 return val == other.val;
44 }
45
46 int val;
47 };
48
49 class ConstructorDestructorTestCellActive
50 {
51 public:
52 inline
ConstructorDestructorTestCellActive(double temperature=0.0,bool alive=false)53 explicit ConstructorDestructorTestCellActive(double temperature=0.0, bool alive=false) :
54 temperature(temperature),
55 alive(alive)
56 {}
57
operator ==(const ConstructorDestructorTestCellActive & other) const58 inline bool operator==(const ConstructorDestructorTestCellActive& other) const
59 {
60 return
61 (temperature == other.temperature) &&
62 (alive == other.alive) &&
63 (element == other.element);
64 }
65
operator !=(const ConstructorDestructorTestCellActive & other) const66 inline bool operator!=(const ConstructorDestructorTestCellActive& other) const
67 {
68 return !(*this == other);
69 }
70
71 double temperature;
72 bool alive;
73 ActiveElement element;
74 };
75
76 class ConstructorDestructorTestCellPassive
77 {
78 public:
79 inline
ConstructorDestructorTestCellPassive(double temperature=0.0,bool alive=false)80 explicit ConstructorDestructorTestCellPassive(double temperature=0.0, bool alive=false) :
81 temperature(temperature),
82 alive(alive)
83 {}
84
operator ==(const ConstructorDestructorTestCellPassive & other) const85 inline bool operator==(const ConstructorDestructorTestCellPassive& other) const
86 {
87 return
88 (temperature == other.temperature) &&
89 (alive == other.alive) &&
90 (element == other.element);
91 }
92
operator !=(const ConstructorDestructorTestCellPassive & other) const93 inline bool operator!=(const ConstructorDestructorTestCellPassive& other) const
94 {
95 return !(*this == other);
96 }
97
98 double temperature;
99 bool alive;
100 PassiveElement element;
101 };
102
103 class CellWithArrayMember
104 {
105 public:
106 __host__
107 __device__
108 inline
CellWithArrayMember(int j=0)109 explicit CellWithArrayMember(int j = 0) :
110 j(j)
111 {
112 i[0] = j + 1;
113 i[1] = j + 2;
114 i[2] = j + 3;
115
116 x[0] = j + 0.4;
117 x[1] = j + 0.5;
118 }
119
120 __host__
121 __device__
122 inline
CellWithArrayMember(int newI[3],double newX[2],int j)123 CellWithArrayMember(int newI[3], double newX[2], int j) :
124 j(j)
125 {
126 i[0] = newI[0];
127 i[1] = newI[1];
128 i[1] = newI[2];
129
130 x[0] = newX[0];
131 x[1] = newX[1];
132 }
133
134 int i[3];
135 int j;
136 double x[2];
137 };
138
139 class CellWithActiveArrayMember
140 {
141 public:
142 __host__
143 __device__
144 inline
CellWithActiveArrayMember(int j=0)145 explicit CellWithActiveArrayMember(int j = 0) :
146 j(j)
147 {
148 i[0] = j + 1;
149 i[1] = j + 2;
150 i[2] = j + 3;
151 }
152
153 int i[3];
154 int j;
155 ActiveElement elements[2];
156 };
157
158 class CellWithPassiveArrayMember
159 {
160 public:
161 __host__
162 __device__
163 inline
CellWithPassiveArrayMember(int j=0)164 explicit CellWithPassiveArrayMember(int j = 0) :
165 j(j)
166 {
167 i[0] = j + 1;
168 i[1] = j + 2;
169 i[2] = j + 3;
170 }
171
172 int i[3];
173 int j;
174 PassiveElement elements[2];
175 };
176
177 LIBFLATARRAY_REGISTER_SOA(ConstructorDestructorTestCellActive,
178 ((double)(temperature))
179 ((ActiveElement)(element))
180 ((bool)(alive)) )
181
182 LIBFLATARRAY_REGISTER_SOA(ConstructorDestructorTestCellPassive,
183 ((double)(temperature))
184 ((PassiveElement)(element))
185 ((bool)(alive)) )
186
187 LIBFLATARRAY_REGISTER_SOA(CellWithArrayMember,
188 ((int)(i)(3))
189 ((int)(j))
190 ((double)(x)(2)) )
191
192 LIBFLATARRAY_REGISTER_SOA(CellWithActiveArrayMember,
193 ((int)(i)(3))
194 ((int)(j))
195 ((ActiveElement)(elements)(2)) )
196
197 LIBFLATARRAY_REGISTER_SOA(CellWithPassiveArrayMember,
198 ((int)(i)(3))
199 ((int)(j))
200 ((PassiveElement)(elements)(2)) )
201
202 namespace LibFlatArray {
203
204 std::map<std::size_t, char*> allocation_cache;
205
206 /**
207 * We fake allocation here to make sure our grids in the tests below
208 * get the same pointers. We need this to be sure that we're working
209 * on the same memory region with each.
210 */
211 template<class T>
212 class fake_cuda_allocator
213 {
214 public:
215 typedef ptrdiff_t difference_type;
216 typedef T* pointer;
217 typedef const T* const_pointer;
218 typedef T& reference;
219 typedef const T& const_reference;
220 typedef T value_type;
221
allocate(std::size_t n,const void * =0)222 pointer allocate(std::size_t n, const void* = 0)
223 {
224 if (allocation_cache[n] != 0) {
225 return allocation_cache[n];
226 }
227
228
229 pointer ret = 0;
230 cudaMalloc(&ret, n * sizeof(T));
231 allocation_cache[n] = ret;
232 return ret;
233 }
234
deallocate(pointer p,std::size_t)235 void deallocate(pointer p, std::size_t)
236 {
237 // intentionally left blank
238 }
239
deallocate_all()240 void deallocate_all()
241 {
242 for (typename std::map<std::size_t, pointer>::iterator i = allocation_cache.begin(); i != allocation_cache.end(); ++i) {
243 cudaFree(i->second);
244 i->second = 0;
245 }
246 }
247 };
248
ADD_TEST(TestCUDAConstructionDestruction)249 ADD_TEST(TestCUDAConstructionDestruction)
250 {
251 char *data = 0;
252 {
253 // prep device memory with consecutive numbers:
254 soa_grid<ConstructorDestructorTestCellPassive, fake_cuda_allocator<char>, true> device_grid(20, 10, 5);
255 data = device_grid.data();
256
257 soa_grid<ConstructorDestructorTestCellPassive> host_grid(20, 10, 5);
258 for (int z = 0; z < 5; ++z) {
259 for (int y = 0; y < 10; ++y) {
260 for (int x = 0; x < 20; ++x) {
261 ConstructorDestructorTestCellPassive cell((x + 1) * (y + 1), true);
262 cell.element.val = x + y * 20 + z * 20 * 10;
263 host_grid.set(x, y, z, cell);
264
265 cell = host_grid.get(x, y, z);
266 }
267 }
268 }
269 cudaMemcpy(device_grid.data(), host_grid.data(), device_grid.byte_size(), cudaMemcpyHostToDevice);
270
271 }
272 {
273 // ensure c-tor was run by checking increment on all elements:
274 soa_grid<ConstructorDestructorTestCellActive, fake_cuda_allocator<char>, true> device_grid(20, 10, 5);
275 BOOST_TEST(data == device_grid.data());
276
277 soa_grid<ConstructorDestructorTestCellPassive> host_grid(20, 10, 5);
278 cudaMemcpy(host_grid.data(), device_grid.data(), device_grid.byte_size(), cudaMemcpyDeviceToHost);
279 for (int z = 0; z < 5; ++z) {
280 for (int y = 0; y < 10; ++y) {
281 for (int x = 0; x < 20; ++x) {
282 ConstructorDestructorTestCellPassive cell = host_grid.get(x, y, z);
283 int expected = x + y * 20 + z * 20 * 10 + 100000;
284
285 BOOST_TEST(cell.element.val == expected);
286 BOOST_TEST(cell.temperature == 0);
287 BOOST_TEST(cell.alive == false);
288 }
289 }
290 }
291 }
292 {
293 // ensure d-tor was run by checking increment on all elements:
294 soa_grid<ConstructorDestructorTestCellPassive> host_grid(20, 10, 5);
295 cudaMemcpy(host_grid.data(), data, host_grid.byte_size(), cudaMemcpyDeviceToHost);
296 for (int z = 0; z < 5; ++z) {
297 for (int y = 0; y < 10; ++y) {
298 for (int x = 0; x < 20; ++x) {
299 ConstructorDestructorTestCellPassive cell = host_grid.get(x, y, z);
300 int expected = x + y * 20 + z * 20 * 10 + 1100000;
301
302 BOOST_TEST(cell.element.val == expected);
303 BOOST_TEST(cell.temperature == 0);
304 BOOST_TEST(cell.alive == false);
305 }
306 }
307 }
308 }
309
310 fake_cuda_allocator<char>().deallocate_all();
311 }
312
ADD_TEST(TestCUDAGetSetSingleElements)313 ADD_TEST(TestCUDAGetSetSingleElements)
314 {
315 soa_grid<ConstructorDestructorTestCellPassive, cuda_allocator<char>, true> device_grid(40, 13, 8);
316
317 for (int z = 0; z < 8; ++z) {
318 for (int y = 0; y < 13; ++y) {
319 for (int x = 0; x < 40; ++x) {
320 ConstructorDestructorTestCellPassive cell((x + 2) * (y + 2), true);
321 cell.element.val = 10000 + x + y * 40 + z * 40 * 13;
322 device_grid.set(x, y, z, cell);
323 }
324 }
325 }
326
327 for (int z = 0; z < 8; ++z) {
328 for (int y = 0; y < 13; ++y) {
329 for (int x = 0; x < 40; ++x) {
330 ConstructorDestructorTestCellPassive cell = device_grid.get(x, y, z);
331
332 int expected = 10000 + x + y * 40 + z * 40 * 13;
333 BOOST_TEST(cell.element.val == expected);
334 BOOST_TEST(cell.temperature == ((x + 2) * (y + 2)));
335 BOOST_TEST(cell.alive == true);
336 }
337 }
338 }
339 }
340
ADD_TEST(TestCUDAGetSetMultipleElements)341 ADD_TEST(TestCUDAGetSetMultipleElements)
342 {
343 soa_grid<ConstructorDestructorTestCellPassive, cuda_allocator<char>, true> device_grid(35, 25, 15);
344
345 for (int z = 0; z < 15; ++z) {
346 for (int y = 0; y < 25; ++y) {
347 std::vector<ConstructorDestructorTestCellPassive> cells(35);
348 for (int x = 0; x < 35; ++x) {
349 cells[x].alive = x % 2;
350 cells[x].temperature = x * y * z;
351 cells[x].element.val = 20000 + x + y * 35 + z * 35 * 25;
352 }
353
354 device_grid.set(0, y, z, cells.data(), 35);
355 }
356 }
357
358 for (int z = 0; z < 15; ++z) {
359 for (int y = 0; y < 25; ++y) {
360 std::vector<ConstructorDestructorTestCellPassive> cells(35);
361 device_grid.get(0, y, z, cells.data(), 35);
362
363 for (int x = 0; x < 35; ++x) {
364 int expected = 20000 + x + y * 35 + z * 35 * 25;
365
366 BOOST_TEST(cells[x].element.val == expected);
367 BOOST_TEST(cells[x].alive == (x % 2));
368 BOOST_TEST(cells[x].temperature == (x * y * z));
369 }
370 }
371 }
372 }
373
ADD_TEST(TestCUDALoadSaveElements)374 ADD_TEST(TestCUDALoadSaveElements)
375 {
376 soa_grid<ConstructorDestructorTestCellPassive> host_grid(21, 10, 9);
377 for (int z = 0; z < 9; ++z) {
378 for (int y = 0; y < 10; ++y) {
379 for (int x = 0; x < 21; ++x) {
380 ConstructorDestructorTestCellPassive cell;
381 cell.alive = ((x % 3) == 0);
382 cell.temperature = x * y * z * -1;
383 cell.element.val = 30000 + x + y * 21 + z * 21 * 10;
384 host_grid.set(x, y, z, cell);
385 }
386 }
387 }
388
389 std::vector<char> buffer(10 * aggregated_member_size<ConstructorDestructorTestCellPassive>::VALUE);
390 host_grid.save(11, 9, 8, buffer.data(), 10);
391
392 soa_grid<ConstructorDestructorTestCellPassive, cuda_allocator<char>, true> device_grid(31, 20, 19);
393 device_grid.load(21, 19, 18, buffer.data(), 10);
394
395 for (int i = 0; i < 20; ++i) {
396 ConstructorDestructorTestCellPassive cell;
397 cell.alive = i % 4;
398 cell.temperature = 4711 + i;
399 cell.element.val = 100 * i;
400 device_grid.set(i + 1, 5, 6, cell);
401 }
402
403 buffer.resize(20 * aggregated_member_size<ConstructorDestructorTestCellPassive>::VALUE);
404 device_grid.save(1, 5, 6, buffer.data(), 20);
405
406 // very load:
407 soa_grid<ConstructorDestructorTestCellPassive> host_grid2(31, 20, 19);
408 cudaMemcpy(host_grid2.data(), device_grid.data(), device_grid.byte_size(), cudaMemcpyDeviceToHost);
409
410 for (int i = 0; i < 10; ++i) {
411 ConstructorDestructorTestCellPassive cell = host_grid2.get(21 + i, 19, 18);
412
413 bool expectedAlive = (((i + 11) % 3) == 0);
414 double expectedTemperature = (11 + i) * 9 * 8 * -1;
415 int expectedVal = 30000 + (11 + i) + 9 * 21 + 8 * 21 * 10;
416
417 BOOST_TEST(cell.alive == expectedAlive);
418 BOOST_TEST(cell.temperature == expectedTemperature);
419 BOOST_TEST(cell.element.val == expectedVal);
420 }
421
422 // verify save:
423 double *temperature = (double*)(buffer.data() + 0 * 20);
424 int *val = (int*) (buffer.data() + 8 * 20);
425 bool *alive = (bool*) (buffer.data() + 12 * 20);
426
427 for (int i = 0; i < 20; ++i) {
428 bool expectedAlive = i % 4;
429 double expectedTemperature = 4711 + i;
430 int expectedVal = i * 100;
431
432 BOOST_TEST(expectedAlive == alive[i]);
433 BOOST_TEST(expectedTemperature == temperature[i]);
434 BOOST_TEST(expectedVal == val[i]);
435 }
436
437 // sanity check:
438 cudaDeviceSynchronize();
439 cudaError_t error = cudaGetLastError();
440 if (error != cudaSuccess) {
441 std::cerr << "ERROR: " << cudaGetErrorString(error) << "\n";
442 throw std::runtime_error("CUDA error");
443 }
444 }
445
ADD_TEST(TestCUDAArrayMembersGetSet)446 ADD_TEST(TestCUDAArrayMembersGetSet)
447 {
448 // test set/get single elements:
449 soa_grid<CellWithArrayMember, cuda_allocator<char>, true> device_grid(12, 23, 34);
450
451 for (int z = 0; z < 34; ++z) {
452 for (int y = 0; y < 23; ++y) {
453 for (int x = 0; x < 12; ++x) {
454 CellWithArrayMember cell;
455 cell.i[0] = x;
456 cell.i[1] = y;
457 cell.i[2] = z;
458 cell.j = x * y * z;
459 cell.x[0] = x + y + 0.1;
460 cell.x[1] = y + z + 0.2;
461
462 device_grid.set(x, y, z, cell);
463 }
464 }
465 }
466
467 for (int z = 0; z < 34; ++z) {
468 for (int y = 0; y < 23; ++y) {
469 for (int x = 0; x < 12; ++x) {
470 int expectedCellI0 = x;
471 int expectedCellI1 = y;
472 int expectedCellI2 = z;
473 int expectedCellJ = x * y * z;
474 double expectedCellX0 = x + y + 0.1;
475 double expectedCellX1 = y + z + 0.2;
476
477 CellWithArrayMember cell = device_grid.get(x, y, z);
478
479 BOOST_TEST(expectedCellI0 == cell.i[0]);
480 BOOST_TEST(expectedCellI1 == cell.i[1]);
481 BOOST_TEST(expectedCellI2 == cell.i[2]);
482
483 BOOST_TEST(expectedCellJ == cell.j);
484
485 BOOST_TEST(expectedCellX0 == cell.x[0]);
486 BOOST_TEST(expectedCellX1 == cell.x[1]);
487 }
488 }
489 }
490 }
491
ADD_TEST(TestCUDAArrayMembersGetSetMultiple)492 ADD_TEST(TestCUDAArrayMembersGetSetMultiple)
493 {
494 // test set/get single elements:
495 soa_grid<CellWithArrayMember, cuda_allocator<char>, true> device_grid(40, 23, 34);
496
497 for (int z = 0; z < 34; ++z) {
498 for (int y = 0; y < 23; ++y) {
499 CellWithArrayMember cells[40];
500 for (int x = 0; x < 40; ++x) {
501 cells[x].i[0] = x;
502 cells[x].i[1] = y;
503 cells[x].i[2] = z;
504 cells[x].j = x * y * z;
505 cells[x].x[0] = x + y + 0.1;
506 cells[x].x[1] = y + z + 0.2;
507 }
508
509 device_grid.set(0, y, z, cells, 40);
510 }
511 }
512
513 for (int z = 0; z < 34; ++z) {
514 for (int y = 0; y < 23; ++y) {
515 CellWithArrayMember cells[40];
516 device_grid.get(0, y, z, cells, 40);
517
518 for (int x = 0; x < 40; ++x) {
519 int expectedCellI0 = x;
520 int expectedCellI1 = y;
521 int expectedCellI2 = z;
522 int expectedCellJ = x * y * z;
523 double expectedCellX0 = x + y + 0.1;
524 double expectedCellX1 = y + z + 0.2;
525
526 BOOST_TEST(expectedCellI0 == cells[x].i[0]);
527 BOOST_TEST(expectedCellI1 == cells[x].i[1]);
528 BOOST_TEST(expectedCellI2 == cells[x].i[2]);
529
530 BOOST_TEST(expectedCellJ == cells[x].j);
531
532 BOOST_TEST(expectedCellX0 == cells[x].x[0]);
533 BOOST_TEST(expectedCellX1 == cells[x].x[1]);
534 }
535 }
536 }
537 }
538
ADD_TEST(TestCUDAArrayMembersConstructDestruct)539 ADD_TEST(TestCUDAArrayMembersConstructDestruct)
540 {
541 char *data = 0;
542 {
543 // prep device memory with consecutive numbers:
544 soa_grid<CellWithPassiveArrayMember, fake_cuda_allocator<char>, true> device_grid(8, 9, 13);
545 data = device_grid.data();
546
547 soa_grid<CellWithPassiveArrayMember> host_grid(8, 9, 13);
548 for (int z = 0; z < 13; ++z) {
549 for (int y = 0; y < 9; ++y) {
550 for (int x = 0; x < 8; ++x) {
551 CellWithPassiveArrayMember cell((x + 1) * (y + 1));
552 cell.elements[0].val = 40000 + x + y * 8 + z * 8 * 9;
553 cell.elements[1].val = 50000 + x + y * 8 + z * 8 * 9;
554 host_grid.set(x, y, z, cell);
555
556 cell = host_grid.get(x, y, z);
557 }
558 }
559 }
560 cudaMemcpy(device_grid.data(), host_grid.data(), device_grid.byte_size(), cudaMemcpyHostToDevice);
561
562 }
563 {
564 // ensure c-tor was run by checking increment on all elements:
565 soa_grid<CellWithActiveArrayMember, fake_cuda_allocator<char>, true> device_grid(8, 9, 13);
566 BOOST_TEST(data == device_grid.data());
567
568 soa_grid<CellWithPassiveArrayMember> host_grid(8, 9, 13);
569 cudaMemcpy(host_grid.data(), device_grid.data(), device_grid.byte_size(), cudaMemcpyDeviceToHost);
570 for (int z = 0; z < 13; ++z) {
571 for (int y = 0; y < 9; ++y) {
572 for (int x = 0; x < 8; ++x) {
573 CellWithPassiveArrayMember cell = host_grid.get(x, y, z);
574 int expected0 = 40000 + x + y * 8 + z * 8 * 9 + 100000;
575 int expected1 = 50000 + x + y * 8 + z * 8 * 9 + 100000;
576
577 BOOST_TEST(cell.elements[0].val == expected0);
578 BOOST_TEST(cell.elements[1].val == expected1);
579
580 BOOST_TEST(cell.i[0] == 0);
581 BOOST_TEST(cell.i[1] == 0);
582 BOOST_TEST(cell.i[2] == 0);
583 }
584 }
585 }
586 }
587 {
588 // ensure d-tor was run by checking increment on all elements:
589 soa_grid<CellWithPassiveArrayMember> host_grid(8, 9, 13);
590 cudaMemcpy(host_grid.data(), data, host_grid.byte_size(), cudaMemcpyDeviceToHost);
591 for (int z = 0; z < 13; ++z) {
592 for (int y = 0; y < 9; ++y) {
593 for (int x = 0; x < 8; ++x) {
594 CellWithPassiveArrayMember cell = host_grid.get(x, y, z);
595 int expected0 = 40000 + x + y * 8 + z * 8 * 9 + 1100000;
596 int expected1 = 50000 + x + y * 8 + z * 8 * 9 + 1100000;
597
598 BOOST_TEST(cell.elements[0].val == expected0);
599 BOOST_TEST(cell.elements[1].val == expected1);
600
601 BOOST_TEST(cell.i[0] == 0);
602 BOOST_TEST(cell.i[1] == 0);
603 BOOST_TEST(cell.i[2] == 0);
604 }
605 }
606 }
607 }
608
609 fake_cuda_allocator<char>().deallocate_all();
610 }
611
ADD_TEST(TestCUDAArrayMembersLoadSave)612 ADD_TEST(TestCUDAArrayMembersLoadSave)
613 {
614 soa_grid<CellWithPassiveArrayMember, cuda_allocator<char>, true> device_grid(45, 35, 25);
615 for (int z = 0; z < 25; ++z) {
616 for (int y = 0; y < 35; ++y) {
617 for (int x = 0; x < 45; ++x) {
618 CellWithPassiveArrayMember cell;
619 cell.i[0] = x;
620 cell.i[1] = y;
621 cell.i[2] = z;
622 cell.j = x * y * z;
623 cell.elements[0].val = 4711 + x * y;
624 cell.elements[1].val = 666 + y * z;
625
626 device_grid.set(x, y, z, cell);
627 }
628 }
629 }
630
631 std::vector<char> buffer(aggregated_member_size<CellWithPassiveArrayMember>::VALUE * 33);
632 device_grid.save(12, 34, 24, buffer.data(), 33);
633
634 soa_grid<CellWithPassiveArrayMember, cuda_allocator<char>, true> device_grid2(35, 20, 5);
635 device_grid2.load(2, 19, 4, buffer.data(), 33);
636
637 for (int x = 0; x < 33; ++x) {
638 CellWithPassiveArrayMember cell = device_grid2.get(x + 2, 19, 4);
639
640 int expectedI0 = x + 12;
641 int expectedI1 = 34;
642 int expectedI2 = 24;
643
644 int expectedJ = (x + 12) * 34 * 24;
645
646 int expectedElements0 = 4711 + (x + 12) * 34;
647 int expectedElements1 = 666 + 34 * 24;
648
649 BOOST_TEST(cell.i[0] == expectedI0);
650 BOOST_TEST(cell.i[1] == expectedI1);
651 BOOST_TEST(cell.i[2] == expectedI2);
652
653 BOOST_TEST(cell.j == expectedJ);
654
655 BOOST_TEST(cell.elements[0].val == expectedElements0);
656 BOOST_TEST(cell.elements[1].val == expectedElements1);
657 }
658 }
659
660 }
661
main(int argc,char ** argv)662 int main(int argc, char **argv)
663 {
664 return 0;
665 }
666