1 //****************************************************************************//
2 // Copyright (C) 2016 Florent Hivert <Florent.Hivert@lri.fr>, //
3 // //
4 // Distributed under the terms of the GNU General Public License (GPL) //
5 // //
6 // This code is distributed in the hope that it will be useful, //
7 // but WITHOUT ANY WARRANTY; without even the implied warranty of //
8 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
9 // General Public License for more details. //
10 // //
11 // The full text of the GPL is available at: //
12 // //
13 // http://www.gnu.org/licenses/ //
14 //****************************************************************************//
15
16 #include <algorithm>
17 #include <array>
18 #include <chrono>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <iomanip>
22 #include <iostream>
23 #include <vector>
24 #include <x86intrin.h>
25
26 #include "epu.hpp"
27
28 using namespace std;
29 using namespace std::chrono;
30 using namespace HPCombi;
31
32 #define ASSERT(test) if (!(test)) cout << "Test failed in file " << __FILE__ \
33 << " line " << __LINE__ << ": " #test << endl
34
rand_sample(size_t sz)35 std::vector<epu8> rand_sample(size_t sz) {
36 std::vector<epu8> res;
37 for (size_t i=0; i < sz; i++)
38 res.push_back(random_epu8(256));
39 return res;
40 }
41
rand_perm()42 inline epu8 rand_perm() {
43 epu8 res = epu8id;
44 auto &ar = as_array(res);
45 std::random_shuffle(ar.begin(), ar.end());
46 return res;
47 }
48
rand_perms(int sz)49 std::vector<epu8> rand_perms(int sz) {
50 std::vector<epu8> res(sz);
51 std::srand(std::time(0));
52 for (int i = 0; i < sz; i++)
53 res[i] = rand_perm();
54 return res;
55 }
56
57 template <typename Func>
timethat(Func fun,int rep=1,double reftime=0)58 double timethat(Func fun, int rep = 1, double reftime = 0) {
59 using std::chrono::duration;
60 using std::chrono::duration_cast;
61 using std::chrono::high_resolution_clock;
62 auto tstart = high_resolution_clock::now();
63 for (int i = 0; i < rep; i++)
64 fun();
65 auto tfin = high_resolution_clock::now();
66
67 auto tm = duration_cast<duration<double>>(tfin - tstart);
68 std::cout << "time = " << std::fixed << std::setprecision(6) << tm.count()
69 << "s";
70 if (reftime != 0)
71 std::cout << ", speedup = " << std::setprecision(3) << reftime / tm.count();
72 std::cout << std::endl;
73 return tm.count();
74 }
75
76 struct RoundsMask {
77 // commented out due to a bug in gcc
RoundsMaskRoundsMask78 /* constexpr */ RoundsMask() : arr() {
79 for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i)
80 arr[i] = HPCombi::sorting_rounds[i] < epu8id;
81 }
82 epu8 arr[HPCombi::sorting_rounds.size()];
83 };
84
85 const auto rounds_mask = RoundsMask();
86
sort_pair(epu8 a)87 inline epu8 sort_pair(epu8 a) {
88 for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) {
89 epu8 minab, maxab, b = permuted(a, HPCombi::sorting_rounds[i]);
90 minab = _mm_min_epi8(a, b);
91 maxab = _mm_max_epi8(a, b);
92 a = _mm_blendv_epi8(minab, maxab, rounds_mask.arr[i]);
93 }
94 return a;
95 }
96
sort_odd_even(epu8 a)97 inline epu8 sort_odd_even(epu8 a) {
98 const uint8_t FF = 0xff;
99 static const epu8 even =
100 {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
101 static const epu8 odd =
102 {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15};
103 static const epu8 mask =
104 {0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF};
105 epu8 b, minab, maxab;
106 for (unsigned i = 0; i < 8; ++i) {
107 b = permuted(a, even);
108 minab = _mm_min_epi8(a, b);
109 maxab = _mm_max_epi8(a, b);
110 a = _mm_blendv_epi8(minab, maxab, mask);
111 b = permuted(a, odd);
112 minab = _mm_min_epi8(a, b);
113 maxab = _mm_max_epi8(a, b);
114 a = _mm_blendv_epi8(maxab, minab, mask);
115 }
116 return a;
117 }
118
insertion_sort(epu8 p)119 inline epu8 insertion_sort(epu8 p) {
120 auto &a = HPCombi::as_array(p);
121 for (int i = 0; i < 16; i++)
122 for (int j = i; j > 0 && a[j] < a[j - 1]; j--)
123 std::swap(a[j], a[j - 1]);
124 return p;
125 }
126
radix_sort(epu8 p)127 inline epu8 radix_sort(epu8 p) {
128 auto &a = HPCombi::as_array(p);
129 std::array<uint8_t, 16> stat {};
130 for (int i = 0; i < 16; i++)
131 stat[a[i]]++;
132 int c = 0;
133 for (int i = 0; i < 16; i++)
134 for (int j = 0; j < stat[i]; j++)
135 a[c++] = i;
136 return p;
137 }
138
main()139 int main() {
140 // epu8 a = { 5, 4,12,15,10, 8, 9, 2, 3,13,14, 0, 1, 7,11, 6};
141
142 auto vrand = rand_perms(1000);
143 int rep = 10000;
144 cout << "Std lib: ";
145 double reftime = timethat(
146 [vrand]() {
147 for (epu8 v : vrand) {
148 auto &ar = as_array(v);
149 std::sort(ar.begin(), ar.end());
150 ASSERT(equal(v, epu8id)); // avoid optimization
151 }
152 },
153 rep);
154 cout << "OddEv : ";
155 timethat(
156 [vrand]() {
157 for (epu8 v : vrand)
158 ASSERT(equal(sort_odd_even(v), epu8id));
159 },
160 rep, reftime);
161 cout << "Insert : ";
162 timethat(
163 [vrand]() {
164 for (epu8 v : vrand)
165 ASSERT(equal(insertion_sort(v), epu8id));
166 },
167 rep, reftime);
168 cout << "Radix16: ";
169 timethat(
170 [vrand]() {
171 for (epu8 v : vrand)
172 ASSERT(equal(radix_sort(v), epu8id));
173 },
174 rep, reftime);
175 cout << "Pair : ";
176 timethat(
177 [vrand]() {
178 for (epu8 v : vrand)
179 ASSERT(equal(sort_pair(v), epu8id));
180 },
181 rep, reftime);
182 cout << "Funct : ";
183 timethat(
184 [vrand]() {
185 for (epu8 v : vrand)
186 ASSERT(equal(sorted(v), epu8id));
187 },
188 rep, reftime);
189
190 return EXIT_SUCCESS;
191 }
192