1 //****************************************************************************//
2 //       Copyright (C) 2016 Florent Hivert <Florent.Hivert@lri.fr>,           //
3 //                                                                            //
4 //  Distributed under the terms of the GNU General Public License (GPL)       //
5 //                                                                            //
6 //    This code is distributed in the hope that it will be useful,            //
7 //    but WITHOUT ANY WARRANTY; without even the implied warranty of          //
8 //    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU       //
9 //   General Public License for more details.                                 //
10 //                                                                            //
11 //  The full text of the GPL is available at:                                 //
12 //                                                                            //
13 //                  http://www.gnu.org/licenses/                              //
14 //****************************************************************************//
15 
16 #include <algorithm>
17 #include <array>
18 #include <chrono>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <iomanip>
22 #include <iostream>
23 #include <vector>
24 #include <x86intrin.h>
25 
26 #include "epu.hpp"
27 
28 using namespace std;
29 using namespace std::chrono;
30 using namespace HPCombi;
31 
32 #define ASSERT(test) if (!(test)) cout << "Test failed in file " << __FILE__ \
33                                        << " line " << __LINE__ << ": " #test << endl
34 
rand_sample(size_t sz)35 std::vector<epu8> rand_sample(size_t sz) {
36     std::vector<epu8> res;
37     for (size_t i=0; i < sz; i++)
38         res.push_back(random_epu8(256));
39     return res;
40 }
41 
rand_perm()42 inline epu8 rand_perm() {
43   epu8 res = epu8id;
44   auto &ar = as_array(res);
45   std::random_shuffle(ar.begin(), ar.end());
46   return res;
47 }
48 
rand_perms(int sz)49 std::vector<epu8> rand_perms(int sz) {
50   std::vector<epu8> res(sz);
51   std::srand(std::time(0));
52   for (int i = 0; i < sz; i++)
53     res[i] = rand_perm();
54   return res;
55 }
56 
57 template <typename Func>
timethat(Func fun,int rep=1,double reftime=0)58 double timethat(Func fun, int rep = 1, double reftime = 0) {
59   using std::chrono::duration;
60   using std::chrono::duration_cast;
61   using std::chrono::high_resolution_clock;
62   auto tstart = high_resolution_clock::now();
63   for (int i = 0; i < rep; i++)
64     fun();
65   auto tfin = high_resolution_clock::now();
66 
67   auto tm = duration_cast<duration<double>>(tfin - tstart);
68   std::cout << "time = " << std::fixed << std::setprecision(6) << tm.count()
69             << "s";
70   if (reftime != 0)
71     std::cout << ", speedup = " << std::setprecision(3) << reftime / tm.count();
72   std::cout << std::endl;
73   return tm.count();
74 }
75 
76 struct RoundsMask {
77   // commented out due to a bug in gcc
RoundsMaskRoundsMask78     /* constexpr */ RoundsMask() : arr() {
79         for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i)
80             arr[i] = HPCombi::sorting_rounds[i] < epu8id;
81     }
82     epu8 arr[HPCombi::sorting_rounds.size()];
83 };
84 
85 const auto rounds_mask = RoundsMask();
86 
sort_pair(epu8 a)87 inline epu8 sort_pair(epu8 a) {
88     for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) {
89         epu8 minab, maxab, b = permuted(a, HPCombi::sorting_rounds[i]);
90         minab = _mm_min_epi8(a, b);
91         maxab = _mm_max_epi8(a, b);
92         a = _mm_blendv_epi8(minab, maxab, rounds_mask.arr[i]);
93     }
94     return a;
95 }
96 
sort_odd_even(epu8 a)97 inline epu8 sort_odd_even(epu8 a) {
98     const uint8_t FF = 0xff;
99     static const epu8 even =
100         {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
101     static const epu8 odd =
102         {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15};
103     static const epu8 mask =
104         {0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF};
105     epu8 b, minab, maxab;
106     for (unsigned i = 0; i < 8; ++i) {
107         b = permuted(a, even);
108         minab = _mm_min_epi8(a, b);
109         maxab = _mm_max_epi8(a, b);
110         a = _mm_blendv_epi8(minab, maxab, mask);
111         b = permuted(a, odd);
112         minab = _mm_min_epi8(a, b);
113         maxab = _mm_max_epi8(a, b);
114         a = _mm_blendv_epi8(maxab, minab, mask);
115     }
116     return a;
117 }
118 
insertion_sort(epu8 p)119 inline epu8 insertion_sort(epu8 p) {
120     auto &a = HPCombi::as_array(p);
121     for (int i = 0; i < 16; i++)
122         for (int j = i; j > 0 && a[j] < a[j - 1]; j--)
123             std::swap(a[j], a[j - 1]);
124     return p;
125 }
126 
radix_sort(epu8 p)127 inline epu8 radix_sort(epu8 p) {
128     auto &a = HPCombi::as_array(p);
129     std::array<uint8_t, 16> stat {};
130     for (int i = 0; i < 16; i++)
131         stat[a[i]]++;
132     int c = 0;
133     for (int i = 0; i < 16; i++)
134         for (int j = 0; j < stat[i]; j++)
135             a[c++] = i;
136     return p;
137 }
138 
main()139 int main() {
140     // epu8 a = { 5, 4,12,15,10, 8, 9, 2, 3,13,14, 0, 1, 7,11, 6};
141 
142     auto vrand = rand_perms(1000);
143     int rep = 10000;
144     cout << "Std lib: ";
145     double reftime = timethat(
146         [vrand]() {
147             for (epu8 v : vrand) {
148                 auto &ar = as_array(v);
149                 std::sort(ar.begin(), ar.end());
150                 ASSERT(equal(v, epu8id));  // avoid optimization
151             }
152         },
153         rep);
154     cout << "OddEv : ";
155     timethat(
156         [vrand]() {
157             for (epu8 v : vrand)
158                 ASSERT(equal(sort_odd_even(v), epu8id));
159         },
160         rep, reftime);
161     cout << "Insert : ";
162     timethat(
163         [vrand]() {
164             for (epu8 v : vrand)
165                 ASSERT(equal(insertion_sort(v), epu8id));
166         },
167         rep, reftime);
168     cout << "Radix16: ";
169     timethat(
170         [vrand]() {
171             for (epu8 v : vrand)
172                 ASSERT(equal(radix_sort(v), epu8id));
173         },
174         rep, reftime);
175     cout << "Pair  : ";
176     timethat(
177         [vrand]() {
178             for (epu8 v : vrand)
179                 ASSERT(equal(sort_pair(v), epu8id));
180         },
181         rep, reftime);
182     cout << "Funct  : ";
183     timethat(
184         [vrand]() {
185             for (epu8 v : vrand)
186                 ASSERT(equal(sorted(v), epu8id));
187         },
188         rep, reftime);
189 
190     return EXIT_SUCCESS;
191 }
192