1 // Copyright 2021 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stdio.h>
16 
17 #include <algorithm>
18 
19 #include "hwy/base.h"
20 
21 // Based on A.7 in "Entwurf und Implementierung vektorisierter
22 // Sortieralgorithmen" and code by Mark Blacher.
PrintMergeNetwork16x2()23 void PrintMergeNetwork16x2() {
24   for (int i = 8; i < 16; ++i) {
25     printf("v%x = st.SwapAdjacent(d, v%x);\n", i, i);
26   }
27   for (int i = 0; i < 8; ++i) {
28     printf("st.Sort2(d, v%x, v%x);\n", i, 15 - i);
29   }
30   for (int i = 0; i < 4; ++i) {
31     printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 4, i + 4);
32     printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 12, i + 12);
33   }
34   for (int i = 0; i < 4; ++i) {
35     printf("st.Sort2(d, v%x, v%x);\n", i, 7 - i);
36     printf("st.Sort2(d, v%x, v%x);\n", i + 8, 15 - i);
37   }
38   for (int i = 0; i < 16; i += 4) {
39     printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 2, i + 2);
40     printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 3, i + 3);
41   }
42   for (int i = 0; i < 16; i += 4) {
43     printf("st.Sort2(d, v%x, v%x);\n", i, i + 3);
44     printf("st.Sort2(d, v%x, v%x);\n", i + 1, i + 2);
45   }
46   for (int i = 0; i < 16; i += 2) {
47     printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 1, i + 1);
48   }
49   for (int i = 0; i < 16; i += 2) {
50     printf("st.Sort2(d, v%x, v%x);\n", i, i + 1);
51   }
52   for (int i = 0; i < 16; ++i) {
53     printf("v%x = st.SortPairsDistance1<kOrder>(d, v%x);\n", i, i);
54   }
55   printf("\n");
56 }
57 
PrintMergeNetwork16x4()58 void PrintMergeNetwork16x4() {
59   printf("\n");
60 
61   for (int i = 8; i < 16; ++i) {
62     printf("v%x = st.Reverse4(d, v%x);\n", i, i);
63   }
64   for (int i = 0; i < 8; ++i) {
65     printf("st.Sort2(d, v%x, v%x);\n", i, 15 - i);
66   }
67   for (int i = 0; i < 4; ++i) {
68     printf("v%x = st.Reverse4(d, v%x);\n", i + 4, i + 4);
69     printf("v%x = st.Reverse4(d, v%x);\n", i + 12, i + 12);
70   }
71   for (int i = 0; i < 4; ++i) {
72     printf("st.Sort2(d, v%x, v%x);\n", i, 7 - i);
73     printf("st.Sort2(d, v%x, v%x);\n", i + 8, 15 - i);
74   }
75   for (int i = 0; i < 16; i += 4) {
76     printf("v%x = st.Reverse4(d, v%x);\n", i + 2, i + 2);
77     printf("v%x = st.Reverse4(d, v%x);\n", i + 3, i + 3);
78   }
79   for (int i = 0; i < 16; i += 4) {
80     printf("st.Sort2(d, v%x, v%x);\n", i, i + 3);
81     printf("st.Sort2(d, v%x, v%x);\n", i + 1, i + 2);
82   }
83   for (int i = 0; i < 16; i += 2) {
84     printf("v%x = st.Reverse4(d, v%x);\n", i + 1, i + 1);
85   }
86   for (int i = 0; i < 16; i += 2) {
87     printf("st.Sort2(d, v%x, v%x);\n", i, i + 1);
88   }
89   for (int i = 0; i < 16; ++i) {
90     printf("v%x = st.SortPairsReverse4(d, v%x);\n", i, i);
91   }
92   for (int i = 0; i < 16; ++i) {
93     printf("v%x = st.SortPairsDistance1<kOrder>(d, v%x);\n", i, i);
94   }
95 }
96 
PrintMergeNetwork16x8()97 void PrintMergeNetwork16x8() {
98   printf("\n");
99 
100   for (int i = 8; i < 16; ++i) {
101     printf("v%x = st.ReverseKeys8(d, v%x);\n", i, i);
102   }
103   for (int i = 0; i < 8; ++i) {
104     printf("st.Sort2(d, v%x, v%x);\n", i, 15 - i);
105   }
106   for (int i = 0; i < 4; ++i) {
107     printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 4, i + 4);
108     printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 12, i + 12);
109   }
110   for (int i = 0; i < 4; ++i) {
111     printf("st.Sort2(d, v%x, v%x);\n", i, 7 - i);
112     printf("st.Sort2(d, v%x, v%x);\n", i + 8, 15 - i);
113   }
114   for (int i = 0; i < 16; i += 4) {
115     printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 2, i + 2);
116     printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 3, i + 3);
117   }
118   for (int i = 0; i < 16; i += 4) {
119     printf("st.Sort2(d, v%x, v%x);\n", i, i + 3);
120     printf("st.Sort2(d, v%x, v%x);\n", i + 1, i + 2);
121   }
122   for (int i = 0; i < 16; i += 2) {
123     printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 1, i + 1);
124   }
125   for (int i = 0; i < 16; i += 2) {
126     printf("st.Sort2(d, v%x, v%x);\n", i, i + 1);
127   }
128   for (int i = 0; i < 16; ++i) {
129     printf("v%x = st.SortPairsReverse8(d, v%x);\n", i, i);
130   }
131   for (int i = 0; i < 16; ++i) {
132     printf("v%x = st.SortPairsDistance2<kOrder>(d, v%x);\n", i, i);
133   }
134   for (int i = 0; i < 16; ++i) {
135     printf("v%x = st.SortPairsDistance1<kOrder>(d, v%x);\n", i, i);
136   }
137 }
138 
PrintMergeNetwork16x16()139 void PrintMergeNetwork16x16() {
140   printf("\n");
141 
142   for (int i = 8; i < 16; ++i) {
143     printf("v%x = st.ReverseKeys16(d, v%x);\n", i, i);
144   }
145   for (int i = 0; i < 8; ++i) {
146     printf("st.Sort2(d, v%x, v%x);\n", i, 15 - i);
147   }
148   for (int i = 0; i < 4; ++i) {
149     printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 4, i + 4);
150     printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 12, i + 12);
151   }
152   for (int i = 0; i < 4; ++i) {
153     printf("st.Sort2(d, v%x, v%x);\n", i, 7 - i);
154     printf("st.Sort2(d, v%x, v%x);\n", i + 8, 15 - i);
155   }
156   for (int i = 0; i < 16; i += 4) {
157     printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 2, i + 2);
158     printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 3, i + 3);
159   }
160   for (int i = 0; i < 16; i += 4) {
161     printf("st.Sort2(d, v%x, v%x);\n", i, i + 3);
162     printf("st.Sort2(d, v%x, v%x);\n", i + 1, i + 2);
163   }
164   for (int i = 0; i < 16; i += 2) {
165     printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 1, i + 1);
166   }
167   for (int i = 0; i < 16; i += 2) {
168     printf("st.Sort2(d, v%x, v%x);\n", i, i + 1);
169   }
170   for (int i = 0; i < 16; ++i) {
171     printf("v%x = st.SortPairsReverse16<kOrder>(d, v%x);\n", i, i);
172   }
173   for (int i = 0; i < 16; ++i) {
174     printf("v%x = st.SortPairsDistance4<kOrder>(d, v%x);\n", i, i);
175   }
176   for (int i = 0; i < 16; ++i) {
177     printf("v%x = st.SortPairsDistance2<kOrder>(d, v%x);\n", i, i);
178   }
179   for (int i = 0; i < 16; ++i) {
180     printf("v%x = st.SortPairsDistance1<kOrder>(d, v%x);\n", i, i);
181   }
182 }
183 
main(int argc,char ** argv)184 int main(int argc, char** argv) {
185   PrintMergeNetwork16x2();
186   PrintMergeNetwork16x4();
187   PrintMergeNetwork16x8();
188   PrintMergeNetwork16x16();
189   return 0;
190 }
191