1 // Copyright 2021 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <stdio.h>
16
17 #include <algorithm>
18
19 #include "hwy/base.h"
20
21 // Based on A.7 in "Entwurf und Implementierung vektorisierter
22 // Sortieralgorithmen" and code by Mark Blacher.
PrintMergeNetwork16x2()23 void PrintMergeNetwork16x2() {
24 for (int i = 8; i < 16; ++i) {
25 printf("v%x = st.SwapAdjacent(d, v%x);\n", i, i);
26 }
27 for (int i = 0; i < 8; ++i) {
28 printf("st.Sort2(d, v%x, v%x);\n", i, 15 - i);
29 }
30 for (int i = 0; i < 4; ++i) {
31 printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 4, i + 4);
32 printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 12, i + 12);
33 }
34 for (int i = 0; i < 4; ++i) {
35 printf("st.Sort2(d, v%x, v%x);\n", i, 7 - i);
36 printf("st.Sort2(d, v%x, v%x);\n", i + 8, 15 - i);
37 }
38 for (int i = 0; i < 16; i += 4) {
39 printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 2, i + 2);
40 printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 3, i + 3);
41 }
42 for (int i = 0; i < 16; i += 4) {
43 printf("st.Sort2(d, v%x, v%x);\n", i, i + 3);
44 printf("st.Sort2(d, v%x, v%x);\n", i + 1, i + 2);
45 }
46 for (int i = 0; i < 16; i += 2) {
47 printf("v%x = st.SwapAdjacent(d, v%x);\n", i + 1, i + 1);
48 }
49 for (int i = 0; i < 16; i += 2) {
50 printf("st.Sort2(d, v%x, v%x);\n", i, i + 1);
51 }
52 for (int i = 0; i < 16; ++i) {
53 printf("v%x = st.SortPairsDistance1<kOrder>(d, v%x);\n", i, i);
54 }
55 printf("\n");
56 }
57
PrintMergeNetwork16x4()58 void PrintMergeNetwork16x4() {
59 printf("\n");
60
61 for (int i = 8; i < 16; ++i) {
62 printf("v%x = st.Reverse4(d, v%x);\n", i, i);
63 }
64 for (int i = 0; i < 8; ++i) {
65 printf("st.Sort2(d, v%x, v%x);\n", i, 15 - i);
66 }
67 for (int i = 0; i < 4; ++i) {
68 printf("v%x = st.Reverse4(d, v%x);\n", i + 4, i + 4);
69 printf("v%x = st.Reverse4(d, v%x);\n", i + 12, i + 12);
70 }
71 for (int i = 0; i < 4; ++i) {
72 printf("st.Sort2(d, v%x, v%x);\n", i, 7 - i);
73 printf("st.Sort2(d, v%x, v%x);\n", i + 8, 15 - i);
74 }
75 for (int i = 0; i < 16; i += 4) {
76 printf("v%x = st.Reverse4(d, v%x);\n", i + 2, i + 2);
77 printf("v%x = st.Reverse4(d, v%x);\n", i + 3, i + 3);
78 }
79 for (int i = 0; i < 16; i += 4) {
80 printf("st.Sort2(d, v%x, v%x);\n", i, i + 3);
81 printf("st.Sort2(d, v%x, v%x);\n", i + 1, i + 2);
82 }
83 for (int i = 0; i < 16; i += 2) {
84 printf("v%x = st.Reverse4(d, v%x);\n", i + 1, i + 1);
85 }
86 for (int i = 0; i < 16; i += 2) {
87 printf("st.Sort2(d, v%x, v%x);\n", i, i + 1);
88 }
89 for (int i = 0; i < 16; ++i) {
90 printf("v%x = st.SortPairsReverse4(d, v%x);\n", i, i);
91 }
92 for (int i = 0; i < 16; ++i) {
93 printf("v%x = st.SortPairsDistance1<kOrder>(d, v%x);\n", i, i);
94 }
95 }
96
PrintMergeNetwork16x8()97 void PrintMergeNetwork16x8() {
98 printf("\n");
99
100 for (int i = 8; i < 16; ++i) {
101 printf("v%x = st.ReverseKeys8(d, v%x);\n", i, i);
102 }
103 for (int i = 0; i < 8; ++i) {
104 printf("st.Sort2(d, v%x, v%x);\n", i, 15 - i);
105 }
106 for (int i = 0; i < 4; ++i) {
107 printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 4, i + 4);
108 printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 12, i + 12);
109 }
110 for (int i = 0; i < 4; ++i) {
111 printf("st.Sort2(d, v%x, v%x);\n", i, 7 - i);
112 printf("st.Sort2(d, v%x, v%x);\n", i + 8, 15 - i);
113 }
114 for (int i = 0; i < 16; i += 4) {
115 printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 2, i + 2);
116 printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 3, i + 3);
117 }
118 for (int i = 0; i < 16; i += 4) {
119 printf("st.Sort2(d, v%x, v%x);\n", i, i + 3);
120 printf("st.Sort2(d, v%x, v%x);\n", i + 1, i + 2);
121 }
122 for (int i = 0; i < 16; i += 2) {
123 printf("v%x = st.ReverseKeys8(d, v%x);\n", i + 1, i + 1);
124 }
125 for (int i = 0; i < 16; i += 2) {
126 printf("st.Sort2(d, v%x, v%x);\n", i, i + 1);
127 }
128 for (int i = 0; i < 16; ++i) {
129 printf("v%x = st.SortPairsReverse8(d, v%x);\n", i, i);
130 }
131 for (int i = 0; i < 16; ++i) {
132 printf("v%x = st.SortPairsDistance2<kOrder>(d, v%x);\n", i, i);
133 }
134 for (int i = 0; i < 16; ++i) {
135 printf("v%x = st.SortPairsDistance1<kOrder>(d, v%x);\n", i, i);
136 }
137 }
138
PrintMergeNetwork16x16()139 void PrintMergeNetwork16x16() {
140 printf("\n");
141
142 for (int i = 8; i < 16; ++i) {
143 printf("v%x = st.ReverseKeys16(d, v%x);\n", i, i);
144 }
145 for (int i = 0; i < 8; ++i) {
146 printf("st.Sort2(d, v%x, v%x);\n", i, 15 - i);
147 }
148 for (int i = 0; i < 4; ++i) {
149 printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 4, i + 4);
150 printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 12, i + 12);
151 }
152 for (int i = 0; i < 4; ++i) {
153 printf("st.Sort2(d, v%x, v%x);\n", i, 7 - i);
154 printf("st.Sort2(d, v%x, v%x);\n", i + 8, 15 - i);
155 }
156 for (int i = 0; i < 16; i += 4) {
157 printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 2, i + 2);
158 printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 3, i + 3);
159 }
160 for (int i = 0; i < 16; i += 4) {
161 printf("st.Sort2(d, v%x, v%x);\n", i, i + 3);
162 printf("st.Sort2(d, v%x, v%x);\n", i + 1, i + 2);
163 }
164 for (int i = 0; i < 16; i += 2) {
165 printf("v%x = st.ReverseKeys16(d, v%x);\n", i + 1, i + 1);
166 }
167 for (int i = 0; i < 16; i += 2) {
168 printf("st.Sort2(d, v%x, v%x);\n", i, i + 1);
169 }
170 for (int i = 0; i < 16; ++i) {
171 printf("v%x = st.SortPairsReverse16<kOrder>(d, v%x);\n", i, i);
172 }
173 for (int i = 0; i < 16; ++i) {
174 printf("v%x = st.SortPairsDistance4<kOrder>(d, v%x);\n", i, i);
175 }
176 for (int i = 0; i < 16; ++i) {
177 printf("v%x = st.SortPairsDistance2<kOrder>(d, v%x);\n", i, i);
178 }
179 for (int i = 0; i < 16; ++i) {
180 printf("v%x = st.SortPairsDistance1<kOrder>(d, v%x);\n", i, i);
181 }
182 }
183
main(int argc,char ** argv)184 int main(int argc, char** argv) {
185 PrintMergeNetwork16x2();
186 PrintMergeNetwork16x4();
187 PrintMergeNetwork16x8();
188 PrintMergeNetwork16x16();
189 return 0;
190 }
191