1 /*
2    BLAKE2 reference source code package - optimized C implementations
3 
4    Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
5    terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6    your option.  The terms of these licenses can be found at:
7 
8    - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9    - OpenSSL license   : https://www.openssl.org/source/license.html
10    - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
11 
12    More information about the BLAKE2 hash function can be found at
13    https://blake2.net.
14 */
15 #ifndef BLAKE2B_LOAD_SSE2_H
16 #define BLAKE2B_LOAD_SSE2_H
17 
/*
 * Thin wrappers over the vector merge built-ins:
 *   vec_merge_hi(a, b)    -> vec_mergeh(a, b)
 *   vec_merge_lo(a, b)    -> vec_mergel(a, b)
 *   vec_merge_hi_lo(a, b) -> vec_mergeh(a, b'), where b' is the result of
 *                            vec_sld(b, b, 8) applied to the uint8x16_p view
 *                            of b and cast back to uint64x2_p.
 *
 * Every argument is parenthesized so that the casts inside vec_merge_hi_lo
 * apply to the whole argument expression rather than to its first operand.
 * Note that vec_merge_hi_lo expands b twice; do not pass an argument with
 * side effects.
 */
#define vec_merge_hi(a, b) vec_mergeh((a), (b))
#define vec_merge_hi_lo(a, b) vec_mergeh((a), (uint64x2_p)vec_sld((uint8x16_p)(b), (uint8x16_p)(b), 8))
#define vec_merge_lo(a, b) vec_mergel((a), (b))
21 
/*
 * vec_shl_8(a, b): runs vec_sld with a shift of 8 on the uint8x16_p views of
 * the two operands and casts the result back to uint64x2_p.  When
 * NATIVE_BIG_ENDIAN is defined the operands are passed as (a, b); otherwise
 * they are passed swapped, as (b, a), with the shift written as 16-8.
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so the macro can be used wherever an expression is allowed and
 * the caller supplies the statement terminator.
 */
#if defined(NATIVE_BIG_ENDIAN)
# define vec_shl_8(a,b) ((uint64x2_p)vec_sld((uint8x16_p)(a), (uint8x16_p)(b), 8))
#else
# define vec_shl_8(a,b) ((uint64x2_p)vec_sld((uint8x16_p)(b), (uint8x16_p)(a), 16-8))
#endif
27 
/* LOAD_MSG_0_1(b0, b1): b0 receives vec_merge_hi(m0, m1) and b1 receives
 * vec_merge_hi(m2, m3).  m0..m3 are not parameters; they must be visible at
 * the point of expansion. */
#define LOAD_MSG_0_1(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m0, m1); \
        (b1) = vec_merge_hi(m2, m3); \
    } while (0)
33 
/* LOAD_MSG_0_2(b0, b1): b0 receives vec_merge_lo(m0, m1) and b1 receives
 * vec_merge_lo(m2, m3).  m0..m3 come from the expansion scope. */
#define LOAD_MSG_0_2(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m0, m1); \
        (b1) = vec_merge_lo(m2, m3); \
    } while (0)
39 
/* LOAD_MSG_0_3(b0, b1): b0 receives vec_merge_hi(m4, m5) and b1 receives
 * vec_merge_hi(m6, m7).  m4..m7 come from the expansion scope. */
#define LOAD_MSG_0_3(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m4, m5); \
        (b1) = vec_merge_hi(m6, m7); \
    } while (0)
45 
/* LOAD_MSG_0_4(b0, b1): b0 receives vec_merge_lo(m4, m5) and b1 receives
 * vec_merge_lo(m6, m7).  m4..m7 come from the expansion scope. */
#define LOAD_MSG_0_4(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m4, m5); \
        (b1) = vec_merge_lo(m6, m7); \
    } while (0)
51 
/* LOAD_MSG_1_1(b0, b1): b0 receives vec_merge_hi(m7, m2) and b1 receives
 * vec_merge_lo(m4, m6).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_1_1(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m7, m2); \
        (b1) = vec_merge_lo(m4, m6); \
    } while (0)
57 
/* LOAD_MSG_1_2(b0, b1): b0 receives vec_merge_hi(m5, m4) and b1 receives
 * vec_shl_8(m7, m3).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_1_2(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m5, m4); \
        (b1) = vec_shl_8(m7, m3); \
    } while (0)
63 
/* LOAD_MSG_1_3(b0, b1): b0 receives vec_shl_8(m0, m0) (m0 used for both
 * operands) and b1 receives vec_merge_lo(m5, m2).  The m* vectors come from
 * the expansion scope. */
#define LOAD_MSG_1_3(b0, b1) \
    do { \
        (b0) = vec_shl_8(m0, m0); \
        (b1) = vec_merge_lo(m5, m2); \
    } while (0)
69 
/* LOAD_MSG_1_4(b0, b1): b0 receives vec_merge_hi(m6, m1) and b1 receives
 * vec_merge_lo(m3, m1).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_1_4(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m6, m1); \
        (b1) = vec_merge_lo(m3, m1); \
    } while (0)
75 
/* LOAD_MSG_2_1(b0, b1): b0 receives vec_shl_8(m5, m6) and b1 receives
 * vec_merge_lo(m2, m7).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_2_1(b0, b1) \
    do { \
        (b0) = vec_shl_8(m5, m6); \
        (b1) = vec_merge_lo(m2, m7); \
    } while (0)
81 
/* LOAD_MSG_2_2(b0, b1): b0 receives vec_merge_hi(m4, m0) and b1 receives
 * vec_merge_hi_lo(m1, m6).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_2_2(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m4, m0); \
        (b1) = vec_merge_hi_lo(m1, m6); \
    } while (0)
87 
/* LOAD_MSG_2_3(b0, b1): b0 receives vec_merge_hi_lo(m5, m1) and b1 receives
 * vec_merge_lo(m3, m4).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_2_3(b0, b1) \
    do { \
        (b0) = vec_merge_hi_lo(m5, m1); \
        (b1) = vec_merge_lo(m3, m4); \
    } while (0)
93 
/* LOAD_MSG_2_4(b0, b1): b0 receives vec_merge_hi(m7, m3) and b1 receives
 * vec_shl_8(m0, m2).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_2_4(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m7, m3); \
        (b1) = vec_shl_8(m0, m2); \
    } while (0)
99 
/* LOAD_MSG_3_1(b0, b1): b0 receives vec_merge_lo(m3, m1) and b1 receives
 * vec_merge_lo(m6, m5).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_3_1(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m3, m1); \
        (b1) = vec_merge_lo(m6, m5); \
    } while (0)
105 
/* LOAD_MSG_3_2(b0, b1): b0 receives vec_merge_lo(m4, m0) and b1 receives
 * vec_merge_hi(m6, m7).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_3_2(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m4, m0); \
        (b1) = vec_merge_hi(m6, m7); \
    } while (0)
111 
/* LOAD_MSG_3_3(b0, b1): b0 receives vec_merge_hi_lo(m1, m2) and b1 receives
 * vec_merge_hi_lo(m2, m7).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_3_3(b0, b1) \
    do { \
        (b0) = vec_merge_hi_lo(m1, m2); \
        (b1) = vec_merge_hi_lo(m2, m7); \
    } while (0)
117 
/* LOAD_MSG_3_4(b0, b1): b0 receives vec_merge_hi(m3, m5) and b1 receives
 * vec_merge_hi(m0, m4).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_3_4(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m3, m5); \
        (b1) = vec_merge_hi(m0, m4); \
    } while (0)
123 
/* LOAD_MSG_4_1(b0, b1): b0 receives vec_merge_lo(m4, m2) and b1 receives
 * vec_merge_hi(m1, m5).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_4_1(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m4, m2); \
        (b1) = vec_merge_hi(m1, m5); \
    } while (0)
129 
/* LOAD_MSG_4_2(b0, b1): b0 receives vec_merge_hi_lo(m0, m3) and b1 receives
 * vec_merge_hi_lo(m2, m7).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_4_2(b0, b1) \
    do { \
        (b0) = vec_merge_hi_lo(m0, m3); \
        (b1) = vec_merge_hi_lo(m2, m7); \
    } while (0)
135 
/* LOAD_MSG_4_3(b0, b1): b0 receives vec_merge_hi_lo(m7, m5) and b1 receives
 * vec_merge_hi_lo(m3, m1).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_4_3(b0, b1) \
    do { \
        (b0) = vec_merge_hi_lo(m7, m5); \
        (b1) = vec_merge_hi_lo(m3, m1); \
    } while (0)
141 
/* LOAD_MSG_4_4(b0, b1): b0 receives vec_shl_8(m0, m6) and b1 receives
 * vec_merge_hi_lo(m4, m6).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_4_4(b0, b1) \
    do { \
        (b0) = vec_shl_8(m0, m6); \
        (b1) = vec_merge_hi_lo(m4, m6); \
    } while (0)
147 
/* LOAD_MSG_5_1(b0, b1): b0 receives vec_merge_hi(m1, m3) and b1 receives
 * vec_merge_hi(m0, m4).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_5_1(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m1, m3); \
        (b1) = vec_merge_hi(m0, m4); \
    } while (0)
153 
/* LOAD_MSG_5_2(b0, b1): b0 receives vec_merge_hi(m6, m5) and b1 receives
 * vec_merge_lo(m5, m1).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_5_2(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m6, m5); \
        (b1) = vec_merge_lo(m5, m1); \
    } while (0)
159 
/* LOAD_MSG_5_3(b0, b1): b0 receives vec_merge_hi_lo(m2, m3) and b1 receives
 * vec_merge_lo(m7, m0).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_5_3(b0, b1) \
    do { \
        (b0) = vec_merge_hi_lo(m2, m3); \
        (b1) = vec_merge_lo(m7, m0); \
    } while (0)
165 
/* LOAD_MSG_5_4(b0, b1): b0 receives vec_merge_lo(m6, m2) and b1 receives
 * vec_merge_hi_lo(m7, m4).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_5_4(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m6, m2); \
        (b1) = vec_merge_hi_lo(m7, m4); \
    } while (0)
171 
/* LOAD_MSG_6_1(b0, b1): b0 receives vec_merge_hi_lo(m6, m0) and b1 receives
 * vec_merge_hi(m7, m2).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_6_1(b0, b1) \
    do { \
        (b0) = vec_merge_hi_lo(m6, m0); \
        (b1) = vec_merge_hi(m7, m2); \
    } while (0)
177 
/* LOAD_MSG_6_2(b0, b1): b0 receives vec_merge_lo(m2, m7) and b1 receives
 * vec_shl_8(m6, m5).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_6_2(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m2, m7); \
        (b1) = vec_shl_8(m6, m5); \
    } while (0)
183 
/* LOAD_MSG_6_3(b0, b1): b0 receives vec_merge_hi(m0, m3) and b1 receives
 * vec_shl_8(m4, m4) (m4 used for both operands).  The m* vectors come from
 * the expansion scope. */
#define LOAD_MSG_6_3(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m0, m3); \
        (b1) = vec_shl_8(m4, m4); \
    } while (0)
189 
/* LOAD_MSG_6_4(b0, b1): b0 receives vec_merge_lo(m3, m1) and b1 receives
 * vec_merge_hi_lo(m1, m5).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_6_4(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m3, m1); \
        (b1) = vec_merge_hi_lo(m1, m5); \
    } while (0)
195 
/* LOAD_MSG_7_1(b0, b1): b0 receives vec_merge_lo(m6, m3) and b1 receives
 * vec_merge_hi_lo(m6, m1).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_7_1(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m6, m3); \
        (b1) = vec_merge_hi_lo(m6, m1); \
    } while (0)
201 
/* LOAD_MSG_7_2(b0, b1): b0 receives vec_shl_8(m5, m7) and b1 receives
 * vec_merge_lo(m0, m4).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_7_2(b0, b1) \
    do { \
        (b0) = vec_shl_8(m5, m7); \
        (b1) = vec_merge_lo(m0, m4); \
    } while (0)
207 
/* LOAD_MSG_7_3(b0, b1): b0 receives vec_merge_lo(m2, m7) and b1 receives
 * vec_merge_hi(m4, m1).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_7_3(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m2, m7); \
        (b1) = vec_merge_hi(m4, m1); \
    } while (0)
213 
/* LOAD_MSG_7_4(b0, b1): b0 receives vec_merge_hi(m0, m2) and b1 receives
 * vec_merge_hi(m3, m5).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_7_4(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m0, m2); \
        (b1) = vec_merge_hi(m3, m5); \
    } while (0)
219 
/* LOAD_MSG_8_1(b0, b1): b0 receives vec_merge_hi(m3, m7) and b1 receives
 * vec_shl_8(m5, m0).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_8_1(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m3, m7); \
        (b1) = vec_shl_8(m5, m0); \
    } while (0)
225 
/* LOAD_MSG_8_2(b0, b1): b0 receives vec_merge_lo(m7, m4) and b1 receives
 * vec_shl_8(m1, m4).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_8_2(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m7, m4); \
        (b1) = vec_shl_8(m1, m4); \
    } while (0)
231 
/* LOAD_MSG_8_3(b0, b1): b0 is a plain copy of m6 (no merge needed) and b1
 * receives vec_shl_8(m0, m5).  The m* vectors come from the expansion
 * scope. */
#define LOAD_MSG_8_3(b0, b1) \
    do { \
        (b0) = m6; \
        (b1) = vec_shl_8(m0, m5); \
    } while (0)
237 
/* LOAD_MSG_8_4(b0, b1): b0 receives vec_merge_hi_lo(m1, m3) and b1 is a
 * plain copy of m2 (no merge needed).  The m* vectors come from the
 * expansion scope. */
#define LOAD_MSG_8_4(b0, b1) \
    do { \
        (b0) = vec_merge_hi_lo(m1, m3); \
        (b1) = m2; \
    } while (0)
243 
/* LOAD_MSG_9_1(b0, b1): b0 receives vec_merge_hi(m5, m4) and b1 receives
 * vec_merge_lo(m3, m0).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_9_1(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m5, m4); \
        (b1) = vec_merge_lo(m3, m0); \
    } while (0)
249 
/* LOAD_MSG_9_2(b0, b1): b0 receives vec_merge_hi(m1, m2) and b1 receives
 * vec_merge_hi_lo(m3, m2).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_9_2(b0, b1) \
    do { \
        (b0) = vec_merge_hi(m1, m2); \
        (b1) = vec_merge_hi_lo(m3, m2); \
    } while (0)
255 
/* LOAD_MSG_9_3(b0, b1): b0 receives vec_merge_lo(m7, m4) and b1 receives
 * vec_merge_lo(m1, m6).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_9_3(b0, b1) \
    do { \
        (b0) = vec_merge_lo(m7, m4); \
        (b1) = vec_merge_lo(m1, m6); \
    } while (0)
261 
/* LOAD_MSG_9_4(b0, b1): b0 receives vec_shl_8(m5, m7) and b1 receives
 * vec_merge_hi(m6, m0).  The m* vectors come from the expansion scope. */
#define LOAD_MSG_9_4(b0, b1) \
    do { \
        (b0) = vec_shl_8(m5, m7); \
        (b1) = vec_merge_hi(m6, m0); \
    } while (0)
267 
/* LOAD_MSG_10_1(b0, b1): same selection as LOAD_MSG_0_1, i.e.
 * b0 <- vec_merge_hi(m0, m1) and b1 <- vec_merge_hi(m2, m3), so it forwards
 * to that macro. */
#define LOAD_MSG_10_1(b0, b1) LOAD_MSG_0_1(b0, b1)
273 
/* LOAD_MSG_10_2(b0, b1): same selection as LOAD_MSG_0_2, i.e.
 * b0 <- vec_merge_lo(m0, m1) and b1 <- vec_merge_lo(m2, m3), so it forwards
 * to that macro. */
#define LOAD_MSG_10_2(b0, b1) LOAD_MSG_0_2(b0, b1)
279 
/* LOAD_MSG_10_3(b0, b1): same selection as LOAD_MSG_0_3, i.e.
 * b0 <- vec_merge_hi(m4, m5) and b1 <- vec_merge_hi(m6, m7), so it forwards
 * to that macro. */
#define LOAD_MSG_10_3(b0, b1) LOAD_MSG_0_3(b0, b1)
285 
/* LOAD_MSG_10_4(b0, b1): same selection as LOAD_MSG_0_4, i.e.
 * b0 <- vec_merge_lo(m4, m5) and b1 <- vec_merge_lo(m6, m7), so it forwards
 * to that macro. */
#define LOAD_MSG_10_4(b0, b1) LOAD_MSG_0_4(b0, b1)
291 
/* LOAD_MSG_11_1(b0, b1): same selection as LOAD_MSG_1_1, i.e.
 * b0 <- vec_merge_hi(m7, m2) and b1 <- vec_merge_lo(m4, m6), so it forwards
 * to that macro. */
#define LOAD_MSG_11_1(b0, b1) LOAD_MSG_1_1(b0, b1)
297 
/* LOAD_MSG_11_2(b0, b1): same selection as LOAD_MSG_1_2, i.e.
 * b0 <- vec_merge_hi(m5, m4) and b1 <- vec_shl_8(m7, m3), so it forwards
 * to that macro. */
#define LOAD_MSG_11_2(b0, b1) LOAD_MSG_1_2(b0, b1)
303 
/* LOAD_MSG_11_3(b0, b1): same selection as LOAD_MSG_1_3, i.e.
 * b0 <- vec_shl_8(m0, m0) and b1 <- vec_merge_lo(m5, m2), so it forwards
 * to that macro. */
#define LOAD_MSG_11_3(b0, b1) LOAD_MSG_1_3(b0, b1)
309 
/* LOAD_MSG_11_4(b0, b1): same selection as LOAD_MSG_1_4, i.e.
 * b0 <- vec_merge_hi(m6, m1) and b1 <- vec_merge_lo(m3, m1), so it forwards
 * to that macro. */
#define LOAD_MSG_11_4(b0, b1) LOAD_MSG_1_4(b0, b1)
315 
316 #endif
317