1 /*
2    BLAKE2 reference source code package - optimized C implementations
3 
4    Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
5    terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6    your option.  The terms of these licenses can be found at:
7 
8    - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9    - OpenSSL license   : https://www.openssl.org/source/license.html
10    - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
11 
12    More information about the BLAKE2 hash function can be found at
13    https://blake2.net.
14 */
15 #ifndef BLAKE2B_LOAD_SSE41_H
16 #define BLAKE2B_LOAD_SSE41_H
17 
/*
 * LOAD_MSG_0_1: writes b0 = {m0.lo, m1.lo} and b1 = {m2.lo, m3.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_0_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m1); \
        (b1) = _mm_unpacklo_epi64(m2, m3); \
    } while (0)
24 
25 
/*
 * LOAD_MSG_0_2: writes b0 = {m0.hi, m1.hi} and b1 = {m2.hi, m3.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_0_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m0, m1); \
        (b1) = _mm_unpackhi_epi64(m2, m3); \
    } while (0)
32 
33 
/*
 * LOAD_MSG_0_3: writes b0 = {m4.lo, m5.lo} and b1 = {m6.lo, m7.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_0_3(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m4, m5); \
        (b1) = _mm_unpacklo_epi64(m6, m7); \
    } while (0)
40 
41 
/*
 * LOAD_MSG_0_4: writes b0 = {m4.hi, m5.hi} and b1 = {m6.hi, m7.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_0_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m5); \
        (b1) = _mm_unpackhi_epi64(m6, m7); \
    } while (0)
48 
49 
/*
 * LOAD_MSG_1_1: writes b0 = {m7.lo, m2.lo} and b1 = {m4.hi, m6.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_1_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m7, m2); \
        (b1) = _mm_unpackhi_epi64(m4, m6); \
    } while (0)
56 
57 
/*
 * LOAD_MSG_1_2: writes b0 = {m5.lo, m4.lo} and b1 = {m7.hi, m3.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first. m0..m7 are not parameters; they must be visible where the macro is
 * expanded.
 */
#define LOAD_MSG_1_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m5, m4); \
        (b1) = _mm_alignr_epi8(m3, m7, 8); \
    } while (0)
64 
65 
/*
 * LOAD_MSG_1_3: writes b0 = {m0.hi, m0.lo} (the two 64-bit halves of m0
 * swapped by the 32-bit shuffle) and b1 = {m5.hi, m2.hi}, where {x, y} means
 * x in the low and y in the high 64-bit lane. m0..m7 are not parameters;
 * they must be visible where the macro is expanded.
 */
#define LOAD_MSG_1_3(b0, b1) \
    do { \
        (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
        (b1) = _mm_unpackhi_epi64(m5, m2); \
    } while (0)
72 
73 
/*
 * LOAD_MSG_1_4: writes b0 = {m6.lo, m1.lo} and b1 = {m3.hi, m1.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_1_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m6, m1); \
        (b1) = _mm_unpackhi_epi64(m3, m1); \
    } while (0)
80 
81 
/*
 * LOAD_MSG_2_1: writes b0 = {m5.hi, m6.lo} and b1 = {m2.hi, m7.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first. m0..m7 are not parameters; they must be visible where the macro is
 * expanded.
 */
#define LOAD_MSG_2_1(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m6, m5, 8); \
        (b1) = _mm_unpackhi_epi64(m2, m7); \
    } while (0)
88 
89 
/*
 * LOAD_MSG_2_2: writes b0 = {m4.lo, m0.lo} and b1 = {m1.lo, m6.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_2_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m4, m0); \
        (b1) = _mm_blend_epi16(m1, m6, 0xF0); \
    } while (0)
96 
97 
/*
 * LOAD_MSG_2_3: writes b0 = {m5.lo, m1.hi} and b1 = {m3.hi, m4.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_2_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m5, m1, 0xF0); \
        (b1) = _mm_unpackhi_epi64(m3, m4); \
    } while (0)
104 
105 
/*
 * LOAD_MSG_2_4: writes b0 = {m7.lo, m3.lo} and b1 = {m0.hi, m2.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first. m0..m7 are not parameters; they must be visible where the macro is
 * expanded.
 */
#define LOAD_MSG_2_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m7, m3); \
        (b1) = _mm_alignr_epi8(m2, m0, 8); \
    } while (0)
112 
113 
/*
 * LOAD_MSG_3_1: writes b0 = {m3.hi, m1.hi} and b1 = {m6.hi, m5.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_3_1(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m3, m1); \
        (b1) = _mm_unpackhi_epi64(m6, m5); \
    } while (0)
120 
121 
/*
 * LOAD_MSG_3_2: writes b0 = {m4.hi, m0.hi} and b1 = {m6.lo, m7.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_3_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m0); \
        (b1) = _mm_unpacklo_epi64(m6, m7); \
    } while (0)
128 
129 
/*
 * LOAD_MSG_3_3: writes b0 = {m1.lo, m2.hi} and b1 = {m2.lo, m7.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_3_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m1, m2, 0xF0); \
        (b1) = _mm_blend_epi16(m2, m7, 0xF0); \
    } while (0)
136 
137 
/*
 * LOAD_MSG_3_4: writes b0 = {m3.lo, m5.lo} and b1 = {m0.lo, m4.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_3_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m3, m5); \
        (b1) = _mm_unpacklo_epi64(m0, m4); \
    } while (0)
144 
145 
/*
 * LOAD_MSG_4_1: writes b0 = {m4.hi, m2.hi} and b1 = {m1.lo, m5.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_4_1(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m2); \
        (b1) = _mm_unpacklo_epi64(m1, m5); \
    } while (0)
152 
153 
/*
 * LOAD_MSG_4_2: writes b0 = {m0.lo, m3.hi} and b1 = {m2.lo, m7.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_4_2(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m0, m3, 0xF0); \
        (b1) = _mm_blend_epi16(m2, m7, 0xF0); \
    } while (0)
160 
161 
/*
 * LOAD_MSG_4_3: writes b0 = {m7.lo, m5.hi} and b1 = {m3.lo, m1.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_4_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m7, m5, 0xF0); \
        (b1) = _mm_blend_epi16(m3, m1, 0xF0); \
    } while (0)
168 
169 
/*
 * LOAD_MSG_4_4: writes b0 = {m0.hi, m6.lo} and b1 = {m4.lo, m6.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first; blend mask 0xF0 keeps the first operand's low lane and takes the
 * second's high lane. m0..m7 are not parameters; they must be visible where
 * the macro is expanded.
 */
#define LOAD_MSG_4_4(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m6, m0, 8); \
        (b1) = _mm_blend_epi16(m4, m6, 0xF0); \
    } while (0)
176 
177 
/*
 * LOAD_MSG_5_1: writes b0 = {m1.lo, m3.lo} and b1 = {m0.lo, m4.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_5_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m1, m3); \
        (b1) = _mm_unpacklo_epi64(m0, m4); \
    } while (0)
184 
185 
/*
 * LOAD_MSG_5_2: writes b0 = {m6.lo, m5.lo} and b1 = {m5.hi, m1.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_5_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m6, m5); \
        (b1) = _mm_unpackhi_epi64(m5, m1); \
    } while (0)
192 
193 
/*
 * LOAD_MSG_5_3: writes b0 = {m2.lo, m3.hi} and b1 = {m7.hi, m0.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_5_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m2, m3, 0xF0); \
        (b1) = _mm_unpackhi_epi64(m7, m0); \
    } while (0)
200 
201 
/*
 * LOAD_MSG_5_4: writes b0 = {m6.hi, m2.hi} and b1 = {m7.lo, m4.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_5_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m6, m2); \
        (b1) = _mm_blend_epi16(m7, m4, 0xF0); \
    } while (0)
208 
209 
/*
 * LOAD_MSG_6_1: writes b0 = {m6.lo, m0.hi} and b1 = {m7.lo, m2.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_6_1(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m6, m0, 0xF0); \
        (b1) = _mm_unpacklo_epi64(m7, m2); \
    } while (0)
216 
217 
/*
 * LOAD_MSG_6_2: writes b0 = {m2.hi, m7.hi} and b1 = {m6.hi, m5.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first. m0..m7 are not parameters; they must be visible where the macro is
 * expanded.
 */
#define LOAD_MSG_6_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m2, m7); \
        (b1) = _mm_alignr_epi8(m5, m6, 8); \
    } while (0)
224 
225 
/*
 * LOAD_MSG_6_3: writes b0 = {m0.lo, m3.lo} and b1 = {m4.hi, m4.lo} (the two
 * 64-bit halves of m4 swapped by the 32-bit shuffle), where {x, y} means x
 * in the low and y in the high 64-bit lane. m0..m7 are not parameters; they
 * must be visible where the macro is expanded.
 */
#define LOAD_MSG_6_3(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m3); \
        (b1) = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
    } while (0)
232 
233 
/*
 * LOAD_MSG_6_4: writes b0 = {m3.hi, m1.hi} and b1 = {m1.lo, m5.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_6_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m3, m1); \
        (b1) = _mm_blend_epi16(m1, m5, 0xF0); \
    } while (0)
240 
241 
/*
 * LOAD_MSG_7_1: writes b0 = {m6.hi, m3.hi} and b1 = {m6.lo, m1.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_7_1(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m6, m3); \
        (b1) = _mm_blend_epi16(m6, m1, 0xF0); \
    } while (0)
248 
249 
/*
 * LOAD_MSG_7_2: writes b0 = {m5.hi, m7.lo} and b1 = {m0.hi, m4.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first. m0..m7 are not parameters; they must be visible where the macro is
 * expanded.
 */
#define LOAD_MSG_7_2(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m7, m5, 8); \
        (b1) = _mm_unpackhi_epi64(m0, m4); \
    } while (0)
256 
257 
/*
 * LOAD_MSG_7_3: writes b0 = {m2.hi, m7.hi} and b1 = {m4.lo, m1.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_7_3(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m2, m7); \
        (b1) = _mm_unpacklo_epi64(m4, m1); \
    } while (0)
264 
265 
/*
 * LOAD_MSG_7_4: writes b0 = {m0.lo, m2.lo} and b1 = {m3.lo, m5.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_7_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m2); \
        (b1) = _mm_unpacklo_epi64(m3, m5); \
    } while (0)
272 
273 
/*
 * LOAD_MSG_8_1: writes b0 = {m3.lo, m7.lo} and b1 = {m5.hi, m0.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first. m0..m7 are not parameters; they must be visible where the macro is
 * expanded.
 */
#define LOAD_MSG_8_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m3, m7); \
        (b1) = _mm_alignr_epi8(m0, m5, 8); \
    } while (0)
280 
281 
/*
 * LOAD_MSG_8_2: writes b0 = {m7.hi, m4.hi} and b1 = {m1.hi, m4.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first. m0..m7 are not parameters; they must be visible where the macro is
 * expanded.
 */
#define LOAD_MSG_8_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m7, m4); \
        (b1) = _mm_alignr_epi8(m4, m1, 8); \
    } while (0)
288 
289 
/*
 * LOAD_MSG_8_3: writes b0 = m6 (copied unchanged) and b1 = {m0.hi, m5.lo},
 * where {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first. m0..m7 are not parameters; they must be visible where the macro is
 * expanded.
 */
#define LOAD_MSG_8_3(b0, b1) \
    do { \
        (b0) = m6; \
        (b1) = _mm_alignr_epi8(m5, m0, 8); \
    } while (0)
296 
297 
/*
 * LOAD_MSG_8_4: writes b0 = {m1.lo, m3.hi} and b1 = m2 (copied unchanged),
 * where {x, y} means x in the low and y in the high 64-bit lane. Blend mask
 * 0xF0 keeps the low lane of the first operand and takes the high lane of
 * the second. m0..m7 are not parameters; they must be visible where the
 * macro is expanded.
 */
#define LOAD_MSG_8_4(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m1, m3, 0xF0); \
        (b1) = m2; \
    } while (0)
304 
305 
/*
 * LOAD_MSG_9_1: writes b0 = {m5.lo, m4.lo} and b1 = {m3.hi, m0.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_9_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m5, m4); \
        (b1) = _mm_unpackhi_epi64(m3, m0); \
    } while (0)
312 
313 
/*
 * LOAD_MSG_9_2: writes b0 = {m1.lo, m2.lo} and b1 = {m3.lo, m2.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. Blend mask 0xF0
 * keeps the low lane of the first operand and takes the high lane of the
 * second. m0..m7 are not parameters; they must be visible where the macro
 * is expanded.
 */
#define LOAD_MSG_9_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m1, m2); \
        (b1) = _mm_blend_epi16(m3, m2, 0xF0); \
    } while (0)
320 
321 
/*
 * LOAD_MSG_9_3: writes b0 = {m7.hi, m4.hi} and b1 = {m1.hi, m6.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_9_3(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m7, m4); \
        (b1) = _mm_unpackhi_epi64(m1, m6); \
    } while (0)
328 
329 
/*
 * LOAD_MSG_9_4: writes b0 = {m5.hi, m7.lo} and b1 = {m6.lo, m0.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane. The 8-byte
 * alignr takes the high lane of its second operand and the low lane of its
 * first. m0..m7 are not parameters; they must be visible where the macro is
 * expanded.
 */
#define LOAD_MSG_9_4(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m7, m5, 8); \
        (b1) = _mm_unpacklo_epi64(m6, m0); \
    } while (0)
336 
337 
/*
 * LOAD_MSG_10_1: writes b0 = {m0.lo, m1.lo} and b1 = {m2.lo, m3.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane (the same
 * selection LOAD_MSG_0_1 makes). m0..m7 are not parameters; they must be
 * visible where the macro is expanded.
 */
#define LOAD_MSG_10_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m1); \
        (b1) = _mm_unpacklo_epi64(m2, m3); \
    } while (0)
344 
345 
/*
 * LOAD_MSG_10_2: writes b0 = {m0.hi, m1.hi} and b1 = {m2.hi, m3.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane (the same
 * selection LOAD_MSG_0_2 makes). m0..m7 are not parameters; they must be
 * visible where the macro is expanded.
 */
#define LOAD_MSG_10_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m0, m1); \
        (b1) = _mm_unpackhi_epi64(m2, m3); \
    } while (0)
352 
353 
/*
 * LOAD_MSG_10_3: writes b0 = {m4.lo, m5.lo} and b1 = {m6.lo, m7.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane (the same
 * selection LOAD_MSG_0_3 makes). m0..m7 are not parameters; they must be
 * visible where the macro is expanded.
 */
#define LOAD_MSG_10_3(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m4, m5); \
        (b1) = _mm_unpacklo_epi64(m6, m7); \
    } while (0)
360 
361 
/*
 * LOAD_MSG_10_4: writes b0 = {m4.hi, m5.hi} and b1 = {m6.hi, m7.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane (the same
 * selection LOAD_MSG_0_4 makes). m0..m7 are not parameters; they must be
 * visible where the macro is expanded.
 */
#define LOAD_MSG_10_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m5); \
        (b1) = _mm_unpackhi_epi64(m6, m7); \
    } while (0)
368 
369 
/*
 * LOAD_MSG_11_1: writes b0 = {m7.lo, m2.lo} and b1 = {m4.hi, m6.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane (the same
 * selection LOAD_MSG_1_1 makes). m0..m7 are not parameters; they must be
 * visible where the macro is expanded.
 */
#define LOAD_MSG_11_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m7, m2); \
        (b1) = _mm_unpackhi_epi64(m4, m6); \
    } while (0)
376 
377 
/*
 * LOAD_MSG_11_2: writes b0 = {m5.lo, m4.lo} and b1 = {m7.hi, m3.lo}, where
 * {x, y} means x in the low and y in the high 64-bit lane (the same
 * selection LOAD_MSG_1_2 makes). The 8-byte alignr takes the high lane of
 * its second operand and the low lane of its first. m0..m7 are not
 * parameters; they must be visible where the macro is expanded.
 */
#define LOAD_MSG_11_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m5, m4); \
        (b1) = _mm_alignr_epi8(m3, m7, 8); \
    } while (0)
384 
385 
/*
 * LOAD_MSG_11_3: writes b0 = {m0.hi, m0.lo} (the two 64-bit halves of m0
 * swapped by the 32-bit shuffle) and b1 = {m5.hi, m2.hi}, where {x, y} means
 * x in the low and y in the high 64-bit lane (the same selection
 * LOAD_MSG_1_3 makes). m0..m7 are not parameters; they must be visible
 * where the macro is expanded.
 */
#define LOAD_MSG_11_3(b0, b1) \
    do { \
        (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
        (b1) = _mm_unpackhi_epi64(m5, m2); \
    } while (0)
392 
393 
/*
 * LOAD_MSG_11_4: writes b0 = {m6.lo, m1.lo} and b1 = {m3.hi, m1.hi}, where
 * {x, y} means x in the low and y in the high 64-bit lane (the same
 * selection LOAD_MSG_1_4 makes). m0..m7 are not parameters; they must be
 * visible where the macro is expanded.
 */
#define LOAD_MSG_11_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m6, m1); \
        (b1) = _mm_unpackhi_epi64(m3, m1); \
    } while (0)
400 
401 
402 #endif
403