/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along
   with this software. If not, see
   <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
150ac341f1SConrad Meyer 
160ac341f1SConrad Meyer #ifndef blake2b_load_sse2_H
170ac341f1SConrad Meyer #define blake2b_load_sse2_H
180ac341f1SConrad Meyer 
/*
 * LOAD_MSG_0_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m2, m0)   (m2 in the high 64 bits, m0 in the low)
 *   b1 <- _mm_set_epi64x(m6, m4)   (m6 in the high 64 bits, m4 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_0_1(b0, b1)           \
    do {                               \
        (b0) = _mm_set_epi64x(m2, m0); \
        (b1) = _mm_set_epi64x(m6, m4); \
    } while (0)
/*
 * LOAD_MSG_0_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m3, m1)   (m3 in the high 64 bits, m1 in the low)
 *   b1 <- _mm_set_epi64x(m7, m5)   (m7 in the high 64 bits, m5 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_0_2(b0, b1)           \
    do {                               \
        (b0) = _mm_set_epi64x(m3, m1); \
        (b1) = _mm_set_epi64x(m7, m5); \
    } while (0)
/*
 * LOAD_MSG_0_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m10, m8)    (m10 in the high 64 bits, m8 in the low)
 *   b1 <- _mm_set_epi64x(m14, m12)   (m14 in the high 64 bits, m12 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_0_3(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m10, m8);  \
        (b1) = _mm_set_epi64x(m14, m12); \
    } while (0)
/*
 * LOAD_MSG_0_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m11, m9)    (m11 in the high 64 bits, m9 in the low)
 *   b1 <- _mm_set_epi64x(m15, m13)   (m15 in the high 64 bits, m13 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_0_4(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m11, m9);  \
        (b1) = _mm_set_epi64x(m15, m13); \
    } while (0)
/*
 * LOAD_MSG_1_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m4, m14)   (m4 in the high 64 bits, m14 in the low)
 *   b1 <- _mm_set_epi64x(m13, m9)   (m13 in the high 64 bits, m9 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_1_1(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m4, m14); \
        (b1) = _mm_set_epi64x(m13, m9); \
    } while (0)
/*
 * LOAD_MSG_1_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m8, m10)   (m8 in the high 64 bits, m10 in the low)
 *   b1 <- _mm_set_epi64x(m6, m15)   (m6 in the high 64 bits, m15 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_1_2(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m8, m10); \
        (b1) = _mm_set_epi64x(m6, m15); \
    } while (0)
/*
 * LOAD_MSG_1_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m0, m1)    (m0 in the high 64 bits, m1 in the low)
 *   b1 <- _mm_set_epi64x(m5, m11)   (m5 in the high 64 bits, m11 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_1_3(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m0, m1);  \
        (b1) = _mm_set_epi64x(m5, m11); \
    } while (0)
/*
 * LOAD_MSG_1_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m2, m12)   (m2 in the high 64 bits, m12 in the low)
 *   b1 <- _mm_set_epi64x(m3, m7)    (m3 in the high 64 bits, m7 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_1_4(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m2, m12); \
        (b1) = _mm_set_epi64x(m3, m7);  \
    } while (0)
/*
 * LOAD_MSG_2_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m12, m11)   (m12 in the high 64 bits, m11 in the low)
 *   b1 <- _mm_set_epi64x(m15, m5)    (m15 in the high 64 bits, m5 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_2_1(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m12, m11); \
        (b1) = _mm_set_epi64x(m15, m5);  \
    } while (0)
/*
 * LOAD_MSG_2_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m0, m8)    (m0 in the high 64 bits, m8 in the low)
 *   b1 <- _mm_set_epi64x(m13, m2)   (m13 in the high 64 bits, m2 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_2_2(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m0, m8);  \
        (b1) = _mm_set_epi64x(m13, m2); \
    } while (0)
/*
 * LOAD_MSG_2_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m3, m10)   (m3 in the high 64 bits, m10 in the low)
 *   b1 <- _mm_set_epi64x(m9, m7)    (m9 in the high 64 bits, m7 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_2_3(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m3, m10); \
        (b1) = _mm_set_epi64x(m9, m7);  \
    } while (0)
/*
 * LOAD_MSG_2_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m6, m14)   (m6 in the high 64 bits, m14 in the low)
 *   b1 <- _mm_set_epi64x(m4, m1)    (m4 in the high 64 bits, m1 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_2_4(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m6, m14); \
        (b1) = _mm_set_epi64x(m4, m1);  \
    } while (0)
/*
 * LOAD_MSG_3_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m3, m7)     (m3 in the high 64 bits, m7 in the low)
 *   b1 <- _mm_set_epi64x(m11, m13)   (m11 in the high 64 bits, m13 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_3_1(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m3, m7);   \
        (b1) = _mm_set_epi64x(m11, m13); \
    } while (0)
/*
 * LOAD_MSG_3_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m1, m9)     (m1 in the high 64 bits, m9 in the low)
 *   b1 <- _mm_set_epi64x(m14, m12)   (m14 in the high 64 bits, m12 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_3_2(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m1, m9);   \
        (b1) = _mm_set_epi64x(m14, m12); \
    } while (0)
/*
 * LOAD_MSG_3_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m5, m2)    (m5 in the high 64 bits, m2 in the low)
 *   b1 <- _mm_set_epi64x(m15, m4)   (m15 in the high 64 bits, m4 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_3_3(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m5, m2);  \
        (b1) = _mm_set_epi64x(m15, m4); \
    } while (0)
/*
 * LOAD_MSG_3_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m10, m6)   (m10 in the high 64 bits, m6 in the low)
 *   b1 <- _mm_set_epi64x(m8, m0)    (m8 in the high 64 bits, m0 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_3_4(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m10, m6); \
        (b1) = _mm_set_epi64x(m8, m0);  \
    } while (0)
/*
 * LOAD_MSG_4_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m5, m9)    (m5 in the high 64 bits, m9 in the low)
 *   b1 <- _mm_set_epi64x(m10, m2)   (m10 in the high 64 bits, m2 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_4_1(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m5, m9);  \
        (b1) = _mm_set_epi64x(m10, m2); \
    } while (0)
/*
 * LOAD_MSG_4_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m7, m0)    (m7 in the high 64 bits, m0 in the low)
 *   b1 <- _mm_set_epi64x(m15, m4)   (m15 in the high 64 bits, m4 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_4_2(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m7, m0);  \
        (b1) = _mm_set_epi64x(m15, m4); \
    } while (0)
/*
 * LOAD_MSG_4_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m11, m14)   (m11 in the high 64 bits, m14 in the low)
 *   b1 <- _mm_set_epi64x(m3, m6)     (m3 in the high 64 bits, m6 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_4_3(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m11, m14); \
        (b1) = _mm_set_epi64x(m3, m6);   \
    } while (0)
/*
 * LOAD_MSG_4_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m12, m1)   (m12 in the high 64 bits, m1 in the low)
 *   b1 <- _mm_set_epi64x(m13, m8)   (m13 in the high 64 bits, m8 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_4_4(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m12, m1); \
        (b1) = _mm_set_epi64x(m13, m8); \
    } while (0)
/*
 * LOAD_MSG_5_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m6, m2)   (m6 in the high 64 bits, m2 in the low)
 *   b1 <- _mm_set_epi64x(m8, m0)   (m8 in the high 64 bits, m0 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_5_1(b0, b1)           \
    do {                               \
        (b0) = _mm_set_epi64x(m6, m2); \
        (b1) = _mm_set_epi64x(m8, m0); \
    } while (0)
/*
 * LOAD_MSG_5_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m10, m12)   (m10 in the high 64 bits, m12 in the low)
 *   b1 <- _mm_set_epi64x(m3, m11)    (m3 in the high 64 bits, m11 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_5_2(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m10, m12); \
        (b1) = _mm_set_epi64x(m3, m11);  \
    } while (0)
/*
 * LOAD_MSG_5_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m7, m4)    (m7 in the high 64 bits, m4 in the low)
 *   b1 <- _mm_set_epi64x(m1, m15)   (m1 in the high 64 bits, m15 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_5_3(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m7, m4);  \
        (b1) = _mm_set_epi64x(m1, m15); \
    } while (0)
/*
 * LOAD_MSG_5_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m5, m13)   (m5 in the high 64 bits, m13 in the low)
 *   b1 <- _mm_set_epi64x(m9, m14)   (m9 in the high 64 bits, m14 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_5_4(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m5, m13); \
        (b1) = _mm_set_epi64x(m9, m14); \
    } while (0)
/*
 * LOAD_MSG_6_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m1, m12)   (m1 in the high 64 bits, m12 in the low)
 *   b1 <- _mm_set_epi64x(m4, m14)   (m4 in the high 64 bits, m14 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_6_1(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m1, m12); \
        (b1) = _mm_set_epi64x(m4, m14); \
    } while (0)
/*
 * LOAD_MSG_6_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m15, m5)    (m15 in the high 64 bits, m5 in the low)
 *   b1 <- _mm_set_epi64x(m10, m13)   (m10 in the high 64 bits, m13 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_6_2(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m15, m5);  \
        (b1) = _mm_set_epi64x(m10, m13); \
    } while (0)
/*
 * LOAD_MSG_6_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m6, m0)   (m6 in the high 64 bits, m0 in the low)
 *   b1 <- _mm_set_epi64x(m8, m9)   (m8 in the high 64 bits, m9 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_6_3(b0, b1)           \
    do {                               \
        (b0) = _mm_set_epi64x(m6, m0); \
        (b1) = _mm_set_epi64x(m8, m9); \
    } while (0)
/*
 * LOAD_MSG_6_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m3, m7)    (m3 in the high 64 bits, m7 in the low)
 *   b1 <- _mm_set_epi64x(m11, m2)   (m11 in the high 64 bits, m2 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_6_4(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m3, m7);  \
        (b1) = _mm_set_epi64x(m11, m2); \
    } while (0)
/*
 * LOAD_MSG_7_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m7, m13)   (m7 in the high 64 bits, m13 in the low)
 *   b1 <- _mm_set_epi64x(m3, m12)   (m3 in the high 64 bits, m12 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_7_1(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m7, m13); \
        (b1) = _mm_set_epi64x(m3, m12); \
    } while (0)
/*
 * LOAD_MSG_7_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m14, m11)   (m14 in the high 64 bits, m11 in the low)
 *   b1 <- _mm_set_epi64x(m9, m1)     (m9 in the high 64 bits, m1 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_7_2(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m14, m11); \
        (b1) = _mm_set_epi64x(m9, m1);   \
    } while (0)
/*
 * LOAD_MSG_7_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m15, m5)   (m15 in the high 64 bits, m5 in the low)
 *   b1 <- _mm_set_epi64x(m2, m8)    (m2 in the high 64 bits, m8 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_7_3(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m15, m5); \
        (b1) = _mm_set_epi64x(m2, m8);  \
    } while (0)
/*
 * LOAD_MSG_7_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m4, m0)    (m4 in the high 64 bits, m0 in the low)
 *   b1 <- _mm_set_epi64x(m10, m6)   (m10 in the high 64 bits, m6 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_7_4(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m4, m0);  \
        (b1) = _mm_set_epi64x(m10, m6); \
    } while (0)
/*
 * LOAD_MSG_8_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m14, m6)   (m14 in the high 64 bits, m6 in the low)
 *   b1 <- _mm_set_epi64x(m0, m11)   (m0 in the high 64 bits, m11 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_8_1(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m14, m6); \
        (b1) = _mm_set_epi64x(m0, m11); \
    } while (0)
/*
 * LOAD_MSG_8_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m9, m15)   (m9 in the high 64 bits, m15 in the low)
 *   b1 <- _mm_set_epi64x(m8, m3)    (m8 in the high 64 bits, m3 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_8_2(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m9, m15); \
        (b1) = _mm_set_epi64x(m8, m3);  \
    } while (0)
/*
 * LOAD_MSG_8_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m13, m12)   (m13 in the high 64 bits, m12 in the low)
 *   b1 <- _mm_set_epi64x(m10, m1)    (m10 in the high 64 bits, m1 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_8_3(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m13, m12); \
        (b1) = _mm_set_epi64x(m10, m1);  \
    } while (0)
/*
 * LOAD_MSG_8_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m7, m2)   (m7 in the high 64 bits, m2 in the low)
 *   b1 <- _mm_set_epi64x(m5, m4)   (m5 in the high 64 bits, m4 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_8_4(b0, b1)           \
    do {                               \
        (b0) = _mm_set_epi64x(m7, m2); \
        (b1) = _mm_set_epi64x(m5, m4); \
    } while (0)
/*
 * LOAD_MSG_9_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m8, m10)   (m8 in the high 64 bits, m10 in the low)
 *   b1 <- _mm_set_epi64x(m1, m7)    (m1 in the high 64 bits, m7 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_9_1(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m8, m10); \
        (b1) = _mm_set_epi64x(m1, m7);  \
    } while (0)
/*
 * LOAD_MSG_9_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m4, m2)   (m4 in the high 64 bits, m2 in the low)
 *   b1 <- _mm_set_epi64x(m5, m6)   (m5 in the high 64 bits, m6 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_9_2(b0, b1)           \
    do {                               \
        (b0) = _mm_set_epi64x(m4, m2); \
        (b1) = _mm_set_epi64x(m5, m6); \
    } while (0)
/*
 * LOAD_MSG_9_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m9, m15)   (m9 in the high 64 bits, m15 in the low)
 *   b1 <- _mm_set_epi64x(m13, m3)   (m13 in the high 64 bits, m3 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_9_3(b0, b1)            \
    do {                                \
        (b0) = _mm_set_epi64x(m9, m15); \
        (b1) = _mm_set_epi64x(m13, m3); \
    } while (0)
/*
 * LOAD_MSG_9_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m14, m11)   (m14 in the high 64 bits, m11 in the low)
 *   b1 <- _mm_set_epi64x(m0, m12)    (m0 in the high 64 bits, m12 in the low)
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_9_4(b0, b1)             \
    do {                                 \
        (b0) = _mm_set_epi64x(m14, m11); \
        (b1) = _mm_set_epi64x(m0, m12);  \
    } while (0)
/*
 * LOAD_MSG_10_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m2, m0)   (m2 in the high 64 bits, m0 in the low)
 *   b1 <- _mm_set_epi64x(m6, m4)   (m6 in the high 64 bits, m4 in the low)
 * Uses the same operands as LOAD_MSG_0_1.
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_10_1(b0, b1)          \
    do {                               \
        (b0) = _mm_set_epi64x(m2, m0); \
        (b1) = _mm_set_epi64x(m6, m4); \
    } while (0)
/*
 * LOAD_MSG_10_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m3, m1)   (m3 in the high 64 bits, m1 in the low)
 *   b1 <- _mm_set_epi64x(m7, m5)   (m7 in the high 64 bits, m5 in the low)
 * Uses the same operands as LOAD_MSG_0_2.
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_10_2(b0, b1)          \
    do {                               \
        (b0) = _mm_set_epi64x(m3, m1); \
        (b1) = _mm_set_epi64x(m7, m5); \
    } while (0)
/*
 * LOAD_MSG_10_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m10, m8)    (m10 in the high 64 bits, m8 in the low)
 *   b1 <- _mm_set_epi64x(m14, m12)   (m14 in the high 64 bits, m12 in the low)
 * Uses the same operands as LOAD_MSG_0_3.
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_10_3(b0, b1)            \
    do {                                 \
        (b0) = _mm_set_epi64x(m10, m8);  \
        (b1) = _mm_set_epi64x(m14, m12); \
    } while (0)
/*
 * LOAD_MSG_10_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m11, m9)    (m11 in the high 64 bits, m9 in the low)
 *   b1 <- _mm_set_epi64x(m15, m13)   (m15 in the high 64 bits, m13 in the low)
 * Uses the same operands as LOAD_MSG_0_4.
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_10_4(b0, b1)            \
    do {                                 \
        (b0) = _mm_set_epi64x(m11, m9);  \
        (b1) = _mm_set_epi64x(m15, m13); \
    } while (0)
/*
 * LOAD_MSG_11_1(b0, b1)
 *   b0 <- _mm_set_epi64x(m4, m14)   (m4 in the high 64 bits, m14 in the low)
 *   b1 <- _mm_set_epi64x(m13, m9)   (m13 in the high 64 bits, m9 in the low)
 * Uses the same operands as LOAD_MSG_1_1.
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_11_1(b0, b1)           \
    do {                                \
        (b0) = _mm_set_epi64x(m4, m14); \
        (b1) = _mm_set_epi64x(m13, m9); \
    } while (0)
/*
 * LOAD_MSG_11_2(b0, b1)
 *   b0 <- _mm_set_epi64x(m8, m10)   (m8 in the high 64 bits, m10 in the low)
 *   b1 <- _mm_set_epi64x(m6, m15)   (m6 in the high 64 bits, m15 in the low)
 * Uses the same operands as LOAD_MSG_1_2.
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_11_2(b0, b1)           \
    do {                                \
        (b0) = _mm_set_epi64x(m8, m10); \
        (b1) = _mm_set_epi64x(m6, m15); \
    } while (0)
/*
 * LOAD_MSG_11_3(b0, b1)
 *   b0 <- _mm_set_epi64x(m0, m1)    (m0 in the high 64 bits, m1 in the low)
 *   b1 <- _mm_set_epi64x(m5, m11)   (m5 in the high 64 bits, m11 in the low)
 * Uses the same operands as LOAD_MSG_1_3.
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_11_3(b0, b1)           \
    do {                                \
        (b0) = _mm_set_epi64x(m0, m1);  \
        (b1) = _mm_set_epi64x(m5, m11); \
    } while (0)
/*
 * LOAD_MSG_11_4(b0, b1)
 *   b0 <- _mm_set_epi64x(m2, m12)   (m2 in the high 64 bits, m12 in the low)
 *   b1 <- _mm_set_epi64x(m3, m7)    (m3 in the high 64 bits, m7 in the low)
 * Uses the same operands as LOAD_MSG_1_4.
 * The m* operands are not parameters: they must be in scope at the point of
 * expansion.  The do/while (0) wrapper makes the two assignments a single
 * statement, so the macro is safe under an unbraced if/else.
 */
#define LOAD_MSG_11_4(b0, b1)           \
    do {                                \
        (b0) = _mm_set_epi64x(m2, m12); \
        (b1) = _mm_set_epi64x(m3, m7);  \
    } while (0)
1630ac341f1SConrad Meyer 
1640ac341f1SConrad Meyer #endif
165