/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along
   with this software. If not, see
   <http://creativecommons.org/publicdomain/zero/1.0/>.
*/

16*33772c1eSriastradh #ifndef blake2b_load_sse2_H
17*33772c1eSriastradh #define blake2b_load_sse2_H
18*33772c1eSriastradh 
/*
 * Round 0 message loads.  Each LOAD_MSG_r_i(b0, b1) macro packs four
 * 64-bit message words (m0..m15, expected in scope at the call site)
 * into the two vector operands of one G-function step.  Note that
 * _mm_set_epi64x() takes (high, low), so e.g. b0 = (m2, m0) puts m0 in
 * the low lane.  In round 0 the words appear in natural order
 * (the BLAKE2b sigma[0] permutation is the identity).
 */
#define LOAD_MSG_0_1(b0, b1)     \
    b0 = _mm_set_epi64x(m2, m0); \
    b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_0_2(b0, b1)     \
    b0 = _mm_set_epi64x(m3, m1); \
    b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_0_3(b0, b1)      \
    b0 = _mm_set_epi64x(m10, m8); \
    b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_0_4(b0, b1)      \
    b0 = _mm_set_epi64x(m11, m9); \
    b1 = _mm_set_epi64x(m15, m13)
/* Round 1 message loads: words gathered per sigma[1]; _mm_set_epi64x(high, low). */
#define LOAD_MSG_1_1(b0, b1)      \
    b0 = _mm_set_epi64x(m4, m14); \
    b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_1_2(b0, b1)      \
    b0 = _mm_set_epi64x(m8, m10); \
    b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_1_3(b0, b1)     \
    b0 = _mm_set_epi64x(m0, m1); \
    b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_1_4(b0, b1)      \
    b0 = _mm_set_epi64x(m2, m12); \
    b1 = _mm_set_epi64x(m3, m7)
/* Round 2 message loads: words gathered per sigma[2]; _mm_set_epi64x(high, low). */
#define LOAD_MSG_2_1(b0, b1)       \
    b0 = _mm_set_epi64x(m12, m11); \
    b1 = _mm_set_epi64x(m15, m5)
#define LOAD_MSG_2_2(b0, b1)     \
    b0 = _mm_set_epi64x(m0, m8); \
    b1 = _mm_set_epi64x(m13, m2)
#define LOAD_MSG_2_3(b0, b1)      \
    b0 = _mm_set_epi64x(m3, m10); \
    b1 = _mm_set_epi64x(m9, m7)
#define LOAD_MSG_2_4(b0, b1)      \
    b0 = _mm_set_epi64x(m6, m14); \
    b1 = _mm_set_epi64x(m4, m1)
/* Round 3 message loads: words gathered per sigma[3]; _mm_set_epi64x(high, low). */
#define LOAD_MSG_3_1(b0, b1)     \
    b0 = _mm_set_epi64x(m3, m7); \
    b1 = _mm_set_epi64x(m11, m13)
#define LOAD_MSG_3_2(b0, b1)     \
    b0 = _mm_set_epi64x(m1, m9); \
    b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_3_3(b0, b1)     \
    b0 = _mm_set_epi64x(m5, m2); \
    b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_3_4(b0, b1)      \
    b0 = _mm_set_epi64x(m10, m6); \
    b1 = _mm_set_epi64x(m8, m0)
/* Round 4 message loads: words gathered per sigma[4]; _mm_set_epi64x(high, low). */
#define LOAD_MSG_4_1(b0, b1)     \
    b0 = _mm_set_epi64x(m5, m9); \
    b1 = _mm_set_epi64x(m10, m2)
#define LOAD_MSG_4_2(b0, b1)     \
    b0 = _mm_set_epi64x(m7, m0); \
    b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_4_3(b0, b1)       \
    b0 = _mm_set_epi64x(m11, m14); \
    b1 = _mm_set_epi64x(m3, m6)
#define LOAD_MSG_4_4(b0, b1)      \
    b0 = _mm_set_epi64x(m12, m1); \
    b1 = _mm_set_epi64x(m13, m8)
/* Round 5 message loads: words gathered per sigma[5]; _mm_set_epi64x(high, low). */
#define LOAD_MSG_5_1(b0, b1)     \
    b0 = _mm_set_epi64x(m6, m2); \
    b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_5_2(b0, b1)       \
    b0 = _mm_set_epi64x(m10, m12); \
    b1 = _mm_set_epi64x(m3, m11)
#define LOAD_MSG_5_3(b0, b1)     \
    b0 = _mm_set_epi64x(m7, m4); \
    b1 = _mm_set_epi64x(m1, m15)
#define LOAD_MSG_5_4(b0, b1)      \
    b0 = _mm_set_epi64x(m5, m13); \
    b1 = _mm_set_epi64x(m9, m14)
/* Round 6 message loads: words gathered per sigma[6]; _mm_set_epi64x(high, low). */
#define LOAD_MSG_6_1(b0, b1)      \
    b0 = _mm_set_epi64x(m1, m12); \
    b1 = _mm_set_epi64x(m4, m14)
#define LOAD_MSG_6_2(b0, b1)      \
    b0 = _mm_set_epi64x(m15, m5); \
    b1 = _mm_set_epi64x(m10, m13)
#define LOAD_MSG_6_3(b0, b1)     \
    b0 = _mm_set_epi64x(m6, m0); \
    b1 = _mm_set_epi64x(m8, m9)
#define LOAD_MSG_6_4(b0, b1)     \
    b0 = _mm_set_epi64x(m3, m7); \
    b1 = _mm_set_epi64x(m11, m2)
/* Round 7 message loads: words gathered per sigma[7]; _mm_set_epi64x(high, low). */
#define LOAD_MSG_7_1(b0, b1)      \
    b0 = _mm_set_epi64x(m7, m13); \
    b1 = _mm_set_epi64x(m3, m12)
#define LOAD_MSG_7_2(b0, b1)       \
    b0 = _mm_set_epi64x(m14, m11); \
    b1 = _mm_set_epi64x(m9, m1)
#define LOAD_MSG_7_3(b0, b1)      \
    b0 = _mm_set_epi64x(m15, m5); \
    b1 = _mm_set_epi64x(m2, m8)
#define LOAD_MSG_7_4(b0, b1)     \
    b0 = _mm_set_epi64x(m4, m0); \
    b1 = _mm_set_epi64x(m10, m6)
/* Round 8 message loads: words gathered per sigma[8]; _mm_set_epi64x(high, low). */
#define LOAD_MSG_8_1(b0, b1)      \
    b0 = _mm_set_epi64x(m14, m6); \
    b1 = _mm_set_epi64x(m0, m11)
#define LOAD_MSG_8_2(b0, b1)      \
    b0 = _mm_set_epi64x(m9, m15); \
    b1 = _mm_set_epi64x(m8, m3)
#define LOAD_MSG_8_3(b0, b1)       \
    b0 = _mm_set_epi64x(m13, m12); \
    b1 = _mm_set_epi64x(m10, m1)
#define LOAD_MSG_8_4(b0, b1)     \
    b0 = _mm_set_epi64x(m7, m2); \
    b1 = _mm_set_epi64x(m5, m4)
/* Round 9 message loads: words gathered per sigma[9]; _mm_set_epi64x(high, low). */
#define LOAD_MSG_9_1(b0, b1)      \
    b0 = _mm_set_epi64x(m8, m10); \
    b1 = _mm_set_epi64x(m1, m7)
#define LOAD_MSG_9_2(b0, b1)     \
    b0 = _mm_set_epi64x(m4, m2); \
    b1 = _mm_set_epi64x(m5, m6)
#define LOAD_MSG_9_3(b0, b1)      \
    b0 = _mm_set_epi64x(m9, m15); \
    b1 = _mm_set_epi64x(m13, m3)
#define LOAD_MSG_9_4(b0, b1)       \
    b0 = _mm_set_epi64x(m14, m11); \
    b1 = _mm_set_epi64x(m0, m12)
/*
 * Round 10 message loads.  BLAKE2b has 12 rounds but only 10 sigma
 * permutations; rounds 10 and 11 reuse sigma[0] and sigma[1], so these
 * macros load the same words as LOAD_MSG_0_* (natural order).
 */
#define LOAD_MSG_10_1(b0, b1)    \
    b0 = _mm_set_epi64x(m2, m0); \
    b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_10_2(b0, b1)    \
    b0 = _mm_set_epi64x(m3, m1); \
    b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_10_3(b0, b1)     \
    b0 = _mm_set_epi64x(m10, m8); \
    b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_10_4(b0, b1)     \
    b0 = _mm_set_epi64x(m11, m9); \
    b1 = _mm_set_epi64x(m15, m13)
/* Round 11 message loads: sigma wraps around, so these repeat LOAD_MSG_1_*. */
#define LOAD_MSG_11_1(b0, b1)     \
    b0 = _mm_set_epi64x(m4, m14); \
    b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_11_2(b0, b1)     \
    b0 = _mm_set_epi64x(m8, m10); \
    b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_11_3(b0, b1)    \
    b0 = _mm_set_epi64x(m0, m1); \
    b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_11_4(b0, b1)     \
    b0 = _mm_set_epi64x(m2, m12); \
    b1 = _mm_set_epi64x(m3, m7)
163*33772c1eSriastradh 
164*33772c1eSriastradh #endif
165