/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along
   with this software. If not, see
   <http://creativecommons.org/publicdomain/zero/1.0/>.
*/

#ifndef blake2b_load_sse2_H
#define blake2b_load_sse2_H

/*
 * Message-word load macros, SSE2 flavour.
 *
 * Every LOAD_MSG_<r>_<i>(b0, b1) macro (r = 0..11, i = 1..4) assigns two
 * 128-bit values, each built with _mm_set_epi64x() from two of the sixteen
 * identifiers m0..m15.  Those identifiers are NOT defined here: they must be
 * in scope at the point of expansion, together with a declaration of
 * _mm_set_epi64x() (normally <emmintrin.h>).
 * NOTE(review): <r> is presumably the round number and <i> the step within
 * the round of the BLAKE2b compression function -- confirm against the file
 * that expands these macros.
 *
 * The r = 10 and r = 11 tables are identical to the r = 0 and r = 1 tables.
 */

/*
 * Internal helper shared by all LOAD_MSG_* macros below.
 *
 *   b0 <- _mm_set_epi64x(hi0, lo0)
 *   b1 <- _mm_set_epi64x(hi1, lo1)
 *
 * The argument order mirrors _mm_set_epi64x(), whose first argument is the
 * upper 64-bit lane and whose second argument is the lower lane.
 *
 * The body is wrapped in do { ... } while (0) so an expansion behaves as one
 * statement: it can be the sole body of an unbraced if/else or loop without
 * the second assignment escaping the guard.  Callers terminate the expansion
 * with ';' as before.
 *
 * This helper must stay defined for as long as the LOAD_MSG_* macros are
 * used, because they only expand to it at their point of use.
 */
#define BLAKE2B_SSE2_LOAD_PAIR(b0, b1, hi0, lo0, hi1, lo1) \
    do {                                                   \
        (b0) = _mm_set_epi64x(hi0, lo0);                   \
        (b1) = _mm_set_epi64x(hi1, lo1);                   \
    } while (0)

/*                                                         b0: hi   lo   b1: hi   lo  */
#define LOAD_MSG_0_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m2,  m0,  m6,  m4)
#define LOAD_MSG_0_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m3,  m1,  m7,  m5)
#define LOAD_MSG_0_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m10, m8,  m14, m12)
#define LOAD_MSG_0_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m11, m9,  m15, m13)

#define LOAD_MSG_1_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m4,  m14, m13, m9)
#define LOAD_MSG_1_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m8,  m10, m6,  m15)
#define LOAD_MSG_1_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m0,  m1,  m5,  m11)
#define LOAD_MSG_1_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m2,  m12, m3,  m7)

#define LOAD_MSG_2_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m12, m11, m15, m5)
#define LOAD_MSG_2_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m0,  m8,  m13, m2)
#define LOAD_MSG_2_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m3,  m10, m9,  m7)
#define LOAD_MSG_2_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m6,  m14, m4,  m1)

#define LOAD_MSG_3_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m3,  m7,  m11, m13)
#define LOAD_MSG_3_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m1,  m9,  m14, m12)
#define LOAD_MSG_3_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m5,  m2,  m15, m4)
#define LOAD_MSG_3_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m10, m6,  m8,  m0)

#define LOAD_MSG_4_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m5,  m9,  m10, m2)
#define LOAD_MSG_4_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m7,  m0,  m15, m4)
#define LOAD_MSG_4_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m11, m14, m3,  m6)
#define LOAD_MSG_4_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m12, m1,  m13, m8)

#define LOAD_MSG_5_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m6,  m2,  m8,  m0)
#define LOAD_MSG_5_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m10, m12, m3,  m11)
#define LOAD_MSG_5_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m7,  m4,  m1,  m15)
#define LOAD_MSG_5_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m5,  m13, m9,  m14)

#define LOAD_MSG_6_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m1,  m12, m4,  m14)
#define LOAD_MSG_6_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m15, m5,  m10, m13)
#define LOAD_MSG_6_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m6,  m0,  m8,  m9)
#define LOAD_MSG_6_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m3,  m7,  m11, m2)

#define LOAD_MSG_7_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m7,  m13, m3,  m12)
#define LOAD_MSG_7_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m14, m11, m9,  m1)
#define LOAD_MSG_7_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m15, m5,  m2,  m8)
#define LOAD_MSG_7_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m4,  m0,  m10, m6)

#define LOAD_MSG_8_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m14, m6,  m0,  m11)
#define LOAD_MSG_8_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m9,  m15, m8,  m3)
#define LOAD_MSG_8_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m13, m12, m10, m1)
#define LOAD_MSG_8_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m7,  m2,  m5,  m4)

#define LOAD_MSG_9_1(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m8,  m10, m1,  m7)
#define LOAD_MSG_9_2(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m4,  m2,  m5,  m6)
#define LOAD_MSG_9_3(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m9,  m15, m13, m3)
#define LOAD_MSG_9_4(b0, b1)  BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m14, m11, m0,  m12)

/* Same table as LOAD_MSG_0_*. */
#define LOAD_MSG_10_1(b0, b1) BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m2,  m0,  m6,  m4)
#define LOAD_MSG_10_2(b0, b1) BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m3,  m1,  m7,  m5)
#define LOAD_MSG_10_3(b0, b1) BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m10, m8,  m14, m12)
#define LOAD_MSG_10_4(b0, b1) BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m11, m9,  m15, m13)

/* Same table as LOAD_MSG_1_*. */
#define LOAD_MSG_11_1(b0, b1) BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m4,  m14, m13, m9)
#define LOAD_MSG_11_2(b0, b1) BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m8,  m10, m6,  m15)
#define LOAD_MSG_11_3(b0, b1) BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m0,  m1,  m5,  m11)
#define LOAD_MSG_11_4(b0, b1) BLAKE2B_SSE2_LOAD_PAIR(b0, b1, m2,  m12, m3,  m7)

#endif