/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along
   with this software. If not, see
   <http://creativecommons.org/publicdomain/zero/1.0/>.
*/

#ifndef blake2b_load_sse2_H
#define blake2b_load_sse2_H

/*
 * Message-word load macros, one family per round index r and slot i:
 *
 *     LOAD_MSG_<r>_<i>(b0, b1)      r = 0..11, i = 1..4
 *
 * Each macro assigns to the two lvalues passed as b0 and b1 the result of
 * _mm_set_epi64x(x, y), where x and y are two of the identifiers m0..m15.
 * In every call below the first argument of _mm_set_epi64x is the word that
 * ends up in the upper 64-bit lane and the second is the word that ends up
 * in the lower lane (see the intrinsic's documentation).
 *
 * Requirements on the expansion site:
 *   - m0 .. m15 must be visible names; this header does not declare them.
 *   - _mm_set_epi64x must be declared; this header includes nothing itself.
 *   - Each macro expands to TWO statements separated by ';' and has no
 *     trailing ';'. Follow it with ';' and do not use it as the sole body of
 *     an unbraced if/else/for/while, or only the b0 assignment is governed.
 *
 * The index pairs of rounds 10 and 11 are identical to those of rounds 0
 * and 1 respectively.
 */

/* Round 0 */
#define LOAD_MSG_0_1(b0, b1) \
    b0 = _mm_set_epi64x(m2, m0); \
    b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_0_2(b0, b1) \
    b0 = _mm_set_epi64x(m3, m1); \
    b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_0_3(b0, b1) \
    b0 = _mm_set_epi64x(m10, m8); \
    b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_0_4(b0, b1) \
    b0 = _mm_set_epi64x(m11, m9); \
    b1 = _mm_set_epi64x(m15, m13)

/* Round 1 */
#define LOAD_MSG_1_1(b0, b1) \
    b0 = _mm_set_epi64x(m4, m14); \
    b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_1_2(b0, b1) \
    b0 = _mm_set_epi64x(m8, m10); \
    b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_1_3(b0, b1) \
    b0 = _mm_set_epi64x(m0, m1); \
    b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_1_4(b0, b1) \
    b0 = _mm_set_epi64x(m2, m12); \
    b1 = _mm_set_epi64x(m3, m7)

/* Round 2 */
#define LOAD_MSG_2_1(b0, b1) \
    b0 = _mm_set_epi64x(m12, m11); \
    b1 = _mm_set_epi64x(m15, m5)
#define LOAD_MSG_2_2(b0, b1) \
    b0 = _mm_set_epi64x(m0, m8); \
    b1 = _mm_set_epi64x(m13, m2)
#define LOAD_MSG_2_3(b0, b1) \
    b0 = _mm_set_epi64x(m3, m10); \
    b1 = _mm_set_epi64x(m9, m7)
#define LOAD_MSG_2_4(b0, b1) \
    b0 = _mm_set_epi64x(m6, m14); \
    b1 = _mm_set_epi64x(m4, m1)

/* Round 3 */
#define LOAD_MSG_3_1(b0, b1) \
    b0 = _mm_set_epi64x(m3, m7); \
    b1 = _mm_set_epi64x(m11, m13)
#define LOAD_MSG_3_2(b0, b1) \
    b0 = _mm_set_epi64x(m1, m9); \
    b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_3_3(b0, b1) \
    b0 = _mm_set_epi64x(m5, m2); \
    b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_3_4(b0, b1) \
    b0 = _mm_set_epi64x(m10, m6); \
    b1 = _mm_set_epi64x(m8, m0)

/* Round 4 */
#define LOAD_MSG_4_1(b0, b1) \
    b0 = _mm_set_epi64x(m5, m9); \
    b1 = _mm_set_epi64x(m10, m2)
#define LOAD_MSG_4_2(b0, b1) \
    b0 = _mm_set_epi64x(m7, m0); \
    b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_4_3(b0, b1) \
    b0 = _mm_set_epi64x(m11, m14); \
    b1 = _mm_set_epi64x(m3, m6)
#define LOAD_MSG_4_4(b0, b1) \
    b0 = _mm_set_epi64x(m12, m1); \
    b1 = _mm_set_epi64x(m13, m8)

/* Round 5 */
#define LOAD_MSG_5_1(b0, b1) \
    b0 = _mm_set_epi64x(m6, m2); \
    b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_5_2(b0, b1) \
    b0 = _mm_set_epi64x(m10, m12); \
    b1 = _mm_set_epi64x(m3, m11)
#define LOAD_MSG_5_3(b0, b1) \
    b0 = _mm_set_epi64x(m7, m4); \
    b1 = _mm_set_epi64x(m1, m15)
#define LOAD_MSG_5_4(b0, b1) \
    b0 = _mm_set_epi64x(m5, m13); \
    b1 = _mm_set_epi64x(m9, m14)

/* Round 6 */
#define LOAD_MSG_6_1(b0, b1) \
    b0 = _mm_set_epi64x(m1, m12); \
    b1 = _mm_set_epi64x(m4, m14)
#define LOAD_MSG_6_2(b0, b1) \
    b0 = _mm_set_epi64x(m15, m5); \
    b1 = _mm_set_epi64x(m10, m13)
#define LOAD_MSG_6_3(b0, b1) \
    b0 = _mm_set_epi64x(m6, m0); \
    b1 = _mm_set_epi64x(m8, m9)
#define LOAD_MSG_6_4(b0, b1) \
    b0 = _mm_set_epi64x(m3, m7); \
    b1 = _mm_set_epi64x(m11, m2)

/* Round 7 */
#define LOAD_MSG_7_1(b0, b1) \
    b0 = _mm_set_epi64x(m7, m13); \
    b1 = _mm_set_epi64x(m3, m12)
#define LOAD_MSG_7_2(b0, b1) \
    b0 = _mm_set_epi64x(m14, m11); \
    b1 = _mm_set_epi64x(m9, m1)
#define LOAD_MSG_7_3(b0, b1) \
    b0 = _mm_set_epi64x(m15, m5); \
    b1 = _mm_set_epi64x(m2, m8)
#define LOAD_MSG_7_4(b0, b1) \
    b0 = _mm_set_epi64x(m4, m0); \
    b1 = _mm_set_epi64x(m10, m6)

/* Round 8 */
#define LOAD_MSG_8_1(b0, b1) \
    b0 = _mm_set_epi64x(m14, m6); \
    b1 = _mm_set_epi64x(m0, m11)
#define LOAD_MSG_8_2(b0, b1) \
    b0 = _mm_set_epi64x(m9, m15); \
    b1 = _mm_set_epi64x(m8, m3)
#define LOAD_MSG_8_3(b0, b1) \
    b0 = _mm_set_epi64x(m13, m12); \
    b1 = _mm_set_epi64x(m10, m1)
#define LOAD_MSG_8_4(b0, b1) \
    b0 = _mm_set_epi64x(m7, m2); \
    b1 = _mm_set_epi64x(m5, m4)

/* Round 9 */
#define LOAD_MSG_9_1(b0, b1) \
    b0 = _mm_set_epi64x(m8, m10); \
    b1 = _mm_set_epi64x(m1, m7)
#define LOAD_MSG_9_2(b0, b1) \
    b0 = _mm_set_epi64x(m4, m2); \
    b1 = _mm_set_epi64x(m5, m6)
#define LOAD_MSG_9_3(b0, b1) \
    b0 = _mm_set_epi64x(m9, m15); \
    b1 = _mm_set_epi64x(m13, m3)
#define LOAD_MSG_9_4(b0, b1) \
    b0 = _mm_set_epi64x(m14, m11); \
    b1 = _mm_set_epi64x(m0, m12)

/* Round 10 (same index pairs as round 0) */
#define LOAD_MSG_10_1(b0, b1) \
    b0 = _mm_set_epi64x(m2, m0); \
    b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_10_2(b0, b1) \
    b0 = _mm_set_epi64x(m3, m1); \
    b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_10_3(b0, b1) \
    b0 = _mm_set_epi64x(m10, m8); \
    b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_10_4(b0, b1) \
    b0 = _mm_set_epi64x(m11, m9); \
    b1 = _mm_set_epi64x(m15, m13)

/* Round 11 (same index pairs as round 1) */
#define LOAD_MSG_11_1(b0, b1) \
    b0 = _mm_set_epi64x(m4, m14); \
    b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_11_2(b0, b1) \
    b0 = _mm_set_epi64x(m8, m10); \
    b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_11_3(b0, b1) \
    b0 = _mm_set_epi64x(m0, m1); \
    b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_11_4(b0, b1) \
    b0 = _mm_set_epi64x(m2, m12); \
    b1 = _mm_set_epi64x(m3, m7)

#endif