/*
   BLAKE2 reference source code package - optimized C implementations

   Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
   terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
   your option.  The terms of these licenses can be found at:

   - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
   - OpenSSL license   : https://www.openssl.org/source/license.html
   - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0

   More information about the BLAKE2 hash function can be found at
   https://blake2.net.
*/
/*
 * Message-load macros for a vectorised BLAKE2b compression function.
 *
 * Every LOAD_MSG_<r>_<i>(b0, b1) macro assigns to b0 and b1 a pair of vectors
 * assembled from the identifiers m0..m7, which must be in scope at the point
 * of expansion.  The types uint64x2_p and uint8x16_p and the intrinsics
 * vec_mergeh, vec_mergel and vec_sld are not defined here and must be
 * provided by the including translation unit.
 *
 * NOTE(review): <r> is presumably the round index and <i> the load step
 * within that round -- confirm against the caller.  The definitions for
 * <r> = 10 and 11 are identical to those for <r> = 0 and 1.
 *
 * NOTE(review): the include guard says SSE2 although the macros use
 * AltiVec-style intrinsics.  The name is kept unchanged so that any code
 * testing for it keeps working.
 */
#ifndef BLAKE2B_LOAD_SSE2_H
#define BLAKE2B_LOAD_SSE2_H

/* Thin wrappers around the merge intrinsics. */
#define vec_merge_hi(a, b) vec_mergeh((a), (b))
/* Like vec_merge_hi, but b is first passed through vec_sld(b, b, 8). */
#define vec_merge_hi_lo(a, b) \
  vec_mergeh((a), (uint64x2_p)vec_sld((uint8x16_p)(b), (uint8x16_p)(b), 8))
#define vec_merge_lo(a, b) vec_mergel((a), (b))

/*
 * 8-byte vec_sld across the two operands.  The operand order is swapped on
 * little-endian targets.
 *
 * The expansion is a single parenthesised expression with no trailing
 * semicolon, so the macro can be used anywhere an expression is allowed.
 */
#if defined(NATIVE_BIG_ENDIAN)
# define vec_shl_8(a, b) \
  ((uint64x2_p)vec_sld((uint8x16_p)(a), (uint8x16_p)(b), 8))
#else
# define vec_shl_8(a, b) \
  ((uint64x2_p)vec_sld((uint8x16_p)(b), (uint8x16_p)(a), 16-8))
#endif

#define LOAD_MSG_0_1(b0, b1) \
do { \
  (b0) = vec_merge_hi(m0, m1); \
  (b1) = vec_merge_hi(m2, m3); \
} while(0)

#define LOAD_MSG_0_2(b0, b1) \
do { \
  (b0) = vec_merge_lo(m0, m1); \
  (b1) = vec_merge_lo(m2, m3); \
} while(0)

#define LOAD_MSG_0_3(b0, b1) \
do { \
  (b0) = vec_merge_hi(m4, m5); \
  (b1) = vec_merge_hi(m6, m7); \
} while(0)

#define LOAD_MSG_0_4(b0, b1) \
do { \
  (b0) = vec_merge_lo(m4, m5); \
  (b1) = vec_merge_lo(m6, m7); \
} while(0)

#define LOAD_MSG_1_1(b0, b1) \
do { \
  (b0) = vec_merge_hi(m7, m2); \
  (b1) = vec_merge_lo(m4, m6); \
} while(0)

#define LOAD_MSG_1_2(b0, b1) \
do { \
  (b0) = vec_merge_hi(m5, m4); \
  (b1) = vec_shl_8(m7, m3); \
} while(0)

#define LOAD_MSG_1_3(b0, b1) \
do { \
  (b0) = vec_shl_8(m0, m0); \
  (b1) = vec_merge_lo(m5, m2); \
} while(0)

#define LOAD_MSG_1_4(b0, b1) \
do { \
  (b0) = vec_merge_hi(m6, m1); \
  (b1) = vec_merge_lo(m3, m1); \
} while(0)

#define LOAD_MSG_2_1(b0, b1) \
do { \
  (b0) = vec_shl_8(m5, m6); \
  (b1) = vec_merge_lo(m2, m7); \
} while(0)

#define LOAD_MSG_2_2(b0, b1) \
do { \
  (b0) = vec_merge_hi(m4, m0); \
  (b1) = vec_merge_hi_lo(m1, m6); \
} while(0)

#define LOAD_MSG_2_3(b0, b1) \
do { \
  (b0) = vec_merge_hi_lo(m5, m1); \
  (b1) = vec_merge_lo(m3, m4); \
} while(0)

#define LOAD_MSG_2_4(b0, b1) \
do { \
  (b0) = vec_merge_hi(m7, m3); \
  (b1) = vec_shl_8(m0, m2); \
} while(0)

#define LOAD_MSG_3_1(b0, b1) \
do { \
  (b0) = vec_merge_lo(m3, m1); \
  (b1) = vec_merge_lo(m6, m5); \
} while(0)

#define LOAD_MSG_3_2(b0, b1) \
do { \
  (b0) = vec_merge_lo(m4, m0); \
  (b1) = vec_merge_hi(m6, m7); \
} while(0)

#define LOAD_MSG_3_3(b0, b1) \
do { \
  (b0) = vec_merge_hi_lo(m1, m2); \
  (b1) = vec_merge_hi_lo(m2, m7); \
} while(0)

#define LOAD_MSG_3_4(b0, b1) \
do { \
  (b0) = vec_merge_hi(m3, m5); \
  (b1) = vec_merge_hi(m0, m4); \
} while(0)

#define LOAD_MSG_4_1(b0, b1) \
do { \
  (b0) = vec_merge_lo(m4, m2); \
  (b1) = vec_merge_hi(m1, m5); \
} while(0)

#define LOAD_MSG_4_2(b0, b1) \
do { \
  (b0) = vec_merge_hi_lo(m0, m3); \
  (b1) = vec_merge_hi_lo(m2, m7); \
} while(0)

#define LOAD_MSG_4_3(b0, b1) \
do { \
  (b0) = vec_merge_hi_lo(m7, m5); \
  (b1) = vec_merge_hi_lo(m3, m1); \
} while(0)

#define LOAD_MSG_4_4(b0, b1) \
do { \
  (b0) = vec_shl_8(m0, m6); \
  (b1) = vec_merge_hi_lo(m4, m6); \
} while(0)

#define LOAD_MSG_5_1(b0, b1) \
do { \
  (b0) = vec_merge_hi(m1, m3); \
  (b1) = vec_merge_hi(m0, m4); \
} while(0)

#define LOAD_MSG_5_2(b0, b1) \
do { \
  (b0) = vec_merge_hi(m6, m5); \
  (b1) = vec_merge_lo(m5, m1); \
} while(0)

#define LOAD_MSG_5_3(b0, b1) \
do { \
  (b0) = vec_merge_hi_lo(m2, m3); \
  (b1) = vec_merge_lo(m7, m0); \
} while(0)

#define LOAD_MSG_5_4(b0, b1) \
do { \
  (b0) = vec_merge_lo(m6, m2); \
  (b1) = vec_merge_hi_lo(m7, m4); \
} while(0)

#define LOAD_MSG_6_1(b0, b1) \
do { \
  (b0) = vec_merge_hi_lo(m6, m0); \
  (b1) = vec_merge_hi(m7, m2); \
} while(0)

#define LOAD_MSG_6_2(b0, b1) \
do { \
  (b0) = vec_merge_lo(m2, m7); \
  (b1) = vec_shl_8(m6, m5); \
} while(0)

#define LOAD_MSG_6_3(b0, b1) \
do { \
  (b0) = vec_merge_hi(m0, m3); \
  (b1) = vec_shl_8(m4, m4); \
} while(0)

#define LOAD_MSG_6_4(b0, b1) \
do { \
  (b0) = vec_merge_lo(m3, m1); \
  (b1) = vec_merge_hi_lo(m1, m5); \
} while(0)

#define LOAD_MSG_7_1(b0, b1) \
do { \
  (b0) = vec_merge_lo(m6, m3); \
  (b1) = vec_merge_hi_lo(m6, m1); \
} while(0)

#define LOAD_MSG_7_2(b0, b1) \
do { \
  (b0) = vec_shl_8(m5, m7); \
  (b1) = vec_merge_lo(m0, m4); \
} while(0)

#define LOAD_MSG_7_3(b0, b1) \
do { \
  (b0) = vec_merge_lo(m2, m7); \
  (b1) = vec_merge_hi(m4, m1); \
} while(0)

#define LOAD_MSG_7_4(b0, b1) \
do { \
  (b0) = vec_merge_hi(m0, m2); \
  (b1) = vec_merge_hi(m3, m5); \
} while(0)

#define LOAD_MSG_8_1(b0, b1) \
do { \
  (b0) = vec_merge_hi(m3, m7); \
  (b1) = vec_shl_8(m5, m0); \
} while(0)

#define LOAD_MSG_8_2(b0, b1) \
do { \
  (b0) = vec_merge_lo(m7, m4); \
  (b1) = vec_shl_8(m1, m4); \
} while(0)

#define LOAD_MSG_8_3(b0, b1) \
do { \
  (b0) = m6; \
  (b1) = vec_shl_8(m0, m5); \
} while(0)

#define LOAD_MSG_8_4(b0, b1) \
do { \
  (b0) = vec_merge_hi_lo(m1, m3); \
  (b1) = m2; \
} while(0)

#define LOAD_MSG_9_1(b0, b1) \
do { \
  (b0) = vec_merge_hi(m5, m4); \
  (b1) = vec_merge_lo(m3, m0); \
} while(0)

#define LOAD_MSG_9_2(b0, b1) \
do { \
  (b0) = vec_merge_hi(m1, m2); \
  (b1) = vec_merge_hi_lo(m3, m2); \
} while(0)

#define LOAD_MSG_9_3(b0, b1) \
do { \
  (b0) = vec_merge_lo(m7, m4); \
  (b1) = vec_merge_lo(m1, m6); \
} while(0)

#define LOAD_MSG_9_4(b0, b1) \
do { \
  (b0) = vec_shl_8(m5, m7); \
  (b1) = vec_merge_hi(m6, m0); \
} while(0)

#define LOAD_MSG_10_1(b0, b1) \
do { \
  (b0) = vec_merge_hi(m0, m1); \
  (b1) = vec_merge_hi(m2, m3); \
} while(0)

#define LOAD_MSG_10_2(b0, b1) \
do { \
  (b0) = vec_merge_lo(m0, m1); \
  (b1) = vec_merge_lo(m2, m3); \
} while(0)

#define LOAD_MSG_10_3(b0, b1) \
do { \
  (b0) = vec_merge_hi(m4, m5); \
  (b1) = vec_merge_hi(m6, m7); \
} while(0)

#define LOAD_MSG_10_4(b0, b1) \
do { \
  (b0) = vec_merge_lo(m4, m5); \
  (b1) = vec_merge_lo(m6, m7); \
} while(0)

#define LOAD_MSG_11_1(b0, b1) \
do { \
  (b0) = vec_merge_hi(m7, m2); \
  (b1) = vec_merge_lo(m4, m6); \
} while(0)

#define LOAD_MSG_11_2(b0, b1) \
do { \
  (b0) = vec_merge_hi(m5, m4); \
  (b1) = vec_shl_8(m7, m3); \
} while(0)

#define LOAD_MSG_11_3(b0, b1) \
do { \
  (b0) = vec_shl_8(m0, m0); \
  (b1) = vec_merge_lo(m5, m2); \
} while(0)

#define LOAD_MSG_11_4(b0, b1) \
do { \
  (b0) = vec_merge_hi(m6, m1); \
  (b1) = vec_merge_lo(m3, m1); \
} while(0)

#endif