1 // keccak_simd.cpp - written and placed in the public domain by Jeffrey Walton.
2 //
3 //    This source file uses intrinsics to gain access to SSE and
4 //    NEON instructions. A separate source file is needed because
5 //    additional CXXFLAGS are required to enable the appropriate
//    instruction sets in some build configurations.
7 
8 // The XKCP package is provided by Guido Bertoni, Joan Daemen, Seth Hoffert,
// Michael Peeters, Gilles Van Assche, and Ronny Van Keer. The code was
// placed in the public domain by the authors.
11 
12 // KeccakF1600x2_SSE is ParallelHash128. The SSE2 ParallelHash128
13 // implementation was extracted from XKCP using the following command.
14 //
15 // gcc -I lib/common -I lib/low/KeccakP-1600/Optimized
16 //   -I lib/low/KeccakP-1600-times2/SIMD128/SSE2ufull
17 //   lib/low/KeccakP-1600-times2/SIMD128/KeccakP-1600-times2-SIMD128.c -E
18 
19 #include "pch.h"
20 #include "config.h"
21 #include "keccak.h"
22 #include "misc.h"
23 
24 #if (CRYPTOPP_SSSE3_AVAILABLE)
25 # include <emmintrin.h>
26 # include <tmmintrin.h>
27 #endif
28 
29 #if defined(__XOP__)
30 # include <ammintrin.h>
31 # if defined(__GNUC__)
32 #  include <x86intrin.h>
33 # endif
34 #endif
35 
// Squash MS LNK4221 and libtool warnings. When SIMD support is compiled out
// this translation unit would otherwise be empty; the extern definition
// guarantees the object file always exports at least one symbol.
extern const char KECCAK_SIMD_FNAME[] = __FILE__;
38 
39 NAMESPACE_BEGIN(CryptoPP)
40 
41 #if (CRYPTOPP_SSSE3_AVAILABLE)
42 
// The Keccak ParallelHash128 core function, defined later in this file.
// 'state' holds 25 interleaved lane pairs (two Keccak states side by side).
extern void KeccakF1600x2_SSE(word64 *state);

// The F1600 round constants (24 rounds); defined elsewhere in the library.
extern const word64 KeccakF1600Constants[24];
48 
// PSHUFB byte-shuffle mask: rotates each 64-bit lane left by 8 bits
// (output byte k takes input byte (k-1) mod 8, per lane).
CRYPTOPP_ALIGN_DATA(16)
const word64
rho8[2] = {W64LIT(0x0605040302010007), W64LIT(0x0E0D0C0B0A09080F)};

// PSHUFB byte-shuffle mask: rotates each 64-bit lane left by 56 bits
// (output byte k takes input byte (k+1) mod 8, per lane).
CRYPTOPP_ALIGN_DATA(16)
const word64
rho56[2] = {W64LIT(0x0007060504030201), W64LIT(0x080F0E0D0C0B0A09)};
56 
#if defined(__XOP__)
// AMD XOP provides a native 64-bit rotate instruction (VPROTQ).
# define ROL64in128(a, o)    _mm_roti_epi64((a), (o))
# define ROL64in128_8(a)     ROL64in128((a), 8)
# define ROL64in128_56(a)    ROL64in128((a), 56)
#else
// Generic 64-bit rotate-left within each lane: two shifts plus an OR.
// The byte-aligned rotations (8 and 56 bits) are instead done with a
// single PSHUFB byte shuffle using the rho8/rho56 masks above.
# define ROL64in128(a, o)    _mm_or_si128(_mm_slli_epi64((a), (o)), _mm_srli_epi64(a, 64-(o)))
# define ROL64in128_8(a)     _mm_shuffle_epi8((a), _mm_load_si128(CONST_M128_CAST(rho8)))
# define ROL64in128_56(a)    _mm_shuffle_epi8((a), _mm_load_si128(CONST_M128_CAST(rho56)))
#endif
66 
67 // Damn Visual Studio is missing too many intrinsics...
SPLAT64(const word64 a)68 inline __m128i SPLAT64(const word64 a)
69 {
70 #if defined(_MSC_VER)
71     double x; std::memcpy(&x, &a, 8);
72     return _mm_castpd_si128(_mm_loaddup_pd(&x));
73 #else
74     return _mm_set1_epi64x(a);
75 #endif
76 }
77 
78 // The Keccak ParallelHash128 core function
KeccakF1600x2_SSE(word64 * state)79 void KeccakF1600x2_SSE(word64 *state)
80 {
81     __m128i Aba, Abe, Abi, Abo, Abu;
82     __m128i Aga, Age, Agi, Ago, Agu;
83     __m128i Aka, Ake, Aki, Ako, Aku;
84     __m128i Ama, Ame, Ami, Amo, Amu;
85     __m128i Asa, Ase, Asi, Aso, Asu;
86     __m128i Bba, Bbe, Bbi, Bbo, Bbu;
87     __m128i Bga, Bge, Bgi, Bgo, Bgu;
88     __m128i Bka, Bke, Bki, Bko, Bku;
89     __m128i Bma, Bme, Bmi, Bmo, Bmu;
90     __m128i Bsa, Bse, Bsi, Bso, Bsu;
91     __m128i Ca, Ce, Ci, Co, Cu;
92     __m128i Da, De, Di, Do, Du;
93     __m128i Eba, Ebe, Ebi, Ebo, Ebu;
94     __m128i Ega, Ege, Egi, Ego, Egu;
95     __m128i Eka, Eke, Eki, Eko, Eku;
96     __m128i Ema, Eme, Emi, Emo, Emu;
97     __m128i Esa, Ese, Esi, Eso, Esu;
98 
99     __m128i* lanes = reinterpret_cast<__m128i*>(state);
100     Aba = _mm_loadu_si128(CONST_M128_CAST(lanes+ 0));
101     Abe = _mm_loadu_si128(CONST_M128_CAST(lanes+ 1));
102     Abi = _mm_loadu_si128(CONST_M128_CAST(lanes+ 2));
103     Abo = _mm_loadu_si128(CONST_M128_CAST(lanes+ 3));
104     Abu = _mm_loadu_si128(CONST_M128_CAST(lanes+ 4));
105     Aga = _mm_loadu_si128(CONST_M128_CAST(lanes+ 5));
106     Age = _mm_loadu_si128(CONST_M128_CAST(lanes+ 6));
107     Agi = _mm_loadu_si128(CONST_M128_CAST(lanes+ 7));
108     Ago = _mm_loadu_si128(CONST_M128_CAST(lanes+ 8));
109     Agu = _mm_loadu_si128(CONST_M128_CAST(lanes+ 9));
110     Aka = _mm_loadu_si128(CONST_M128_CAST(lanes+10));
111     Ake = _mm_loadu_si128(CONST_M128_CAST(lanes+11));
112     Aki = _mm_loadu_si128(CONST_M128_CAST(lanes+12));
113     Ako = _mm_loadu_si128(CONST_M128_CAST(lanes+13));
114     Aku = _mm_loadu_si128(CONST_M128_CAST(lanes+14));
115     Ama = _mm_loadu_si128(CONST_M128_CAST(lanes+15));
116     Ame = _mm_loadu_si128(CONST_M128_CAST(lanes+16));
117     Ami = _mm_loadu_si128(CONST_M128_CAST(lanes+17));
118     Amo = _mm_loadu_si128(CONST_M128_CAST(lanes+18));
119     Amu = _mm_loadu_si128(CONST_M128_CAST(lanes+19));
120     Asa = _mm_loadu_si128(CONST_M128_CAST(lanes+20));
121     Ase = _mm_loadu_si128(CONST_M128_CAST(lanes+21));
122     Asi = _mm_loadu_si128(CONST_M128_CAST(lanes+22));
123     Aso = _mm_loadu_si128(CONST_M128_CAST(lanes+23));
124     Asu = _mm_loadu_si128(CONST_M128_CAST(lanes+24));
125 
126     Ca = _mm_xor_si128(Aba, _mm_xor_si128(Aga, _mm_xor_si128(Aka, _mm_xor_si128(Ama, Asa))));
127     Ce = _mm_xor_si128(Abe, _mm_xor_si128(Age, _mm_xor_si128(Ake, _mm_xor_si128(Ame, Ase))));
128     Ci = _mm_xor_si128(Abi, _mm_xor_si128(Agi, _mm_xor_si128(Aki, _mm_xor_si128(Ami, Asi))));
129     Co = _mm_xor_si128(Abo, _mm_xor_si128(Ago, _mm_xor_si128(Ako, _mm_xor_si128(Amo, Aso))));
130     Cu = _mm_xor_si128(Abu, _mm_xor_si128(Agu, _mm_xor_si128(Aku, _mm_xor_si128(Amu, Asu))));
131     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
132     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
133     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
134     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
135     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
136 
137     Aba = _mm_xor_si128(Aba, Da);
138     Bba = Aba;
139     Age = _mm_xor_si128(Age, De);
140     Bbe = ROL64in128(Age, 44);
141     Aki = _mm_xor_si128(Aki, Di);
142     Bbi = ROL64in128(Aki, 43);
143     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
144     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[0]));
145     Ca = Eba;
146     Amo = _mm_xor_si128(Amo, Do);
147     Bbo = ROL64in128(Amo, 21);
148     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
149     Ce = Ebe;
150     Asu = _mm_xor_si128(Asu, Du);
151     Bbu = ROL64in128(Asu, 14);
152     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
153     Ci = Ebi;
154     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
155     Co = Ebo;
156     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
157     Cu = Ebu;
158     Abo = _mm_xor_si128(Abo, Do);
159     Bga = ROL64in128(Abo, 28);
160     Agu = _mm_xor_si128(Agu, Du);
161     Bge = ROL64in128(Agu, 20);
162     Aka = _mm_xor_si128(Aka, Da);
163     Bgi = ROL64in128(Aka, 3);
164     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
165     Ca = _mm_xor_si128(Ca, Ega);
166     Ame = _mm_xor_si128(Ame, De);
167     Bgo = ROL64in128(Ame, 45);
168     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
169     Ce = _mm_xor_si128(Ce, Ege);
170     Asi = _mm_xor_si128(Asi, Di);
171     Bgu = ROL64in128(Asi, 61);
172     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
173     Ci = _mm_xor_si128(Ci, Egi);
174     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
175     Co = _mm_xor_si128(Co, Ego);
176     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
177     Cu = _mm_xor_si128(Cu, Egu);
178     Abe = _mm_xor_si128(Abe, De);
179     Bka = ROL64in128(Abe, 1);
180     Agi = _mm_xor_si128(Agi, Di);
181     Bke = ROL64in128(Agi, 6);
182     Ako = _mm_xor_si128(Ako, Do);
183     Bki = ROL64in128(Ako, 25);
184     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
185     Ca = _mm_xor_si128(Ca, Eka);
186     Amu = _mm_xor_si128(Amu, Du);
187     Bko = ROL64in128_8(Amu);
188     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
189     Ce = _mm_xor_si128(Ce, Eke);
190     Asa = _mm_xor_si128(Asa, Da);
191     Bku = ROL64in128(Asa, 18);
192     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
193     Ci = _mm_xor_si128(Ci, Eki);
194     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
195     Co = _mm_xor_si128(Co, Eko);
196     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
197     Cu = _mm_xor_si128(Cu, Eku);
198     Abu = _mm_xor_si128(Abu, Du);
199     Bma = ROL64in128(Abu, 27);
200     Aga = _mm_xor_si128(Aga, Da);
201     Bme = ROL64in128(Aga, 36);
202     Ake = _mm_xor_si128(Ake, De);
203     Bmi = ROL64in128(Ake, 10);
204     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
205     Ca = _mm_xor_si128(Ca, Ema);
206     Ami = _mm_xor_si128(Ami, Di);
207     Bmo = ROL64in128(Ami, 15);
208     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
209     Ce = _mm_xor_si128(Ce, Eme);
210     Aso = _mm_xor_si128(Aso, Do);
211     Bmu = ROL64in128_56(Aso);
212     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
213     Ci = _mm_xor_si128(Ci, Emi);
214     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
215     Co = _mm_xor_si128(Co, Emo);
216     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
217     Cu = _mm_xor_si128(Cu, Emu);
218     Abi = _mm_xor_si128(Abi, Di);
219     Bsa = ROL64in128(Abi, 62);
220     Ago = _mm_xor_si128(Ago, Do);
221     Bse = ROL64in128(Ago, 55);
222     Aku = _mm_xor_si128(Aku, Du);
223     Bsi = ROL64in128(Aku, 39);
224     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
225     Ca = _mm_xor_si128(Ca, Esa);
226     Ama = _mm_xor_si128(Ama, Da);
227     Bso = ROL64in128(Ama, 41);
228     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
229     Ce = _mm_xor_si128(Ce, Ese);
230     Ase = _mm_xor_si128(Ase, De);
231     Bsu = ROL64in128(Ase, 2);
232     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
233     Ci = _mm_xor_si128(Ci, Esi);
234     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
235     Co = _mm_xor_si128(Co, Eso);
236     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
237     Cu = _mm_xor_si128(Cu, Esu);
238     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
239     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
240     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
241     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
242     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
243     Eba = _mm_xor_si128(Eba, Da);
244     Bba = Eba;
245     Ege = _mm_xor_si128(Ege, De);
246     Bbe = ROL64in128(Ege, 44);
247     Eki = _mm_xor_si128(Eki, Di);
248     Bbi = ROL64in128(Eki, 43);
249     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
250     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[1]));
251     Ca = Aba;
252     Emo = _mm_xor_si128(Emo, Do);
253     Bbo = ROL64in128(Emo, 21);
254     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
255     Ce = Abe;
256     Esu = _mm_xor_si128(Esu, Du);
257     Bbu = ROL64in128(Esu, 14);
258     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
259     Ci = Abi;
260     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
261     Co = Abo;
262     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
263     Cu = Abu;
264     Ebo = _mm_xor_si128(Ebo, Do);
265     Bga = ROL64in128(Ebo, 28);
266     Egu = _mm_xor_si128(Egu, Du);
267     Bge = ROL64in128(Egu, 20);
268     Eka = _mm_xor_si128(Eka, Da);
269     Bgi = ROL64in128(Eka, 3);
270     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
271     Ca = _mm_xor_si128(Ca, Aga);
272     Eme = _mm_xor_si128(Eme, De);
273     Bgo = ROL64in128(Eme, 45);
274     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
275     Ce = _mm_xor_si128(Ce, Age);
276     Esi = _mm_xor_si128(Esi, Di);
277     Bgu = ROL64in128(Esi, 61);
278     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
279     Ci = _mm_xor_si128(Ci, Agi);
280     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
281     Co = _mm_xor_si128(Co, Ago);
282     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
283     Cu = _mm_xor_si128(Cu, Agu);
284     Ebe = _mm_xor_si128(Ebe, De);
285     Bka = ROL64in128(Ebe, 1);
286     Egi = _mm_xor_si128(Egi, Di);
287     Bke = ROL64in128(Egi, 6);
288     Eko = _mm_xor_si128(Eko, Do);
289     Bki = ROL64in128(Eko, 25);
290     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
291     Ca = _mm_xor_si128(Ca, Aka);
292     Emu = _mm_xor_si128(Emu, Du);
293     Bko = ROL64in128_8(Emu);
294     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
295     Ce = _mm_xor_si128(Ce, Ake);
296     Esa = _mm_xor_si128(Esa, Da);
297     Bku = ROL64in128(Esa, 18);
298     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
299     Ci = _mm_xor_si128(Ci, Aki);
300     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
301     Co = _mm_xor_si128(Co, Ako);
302     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
303     Cu = _mm_xor_si128(Cu, Aku);
304     Ebu = _mm_xor_si128(Ebu, Du);
305     Bma = ROL64in128(Ebu, 27);
306     Ega = _mm_xor_si128(Ega, Da);
307     Bme = ROL64in128(Ega, 36);
308     Eke = _mm_xor_si128(Eke, De);
309     Bmi = ROL64in128(Eke, 10);
310     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
311     Ca = _mm_xor_si128(Ca, Ama);
312     Emi = _mm_xor_si128(Emi, Di);
313     Bmo = ROL64in128(Emi, 15);
314     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
315     Ce = _mm_xor_si128(Ce, Ame);
316     Eso = _mm_xor_si128(Eso, Do);
317     Bmu = ROL64in128_56(Eso);
318     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
319     Ci = _mm_xor_si128(Ci, Ami);
320     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
321     Co = _mm_xor_si128(Co, Amo);
322     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
323     Cu = _mm_xor_si128(Cu, Amu);
324     Ebi = _mm_xor_si128(Ebi, Di);
325     Bsa = ROL64in128(Ebi, 62);
326     Ego = _mm_xor_si128(Ego, Do);
327     Bse = ROL64in128(Ego, 55);
328     Eku = _mm_xor_si128(Eku, Du);
329     Bsi = ROL64in128(Eku, 39);
330     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
331     Ca = _mm_xor_si128(Ca, Asa);
332     Ema = _mm_xor_si128(Ema, Da);
333     Bso = ROL64in128(Ema, 41);
334     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
335     Ce = _mm_xor_si128(Ce, Ase);
336     Ese = _mm_xor_si128(Ese, De);
337     Bsu = ROL64in128(Ese, 2);
338     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
339     Ci = _mm_xor_si128(Ci, Asi);
340     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
341     Co = _mm_xor_si128(Co, Aso);
342     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
343     Cu = _mm_xor_si128(Cu, Asu);
344     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
345     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
346     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
347     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
348     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
349     Aba = _mm_xor_si128(Aba, Da);
350     Bba = Aba;
351     Age = _mm_xor_si128(Age, De);
352     Bbe = ROL64in128(Age, 44);
353     Aki = _mm_xor_si128(Aki, Di);
354     Bbi = ROL64in128(Aki, 43);
355     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
356     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[2]));
357     Ca = Eba;
358     Amo = _mm_xor_si128(Amo, Do);
359     Bbo = ROL64in128(Amo, 21);
360     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
361     Ce = Ebe;
362     Asu = _mm_xor_si128(Asu, Du);
363     Bbu = ROL64in128(Asu, 14);
364     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
365     Ci = Ebi;
366     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
367     Co = Ebo;
368     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
369     Cu = Ebu;
370     Abo = _mm_xor_si128(Abo, Do);
371     Bga = ROL64in128(Abo, 28);
372     Agu = _mm_xor_si128(Agu, Du);
373     Bge = ROL64in128(Agu, 20);
374     Aka = _mm_xor_si128(Aka, Da);
375     Bgi = ROL64in128(Aka, 3);
376     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
377     Ca = _mm_xor_si128(Ca, Ega);
378     Ame = _mm_xor_si128(Ame, De);
379     Bgo = ROL64in128(Ame, 45);
380     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
381     Ce = _mm_xor_si128(Ce, Ege);
382     Asi = _mm_xor_si128(Asi, Di);
383     Bgu = ROL64in128(Asi, 61);
384     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
385     Ci = _mm_xor_si128(Ci, Egi);
386     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
387     Co = _mm_xor_si128(Co, Ego);
388     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
389     Cu = _mm_xor_si128(Cu, Egu);
390     Abe = _mm_xor_si128(Abe, De);
391     Bka = ROL64in128(Abe, 1);
392     Agi = _mm_xor_si128(Agi, Di);
393     Bke = ROL64in128(Agi, 6);
394     Ako = _mm_xor_si128(Ako, Do);
395     Bki = ROL64in128(Ako, 25);
396     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
397     Ca = _mm_xor_si128(Ca, Eka);
398     Amu = _mm_xor_si128(Amu, Du);
399     Bko = ROL64in128_8(Amu);
400     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
401     Ce = _mm_xor_si128(Ce, Eke);
402     Asa = _mm_xor_si128(Asa, Da);
403     Bku = ROL64in128(Asa, 18);
404     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
405     Ci = _mm_xor_si128(Ci, Eki);
406     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
407     Co = _mm_xor_si128(Co, Eko);
408     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
409     Cu = _mm_xor_si128(Cu, Eku);
410     Abu = _mm_xor_si128(Abu, Du);
411     Bma = ROL64in128(Abu, 27);
412     Aga = _mm_xor_si128(Aga, Da);
413     Bme = ROL64in128(Aga, 36);
414     Ake = _mm_xor_si128(Ake, De);
415     Bmi = ROL64in128(Ake, 10);
416     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
417     Ca = _mm_xor_si128(Ca, Ema);
418     Ami = _mm_xor_si128(Ami, Di);
419     Bmo = ROL64in128(Ami, 15);
420     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
421     Ce = _mm_xor_si128(Ce, Eme);
422     Aso = _mm_xor_si128(Aso, Do);
423     Bmu = ROL64in128_56(Aso);
424     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
425     Ci = _mm_xor_si128(Ci, Emi);
426     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
427     Co = _mm_xor_si128(Co, Emo);
428     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
429     Cu = _mm_xor_si128(Cu, Emu);
430     Abi = _mm_xor_si128(Abi, Di);
431     Bsa = ROL64in128(Abi, 62);
432     Ago = _mm_xor_si128(Ago, Do);
433     Bse = ROL64in128(Ago, 55);
434     Aku = _mm_xor_si128(Aku, Du);
435     Bsi = ROL64in128(Aku, 39);
436     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
437     Ca = _mm_xor_si128(Ca, Esa);
438     Ama = _mm_xor_si128(Ama, Da);
439     Bso = ROL64in128(Ama, 41);
440     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
441     Ce = _mm_xor_si128(Ce, Ese);
442     Ase = _mm_xor_si128(Ase, De);
443     Bsu = ROL64in128(Ase, 2);
444     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
445     Ci = _mm_xor_si128(Ci, Esi);
446     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
447     Co = _mm_xor_si128(Co, Eso);
448     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
449     Cu = _mm_xor_si128(Cu, Esu);
450     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
451     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
452     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
453     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
454     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
455     Eba = _mm_xor_si128(Eba, Da);
456     Bba = Eba;
457     Ege = _mm_xor_si128(Ege, De);
458     Bbe = ROL64in128(Ege, 44);
459     Eki = _mm_xor_si128(Eki, Di);
460     Bbi = ROL64in128(Eki, 43);
461     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
462     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[3]));
463     Ca = Aba;
464     Emo = _mm_xor_si128(Emo, Do);
465     Bbo = ROL64in128(Emo, 21);
466     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
467     Ce = Abe;
468     Esu = _mm_xor_si128(Esu, Du);
469     Bbu = ROL64in128(Esu, 14);
470     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
471     Ci = Abi;
472     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
473     Co = Abo;
474     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
475     Cu = Abu;
476     Ebo = _mm_xor_si128(Ebo, Do);
477     Bga = ROL64in128(Ebo, 28);
478     Egu = _mm_xor_si128(Egu, Du);
479     Bge = ROL64in128(Egu, 20);
480     Eka = _mm_xor_si128(Eka, Da);
481     Bgi = ROL64in128(Eka, 3);
482     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
483     Ca = _mm_xor_si128(Ca, Aga);
484     Eme = _mm_xor_si128(Eme, De);
485     Bgo = ROL64in128(Eme, 45);
486     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
487     Ce = _mm_xor_si128(Ce, Age);
488     Esi = _mm_xor_si128(Esi, Di);
489     Bgu = ROL64in128(Esi, 61);
490     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
491     Ci = _mm_xor_si128(Ci, Agi);
492     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
493     Co = _mm_xor_si128(Co, Ago);
494     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
495     Cu = _mm_xor_si128(Cu, Agu);
496     Ebe = _mm_xor_si128(Ebe, De);
497     Bka = ROL64in128(Ebe, 1);
498     Egi = _mm_xor_si128(Egi, Di);
499     Bke = ROL64in128(Egi, 6);
500     Eko = _mm_xor_si128(Eko, Do);
501     Bki = ROL64in128(Eko, 25);
502     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
503     Ca = _mm_xor_si128(Ca, Aka);
504     Emu = _mm_xor_si128(Emu, Du);
505     Bko = ROL64in128_8(Emu);
506     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
507     Ce = _mm_xor_si128(Ce, Ake);
508     Esa = _mm_xor_si128(Esa, Da);
509     Bku = ROL64in128(Esa, 18);
510     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
511     Ci = _mm_xor_si128(Ci, Aki);
512     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
513     Co = _mm_xor_si128(Co, Ako);
514     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
515     Cu = _mm_xor_si128(Cu, Aku);
516     Ebu = _mm_xor_si128(Ebu, Du);
517     Bma = ROL64in128(Ebu, 27);
518     Ega = _mm_xor_si128(Ega, Da);
519     Bme = ROL64in128(Ega, 36);
520     Eke = _mm_xor_si128(Eke, De);
521     Bmi = ROL64in128(Eke, 10);
522     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
523     Ca = _mm_xor_si128(Ca, Ama);
524     Emi = _mm_xor_si128(Emi, Di);
525     Bmo = ROL64in128(Emi, 15);
526     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
527     Ce = _mm_xor_si128(Ce, Ame);
528     Eso = _mm_xor_si128(Eso, Do);
529     Bmu = ROL64in128_56(Eso);
530     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
531     Ci = _mm_xor_si128(Ci, Ami);
532     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
533     Co = _mm_xor_si128(Co, Amo);
534     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
535     Cu = _mm_xor_si128(Cu, Amu);
536     Ebi = _mm_xor_si128(Ebi, Di);
537     Bsa = ROL64in128(Ebi, 62);
538     Ego = _mm_xor_si128(Ego, Do);
539     Bse = ROL64in128(Ego, 55);
540     Eku = _mm_xor_si128(Eku, Du);
541     Bsi = ROL64in128(Eku, 39);
542     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
543     Ca = _mm_xor_si128(Ca, Asa);
544     Ema = _mm_xor_si128(Ema, Da);
545     Bso = ROL64in128(Ema, 41);
546     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
547     Ce = _mm_xor_si128(Ce, Ase);
548     Ese = _mm_xor_si128(Ese, De);
549     Bsu = ROL64in128(Ese, 2);
550     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
551     Ci = _mm_xor_si128(Ci, Asi);
552     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
553     Co = _mm_xor_si128(Co, Aso);
554     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
555     Cu = _mm_xor_si128(Cu, Asu);
556     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
557     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
558     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
559     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
560     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
561     Aba = _mm_xor_si128(Aba, Da);
562     Bba = Aba;
563     Age = _mm_xor_si128(Age, De);
564     Bbe = ROL64in128(Age, 44);
565     Aki = _mm_xor_si128(Aki, Di);
566     Bbi = ROL64in128(Aki, 43);
567     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
568     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[4]));
569     Ca = Eba;
570     Amo = _mm_xor_si128(Amo, Do);
571     Bbo = ROL64in128(Amo, 21);
572     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
573     Ce = Ebe;
574     Asu = _mm_xor_si128(Asu, Du);
575     Bbu = ROL64in128(Asu, 14);
576     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
577     Ci = Ebi;
578     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
579     Co = Ebo;
580     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
581     Cu = Ebu;
582     Abo = _mm_xor_si128(Abo, Do);
583     Bga = ROL64in128(Abo, 28);
584     Agu = _mm_xor_si128(Agu, Du);
585     Bge = ROL64in128(Agu, 20);
586     Aka = _mm_xor_si128(Aka, Da);
587     Bgi = ROL64in128(Aka, 3);
588     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
589     Ca = _mm_xor_si128(Ca, Ega);
590     Ame = _mm_xor_si128(Ame, De);
591     Bgo = ROL64in128(Ame, 45);
592     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
593     Ce = _mm_xor_si128(Ce, Ege);
594     Asi = _mm_xor_si128(Asi, Di);
595     Bgu = ROL64in128(Asi, 61);
596     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
597     Ci = _mm_xor_si128(Ci, Egi);
598     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
599     Co = _mm_xor_si128(Co, Ego);
600     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
601     Cu = _mm_xor_si128(Cu, Egu);
602     Abe = _mm_xor_si128(Abe, De);
603     Bka = ROL64in128(Abe, 1);
604     Agi = _mm_xor_si128(Agi, Di);
605     Bke = ROL64in128(Agi, 6);
606     Ako = _mm_xor_si128(Ako, Do);
607     Bki = ROL64in128(Ako, 25);
608     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
609     Ca = _mm_xor_si128(Ca, Eka);
610     Amu = _mm_xor_si128(Amu, Du);
611     Bko = ROL64in128_8(Amu);
612     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
613     Ce = _mm_xor_si128(Ce, Eke);
614     Asa = _mm_xor_si128(Asa, Da);
615     Bku = ROL64in128(Asa, 18);
616     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
617     Ci = _mm_xor_si128(Ci, Eki);
618     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
619     Co = _mm_xor_si128(Co, Eko);
620     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
621     Cu = _mm_xor_si128(Cu, Eku);
622     Abu = _mm_xor_si128(Abu, Du);
623     Bma = ROL64in128(Abu, 27);
624     Aga = _mm_xor_si128(Aga, Da);
625     Bme = ROL64in128(Aga, 36);
626     Ake = _mm_xor_si128(Ake, De);
627     Bmi = ROL64in128(Ake, 10);
628     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
629     Ca = _mm_xor_si128(Ca, Ema);
630     Ami = _mm_xor_si128(Ami, Di);
631     Bmo = ROL64in128(Ami, 15);
632     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
633     Ce = _mm_xor_si128(Ce, Eme);
634     Aso = _mm_xor_si128(Aso, Do);
635     Bmu = ROL64in128_56(Aso);
636     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
637     Ci = _mm_xor_si128(Ci, Emi);
638     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
639     Co = _mm_xor_si128(Co, Emo);
640     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
641     Cu = _mm_xor_si128(Cu, Emu);
642     Abi = _mm_xor_si128(Abi, Di);
643     Bsa = ROL64in128(Abi, 62);
644     Ago = _mm_xor_si128(Ago, Do);
645     Bse = ROL64in128(Ago, 55);
646     Aku = _mm_xor_si128(Aku, Du);
647     Bsi = ROL64in128(Aku, 39);
648     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
649     Ca = _mm_xor_si128(Ca, Esa);
650     Ama = _mm_xor_si128(Ama, Da);
651     Bso = ROL64in128(Ama, 41);
652     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
653     Ce = _mm_xor_si128(Ce, Ese);
654     Ase = _mm_xor_si128(Ase, De);
655     Bsu = ROL64in128(Ase, 2);
656     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
657     Ci = _mm_xor_si128(Ci, Esi);
658     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
659     Co = _mm_xor_si128(Co, Eso);
660     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
661     Cu = _mm_xor_si128(Cu, Esu);
662     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
663     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
664     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
665     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
666     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
667     Eba = _mm_xor_si128(Eba, Da);
668     Bba = Eba;
669     Ege = _mm_xor_si128(Ege, De);
670     Bbe = ROL64in128(Ege, 44);
671     Eki = _mm_xor_si128(Eki, Di);
672     Bbi = ROL64in128(Eki, 43);
673     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
674     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[5]));
675     Ca = Aba;
676     Emo = _mm_xor_si128(Emo, Do);
677     Bbo = ROL64in128(Emo, 21);
678     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
679     Ce = Abe;
680     Esu = _mm_xor_si128(Esu, Du);
681     Bbu = ROL64in128(Esu, 14);
682     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
683     Ci = Abi;
684     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
685     Co = Abo;
686     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
687     Cu = Abu;
688     Ebo = _mm_xor_si128(Ebo, Do);
689     Bga = ROL64in128(Ebo, 28);
690     Egu = _mm_xor_si128(Egu, Du);
691     Bge = ROL64in128(Egu, 20);
692     Eka = _mm_xor_si128(Eka, Da);
693     Bgi = ROL64in128(Eka, 3);
694     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
695     Ca = _mm_xor_si128(Ca, Aga);
696     Eme = _mm_xor_si128(Eme, De);
697     Bgo = ROL64in128(Eme, 45);
698     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
699     Ce = _mm_xor_si128(Ce, Age);
700     Esi = _mm_xor_si128(Esi, Di);
701     Bgu = ROL64in128(Esi, 61);
702     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
703     Ci = _mm_xor_si128(Ci, Agi);
704     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
705     Co = _mm_xor_si128(Co, Ago);
706     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
707     Cu = _mm_xor_si128(Cu, Agu);
708     Ebe = _mm_xor_si128(Ebe, De);
709     Bka = ROL64in128(Ebe, 1);
710     Egi = _mm_xor_si128(Egi, Di);
711     Bke = ROL64in128(Egi, 6);
712     Eko = _mm_xor_si128(Eko, Do);
713     Bki = ROL64in128(Eko, 25);
714     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
715     Ca = _mm_xor_si128(Ca, Aka);
716     Emu = _mm_xor_si128(Emu, Du);
717     Bko = ROL64in128_8(Emu);
718     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
719     Ce = _mm_xor_si128(Ce, Ake);
720     Esa = _mm_xor_si128(Esa, Da);
721     Bku = ROL64in128(Esa, 18);
722     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
723     Ci = _mm_xor_si128(Ci, Aki);
724     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
725     Co = _mm_xor_si128(Co, Ako);
726     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
727     Cu = _mm_xor_si128(Cu, Aku);
728     Ebu = _mm_xor_si128(Ebu, Du);
729     Bma = ROL64in128(Ebu, 27);
730     Ega = _mm_xor_si128(Ega, Da);
731     Bme = ROL64in128(Ega, 36);
732     Eke = _mm_xor_si128(Eke, De);
733     Bmi = ROL64in128(Eke, 10);
734     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
735     Ca = _mm_xor_si128(Ca, Ama);
736     Emi = _mm_xor_si128(Emi, Di);
737     Bmo = ROL64in128(Emi, 15);
738     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
739     Ce = _mm_xor_si128(Ce, Ame);
740     Eso = _mm_xor_si128(Eso, Do);
741     Bmu = ROL64in128_56(Eso);
742     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
743     Ci = _mm_xor_si128(Ci, Ami);
744     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
745     Co = _mm_xor_si128(Co, Amo);
746     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
747     Cu = _mm_xor_si128(Cu, Amu);
748     Ebi = _mm_xor_si128(Ebi, Di);
749     Bsa = ROL64in128(Ebi, 62);
750     Ego = _mm_xor_si128(Ego, Do);
751     Bse = ROL64in128(Ego, 55);
752     Eku = _mm_xor_si128(Eku, Du);
753     Bsi = ROL64in128(Eku, 39);
754     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
755     Ca = _mm_xor_si128(Ca, Asa);
756     Ema = _mm_xor_si128(Ema, Da);
757     Bso = ROL64in128(Ema, 41);
758     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
759     Ce = _mm_xor_si128(Ce, Ase);
760     Ese = _mm_xor_si128(Ese, De);
761     Bsu = ROL64in128(Ese, 2);
762     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
763     Ci = _mm_xor_si128(Ci, Asi);
764     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
765     Co = _mm_xor_si128(Co, Aso);
766     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
767     Cu = _mm_xor_si128(Cu, Asu);
768     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
769     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
770     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
771     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
772     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
773     Aba = _mm_xor_si128(Aba, Da);
774     Bba = Aba;
775     Age = _mm_xor_si128(Age, De);
776     Bbe = ROL64in128(Age, 44);
777     Aki = _mm_xor_si128(Aki, Di);
778     Bbi = ROL64in128(Aki, 43);
779     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
780     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[6]));
781     Ca = Eba;
782     Amo = _mm_xor_si128(Amo, Do);
783     Bbo = ROL64in128(Amo, 21);
784     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
785     Ce = Ebe;
786     Asu = _mm_xor_si128(Asu, Du);
787     Bbu = ROL64in128(Asu, 14);
788     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
789     Ci = Ebi;
790     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
791     Co = Ebo;
792     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
793     Cu = Ebu;
794     Abo = _mm_xor_si128(Abo, Do);
795     Bga = ROL64in128(Abo, 28);
796     Agu = _mm_xor_si128(Agu, Du);
797     Bge = ROL64in128(Agu, 20);
798     Aka = _mm_xor_si128(Aka, Da);
799     Bgi = ROL64in128(Aka, 3);
800     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
801     Ca = _mm_xor_si128(Ca, Ega);
802     Ame = _mm_xor_si128(Ame, De);
803     Bgo = ROL64in128(Ame, 45);
804     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
805     Ce = _mm_xor_si128(Ce, Ege);
806     Asi = _mm_xor_si128(Asi, Di);
807     Bgu = ROL64in128(Asi, 61);
808     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
809     Ci = _mm_xor_si128(Ci, Egi);
810     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
811     Co = _mm_xor_si128(Co, Ego);
812     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
813     Cu = _mm_xor_si128(Cu, Egu);
814     Abe = _mm_xor_si128(Abe, De);
815     Bka = ROL64in128(Abe, 1);
816     Agi = _mm_xor_si128(Agi, Di);
817     Bke = ROL64in128(Agi, 6);
818     Ako = _mm_xor_si128(Ako, Do);
819     Bki = ROL64in128(Ako, 25);
820     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
821     Ca = _mm_xor_si128(Ca, Eka);
822     Amu = _mm_xor_si128(Amu, Du);
823     Bko = ROL64in128_8(Amu);
824     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
825     Ce = _mm_xor_si128(Ce, Eke);
826     Asa = _mm_xor_si128(Asa, Da);
827     Bku = ROL64in128(Asa, 18);
828     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
829     Ci = _mm_xor_si128(Ci, Eki);
830     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
831     Co = _mm_xor_si128(Co, Eko);
832     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
833     Cu = _mm_xor_si128(Cu, Eku);
834     Abu = _mm_xor_si128(Abu, Du);
835     Bma = ROL64in128(Abu, 27);
836     Aga = _mm_xor_si128(Aga, Da);
837     Bme = ROL64in128(Aga, 36);
838     Ake = _mm_xor_si128(Ake, De);
839     Bmi = ROL64in128(Ake, 10);
840     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
841     Ca = _mm_xor_si128(Ca, Ema);
842     Ami = _mm_xor_si128(Ami, Di);
843     Bmo = ROL64in128(Ami, 15);
844     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
845     Ce = _mm_xor_si128(Ce, Eme);
846     Aso = _mm_xor_si128(Aso, Do);
847     Bmu = ROL64in128_56(Aso);
848     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
849     Ci = _mm_xor_si128(Ci, Emi);
850     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
851     Co = _mm_xor_si128(Co, Emo);
852     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
853     Cu = _mm_xor_si128(Cu, Emu);
854     Abi = _mm_xor_si128(Abi, Di);
855     Bsa = ROL64in128(Abi, 62);
856     Ago = _mm_xor_si128(Ago, Do);
857     Bse = ROL64in128(Ago, 55);
858     Aku = _mm_xor_si128(Aku, Du);
859     Bsi = ROL64in128(Aku, 39);
860     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
861     Ca = _mm_xor_si128(Ca, Esa);
862     Ama = _mm_xor_si128(Ama, Da);
863     Bso = ROL64in128(Ama, 41);
864     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
865     Ce = _mm_xor_si128(Ce, Ese);
866     Ase = _mm_xor_si128(Ase, De);
867     Bsu = ROL64in128(Ase, 2);
868     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
869     Ci = _mm_xor_si128(Ci, Esi);
870     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
871     Co = _mm_xor_si128(Co, Eso);
872     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
873     Cu = _mm_xor_si128(Cu, Esu);
874     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
875     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
876     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
877     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
878     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
879     Eba = _mm_xor_si128(Eba, Da);
880     Bba = Eba;
881     Ege = _mm_xor_si128(Ege, De);
882     Bbe = ROL64in128(Ege, 44);
883     Eki = _mm_xor_si128(Eki, Di);
884     Bbi = ROL64in128(Eki, 43);
885     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
886     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[7]));
887     Ca = Aba;
888     Emo = _mm_xor_si128(Emo, Do);
889     Bbo = ROL64in128(Emo, 21);
890     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
891     Ce = Abe;
892     Esu = _mm_xor_si128(Esu, Du);
893     Bbu = ROL64in128(Esu, 14);
894     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
895     Ci = Abi;
896     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
897     Co = Abo;
898     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
899     Cu = Abu;
900     Ebo = _mm_xor_si128(Ebo, Do);
901     Bga = ROL64in128(Ebo, 28);
902     Egu = _mm_xor_si128(Egu, Du);
903     Bge = ROL64in128(Egu, 20);
904     Eka = _mm_xor_si128(Eka, Da);
905     Bgi = ROL64in128(Eka, 3);
906     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
907     Ca = _mm_xor_si128(Ca, Aga);
908     Eme = _mm_xor_si128(Eme, De);
909     Bgo = ROL64in128(Eme, 45);
910     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
911     Ce = _mm_xor_si128(Ce, Age);
912     Esi = _mm_xor_si128(Esi, Di);
913     Bgu = ROL64in128(Esi, 61);
914     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
915     Ci = _mm_xor_si128(Ci, Agi);
916     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
917     Co = _mm_xor_si128(Co, Ago);
918     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
919     Cu = _mm_xor_si128(Cu, Agu);
920     Ebe = _mm_xor_si128(Ebe, De);
921     Bka = ROL64in128(Ebe, 1);
922     Egi = _mm_xor_si128(Egi, Di);
923     Bke = ROL64in128(Egi, 6);
924     Eko = _mm_xor_si128(Eko, Do);
925     Bki = ROL64in128(Eko, 25);
926     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
927     Ca = _mm_xor_si128(Ca, Aka);
928     Emu = _mm_xor_si128(Emu, Du);
929     Bko = ROL64in128_8(Emu);
930     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
931     Ce = _mm_xor_si128(Ce, Ake);
932     Esa = _mm_xor_si128(Esa, Da);
933     Bku = ROL64in128(Esa, 18);
934     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
935     Ci = _mm_xor_si128(Ci, Aki);
936     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
937     Co = _mm_xor_si128(Co, Ako);
938     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
939     Cu = _mm_xor_si128(Cu, Aku);
940     Ebu = _mm_xor_si128(Ebu, Du);
941     Bma = ROL64in128(Ebu, 27);
942     Ega = _mm_xor_si128(Ega, Da);
943     Bme = ROL64in128(Ega, 36);
944     Eke = _mm_xor_si128(Eke, De);
945     Bmi = ROL64in128(Eke, 10);
946     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
947     Ca = _mm_xor_si128(Ca, Ama);
948     Emi = _mm_xor_si128(Emi, Di);
949     Bmo = ROL64in128(Emi, 15);
950     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
951     Ce = _mm_xor_si128(Ce, Ame);
952     Eso = _mm_xor_si128(Eso, Do);
953     Bmu = ROL64in128_56(Eso);
954     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
955     Ci = _mm_xor_si128(Ci, Ami);
956     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
957     Co = _mm_xor_si128(Co, Amo);
958     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
959     Cu = _mm_xor_si128(Cu, Amu);
960     Ebi = _mm_xor_si128(Ebi, Di);
961     Bsa = ROL64in128(Ebi, 62);
962     Ego = _mm_xor_si128(Ego, Do);
963     Bse = ROL64in128(Ego, 55);
964     Eku = _mm_xor_si128(Eku, Du);
965     Bsi = ROL64in128(Eku, 39);
966     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
967     Ca = _mm_xor_si128(Ca, Asa);
968     Ema = _mm_xor_si128(Ema, Da);
969     Bso = ROL64in128(Ema, 41);
970     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
971     Ce = _mm_xor_si128(Ce, Ase);
972     Ese = _mm_xor_si128(Ese, De);
973     Bsu = ROL64in128(Ese, 2);
974     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
975     Ci = _mm_xor_si128(Ci, Asi);
976     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
977     Co = _mm_xor_si128(Co, Aso);
978     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
979     Cu = _mm_xor_si128(Cu, Asu);
980     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
981     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
982     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
983     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
984     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
985     Aba = _mm_xor_si128(Aba, Da);
986     Bba = Aba;
987     Age = _mm_xor_si128(Age, De);
988     Bbe = ROL64in128(Age, 44);
989     Aki = _mm_xor_si128(Aki, Di);
990     Bbi = ROL64in128(Aki, 43);
991     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
992     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[8]));
993     Ca = Eba;
994     Amo = _mm_xor_si128(Amo, Do);
995     Bbo = ROL64in128(Amo, 21);
996     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
997     Ce = Ebe;
998     Asu = _mm_xor_si128(Asu, Du);
999     Bbu = ROL64in128(Asu, 14);
1000     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1001     Ci = Ebi;
1002     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1003     Co = Ebo;
1004     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1005     Cu = Ebu;
1006     Abo = _mm_xor_si128(Abo, Do);
1007     Bga = ROL64in128(Abo, 28);
1008     Agu = _mm_xor_si128(Agu, Du);
1009     Bge = ROL64in128(Agu, 20);
1010     Aka = _mm_xor_si128(Aka, Da);
1011     Bgi = ROL64in128(Aka, 3);
1012     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1013     Ca = _mm_xor_si128(Ca, Ega);
1014     Ame = _mm_xor_si128(Ame, De);
1015     Bgo = ROL64in128(Ame, 45);
1016     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1017     Ce = _mm_xor_si128(Ce, Ege);
1018     Asi = _mm_xor_si128(Asi, Di);
1019     Bgu = ROL64in128(Asi, 61);
1020     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1021     Ci = _mm_xor_si128(Ci, Egi);
1022     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1023     Co = _mm_xor_si128(Co, Ego);
1024     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1025     Cu = _mm_xor_si128(Cu, Egu);
1026     Abe = _mm_xor_si128(Abe, De);
1027     Bka = ROL64in128(Abe, 1);
1028     Agi = _mm_xor_si128(Agi, Di);
1029     Bke = ROL64in128(Agi, 6);
1030     Ako = _mm_xor_si128(Ako, Do);
1031     Bki = ROL64in128(Ako, 25);
1032     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1033     Ca = _mm_xor_si128(Ca, Eka);
1034     Amu = _mm_xor_si128(Amu, Du);
1035     Bko = ROL64in128_8(Amu);
1036     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1037     Ce = _mm_xor_si128(Ce, Eke);
1038     Asa = _mm_xor_si128(Asa, Da);
1039     Bku = ROL64in128(Asa, 18);
1040     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1041     Ci = _mm_xor_si128(Ci, Eki);
1042     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1043     Co = _mm_xor_si128(Co, Eko);
1044     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1045     Cu = _mm_xor_si128(Cu, Eku);
1046     Abu = _mm_xor_si128(Abu, Du);
1047     Bma = ROL64in128(Abu, 27);
1048     Aga = _mm_xor_si128(Aga, Da);
1049     Bme = ROL64in128(Aga, 36);
1050     Ake = _mm_xor_si128(Ake, De);
1051     Bmi = ROL64in128(Ake, 10);
1052     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
1053     Ca = _mm_xor_si128(Ca, Ema);
1054     Ami = _mm_xor_si128(Ami, Di);
1055     Bmo = ROL64in128(Ami, 15);
1056     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
1057     Ce = _mm_xor_si128(Ce, Eme);
1058     Aso = _mm_xor_si128(Aso, Do);
1059     Bmu = ROL64in128_56(Aso);
1060     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
1061     Ci = _mm_xor_si128(Ci, Emi);
1062     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
1063     Co = _mm_xor_si128(Co, Emo);
1064     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
1065     Cu = _mm_xor_si128(Cu, Emu);
1066     Abi = _mm_xor_si128(Abi, Di);
1067     Bsa = ROL64in128(Abi, 62);
1068     Ago = _mm_xor_si128(Ago, Do);
1069     Bse = ROL64in128(Ago, 55);
1070     Aku = _mm_xor_si128(Aku, Du);
1071     Bsi = ROL64in128(Aku, 39);
1072     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
1073     Ca = _mm_xor_si128(Ca, Esa);
1074     Ama = _mm_xor_si128(Ama, Da);
1075     Bso = ROL64in128(Ama, 41);
1076     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
1077     Ce = _mm_xor_si128(Ce, Ese);
1078     Ase = _mm_xor_si128(Ase, De);
1079     Bsu = ROL64in128(Ase, 2);
1080     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
1081     Ci = _mm_xor_si128(Ci, Esi);
1082     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
1083     Co = _mm_xor_si128(Co, Eso);
1084     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
1085     Cu = _mm_xor_si128(Cu, Esu);
1086     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
1087     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
1088     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
1089     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
1090     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
1091     Eba = _mm_xor_si128(Eba, Da);
1092     Bba = Eba;
1093     Ege = _mm_xor_si128(Ege, De);
1094     Bbe = ROL64in128(Ege, 44);
1095     Eki = _mm_xor_si128(Eki, Di);
1096     Bbi = ROL64in128(Eki, 43);
1097     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
1098     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[9]));
1099     Ca = Aba;
1100     Emo = _mm_xor_si128(Emo, Do);
1101     Bbo = ROL64in128(Emo, 21);
1102     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
1103     Ce = Abe;
1104     Esu = _mm_xor_si128(Esu, Du);
1105     Bbu = ROL64in128(Esu, 14);
1106     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1107     Ci = Abi;
1108     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1109     Co = Abo;
1110     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1111     Cu = Abu;
1112     Ebo = _mm_xor_si128(Ebo, Do);
1113     Bga = ROL64in128(Ebo, 28);
1114     Egu = _mm_xor_si128(Egu, Du);
1115     Bge = ROL64in128(Egu, 20);
1116     Eka = _mm_xor_si128(Eka, Da);
1117     Bgi = ROL64in128(Eka, 3);
1118     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1119     Ca = _mm_xor_si128(Ca, Aga);
1120     Eme = _mm_xor_si128(Eme, De);
1121     Bgo = ROL64in128(Eme, 45);
1122     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1123     Ce = _mm_xor_si128(Ce, Age);
1124     Esi = _mm_xor_si128(Esi, Di);
1125     Bgu = ROL64in128(Esi, 61);
1126     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1127     Ci = _mm_xor_si128(Ci, Agi);
1128     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1129     Co = _mm_xor_si128(Co, Ago);
1130     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1131     Cu = _mm_xor_si128(Cu, Agu);
1132     Ebe = _mm_xor_si128(Ebe, De);
1133     Bka = ROL64in128(Ebe, 1);
1134     Egi = _mm_xor_si128(Egi, Di);
1135     Bke = ROL64in128(Egi, 6);
1136     Eko = _mm_xor_si128(Eko, Do);
1137     Bki = ROL64in128(Eko, 25);
1138     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1139     Ca = _mm_xor_si128(Ca, Aka);
1140     Emu = _mm_xor_si128(Emu, Du);
1141     Bko = ROL64in128_8(Emu);
1142     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1143     Ce = _mm_xor_si128(Ce, Ake);
1144     Esa = _mm_xor_si128(Esa, Da);
1145     Bku = ROL64in128(Esa, 18);
1146     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1147     Ci = _mm_xor_si128(Ci, Aki);
1148     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1149     Co = _mm_xor_si128(Co, Ako);
1150     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1151     Cu = _mm_xor_si128(Cu, Aku);
1152     Ebu = _mm_xor_si128(Ebu, Du);
1153     Bma = ROL64in128(Ebu, 27);
1154     Ega = _mm_xor_si128(Ega, Da);
1155     Bme = ROL64in128(Ega, 36);
1156     Eke = _mm_xor_si128(Eke, De);
1157     Bmi = ROL64in128(Eke, 10);
1158     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
1159     Ca = _mm_xor_si128(Ca, Ama);
1160     Emi = _mm_xor_si128(Emi, Di);
1161     Bmo = ROL64in128(Emi, 15);
1162     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
1163     Ce = _mm_xor_si128(Ce, Ame);
1164     Eso = _mm_xor_si128(Eso, Do);
1165     Bmu = ROL64in128_56(Eso);
1166     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
1167     Ci = _mm_xor_si128(Ci, Ami);
1168     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
1169     Co = _mm_xor_si128(Co, Amo);
1170     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
1171     Cu = _mm_xor_si128(Cu, Amu);
1172     Ebi = _mm_xor_si128(Ebi, Di);
1173     Bsa = ROL64in128(Ebi, 62);
1174     Ego = _mm_xor_si128(Ego, Do);
1175     Bse = ROL64in128(Ego, 55);
1176     Eku = _mm_xor_si128(Eku, Du);
1177     Bsi = ROL64in128(Eku, 39);
1178     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
1179     Ca = _mm_xor_si128(Ca, Asa);
1180     Ema = _mm_xor_si128(Ema, Da);
1181     Bso = ROL64in128(Ema, 41);
1182     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
1183     Ce = _mm_xor_si128(Ce, Ase);
1184     Ese = _mm_xor_si128(Ese, De);
1185     Bsu = ROL64in128(Ese, 2);
1186     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
1187     Ci = _mm_xor_si128(Ci, Asi);
1188     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
1189     Co = _mm_xor_si128(Co, Aso);
1190     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
1191     Cu = _mm_xor_si128(Cu, Asu);
1192     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
1193     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
1194     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
1195     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
1196     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
1197     Aba = _mm_xor_si128(Aba, Da);
1198     Bba = Aba;
1199     Age = _mm_xor_si128(Age, De);
1200     Bbe = ROL64in128(Age, 44);
1201     Aki = _mm_xor_si128(Aki, Di);
1202     Bbi = ROL64in128(Aki, 43);
1203     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
1204     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[10]));
1205     Ca = Eba;
1206     Amo = _mm_xor_si128(Amo, Do);
1207     Bbo = ROL64in128(Amo, 21);
1208     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
1209     Ce = Ebe;
1210     Asu = _mm_xor_si128(Asu, Du);
1211     Bbu = ROL64in128(Asu, 14);
1212     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1213     Ci = Ebi;
1214     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1215     Co = Ebo;
1216     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1217     Cu = Ebu;
1218     Abo = _mm_xor_si128(Abo, Do);
1219     Bga = ROL64in128(Abo, 28);
1220     Agu = _mm_xor_si128(Agu, Du);
1221     Bge = ROL64in128(Agu, 20);
1222     Aka = _mm_xor_si128(Aka, Da);
1223     Bgi = ROL64in128(Aka, 3);
1224     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1225     Ca = _mm_xor_si128(Ca, Ega);
1226     Ame = _mm_xor_si128(Ame, De);
1227     Bgo = ROL64in128(Ame, 45);
1228     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1229     Ce = _mm_xor_si128(Ce, Ege);
1230     Asi = _mm_xor_si128(Asi, Di);
1231     Bgu = ROL64in128(Asi, 61);
1232     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1233     Ci = _mm_xor_si128(Ci, Egi);
1234     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1235     Co = _mm_xor_si128(Co, Ego);
1236     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1237     Cu = _mm_xor_si128(Cu, Egu);
1238     Abe = _mm_xor_si128(Abe, De);
1239     Bka = ROL64in128(Abe, 1);
1240     Agi = _mm_xor_si128(Agi, Di);
1241     Bke = ROL64in128(Agi, 6);
1242     Ako = _mm_xor_si128(Ako, Do);
1243     Bki = ROL64in128(Ako, 25);
1244     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1245     Ca = _mm_xor_si128(Ca, Eka);
1246     Amu = _mm_xor_si128(Amu, Du);
1247     Bko = ROL64in128_8(Amu);
1248     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1249     Ce = _mm_xor_si128(Ce, Eke);
1250     Asa = _mm_xor_si128(Asa, Da);
1251     Bku = ROL64in128(Asa, 18);
1252     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1253     Ci = _mm_xor_si128(Ci, Eki);
1254     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1255     Co = _mm_xor_si128(Co, Eko);
1256     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1257     Cu = _mm_xor_si128(Cu, Eku);
1258     Abu = _mm_xor_si128(Abu, Du);
1259     Bma = ROL64in128(Abu, 27);
1260     Aga = _mm_xor_si128(Aga, Da);
1261     Bme = ROL64in128(Aga, 36);
1262     Ake = _mm_xor_si128(Ake, De);
1263     Bmi = ROL64in128(Ake, 10);
1264     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
1265     Ca = _mm_xor_si128(Ca, Ema);
1266     Ami = _mm_xor_si128(Ami, Di);
1267     Bmo = ROL64in128(Ami, 15);
1268     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
1269     Ce = _mm_xor_si128(Ce, Eme);
1270     Aso = _mm_xor_si128(Aso, Do);
1271     Bmu = ROL64in128_56(Aso);
1272     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
1273     Ci = _mm_xor_si128(Ci, Emi);
1274     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
1275     Co = _mm_xor_si128(Co, Emo);
1276     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
1277     Cu = _mm_xor_si128(Cu, Emu);
1278     Abi = _mm_xor_si128(Abi, Di);
1279     Bsa = ROL64in128(Abi, 62);
1280     Ago = _mm_xor_si128(Ago, Do);
1281     Bse = ROL64in128(Ago, 55);
1282     Aku = _mm_xor_si128(Aku, Du);
1283     Bsi = ROL64in128(Aku, 39);
1284     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
1285     Ca = _mm_xor_si128(Ca, Esa);
1286     Ama = _mm_xor_si128(Ama, Da);
1287     Bso = ROL64in128(Ama, 41);
1288     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
1289     Ce = _mm_xor_si128(Ce, Ese);
1290     Ase = _mm_xor_si128(Ase, De);
1291     Bsu = ROL64in128(Ase, 2);
1292     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
1293     Ci = _mm_xor_si128(Ci, Esi);
1294     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
1295     Co = _mm_xor_si128(Co, Eso);
1296     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
1297     Cu = _mm_xor_si128(Cu, Esu);
1298     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
1299     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
1300     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
1301     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
1302     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
1303     Eba = _mm_xor_si128(Eba, Da);
1304     Bba = Eba;
1305     Ege = _mm_xor_si128(Ege, De);
1306     Bbe = ROL64in128(Ege, 44);
1307     Eki = _mm_xor_si128(Eki, Di);
1308     Bbi = ROL64in128(Eki, 43);
1309     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
1310     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[11]));
1311     Ca = Aba;
1312     Emo = _mm_xor_si128(Emo, Do);
1313     Bbo = ROL64in128(Emo, 21);
1314     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
1315     Ce = Abe;
1316     Esu = _mm_xor_si128(Esu, Du);
1317     Bbu = ROL64in128(Esu, 14);
1318     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1319     Ci = Abi;
1320     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1321     Co = Abo;
1322     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1323     Cu = Abu;
1324     Ebo = _mm_xor_si128(Ebo, Do);
1325     Bga = ROL64in128(Ebo, 28);
1326     Egu = _mm_xor_si128(Egu, Du);
1327     Bge = ROL64in128(Egu, 20);
1328     Eka = _mm_xor_si128(Eka, Da);
1329     Bgi = ROL64in128(Eka, 3);
1330     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1331     Ca = _mm_xor_si128(Ca, Aga);
1332     Eme = _mm_xor_si128(Eme, De);
1333     Bgo = ROL64in128(Eme, 45);
1334     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1335     Ce = _mm_xor_si128(Ce, Age);
1336     Esi = _mm_xor_si128(Esi, Di);
1337     Bgu = ROL64in128(Esi, 61);
1338     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1339     Ci = _mm_xor_si128(Ci, Agi);
1340     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1341     Co = _mm_xor_si128(Co, Ago);
1342     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1343     Cu = _mm_xor_si128(Cu, Agu);
1344     Ebe = _mm_xor_si128(Ebe, De);
1345     Bka = ROL64in128(Ebe, 1);
1346     Egi = _mm_xor_si128(Egi, Di);
1347     Bke = ROL64in128(Egi, 6);
1348     Eko = _mm_xor_si128(Eko, Do);
1349     Bki = ROL64in128(Eko, 25);
1350     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1351     Ca = _mm_xor_si128(Ca, Aka);
1352     Emu = _mm_xor_si128(Emu, Du);
1353     Bko = ROL64in128_8(Emu);
1354     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1355     Ce = _mm_xor_si128(Ce, Ake);
1356     Esa = _mm_xor_si128(Esa, Da);
1357     Bku = ROL64in128(Esa, 18);
1358     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1359     Ci = _mm_xor_si128(Ci, Aki);
1360     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1361     Co = _mm_xor_si128(Co, Ako);
1362     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1363     Cu = _mm_xor_si128(Cu, Aku);
1364     Ebu = _mm_xor_si128(Ebu, Du);
1365     Bma = ROL64in128(Ebu, 27);
1366     Ega = _mm_xor_si128(Ega, Da);
1367     Bme = ROL64in128(Ega, 36);
1368     Eke = _mm_xor_si128(Eke, De);
1369     Bmi = ROL64in128(Eke, 10);
1370     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
1371     Ca = _mm_xor_si128(Ca, Ama);
1372     Emi = _mm_xor_si128(Emi, Di);
1373     Bmo = ROL64in128(Emi, 15);
1374     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
1375     Ce = _mm_xor_si128(Ce, Ame);
1376     Eso = _mm_xor_si128(Eso, Do);
1377     Bmu = ROL64in128_56(Eso);
1378     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
1379     Ci = _mm_xor_si128(Ci, Ami);
1380     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
1381     Co = _mm_xor_si128(Co, Amo);
1382     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
1383     Cu = _mm_xor_si128(Cu, Amu);
1384     Ebi = _mm_xor_si128(Ebi, Di);
1385     Bsa = ROL64in128(Ebi, 62);
1386     Ego = _mm_xor_si128(Ego, Do);
1387     Bse = ROL64in128(Ego, 55);
1388     Eku = _mm_xor_si128(Eku, Du);
1389     Bsi = ROL64in128(Eku, 39);
1390     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
1391     Ca = _mm_xor_si128(Ca, Asa);
1392     Ema = _mm_xor_si128(Ema, Da);
1393     Bso = ROL64in128(Ema, 41);
1394     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
1395     Ce = _mm_xor_si128(Ce, Ase);
1396     Ese = _mm_xor_si128(Ese, De);
1397     Bsu = ROL64in128(Ese, 2);
1398     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
1399     Ci = _mm_xor_si128(Ci, Asi);
1400     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
1401     Co = _mm_xor_si128(Co, Aso);
1402     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
1403     Cu = _mm_xor_si128(Cu, Asu);
1404     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
1405     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
1406     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
1407     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
1408     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
1409     Aba = _mm_xor_si128(Aba, Da);
1410     Bba = Aba;
1411     Age = _mm_xor_si128(Age, De);
1412     Bbe = ROL64in128(Age, 44);
1413     Aki = _mm_xor_si128(Aki, Di);
1414     Bbi = ROL64in128(Aki, 43);
1415     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
1416     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[12]));
1417     Ca = Eba;
1418     Amo = _mm_xor_si128(Amo, Do);
1419     Bbo = ROL64in128(Amo, 21);
1420     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
1421     Ce = Ebe;
1422     Asu = _mm_xor_si128(Asu, Du);
1423     Bbu = ROL64in128(Asu, 14);
1424     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1425     Ci = Ebi;
1426     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1427     Co = Ebo;
1428     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1429     Cu = Ebu;
1430     Abo = _mm_xor_si128(Abo, Do);
1431     Bga = ROL64in128(Abo, 28);
1432     Agu = _mm_xor_si128(Agu, Du);
1433     Bge = ROL64in128(Agu, 20);
1434     Aka = _mm_xor_si128(Aka, Da);
1435     Bgi = ROL64in128(Aka, 3);
1436     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1437     Ca = _mm_xor_si128(Ca, Ega);
1438     Ame = _mm_xor_si128(Ame, De);
1439     Bgo = ROL64in128(Ame, 45);
1440     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1441     Ce = _mm_xor_si128(Ce, Ege);
1442     Asi = _mm_xor_si128(Asi, Di);
1443     Bgu = ROL64in128(Asi, 61);
1444     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1445     Ci = _mm_xor_si128(Ci, Egi);
1446     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1447     Co = _mm_xor_si128(Co, Ego);
1448     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1449     Cu = _mm_xor_si128(Cu, Egu);
1450     Abe = _mm_xor_si128(Abe, De);
1451     Bka = ROL64in128(Abe, 1);
1452     Agi = _mm_xor_si128(Agi, Di);
1453     Bke = ROL64in128(Agi, 6);
1454     Ako = _mm_xor_si128(Ako, Do);
1455     Bki = ROL64in128(Ako, 25);
1456     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1457     Ca = _mm_xor_si128(Ca, Eka);
1458     Amu = _mm_xor_si128(Amu, Du);
1459     Bko = ROL64in128_8(Amu);
1460     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1461     Ce = _mm_xor_si128(Ce, Eke);
1462     Asa = _mm_xor_si128(Asa, Da);
1463     Bku = ROL64in128(Asa, 18);
1464     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1465     Ci = _mm_xor_si128(Ci, Eki);
1466     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1467     Co = _mm_xor_si128(Co, Eko);
1468     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1469     Cu = _mm_xor_si128(Cu, Eku);
1470     Abu = _mm_xor_si128(Abu, Du);
1471     Bma = ROL64in128(Abu, 27);
1472     Aga = _mm_xor_si128(Aga, Da);
1473     Bme = ROL64in128(Aga, 36);
1474     Ake = _mm_xor_si128(Ake, De);
1475     Bmi = ROL64in128(Ake, 10);
1476     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
1477     Ca = _mm_xor_si128(Ca, Ema);
1478     Ami = _mm_xor_si128(Ami, Di);
1479     Bmo = ROL64in128(Ami, 15);
1480     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
1481     Ce = _mm_xor_si128(Ce, Eme);
1482     Aso = _mm_xor_si128(Aso, Do);
1483     Bmu = ROL64in128_56(Aso);
1484     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
1485     Ci = _mm_xor_si128(Ci, Emi);
1486     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
1487     Co = _mm_xor_si128(Co, Emo);
1488     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
1489     Cu = _mm_xor_si128(Cu, Emu);
1490     Abi = _mm_xor_si128(Abi, Di);
1491     Bsa = ROL64in128(Abi, 62);
1492     Ago = _mm_xor_si128(Ago, Do);
1493     Bse = ROL64in128(Ago, 55);
1494     Aku = _mm_xor_si128(Aku, Du);
1495     Bsi = ROL64in128(Aku, 39);
1496     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
1497     Ca = _mm_xor_si128(Ca, Esa);
1498     Ama = _mm_xor_si128(Ama, Da);
1499     Bso = ROL64in128(Ama, 41);
1500     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
1501     Ce = _mm_xor_si128(Ce, Ese);
1502     Ase = _mm_xor_si128(Ase, De);
1503     Bsu = ROL64in128(Ase, 2);
1504     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
1505     Ci = _mm_xor_si128(Ci, Esi);
1506     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
1507     Co = _mm_xor_si128(Co, Eso);
1508     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
1509     Cu = _mm_xor_si128(Cu, Esu);
1510     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
1511     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
1512     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
1513     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
1514     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
1515     Eba = _mm_xor_si128(Eba, Da);
1516     Bba = Eba;
1517     Ege = _mm_xor_si128(Ege, De);
1518     Bbe = ROL64in128(Ege, 44);
1519     Eki = _mm_xor_si128(Eki, Di);
1520     Bbi = ROL64in128(Eki, 43);
1521     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
1522     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[13]));
1523     Ca = Aba;
1524     Emo = _mm_xor_si128(Emo, Do);
1525     Bbo = ROL64in128(Emo, 21);
1526     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
1527     Ce = Abe;
1528     Esu = _mm_xor_si128(Esu, Du);
1529     Bbu = ROL64in128(Esu, 14);
1530     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1531     Ci = Abi;
1532     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1533     Co = Abo;
1534     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1535     Cu = Abu;
1536     Ebo = _mm_xor_si128(Ebo, Do);
1537     Bga = ROL64in128(Ebo, 28);
1538     Egu = _mm_xor_si128(Egu, Du);
1539     Bge = ROL64in128(Egu, 20);
1540     Eka = _mm_xor_si128(Eka, Da);
1541     Bgi = ROL64in128(Eka, 3);
1542     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1543     Ca = _mm_xor_si128(Ca, Aga);
1544     Eme = _mm_xor_si128(Eme, De);
1545     Bgo = ROL64in128(Eme, 45);
1546     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1547     Ce = _mm_xor_si128(Ce, Age);
1548     Esi = _mm_xor_si128(Esi, Di);
1549     Bgu = ROL64in128(Esi, 61);
1550     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1551     Ci = _mm_xor_si128(Ci, Agi);
1552     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1553     Co = _mm_xor_si128(Co, Ago);
1554     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1555     Cu = _mm_xor_si128(Cu, Agu);
1556     Ebe = _mm_xor_si128(Ebe, De);
1557     Bka = ROL64in128(Ebe, 1);
1558     Egi = _mm_xor_si128(Egi, Di);
1559     Bke = ROL64in128(Egi, 6);
1560     Eko = _mm_xor_si128(Eko, Do);
1561     Bki = ROL64in128(Eko, 25);
1562     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1563     Ca = _mm_xor_si128(Ca, Aka);
1564     Emu = _mm_xor_si128(Emu, Du);
1565     Bko = ROL64in128_8(Emu);
1566     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1567     Ce = _mm_xor_si128(Ce, Ake);
1568     Esa = _mm_xor_si128(Esa, Da);
1569     Bku = ROL64in128(Esa, 18);
1570     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1571     Ci = _mm_xor_si128(Ci, Aki);
1572     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1573     Co = _mm_xor_si128(Co, Ako);
1574     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1575     Cu = _mm_xor_si128(Cu, Aku);
1576     Ebu = _mm_xor_si128(Ebu, Du);
1577     Bma = ROL64in128(Ebu, 27);
1578     Ega = _mm_xor_si128(Ega, Da);
1579     Bme = ROL64in128(Ega, 36);
1580     Eke = _mm_xor_si128(Eke, De);
1581     Bmi = ROL64in128(Eke, 10);
1582     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
1583     Ca = _mm_xor_si128(Ca, Ama);
1584     Emi = _mm_xor_si128(Emi, Di);
1585     Bmo = ROL64in128(Emi, 15);
1586     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
1587     Ce = _mm_xor_si128(Ce, Ame);
1588     Eso = _mm_xor_si128(Eso, Do);
1589     Bmu = ROL64in128_56(Eso);
1590     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
1591     Ci = _mm_xor_si128(Ci, Ami);
1592     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
1593     Co = _mm_xor_si128(Co, Amo);
1594     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
1595     Cu = _mm_xor_si128(Cu, Amu);
1596     Ebi = _mm_xor_si128(Ebi, Di);
1597     Bsa = ROL64in128(Ebi, 62);
1598     Ego = _mm_xor_si128(Ego, Do);
1599     Bse = ROL64in128(Ego, 55);
1600     Eku = _mm_xor_si128(Eku, Du);
1601     Bsi = ROL64in128(Eku, 39);
1602     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
1603     Ca = _mm_xor_si128(Ca, Asa);
1604     Ema = _mm_xor_si128(Ema, Da);
1605     Bso = ROL64in128(Ema, 41);
1606     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
1607     Ce = _mm_xor_si128(Ce, Ase);
1608     Ese = _mm_xor_si128(Ese, De);
1609     Bsu = ROL64in128(Ese, 2);
1610     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
1611     Ci = _mm_xor_si128(Ci, Asi);
1612     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
1613     Co = _mm_xor_si128(Co, Aso);
1614     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
1615     Cu = _mm_xor_si128(Cu, Asu);
1616     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
1617     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
1618     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
1619     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
1620     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
1621     Aba = _mm_xor_si128(Aba, Da);
1622     Bba = Aba;
1623     Age = _mm_xor_si128(Age, De);
1624     Bbe = ROL64in128(Age, 44);
1625     Aki = _mm_xor_si128(Aki, Di);
1626     Bbi = ROL64in128(Aki, 43);
1627     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
1628     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[14]));
1629     Ca = Eba;
1630     Amo = _mm_xor_si128(Amo, Do);
1631     Bbo = ROL64in128(Amo, 21);
1632     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
1633     Ce = Ebe;
1634     Asu = _mm_xor_si128(Asu, Du);
1635     Bbu = ROL64in128(Asu, 14);
1636     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1637     Ci = Ebi;
1638     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1639     Co = Ebo;
1640     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1641     Cu = Ebu;
1642     Abo = _mm_xor_si128(Abo, Do);
1643     Bga = ROL64in128(Abo, 28);
1644     Agu = _mm_xor_si128(Agu, Du);
1645     Bge = ROL64in128(Agu, 20);
1646     Aka = _mm_xor_si128(Aka, Da);
1647     Bgi = ROL64in128(Aka, 3);
1648     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1649     Ca = _mm_xor_si128(Ca, Ega);
1650     Ame = _mm_xor_si128(Ame, De);
1651     Bgo = ROL64in128(Ame, 45);
1652     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1653     Ce = _mm_xor_si128(Ce, Ege);
1654     Asi = _mm_xor_si128(Asi, Di);
1655     Bgu = ROL64in128(Asi, 61);
1656     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1657     Ci = _mm_xor_si128(Ci, Egi);
1658     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1659     Co = _mm_xor_si128(Co, Ego);
1660     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1661     Cu = _mm_xor_si128(Cu, Egu);
1662     Abe = _mm_xor_si128(Abe, De);
1663     Bka = ROL64in128(Abe, 1);
1664     Agi = _mm_xor_si128(Agi, Di);
1665     Bke = ROL64in128(Agi, 6);
1666     Ako = _mm_xor_si128(Ako, Do);
1667     Bki = ROL64in128(Ako, 25);
1668     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1669     Ca = _mm_xor_si128(Ca, Eka);
1670     Amu = _mm_xor_si128(Amu, Du);
1671     Bko = ROL64in128_8(Amu);
1672     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1673     Ce = _mm_xor_si128(Ce, Eke);
1674     Asa = _mm_xor_si128(Asa, Da);
1675     Bku = ROL64in128(Asa, 18);
1676     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1677     Ci = _mm_xor_si128(Ci, Eki);
1678     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1679     Co = _mm_xor_si128(Co, Eko);
1680     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1681     Cu = _mm_xor_si128(Cu, Eku);
1682     Abu = _mm_xor_si128(Abu, Du);
1683     Bma = ROL64in128(Abu, 27);
1684     Aga = _mm_xor_si128(Aga, Da);
1685     Bme = ROL64in128(Aga, 36);
1686     Ake = _mm_xor_si128(Ake, De);
1687     Bmi = ROL64in128(Ake, 10);
1688     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
1689     Ca = _mm_xor_si128(Ca, Ema);
1690     Ami = _mm_xor_si128(Ami, Di);
1691     Bmo = ROL64in128(Ami, 15);
1692     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
1693     Ce = _mm_xor_si128(Ce, Eme);
1694     Aso = _mm_xor_si128(Aso, Do);
1695     Bmu = ROL64in128_56(Aso);
1696     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
1697     Ci = _mm_xor_si128(Ci, Emi);
1698     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
1699     Co = _mm_xor_si128(Co, Emo);
1700     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
1701     Cu = _mm_xor_si128(Cu, Emu);
1702     Abi = _mm_xor_si128(Abi, Di);
1703     Bsa = ROL64in128(Abi, 62);
1704     Ago = _mm_xor_si128(Ago, Do);
1705     Bse = ROL64in128(Ago, 55);
1706     Aku = _mm_xor_si128(Aku, Du);
1707     Bsi = ROL64in128(Aku, 39);
1708     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
1709     Ca = _mm_xor_si128(Ca, Esa);
1710     Ama = _mm_xor_si128(Ama, Da);
1711     Bso = ROL64in128(Ama, 41);
1712     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
1713     Ce = _mm_xor_si128(Ce, Ese);
1714     Ase = _mm_xor_si128(Ase, De);
1715     Bsu = ROL64in128(Ase, 2);
1716     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
1717     Ci = _mm_xor_si128(Ci, Esi);
1718     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
1719     Co = _mm_xor_si128(Co, Eso);
1720     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
1721     Cu = _mm_xor_si128(Cu, Esu);
1722     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
1723     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
1724     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
1725     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
1726     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
1727     Eba = _mm_xor_si128(Eba, Da);
1728     Bba = Eba;
1729     Ege = _mm_xor_si128(Ege, De);
1730     Bbe = ROL64in128(Ege, 44);
1731     Eki = _mm_xor_si128(Eki, Di);
1732     Bbi = ROL64in128(Eki, 43);
1733     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
1734     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[15]));
1735     Ca = Aba;
1736     Emo = _mm_xor_si128(Emo, Do);
1737     Bbo = ROL64in128(Emo, 21);
1738     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
1739     Ce = Abe;
1740     Esu = _mm_xor_si128(Esu, Du);
1741     Bbu = ROL64in128(Esu, 14);
1742     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1743     Ci = Abi;
1744     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1745     Co = Abo;
1746     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1747     Cu = Abu;
1748     Ebo = _mm_xor_si128(Ebo, Do);
1749     Bga = ROL64in128(Ebo, 28);
1750     Egu = _mm_xor_si128(Egu, Du);
1751     Bge = ROL64in128(Egu, 20);
1752     Eka = _mm_xor_si128(Eka, Da);
1753     Bgi = ROL64in128(Eka, 3);
1754     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1755     Ca = _mm_xor_si128(Ca, Aga);
1756     Eme = _mm_xor_si128(Eme, De);
1757     Bgo = ROL64in128(Eme, 45);
1758     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1759     Ce = _mm_xor_si128(Ce, Age);
1760     Esi = _mm_xor_si128(Esi, Di);
1761     Bgu = ROL64in128(Esi, 61);
1762     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1763     Ci = _mm_xor_si128(Ci, Agi);
1764     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1765     Co = _mm_xor_si128(Co, Ago);
1766     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1767     Cu = _mm_xor_si128(Cu, Agu);
1768     Ebe = _mm_xor_si128(Ebe, De);
1769     Bka = ROL64in128(Ebe, 1);
1770     Egi = _mm_xor_si128(Egi, Di);
1771     Bke = ROL64in128(Egi, 6);
1772     Eko = _mm_xor_si128(Eko, Do);
1773     Bki = ROL64in128(Eko, 25);
1774     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1775     Ca = _mm_xor_si128(Ca, Aka);
1776     Emu = _mm_xor_si128(Emu, Du);
1777     Bko = ROL64in128_8(Emu);
1778     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1779     Ce = _mm_xor_si128(Ce, Ake);
1780     Esa = _mm_xor_si128(Esa, Da);
1781     Bku = ROL64in128(Esa, 18);
1782     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1783     Ci = _mm_xor_si128(Ci, Aki);
1784     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1785     Co = _mm_xor_si128(Co, Ako);
1786     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1787     Cu = _mm_xor_si128(Cu, Aku);
1788     Ebu = _mm_xor_si128(Ebu, Du);
1789     Bma = ROL64in128(Ebu, 27);
1790     Ega = _mm_xor_si128(Ega, Da);
1791     Bme = ROL64in128(Ega, 36);
1792     Eke = _mm_xor_si128(Eke, De);
1793     Bmi = ROL64in128(Eke, 10);
1794     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
1795     Ca = _mm_xor_si128(Ca, Ama);
1796     Emi = _mm_xor_si128(Emi, Di);
1797     Bmo = ROL64in128(Emi, 15);
1798     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
1799     Ce = _mm_xor_si128(Ce, Ame);
1800     Eso = _mm_xor_si128(Eso, Do);
1801     Bmu = ROL64in128_56(Eso);
1802     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
1803     Ci = _mm_xor_si128(Ci, Ami);
1804     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
1805     Co = _mm_xor_si128(Co, Amo);
1806     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
1807     Cu = _mm_xor_si128(Cu, Amu);
1808     Ebi = _mm_xor_si128(Ebi, Di);
1809     Bsa = ROL64in128(Ebi, 62);
1810     Ego = _mm_xor_si128(Ego, Do);
1811     Bse = ROL64in128(Ego, 55);
1812     Eku = _mm_xor_si128(Eku, Du);
1813     Bsi = ROL64in128(Eku, 39);
1814     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
1815     Ca = _mm_xor_si128(Ca, Asa);
1816     Ema = _mm_xor_si128(Ema, Da);
1817     Bso = ROL64in128(Ema, 41);
1818     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
1819     Ce = _mm_xor_si128(Ce, Ase);
1820     Ese = _mm_xor_si128(Ese, De);
1821     Bsu = ROL64in128(Ese, 2);
1822     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
1823     Ci = _mm_xor_si128(Ci, Asi);
1824     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
1825     Co = _mm_xor_si128(Co, Aso);
1826     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
1827     Cu = _mm_xor_si128(Cu, Asu);
1828     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
1829     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
1830     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
1831     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
1832     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
1833     Aba = _mm_xor_si128(Aba, Da);
1834     Bba = Aba;
1835     Age = _mm_xor_si128(Age, De);
1836     Bbe = ROL64in128(Age, 44);
1837     Aki = _mm_xor_si128(Aki, Di);
1838     Bbi = ROL64in128(Aki, 43);
1839     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
1840     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[16]));
1841     Ca = Eba;
1842     Amo = _mm_xor_si128(Amo, Do);
1843     Bbo = ROL64in128(Amo, 21);
1844     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
1845     Ce = Ebe;
1846     Asu = _mm_xor_si128(Asu, Du);
1847     Bbu = ROL64in128(Asu, 14);
1848     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1849     Ci = Ebi;
1850     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1851     Co = Ebo;
1852     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1853     Cu = Ebu;
1854     Abo = _mm_xor_si128(Abo, Do);
1855     Bga = ROL64in128(Abo, 28);
1856     Agu = _mm_xor_si128(Agu, Du);
1857     Bge = ROL64in128(Agu, 20);
1858     Aka = _mm_xor_si128(Aka, Da);
1859     Bgi = ROL64in128(Aka, 3);
1860     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1861     Ca = _mm_xor_si128(Ca, Ega);
1862     Ame = _mm_xor_si128(Ame, De);
1863     Bgo = ROL64in128(Ame, 45);
1864     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1865     Ce = _mm_xor_si128(Ce, Ege);
1866     Asi = _mm_xor_si128(Asi, Di);
1867     Bgu = ROL64in128(Asi, 61);
1868     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1869     Ci = _mm_xor_si128(Ci, Egi);
1870     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1871     Co = _mm_xor_si128(Co, Ego);
1872     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1873     Cu = _mm_xor_si128(Cu, Egu);
1874     Abe = _mm_xor_si128(Abe, De);
1875     Bka = ROL64in128(Abe, 1);
1876     Agi = _mm_xor_si128(Agi, Di);
1877     Bke = ROL64in128(Agi, 6);
1878     Ako = _mm_xor_si128(Ako, Do);
1879     Bki = ROL64in128(Ako, 25);
1880     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1881     Ca = _mm_xor_si128(Ca, Eka);
1882     Amu = _mm_xor_si128(Amu, Du);
1883     Bko = ROL64in128_8(Amu);
1884     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1885     Ce = _mm_xor_si128(Ce, Eke);
1886     Asa = _mm_xor_si128(Asa, Da);
1887     Bku = ROL64in128(Asa, 18);
1888     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1889     Ci = _mm_xor_si128(Ci, Eki);
1890     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1891     Co = _mm_xor_si128(Co, Eko);
1892     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1893     Cu = _mm_xor_si128(Cu, Eku);
1894     Abu = _mm_xor_si128(Abu, Du);
1895     Bma = ROL64in128(Abu, 27);
1896     Aga = _mm_xor_si128(Aga, Da);
1897     Bme = ROL64in128(Aga, 36);
1898     Ake = _mm_xor_si128(Ake, De);
1899     Bmi = ROL64in128(Ake, 10);
1900     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
1901     Ca = _mm_xor_si128(Ca, Ema);
1902     Ami = _mm_xor_si128(Ami, Di);
1903     Bmo = ROL64in128(Ami, 15);
1904     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
1905     Ce = _mm_xor_si128(Ce, Eme);
1906     Aso = _mm_xor_si128(Aso, Do);
1907     Bmu = ROL64in128_56(Aso);
1908     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
1909     Ci = _mm_xor_si128(Ci, Emi);
1910     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
1911     Co = _mm_xor_si128(Co, Emo);
1912     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
1913     Cu = _mm_xor_si128(Cu, Emu);
1914     Abi = _mm_xor_si128(Abi, Di);
1915     Bsa = ROL64in128(Abi, 62);
1916     Ago = _mm_xor_si128(Ago, Do);
1917     Bse = ROL64in128(Ago, 55);
1918     Aku = _mm_xor_si128(Aku, Du);
1919     Bsi = ROL64in128(Aku, 39);
1920     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
1921     Ca = _mm_xor_si128(Ca, Esa);
1922     Ama = _mm_xor_si128(Ama, Da);
1923     Bso = ROL64in128(Ama, 41);
1924     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
1925     Ce = _mm_xor_si128(Ce, Ese);
1926     Ase = _mm_xor_si128(Ase, De);
1927     Bsu = ROL64in128(Ase, 2);
1928     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
1929     Ci = _mm_xor_si128(Ci, Esi);
1930     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
1931     Co = _mm_xor_si128(Co, Eso);
1932     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
1933     Cu = _mm_xor_si128(Cu, Esu);
1934     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
1935     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
1936     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
1937     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
1938     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
1939     Eba = _mm_xor_si128(Eba, Da);
1940     Bba = Eba;
1941     Ege = _mm_xor_si128(Ege, De);
1942     Bbe = ROL64in128(Ege, 44);
1943     Eki = _mm_xor_si128(Eki, Di);
1944     Bbi = ROL64in128(Eki, 43);
1945     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
1946     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[17]));
1947     Ca = Aba;
1948     Emo = _mm_xor_si128(Emo, Do);
1949     Bbo = ROL64in128(Emo, 21);
1950     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
1951     Ce = Abe;
1952     Esu = _mm_xor_si128(Esu, Du);
1953     Bbu = ROL64in128(Esu, 14);
1954     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
1955     Ci = Abi;
1956     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
1957     Co = Abo;
1958     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
1959     Cu = Abu;
1960     Ebo = _mm_xor_si128(Ebo, Do);
1961     Bga = ROL64in128(Ebo, 28);
1962     Egu = _mm_xor_si128(Egu, Du);
1963     Bge = ROL64in128(Egu, 20);
1964     Eka = _mm_xor_si128(Eka, Da);
1965     Bgi = ROL64in128(Eka, 3);
1966     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
1967     Ca = _mm_xor_si128(Ca, Aga);
1968     Eme = _mm_xor_si128(Eme, De);
1969     Bgo = ROL64in128(Eme, 45);
1970     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
1971     Ce = _mm_xor_si128(Ce, Age);
1972     Esi = _mm_xor_si128(Esi, Di);
1973     Bgu = ROL64in128(Esi, 61);
1974     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
1975     Ci = _mm_xor_si128(Ci, Agi);
1976     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
1977     Co = _mm_xor_si128(Co, Ago);
1978     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
1979     Cu = _mm_xor_si128(Cu, Agu);
1980     Ebe = _mm_xor_si128(Ebe, De);
1981     Bka = ROL64in128(Ebe, 1);
1982     Egi = _mm_xor_si128(Egi, Di);
1983     Bke = ROL64in128(Egi, 6);
1984     Eko = _mm_xor_si128(Eko, Do);
1985     Bki = ROL64in128(Eko, 25);
1986     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
1987     Ca = _mm_xor_si128(Ca, Aka);
1988     Emu = _mm_xor_si128(Emu, Du);
1989     Bko = ROL64in128_8(Emu);
1990     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
1991     Ce = _mm_xor_si128(Ce, Ake);
1992     Esa = _mm_xor_si128(Esa, Da);
1993     Bku = ROL64in128(Esa, 18);
1994     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
1995     Ci = _mm_xor_si128(Ci, Aki);
1996     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
1997     Co = _mm_xor_si128(Co, Ako);
1998     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
1999     Cu = _mm_xor_si128(Cu, Aku);
2000     Ebu = _mm_xor_si128(Ebu, Du);
2001     Bma = ROL64in128(Ebu, 27);
2002     Ega = _mm_xor_si128(Ega, Da);
2003     Bme = ROL64in128(Ega, 36);
2004     Eke = _mm_xor_si128(Eke, De);
2005     Bmi = ROL64in128(Eke, 10);
2006     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
2007     Ca = _mm_xor_si128(Ca, Ama);
2008     Emi = _mm_xor_si128(Emi, Di);
2009     Bmo = ROL64in128(Emi, 15);
2010     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
2011     Ce = _mm_xor_si128(Ce, Ame);
2012     Eso = _mm_xor_si128(Eso, Do);
2013     Bmu = ROL64in128_56(Eso);
2014     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
2015     Ci = _mm_xor_si128(Ci, Ami);
2016     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
2017     Co = _mm_xor_si128(Co, Amo);
2018     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
2019     Cu = _mm_xor_si128(Cu, Amu);
2020     Ebi = _mm_xor_si128(Ebi, Di);
2021     Bsa = ROL64in128(Ebi, 62);
2022     Ego = _mm_xor_si128(Ego, Do);
2023     Bse = ROL64in128(Ego, 55);
2024     Eku = _mm_xor_si128(Eku, Du);
2025     Bsi = ROL64in128(Eku, 39);
2026     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
2027     Ca = _mm_xor_si128(Ca, Asa);
2028     Ema = _mm_xor_si128(Ema, Da);
2029     Bso = ROL64in128(Ema, 41);
2030     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
2031     Ce = _mm_xor_si128(Ce, Ase);
2032     Ese = _mm_xor_si128(Ese, De);
2033     Bsu = ROL64in128(Ese, 2);
2034     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
2035     Ci = _mm_xor_si128(Ci, Asi);
2036     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
2037     Co = _mm_xor_si128(Co, Aso);
2038     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
2039     Cu = _mm_xor_si128(Cu, Asu);
2040     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
2041     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
2042     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
2043     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
2044     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
2045     Aba = _mm_xor_si128(Aba, Da);
2046     Bba = Aba;
2047     Age = _mm_xor_si128(Age, De);
2048     Bbe = ROL64in128(Age, 44);
2049     Aki = _mm_xor_si128(Aki, Di);
2050     Bbi = ROL64in128(Aki, 43);
2051     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
2052     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[18]));
2053     Ca = Eba;
2054     Amo = _mm_xor_si128(Amo, Do);
2055     Bbo = ROL64in128(Amo, 21);
2056     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
2057     Ce = Ebe;
2058     Asu = _mm_xor_si128(Asu, Du);
2059     Bbu = ROL64in128(Asu, 14);
2060     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
2061     Ci = Ebi;
2062     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
2063     Co = Ebo;
2064     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
2065     Cu = Ebu;
2066     Abo = _mm_xor_si128(Abo, Do);
2067     Bga = ROL64in128(Abo, 28);
2068     Agu = _mm_xor_si128(Agu, Du);
2069     Bge = ROL64in128(Agu, 20);
2070     Aka = _mm_xor_si128(Aka, Da);
2071     Bgi = ROL64in128(Aka, 3);
2072     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
2073     Ca = _mm_xor_si128(Ca, Ega);
2074     Ame = _mm_xor_si128(Ame, De);
2075     Bgo = ROL64in128(Ame, 45);
2076     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
2077     Ce = _mm_xor_si128(Ce, Ege);
2078     Asi = _mm_xor_si128(Asi, Di);
2079     Bgu = ROL64in128(Asi, 61);
2080     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
2081     Ci = _mm_xor_si128(Ci, Egi);
2082     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
2083     Co = _mm_xor_si128(Co, Ego);
2084     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
2085     Cu = _mm_xor_si128(Cu, Egu);
2086     Abe = _mm_xor_si128(Abe, De);
2087     Bka = ROL64in128(Abe, 1);
2088     Agi = _mm_xor_si128(Agi, Di);
2089     Bke = ROL64in128(Agi, 6);
2090     Ako = _mm_xor_si128(Ako, Do);
2091     Bki = ROL64in128(Ako, 25);
2092     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
2093     Ca = _mm_xor_si128(Ca, Eka);
2094     Amu = _mm_xor_si128(Amu, Du);
2095     Bko = ROL64in128_8(Amu);
2096     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
2097     Ce = _mm_xor_si128(Ce, Eke);
2098     Asa = _mm_xor_si128(Asa, Da);
2099     Bku = ROL64in128(Asa, 18);
2100     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
2101     Ci = _mm_xor_si128(Ci, Eki);
2102     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
2103     Co = _mm_xor_si128(Co, Eko);
2104     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
2105     Cu = _mm_xor_si128(Cu, Eku);
2106     Abu = _mm_xor_si128(Abu, Du);
2107     Bma = ROL64in128(Abu, 27);
2108     Aga = _mm_xor_si128(Aga, Da);
2109     Bme = ROL64in128(Aga, 36);
2110     Ake = _mm_xor_si128(Ake, De);
2111     Bmi = ROL64in128(Ake, 10);
2112     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
2113     Ca = _mm_xor_si128(Ca, Ema);
2114     Ami = _mm_xor_si128(Ami, Di);
2115     Bmo = ROL64in128(Ami, 15);
2116     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
2117     Ce = _mm_xor_si128(Ce, Eme);
2118     Aso = _mm_xor_si128(Aso, Do);
2119     Bmu = ROL64in128_56(Aso);
2120     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
2121     Ci = _mm_xor_si128(Ci, Emi);
2122     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
2123     Co = _mm_xor_si128(Co, Emo);
2124     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
2125     Cu = _mm_xor_si128(Cu, Emu);
2126     Abi = _mm_xor_si128(Abi, Di);
2127     Bsa = ROL64in128(Abi, 62);
2128     Ago = _mm_xor_si128(Ago, Do);
2129     Bse = ROL64in128(Ago, 55);
2130     Aku = _mm_xor_si128(Aku, Du);
2131     Bsi = ROL64in128(Aku, 39);
2132     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
2133     Ca = _mm_xor_si128(Ca, Esa);
2134     Ama = _mm_xor_si128(Ama, Da);
2135     Bso = ROL64in128(Ama, 41);
2136     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
2137     Ce = _mm_xor_si128(Ce, Ese);
2138     Ase = _mm_xor_si128(Ase, De);
2139     Bsu = ROL64in128(Ase, 2);
2140     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
2141     Ci = _mm_xor_si128(Ci, Esi);
2142     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
2143     Co = _mm_xor_si128(Co, Eso);
2144     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
2145     Cu = _mm_xor_si128(Cu, Esu);
2146     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
2147     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
2148     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
2149     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
2150     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
2151     Eba = _mm_xor_si128(Eba, Da);
2152     Bba = Eba;
2153     Ege = _mm_xor_si128(Ege, De);
2154     Bbe = ROL64in128(Ege, 44);
2155     Eki = _mm_xor_si128(Eki, Di);
2156     Bbi = ROL64in128(Eki, 43);
2157     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
2158     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[19]));
2159     Ca = Aba;
2160     Emo = _mm_xor_si128(Emo, Do);
2161     Bbo = ROL64in128(Emo, 21);
2162     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
2163     Ce = Abe;
2164     Esu = _mm_xor_si128(Esu, Du);
2165     Bbu = ROL64in128(Esu, 14);
2166     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
2167     Ci = Abi;
2168     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
2169     Co = Abo;
2170     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
2171     Cu = Abu;
2172     Ebo = _mm_xor_si128(Ebo, Do);
2173     Bga = ROL64in128(Ebo, 28);
2174     Egu = _mm_xor_si128(Egu, Du);
2175     Bge = ROL64in128(Egu, 20);
2176     Eka = _mm_xor_si128(Eka, Da);
2177     Bgi = ROL64in128(Eka, 3);
2178     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
2179     Ca = _mm_xor_si128(Ca, Aga);
2180     Eme = _mm_xor_si128(Eme, De);
2181     Bgo = ROL64in128(Eme, 45);
2182     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
2183     Ce = _mm_xor_si128(Ce, Age);
2184     Esi = _mm_xor_si128(Esi, Di);
2185     Bgu = ROL64in128(Esi, 61);
2186     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
2187     Ci = _mm_xor_si128(Ci, Agi);
2188     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
2189     Co = _mm_xor_si128(Co, Ago);
2190     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
2191     Cu = _mm_xor_si128(Cu, Agu);
2192     Ebe = _mm_xor_si128(Ebe, De);
2193     Bka = ROL64in128(Ebe, 1);
2194     Egi = _mm_xor_si128(Egi, Di);
2195     Bke = ROL64in128(Egi, 6);
2196     Eko = _mm_xor_si128(Eko, Do);
2197     Bki = ROL64in128(Eko, 25);
2198     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
2199     Ca = _mm_xor_si128(Ca, Aka);
2200     Emu = _mm_xor_si128(Emu, Du);
2201     Bko = ROL64in128_8(Emu);
2202     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
2203     Ce = _mm_xor_si128(Ce, Ake);
2204     Esa = _mm_xor_si128(Esa, Da);
2205     Bku = ROL64in128(Esa, 18);
2206     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
2207     Ci = _mm_xor_si128(Ci, Aki);
2208     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
2209     Co = _mm_xor_si128(Co, Ako);
2210     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
2211     Cu = _mm_xor_si128(Cu, Aku);
2212     Ebu = _mm_xor_si128(Ebu, Du);
2213     Bma = ROL64in128(Ebu, 27);
2214     Ega = _mm_xor_si128(Ega, Da);
2215     Bme = ROL64in128(Ega, 36);
2216     Eke = _mm_xor_si128(Eke, De);
2217     Bmi = ROL64in128(Eke, 10);
2218     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
2219     Ca = _mm_xor_si128(Ca, Ama);
2220     Emi = _mm_xor_si128(Emi, Di);
2221     Bmo = ROL64in128(Emi, 15);
2222     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
2223     Ce = _mm_xor_si128(Ce, Ame);
2224     Eso = _mm_xor_si128(Eso, Do);
2225     Bmu = ROL64in128_56(Eso);
2226     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
2227     Ci = _mm_xor_si128(Ci, Ami);
2228     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
2229     Co = _mm_xor_si128(Co, Amo);
2230     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
2231     Cu = _mm_xor_si128(Cu, Amu);
2232     Ebi = _mm_xor_si128(Ebi, Di);
2233     Bsa = ROL64in128(Ebi, 62);
2234     Ego = _mm_xor_si128(Ego, Do);
2235     Bse = ROL64in128(Ego, 55);
2236     Eku = _mm_xor_si128(Eku, Du);
2237     Bsi = ROL64in128(Eku, 39);
2238     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
2239     Ca = _mm_xor_si128(Ca, Asa);
2240     Ema = _mm_xor_si128(Ema, Da);
2241     Bso = ROL64in128(Ema, 41);
2242     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
2243     Ce = _mm_xor_si128(Ce, Ase);
2244     Ese = _mm_xor_si128(Ese, De);
2245     Bsu = ROL64in128(Ese, 2);
2246     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
2247     Ci = _mm_xor_si128(Ci, Asi);
2248     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
2249     Co = _mm_xor_si128(Co, Aso);
2250     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
2251     Cu = _mm_xor_si128(Cu, Asu);
2252     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
2253     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
2254     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
2255     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
2256     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
2257     Aba = _mm_xor_si128(Aba, Da);
2258     Bba = Aba;
2259     Age = _mm_xor_si128(Age, De);
2260     Bbe = ROL64in128(Age, 44);
2261     Aki = _mm_xor_si128(Aki, Di);
2262     Bbi = ROL64in128(Aki, 43);
2263     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
2264     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[20]));
2265     Ca = Eba;
2266     Amo = _mm_xor_si128(Amo, Do);
2267     Bbo = ROL64in128(Amo, 21);
2268     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
2269     Ce = Ebe;
2270     Asu = _mm_xor_si128(Asu, Du);
2271     Bbu = ROL64in128(Asu, 14);
2272     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
2273     Ci = Ebi;
2274     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
2275     Co = Ebo;
2276     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
2277     Cu = Ebu;
2278     Abo = _mm_xor_si128(Abo, Do);
2279     Bga = ROL64in128(Abo, 28);
2280     Agu = _mm_xor_si128(Agu, Du);
2281     Bge = ROL64in128(Agu, 20);
2282     Aka = _mm_xor_si128(Aka, Da);
2283     Bgi = ROL64in128(Aka, 3);
2284     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
2285     Ca = _mm_xor_si128(Ca, Ega);
2286     Ame = _mm_xor_si128(Ame, De);
2287     Bgo = ROL64in128(Ame, 45);
2288     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
2289     Ce = _mm_xor_si128(Ce, Ege);
2290     Asi = _mm_xor_si128(Asi, Di);
2291     Bgu = ROL64in128(Asi, 61);
2292     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
2293     Ci = _mm_xor_si128(Ci, Egi);
2294     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
2295     Co = _mm_xor_si128(Co, Ego);
2296     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
2297     Cu = _mm_xor_si128(Cu, Egu);
2298     Abe = _mm_xor_si128(Abe, De);
2299     Bka = ROL64in128(Abe, 1);
2300     Agi = _mm_xor_si128(Agi, Di);
2301     Bke = ROL64in128(Agi, 6);
2302     Ako = _mm_xor_si128(Ako, Do);
2303     Bki = ROL64in128(Ako, 25);
2304     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
2305     Ca = _mm_xor_si128(Ca, Eka);
2306     Amu = _mm_xor_si128(Amu, Du);
2307     Bko = ROL64in128_8(Amu);
2308     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
2309     Ce = _mm_xor_si128(Ce, Eke);
2310     Asa = _mm_xor_si128(Asa, Da);
2311     Bku = ROL64in128(Asa, 18);
2312     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
2313     Ci = _mm_xor_si128(Ci, Eki);
2314     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
2315     Co = _mm_xor_si128(Co, Eko);
2316     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
2317     Cu = _mm_xor_si128(Cu, Eku);
2318     Abu = _mm_xor_si128(Abu, Du);
2319     Bma = ROL64in128(Abu, 27);
2320     Aga = _mm_xor_si128(Aga, Da);
2321     Bme = ROL64in128(Aga, 36);
2322     Ake = _mm_xor_si128(Ake, De);
2323     Bmi = ROL64in128(Ake, 10);
2324     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
2325     Ca = _mm_xor_si128(Ca, Ema);
2326     Ami = _mm_xor_si128(Ami, Di);
2327     Bmo = ROL64in128(Ami, 15);
2328     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
2329     Ce = _mm_xor_si128(Ce, Eme);
2330     Aso = _mm_xor_si128(Aso, Do);
2331     Bmu = ROL64in128_56(Aso);
2332     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
2333     Ci = _mm_xor_si128(Ci, Emi);
2334     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
2335     Co = _mm_xor_si128(Co, Emo);
2336     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
2337     Cu = _mm_xor_si128(Cu, Emu);
2338     Abi = _mm_xor_si128(Abi, Di);
2339     Bsa = ROL64in128(Abi, 62);
2340     Ago = _mm_xor_si128(Ago, Do);
2341     Bse = ROL64in128(Ago, 55);
2342     Aku = _mm_xor_si128(Aku, Du);
2343     Bsi = ROL64in128(Aku, 39);
2344     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
2345     Ca = _mm_xor_si128(Ca, Esa);
2346     Ama = _mm_xor_si128(Ama, Da);
2347     Bso = ROL64in128(Ama, 41);
2348     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
2349     Ce = _mm_xor_si128(Ce, Ese);
2350     Ase = _mm_xor_si128(Ase, De);
2351     Bsu = ROL64in128(Ase, 2);
2352     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
2353     Ci = _mm_xor_si128(Ci, Esi);
2354     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
2355     Co = _mm_xor_si128(Co, Eso);
2356     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
2357     Cu = _mm_xor_si128(Cu, Esu);
2358     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
2359     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
2360     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
2361     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
2362     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
2363     Eba = _mm_xor_si128(Eba, Da);
2364     Bba = Eba;
2365     Ege = _mm_xor_si128(Ege, De);
2366     Bbe = ROL64in128(Ege, 44);
2367     Eki = _mm_xor_si128(Eki, Di);
2368     Bbi = ROL64in128(Eki, 43);
2369     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
2370     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[21]));
2371     Ca = Aba;
2372     Emo = _mm_xor_si128(Emo, Do);
2373     Bbo = ROL64in128(Emo, 21);
2374     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
2375     Ce = Abe;
2376     Esu = _mm_xor_si128(Esu, Du);
2377     Bbu = ROL64in128(Esu, 14);
2378     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
2379     Ci = Abi;
2380     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
2381     Co = Abo;
2382     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
2383     Cu = Abu;
2384     Ebo = _mm_xor_si128(Ebo, Do);
2385     Bga = ROL64in128(Ebo, 28);
2386     Egu = _mm_xor_si128(Egu, Du);
2387     Bge = ROL64in128(Egu, 20);
2388     Eka = _mm_xor_si128(Eka, Da);
2389     Bgi = ROL64in128(Eka, 3);
2390     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
2391     Ca = _mm_xor_si128(Ca, Aga);
2392     Eme = _mm_xor_si128(Eme, De);
2393     Bgo = ROL64in128(Eme, 45);
2394     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
2395     Ce = _mm_xor_si128(Ce, Age);
2396     Esi = _mm_xor_si128(Esi, Di);
2397     Bgu = ROL64in128(Esi, 61);
2398     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
2399     Ci = _mm_xor_si128(Ci, Agi);
2400     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
2401     Co = _mm_xor_si128(Co, Ago);
2402     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
2403     Cu = _mm_xor_si128(Cu, Agu);
2404     Ebe = _mm_xor_si128(Ebe, De);
2405     Bka = ROL64in128(Ebe, 1);
2406     Egi = _mm_xor_si128(Egi, Di);
2407     Bke = ROL64in128(Egi, 6);
2408     Eko = _mm_xor_si128(Eko, Do);
2409     Bki = ROL64in128(Eko, 25);
2410     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
2411     Ca = _mm_xor_si128(Ca, Aka);
2412     Emu = _mm_xor_si128(Emu, Du);
2413     Bko = ROL64in128_8(Emu);
2414     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
2415     Ce = _mm_xor_si128(Ce, Ake);
2416     Esa = _mm_xor_si128(Esa, Da);
2417     Bku = ROL64in128(Esa, 18);
2418     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
2419     Ci = _mm_xor_si128(Ci, Aki);
2420     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
2421     Co = _mm_xor_si128(Co, Ako);
2422     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
2423     Cu = _mm_xor_si128(Cu, Aku);
2424     Ebu = _mm_xor_si128(Ebu, Du);
2425     Bma = ROL64in128(Ebu, 27);
2426     Ega = _mm_xor_si128(Ega, Da);
2427     Bme = ROL64in128(Ega, 36);
2428     Eke = _mm_xor_si128(Eke, De);
2429     Bmi = ROL64in128(Eke, 10);
2430     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
2431     Ca = _mm_xor_si128(Ca, Ama);
2432     Emi = _mm_xor_si128(Emi, Di);
2433     Bmo = ROL64in128(Emi, 15);
2434     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
2435     Ce = _mm_xor_si128(Ce, Ame);
2436     Eso = _mm_xor_si128(Eso, Do);
2437     Bmu = ROL64in128_56(Eso);
2438     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
2439     Ci = _mm_xor_si128(Ci, Ami);
2440     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
2441     Co = _mm_xor_si128(Co, Amo);
2442     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
2443     Cu = _mm_xor_si128(Cu, Amu);
2444     Ebi = _mm_xor_si128(Ebi, Di);
2445     Bsa = ROL64in128(Ebi, 62);
2446     Ego = _mm_xor_si128(Ego, Do);
2447     Bse = ROL64in128(Ego, 55);
2448     Eku = _mm_xor_si128(Eku, Du);
2449     Bsi = ROL64in128(Eku, 39);
2450     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
2451     Ca = _mm_xor_si128(Ca, Asa);
2452     Ema = _mm_xor_si128(Ema, Da);
2453     Bso = ROL64in128(Ema, 41);
2454     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
2455     Ce = _mm_xor_si128(Ce, Ase);
2456     Ese = _mm_xor_si128(Ese, De);
2457     Bsu = ROL64in128(Ese, 2);
2458     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
2459     Ci = _mm_xor_si128(Ci, Asi);
2460     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
2461     Co = _mm_xor_si128(Co, Aso);
2462     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
2463     Cu = _mm_xor_si128(Cu, Asu);
2464     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
2465     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
2466     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
2467     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
2468     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
2469     Aba = _mm_xor_si128(Aba, Da);
2470     Bba = Aba;
2471     Age = _mm_xor_si128(Age, De);
2472     Bbe = ROL64in128(Age, 44);
2473     Aki = _mm_xor_si128(Aki, Di);
2474     Bbi = ROL64in128(Aki, 43);
2475     Eba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
2476     Eba = _mm_xor_si128(Eba, SPLAT64(KeccakF1600Constants[22]));
2477     Ca = Eba;
2478     Amo = _mm_xor_si128(Amo, Do);
2479     Bbo = ROL64in128(Amo, 21);
2480     Ebe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
2481     Ce = Ebe;
2482     Asu = _mm_xor_si128(Asu, Du);
2483     Bbu = ROL64in128(Asu, 14);
2484     Ebi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
2485     Ci = Ebi;
2486     Ebo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
2487     Co = Ebo;
2488     Ebu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
2489     Cu = Ebu;
2490     Abo = _mm_xor_si128(Abo, Do);
2491     Bga = ROL64in128(Abo, 28);
2492     Agu = _mm_xor_si128(Agu, Du);
2493     Bge = ROL64in128(Agu, 20);
2494     Aka = _mm_xor_si128(Aka, Da);
2495     Bgi = ROL64in128(Aka, 3);
2496     Ega = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
2497     Ca = _mm_xor_si128(Ca, Ega);
2498     Ame = _mm_xor_si128(Ame, De);
2499     Bgo = ROL64in128(Ame, 45);
2500     Ege = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
2501     Ce = _mm_xor_si128(Ce, Ege);
2502     Asi = _mm_xor_si128(Asi, Di);
2503     Bgu = ROL64in128(Asi, 61);
2504     Egi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
2505     Ci = _mm_xor_si128(Ci, Egi);
2506     Ego = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
2507     Co = _mm_xor_si128(Co, Ego);
2508     Egu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
2509     Cu = _mm_xor_si128(Cu, Egu);
2510     Abe = _mm_xor_si128(Abe, De);
2511     Bka = ROL64in128(Abe, 1);
2512     Agi = _mm_xor_si128(Agi, Di);
2513     Bke = ROL64in128(Agi, 6);
2514     Ako = _mm_xor_si128(Ako, Do);
2515     Bki = ROL64in128(Ako, 25);
2516     Eka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
2517     Ca = _mm_xor_si128(Ca, Eka);
2518     Amu = _mm_xor_si128(Amu, Du);
2519     Bko = ROL64in128_8(Amu);
2520     Eke = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
2521     Ce = _mm_xor_si128(Ce, Eke);
2522     Asa = _mm_xor_si128(Asa, Da);
2523     Bku = ROL64in128(Asa, 18);
2524     Eki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
2525     Ci = _mm_xor_si128(Ci, Eki);
2526     Eko = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
2527     Co = _mm_xor_si128(Co, Eko);
2528     Eku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
2529     Cu = _mm_xor_si128(Cu, Eku);
2530     Abu = _mm_xor_si128(Abu, Du);
2531     Bma = ROL64in128(Abu, 27);
2532     Aga = _mm_xor_si128(Aga, Da);
2533     Bme = ROL64in128(Aga, 36);
2534     Ake = _mm_xor_si128(Ake, De);
2535     Bmi = ROL64in128(Ake, 10);
2536     Ema = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
2537     Ca = _mm_xor_si128(Ca, Ema);
2538     Ami = _mm_xor_si128(Ami, Di);
2539     Bmo = ROL64in128(Ami, 15);
2540     Eme = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
2541     Ce = _mm_xor_si128(Ce, Eme);
2542     Aso = _mm_xor_si128(Aso, Do);
2543     Bmu = ROL64in128_56(Aso);
2544     Emi = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
2545     Ci = _mm_xor_si128(Ci, Emi);
2546     Emo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
2547     Co = _mm_xor_si128(Co, Emo);
2548     Emu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
2549     Cu = _mm_xor_si128(Cu, Emu);
2550     Abi = _mm_xor_si128(Abi, Di);
2551     Bsa = ROL64in128(Abi, 62);
2552     Ago = _mm_xor_si128(Ago, Do);
2553     Bse = ROL64in128(Ago, 55);
2554     Aku = _mm_xor_si128(Aku, Du);
2555     Bsi = ROL64in128(Aku, 39);
2556     Esa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
2557     Ca = _mm_xor_si128(Ca, Esa);
2558     Ama = _mm_xor_si128(Ama, Da);
2559     Bso = ROL64in128(Ama, 41);
2560     Ese = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
2561     Ce = _mm_xor_si128(Ce, Ese);
2562     Ase = _mm_xor_si128(Ase, De);
2563     Bsu = ROL64in128(Ase, 2);
2564     Esi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
2565     Ci = _mm_xor_si128(Ci, Esi);
2566     Eso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
2567     Co = _mm_xor_si128(Co, Eso);
2568     Esu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
2569     Cu = _mm_xor_si128(Cu, Esu);
2570     Da = _mm_xor_si128(Cu, ROL64in128(Ce, 1));
2571     De = _mm_xor_si128(Ca, ROL64in128(Ci, 1));
2572     Di = _mm_xor_si128(Ce, ROL64in128(Co, 1));
2573     Do = _mm_xor_si128(Ci, ROL64in128(Cu, 1));
2574     Du = _mm_xor_si128(Co, ROL64in128(Ca, 1));
2575     Eba = _mm_xor_si128(Eba, Da);
2576     Bba = Eba;
2577     Ege = _mm_xor_si128(Ege, De);
2578     Bbe = ROL64in128(Ege, 44);
2579     Eki = _mm_xor_si128(Eki, Di);
2580     Bbi = ROL64in128(Eki, 43);
2581     Aba = _mm_xor_si128(Bba, _mm_andnot_si128(Bbe, Bbi));
2582     Aba = _mm_xor_si128(Aba, SPLAT64(KeccakF1600Constants[23]));
2583     Emo = _mm_xor_si128(Emo, Do);
2584     Bbo = ROL64in128(Emo, 21);
2585     Abe = _mm_xor_si128(Bbe, _mm_andnot_si128(Bbi, Bbo));
2586     Esu = _mm_xor_si128(Esu, Du);
2587     Bbu = ROL64in128(Esu, 14);
2588     Abi = _mm_xor_si128(Bbi, _mm_andnot_si128(Bbo, Bbu));
2589     Abo = _mm_xor_si128(Bbo, _mm_andnot_si128(Bbu, Bba));
2590     Abu = _mm_xor_si128(Bbu, _mm_andnot_si128(Bba, Bbe));
2591     Ebo = _mm_xor_si128(Ebo, Do);
2592     Bga = ROL64in128(Ebo, 28);
2593     Egu = _mm_xor_si128(Egu, Du);
2594     Bge = ROL64in128(Egu, 20);
2595     Eka = _mm_xor_si128(Eka, Da);
2596     Bgi = ROL64in128(Eka, 3);
2597     Aga = _mm_xor_si128(Bga, _mm_andnot_si128(Bge, Bgi));
2598     Eme = _mm_xor_si128(Eme, De);
2599     Bgo = ROL64in128(Eme, 45);
2600     Age = _mm_xor_si128(Bge, _mm_andnot_si128(Bgi, Bgo));
2601     Esi = _mm_xor_si128(Esi, Di);
2602     Bgu = ROL64in128(Esi, 61);
2603     Agi = _mm_xor_si128(Bgi, _mm_andnot_si128(Bgo, Bgu));
2604     Ago = _mm_xor_si128(Bgo, _mm_andnot_si128(Bgu, Bga));
2605     Agu = _mm_xor_si128(Bgu, _mm_andnot_si128(Bga, Bge));
2606     Ebe = _mm_xor_si128(Ebe, De);
2607     Bka = ROL64in128(Ebe, 1);
2608     Egi = _mm_xor_si128(Egi, Di);
2609     Bke = ROL64in128(Egi, 6);
2610     Eko = _mm_xor_si128(Eko, Do);
2611     Bki = ROL64in128(Eko, 25);
2612     Aka = _mm_xor_si128(Bka, _mm_andnot_si128(Bke, Bki));
2613     Emu = _mm_xor_si128(Emu, Du);
2614     Bko = ROL64in128_8(Emu);
2615     Ake = _mm_xor_si128(Bke, _mm_andnot_si128(Bki, Bko));
2616     Esa = _mm_xor_si128(Esa, Da);
2617     Bku = ROL64in128(Esa, 18);
2618     Aki = _mm_xor_si128(Bki, _mm_andnot_si128(Bko, Bku));
2619     Ako = _mm_xor_si128(Bko, _mm_andnot_si128(Bku, Bka));
2620     Aku = _mm_xor_si128(Bku, _mm_andnot_si128(Bka, Bke));
2621     Ebu = _mm_xor_si128(Ebu, Du);
2622     Bma = ROL64in128(Ebu, 27);
2623     Ega = _mm_xor_si128(Ega, Da);
2624     Bme = ROL64in128(Ega, 36);
2625     Eke = _mm_xor_si128(Eke, De);
2626     Bmi = ROL64in128(Eke, 10);
2627     Ama = _mm_xor_si128(Bma, _mm_andnot_si128(Bme, Bmi));
2628     Emi = _mm_xor_si128(Emi, Di);
2629     Bmo = ROL64in128(Emi, 15);
2630     Ame = _mm_xor_si128(Bme, _mm_andnot_si128(Bmi, Bmo));
2631     Eso = _mm_xor_si128(Eso, Do);
2632     Bmu = ROL64in128_56(Eso);
2633     Ami = _mm_xor_si128(Bmi, _mm_andnot_si128(Bmo, Bmu));
2634     Amo = _mm_xor_si128(Bmo, _mm_andnot_si128(Bmu, Bma));
2635     Amu = _mm_xor_si128(Bmu, _mm_andnot_si128(Bma, Bme));
2636     Ebi = _mm_xor_si128(Ebi, Di);
2637     Bsa = ROL64in128(Ebi, 62);
2638     Ego = _mm_xor_si128(Ego, Do);
2639     Bse = ROL64in128(Ego, 55);
2640     Eku = _mm_xor_si128(Eku, Du);
2641     Bsi = ROL64in128(Eku, 39);
2642     Asa = _mm_xor_si128(Bsa, _mm_andnot_si128(Bse, Bsi));
2643     Ema = _mm_xor_si128(Ema, Da);
2644     Bso = ROL64in128(Ema, 41);
2645     Ase = _mm_xor_si128(Bse, _mm_andnot_si128(Bsi, Bso));
2646     Ese = _mm_xor_si128(Ese, De);
2647     Bsu = ROL64in128(Ese, 2);
2648     Asi = _mm_xor_si128(Bsi, _mm_andnot_si128(Bso, Bsu));
2649     Aso = _mm_xor_si128(Bso, _mm_andnot_si128(Bsu, Bsa));
2650     Asu = _mm_xor_si128(Bsu, _mm_andnot_si128(Bsa, Bse));
2651 
2652     _mm_storeu_si128(M128_CAST(lanes+ 0), Aba);
2653     _mm_storeu_si128(M128_CAST(lanes+ 1), Abe);
2654     _mm_storeu_si128(M128_CAST(lanes+ 2), Abi);
2655     _mm_storeu_si128(M128_CAST(lanes+ 3), Abo);
2656     _mm_storeu_si128(M128_CAST(lanes+ 4), Abu);
2657     _mm_storeu_si128(M128_CAST(lanes+ 5), Aga);
2658     _mm_storeu_si128(M128_CAST(lanes+ 6), Age);
2659     _mm_storeu_si128(M128_CAST(lanes+ 7), Agi);
2660     _mm_storeu_si128(M128_CAST(lanes+ 8), Ago);
2661     _mm_storeu_si128(M128_CAST(lanes+ 9), Agu);
2662     _mm_storeu_si128(M128_CAST(lanes+10), Aka);
2663     _mm_storeu_si128(M128_CAST(lanes+11), Ake);
2664     _mm_storeu_si128(M128_CAST(lanes+12), Aki);
2665     _mm_storeu_si128(M128_CAST(lanes+13), Ako);
2666     _mm_storeu_si128(M128_CAST(lanes+14), Aku);
2667     _mm_storeu_si128(M128_CAST(lanes+15), Ama);
2668     _mm_storeu_si128(M128_CAST(lanes+16), Ame);
2669     _mm_storeu_si128(M128_CAST(lanes+17), Ami);
2670     _mm_storeu_si128(M128_CAST(lanes+18), Amo);
2671     _mm_storeu_si128(M128_CAST(lanes+19), Amu);
2672     _mm_storeu_si128(M128_CAST(lanes+20), Asa);
2673     _mm_storeu_si128(M128_CAST(lanes+21), Ase);
2674     _mm_storeu_si128(M128_CAST(lanes+22), Asi);
2675     _mm_storeu_si128(M128_CAST(lanes+23), Aso);
2676     _mm_storeu_si128(M128_CAST(lanes+24), Asu);
2677 }
2678 
2679 #endif
2680 
2681 NAMESPACE_END
2682