/* MIT License
 *
 * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "Hacl_Chacha20Poly1305_32.h"

/* Forward declaration of the ChaCha20 VSX routine implemented in
 * chacha20-ppc64le.S. As used below, it XORs `len` bytes of `block` with the
 * ChaCha20 keystream derived from the 32-byte key `k`, the nonce, and the
 * initial block counter `ctr`, writing the result to `output`. */
void chacha20vsx(uint32_t len, uint8_t *output, uint8_t *block, uint8_t *k,
                 uint8_t *nonce, uint32_t ctr);

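/*
 * poly1305_padded_32: absorb `len` bytes of `text` into the Poly1305 state
 * `ctx` (accumulator in ctx[0..4]; precomputed key limbs r and 5*r starting
 * at ctx[5], five 26-bit limbs per field element). Full 16-byte blocks are
 * absorbed directly; a trailing partial block is zero-padded to 16 bytes, as
 * required by the ChaCha20-Poly1305 AEAD construction.
 */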
static inline void
poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text)
{
    uint32_t n = len / (uint32_t)16U;
    uint32_t r = len % (uint32_t)16U;
    uint8_t *blocks = text;
    uint8_t *rem = text + n * (uint32_t)16U;
    uint64_t *pre0 = ctx + (uint32_t)5U;
    uint64_t *acc0 = ctx;
    uint32_t nb = n * (uint32_t)16U / (uint32_t)16U;
    uint32_t rem1 = n * (uint32_t)16U % (uint32_t)16U;
    for (uint32_t i = (uint32_t)0U; i < nb; i++) {
        uint8_t *block = blocks + i * (uint32_t)16U;
        uint64_t e[5U] = { 0U };
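        /* Split the 16-byte little-endian block into five 26-bit limbs. */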
        uint64_t u0 = load64_le(block);
        uint64_t lo = u0;
        uint64_t u = load64_le(block + (uint32_t)8U);
        uint64_t hi = u;
        uint64_t f0 = lo;
        uint64_t f1 = hi;
        uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
        uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
        uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
        uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
        uint64_t f40 = f1 >> (uint32_t)40U;
        uint64_t f01 = f010;
        uint64_t f111 = f110;
        uint64_t f2 = f20;
        uint64_t f3 = f30;
        uint64_t f41 = f40;
        e[0U] = f01;
        e[1U] = f111;
        e[2U] = f2;
        e[3U] = f3;
        e[4U] = f41;
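        /* Set the 2^128 bit: each full 16-byte block is absorbed as
           block + 2^128. */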
        uint64_t b = (uint64_t)0x1000000U;
        uint64_t mask = b;
        uint64_t f4 = e[4U];
        e[4U] = f4 | mask;
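        /* acc = (acc + e) * r mod (2^130 - 5): schoolbook multiplication in
           radix 2^26, using the precomputed 5*r limbs for the reduction. */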
        uint64_t *r1 = pre0;
        uint64_t *r5 = pre0 + (uint32_t)5U;
        uint64_t r0 = r1[0U];
        uint64_t r11 = r1[1U];
        uint64_t r2 = r1[2U];
        uint64_t r3 = r1[3U];
        uint64_t r4 = r1[4U];
        uint64_t r51 = r5[1U];
        uint64_t r52 = r5[2U];
        uint64_t r53 = r5[3U];
        uint64_t r54 = r5[4U];
        uint64_t f10 = e[0U];
        uint64_t f11 = e[1U];
        uint64_t f12 = e[2U];
        uint64_t f13 = e[3U];
        uint64_t f14 = e[4U];
        uint64_t a0 = acc0[0U];
        uint64_t a1 = acc0[1U];
        uint64_t a2 = acc0[2U];
        uint64_t a3 = acc0[3U];
        uint64_t a4 = acc0[4U];
        uint64_t a01 = a0 + f10;
        uint64_t a11 = a1 + f11;
        uint64_t a21 = a2 + f12;
        uint64_t a31 = a3 + f13;
        uint64_t a41 = a4 + f14;
        uint64_t a02 = r0 * a01;
        uint64_t a12 = r11 * a01;
        uint64_t a22 = r2 * a01;
        uint64_t a32 = r3 * a01;
        uint64_t a42 = r4 * a01;
        uint64_t a03 = a02 + r54 * a11;
        uint64_t a13 = a12 + r0 * a11;
        uint64_t a23 = a22 + r11 * a11;
        uint64_t a33 = a32 + r2 * a11;
        uint64_t a43 = a42 + r3 * a11;
        uint64_t a04 = a03 + r53 * a21;
        uint64_t a14 = a13 + r54 * a21;
        uint64_t a24 = a23 + r0 * a21;
        uint64_t a34 = a33 + r11 * a21;
        uint64_t a44 = a43 + r2 * a21;
        uint64_t a05 = a04 + r52 * a31;
        uint64_t a15 = a14 + r53 * a31;
        uint64_t a25 = a24 + r54 * a31;
        uint64_t a35 = a34 + r0 * a31;
        uint64_t a45 = a44 + r11 * a31;
        uint64_t a06 = a05 + r51 * a41;
        uint64_t a16 = a15 + r52 * a41;
        uint64_t a26 = a25 + r53 * a41;
        uint64_t a36 = a35 + r54 * a41;
        uint64_t a46 = a45 + r0 * a41;
        uint64_t t0 = a06;
        uint64_t t1 = a16;
        uint64_t t2 = a26;
        uint64_t t3 = a36;
        uint64_t t4 = a46;
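        /* Carry propagation; the carry out of the top limb is multiplied by 5
           (z12 = z11 + 4*z11) to reduce modulo 2^130 - 5. */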
        uint64_t mask26 = (uint64_t)0x3ffffffU;
        uint64_t z0 = t0 >> (uint32_t)26U;
        uint64_t z1 = t3 >> (uint32_t)26U;
        uint64_t x0 = t0 & mask26;
        uint64_t x3 = t3 & mask26;
        uint64_t x1 = t1 + z0;
        uint64_t x4 = t4 + z1;
        uint64_t z01 = x1 >> (uint32_t)26U;
        uint64_t z11 = x4 >> (uint32_t)26U;
        uint64_t t = z11 << (uint32_t)2U;
        uint64_t z12 = z11 + t;
        uint64_t x11 = x1 & mask26;
        uint64_t x41 = x4 & mask26;
        uint64_t x2 = t2 + z01;
        uint64_t x01 = x0 + z12;
        uint64_t z02 = x2 >> (uint32_t)26U;
        uint64_t z13 = x01 >> (uint32_t)26U;
        uint64_t x21 = x2 & mask26;
        uint64_t x02 = x01 & mask26;
        uint64_t x31 = x3 + z02;
        uint64_t x12 = x11 + z13;
        uint64_t z03 = x31 >> (uint32_t)26U;
        uint64_t x32 = x31 & mask26;
        uint64_t x42 = x41 + z03;
        uint64_t o0 = x02;
        uint64_t o1 = x12;
        uint64_t o2 = x21;
        uint64_t o3 = x32;
        uint64_t o4 = x42;
        acc0[0U] = o0;
        acc0[1U] = o1;
        acc0[2U] = o2;
        acc0[3U] = o3;
        acc0[4U] = o4;
    }
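    /* rem1 == (n * 16U) % 16U is always 0, so this branch is never taken;
       it appears to be retained verbatim from the generated HACL* code. */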
    if (rem1 > (uint32_t)0U) {
        uint8_t *last = blocks + nb * (uint32_t)16U;
        uint64_t e[5U] = { 0U };
        uint8_t tmp[16U] = { 0U };
        memcpy(tmp, last, rem1 * sizeof(last[0U]));
        uint64_t u0 = load64_le(tmp);
        uint64_t lo = u0;
        uint64_t u = load64_le(tmp + (uint32_t)8U);
        uint64_t hi = u;
        uint64_t f0 = lo;
        uint64_t f1 = hi;
        uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
        uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
        uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
        uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
        uint64_t f40 = f1 >> (uint32_t)40U;
        uint64_t f01 = f010;
        uint64_t f111 = f110;
        uint64_t f2 = f20;
        uint64_t f3 = f30;
        uint64_t f4 = f40;
        e[0U] = f01;
        e[1U] = f111;
        e[2U] = f2;
        e[3U] = f3;
        e[4U] = f4;
        uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
        uint64_t mask = b;
        uint64_t fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
        e[rem1 * (uint32_t)8U / (uint32_t)26U] = fi | mask;
        uint64_t *r1 = pre0;
        uint64_t *r5 = pre0 + (uint32_t)5U;
        uint64_t r0 = r1[0U];
        uint64_t r11 = r1[1U];
        uint64_t r2 = r1[2U];
        uint64_t r3 = r1[3U];
        uint64_t r4 = r1[4U];
        uint64_t r51 = r5[1U];
        uint64_t r52 = r5[2U];
        uint64_t r53 = r5[3U];
        uint64_t r54 = r5[4U];
        uint64_t f10 = e[0U];
        uint64_t f11 = e[1U];
        uint64_t f12 = e[2U];
        uint64_t f13 = e[3U];
        uint64_t f14 = e[4U];
        uint64_t a0 = acc0[0U];
        uint64_t a1 = acc0[1U];
        uint64_t a2 = acc0[2U];
        uint64_t a3 = acc0[3U];
        uint64_t a4 = acc0[4U];
        uint64_t a01 = a0 + f10;
        uint64_t a11 = a1 + f11;
        uint64_t a21 = a2 + f12;
        uint64_t a31 = a3 + f13;
        uint64_t a41 = a4 + f14;
        uint64_t a02 = r0 * a01;
        uint64_t a12 = r11 * a01;
        uint64_t a22 = r2 * a01;
        uint64_t a32 = r3 * a01;
        uint64_t a42 = r4 * a01;
        uint64_t a03 = a02 + r54 * a11;
        uint64_t a13 = a12 + r0 * a11;
        uint64_t a23 = a22 + r11 * a11;
        uint64_t a33 = a32 + r2 * a11;
        uint64_t a43 = a42 + r3 * a11;
        uint64_t a04 = a03 + r53 * a21;
        uint64_t a14 = a13 + r54 * a21;
        uint64_t a24 = a23 + r0 * a21;
        uint64_t a34 = a33 + r11 * a21;
        uint64_t a44 = a43 + r2 * a21;
        uint64_t a05 = a04 + r52 * a31;
        uint64_t a15 = a14 + r53 * a31;
        uint64_t a25 = a24 + r54 * a31;
        uint64_t a35 = a34 + r0 * a31;
        uint64_t a45 = a44 + r11 * a31;
        uint64_t a06 = a05 + r51 * a41;
        uint64_t a16 = a15 + r52 * a41;
        uint64_t a26 = a25 + r53 * a41;
        uint64_t a36 = a35 + r54 * a41;
        uint64_t a46 = a45 + r0 * a41;
        uint64_t t0 = a06;
        uint64_t t1 = a16;
        uint64_t t2 = a26;
        uint64_t t3 = a36;
        uint64_t t4 = a46;
        uint64_t mask26 = (uint64_t)0x3ffffffU;
        uint64_t z0 = t0 >> (uint32_t)26U;
        uint64_t z1 = t3 >> (uint32_t)26U;
        uint64_t x0 = t0 & mask26;
        uint64_t x3 = t3 & mask26;
        uint64_t x1 = t1 + z0;
        uint64_t x4 = t4 + z1;
        uint64_t z01 = x1 >> (uint32_t)26U;
        uint64_t z11 = x4 >> (uint32_t)26U;
        uint64_t t = z11 << (uint32_t)2U;
        uint64_t z12 = z11 + t;
        uint64_t x11 = x1 & mask26;
        uint64_t x41 = x4 & mask26;
        uint64_t x2 = t2 + z01;
        uint64_t x01 = x0 + z12;
        uint64_t z02 = x2 >> (uint32_t)26U;
        uint64_t z13 = x01 >> (uint32_t)26U;
        uint64_t x21 = x2 & mask26;
        uint64_t x02 = x01 & mask26;
        uint64_t x31 = x3 + z02;
        uint64_t x12 = x11 + z13;
        uint64_t z03 = x31 >> (uint32_t)26U;
        uint64_t x32 = x31 & mask26;
        uint64_t x42 = x41 + z03;
        uint64_t o0 = x02;
        uint64_t o1 = x12;
        uint64_t o2 = x21;
        uint64_t o3 = x32;
        uint64_t o4 = x42;
        acc0[0U] = o0;
        acc0[1U] = o1;
        acc0[2U] = o2;
        acc0[3U] = o3;
        acc0[4U] = o4;
    }
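    /* Zero-pad the trailing partial block (r bytes) to 16 bytes and absorb it
       as one more full block. */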
    uint8_t tmp[16U] = { 0U };
    memcpy(tmp, rem, r * sizeof(rem[0U]));
    if (r > (uint32_t)0U) {
        uint64_t *pre = ctx + (uint32_t)5U;
        uint64_t *acc = ctx;
        uint64_t e[5U] = { 0U };
        uint64_t u0 = load64_le(tmp);
        uint64_t lo = u0;
        uint64_t u = load64_le(tmp + (uint32_t)8U);
        uint64_t hi = u;
        uint64_t f0 = lo;
        uint64_t f1 = hi;
        uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
        uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
        uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
        uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
        uint64_t f40 = f1 >> (uint32_t)40U;
        uint64_t f01 = f010;
        uint64_t f111 = f110;
        uint64_t f2 = f20;
        uint64_t f3 = f30;
        uint64_t f41 = f40;
        e[0U] = f01;
        e[1U] = f111;
        e[2U] = f2;
        e[3U] = f3;
        e[4U] = f41;
        uint64_t b = (uint64_t)0x1000000U;
        uint64_t mask = b;
        uint64_t f4 = e[4U];
        e[4U] = f4 | mask;
        uint64_t *r1 = pre;
        uint64_t *r5 = pre + (uint32_t)5U;
        uint64_t r0 = r1[0U];
        uint64_t r11 = r1[1U];
        uint64_t r2 = r1[2U];
        uint64_t r3 = r1[3U];
        uint64_t r4 = r1[4U];
        uint64_t r51 = r5[1U];
        uint64_t r52 = r5[2U];
        uint64_t r53 = r5[3U];
        uint64_t r54 = r5[4U];
        uint64_t f10 = e[0U];
        uint64_t f11 = e[1U];
        uint64_t f12 = e[2U];
        uint64_t f13 = e[3U];
        uint64_t f14 = e[4U];
        uint64_t a0 = acc[0U];
        uint64_t a1 = acc[1U];
        uint64_t a2 = acc[2U];
        uint64_t a3 = acc[3U];
        uint64_t a4 = acc[4U];
        uint64_t a01 = a0 + f10;
        uint64_t a11 = a1 + f11;
        uint64_t a21 = a2 + f12;
        uint64_t a31 = a3 + f13;
        uint64_t a41 = a4 + f14;
        uint64_t a02 = r0 * a01;
        uint64_t a12 = r11 * a01;
        uint64_t a22 = r2 * a01;
        uint64_t a32 = r3 * a01;
        uint64_t a42 = r4 * a01;
        uint64_t a03 = a02 + r54 * a11;
        uint64_t a13 = a12 + r0 * a11;
        uint64_t a23 = a22 + r11 * a11;
        uint64_t a33 = a32 + r2 * a11;
        uint64_t a43 = a42 + r3 * a11;
        uint64_t a04 = a03 + r53 * a21;
        uint64_t a14 = a13 + r54 * a21;
        uint64_t a24 = a23 + r0 * a21;
        uint64_t a34 = a33 + r11 * a21;
        uint64_t a44 = a43 + r2 * a21;
        uint64_t a05 = a04 + r52 * a31;
        uint64_t a15 = a14 + r53 * a31;
        uint64_t a25 = a24 + r54 * a31;
        uint64_t a35 = a34 + r0 * a31;
        uint64_t a45 = a44 + r11 * a31;
        uint64_t a06 = a05 + r51 * a41;
        uint64_t a16 = a15 + r52 * a41;
        uint64_t a26 = a25 + r53 * a41;
        uint64_t a36 = a35 + r54 * a41;
        uint64_t a46 = a45 + r0 * a41;
        uint64_t t0 = a06;
        uint64_t t1 = a16;
        uint64_t t2 = a26;
        uint64_t t3 = a36;
        uint64_t t4 = a46;
        uint64_t mask26 = (uint64_t)0x3ffffffU;
        uint64_t z0 = t0 >> (uint32_t)26U;
        uint64_t z1 = t3 >> (uint32_t)26U;
        uint64_t x0 = t0 & mask26;
        uint64_t x3 = t3 & mask26;
        uint64_t x1 = t1 + z0;
        uint64_t x4 = t4 + z1;
        uint64_t z01 = x1 >> (uint32_t)26U;
        uint64_t z11 = x4 >> (uint32_t)26U;
        uint64_t t = z11 << (uint32_t)2U;
        uint64_t z12 = z11 + t;
        uint64_t x11 = x1 & mask26;
        uint64_t x41 = x4 & mask26;
        uint64_t x2 = t2 + z01;
        uint64_t x01 = x0 + z12;
        uint64_t z02 = x2 >> (uint32_t)26U;
        uint64_t z13 = x01 >> (uint32_t)26U;
        uint64_t x21 = x2 & mask26;
        uint64_t x02 = x01 & mask26;
        uint64_t x31 = x3 + z02;
        uint64_t x12 = x11 + z13;
        uint64_t z03 = x31 >> (uint32_t)26U;
        uint64_t x32 = x31 & mask26;
        uint64_t x42 = x41 + z03;
        uint64_t o0 = x02;
        uint64_t o1 = x12;
        uint64_t o2 = x21;
        uint64_t o3 = x32;
        uint64_t o4 = x42;
        acc[0U] = o0;
        acc[1U] = o1;
        acc[2U] = o2;
        acc[3U] = o3;
        acc[4U] = o4;
        return;
    }
}

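/*
 * poly1305_do_32: one-shot Poly1305 MAC for the ChaCha20-Poly1305 AEAD
 * construction. It absorbs the AAD and the ciphertext (each zero-padded to a
 * 16-byte boundary), then a final block holding the two lengths, and writes
 * the 16-byte tag to `out`. `k` is the 32-byte one-time Poly1305 key.
 */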
static inline void
poly1305_do_32(
    uint8_t *k,
    uint32_t aadlen,
    uint8_t *aad,
    uint32_t mlen,
    uint8_t *m,
    uint8_t *out)
{
    uint64_t ctx[25U] = { 0U };
    uint8_t block[16U] = { 0U };
    Hacl_Poly1305_32_poly1305_init(ctx, k);
    poly1305_padded_32(ctx, aadlen, aad);
    poly1305_padded_32(ctx, mlen, m);
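    /* Final block: the 64-bit little-endian AAD length followed by the 64-bit
       little-endian ciphertext length, absorbed below like a regular block. */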
    store64_le(block, (uint64_t)aadlen);
    store64_le(block + (uint32_t)8U, (uint64_t)mlen);
    uint64_t *pre = ctx + (uint32_t)5U;
    uint64_t *acc = ctx;
    uint64_t e[5U] = { 0U };
    uint64_t u0 = load64_le(block);
    uint64_t lo = u0;
    uint64_t u = load64_le(block + (uint32_t)8U);
    uint64_t hi = u;
    uint64_t f0 = lo;
    uint64_t f1 = hi;
    uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
    uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
    uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
    uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
    uint64_t f40 = f1 >> (uint32_t)40U;
    uint64_t f01 = f010;
    uint64_t f111 = f110;
    uint64_t f2 = f20;
    uint64_t f3 = f30;
    uint64_t f41 = f40;
    e[0U] = f01;
    e[1U] = f111;
    e[2U] = f2;
    e[3U] = f3;
    e[4U] = f41;
    uint64_t b = (uint64_t)0x1000000U;
    uint64_t mask = b;
    uint64_t f4 = e[4U];
    e[4U] = f4 | mask;
    uint64_t *r = pre;
    uint64_t *r5 = pre + (uint32_t)5U;
    uint64_t r0 = r[0U];
    uint64_t r1 = r[1U];
    uint64_t r2 = r[2U];
    uint64_t r3 = r[3U];
    uint64_t r4 = r[4U];
    uint64_t r51 = r5[1U];
    uint64_t r52 = r5[2U];
    uint64_t r53 = r5[3U];
    uint64_t r54 = r5[4U];
    uint64_t f10 = e[0U];
    uint64_t f11 = e[1U];
    uint64_t f12 = e[2U];
    uint64_t f13 = e[3U];
    uint64_t f14 = e[4U];
    uint64_t a0 = acc[0U];
    uint64_t a1 = acc[1U];
    uint64_t a2 = acc[2U];
    uint64_t a3 = acc[3U];
    uint64_t a4 = acc[4U];
    uint64_t a01 = a0 + f10;
    uint64_t a11 = a1 + f11;
    uint64_t a21 = a2 + f12;
    uint64_t a31 = a3 + f13;
    uint64_t a41 = a4 + f14;
    uint64_t a02 = r0 * a01;
    uint64_t a12 = r1 * a01;
    uint64_t a22 = r2 * a01;
    uint64_t a32 = r3 * a01;
    uint64_t a42 = r4 * a01;
    uint64_t a03 = a02 + r54 * a11;
    uint64_t a13 = a12 + r0 * a11;
    uint64_t a23 = a22 + r1 * a11;
    uint64_t a33 = a32 + r2 * a11;
    uint64_t a43 = a42 + r3 * a11;
    uint64_t a04 = a03 + r53 * a21;
    uint64_t a14 = a13 + r54 * a21;
    uint64_t a24 = a23 + r0 * a21;
    uint64_t a34 = a33 + r1 * a21;
    uint64_t a44 = a43 + r2 * a21;
    uint64_t a05 = a04 + r52 * a31;
    uint64_t a15 = a14 + r53 * a31;
    uint64_t a25 = a24 + r54 * a31;
    uint64_t a35 = a34 + r0 * a31;
    uint64_t a45 = a44 + r1 * a31;
    uint64_t a06 = a05 + r51 * a41;
    uint64_t a16 = a15 + r52 * a41;
    uint64_t a26 = a25 + r53 * a41;
    uint64_t a36 = a35 + r54 * a41;
    uint64_t a46 = a45 + r0 * a41;
    uint64_t t0 = a06;
    uint64_t t1 = a16;
    uint64_t t2 = a26;
    uint64_t t3 = a36;
    uint64_t t4 = a46;
    uint64_t mask26 = (uint64_t)0x3ffffffU;
    uint64_t z0 = t0 >> (uint32_t)26U;
    uint64_t z1 = t3 >> (uint32_t)26U;
    uint64_t x0 = t0 & mask26;
    uint64_t x3 = t3 & mask26;
    uint64_t x1 = t1 + z0;
    uint64_t x4 = t4 + z1;
    uint64_t z01 = x1 >> (uint32_t)26U;
    uint64_t z11 = x4 >> (uint32_t)26U;
    uint64_t t = z11 << (uint32_t)2U;
    uint64_t z12 = z11 + t;
    uint64_t x11 = x1 & mask26;
    uint64_t x41 = x4 & mask26;
    uint64_t x2 = t2 + z01;
    uint64_t x01 = x0 + z12;
    uint64_t z02 = x2 >> (uint32_t)26U;
    uint64_t z13 = x01 >> (uint32_t)26U;
    uint64_t x21 = x2 & mask26;
    uint64_t x02 = x01 & mask26;
    uint64_t x31 = x3 + z02;
    uint64_t x12 = x11 + z13;
    uint64_t z03 = x31 >> (uint32_t)26U;
    uint64_t x32 = x31 & mask26;
    uint64_t x42 = x41 + z03;
    uint64_t o0 = x02;
    uint64_t o1 = x12;
    uint64_t o2 = x21;
    uint64_t o3 = x32;
    uint64_t o4 = x42;
    acc[0U] = o0;
    acc[1U] = o1;
    acc[2U] = o2;
    acc[3U] = o3;
    acc[4U] = o4;
    Hacl_Poly1305_32_poly1305_finish(out, k, ctx);
}

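/*
 * Chacha20Poly1305_vsx_aead_encrypt: ChaCha20-Poly1305 AEAD encryption.
 * Encrypts `mlen` bytes of `m` into `cipher` with ChaCha20 (VSX), then MACs
 * the AAD and the ciphertext with Poly1305, writing the 16-byte tag to `mac`.
 * `k` is the 32-byte key and `n` the nonce.
 *
 * Minimal usage sketch (hypothetical caller; buffer names and sizes are
 * illustrative only):
 *
 *   uint8_t key[32], nonce[12], tag[16];
 *   uint8_t aad[8], msg[64], ct[64];
 *   // ... fill key, nonce, aad, msg ...
 *   Chacha20Poly1305_vsx_aead_encrypt(key, nonce, (uint32_t)sizeof(aad), aad,
 *                                     (uint32_t)sizeof(msg), msg, ct, tag);
 */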
void
Chacha20Poly1305_vsx_aead_encrypt(
    uint8_t *k,
    uint8_t *n,
    uint32_t aadlen,
    uint8_t *aad,
    uint32_t mlen,
    uint8_t *m,
    uint8_t *cipher,
    uint8_t *mac)
{
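    /* Encrypt the plaintext with ChaCha20, starting at block counter 1. */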
    chacha20vsx(mlen, cipher, m, k, n, (uint32_t)1U);
    uint8_t tmp[64U] = { 0U };
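    /* Derive the one-time Poly1305 key: the first 32 bytes of the ChaCha20
       keystream block generated with counter 0 (bytes 32..63 are unused). */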
    chacha20vsx((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
    uint8_t *key = tmp;
    poly1305_do_32(key, aadlen, aad, mlen, cipher, mac);
}

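/*
 * Chacha20Poly1305_vsx_aead_decrypt: recompute the Poly1305 tag over the AAD
 * and `cipher`, compare it with `mac` in constant time, and decrypt into `m`
 * only if the tags match. Returns 0 on success and 1 on authentication
 * failure, in which case `m` is not written.
 */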
uint32_t
Chacha20Poly1305_vsx_aead_decrypt(
    uint8_t *k,
    uint8_t *n,
    uint32_t aadlen,
    uint8_t *aad,
    uint32_t mlen,
    uint8_t *m,
    uint8_t *cipher,
    uint8_t *mac)
{
    uint8_t computed_mac[16U] = { 0U };
    uint8_t tmp[64U] = { 0U };
    chacha20vsx((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
    uint8_t *key = tmp;
    poly1305_do_32(key, aadlen, aad, mlen, cipher, computed_mac);
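    /* Constant-time tag comparison: res remains 0xFF only if all 16 bytes of
       computed_mac equal mac. */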
    uint8_t res = (uint8_t)255U;
    for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
        uint8_t eq_mask = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
        res = eq_mask & res;
    }
    uint8_t z = res;
    if (z == (uint8_t)255U) {
        chacha20vsx(mlen, m, cipher, k, n, (uint32_t)1U);
        return (uint32_t)0U;
    }
    return (uint32_t)1U;
}