/* MIT License
 *
 * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "Hacl_Chacha20Poly1305_32.h"

/* Forward declaration; implemented in chacha20-ppc64le.S. */
void chacha20vsx(uint32_t len, uint8_t *output, uint8_t *block, uint8_t *k,
                 uint8_t *nonce, uint32_t ctr);

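/*
 * Absorb len bytes of text into the Poly1305 state in ctx: the accumulator
 * lives in ctx[0..4] as 26-bit limbs, with the precomputed key limbs (r,
 * then 5*r) starting at ctx[5].  Full 16-byte blocks are absorbed first;
 * any trailing bytes are zero-padded to a full block, matching the padded
 * Poly1305 input used by the AEAD construction.
 */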
static inline void
poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text)
{
    uint32_t n = len / (uint32_t)16U;
    uint32_t r = len % (uint32_t)16U;
    uint8_t *blocks = text;
    uint8_t *rem = text + n * (uint32_t)16U;
    uint64_t *pre0 = ctx + (uint32_t)5U;
    uint64_t *acc0 = ctx;
    uint32_t nb = n * (uint32_t)16U / (uint32_t)16U;
    uint32_t rem1 = n * (uint32_t)16U % (uint32_t)16U;
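    /*
     * Process the full 16-byte blocks: each block is loaded as five 26-bit
     * limbs, tagged with the 2^128 pad bit, added to the accumulator and
     * multiplied by r modulo 2^130 - 5.
     */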
    for (uint32_t i = (uint32_t)0U; i < nb; i++) {
        uint8_t *block = blocks + i * (uint32_t)16U;
        uint64_t e[5U] = { 0U };
        uint64_t u0 = load64_le(block);
        uint64_t lo = u0;
        uint64_t u = load64_le(block + (uint32_t)8U);
        uint64_t hi = u;
        uint64_t f0 = lo;
        uint64_t f1 = hi;
        uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
        uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
        uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
        uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
        uint64_t f40 = f1 >> (uint32_t)40U;
        uint64_t f01 = f010;
        uint64_t f111 = f110;
        uint64_t f2 = f20;
        uint64_t f3 = f30;
        uint64_t f41 = f40;
        e[0U] = f01;
        e[1U] = f111;
        e[2U] = f2;
        e[3U] = f3;
        e[4U] = f41;
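        /* Set the 2^128 pad bit (bit 24 of the top limb) for a full block. */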
        uint64_t b = (uint64_t)0x1000000U;
        uint64_t mask = b;
        uint64_t f4 = e[4U];
        e[4U] = f4 | mask;
        uint64_t *r1 = pre0;
        uint64_t *r5 = pre0 + (uint32_t)5U;
        uint64_t r0 = r1[0U];
        uint64_t r11 = r1[1U];
        uint64_t r2 = r1[2U];
        uint64_t r3 = r1[3U];
        uint64_t r4 = r1[4U];
        uint64_t r51 = r5[1U];
        uint64_t r52 = r5[2U];
        uint64_t r53 = r5[3U];
        uint64_t r54 = r5[4U];
        uint64_t f10 = e[0U];
        uint64_t f11 = e[1U];
        uint64_t f12 = e[2U];
        uint64_t f13 = e[3U];
        uint64_t f14 = e[4U];
        uint64_t a0 = acc0[0U];
        uint64_t a1 = acc0[1U];
        uint64_t a2 = acc0[2U];
        uint64_t a3 = acc0[3U];
        uint64_t a4 = acc0[4U];
        uint64_t a01 = a0 + f10;
        uint64_t a11 = a1 + f11;
        uint64_t a21 = a2 + f12;
        uint64_t a31 = a3 + f13;
        uint64_t a41 = a4 + f14;
        uint64_t a02 = r0 * a01;
        uint64_t a12 = r11 * a01;
        uint64_t a22 = r2 * a01;
        uint64_t a32 = r3 * a01;
        uint64_t a42 = r4 * a01;
        uint64_t a03 = a02 + r54 * a11;
        uint64_t a13 = a12 + r0 * a11;
        uint64_t a23 = a22 + r11 * a11;
        uint64_t a33 = a32 + r2 * a11;
        uint64_t a43 = a42 + r3 * a11;
        uint64_t a04 = a03 + r53 * a21;
        uint64_t a14 = a13 + r54 * a21;
        uint64_t a24 = a23 + r0 * a21;
        uint64_t a34 = a33 + r11 * a21;
        uint64_t a44 = a43 + r2 * a21;
        uint64_t a05 = a04 + r52 * a31;
        uint64_t a15 = a14 + r53 * a31;
        uint64_t a25 = a24 + r54 * a31;
        uint64_t a35 = a34 + r0 * a31;
        uint64_t a45 = a44 + r11 * a31;
        uint64_t a06 = a05 + r51 * a41;
        uint64_t a16 = a15 + r52 * a41;
        uint64_t a26 = a25 + r53 * a41;
        uint64_t a36 = a35 + r54 * a41;
        uint64_t a46 = a45 + r0 * a41;
        uint64_t t0 = a06;
        uint64_t t1 = a16;
        uint64_t t2 = a26;
        uint64_t t3 = a36;
        uint64_t t4 = a46;
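        /*
         * Partial carry propagation: bring each limb back below 2^26.  The
         * carry out of the top limb is multiplied by 5 (z11 + 4*z11) before
         * being folded into the lowest limb, since 2^130 = 5 mod 2^130 - 5.
         */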
        uint64_t mask26 = (uint64_t)0x3ffffffU;
        uint64_t z0 = t0 >> (uint32_t)26U;
        uint64_t z1 = t3 >> (uint32_t)26U;
        uint64_t x0 = t0 & mask26;
        uint64_t x3 = t3 & mask26;
        uint64_t x1 = t1 + z0;
        uint64_t x4 = t4 + z1;
        uint64_t z01 = x1 >> (uint32_t)26U;
        uint64_t z11 = x4 >> (uint32_t)26U;
        uint64_t t = z11 << (uint32_t)2U;
        uint64_t z12 = z11 + t;
        uint64_t x11 = x1 & mask26;
        uint64_t x41 = x4 & mask26;
        uint64_t x2 = t2 + z01;
        uint64_t x01 = x0 + z12;
        uint64_t z02 = x2 >> (uint32_t)26U;
        uint64_t z13 = x01 >> (uint32_t)26U;
        uint64_t x21 = x2 & mask26;
        uint64_t x02 = x01 & mask26;
        uint64_t x31 = x3 + z02;
        uint64_t x12 = x11 + z13;
        uint64_t z03 = x31 >> (uint32_t)26U;
        uint64_t x32 = x31 & mask26;
        uint64_t x42 = x41 + z03;
        uint64_t o0 = x02;
        uint64_t o1 = x12;
        uint64_t o2 = x21;
        uint64_t o3 = x32;
        uint64_t o4 = x42;
        acc0[0U] = o0;
        acc0[1U] = o1;
        acc0[2U] = o2;
        acc0[3U] = o3;
        acc0[4U] = o4;
    }
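    /*
     * rem1 = (n * 16) % 16 is always 0, so this branch is never taken; it
     * mirrors the generic block-processing structure of the generated
     * HACL* code.
     */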
    if (rem1 > (uint32_t)0U) {
        uint8_t *last = blocks + nb * (uint32_t)16U;
        uint64_t e[5U] = { 0U };
        uint8_t tmp[16U] = { 0U };
        memcpy(tmp, last, rem1 * sizeof(last[0U]));
        uint64_t u0 = load64_le(tmp);
        uint64_t lo = u0;
        uint64_t u = load64_le(tmp + (uint32_t)8U);
        uint64_t hi = u;
        uint64_t f0 = lo;
        uint64_t f1 = hi;
        uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
        uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
        uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
        uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
        uint64_t f40 = f1 >> (uint32_t)40U;
        uint64_t f01 = f010;
        uint64_t f111 = f110;
        uint64_t f2 = f20;
        uint64_t f3 = f30;
        uint64_t f4 = f40;
        e[0U] = f01;
        e[1U] = f111;
        e[2U] = f2;
        e[3U] = f3;
        e[4U] = f4;
        uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
        uint64_t mask = b;
        uint64_t fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
        e[rem1 * (uint32_t)8U / (uint32_t)26U] = fi | mask;
        uint64_t *r1 = pre0;
        uint64_t *r5 = pre0 + (uint32_t)5U;
        uint64_t r0 = r1[0U];
        uint64_t r11 = r1[1U];
        uint64_t r2 = r1[2U];
        uint64_t r3 = r1[3U];
        uint64_t r4 = r1[4U];
        uint64_t r51 = r5[1U];
        uint64_t r52 = r5[2U];
        uint64_t r53 = r5[3U];
        uint64_t r54 = r5[4U];
        uint64_t f10 = e[0U];
        uint64_t f11 = e[1U];
        uint64_t f12 = e[2U];
        uint64_t f13 = e[3U];
        uint64_t f14 = e[4U];
        uint64_t a0 = acc0[0U];
        uint64_t a1 = acc0[1U];
        uint64_t a2 = acc0[2U];
        uint64_t a3 = acc0[3U];
        uint64_t a4 = acc0[4U];
        uint64_t a01 = a0 + f10;
        uint64_t a11 = a1 + f11;
        uint64_t a21 = a2 + f12;
        uint64_t a31 = a3 + f13;
        uint64_t a41 = a4 + f14;
        uint64_t a02 = r0 * a01;
        uint64_t a12 = r11 * a01;
        uint64_t a22 = r2 * a01;
        uint64_t a32 = r3 * a01;
        uint64_t a42 = r4 * a01;
        uint64_t a03 = a02 + r54 * a11;
        uint64_t a13 = a12 + r0 * a11;
        uint64_t a23 = a22 + r11 * a11;
        uint64_t a33 = a32 + r2 * a11;
        uint64_t a43 = a42 + r3 * a11;
        uint64_t a04 = a03 + r53 * a21;
        uint64_t a14 = a13 + r54 * a21;
        uint64_t a24 = a23 + r0 * a21;
        uint64_t a34 = a33 + r11 * a21;
        uint64_t a44 = a43 + r2 * a21;
        uint64_t a05 = a04 + r52 * a31;
        uint64_t a15 = a14 + r53 * a31;
        uint64_t a25 = a24 + r54 * a31;
        uint64_t a35 = a34 + r0 * a31;
        uint64_t a45 = a44 + r11 * a31;
        uint64_t a06 = a05 + r51 * a41;
        uint64_t a16 = a15 + r52 * a41;
        uint64_t a26 = a25 + r53 * a41;
        uint64_t a36 = a35 + r54 * a41;
        uint64_t a46 = a45 + r0 * a41;
        uint64_t t0 = a06;
        uint64_t t1 = a16;
        uint64_t t2 = a26;
        uint64_t t3 = a36;
        uint64_t t4 = a46;
        uint64_t mask26 = (uint64_t)0x3ffffffU;
        uint64_t z0 = t0 >> (uint32_t)26U;
        uint64_t z1 = t3 >> (uint32_t)26U;
        uint64_t x0 = t0 & mask26;
        uint64_t x3 = t3 & mask26;
        uint64_t x1 = t1 + z0;
        uint64_t x4 = t4 + z1;
        uint64_t z01 = x1 >> (uint32_t)26U;
        uint64_t z11 = x4 >> (uint32_t)26U;
        uint64_t t = z11 << (uint32_t)2U;
        uint64_t z12 = z11 + t;
        uint64_t x11 = x1 & mask26;
        uint64_t x41 = x4 & mask26;
        uint64_t x2 = t2 + z01;
        uint64_t x01 = x0 + z12;
        uint64_t z02 = x2 >> (uint32_t)26U;
        uint64_t z13 = x01 >> (uint32_t)26U;
        uint64_t x21 = x2 & mask26;
        uint64_t x02 = x01 & mask26;
        uint64_t x31 = x3 + z02;
        uint64_t x12 = x11 + z13;
        uint64_t z03 = x31 >> (uint32_t)26U;
        uint64_t x32 = x31 & mask26;
        uint64_t x42 = x41 + z03;
        uint64_t o0 = x02;
        uint64_t o1 = x12;
        uint64_t o2 = x21;
        uint64_t o3 = x32;
        uint64_t o4 = x42;
        acc0[0U] = o0;
        acc0[1U] = o1;
        acc0[2U] = o2;
        acc0[3U] = o3;
        acc0[4U] = o4;
    }
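    /*
     * Absorb the trailing r = len % 16 bytes, if any: zero-pad them to a
     * full 16-byte block and absorb it with the 2^128 pad bit set.
     */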
    uint8_t tmp[16U] = { 0U };
    memcpy(tmp, rem, r * sizeof(rem[0U]));
    if (r > (uint32_t)0U) {
        uint64_t *pre = ctx + (uint32_t)5U;
        uint64_t *acc = ctx;
        uint64_t e[5U] = { 0U };
        uint64_t u0 = load64_le(tmp);
        uint64_t lo = u0;
        uint64_t u = load64_le(tmp + (uint32_t)8U);
        uint64_t hi = u;
        uint64_t f0 = lo;
        uint64_t f1 = hi;
        uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
        uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
        uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
        uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
        uint64_t f40 = f1 >> (uint32_t)40U;
        uint64_t f01 = f010;
        uint64_t f111 = f110;
        uint64_t f2 = f20;
        uint64_t f3 = f30;
        uint64_t f41 = f40;
        e[0U] = f01;
        e[1U] = f111;
        e[2U] = f2;
        e[3U] = f3;
        e[4U] = f41;
        uint64_t b = (uint64_t)0x1000000U;
        uint64_t mask = b;
        uint64_t f4 = e[4U];
        e[4U] = f4 | mask;
        uint64_t *r1 = pre;
        uint64_t *r5 = pre + (uint32_t)5U;
        uint64_t r0 = r1[0U];
        uint64_t r11 = r1[1U];
        uint64_t r2 = r1[2U];
        uint64_t r3 = r1[3U];
        uint64_t r4 = r1[4U];
        uint64_t r51 = r5[1U];
        uint64_t r52 = r5[2U];
        uint64_t r53 = r5[3U];
        uint64_t r54 = r5[4U];
        uint64_t f10 = e[0U];
        uint64_t f11 = e[1U];
        uint64_t f12 = e[2U];
        uint64_t f13 = e[3U];
        uint64_t f14 = e[4U];
        uint64_t a0 = acc[0U];
        uint64_t a1 = acc[1U];
        uint64_t a2 = acc[2U];
        uint64_t a3 = acc[3U];
        uint64_t a4 = acc[4U];
        uint64_t a01 = a0 + f10;
        uint64_t a11 = a1 + f11;
        uint64_t a21 = a2 + f12;
        uint64_t a31 = a3 + f13;
        uint64_t a41 = a4 + f14;
        uint64_t a02 = r0 * a01;
        uint64_t a12 = r11 * a01;
        uint64_t a22 = r2 * a01;
        uint64_t a32 = r3 * a01;
        uint64_t a42 = r4 * a01;
        uint64_t a03 = a02 + r54 * a11;
        uint64_t a13 = a12 + r0 * a11;
        uint64_t a23 = a22 + r11 * a11;
        uint64_t a33 = a32 + r2 * a11;
        uint64_t a43 = a42 + r3 * a11;
        uint64_t a04 = a03 + r53 * a21;
        uint64_t a14 = a13 + r54 * a21;
        uint64_t a24 = a23 + r0 * a21;
        uint64_t a34 = a33 + r11 * a21;
        uint64_t a44 = a43 + r2 * a21;
        uint64_t a05 = a04 + r52 * a31;
        uint64_t a15 = a14 + r53 * a31;
        uint64_t a25 = a24 + r54 * a31;
        uint64_t a35 = a34 + r0 * a31;
        uint64_t a45 = a44 + r11 * a31;
        uint64_t a06 = a05 + r51 * a41;
        uint64_t a16 = a15 + r52 * a41;
        uint64_t a26 = a25 + r53 * a41;
        uint64_t a36 = a35 + r54 * a41;
        uint64_t a46 = a45 + r0 * a41;
        uint64_t t0 = a06;
        uint64_t t1 = a16;
        uint64_t t2 = a26;
        uint64_t t3 = a36;
        uint64_t t4 = a46;
        uint64_t mask26 = (uint64_t)0x3ffffffU;
        uint64_t z0 = t0 >> (uint32_t)26U;
        uint64_t z1 = t3 >> (uint32_t)26U;
        uint64_t x0 = t0 & mask26;
        uint64_t x3 = t3 & mask26;
        uint64_t x1 = t1 + z0;
        uint64_t x4 = t4 + z1;
        uint64_t z01 = x1 >> (uint32_t)26U;
        uint64_t z11 = x4 >> (uint32_t)26U;
        uint64_t t = z11 << (uint32_t)2U;
        uint64_t z12 = z11 + t;
        uint64_t x11 = x1 & mask26;
        uint64_t x41 = x4 & mask26;
        uint64_t x2 = t2 + z01;
        uint64_t x01 = x0 + z12;
        uint64_t z02 = x2 >> (uint32_t)26U;
        uint64_t z13 = x01 >> (uint32_t)26U;
        uint64_t x21 = x2 & mask26;
        uint64_t x02 = x01 & mask26;
        uint64_t x31 = x3 + z02;
        uint64_t x12 = x11 + z13;
        uint64_t z03 = x31 >> (uint32_t)26U;
        uint64_t x32 = x31 & mask26;
        uint64_t x42 = x41 + z03;
        uint64_t o0 = x02;
        uint64_t o1 = x12;
        uint64_t o2 = x21;
        uint64_t o3 = x32;
        uint64_t o4 = x42;
        acc[0U] = o0;
        acc[1U] = o1;
        acc[2U] = o2;
        acc[3U] = o3;
        acc[4U] = o4;
        return;
    }
}

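/*
 * Compute the Poly1305 tag for the AEAD construction: initialise the state
 * with the one-time key k, absorb aad and m (each zero-padded to a 16-byte
 * boundary), absorb the block holding the two lengths, and write the
 * 16-byte tag to out.
 */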
static inline void
poly1305_do_32(
    uint8_t *k,
    uint32_t aadlen,
    uint8_t *aad,
    uint32_t mlen,
    uint8_t *m,
    uint8_t *out)
{
    uint64_t ctx[25U] = { 0U };
    uint8_t block[16U] = { 0U };
    Hacl_Poly1305_32_poly1305_init(ctx, k);
    poly1305_padded_32(ctx, aadlen, aad);
    poly1305_padded_32(ctx, mlen, m);
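    /* The final block encodes aadlen and mlen as two little-endian 64-bit values. */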
    store64_le(block, (uint64_t)aadlen);
    store64_le(block + (uint32_t)8U, (uint64_t)mlen);
    uint64_t *pre = ctx + (uint32_t)5U;
    uint64_t *acc = ctx;
    uint64_t e[5U] = { 0U };
    uint64_t u0 = load64_le(block);
    uint64_t lo = u0;
    uint64_t u = load64_le(block + (uint32_t)8U);
    uint64_t hi = u;
    uint64_t f0 = lo;
    uint64_t f1 = hi;
    uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
    uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
    uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
    uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
    uint64_t f40 = f1 >> (uint32_t)40U;
    uint64_t f01 = f010;
    uint64_t f111 = f110;
    uint64_t f2 = f20;
    uint64_t f3 = f30;
    uint64_t f41 = f40;
    e[0U] = f01;
    e[1U] = f111;
    e[2U] = f2;
    e[3U] = f3;
    e[4U] = f41;
    uint64_t b = (uint64_t)0x1000000U;
    uint64_t mask = b;
    uint64_t f4 = e[4U];
    e[4U] = f4 | mask;
    uint64_t *r = pre;
    uint64_t *r5 = pre + (uint32_t)5U;
    uint64_t r0 = r[0U];
    uint64_t r1 = r[1U];
    uint64_t r2 = r[2U];
    uint64_t r3 = r[3U];
    uint64_t r4 = r[4U];
    uint64_t r51 = r5[1U];
    uint64_t r52 = r5[2U];
    uint64_t r53 = r5[3U];
    uint64_t r54 = r5[4U];
    uint64_t f10 = e[0U];
    uint64_t f11 = e[1U];
    uint64_t f12 = e[2U];
    uint64_t f13 = e[3U];
    uint64_t f14 = e[4U];
    uint64_t a0 = acc[0U];
    uint64_t a1 = acc[1U];
    uint64_t a2 = acc[2U];
    uint64_t a3 = acc[3U];
    uint64_t a4 = acc[4U];
    uint64_t a01 = a0 + f10;
    uint64_t a11 = a1 + f11;
    uint64_t a21 = a2 + f12;
    uint64_t a31 = a3 + f13;
    uint64_t a41 = a4 + f14;
    uint64_t a02 = r0 * a01;
    uint64_t a12 = r1 * a01;
    uint64_t a22 = r2 * a01;
    uint64_t a32 = r3 * a01;
    uint64_t a42 = r4 * a01;
    uint64_t a03 = a02 + r54 * a11;
    uint64_t a13 = a12 + r0 * a11;
    uint64_t a23 = a22 + r1 * a11;
    uint64_t a33 = a32 + r2 * a11;
    uint64_t a43 = a42 + r3 * a11;
    uint64_t a04 = a03 + r53 * a21;
    uint64_t a14 = a13 + r54 * a21;
    uint64_t a24 = a23 + r0 * a21;
    uint64_t a34 = a33 + r1 * a21;
    uint64_t a44 = a43 + r2 * a21;
    uint64_t a05 = a04 + r52 * a31;
    uint64_t a15 = a14 + r53 * a31;
    uint64_t a25 = a24 + r54 * a31;
    uint64_t a35 = a34 + r0 * a31;
    uint64_t a45 = a44 + r1 * a31;
    uint64_t a06 = a05 + r51 * a41;
    uint64_t a16 = a15 + r52 * a41;
    uint64_t a26 = a25 + r53 * a41;
    uint64_t a36 = a35 + r54 * a41;
    uint64_t a46 = a45 + r0 * a41;
    uint64_t t0 = a06;
    uint64_t t1 = a16;
    uint64_t t2 = a26;
    uint64_t t3 = a36;
    uint64_t t4 = a46;
    uint64_t mask26 = (uint64_t)0x3ffffffU;
    uint64_t z0 = t0 >> (uint32_t)26U;
    uint64_t z1 = t3 >> (uint32_t)26U;
    uint64_t x0 = t0 & mask26;
    uint64_t x3 = t3 & mask26;
    uint64_t x1 = t1 + z0;
    uint64_t x4 = t4 + z1;
    uint64_t z01 = x1 >> (uint32_t)26U;
    uint64_t z11 = x4 >> (uint32_t)26U;
    uint64_t t = z11 << (uint32_t)2U;
    uint64_t z12 = z11 + t;
    uint64_t x11 = x1 & mask26;
    uint64_t x41 = x4 & mask26;
    uint64_t x2 = t2 + z01;
    uint64_t x01 = x0 + z12;
    uint64_t z02 = x2 >> (uint32_t)26U;
    uint64_t z13 = x01 >> (uint32_t)26U;
    uint64_t x21 = x2 & mask26;
    uint64_t x02 = x01 & mask26;
    uint64_t x31 = x3 + z02;
    uint64_t x12 = x11 + z13;
    uint64_t z03 = x31 >> (uint32_t)26U;
    uint64_t x32 = x31 & mask26;
    uint64_t x42 = x41 + z03;
    uint64_t o0 = x02;
    uint64_t o1 = x12;
    uint64_t o2 = x21;
    uint64_t o3 = x32;
    uint64_t o4 = x42;
    acc[0U] = o0;
    acc[1U] = o1;
    acc[2U] = o2;
    acc[3U] = o3;
    acc[4U] = o4;
    Hacl_Poly1305_32_poly1305_finish(out, k, ctx);
}

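/*
 * ChaCha20-Poly1305 AEAD encryption: encrypt m into cipher with ChaCha20
 * starting at block counter 1, derive the one-time Poly1305 key from
 * ChaCha20 block 0, and MAC the AAD and the ciphertext into mac.
 */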
void
Chacha20Poly1305_vsx_aead_encrypt(
    uint8_t *k,
    uint8_t *n,
    uint32_t aadlen,
    uint8_t *aad,
    uint32_t mlen,
    uint8_t *m,
    uint8_t *cipher,
    uint8_t *mac)
{
    chacha20vsx(mlen, cipher, m, k, n, (uint32_t)1U);
    uint8_t tmp[64U] = { 0U };
    chacha20vsx((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
    uint8_t *key = tmp;
    poly1305_do_32(key, aadlen, aad, mlen, cipher, mac);
}

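/*
 * ChaCha20-Poly1305 AEAD decryption: recompute the tag over the AAD and the
 * ciphertext, compare it with mac in constant time, and decrypt into m only
 * if they match.  Returns 0 on success and 1 on authentication failure, in
 * which case m is left untouched.
 */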
uint32_t
Chacha20Poly1305_vsx_aead_decrypt(
    uint8_t *k,
    uint8_t *n,
    uint32_t aadlen,
    uint8_t *aad,
    uint32_t mlen,
    uint8_t *m,
    uint8_t *cipher,
    uint8_t *mac)
{
    uint8_t computed_mac[16U] = { 0U };
    uint8_t tmp[64U] = { 0U };
    chacha20vsx((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U);
    uint8_t *key = tmp;
    poly1305_do_32(key, aadlen, aad, mlen, cipher, computed_mac);
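    /* Constant-time tag comparison: res stays 0xff only if all 16 bytes match. */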
    uint8_t res = (uint8_t)255U;
    for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
        uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]);
        res = uu____0 & res;
    }
    uint8_t z = res;
    if (z == (uint8_t)255U) {
        chacha20vsx(mlen, m, cipher, k, n, (uint32_t)1U);
        return (uint32_t)0U;
    }
    return (uint32_t)1U;
}