1 /* MIT License
2 *
3 * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in all
13 * copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "Hacl_Chacha20_Vec256.h"
25
26 static inline void
double_round_256(Lib_IntVector_Intrinsics_vec256 * st)27 double_round_256(Lib_IntVector_Intrinsics_vec256 *st)
28 {
29 st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[4U]);
30 Lib_IntVector_Intrinsics_vec256 std = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[0U]);
31 st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std, (uint32_t)16U);
32 st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[12U]);
33 Lib_IntVector_Intrinsics_vec256 std0 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[8U]);
34 st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std0, (uint32_t)12U);
35 st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[4U]);
36 Lib_IntVector_Intrinsics_vec256 std1 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[0U]);
37 st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std1, (uint32_t)8U);
38 st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[12U]);
39 Lib_IntVector_Intrinsics_vec256 std2 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[8U]);
40 st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std2, (uint32_t)7U);
41 st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[5U]);
42 Lib_IntVector_Intrinsics_vec256 std3 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[1U]);
43 st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std3, (uint32_t)16U);
44 st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[13U]);
45 Lib_IntVector_Intrinsics_vec256 std4 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[9U]);
46 st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std4, (uint32_t)12U);
47 st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[5U]);
48 Lib_IntVector_Intrinsics_vec256 std5 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[1U]);
49 st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std5, (uint32_t)8U);
50 st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[13U]);
51 Lib_IntVector_Intrinsics_vec256 std6 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[9U]);
52 st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std6, (uint32_t)7U);
53 st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[6U]);
54 Lib_IntVector_Intrinsics_vec256 std7 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[2U]);
55 st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std7, (uint32_t)16U);
56 st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[14U]);
57 Lib_IntVector_Intrinsics_vec256 std8 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[10U]);
58 st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std8, (uint32_t)12U);
59 st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[6U]);
60 Lib_IntVector_Intrinsics_vec256 std9 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[2U]);
61 st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std9, (uint32_t)8U);
62 st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[14U]);
63 Lib_IntVector_Intrinsics_vec256 std10 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[10U]);
64 st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std10, (uint32_t)7U);
65 st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[7U]);
66 Lib_IntVector_Intrinsics_vec256 std11 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[3U]);
67 st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std11, (uint32_t)16U);
68 st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[15U]);
69 Lib_IntVector_Intrinsics_vec256 std12 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[11U]);
70 st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std12, (uint32_t)12U);
71 st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[7U]);
72 Lib_IntVector_Intrinsics_vec256 std13 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[3U]);
73 st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std13, (uint32_t)8U);
74 st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[15U]);
75 Lib_IntVector_Intrinsics_vec256 std14 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[11U]);
76 st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std14, (uint32_t)7U);
77 st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[5U]);
78 Lib_IntVector_Intrinsics_vec256 std15 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[0U]);
79 st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std15, (uint32_t)16U);
80 st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[15U]);
81 Lib_IntVector_Intrinsics_vec256 std16 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[10U]);
82 st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std16, (uint32_t)12U);
83 st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[5U]);
84 Lib_IntVector_Intrinsics_vec256 std17 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[0U]);
85 st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std17, (uint32_t)8U);
86 st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[15U]);
87 Lib_IntVector_Intrinsics_vec256 std18 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[10U]);
88 st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std18, (uint32_t)7U);
89 st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[6U]);
90 Lib_IntVector_Intrinsics_vec256 std19 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[1U]);
91 st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std19, (uint32_t)16U);
92 st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[12U]);
93 Lib_IntVector_Intrinsics_vec256 std20 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[11U]);
94 st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std20, (uint32_t)12U);
95 st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[6U]);
96 Lib_IntVector_Intrinsics_vec256 std21 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[1U]);
97 st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std21, (uint32_t)8U);
98 st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[12U]);
99 Lib_IntVector_Intrinsics_vec256 std22 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[11U]);
100 st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std22, (uint32_t)7U);
101 st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[7U]);
102 Lib_IntVector_Intrinsics_vec256 std23 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[2U]);
103 st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std23, (uint32_t)16U);
104 st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[13U]);
105 Lib_IntVector_Intrinsics_vec256 std24 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[8U]);
106 st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std24, (uint32_t)12U);
107 st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[7U]);
108 Lib_IntVector_Intrinsics_vec256 std25 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[2U]);
109 st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std25, (uint32_t)8U);
110 st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[13U]);
111 Lib_IntVector_Intrinsics_vec256 std26 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[8U]);
112 st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std26, (uint32_t)7U);
113 st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[4U]);
114 Lib_IntVector_Intrinsics_vec256 std27 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[3U]);
115 st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std27, (uint32_t)16U);
116 st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[14U]);
117 Lib_IntVector_Intrinsics_vec256 std28 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[9U]);
118 st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std28, (uint32_t)12U);
119 st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[4U]);
120 Lib_IntVector_Intrinsics_vec256 std29 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[3U]);
121 st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std29, (uint32_t)8U);
122 st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[14U]);
123 Lib_IntVector_Intrinsics_vec256 std30 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[9U]);
124 st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std30, (uint32_t)7U);
125 }
126
127 static inline void
chacha20_core_256(Lib_IntVector_Intrinsics_vec256 * k,Lib_IntVector_Intrinsics_vec256 * ctx,uint32_t ctr)128 chacha20_core_256(
129 Lib_IntVector_Intrinsics_vec256 *k,
130 Lib_IntVector_Intrinsics_vec256 *ctx,
131 uint32_t ctr)
132 {
133 memcpy(k, ctx, (uint32_t)16U * sizeof(Lib_IntVector_Intrinsics_vec256));
134 uint32_t ctr_u32 = (uint32_t)8U * ctr;
135 Lib_IntVector_Intrinsics_vec256 cv = Lib_IntVector_Intrinsics_vec256_load32(ctr_u32);
136 k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv);
137 double_round_256(k);
138 double_round_256(k);
139 double_round_256(k);
140 double_round_256(k);
141 double_round_256(k);
142 double_round_256(k);
143 double_round_256(k);
144 double_round_256(k);
145 double_round_256(k);
146 double_round_256(k);
147 for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
148 Lib_IntVector_Intrinsics_vec256 *os = k;
149 Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(k[i], ctx[i]);
150 os[i] = x;
151 }
152 k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv);
153 }
154
155 static inline void
chacha20_init_256(Lib_IntVector_Intrinsics_vec256 * ctx,uint8_t * k,uint8_t * n,uint32_t ctr)156 chacha20_init_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr)
157 {
158 uint32_t ctx1[16U] = { 0U };
159 uint32_t *uu____0 = ctx1;
160 for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) {
161 uint32_t *os = uu____0;
162 uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i];
163 os[i] = x;
164 }
165 uint32_t *uu____1 = ctx1 + (uint32_t)4U;
166 for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i++) {
167 uint32_t *os = uu____1;
168 uint8_t *bj = k + i * (uint32_t)4U;
169 uint32_t u = load32_le(bj);
170 uint32_t r = u;
171 uint32_t x = r;
172 os[i] = x;
173 }
174 ctx1[12U] = ctr;
175 uint32_t *uu____2 = ctx1 + (uint32_t)13U;
176 for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i++) {
177 uint32_t *os = uu____2;
178 uint8_t *bj = n + i * (uint32_t)4U;
179 uint32_t u = load32_le(bj);
180 uint32_t r = u;
181 uint32_t x = r;
182 os[i] = x;
183 }
184 for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
185 Lib_IntVector_Intrinsics_vec256 *os = ctx;
186 uint32_t x = ctx1[i];
187 Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_load32(x);
188 os[i] = x0;
189 }
190 Lib_IntVector_Intrinsics_vec256
191 ctr1 =
192 Lib_IntVector_Intrinsics_vec256_load32s((uint32_t)0U,
193 (uint32_t)1U,
194 (uint32_t)2U,
195 (uint32_t)3U,
196 (uint32_t)4U,
197 (uint32_t)5U,
198 (uint32_t)6U,
199 (uint32_t)7U);
200 Lib_IntVector_Intrinsics_vec256 c12 = ctx[12U];
201 ctx[12U] = Lib_IntVector_Intrinsics_vec256_add32(c12, ctr1);
202 }
203
/* Encrypt `len` bytes of `text` into `out` with ChaCha20, using the 32-byte
   `key`, 12-byte nonce `n` and starting block counter `ctr`.
   Processes the input in 512-byte super-blocks (8 x 64-byte ChaCha blocks
   computed in parallel in 256-bit vectors); the final partial super-block is
   handled through a zero-padded stack buffer. */
void
Hacl_Chacha20_Vec256_chacha20_encrypt_256(
  uint32_t len,
  uint8_t *out,
  uint8_t *text,
  uint8_t *key,
  uint8_t *n,
  uint32_t ctr)
{
  Lib_IntVector_Intrinsics_vec256 ctx[16U];
  for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
    ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero;
  chacha20_init_256(ctx, key, n, ctr);
  /* rem and rem1 are the same value (generated-code artifact): the byte count
     of the trailing partial super-block. nb is the number of full ones. */
  uint32_t rem = len % (uint32_t)512U;
  uint32_t nb = len / (uint32_t)512U;
  uint32_t rem1 = len % (uint32_t)512U;
  for (uint32_t i = (uint32_t)0U; i < nb; i++) {
    uint8_t *uu____0 = out + i * (uint32_t)512U;
    uint8_t *uu____1 = text + i * (uint32_t)512U;
    Lib_IntVector_Intrinsics_vec256 k[16U];
    for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
      k[_i] = Lib_IntVector_Intrinsics_vec256_zero;
    /* k now holds 8 keystream blocks, one per vector lane: k[w] lane j is
       state word w of block 8*i + j. */
    chacha20_core_256(k, ctx, i);
    /* 8x8 32-bit transpose of k[0..7] via 32-, 64- and 128-bit interleave
       stages, so each vector becomes 32 contiguous keystream bytes. */
    Lib_IntVector_Intrinsics_vec256 v00 = k[0U];
    Lib_IntVector_Intrinsics_vec256 v16 = k[1U];
    Lib_IntVector_Intrinsics_vec256 v20 = k[2U];
    Lib_IntVector_Intrinsics_vec256 v30 = k[3U];
    Lib_IntVector_Intrinsics_vec256 v40 = k[4U];
    Lib_IntVector_Intrinsics_vec256 v50 = k[5U];
    Lib_IntVector_Intrinsics_vec256 v60 = k[6U];
    Lib_IntVector_Intrinsics_vec256 v70 = k[7U];
    Lib_IntVector_Intrinsics_vec256
    v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16);
    Lib_IntVector_Intrinsics_vec256
    v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16);
    Lib_IntVector_Intrinsics_vec256
    v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30);
    Lib_IntVector_Intrinsics_vec256
    v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30);
    Lib_IntVector_Intrinsics_vec256
    v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50);
    Lib_IntVector_Intrinsics_vec256
    v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50);
    Lib_IntVector_Intrinsics_vec256
    v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70);
    Lib_IntVector_Intrinsics_vec256
    v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70);
    Lib_IntVector_Intrinsics_vec256
    v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_);
    Lib_IntVector_Intrinsics_vec256
    v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_);
    Lib_IntVector_Intrinsics_vec256
    v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_);
    Lib_IntVector_Intrinsics_vec256
    v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_);
    Lib_IntVector_Intrinsics_vec256
    v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_);
    Lib_IntVector_Intrinsics_vec256
    v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_);
    Lib_IntVector_Intrinsics_vec256
    v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_);
    Lib_IntVector_Intrinsics_vec256
    v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_);
    Lib_IntVector_Intrinsics_vec256
    v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__);
    Lib_IntVector_Intrinsics_vec256
    v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__);
    Lib_IntVector_Intrinsics_vec256
    v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__);
    Lib_IntVector_Intrinsics_vec256
    v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__);
    Lib_IntVector_Intrinsics_vec256
    v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__);
    Lib_IntVector_Intrinsics_vec256
    v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__);
    Lib_IntVector_Intrinsics_vec256
    v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__);
    Lib_IntVector_Intrinsics_vec256
    v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__);
    Lib_IntVector_Intrinsics_vec256 v0 = v0___;
    Lib_IntVector_Intrinsics_vec256 v1 = v2___;
    Lib_IntVector_Intrinsics_vec256 v2 = v4___;
    Lib_IntVector_Intrinsics_vec256 v3 = v6___;
    Lib_IntVector_Intrinsics_vec256 v4 = v1___;
    Lib_IntVector_Intrinsics_vec256 v5 = v3___;
    Lib_IntVector_Intrinsics_vec256 v6 = v5___;
    Lib_IntVector_Intrinsics_vec256 v7 = v7___;
    /* Same 8x8 transpose for the second half of the state, k[8..15]. */
    Lib_IntVector_Intrinsics_vec256 v01 = k[8U];
    Lib_IntVector_Intrinsics_vec256 v110 = k[9U];
    Lib_IntVector_Intrinsics_vec256 v21 = k[10U];
    Lib_IntVector_Intrinsics_vec256 v31 = k[11U];
    Lib_IntVector_Intrinsics_vec256 v41 = k[12U];
    Lib_IntVector_Intrinsics_vec256 v51 = k[13U];
    Lib_IntVector_Intrinsics_vec256 v61 = k[14U];
    Lib_IntVector_Intrinsics_vec256 v71 = k[15U];
    Lib_IntVector_Intrinsics_vec256
    v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110);
    Lib_IntVector_Intrinsics_vec256
    v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110);
    Lib_IntVector_Intrinsics_vec256
    v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31);
    Lib_IntVector_Intrinsics_vec256
    v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31);
    Lib_IntVector_Intrinsics_vec256
    v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51);
    Lib_IntVector_Intrinsics_vec256
    v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51);
    Lib_IntVector_Intrinsics_vec256
    v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71);
    Lib_IntVector_Intrinsics_vec256
    v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71);
    Lib_IntVector_Intrinsics_vec256
    v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0);
    Lib_IntVector_Intrinsics_vec256
    v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0);
    Lib_IntVector_Intrinsics_vec256
    v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0);
    Lib_IntVector_Intrinsics_vec256
    v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0);
    Lib_IntVector_Intrinsics_vec256
    v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0);
    Lib_IntVector_Intrinsics_vec256
    v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0);
    Lib_IntVector_Intrinsics_vec256
    v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0);
    Lib_IntVector_Intrinsics_vec256
    v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0);
    Lib_IntVector_Intrinsics_vec256
    v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0);
    Lib_IntVector_Intrinsics_vec256
    v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0);
    Lib_IntVector_Intrinsics_vec256
    v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0);
    Lib_IntVector_Intrinsics_vec256
    v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0);
    Lib_IntVector_Intrinsics_vec256
    v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0);
    Lib_IntVector_Intrinsics_vec256
    v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0);
    Lib_IntVector_Intrinsics_vec256
    v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0);
    Lib_IntVector_Intrinsics_vec256
    v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0);
    Lib_IntVector_Intrinsics_vec256 v8 = v0___0;
    Lib_IntVector_Intrinsics_vec256 v9 = v2___0;
    Lib_IntVector_Intrinsics_vec256 v10 = v4___0;
    Lib_IntVector_Intrinsics_vec256 v11 = v6___0;
    Lib_IntVector_Intrinsics_vec256 v12 = v1___0;
    Lib_IntVector_Intrinsics_vec256 v13 = v3___0;
    Lib_IntVector_Intrinsics_vec256 v14 = v5___0;
    Lib_IntVector_Intrinsics_vec256 v15 = v7___0;
    /* Interleave the two transposed halves so k[0..15] is the keystream in
       byte order: k[j] holds bytes 32*j .. 32*j+31 of this super-block. */
    k[0U] = v0;
    k[1U] = v8;
    k[2U] = v1;
    k[3U] = v9;
    k[4U] = v2;
    k[5U] = v10;
    k[6U] = v3;
    k[7U] = v11;
    k[8U] = v4;
    k[9U] = v12;
    k[10U] = v5;
    k[11U] = v13;
    k[12U] = v6;
    k[13U] = v14;
    k[14U] = v7;
    k[15U] = v15;
    /* XOR 512 bytes of plaintext with the keystream, 32 bytes at a time. */
    for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) {
      Lib_IntVector_Intrinsics_vec256
      x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U);
      Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]);
      Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y);
    }
  }
  if (rem1 > (uint32_t)0U) {
    /* Trailing partial super-block: copy into a zero-padded 512-byte buffer,
       encrypt in place, then copy only the rem valid bytes back out. */
    uint8_t *uu____2 = out + nb * (uint32_t)512U;
    uint8_t *uu____3 = text + nb * (uint32_t)512U;
    uint8_t plain[512U] = { 0U };
    memcpy(plain, uu____3, rem * sizeof(uint8_t));
    Lib_IntVector_Intrinsics_vec256 k[16U];
    for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
      k[_i] = Lib_IntVector_Intrinsics_vec256_zero;
    chacha20_core_256(k, ctx, nb);
    /* Same keystream transpose as in the main loop (see above). */
    Lib_IntVector_Intrinsics_vec256 v00 = k[0U];
    Lib_IntVector_Intrinsics_vec256 v16 = k[1U];
    Lib_IntVector_Intrinsics_vec256 v20 = k[2U];
    Lib_IntVector_Intrinsics_vec256 v30 = k[3U];
    Lib_IntVector_Intrinsics_vec256 v40 = k[4U];
    Lib_IntVector_Intrinsics_vec256 v50 = k[5U];
    Lib_IntVector_Intrinsics_vec256 v60 = k[6U];
    Lib_IntVector_Intrinsics_vec256 v70 = k[7U];
    Lib_IntVector_Intrinsics_vec256
    v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16);
    Lib_IntVector_Intrinsics_vec256
    v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16);
    Lib_IntVector_Intrinsics_vec256
    v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30);
    Lib_IntVector_Intrinsics_vec256
    v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30);
    Lib_IntVector_Intrinsics_vec256
    v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50);
    Lib_IntVector_Intrinsics_vec256
    v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50);
    Lib_IntVector_Intrinsics_vec256
    v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70);
    Lib_IntVector_Intrinsics_vec256
    v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70);
    Lib_IntVector_Intrinsics_vec256
    v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_);
    Lib_IntVector_Intrinsics_vec256
    v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_);
    Lib_IntVector_Intrinsics_vec256
    v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_);
    Lib_IntVector_Intrinsics_vec256
    v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_);
    Lib_IntVector_Intrinsics_vec256
    v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_);
    Lib_IntVector_Intrinsics_vec256
    v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_);
    Lib_IntVector_Intrinsics_vec256
    v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_);
    Lib_IntVector_Intrinsics_vec256
    v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_);
    Lib_IntVector_Intrinsics_vec256
    v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__);
    Lib_IntVector_Intrinsics_vec256
    v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__);
    Lib_IntVector_Intrinsics_vec256
    v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__);
    Lib_IntVector_Intrinsics_vec256
    v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__);
    Lib_IntVector_Intrinsics_vec256
    v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__);
    Lib_IntVector_Intrinsics_vec256
    v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__);
    Lib_IntVector_Intrinsics_vec256
    v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__);
    Lib_IntVector_Intrinsics_vec256
    v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__);
    Lib_IntVector_Intrinsics_vec256 v0 = v0___;
    Lib_IntVector_Intrinsics_vec256 v1 = v2___;
    Lib_IntVector_Intrinsics_vec256 v2 = v4___;
    Lib_IntVector_Intrinsics_vec256 v3 = v6___;
    Lib_IntVector_Intrinsics_vec256 v4 = v1___;
    Lib_IntVector_Intrinsics_vec256 v5 = v3___;
    Lib_IntVector_Intrinsics_vec256 v6 = v5___;
    Lib_IntVector_Intrinsics_vec256 v7 = v7___;
    Lib_IntVector_Intrinsics_vec256 v01 = k[8U];
    Lib_IntVector_Intrinsics_vec256 v110 = k[9U];
    Lib_IntVector_Intrinsics_vec256 v21 = k[10U];
    Lib_IntVector_Intrinsics_vec256 v31 = k[11U];
    Lib_IntVector_Intrinsics_vec256 v41 = k[12U];
    Lib_IntVector_Intrinsics_vec256 v51 = k[13U];
    Lib_IntVector_Intrinsics_vec256 v61 = k[14U];
    Lib_IntVector_Intrinsics_vec256 v71 = k[15U];
    Lib_IntVector_Intrinsics_vec256
    v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110);
    Lib_IntVector_Intrinsics_vec256
    v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110);
    Lib_IntVector_Intrinsics_vec256
    v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31);
    Lib_IntVector_Intrinsics_vec256
    v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31);
    Lib_IntVector_Intrinsics_vec256
    v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51);
    Lib_IntVector_Intrinsics_vec256
    v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51);
    Lib_IntVector_Intrinsics_vec256
    v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71);
    Lib_IntVector_Intrinsics_vec256
    v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71);
    Lib_IntVector_Intrinsics_vec256
    v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0);
    Lib_IntVector_Intrinsics_vec256
    v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0);
    Lib_IntVector_Intrinsics_vec256
    v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0);
    Lib_IntVector_Intrinsics_vec256
    v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0);
    Lib_IntVector_Intrinsics_vec256
    v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0);
    Lib_IntVector_Intrinsics_vec256
    v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0);
    Lib_IntVector_Intrinsics_vec256
    v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0);
    Lib_IntVector_Intrinsics_vec256
    v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0);
    Lib_IntVector_Intrinsics_vec256
    v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0);
    Lib_IntVector_Intrinsics_vec256
    v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0);
    Lib_IntVector_Intrinsics_vec256
    v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0);
    Lib_IntVector_Intrinsics_vec256
    v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0);
    Lib_IntVector_Intrinsics_vec256
    v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0);
    Lib_IntVector_Intrinsics_vec256
    v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0);
    Lib_IntVector_Intrinsics_vec256
    v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0);
    Lib_IntVector_Intrinsics_vec256
    v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0);
    Lib_IntVector_Intrinsics_vec256 v8 = v0___0;
    Lib_IntVector_Intrinsics_vec256 v9 = v2___0;
    Lib_IntVector_Intrinsics_vec256 v10 = v4___0;
    Lib_IntVector_Intrinsics_vec256 v11 = v6___0;
    Lib_IntVector_Intrinsics_vec256 v12 = v1___0;
    Lib_IntVector_Intrinsics_vec256 v13 = v3___0;
    Lib_IntVector_Intrinsics_vec256 v14 = v5___0;
    Lib_IntVector_Intrinsics_vec256 v15 = v7___0;
    k[0U] = v0;
    k[1U] = v8;
    k[2U] = v1;
    k[3U] = v9;
    k[4U] = v2;
    k[5U] = v10;
    k[6U] = v3;
    k[7U] = v11;
    k[8U] = v4;
    k[9U] = v12;
    k[10U] = v5;
    k[11U] = v13;
    k[12U] = v6;
    k[13U] = v14;
    k[14U] = v7;
    k[15U] = v15;
    for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
      Lib_IntVector_Intrinsics_vec256
      x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U);
      Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]);
      Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y);
    }
    memcpy(uu____2, plain, rem * sizeof(uint8_t));
  }
}
540
541 void
Hacl_Chacha20_Vec256_chacha20_decrypt_256(uint32_t len,uint8_t * out,uint8_t * cipher,uint8_t * key,uint8_t * n,uint32_t ctr)542 Hacl_Chacha20_Vec256_chacha20_decrypt_256(
543 uint32_t len,
544 uint8_t *out,
545 uint8_t *cipher,
546 uint8_t *key,
547 uint8_t *n,
548 uint32_t ctr)
549 {
550 Lib_IntVector_Intrinsics_vec256 ctx[16U];
551 for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
552 ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero;
553 chacha20_init_256(ctx, key, n, ctr);
554 uint32_t rem = len % (uint32_t)512U;
555 uint32_t nb = len / (uint32_t)512U;
556 uint32_t rem1 = len % (uint32_t)512U;
557 for (uint32_t i = (uint32_t)0U; i < nb; i++) {
558 uint8_t *uu____0 = out + i * (uint32_t)512U;
559 uint8_t *uu____1 = cipher + i * (uint32_t)512U;
560 Lib_IntVector_Intrinsics_vec256 k[16U];
561 for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
562 k[_i] = Lib_IntVector_Intrinsics_vec256_zero;
563 chacha20_core_256(k, ctx, i);
564 Lib_IntVector_Intrinsics_vec256 v00 = k[0U];
565 Lib_IntVector_Intrinsics_vec256 v16 = k[1U];
566 Lib_IntVector_Intrinsics_vec256 v20 = k[2U];
567 Lib_IntVector_Intrinsics_vec256 v30 = k[3U];
568 Lib_IntVector_Intrinsics_vec256 v40 = k[4U];
569 Lib_IntVector_Intrinsics_vec256 v50 = k[5U];
570 Lib_IntVector_Intrinsics_vec256 v60 = k[6U];
571 Lib_IntVector_Intrinsics_vec256 v70 = k[7U];
572 Lib_IntVector_Intrinsics_vec256
573 v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16);
574 Lib_IntVector_Intrinsics_vec256
575 v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16);
576 Lib_IntVector_Intrinsics_vec256
577 v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30);
578 Lib_IntVector_Intrinsics_vec256
579 v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30);
580 Lib_IntVector_Intrinsics_vec256
581 v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50);
582 Lib_IntVector_Intrinsics_vec256
583 v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50);
584 Lib_IntVector_Intrinsics_vec256
585 v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70);
586 Lib_IntVector_Intrinsics_vec256
587 v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70);
/* Stage 2 of the 8x8 32-bit transpose: interleave 64-bit lanes of the
   stage-1 (32-bit interleave) results. */
588 Lib_IntVector_Intrinsics_vec256
589 v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_);
590 Lib_IntVector_Intrinsics_vec256
591 v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_);
592 Lib_IntVector_Intrinsics_vec256
593 v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_);
594 Lib_IntVector_Intrinsics_vec256
595 v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_);
596 Lib_IntVector_Intrinsics_vec256
597 v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_);
598 Lib_IntVector_Intrinsics_vec256
599 v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_);
600 Lib_IntVector_Intrinsics_vec256
601 v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_);
602 Lib_IntVector_Intrinsics_vec256
603 v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_);
/* Stage 3: interleave 128-bit lanes, completing the transpose of the
   eight 8x32-bit rows. */
604 Lib_IntVector_Intrinsics_vec256
605 v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__);
606 Lib_IntVector_Intrinsics_vec256
607 v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__);
608 Lib_IntVector_Intrinsics_vec256
609 v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__);
610 Lib_IntVector_Intrinsics_vec256
611 v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__);
612 Lib_IntVector_Intrinsics_vec256
613 v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__);
614 Lib_IntVector_Intrinsics_vec256
615 v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__);
616 Lib_IntVector_Intrinsics_vec256
617 v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__);
618 Lib_IntVector_Intrinsics_vec256
619 v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__);
/* Rename the transposed vectors into their final row order (the
   low/high interleave pattern leaves them permuted). */
620 Lib_IntVector_Intrinsics_vec256 v0 = v0___;
621 Lib_IntVector_Intrinsics_vec256 v1 = v2___;
622 Lib_IntVector_Intrinsics_vec256 v2 = v4___;
623 Lib_IntVector_Intrinsics_vec256 v3 = v6___;
624 Lib_IntVector_Intrinsics_vec256 v4 = v1___;
625 Lib_IntVector_Intrinsics_vec256 v5 = v3___;
626 Lib_IntVector_Intrinsics_vec256 v6 = v5___;
627 Lib_IntVector_Intrinsics_vec256 v7 = v7___;
/* Repeat the identical three-stage transpose for the second half of the
   keystream state, k[8..15]. */
628 Lib_IntVector_Intrinsics_vec256 v01 = k[8U];
629 Lib_IntVector_Intrinsics_vec256 v110 = k[9U];
630 Lib_IntVector_Intrinsics_vec256 v21 = k[10U];
631 Lib_IntVector_Intrinsics_vec256 v31 = k[11U];
632 Lib_IntVector_Intrinsics_vec256 v41 = k[12U];
633 Lib_IntVector_Intrinsics_vec256 v51 = k[13U];
634 Lib_IntVector_Intrinsics_vec256 v61 = k[14U];
635 Lib_IntVector_Intrinsics_vec256 v71 = k[15U];
636 Lib_IntVector_Intrinsics_vec256
637 v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110);
638 Lib_IntVector_Intrinsics_vec256
639 v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110);
640 Lib_IntVector_Intrinsics_vec256
641 v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31);
642 Lib_IntVector_Intrinsics_vec256
643 v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31);
644 Lib_IntVector_Intrinsics_vec256
645 v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51);
646 Lib_IntVector_Intrinsics_vec256
647 v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51);
648 Lib_IntVector_Intrinsics_vec256
649 v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71);
650 Lib_IntVector_Intrinsics_vec256
651 v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71);
652 Lib_IntVector_Intrinsics_vec256
653 v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0);
654 Lib_IntVector_Intrinsics_vec256
655 v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0);
656 Lib_IntVector_Intrinsics_vec256
657 v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0);
658 Lib_IntVector_Intrinsics_vec256
659 v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0);
660 Lib_IntVector_Intrinsics_vec256
661 v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0);
662 Lib_IntVector_Intrinsics_vec256
663 v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0);
664 Lib_IntVector_Intrinsics_vec256
665 v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0);
666 Lib_IntVector_Intrinsics_vec256
667 v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0);
668 Lib_IntVector_Intrinsics_vec256
669 v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0);
670 Lib_IntVector_Intrinsics_vec256
671 v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0);
672 Lib_IntVector_Intrinsics_vec256
673 v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0);
674 Lib_IntVector_Intrinsics_vec256
675 v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0);
676 Lib_IntVector_Intrinsics_vec256
677 v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0);
678 Lib_IntVector_Intrinsics_vec256
679 v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0);
680 Lib_IntVector_Intrinsics_vec256
681 v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0);
682 Lib_IntVector_Intrinsics_vec256
683 v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0);
684 Lib_IntVector_Intrinsics_vec256 v8 = v0___0;
685 Lib_IntVector_Intrinsics_vec256 v9 = v2___0;
686 Lib_IntVector_Intrinsics_vec256 v10 = v4___0;
687 Lib_IntVector_Intrinsics_vec256 v11 = v6___0;
688 Lib_IntVector_Intrinsics_vec256 v12 = v1___0;
689 Lib_IntVector_Intrinsics_vec256 v13 = v3___0;
690 Lib_IntVector_Intrinsics_vec256 v14 = v5___0;
691 Lib_IntVector_Intrinsics_vec256 v15 = v7___0;
/* Write back, interleaving rows from the two transposed halves
   (v0..v7 from k[0..7], v8..v15 from k[8..15]) so the keystream is laid
   out in memory order — presumably k[2*i]/k[2*i+1] together hold the
   64-byte keystream for one of the 8 ChaCha20 blocks (NOTE(review):
   consistent with the alternating store order below; confirm against
   the transpose lemma in the HACL* Vec spec). */
692 k[0U] = v0;
693 k[1U] = v8;
694 k[2U] = v1;
695 k[3U] = v9;
696 k[4U] = v2;
697 k[5U] = v10;
698 k[6U] = v3;
699 k[7U] = v11;
700 k[8U] = v4;
701 k[9U] = v12;
702 k[10U] = v5;
703 k[11U] = v13;
704 k[12U] = v6;
705 k[13U] = v14;
706 k[14U] = v7;
707 k[15U] = v15;
/* XOR 16 x 32 bytes (512 bytes = 8 ChaCha20 blocks) of input at uu____1
   with the transposed keystream and store to the output at uu____0,
   using unaligned little-endian vector loads/stores. */
708 for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) {
709 Lib_IntVector_Intrinsics_vec256
710 x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U);
711 Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]);
712 Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y);
713 }
714 }
/* Residual branch: handle the final partial 512-byte chunk (rem bytes,
   0 < rem < 512) through a zero-padded stack buffer so the vector
   loads/stores never touch bytes past the caller's buffers.
   NOTE(review): rem and rem1 are defined before this view — presumably
   both equal len % 512; confirm upstream. */
715 if (rem1 > (uint32_t)0U) {
716 uint8_t *uu____2 = out + nb * (uint32_t)512U;
717 uint8_t *uu____3 = cipher + nb * (uint32_t)512U;
/* Zero-initialized staging buffer; only the first rem bytes carry input. */
718 uint8_t plain[512U] = { 0U };
719 memcpy(plain, uu____3, rem * sizeof(uint8_t));
720 Lib_IntVector_Intrinsics_vec256 k[16U];
721 for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
722 k[_i] = Lib_IntVector_Intrinsics_vec256_zero;
/* Generate the keystream state for this chunk; nb is the block-counter
   offset of the chunk (nb full 512-byte chunks precede it). */
723 chacha20_core_256(k, ctx, nb);
/* Transpose k[0..7]: three interleave stages (32-, 64-, 128-bit lanes)
   convert the word-sliced state into per-block memory order — same
   sequence as in the full-block path above. */
724 Lib_IntVector_Intrinsics_vec256 v00 = k[0U];
725 Lib_IntVector_Intrinsics_vec256 v16 = k[1U];
726 Lib_IntVector_Intrinsics_vec256 v20 = k[2U];
727 Lib_IntVector_Intrinsics_vec256 v30 = k[3U];
728 Lib_IntVector_Intrinsics_vec256 v40 = k[4U];
729 Lib_IntVector_Intrinsics_vec256 v50 = k[5U];
730 Lib_IntVector_Intrinsics_vec256 v60 = k[6U];
731 Lib_IntVector_Intrinsics_vec256 v70 = k[7U];
732 Lib_IntVector_Intrinsics_vec256
733 v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16);
734 Lib_IntVector_Intrinsics_vec256
735 v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16);
736 Lib_IntVector_Intrinsics_vec256
737 v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30);
738 Lib_IntVector_Intrinsics_vec256
739 v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30);
740 Lib_IntVector_Intrinsics_vec256
741 v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50);
742 Lib_IntVector_Intrinsics_vec256
743 v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50);
744 Lib_IntVector_Intrinsics_vec256
745 v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70);
746 Lib_IntVector_Intrinsics_vec256
747 v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70);
748 Lib_IntVector_Intrinsics_vec256
749 v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_);
750 Lib_IntVector_Intrinsics_vec256
751 v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_);
752 Lib_IntVector_Intrinsics_vec256
753 v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_);
754 Lib_IntVector_Intrinsics_vec256
755 v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_);
756 Lib_IntVector_Intrinsics_vec256
757 v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_);
758 Lib_IntVector_Intrinsics_vec256
759 v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_);
760 Lib_IntVector_Intrinsics_vec256
761 v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_);
762 Lib_IntVector_Intrinsics_vec256
763 v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_);
764 Lib_IntVector_Intrinsics_vec256
765 v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__);
766 Lib_IntVector_Intrinsics_vec256
767 v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__);
768 Lib_IntVector_Intrinsics_vec256
769 v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__);
770 Lib_IntVector_Intrinsics_vec256
771 v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__);
772 Lib_IntVector_Intrinsics_vec256
773 v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__);
774 Lib_IntVector_Intrinsics_vec256
775 v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__);
776 Lib_IntVector_Intrinsics_vec256
777 v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__);
778 Lib_IntVector_Intrinsics_vec256
779 v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__);
/* Rename transposed vectors into final row order. */
780 Lib_IntVector_Intrinsics_vec256 v0 = v0___;
781 Lib_IntVector_Intrinsics_vec256 v1 = v2___;
782 Lib_IntVector_Intrinsics_vec256 v2 = v4___;
783 Lib_IntVector_Intrinsics_vec256 v3 = v6___;
784 Lib_IntVector_Intrinsics_vec256 v4 = v1___;
785 Lib_IntVector_Intrinsics_vec256 v5 = v3___;
786 Lib_IntVector_Intrinsics_vec256 v6 = v5___;
787 Lib_IntVector_Intrinsics_vec256 v7 = v7___;
/* Same three-stage transpose for the second half of the state, k[8..15]. */
788 Lib_IntVector_Intrinsics_vec256 v01 = k[8U];
789 Lib_IntVector_Intrinsics_vec256 v110 = k[9U];
790 Lib_IntVector_Intrinsics_vec256 v21 = k[10U];
791 Lib_IntVector_Intrinsics_vec256 v31 = k[11U];
792 Lib_IntVector_Intrinsics_vec256 v41 = k[12U];
793 Lib_IntVector_Intrinsics_vec256 v51 = k[13U];
794 Lib_IntVector_Intrinsics_vec256 v61 = k[14U];
795 Lib_IntVector_Intrinsics_vec256 v71 = k[15U];
796 Lib_IntVector_Intrinsics_vec256
797 v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110);
798 Lib_IntVector_Intrinsics_vec256
799 v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110);
800 Lib_IntVector_Intrinsics_vec256
801 v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31);
802 Lib_IntVector_Intrinsics_vec256
803 v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31);
804 Lib_IntVector_Intrinsics_vec256
805 v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51);
806 Lib_IntVector_Intrinsics_vec256
807 v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51);
808 Lib_IntVector_Intrinsics_vec256
809 v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71);
810 Lib_IntVector_Intrinsics_vec256
811 v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71);
812 Lib_IntVector_Intrinsics_vec256
813 v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0);
814 Lib_IntVector_Intrinsics_vec256
815 v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0);
816 Lib_IntVector_Intrinsics_vec256
817 v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0);
818 Lib_IntVector_Intrinsics_vec256
819 v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0);
820 Lib_IntVector_Intrinsics_vec256
821 v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0);
822 Lib_IntVector_Intrinsics_vec256
823 v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0);
824 Lib_IntVector_Intrinsics_vec256
825 v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0);
826 Lib_IntVector_Intrinsics_vec256
827 v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0);
828 Lib_IntVector_Intrinsics_vec256
829 v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0);
830 Lib_IntVector_Intrinsics_vec256
831 v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0);
832 Lib_IntVector_Intrinsics_vec256
833 v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0);
834 Lib_IntVector_Intrinsics_vec256
835 v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0);
836 Lib_IntVector_Intrinsics_vec256
837 v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0);
838 Lib_IntVector_Intrinsics_vec256
839 v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0);
840 Lib_IntVector_Intrinsics_vec256
841 v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0);
842 Lib_IntVector_Intrinsics_vec256
843 v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0);
844 Lib_IntVector_Intrinsics_vec256 v8 = v0___0;
845 Lib_IntVector_Intrinsics_vec256 v9 = v2___0;
846 Lib_IntVector_Intrinsics_vec256 v10 = v4___0;
847 Lib_IntVector_Intrinsics_vec256 v11 = v6___0;
848 Lib_IntVector_Intrinsics_vec256 v12 = v1___0;
849 Lib_IntVector_Intrinsics_vec256 v13 = v3___0;
850 Lib_IntVector_Intrinsics_vec256 v14 = v5___0;
851 Lib_IntVector_Intrinsics_vec256 v15 = v7___0;
/* Write back with the two halves interleaved — same keystream layout as
   in the full-block path. */
852 k[0U] = v0;
853 k[1U] = v8;
854 k[2U] = v1;
855 k[3U] = v9;
856 k[4U] = v2;
857 k[5U] = v10;
858 k[6U] = v3;
859 k[7U] = v11;
860 k[8U] = v4;
861 k[9U] = v12;
862 k[10U] = v5;
863 k[11U] = v13;
864 k[12U] = v6;
865 k[13U] = v14;
866 k[14U] = v7;
867 k[15U] = v15;
/* XOR the staging buffer with the keystream in place (full 512 bytes;
   the zero-padded tail is discarded below). */
868 for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
869 Lib_IntVector_Intrinsics_vec256
870 x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U);
871 Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]);
872 Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y);
873 }
/* Copy only the meaningful rem bytes to the caller's output. */
874 memcpy(uu____2, plain, rem * sizeof(uint8_t));
875 }
876 }
877