1 /*
2  * crypto_helper.c - emulate v8 Crypto Extensions instructions
3  *
4  * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  */
11 
12 #include "qemu/osdep.h"
13 
14 #include "cpu.h"
15 #include "exec/helper-proto.h"
16 #include "crypto/aes.h"
17 
/*
 * Image of one 128-bit vector register, viewable as 16 bytes, four 32-bit
 * words or two 64-bit halves.  The helpers in this file fill and drain it
 * through l[0]/l[1] and reach narrower elements via the accessors below.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];
    uint32_t   words[4];
    uint64_t   l[2];
};

/*
 * CR_ST_BYTE(state, i) / CR_ST_WORD(state, i) expand to an lvalue for byte /
 * 32-bit word number i of the vector, where element 0 is the least
 * significant part of l[0].  When HOST_WORDS_BIGENDIAN is defined each
 * 64-bit half is stored most-significant-byte first, so the index is
 * mirrored inside its half: (15 - i) ^ 8 for bytes, (3 - i) ^ 2 for words.
 *
 * Both macro arguments are parenthesized so that any expression (for
 * example a dereferenced pointer) can be passed as 'state'.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[(i)])
#define CR_ST_WORD(state, i)   ((state).words[(i)])
#endif
31 
HELPER(crypto_aese)32 void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt)
33 {
34     static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
35     static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
36     uint64_t *rd = vd;
37     uint64_t *rm = vm;
38     union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
39     union CRYPTO_STATE st = { .l = { rd[0], rd[1] } };
40     int i;
41 
42     assert(decrypt < 2);
43 
44     /* xor state vector with round key */
45     rk.l[0] ^= st.l[0];
46     rk.l[1] ^= st.l[1];
47 
48     /* combine ShiftRows operation and sbox substitution */
49     for (i = 0; i < 16; i++) {
50         CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
51     }
52 
53     rd[0] = st.l[0];
54     rd[1] = st.l[1];
55 }
56 
HELPER(crypto_aesmc)57 void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt)
58 {
59     static uint32_t const mc[][256] = { {
60         /* MixColumns lookup table */
61         0x00000000, 0x03010102, 0x06020204, 0x05030306,
62         0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
63         0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
64         0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
65         0x30101020, 0x33111122, 0x36121224, 0x35131326,
66         0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
67         0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
68         0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
69         0x60202040, 0x63212142, 0x66222244, 0x65232346,
70         0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
71         0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
72         0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
73         0x50303060, 0x53313162, 0x56323264, 0x55333366,
74         0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
75         0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
76         0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
77         0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
78         0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
79         0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
80         0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
81         0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
82         0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
83         0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
84         0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
85         0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
86         0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
87         0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
88         0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
89         0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
90         0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
91         0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
92         0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
93         0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
94         0x97848413, 0x94858511, 0x91868617, 0x92878715,
95         0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
96         0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
97         0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
98         0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
99         0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
100         0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
101         0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
102         0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
103         0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
104         0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
105         0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
106         0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
107         0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
108         0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
109         0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
110         0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
111         0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
112         0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
113         0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
114         0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
115         0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
116         0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
117         0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
118         0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
119         0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
120         0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
121         0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
122         0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
123         0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
124         0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
125     }, {
126         /* Inverse MixColumns lookup table */
127         0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
128         0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
129         0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
130         0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
131         0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
132         0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
133         0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
134         0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
135         0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
136         0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
137         0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
138         0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
139         0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
140         0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
141         0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
142         0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
143         0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
144         0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
145         0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
146         0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
147         0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
148         0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
149         0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
150         0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
151         0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
152         0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
153         0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
154         0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
155         0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
156         0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
157         0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
158         0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
159         0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
160         0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
161         0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
162         0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
163         0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
164         0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
165         0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
166         0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
167         0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
168         0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
169         0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
170         0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
171         0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
172         0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
173         0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
174         0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
175         0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
176         0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
177         0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
178         0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
179         0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
180         0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
181         0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
182         0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
183         0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
184         0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
185         0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
186         0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
187         0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
188         0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
189         0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
190         0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
191     } };
192 
193     uint64_t *rd = vd;
194     uint64_t *rm = vm;
195     union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
196     int i;
197 
198     assert(decrypt < 2);
199 
200     for (i = 0; i < 16; i += 4) {
201         CR_ST_WORD(st, i >> 2) =
202             mc[decrypt][CR_ST_BYTE(st, i)] ^
203             rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
204             rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
205             rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
206     }
207 
208     rd[0] = st.l[0];
209     rd[1] = st.l[1];
210 }
211 
212 /*
213  * SHA-1 logical functions
214  */
215 
/* Bitwise "choose": take each bit from y where x is 1, from z where x is 0. */
static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t differing = y ^ z;

    return z ^ (differing & x);
}
220 
/* Bitwise parity: XOR of the three inputs. */
static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t acc = x;

    acc ^= y;
    acc ^= z;
    return acc;
}
225 
/* Bitwise majority: a result bit is 1 when at least two input bits are 1. */
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t xy = x & y;
    uint32_t xz = x & z;
    uint32_t yz = y & z;

    return xy | xz | yz;
}
230 
HELPER(crypto_sha1_3reg)231 void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
232 {
233     uint64_t *rd = vd;
234     uint64_t *rn = vn;
235     uint64_t *rm = vm;
236     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
237     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
238     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
239 
240     if (op == 3) { /* sha1su0 */
241         d.l[0] ^= d.l[1] ^ m.l[0];
242         d.l[1] ^= n.l[0] ^ m.l[1];
243     } else {
244         int i;
245 
246         for (i = 0; i < 4; i++) {
247             uint32_t t;
248 
249             switch (op) {
250             case 0: /* sha1c */
251                 t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
252                 break;
253             case 1: /* sha1p */
254                 t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
255                 break;
256             case 2: /* sha1m */
257                 t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
258                 break;
259             default:
260                 g_assert_not_reached();
261             }
262             t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
263                  + CR_ST_WORD(m, i);
264 
265             CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
266             CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
267             CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
268             CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
269             CR_ST_WORD(d, 0) = t;
270         }
271     }
272     rd[0] = d.l[0];
273     rd[1] = d.l[1];
274 }
275 
HELPER(crypto_sha1h)276 void HELPER(crypto_sha1h)(void *vd, void *vm)
277 {
278     uint64_t *rd = vd;
279     uint64_t *rm = vm;
280     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
281 
282     CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
283     CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
284 
285     rd[0] = m.l[0];
286     rd[1] = m.l[1];
287 }
288 
HELPER(crypto_sha1su1)289 void HELPER(crypto_sha1su1)(void *vd, void *vm)
290 {
291     uint64_t *rd = vd;
292     uint64_t *rm = vm;
293     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
294     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
295 
296     CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
297     CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
298     CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
299     CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
300 
301     rd[0] = d.l[0];
302     rd[1] = d.l[1];
303 }
304 
305 /*
306  * The SHA-256 logical functions, according to
307  * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
308  */
309 
/* SHA-256 upper-case sigma 0: x rotated right by 2, 13 and 22, XORed. */
static uint32_t S0(uint32_t x)
{
    uint32_t acc = ror32(x, 2);

    acc ^= ror32(x, 13);
    acc ^= ror32(x, 22);
    return acc;
}
314 
/* SHA-256 upper-case sigma 1: x rotated right by 6, 11 and 25, XORed. */
static uint32_t S1(uint32_t x)
{
    uint32_t acc = ror32(x, 6);

    acc ^= ror32(x, 11);
    acc ^= ror32(x, 25);
    return acc;
}
319 
/*
 * SHA-256 lower-case sigma 0: x rotated right by 7 and 18, XORed with
 * x shifted right by 3.
 */
static uint32_t s0(uint32_t x)
{
    uint32_t acc = x >> 3;

    acc ^= ror32(x, 7);
    acc ^= ror32(x, 18);
    return acc;
}
324 
/*
 * SHA-256 lower-case sigma 1: x rotated right by 17 and 19, XORed with
 * x shifted right by 10.
 */
static uint32_t s1(uint32_t x)
{
    uint32_t acc = x >> 10;

    acc ^= ror32(x, 17);
    acc ^= ror32(x, 19);
    return acc;
}
329 
HELPER(crypto_sha256h)330 void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
331 {
332     uint64_t *rd = vd;
333     uint64_t *rn = vn;
334     uint64_t *rm = vm;
335     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
336     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
337     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
338     int i;
339 
340     for (i = 0; i < 4; i++) {
341         uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
342                      + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
343                      + CR_ST_WORD(m, i);
344 
345         CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
346         CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
347         CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
348         CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
349 
350         t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
351              + S0(CR_ST_WORD(d, 0));
352 
353         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
354         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
355         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
356         CR_ST_WORD(d, 0) = t;
357     }
358 
359     rd[0] = d.l[0];
360     rd[1] = d.l[1];
361 }
362 
HELPER(crypto_sha256h2)363 void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
364 {
365     uint64_t *rd = vd;
366     uint64_t *rn = vn;
367     uint64_t *rm = vm;
368     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
369     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
370     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
371     int i;
372 
373     for (i = 0; i < 4; i++) {
374         uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
375                      + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
376                      + CR_ST_WORD(m, i);
377 
378         CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
379         CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
380         CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
381         CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
382     }
383 
384     rd[0] = d.l[0];
385     rd[1] = d.l[1];
386 }
387 
HELPER(crypto_sha256su0)388 void HELPER(crypto_sha256su0)(void *vd, void *vm)
389 {
390     uint64_t *rd = vd;
391     uint64_t *rm = vm;
392     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
393     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
394 
395     CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
396     CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
397     CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
398     CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
399 
400     rd[0] = d.l[0];
401     rd[1] = d.l[1];
402 }
403 
HELPER(crypto_sha256su1)404 void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
405 {
406     uint64_t *rd = vd;
407     uint64_t *rn = vn;
408     uint64_t *rm = vm;
409     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
410     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
411     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
412 
413     CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
414     CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
415     CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
416     CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
417 
418     rd[0] = d.l[0];
419     rd[1] = d.l[1];
420 }
421 
422 /*
423  * The SHA-512 logical functions (same as above but using 64-bit operands)
424  */
425 
/* 64-bit "choose": take each bit from y where x is 1, from z where x is 0. */
static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t differing = y ^ z;

    return z ^ (differing & x);
}
430 
/* 64-bit majority: a result bit is 1 when at least two input bits are 1. */
static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
{
    uint64_t xy = x & y;
    uint64_t xz = x & z;
    uint64_t yz = y & z;

    return xy | xz | yz;
}
435 
/* SHA-512 upper-case sigma 0: x rotated right by 28, 34 and 39, XORed. */
static uint64_t S0_512(uint64_t x)
{
    uint64_t acc = ror64(x, 28);

    acc ^= ror64(x, 34);
    acc ^= ror64(x, 39);
    return acc;
}
440 
/* SHA-512 upper-case sigma 1: x rotated right by 14, 18 and 41, XORed. */
static uint64_t S1_512(uint64_t x)
{
    uint64_t acc = ror64(x, 14);

    acc ^= ror64(x, 18);
    acc ^= ror64(x, 41);
    return acc;
}
445 
/*
 * SHA-512 lower-case sigma 0: x rotated right by 1 and 8, XORed with
 * x shifted right by 7.
 */
static uint64_t s0_512(uint64_t x)
{
    uint64_t acc = x >> 7;

    acc ^= ror64(x, 1);
    acc ^= ror64(x, 8);
    return acc;
}
450 
/*
 * SHA-512 lower-case sigma 1: x rotated right by 19 and 61, XORed with
 * x shifted right by 6.
 */
static uint64_t s1_512(uint64_t x)
{
    uint64_t acc = x >> 6;

    acc ^= ror64(x, 19);
    acc ^= ror64(x, 61);
    return acc;
}
455 
HELPER(crypto_sha512h)456 void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm)
457 {
458     uint64_t *rd = vd;
459     uint64_t *rn = vn;
460     uint64_t *rm = vm;
461     uint64_t d0 = rd[0];
462     uint64_t d1 = rd[1];
463 
464     d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
465     d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
466 
467     rd[0] = d0;
468     rd[1] = d1;
469 }
470 
HELPER(crypto_sha512h2)471 void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm)
472 {
473     uint64_t *rd = vd;
474     uint64_t *rn = vn;
475     uint64_t *rm = vm;
476     uint64_t d0 = rd[0];
477     uint64_t d1 = rd[1];
478 
479     d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
480     d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
481 
482     rd[0] = d0;
483     rd[1] = d1;
484 }
485 
HELPER(crypto_sha512su0)486 void HELPER(crypto_sha512su0)(void *vd, void *vn)
487 {
488     uint64_t *rd = vd;
489     uint64_t *rn = vn;
490     uint64_t d0 = rd[0];
491     uint64_t d1 = rd[1];
492 
493     d0 += s0_512(rd[1]);
494     d1 += s0_512(rn[0]);
495 
496     rd[0] = d0;
497     rd[1] = d1;
498 }
499 
HELPER(crypto_sha512su1)500 void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm)
501 {
502     uint64_t *rd = vd;
503     uint64_t *rn = vn;
504     uint64_t *rm = vm;
505 
506     rd[0] += s1_512(rn[0]) + rm[0];
507     rd[1] += s1_512(rn[1]) + rm[1];
508 }
509 
HELPER(crypto_sm3partw1)510 void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm)
511 {
512     uint64_t *rd = vd;
513     uint64_t *rn = vn;
514     uint64_t *rm = vm;
515     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
516     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
517     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
518     uint32_t t;
519 
520     t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
521     CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
522 
523     t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
524     CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
525 
526     t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
527     CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
528 
529     t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
530     CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
531 
532     rd[0] = d.l[0];
533     rd[1] = d.l[1];
534 }
535 
HELPER(crypto_sm3partw2)536 void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm)
537 {
538     uint64_t *rd = vd;
539     uint64_t *rn = vn;
540     uint64_t *rm = vm;
541     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
542     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
543     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
544     uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
545 
546     CR_ST_WORD(d, 0) ^= t;
547     CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
548     CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
549     CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
550                         ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
551 
552     rd[0] = d.l[0];
553     rd[1] = d.l[1];
554 }
555 
HELPER(crypto_sm3tt)556 void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
557                           uint32_t opcode)
558 {
559     uint64_t *rd = vd;
560     uint64_t *rn = vn;
561     uint64_t *rm = vm;
562     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
563     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
564     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
565     uint32_t t;
566 
567     assert(imm2 < 4);
568 
569     if (opcode == 0 || opcode == 2) {
570         /* SM3TT1A, SM3TT2A */
571         t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
572     } else if (opcode == 1) {
573         /* SM3TT1B */
574         t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
575     } else if (opcode == 3) {
576         /* SM3TT2B */
577         t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
578     } else {
579         g_assert_not_reached();
580     }
581 
582     t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
583 
584     CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
585 
586     if (opcode < 2) {
587         /* SM3TT1A, SM3TT1B */
588         t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
589 
590         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
591     } else {
592         /* SM3TT2A, SM3TT2B */
593         t += CR_ST_WORD(n, 3);
594         t ^= rol32(t, 9) ^ rol32(t, 17);
595 
596         CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
597     }
598 
599     CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
600     CR_ST_WORD(d, 3) = t;
601 
602     rd[0] = d.l[0];
603     rd[1] = d.l[1];
604 }
605 
/*
 * Byte substitution table used by the SM4 helpers: 256 entries, indexed by
 * the input byte.  The comment on each row gives the index of its first
 * entry.
 */
static const uint8_t sm4_sbox[256] = {
    /* 0x00 */ 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    /* 0x08 */ 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    /* 0x10 */ 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    /* 0x18 */ 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    /* 0x20 */ 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    /* 0x28 */ 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    /* 0x30 */ 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    /* 0x38 */ 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    /* 0x40 */ 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    /* 0x48 */ 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    /* 0x50 */ 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    /* 0x58 */ 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    /* 0x60 */ 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    /* 0x68 */ 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    /* 0x70 */ 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    /* 0x78 */ 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    /* 0x80 */ 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    /* 0x88 */ 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    /* 0x90 */ 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    /* 0x98 */ 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    /* 0xa0 */ 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    /* 0xa8 */ 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    /* 0xb0 */ 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    /* 0xb8 */ 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    /* 0xc0 */ 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    /* 0xc8 */ 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    /* 0xd0 */ 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    /* 0xd8 */ 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    /* 0xe0 */ 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    /* 0xe8 */ 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    /* 0xf0 */ 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    /* 0xf8 */ 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
640 
HELPER(crypto_sm4e)641 void HELPER(crypto_sm4e)(void *vd, void *vn)
642 {
643     uint64_t *rd = vd;
644     uint64_t *rn = vn;
645     union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
646     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
647     uint32_t t, i;
648 
649     for (i = 0; i < 4; i++) {
650         t = CR_ST_WORD(d, (i + 1) % 4) ^
651             CR_ST_WORD(d, (i + 2) % 4) ^
652             CR_ST_WORD(d, (i + 3) % 4) ^
653             CR_ST_WORD(n, i);
654 
655         t = sm4_sbox[t & 0xff] |
656             sm4_sbox[(t >> 8) & 0xff] << 8 |
657             sm4_sbox[(t >> 16) & 0xff] << 16 |
658             sm4_sbox[(t >> 24) & 0xff] << 24;
659 
660         CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
661                             rol32(t, 24);
662     }
663 
664     rd[0] = d.l[0];
665     rd[1] = d.l[1];
666 }
667 
HELPER(crypto_sm4ekey)668 void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm)
669 {
670     uint64_t *rd = vd;
671     uint64_t *rn = vn;
672     uint64_t *rm = vm;
673     union CRYPTO_STATE d;
674     union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
675     union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
676     uint32_t t, i;
677 
678     d = n;
679     for (i = 0; i < 4; i++) {
680         t = CR_ST_WORD(d, (i + 1) % 4) ^
681             CR_ST_WORD(d, (i + 2) % 4) ^
682             CR_ST_WORD(d, (i + 3) % 4) ^
683             CR_ST_WORD(m, i);
684 
685         t = sm4_sbox[t & 0xff] |
686             sm4_sbox[(t >> 8) & 0xff] << 8 |
687             sm4_sbox[(t >> 16) & 0xff] << 16 |
688             sm4_sbox[(t >> 24) & 0xff] << 24;
689 
690         CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
691     }
692 
693     rd[0] = d.l[0];
694     rd[1] = d.l[1];
695 }
696