xref: /qemu/host/include/i386/host/crypto/aes-round.h (revision d6a24436)
/*
 * x86-specific AES acceleration.
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
5*d6a24436SRichard Henderson 
6*d6a24436SRichard Henderson #ifndef X86_HOST_CRYPTO_AES_ROUND_H
7*d6a24436SRichard Henderson #define X86_HOST_CRYPTO_AES_ROUND_H
8*d6a24436SRichard Henderson 
9*d6a24436SRichard Henderson #include "host/cpuinfo.h"
10*d6a24436SRichard Henderson #include <immintrin.h>
11*d6a24436SRichard Henderson 
/*
 * HAVE_AES_ACCEL: expression telling whether the accelerated helpers below
 *                 may be used.
 * ATTR_AES_ACCEL: function attribute applied to every accelerated helper.
 *
 * If the compiler is not already targeting both AES and SSSE3, the helpers
 * are compiled with a per-function target attribute and availability is
 * checked through the CPUINFO_AES bit of cpuinfo.  Otherwise no attribute is
 * needed and the helpers are unconditionally usable.
 */
#if !defined(__AES__) || !defined(__SSSE3__)
# define HAVE_AES_ACCEL  likely(cpuinfo & CPUINFO_AES)
# define ATTR_AES_ACCEL  __attribute__((target("aes,ssse3")))
#else
# define HAVE_AES_ACCEL  true
# define ATTR_AES_ACCEL
#endif
19*d6a24436SRichard Henderson 
20*d6a24436SRichard Henderson static inline __m128i ATTR_AES_ACCEL
aes_accel_bswap(__m128i x)21*d6a24436SRichard Henderson aes_accel_bswap(__m128i x)
22*d6a24436SRichard Henderson {
23*d6a24436SRichard Henderson     return _mm_shuffle_epi8(x, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8,
24*d6a24436SRichard Henderson                                             9, 10, 11, 12, 13, 14, 15));
25*d6a24436SRichard Henderson }
26*d6a24436SRichard Henderson 
27*d6a24436SRichard Henderson static inline void ATTR_AES_ACCEL
aesenc_MC_accel(AESState * ret,const AESState * st,bool be)28*d6a24436SRichard Henderson aesenc_MC_accel(AESState *ret, const AESState *st, bool be)
29*d6a24436SRichard Henderson {
30*d6a24436SRichard Henderson     __m128i t = (__m128i)st->v;
31*d6a24436SRichard Henderson     __m128i z = _mm_setzero_si128();
32*d6a24436SRichard Henderson 
33*d6a24436SRichard Henderson     if (be) {
34*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
35*d6a24436SRichard Henderson         t = _mm_aesdeclast_si128(t, z);
36*d6a24436SRichard Henderson         t = _mm_aesenc_si128(t, z);
37*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
38*d6a24436SRichard Henderson     } else {
39*d6a24436SRichard Henderson         t = _mm_aesdeclast_si128(t, z);
40*d6a24436SRichard Henderson         t = _mm_aesenc_si128(t, z);
41*d6a24436SRichard Henderson     }
42*d6a24436SRichard Henderson     ret->v = (AESStateVec)t;
43*d6a24436SRichard Henderson }
44*d6a24436SRichard Henderson 
45*d6a24436SRichard Henderson static inline void ATTR_AES_ACCEL
aesenc_SB_SR_AK_accel(AESState * ret,const AESState * st,const AESState * rk,bool be)46*d6a24436SRichard Henderson aesenc_SB_SR_AK_accel(AESState *ret, const AESState *st,
47*d6a24436SRichard Henderson                       const AESState *rk, bool be)
48*d6a24436SRichard Henderson {
49*d6a24436SRichard Henderson     __m128i t = (__m128i)st->v;
50*d6a24436SRichard Henderson     __m128i k = (__m128i)rk->v;
51*d6a24436SRichard Henderson 
52*d6a24436SRichard Henderson     if (be) {
53*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
54*d6a24436SRichard Henderson         k = aes_accel_bswap(k);
55*d6a24436SRichard Henderson         t = _mm_aesenclast_si128(t, k);
56*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
57*d6a24436SRichard Henderson     } else {
58*d6a24436SRichard Henderson         t = _mm_aesenclast_si128(t, k);
59*d6a24436SRichard Henderson     }
60*d6a24436SRichard Henderson     ret->v = (AESStateVec)t;
61*d6a24436SRichard Henderson }
62*d6a24436SRichard Henderson 
63*d6a24436SRichard Henderson static inline void ATTR_AES_ACCEL
aesenc_SB_SR_MC_AK_accel(AESState * ret,const AESState * st,const AESState * rk,bool be)64*d6a24436SRichard Henderson aesenc_SB_SR_MC_AK_accel(AESState *ret, const AESState *st,
65*d6a24436SRichard Henderson                          const AESState *rk, bool be)
66*d6a24436SRichard Henderson {
67*d6a24436SRichard Henderson     __m128i t = (__m128i)st->v;
68*d6a24436SRichard Henderson     __m128i k = (__m128i)rk->v;
69*d6a24436SRichard Henderson 
70*d6a24436SRichard Henderson     if (be) {
71*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
72*d6a24436SRichard Henderson         k = aes_accel_bswap(k);
73*d6a24436SRichard Henderson         t = _mm_aesenc_si128(t, k);
74*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
75*d6a24436SRichard Henderson     } else {
76*d6a24436SRichard Henderson         t = _mm_aesenc_si128(t, k);
77*d6a24436SRichard Henderson     }
78*d6a24436SRichard Henderson     ret->v = (AESStateVec)t;
79*d6a24436SRichard Henderson }
80*d6a24436SRichard Henderson 
81*d6a24436SRichard Henderson static inline void ATTR_AES_ACCEL
aesdec_IMC_accel(AESState * ret,const AESState * st,bool be)82*d6a24436SRichard Henderson aesdec_IMC_accel(AESState *ret, const AESState *st, bool be)
83*d6a24436SRichard Henderson {
84*d6a24436SRichard Henderson     __m128i t = (__m128i)st->v;
85*d6a24436SRichard Henderson 
86*d6a24436SRichard Henderson     if (be) {
87*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
88*d6a24436SRichard Henderson         t = _mm_aesimc_si128(t);
89*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
90*d6a24436SRichard Henderson     } else {
91*d6a24436SRichard Henderson         t = _mm_aesimc_si128(t);
92*d6a24436SRichard Henderson     }
93*d6a24436SRichard Henderson     ret->v = (AESStateVec)t;
94*d6a24436SRichard Henderson }
95*d6a24436SRichard Henderson 
96*d6a24436SRichard Henderson static inline void ATTR_AES_ACCEL
aesdec_ISB_ISR_AK_accel(AESState * ret,const AESState * st,const AESState * rk,bool be)97*d6a24436SRichard Henderson aesdec_ISB_ISR_AK_accel(AESState *ret, const AESState *st,
98*d6a24436SRichard Henderson                         const AESState *rk, bool be)
99*d6a24436SRichard Henderson {
100*d6a24436SRichard Henderson     __m128i t = (__m128i)st->v;
101*d6a24436SRichard Henderson     __m128i k = (__m128i)rk->v;
102*d6a24436SRichard Henderson 
103*d6a24436SRichard Henderson     if (be) {
104*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
105*d6a24436SRichard Henderson         k = aes_accel_bswap(k);
106*d6a24436SRichard Henderson         t = _mm_aesdeclast_si128(t, k);
107*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
108*d6a24436SRichard Henderson     } else {
109*d6a24436SRichard Henderson         t = _mm_aesdeclast_si128(t, k);
110*d6a24436SRichard Henderson     }
111*d6a24436SRichard Henderson     ret->v = (AESStateVec)t;
112*d6a24436SRichard Henderson }
113*d6a24436SRichard Henderson 
114*d6a24436SRichard Henderson static inline void ATTR_AES_ACCEL
aesdec_ISB_ISR_AK_IMC_accel(AESState * ret,const AESState * st,const AESState * rk,bool be)115*d6a24436SRichard Henderson aesdec_ISB_ISR_AK_IMC_accel(AESState *ret, const AESState *st,
116*d6a24436SRichard Henderson                             const AESState *rk, bool be)
117*d6a24436SRichard Henderson {
118*d6a24436SRichard Henderson     __m128i t = (__m128i)st->v;
119*d6a24436SRichard Henderson     __m128i k = (__m128i)rk->v;
120*d6a24436SRichard Henderson 
121*d6a24436SRichard Henderson     if (be) {
122*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
123*d6a24436SRichard Henderson         k = aes_accel_bswap(k);
124*d6a24436SRichard Henderson         t = _mm_aesdeclast_si128(t, k);
125*d6a24436SRichard Henderson         t = _mm_aesimc_si128(t);
126*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
127*d6a24436SRichard Henderson     } else {
128*d6a24436SRichard Henderson         t = _mm_aesdeclast_si128(t, k);
129*d6a24436SRichard Henderson         t = _mm_aesimc_si128(t);
130*d6a24436SRichard Henderson     }
131*d6a24436SRichard Henderson     ret->v = (AESStateVec)t;
132*d6a24436SRichard Henderson }
133*d6a24436SRichard Henderson 
134*d6a24436SRichard Henderson static inline void ATTR_AES_ACCEL
aesdec_ISB_ISR_IMC_AK_accel(AESState * ret,const AESState * st,const AESState * rk,bool be)135*d6a24436SRichard Henderson aesdec_ISB_ISR_IMC_AK_accel(AESState *ret, const AESState *st,
136*d6a24436SRichard Henderson                             const AESState *rk, bool be)
137*d6a24436SRichard Henderson {
138*d6a24436SRichard Henderson     __m128i t = (__m128i)st->v;
139*d6a24436SRichard Henderson     __m128i k = (__m128i)rk->v;
140*d6a24436SRichard Henderson 
141*d6a24436SRichard Henderson     if (be) {
142*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
143*d6a24436SRichard Henderson         k = aes_accel_bswap(k);
144*d6a24436SRichard Henderson         t = _mm_aesdec_si128(t, k);
145*d6a24436SRichard Henderson         t = aes_accel_bswap(t);
146*d6a24436SRichard Henderson     } else {
147*d6a24436SRichard Henderson         t = _mm_aesdec_si128(t, k);
148*d6a24436SRichard Henderson     }
149*d6a24436SRichard Henderson     ret->v = (AESStateVec)t;
150*d6a24436SRichard Henderson }
151*d6a24436SRichard Henderson 
152*d6a24436SRichard Henderson #endif /* X86_HOST_CRYPTO_AES_ROUND_H */
153