1 /*
2 ---------------------------------------------------------------------------
3 Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
4
5 The redistribution and use of this software (with or without changes)
6 is allowed without the payment of fees or royalties provided that:
7
8 source code distributions include the above copyright notice, this
9 list of conditions and the following disclaimer;
10
11 binary distributions include the above copyright notice, this list
12 of conditions and the following disclaimer in their documentation.
13
14 This software is provided 'as is' with no explicit or implied warranties
15 in respect of its operation, including, but not limited to, correctness
16 and fitness for purpose.
17 ---------------------------------------------------------------------------
18 Issue Date: 20/12/2007
19 */
20
21 #include "aesopt.h"
22 #include "aestab.h"
23
24 #if defined( USE_INTEL_AES_IF_PRESENT )
25 #include <aes/aesaes_ni.h>
26 #else
27 /* map names here to provide the external API ('name' -> 'aes_name') */
28 # define aes_xi(x) aes_ ## x
29 #endif
30
31 #if defined(__cplusplus)
32 extern "C"
33 {
34 #endif
35
/* Helper macros shared by the encryption and decryption routines below.
   s(), word_in() and word_out() are not defined in this file; they are
   expected to be supplied by the headers included above.                 */

/* si: XOR column c of input x with round-key word (k)[c] and store the
   result in column c of state y                                          */
#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])

/* so: write column c of state x to column c of output y */
#define so(y,x,c)   word_out(y, c, s(x,c))

/* locals: declarator list for two four-column state variables, either as
   two arrays (ARRAYS defined) or as eight individually named scalars     */
#if defined(ARRAYS)
#define locals(y,x)     x[4],y[4]
#else
#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
#endif

/* The macros below each expand to four statements.  They are wrapped in
   do { } while(0) so that 'macro(...);' is always exactly one statement,
   which keeps them correct as the body of an unbraced if/for/while.

   l_copy    : copy all four columns of state x into state y
   state_in  : apply si() to columns 0..3
   state_out : apply so() to columns 0..3
   round     : apply the round macro rm to columns 0..3                   */
#define l_copy(y, x)    do { s(y,0) = s(x,0); s(y,1) = s(x,1); \
                             s(y,2) = s(x,2); s(y,3) = s(x,3); } while(0)
#define state_in(y,x,k) do { si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); } while(0)
#define state_out(y,x)  do { so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); } while(0)
#define round(rm,y,x,k) do { rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); } while(0)
50
51 #if ( FUNCS_IN_C & ENCRYPTION_IN_C )
52
53 /* Visual C++ .Net v7.1 provides the fastest encryption code when using
54 Pentium optimization with small code but this is poor for decryption
55 so we need to control this with the following VC++ pragmas
56 */
57
58 #if defined( _MSC_VER ) && !defined( _WIN64 )
59 #pragma optimize( "s", on )
60 #endif
61
62 /* Given the column (c) of the output state variable, the following
63 macros give the input state variables which are needed in its
64 computation for each row (r) of the state. All the alternative
65 macros give the same end values but expand into different ways
66 of calculating these values. In particular the complex macro
67 used for dynamically variable block sizes is designed to expand
68 to a compile time constant whenever possible but will expand to
69 conditional clauses on some branches (I am grateful to Frank
70 Yellin for this construction)
71 */
72
/* fwd_var(x,r,c): the column of state x that feeds row r of output
   column c in a forward round, which works out to column (c + r) mod 4.
   It is written as nested conditionals on r and c so that s() is always
   invoked with a literal column index (presumably because s() may paste
   the index onto a variable name, as locals() does - confirm in the
   header that defines s()).                                              */
73 #define fwd_var(x,r,c)\
74 ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
75 : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
76 : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
77 : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
78
/* fwd_rnd(y,x,k,c): normal forward round for output column c.  The result
   stored in s(y,c) is round-key word (k)[c] XORed with a value computed
   from state x by one of three alternatives:
     FT4_SET   - four_tables() on t_use(f,n)
     FT1_SET   - one_table() on t_use(f,n) with upr
     otherwise - no_table() on t_use(s,box), then fwd_mcol()
   Only the last alternative calls fwd_mcol(), so the first two undefine
   dec_fmvars; the encrypt routine declares dec_fmvars only when it is
   still defined.                                                         */
79 #if defined(FT4_SET)
80 #undef dec_fmvars
81 #define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
82 #elif defined(FT1_SET)
83 #undef dec_fmvars
84 #define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
85 #else
86 #define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
87 #endif
88
/* fwd_lrnd(y,x,k,c): last forward round for output column c.  Same shape
   as fwd_rnd but using t_use(f,l) (FL4_SET / FL1_SET, the latter with
   ups) or t_use(s,box) with no fwd_mcol() step.                          */
89 #if defined(FL4_SET)
90 #define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
91 #elif defined(FL1_SET)
92 #define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
93 #else
94 #define fwd_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
95 #endif
96
/* Encrypt one block using the key schedule held in cx.

   in  - input block; four columns are read from it by state_in()
   out - output block; four columns are written to it by state_out()
   cx  - encryption context: cx->ks is the key schedule and cx->inf.b[0]
         must be 10 * 16, 12 * 16 or 14 * 16 (i.e. 16 times the number
         of rounds that will be executed)

   Returns EXIT_SUCCESS, or EXIT_FAILURE (without touching out) when
   cx->inf.b[0] holds any other value.

   ENC_UNROLL selects one of three equivalent round schedules: fully
   unrolled, two rounds per loop iteration, or one round per iteration.   */
aes_xi(encrypt)97 AES_RETURN aes_xi(encrypt)(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1])
98 { uint32_t locals(b0, b1);
99 const uint32_t *kp;
100 #if defined( dec_fmvars )
101 dec_fmvars; /* declare variables for fwd_mcol() if needed */
102 #endif
103
/* reject a context whose round information is not one of the three
   supported values */
104 if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16)
105 return EXIT_FAILURE;
106
/* load the input into b0, XORing in the first four key schedule words */
107 kp = cx->ks;
108 state_in(b0, in, kp);
109
110 #if (ENC_UNROLL == FULL)
111
/* The cases fall through on purpose.  The 14 * 16 and 12 * 16 cases each
   run two extra rounds and advance kp by two round keys, so the shared
   10 * 16 tail can always use offsets 1..10 relative to kp.  The state
   alternates between b0 and b1 and ends in b0 after the last round.      */
112 switch(cx->inf.b[0])
113 {
114 case 14 * 16:
115 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
116 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
117 kp += 2 * N_COLS;
/* fallthrough */
118 case 12 * 16:
119 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
120 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
121 kp += 2 * N_COLS;
/* fallthrough */
122 case 10 * 16:
123 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
124 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
125 round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
126 round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
127 round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
128 round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
129 round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
130 round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
131 round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
132 round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
133 }
134
135 #else
136
137 #if (ENC_UNROLL == PARTIAL)
/* two normal rounds per iteration, (b[0] >> 5) - 1 iterations, with the
   state ping-ponging b0 -> b1 -> b0 */
138 { uint32_t rnd;
139 for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
140 {
141 kp += N_COLS;
142 round(fwd_rnd, b1, b0, kp);
143 kp += N_COLS;
144 round(fwd_rnd, b0, b1, kp);
145 }
/* one further normal round leaves the state in b1 for the last round */
146 kp += N_COLS;
147 round(fwd_rnd, b1, b0, kp);
148 #else
/* one normal round per iteration, (b[0] >> 4) - 1 iterations; the result
   is copied back to b0 so that every iteration reads b0 and writes b1 */
149 { uint32_t rnd;
150 for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
151 {
152 kp += N_COLS;
153 round(fwd_rnd, b1, b0, kp);
154 l_copy(b0, b1);
155 }
156 #endif
/* last round, shared by both loop variants: reads b1 and writes b0; the
   closing brace below ends the block opened in whichever branch was
   compiled */
157 kp += N_COLS;
158 round(fwd_lrnd, b0, b1, kp);
159 }
160 #endif
161
/* write the final state to the caller's output block */
162 state_out(out, b0);
163 return EXIT_SUCCESS;
164 }
165
166 #endif
167
168 #if ( FUNCS_IN_C & DECRYPTION_IN_C)
169
170 /* Visual C++ .Net v7.1 provides the fastest encryption code when using
171 Pentium optimization with small code but this is poor for decryption
172 so we need to control this with the following VC++ pragmas
173 */
174
175 #if defined( _MSC_VER ) && !defined( _WIN64 )
176 #pragma optimize( "t", on )
177 #endif
178
179 /* Given the column (c) of the output state variable, the following
180 macros give the input state variables which are needed in its
181 computation for each row (r) of the state. All the alternative
182 macros give the same end values but expand into different ways
183 of calculating these values. In particular the complex macro
184 used for dynamically variable block sizes is designed to expand
185 to a compile time constant whenever possible but will expand to
186 conditional clauses on some branches (I am grateful to Frank
187 Yellin for this construction)
188 */
189
/* inv_var(x,r,c): the column of state x that feeds row r of output
   column c in an inverse round, which works out to column (c - r) mod 4.
   As with the forward version, nested conditionals are used so that s()
   is always invoked with a literal column index.                         */
190 #define inv_var(x,r,c)\
191 ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
192 : r == 1 ? ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))\
193 : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
194 : ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0)))
195
/* inv_rnd(y,x,k,c): normal inverse round for output column c, in one of
   three alternatives:
     IT4_SET   - (k)[c] XOR four_tables() on t_use(i,n)
     IT1_SET   - (k)[c] XOR one_table() on t_use(i,n) with upr
     otherwise - inv_mcol() applied to ((k)[c] XOR no_table() on
                 t_use(i,box)); note the key word is XORed in BEFORE
                 inv_mcol() here, unlike the table-driven forms
   Only the last alternative calls inv_mcol(), so the first two undefine
   dec_imvars; the decrypt routine declares dec_imvars only when it is
   still defined.                                                         */
196 #if defined(IT4_SET)
197 #undef dec_imvars
198 #define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
199 #elif defined(IT1_SET)
200 #undef dec_imvars
201 #define inv_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
202 #else
203 #define inv_rnd(y,x,k,c) (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
204 #endif
205
/* inv_lrnd(y,x,k,c): last inverse round for output column c.  Uses
   t_use(i,l) (IL4_SET / IL1_SET, the latter with ups) or t_use(i,box),
   with no inv_mcol() step in any alternative.                            */
206 #if defined(IL4_SET)
207 #define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
208 #elif defined(IL1_SET)
209 #define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
210 #else
211 #define inv_lrnd(y,x,k,c) (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
212 #endif
213
214 /* This code can work with the decryption key schedule in the */
215 /* order that is used for encryption (where the 1st decryption */
216 /* round key is at the high end of the schedule) or with a key */
217 /* schedule that has been reversed to put the 1st decryption */
218 /* round key at the low end of the schedule in memory (when */
219 /* AES_REV_DKS is defined) */
220
/* key_ofs  : 1 when decryption starts from the high end of the key
              schedule, 0 when the schedule has been reversed
              (AES_REV_DKS defined) and decryption starts from its low end
   rnd_key(n): pointer to the round key n round-key strides (N_COLS words
              each) away from kp - towards higher addresses when
              AES_REV_DKS is defined, towards lower addresses otherwise.
              It reads the variable 'kp' from the enclosing scope.  The
              parameter is parenthesised so that an expression argument
              such as rnd_key(i + 1) is scaled as a whole.                */
#ifdef AES_REV_DKS
#define key_ofs     0
#define rnd_key(n)  (kp + (n) * N_COLS)
#else
#define key_ofs     1
#define rnd_key(n)  (kp - (n) * N_COLS)
#endif
228
/* Decrypt one block using the key schedule held in cx.

   in  - input block; four columns are read from it by state_in()
   out - output block; four columns are written to it by state_out()
   cx  - decryption context: cx->ks is the key schedule and cx->inf.b[0]
         must be 10 * 16, 12 * 16 or 14 * 16 (i.e. 16 times the number
         of rounds that will be executed)

   Returns EXIT_SUCCESS, or EXIT_FAILURE (without touching out) when
   cx->inf.b[0] holds any other value.

   The direction in which the key schedule is walked is set by key_ofs
   and rnd_key(); DEC_UNROLL selects one of three equivalent round
   schedules.                                                             */
229 AES_RETURN aes_xi(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1])
230 { uint32_t locals(b0, b1);
231 #if defined( dec_imvars )
232 dec_imvars; /* declare variables for inv_mcol() if needed */
233 #endif
234 const uint32_t *kp;
235
/* reject a context whose round information is not one of the three
   supported values */
236 if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16)
237 return EXIT_FAILURE;
238
/* point kp at the first decryption round key: b[0] >> 2 words into the
   schedule when key_ofs is 1, the start of the schedule when it is 0;
   then load the input into b0, XORing in those four key words */
239 kp = cx->ks + (key_ofs ? (cx->inf.b[0] >> 2) : 0);
240 state_in(b0, in, kp);
241
242 #if (DEC_UNROLL == FULL)
243
/* Re-base kp at the opposite end of the schedule (where the last round
   key lives) so that rnd_key(-n) with a fixed n addresses the same round
   key for every supported value of b[0].  The cases fall through on
   purpose: the larger values simply run two extra rounds first.          */
244 kp = cx->ks + (key_ofs ? 0 : (cx->inf.b[0] >> 2));
245 switch(cx->inf.b[0])
246 {
247 case 14 * 16:
248 round(inv_rnd, b1, b0, rnd_key(-13));
249 round(inv_rnd, b0, b1, rnd_key(-12));
/* fallthrough */
250 case 12 * 16:
251 round(inv_rnd, b1, b0, rnd_key(-11));
252 round(inv_rnd, b0, b1, rnd_key(-10));
/* fallthrough */
253 case 10 * 16:
254 round(inv_rnd, b1, b0, rnd_key(-9));
255 round(inv_rnd, b0, b1, rnd_key(-8));
256 round(inv_rnd, b1, b0, rnd_key(-7));
257 round(inv_rnd, b0, b1, rnd_key(-6));
258 round(inv_rnd, b1, b0, rnd_key(-5));
259 round(inv_rnd, b0, b1, rnd_key(-4));
260 round(inv_rnd, b1, b0, rnd_key(-3));
261 round(inv_rnd, b0, b1, rnd_key(-2));
262 round(inv_rnd, b1, b0, rnd_key(-1));
263 round(inv_lrnd, b0, b1, rnd_key( 0));
264 }
265
266 #else
267
268 #if (DEC_UNROLL == PARTIAL)
/* two normal rounds per iteration, (b[0] >> 5) - 1 iterations; each
   'kp = rnd_key(1)' steps kp by one round key in the decryption
   direction */
269 { uint32_t rnd;
270 for(rnd = 0; rnd < (cx->inf.b[0] >> 5) - 1; ++rnd)
271 {
272 kp = rnd_key(1);
273 round(inv_rnd, b1, b0, kp);
274 kp = rnd_key(1);
275 round(inv_rnd, b0, b1, kp);
276 }
/* one further normal round leaves the state in b1 for the last round */
277 kp = rnd_key(1);
278 round(inv_rnd, b1, b0, kp);
279 #else
/* one normal round per iteration, (b[0] >> 4) - 1 iterations; the result
   is copied back to b0 so that every iteration reads b0 and writes b1 */
280 { uint32_t rnd;
281 for(rnd = 0; rnd < (cx->inf.b[0] >> 4) - 1; ++rnd)
282 {
283 kp = rnd_key(1);
284 round(inv_rnd, b1, b0, kp);
285 l_copy(b0, b1);
286 }
287 #endif
/* last round, shared by both loop variants: reads b1 and writes b0; the
   closing brace below ends the block opened in whichever branch was
   compiled */
288 kp = rnd_key(1);
289 round(inv_lrnd, b0, b1, kp);
290 }
291 #endif
292
/* write the final state to the caller's output block */
293 state_out(out, b0);
294 return EXIT_SUCCESS;
295 }
296
297 #endif
298
299 #if defined(__cplusplus)
300 }
301 #endif
302