1 /*
2 ---------------------------------------------------------------------------
3 Copyright (c) 2003, Dr Brian Gladman < >, Worcester, UK.
4 All rights reserved.
5
6 LICENSE TERMS
7
8 The free distribution and use of this software in both source and binary
9 form is allowed (with or without changes) provided that:
10
11 1. distributions of this source code include the above copyright
12 notice, this list of conditions and the following disclaimer;
13
14 2. distributions in binary form include the above copyright
15 notice, this list of conditions and the following disclaimer
16 in the documentation and/or other associated materials;
17
18 3. the copyright holder's name is not used to endorse products
19 built using this software without specific written permission.
20
21 ALTERNATIVELY, provided that this notice is retained in full, this product
22 may be distributed under the terms of the GNU General Public License (GPL),
23 in which case the provisions of the GPL apply INSTEAD OF those given above.
24
25 DISCLAIMER
26
27 This software is provided 'as is' with no explicit or implied warranties
28 in respect of its properties, including, but not limited to, correctness
29 and/or fitness for purpose.
30 ---------------------------------------------------------------------------
31 Issue Date: 26/08/2003
32
33 This file contains the code for implementing encryption and decryption
34 for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It
35 can optionally be replaced by code written in assembler using NASM. For
36 further details see the file aesopt.h
37 */
38
39 #include "aesopt.h"
40
/* Helper macros for moving the cipher state in and out of local variables.
   s(), word_in() and word_out() are not defined in this file; they are
   expected to come from aesopt.h.                                        */

/* Expression: load word c of input block x, XOR it with round-key word
   (k)[c] and assign the result to column c of state y.                   */
#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])

/* Store column c of state x into word c of output block y.              */
#define so(y,x,c)   word_out(y, c, s(x,c))

/* Declarator list for two four-column states: either two arrays of four
   words or eight individually named scalars (x0..x3, y0..y3).  This is
   used inside a declaration, so it cannot be wrapped in a statement.     */
#if defined(ARRAYS)
#define locals(y,x) x[4],y[4]
#else
#define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
#endif

/* The macros below expand to several statements.  Each is wrapped in
   do { ... } while (0) so that 'macro(...);' is exactly one statement and
   remains correct under an unbraced if/else or loop body.                */

/* Copy all four columns of state x into state y.                         */
#define l_copy(y, x) \
    do { s(y,0) = s(x,0); s(y,1) = s(x,1); \
         s(y,2) = s(x,2); s(y,3) = s(x,3); } while (0)

/* Load the whole input block x into state y, XORing in round key k.      */
#define state_in(y,x,k) \
    do { si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); } while (0)

/* Write the whole state x to output block y.                             */
#define state_out(y,x) \
    do { so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); } while (0)

/* Apply the per-column round macro rm to columns 0..3, producing state y
   from state x with round key k.                                         */
#define round(rm,y,x,k) \
    do { rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); } while (0)
55
56 #if defined(ENCRYPTION) && !defined(AES_ASM)
57
58 /* Visual C++ .Net v7.1 provides the fastest encryption code when using
59 Pentium optimization with small code but this is poor for decryption
60 so we need to control this with the following VC++ pragmas
61 */
62
63 #if defined(_MSC_VER)
64 #pragma optimize( "s", on )
65 #endif
66
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values. In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)

   fwd_var(x,r,c) selects column (c + r) mod 4 of state x, i.e. row r of
   output column c is taken r columns further along in the input state.
   r and c are parenthesized in the comparisons so that expression
   arguments (e.g. 'i & 3') are compared as a whole; the column numbers
   handed to s() are left bare because s() may paste them onto a name.
*/

#define fwd_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2)))

/* fwd_rnd(y,x,k,c): compute column c of state y from state x and XOR in
   round-key word (k)[c].  Used for every round except the last.  The
   table-driven variants do not call fwd_mcol(), so they undefine
   dec_fmvars and the temporaries it would declare are not emitted; the
   fallback variant applies fwd_mcol() to the no_table() result.          */
#if defined(FT4_SET)
#undef  dec_fmvars
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
#elif defined(FT1_SET)
#undef  dec_fmvars
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
#else
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
#endif

/* fwd_lrnd(y,x,k,c): same shape as fwd_rnd but for the last round; it
   uses the 'l' tables (or the plain s-box) and never calls fwd_mcol().   */
#if defined(FL4_SET)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
#elif defined(FL1_SET)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
#else
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
#endif
101
aes_encrypt(const void * in_blk,void * out_blk,const aes_encrypt_ctx cx[1])102 aes_rval aes_encrypt(const void *in_blk, void *out_blk, const aes_encrypt_ctx cx[1])
103 { aes_32t locals(b0, b1);
104 const aes_32t *kp = cx->ks;
105 #ifdef dec_fmvars
106 dec_fmvars; /* declare variables for fwd_mcol() if needed */
107 #endif
108
109 aes_32t nr = (kp[45] ^ kp[52] ^ kp[53] ? kp[52] : 14);
110
111 #ifdef AES_ERR_CHK
112 if( (nr != 10 || !(kp[0] | kp[3] | kp[4]))
113 && (nr != 12 || !(kp[0] | kp[5] | kp[6]))
114 && (nr != 14 || !(kp[0] | kp[7] | kp[8])) )
115 return aes_error;
116 #endif
117
118 state_in(b0, in_blk, kp);
119
120 #if (ENC_UNROLL == FULL)
121
122 switch(nr)
123 {
124 case 14:
125 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
126 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
127 kp += 2 * N_COLS;
128 /* Falls through. */
129 case 12:
130 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
131 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
132 kp += 2 * N_COLS;
133 /* Falls through. */
134 case 10:
135 round(fwd_rnd, b1, b0, kp + 1 * N_COLS);
136 round(fwd_rnd, b0, b1, kp + 2 * N_COLS);
137 round(fwd_rnd, b1, b0, kp + 3 * N_COLS);
138 round(fwd_rnd, b0, b1, kp + 4 * N_COLS);
139 round(fwd_rnd, b1, b0, kp + 5 * N_COLS);
140 round(fwd_rnd, b0, b1, kp + 6 * N_COLS);
141 round(fwd_rnd, b1, b0, kp + 7 * N_COLS);
142 round(fwd_rnd, b0, b1, kp + 8 * N_COLS);
143 round(fwd_rnd, b1, b0, kp + 9 * N_COLS);
144 round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
145 }
146
147 #else
148
149 #if (ENC_UNROLL == PARTIAL)
150 { aes_32t rnd;
151 for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
152 {
153 kp += N_COLS;
154 round(fwd_rnd, b1, b0, kp);
155 kp += N_COLS;
156 round(fwd_rnd, b0, b1, kp);
157 }
158 kp += N_COLS;
159 round(fwd_rnd, b1, b0, kp);
160 #else
161 { aes_32t rnd;
162 for(rnd = 0; rnd < nr - 1; ++rnd)
163 {
164 kp += N_COLS;
165 round(fwd_rnd, b1, b0, kp);
166 l_copy(b0, b1);
167 }
168 #endif
169 kp += N_COLS;
170 round(fwd_lrnd, b0, b1, kp);
171 }
172 #endif
173
174 state_out(out_blk, b0);
175 #ifdef AES_ERR_CHK
176 return aes_good;
177 #endif
178 }
179
180 #endif
181
182 #if defined(DECRYPTION) && !defined(AES_ASM)
183
184 /* Visual C++ .Net v7.1 provides the fastest encryption code when using
185 Pentium optimization with small code but this is poor for decryption
186 so we need to control this with the following VC++ pragmas
187 */
188
189 #if defined(_MSC_VER)
190 #pragma optimize( "t", on )
191 #endif
192
/* Given the column (c) of the output state variable, the following
   macros give the input state variables which are needed in its
   computation for each row (r) of the state. All the alternative
   macros give the same end values but expand into different ways
   of calculating these values. In particular the complex macro
   used for dynamically variable block sizes is designed to expand
   to a compile time constant whenever possible but will expand to
   conditional clauses on some branches (I am grateful to Frank
   Yellin for this construction)

   inv_var(x,r,c) selects column (c - r) mod 4 of state x, the reverse
   of the column selection used for encryption.  r and c are
   parenthesized in the comparisons so that expression arguments
   (e.g. 'i & 3') are compared as a whole; the column numbers handed to
   s() are left bare because s() may paste them onto a name.
*/

#define inv_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0)))

/* inv_rnd(y,x,k,c): compute column c of state y from state x for every
   round except the last.  The table-driven variants XOR round-key word
   (k)[c] into the table result and undefine dec_imvars because they do
   not call inv_mcol().  The fallback variant XORs the key word into the
   no_table() result first and then applies inv_mcol() to the sum.        */
#if defined(IT4_SET)
#undef  dec_imvars
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
#elif defined(IT1_SET)
#undef  dec_imvars
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
#else
#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
#endif

/* inv_lrnd(y,x,k,c): same shape as inv_rnd but for the last round; it
   uses the 'l' tables (or the plain inverse s-box) and never calls
   inv_mcol().                                                            */
#if defined(IL4_SET)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
#elif defined(IL1_SET)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
#else
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
#endif
227
228 aes_rval aes_decrypt(const void *in_blk, void *out_blk, const aes_decrypt_ctx cx[1])
229 { aes_32t locals(b0, b1);
230 #ifdef dec_imvars
231 dec_imvars; /* declare variables for inv_mcol() if needed */
232 #endif
233
234 aes_32t nr = (cx->ks[45] ^ cx->ks[52] ^ cx->ks[53] ? cx->ks[52] : 14);
235 const aes_32t *kp = cx->ks + nr * N_COLS;
236
237 #ifdef AES_ERR_CHK
238 if( (nr != 10 || !(cx->ks[0] | cx->ks[3] | cx->ks[4]))
239 && (nr != 12 || !(cx->ks[0] | cx->ks[5] | cx->ks[6]))
240 && (nr != 14 || !(cx->ks[0] | cx->ks[7] | cx->ks[8])) )
241 return aes_error;
242 #endif
243
244 state_in(b0, in_blk, kp);
245
246 #if (DEC_UNROLL == FULL)
247
248 switch(nr)
249 {
250 case 14:
251 round(inv_rnd, b1, b0, kp - 1 * N_COLS);
252 round(inv_rnd, b0, b1, kp - 2 * N_COLS);
253 kp -= 2 * N_COLS;
254 /* Falls through. */
255 case 12:
256 round(inv_rnd, b1, b0, kp - 1 * N_COLS);
257 round(inv_rnd, b0, b1, kp - 2 * N_COLS);
258 kp -= 2 * N_COLS;
259 /* Falls through. */
260 case 10:
261 round(inv_rnd, b1, b0, kp - 1 * N_COLS);
262 round(inv_rnd, b0, b1, kp - 2 * N_COLS);
263 round(inv_rnd, b1, b0, kp - 3 * N_COLS);
264 round(inv_rnd, b0, b1, kp - 4 * N_COLS);
265 round(inv_rnd, b1, b0, kp - 5 * N_COLS);
266 round(inv_rnd, b0, b1, kp - 6 * N_COLS);
267 round(inv_rnd, b1, b0, kp - 7 * N_COLS);
268 round(inv_rnd, b0, b1, kp - 8 * N_COLS);
269 round(inv_rnd, b1, b0, kp - 9 * N_COLS);
270 round(inv_lrnd, b0, b1, kp - 10 * N_COLS);
271 }
272
273 #else
274
275 #if (DEC_UNROLL == PARTIAL)
276 { aes_32t rnd;
277 for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
278 {
279 kp -= N_COLS;
280 round(inv_rnd, b1, b0, kp);
281 kp -= N_COLS;
282 round(inv_rnd, b0, b1, kp);
283 }
284 kp -= N_COLS;
285 round(inv_rnd, b1, b0, kp);
286 #else
287 { aes_32t rnd;
288 for(rnd = 0; rnd < nr - 1; ++rnd)
289 {
290 kp -= N_COLS;
291 round(inv_rnd, b1, b0, kp);
292 l_copy(b0, b1);
293 }
294 #endif
295 kp -= N_COLS;
296 round(inv_lrnd, b0, b1, kp);
297 }
298 #endif
299
300 state_out(out_blk, b0);
301 #ifdef AES_ERR_CHK
302 return aes_good;
303 #endif
304 }
305
306 #endif
307
308