1 /*
2  ---------------------------------------------------------------------------
3  Copyright (c) 2003, Dr Brian Gladman <                 >, Worcester, UK.
4  All rights reserved.
5 
6  LICENSE TERMS
7 
8  The free distribution and use of this software in both source and binary
9  form is allowed (with or without changes) provided that:
10 
11    1. distributions of this source code include the above copyright
12       notice, this list of conditions and the following disclaimer;
13 
14    2. distributions in binary form include the above copyright
15       notice, this list of conditions and the following disclaimer
16       in the documentation and/or other associated materials;
17 
18    3. the copyright holder's name is not used to endorse products
19       built using this software without specific written permission.
20 
21  ALTERNATIVELY, provided that this notice is retained in full, this product
22  may be distributed under the terms of the GNU General Public License (GPL),
23  in which case the provisions of the GPL apply INSTEAD OF those given above.
24 
25  DISCLAIMER
26 
27  This software is provided 'as is' with no explicit or implied warranties
28  in respect of its properties, including, but not limited to, correctness
29  and/or fitness for purpose.
30  ---------------------------------------------------------------------------
31  Issue Date: 26/08/2003
32 
33  This file contains the code for implementing encryption and decryption
34  for AES (Rijndael) for block and key sizes of 16, 24 and 32 bytes. It
35  can optionally be replaced by code written in assembler using NASM. For
36  further details see the file aesopt.h
37 */
38 
39 #include "aesopt.h"
40 
/* si() loads column c of state y from input block x and xors it with
   round-key word k[c]; so() stores column c of state x to output block
   y.  The helpers s(), word_in() and word_out() are not defined in this
   file (presumably they are supplied by aesopt.h).
*/
#define si(y,x,k,c) (s(y,c) = word_in(x, c) ^ (k)[c])
#define so(y,x,c)   word_out(y, c, s(x,c))

/* locals() expands to the declarator list for two sets of four state
   words: two 4-element arrays when ARRAYS is defined, otherwise eight
   separately named scalars (x0..x3 and y0..y3).
*/
#if defined(ARRAYS)
#define locals(y,x)     x[4],y[4]
#else
#define locals(y,x)     x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
#endif

/* Each helper below applies one operation to all four state columns.
   They are wrapped in do { ... } while(0) so that each expands to a
   single statement and stays correct when used as the body of an
   unbraced if/else or loop; invoke them with a trailing semicolon.

   l_copy    copies state x to state y
   state_in  loads state y from block x, xoring in round key k
   state_out stores state x to block y
   round     applies the column macro rm to columns 0..3 with key k
*/
#define l_copy(y, x)    do { s(y,0) = s(x,0); s(y,1) = s(x,1); \
                             s(y,2) = s(x,2); s(y,3) = s(x,3); } while(0)
#define state_in(y,x,k) do { si(y,x,k,0); si(y,x,k,1); \
                             si(y,x,k,2); si(y,x,k,3); } while(0)
#define state_out(y,x)  do { so(y,x,0); so(y,x,1); \
                             so(y,x,2); so(y,x,3); } while(0)
#define round(rm,y,x,k) do { rm(y,x,k,0); rm(y,x,k,1); \
                             rm(y,x,k,2); rm(y,x,k,3); } while(0)
55 
56 #if defined(ENCRYPTION) && !defined(AES_ASM)
57 
/* Visual C++ .Net v7.1 produces its fastest encryption code when it uses
   Pentium optimization with small code, but that setting gives poor
   decryption code.  The optimization mode is therefore selected
   separately for each routine with the following VC++ pragmas.
*/
62 
63 #if defined(_MSC_VER)
64 #pragma optimize( "s", on )
65 #endif
66 
/* Given the column (c) of the output state variable, fwd_var gives the
   input state variable needed in its computation for row (r) of the
   state: it selects word (r + c) mod 4 of x.  The selection is written
   as nested conditionals so that it expands to a compile time constant
   whenever r and c are constants, and to conditional clauses otherwise
   (I am grateful to Frank Yellin for this construction).  The r and c
   arguments are parenthesized so that operator expressions passed as
   arguments are compared as a whole.
*/

#define fwd_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2)))

/* fwd_rnd computes column c of output state y for a normal round from
   input state x and round key k, using four tables, one table or no
   table according to the configured table set.  The table-driven forms
   do not call fwd_mcol(), so dec_fmvars is undefined for them and its
   variables are not declared in aes_encrypt().
*/
#if defined(FT4_SET)
#undef  dec_fmvars
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
#elif defined(FT1_SET)
#undef  dec_fmvars
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(f,n),fwd_var,rf1,c))
#else
#define fwd_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ fwd_mcol(no_table(x,t_use(s,box),fwd_var,rf1,c)))
#endif

/* fwd_lrnd is the corresponding column computation for the last round;
   its table-free form omits the fwd_mcol() step.
*/
#if defined(FL4_SET)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,l),fwd_var,rf1,c))
#elif defined(FL1_SET)
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(f,l),fwd_var,rf1,c))
#else
#define fwd_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(s,box),fwd_var,rf1,c))
#endif
101 
aes_encrypt(const void * in_blk,void * out_blk,const aes_encrypt_ctx cx[1])102 aes_rval aes_encrypt(const void *in_blk, void *out_blk, const aes_encrypt_ctx cx[1])
103 {   aes_32t         locals(b0, b1);
104     const aes_32t   *kp = cx->ks;
105 #ifdef dec_fmvars
106     dec_fmvars; /* declare variables for fwd_mcol() if needed */
107 #endif
108 
109     aes_32t nr = (kp[45] ^ kp[52] ^ kp[53] ? kp[52] : 14);
110 
111 #ifdef AES_ERR_CHK
112     if(   (nr != 10 || !(kp[0] | kp[3] | kp[4]))
113        && (nr != 12 || !(kp[0] | kp[5] | kp[6]))
114        && (nr != 14 || !(kp[0] | kp[7] | kp[8])) )
115         return aes_error;
116 #endif
117 
118     state_in(b0, in_blk, kp);
119 
120 #if (ENC_UNROLL == FULL)
121 
122     switch(nr)
123     {
124     case 14:
125         round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
126         round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
127         kp += 2 * N_COLS;
128         /* Falls through. */
129     case 12:
130         round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
131         round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
132         kp += 2 * N_COLS;
133         /* Falls through. */
134     case 10:
135         round(fwd_rnd,  b1, b0, kp + 1 * N_COLS);
136         round(fwd_rnd,  b0, b1, kp + 2 * N_COLS);
137         round(fwd_rnd,  b1, b0, kp + 3 * N_COLS);
138         round(fwd_rnd,  b0, b1, kp + 4 * N_COLS);
139         round(fwd_rnd,  b1, b0, kp + 5 * N_COLS);
140         round(fwd_rnd,  b0, b1, kp + 6 * N_COLS);
141         round(fwd_rnd,  b1, b0, kp + 7 * N_COLS);
142         round(fwd_rnd,  b0, b1, kp + 8 * N_COLS);
143         round(fwd_rnd,  b1, b0, kp + 9 * N_COLS);
144         round(fwd_lrnd, b0, b1, kp +10 * N_COLS);
145     }
146 
147 #else
148 
149 #if (ENC_UNROLL == PARTIAL)
150     {   aes_32t    rnd;
151         for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
152         {
153             kp += N_COLS;
154             round(fwd_rnd, b1, b0, kp);
155             kp += N_COLS;
156             round(fwd_rnd, b0, b1, kp);
157         }
158         kp += N_COLS;
159         round(fwd_rnd,  b1, b0, kp);
160 #else
161     {   aes_32t    rnd;
162         for(rnd = 0; rnd < nr - 1; ++rnd)
163         {
164             kp += N_COLS;
165             round(fwd_rnd, b1, b0, kp);
166             l_copy(b0, b1);
167         }
168 #endif
169         kp += N_COLS;
170         round(fwd_lrnd, b0, b1, kp);
171     }
172 #endif
173 
174     state_out(out_blk, b0);
175 #ifdef AES_ERR_CHK
176     return aes_good;
177 #endif
178 }
179 
180 #endif
181 
182 #if defined(DECRYPTION) && !defined(AES_ASM)
183 
/* Visual C++ .Net v7.1 produces its fastest encryption code when it uses
   Pentium optimization with small code, but that setting gives poor
   decryption code.  The optimization mode is therefore selected
   separately for each routine with the following VC++ pragmas.
*/
188 
189 #if defined(_MSC_VER)
190 #pragma optimize( "t", on )
191 #endif
192 
/* Given the column (c) of the output state variable, inv_var gives the
   input state variable needed in its computation for row (r) of the
   state: it selects word (c - r) mod 4 of x.  The selection is written
   as nested conditionals so that it expands to a compile time constant
   whenever r and c are constants, and to conditional clauses otherwise
   (I am grateful to Frank Yellin for this construction).  The r and c
   arguments are parenthesized so that operator expressions passed as
   arguments are compared as a whole.
*/

#define inv_var(x,r,c)\
 ( (r) == 0 ? ( (c) == 0 ? s(x,0) : (c) == 1 ? s(x,1) : (c) == 2 ? s(x,2) : s(x,3))\
 : (r) == 1 ? ( (c) == 0 ? s(x,3) : (c) == 1 ? s(x,0) : (c) == 2 ? s(x,1) : s(x,2))\
 : (r) == 2 ? ( (c) == 0 ? s(x,2) : (c) == 1 ? s(x,3) : (c) == 2 ? s(x,0) : s(x,1))\
 :            ( (c) == 0 ? s(x,1) : (c) == 1 ? s(x,2) : (c) == 2 ? s(x,3) : s(x,0)))

/* inv_rnd computes column c of output state y for a normal round from
   input state x and round key k, using four tables, one table or no
   table according to the configured table set.  The table-driven forms
   do not call inv_mcol(), so dec_imvars is undefined for them and its
   variables are not declared in aes_decrypt().  Note that the
   table-free form applies inv_mcol() after the round key is xored in.
*/
#if defined(IT4_SET)
#undef  dec_imvars
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,n),inv_var,rf1,c))
#elif defined(IT1_SET)
#undef  dec_imvars
#define inv_rnd(y,x,k,c)    (s(y,c) = (k)[c] ^ one_table(x,upr,t_use(i,n),inv_var,rf1,c))
#else
#define inv_rnd(y,x,k,c)    (s(y,c) = inv_mcol((k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c)))
#endif

/* inv_lrnd is the corresponding column computation for the last round;
   its table-free form omits the inv_mcol() step.
*/
#if defined(IL4_SET)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ four_tables(x,t_use(i,l),inv_var,rf1,c))
#elif defined(IL1_SET)
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ one_table(x,ups,t_use(i,l),inv_var,rf1,c))
#else
#define inv_lrnd(y,x,k,c)   (s(y,c) = (k)[c] ^ no_table(x,t_use(i,box),inv_var,rf1,c))
#endif
227 
228 aes_rval aes_decrypt(const void *in_blk, void *out_blk, const aes_decrypt_ctx cx[1])
229 {   aes_32t        locals(b0, b1);
230 #ifdef dec_imvars
231     dec_imvars; /* declare variables for inv_mcol() if needed */
232 #endif
233 
234     aes_32t nr = (cx->ks[45] ^ cx->ks[52] ^ cx->ks[53] ? cx->ks[52] : 14);
235     const aes_32t *kp = cx->ks + nr * N_COLS;
236 
237 #ifdef AES_ERR_CHK
238     if(   (nr != 10 || !(cx->ks[0] | cx->ks[3] | cx->ks[4]))
239        && (nr != 12 || !(cx->ks[0] | cx->ks[5] | cx->ks[6]))
240        && (nr != 14 || !(cx->ks[0] | cx->ks[7] | cx->ks[8])) )
241         return aes_error;
242 #endif
243 
244     state_in(b0, in_blk, kp);
245 
246 #if (DEC_UNROLL == FULL)
247 
248     switch(nr)
249     {
250     case 14:
251         round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
252         round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
253         kp -= 2 * N_COLS;
254         /* Falls through. */
255     case 12:
256         round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
257         round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
258         kp -= 2 * N_COLS;
259         /* Falls through. */
260     case 10:
261         round(inv_rnd,  b1, b0, kp -  1 * N_COLS);
262         round(inv_rnd,  b0, b1, kp -  2 * N_COLS);
263         round(inv_rnd,  b1, b0, kp -  3 * N_COLS);
264         round(inv_rnd,  b0, b1, kp -  4 * N_COLS);
265         round(inv_rnd,  b1, b0, kp -  5 * N_COLS);
266         round(inv_rnd,  b0, b1, kp -  6 * N_COLS);
267         round(inv_rnd,  b1, b0, kp -  7 * N_COLS);
268         round(inv_rnd,  b0, b1, kp -  8 * N_COLS);
269         round(inv_rnd,  b1, b0, kp -  9 * N_COLS);
270         round(inv_lrnd, b0, b1, kp - 10 * N_COLS);
271     }
272 
273 #else
274 
275 #if (DEC_UNROLL == PARTIAL)
276     {   aes_32t    rnd;
277         for(rnd = 0; rnd < (nr >> 1) - 1; ++rnd)
278         {
279             kp -= N_COLS;
280             round(inv_rnd, b1, b0, kp);
281             kp -= N_COLS;
282             round(inv_rnd, b0, b1, kp);
283         }
284         kp -= N_COLS;
285         round(inv_rnd, b1, b0, kp);
286 #else
287     {   aes_32t    rnd;
288         for(rnd = 0; rnd < nr - 1; ++rnd)
289         {
290             kp -= N_COLS;
291             round(inv_rnd, b1, b0, kp);
292             l_copy(b0, b1);
293         }
294 #endif
295         kp -= N_COLS;
296         round(inv_lrnd, b0, b1, kp);
297     }
298 #endif
299 
300     state_out(out_blk, b0);
301 #ifdef AES_ERR_CHK
302     return aes_good;
303 #endif
304 }
305 
306 #endif
307 
308