1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #ifdef FREEBL_NO_DEPEND
6 #include "stubs.h"
7 #endif
8
9 #include "prinit.h"
10 #include "prenv.h"
11 #include "prerr.h"
12 #include "secerr.h"
13
14 #include "prtypes.h"
15 #include "blapi.h"
16 #include "rijndael.h"
17
18 #include "cts.h"
19 #include "ctr.h"
20 #include "gcm.h"
21 #include "mpi.h"
22
23 #ifdef USE_HW_AES
24 #include "intel-aes.h"
25 #endif
26 #ifdef INTEL_GCM
27 #include "intel-gcm.h"
28 #endif /* INTEL_GCM */
29
30 /* Forward declarations */
31 void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
32 unsigned int Nk);
33 void rijndael_native_encryptBlock(AESContext *cx,
34 unsigned char *output,
35 const unsigned char *input);
36
37 /* Stub definitions for the above rijndael_native_* functions, which
38 * shouldn't be used unless NSS_X86_OR_X64 is defined */
39 #ifndef NSS_X86_OR_X64
40 void
rijndael_native_key_expansion(AESContext * cx,const unsigned char * key,unsigned int Nk)41 rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
42 unsigned int Nk)
43 {
44 PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
45 PORT_Assert(0);
46 }
47
48 void
rijndael_native_encryptBlock(AESContext * cx,unsigned char * output,const unsigned char * input)49 rijndael_native_encryptBlock(AESContext *cx,
50 unsigned char *output,
51 const unsigned char *input)
52 {
53 PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
54 PORT_Assert(0);
55 }
56 #endif /* NSS_X86_OR_X64 */
57
58 /*
59 * There are currently three ways to build this code, varying in performance
60 * and code size.
61 *
62 * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab
63 * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table
64 * values "on-the-fly", using gfm
65 * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros
66 *
67 * The default is RIJNDAEL_INCLUDE_TABLES.
68 */
69
/*
 * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..3],
 * T**-1[0..3], IMXC[0..3]
 * When building anything else, includes S, S**-1, Rcon
 */
75 #include "rijndael32.tab"
76
77 #if defined(RIJNDAEL_INCLUDE_TABLES)
78 /*
79 * RIJNDAEL_INCLUDE_TABLES
80 */
81 #define T0(i) _T0[i]
82 #define T1(i) _T1[i]
83 #define T2(i) _T2[i]
84 #define T3(i) _T3[i]
85 #define TInv0(i) _TInv0[i]
86 #define TInv1(i) _TInv1[i]
87 #define TInv2(i) _TInv2[i]
88 #define TInv3(i) _TInv3[i]
89 #define IMXC0(b) _IMXC0[b]
90 #define IMXC1(b) _IMXC1[b]
91 #define IMXC2(b) _IMXC2[b]
92 #define IMXC3(b) _IMXC3[b]
93 /* The S-box can be recovered from the T-tables */
94 #ifdef IS_LITTLE_ENDIAN
95 #define SBOX(b) ((PRUint8)_T3[b])
96 #else
97 #define SBOX(b) ((PRUint8)_T1[b])
98 #endif
99 #define SINV(b) (_SInv[b])
100
101 #else /* not RIJNDAEL_INCLUDE_TABLES */
102
103 /*
104 * Code for generating T-table values.
105 */
106
107 #ifdef IS_LITTLE_ENDIAN
108 #define WORD4(b0, b1, b2, b3) \
109 ((((PRUint32)b3) << 24) | \
110 (((PRUint32)b2) << 16) | \
111 (((PRUint32)b1) << 8) | \
112 ((PRUint32)b0))
113 #else
114 #define WORD4(b0, b1, b2, b3) \
115 ((((PRUint32)b0) << 24) | \
116 (((PRUint32)b1) << 16) | \
117 (((PRUint32)b2) << 8) | \
118 ((PRUint32)b3))
119 #endif
120
121 /*
122 * Define the S and S**-1 tables (both have been stored)
123 */
124 #define SBOX(b) (_S[b])
125 #define SINV(b) (_SInv[b])
126
/*
 * The function xtime, used for Galois field multiplication: shift the
 * operand left by one bit and, when bit 7 of the operand was set, fold the
 * overflow back in by XORing 0x1b.
 *
 * The result is NOT masked to 8 bits; users either mask it with 0xff or
 * store it into an 8-bit variable.  The argument is evaluated more than
 * once, so it must not have side effects.  Every use of the argument is
 * parenthesized so that compound expressions (e.g. XTIME(x | y)) expand
 * correctly.
 */
#define XTIME(a) \
    (((a)&0x80) ? (((a) << 1) ^ 0x1b) : ((a) << 1))
132
133 /* Choose GFM method (macros or function) */
134 #if defined(RIJNDAEL_GENERATE_VALUES_MACRO)
135
136 /*
137 * Galois field GF(2**8) multipliers, in macro form
138 */
139 #define GFM01(a) \
140 (a) /* a * 01 = a, the identity */
141 #define GFM02(a) \
142 (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
143 #define GFM04(a) \
144 (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */
145 #define GFM08(a) \
146 (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */
147 #define GFM03(a) \
148 (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */
149 #define GFM09(a) \
150 (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */
151 #define GFM0B(a) \
152 (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */
153 #define GFM0D(a) \
154 (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */
155 #define GFM0E(a) \
156 (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */
157
158 #else /* RIJNDAEL_GENERATE_VALUES */
159
160 /* GF_MULTIPLY
161 *
162 * multiply two bytes represented in GF(2**8), mod (x**4 + 1)
163 */
164 PRUint8
gfm(PRUint8 a,PRUint8 b)165 gfm(PRUint8 a, PRUint8 b)
166 {
167 PRUint8 res = 0;
168 while (b > 0) {
169 res = (b & 0x01) ? res ^ a : res;
170 a = XTIME(a);
171 b >>= 1;
172 }
173 return res;
174 }
175
176 #define GFM01(a) \
177 (a) /* a * 01 = a, the identity */
178 #define GFM02(a) \
179 (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
180 #define GFM03(a) \
181 (gfm(a, 0x03)) /* a * 03 */
182 #define GFM09(a) \
183 (gfm(a, 0x09)) /* a * 09 */
184 #define GFM0B(a) \
185 (gfm(a, 0x0B)) /* a * 0B */
186 #define GFM0D(a) \
187 (gfm(a, 0x0D)) /* a * 0D */
188 #define GFM0E(a) \
189 (gfm(a, 0x0E)) /* a * 0E */
190
191 #endif /* choosing GFM function */
192
193 /*
194 * The T-tables
195 */
196 #define G_T0(i) \
197 (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i))))
198 #define G_T1(i) \
199 (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i))))
200 #define G_T2(i) \
201 (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i))))
202 #define G_T3(i) \
203 (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i))))
204
205 /*
206 * The inverse T-tables
207 */
208 #define G_TInv0(i) \
209 (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i))))
210 #define G_TInv1(i) \
211 (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i))))
212 #define G_TInv2(i) \
213 (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i))))
214 #define G_TInv3(i) \
215 (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i))))
216
217 /*
218 * The inverse mix column tables
219 */
220 #define G_IMXC0(i) \
221 (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i)))
222 #define G_IMXC1(i) \
223 (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i)))
224 #define G_IMXC2(i) \
225 (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i)))
226 #define G_IMXC3(i) \
227 (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i)))
228
229 /* Now choose the T-table indexing method */
230 #if defined(RIJNDAEL_GENERATE_VALUES)
231 /* generate values for the tables with a function*/
232 static PRUint32
gen_TInvXi(PRUint8 tx,PRUint8 i)233 gen_TInvXi(PRUint8 tx, PRUint8 i)
234 {
235 PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E;
236 si01 = SINV(i);
237 si02 = XTIME(si01);
238 si04 = XTIME(si02);
239 si08 = XTIME(si04);
240 si03 = si02 ^ si01;
241 si09 = si08 ^ si01;
242 si0B = si08 ^ si03;
243 si0D = si09 ^ si04;
244 si0E = si08 ^ si04 ^ si02;
245 switch (tx) {
246 case 0:
247 return WORD4(si0E, si09, si0D, si0B);
248 case 1:
249 return WORD4(si0B, si0E, si09, si0D);
250 case 2:
251 return WORD4(si0D, si0B, si0E, si09);
252 case 3:
253 return WORD4(si09, si0D, si0B, si0E);
254 }
255 return -1;
256 }
257 #define T0(i) G_T0(i)
258 #define T1(i) G_T1(i)
259 #define T2(i) G_T2(i)
260 #define T3(i) G_T3(i)
261 #define TInv0(i) gen_TInvXi(0, i)
262 #define TInv1(i) gen_TInvXi(1, i)
263 #define TInv2(i) gen_TInvXi(2, i)
264 #define TInv3(i) gen_TInvXi(3, i)
265 #define IMXC0(b) G_IMXC0(b)
266 #define IMXC1(b) G_IMXC1(b)
267 #define IMXC2(b) G_IMXC2(b)
268 #define IMXC3(b) G_IMXC3(b)
269 #else /* RIJNDAEL_GENERATE_VALUES_MACRO */
270 /* generate values for the tables with macros */
271 #define T0(i) G_T0(i)
272 #define T1(i) G_T1(i)
273 #define T2(i) G_T2(i)
274 #define T3(i) G_T3(i)
275 #define TInv0(i) G_TInv0(i)
276 #define TInv1(i) G_TInv1(i)
277 #define TInv2(i) G_TInv2(i)
278 #define TInv3(i) G_TInv3(i)
279 #define IMXC0(b) G_IMXC0(b)
280 #define IMXC1(b) G_IMXC1(b)
281 #define IMXC2(b) G_IMXC2(b)
282 #define IMXC3(b) G_IMXC3(b)
283 #endif /* choose T-table indexing method */
284
285 #endif /* not RIJNDAEL_INCLUDE_TABLES */
286
287 /**************************************************************************
288 *
289 * Stuff related to the Rijndael key schedule
290 *
291 *************************************************************************/
292
/*
 * SUBBYTE(w): apply SBOX to each of the four bytes of the 32-bit word w and
 * reassemble them in their original positions.
 *
 * ROTBYTE(b): rotate the 32-bit word b by 8 bits; the direction depends on
 * IS_LITTLE_ENDIAN.
 *
 * Both macros evaluate their argument more than once, so it must not have
 * side effects.  Every use of the argument is parenthesized so that
 * compound expressions (e.g. ROTBYTE(a ^ b)) expand correctly.
 */
#define SUBBYTE(w)                                  \
    ((((PRUint32)SBOX(((w) >> 24) & 0xff)) << 24) | \
     (((PRUint32)SBOX(((w) >> 16) & 0xff)) << 16) | \
     (((PRUint32)SBOX(((w) >> 8) & 0xff)) << 8) |   \
     (((PRUint32)SBOX((w)&0xff))))

#ifdef IS_LITTLE_ENDIAN
#define ROTBYTE(b) \
    (((b) >> 8) | ((b) << 24))
#else
#define ROTBYTE(b) \
    (((b) << 8) | ((b) >> 24))
#endif
306
307 /* rijndael_key_expansion7
308 *
309 * Generate the expanded key from the key input by the user.
310 * XXX
311 * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte
312 * transformation is done periodically. The period is every 4 bytes, and
313 * since 7%4 != 0 this happens at different times for each key word (unlike
314 * Nk == 8 where it happens twice in every key word, in the same positions).
315 * For now, I'm implementing this case "dumbly", w/o any unrolling.
316 */
317 static void
rijndael_key_expansion7(AESContext * cx,const unsigned char * key,unsigned int Nk)318 rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)
319 {
320 unsigned int i;
321 PRUint32 *W;
322 PRUint32 *pW;
323 PRUint32 tmp;
324 W = cx->expandedKey;
325 /* 1. the first Nk words contain the cipher key */
326 memcpy(W, key, Nk * 4);
327 i = Nk;
328 /* 2. loop until full expanded key is obtained */
329 pW = W + i - 1;
330 for (; i < cx->Nb * (cx->Nr + 1); ++i) {
331 tmp = *pW++;
332 if (i % Nk == 0)
333 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
334 else if (i % Nk == 4)
335 tmp = SUBBYTE(tmp);
336 *pW = W[i - Nk] ^ tmp;
337 }
338 }
339
340 /* rijndael_key_expansion
341 *
342 * Generate the expanded key from the key input by the user.
343 */
344 static void
rijndael_key_expansion(AESContext * cx,const unsigned char * key,unsigned int Nk)345 rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
346 {
347 unsigned int i;
348 PRUint32 *W;
349 PRUint32 *pW;
350 PRUint32 tmp;
351 unsigned int round_key_words = cx->Nb * (cx->Nr + 1);
352 if (Nk == 7) {
353 rijndael_key_expansion7(cx, key, Nk);
354 return;
355 }
356 W = cx->expandedKey;
357 /* The first Nk words contain the input cipher key */
358 memcpy(W, key, Nk * 4);
359 i = Nk;
360 pW = W + i - 1;
361 /* Loop over all sets of Nk words, except the last */
362 while (i < round_key_words - Nk) {
363 tmp = *pW++;
364 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
365 *pW = W[i++ - Nk] ^ tmp;
366 tmp = *pW++;
367 *pW = W[i++ - Nk] ^ tmp;
368 tmp = *pW++;
369 *pW = W[i++ - Nk] ^ tmp;
370 tmp = *pW++;
371 *pW = W[i++ - Nk] ^ tmp;
372 if (Nk == 4)
373 continue;
374 switch (Nk) {
375 case 8:
376 tmp = *pW++;
377 tmp = SUBBYTE(tmp);
378 *pW = W[i++ - Nk] ^ tmp;
379 case 7:
380 tmp = *pW++;
381 *pW = W[i++ - Nk] ^ tmp;
382 case 6:
383 tmp = *pW++;
384 *pW = W[i++ - Nk] ^ tmp;
385 case 5:
386 tmp = *pW++;
387 *pW = W[i++ - Nk] ^ tmp;
388 }
389 }
390 /* Generate the last word */
391 tmp = *pW++;
392 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
393 *pW = W[i++ - Nk] ^ tmp;
394 /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However,
395 * since the above loop generated all but the last Nk key words, there
396 * is no more need for the SubByte transformation.
397 */
398 if (Nk < 8) {
399 for (; i < round_key_words; ++i) {
400 tmp = *pW++;
401 *pW = W[i - Nk] ^ tmp;
402 }
403 } else {
404 /* except in the case when Nk == 8. Then one more SubByte may have
405 * to be performed, at i % Nk == 4.
406 */
407 for (; i < round_key_words; ++i) {
408 tmp = *pW++;
409 if (i % Nk == 4)
410 tmp = SUBBYTE(tmp);
411 *pW = W[i - Nk] ^ tmp;
412 }
413 }
414 }
415
416 /* rijndael_invkey_expansion
417 *
418 * Generate the expanded key for the inverse cipher from the key input by
419 * the user.
420 */
421 static void
rijndael_invkey_expansion(AESContext * cx,const unsigned char * key,unsigned int Nk)422 rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
423 {
424 unsigned int r;
425 PRUint32 *roundkeyw;
426 PRUint8 *b;
427 int Nb = cx->Nb;
428 /* begins like usual key expansion ... */
429 rijndael_key_expansion(cx, key, Nk);
430 /* ... but has the additional step of InvMixColumn,
431 * excepting the first and last round keys.
432 */
433 roundkeyw = cx->expandedKey + cx->Nb;
434 for (r = 1; r < cx->Nr; ++r) {
435 /* each key word, roundkeyw, represents a column in the key
436 * matrix. Each column is multiplied by the InvMixColumn matrix.
437 * [ 0E 0B 0D 09 ] [ b0 ]
438 * [ 09 0E 0B 0D ] * [ b1 ]
439 * [ 0D 09 0E 0B ] [ b2 ]
440 * [ 0B 0D 09 0E ] [ b3 ]
441 */
442 b = (PRUint8 *)roundkeyw;
443 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
444 b = (PRUint8 *)roundkeyw;
445 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
446 b = (PRUint8 *)roundkeyw;
447 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
448 b = (PRUint8 *)roundkeyw;
449 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
450 if (Nb <= 4)
451 continue;
452 switch (Nb) {
453 case 8:
454 b = (PRUint8 *)roundkeyw;
455 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
456 IMXC2(b[2]) ^ IMXC3(b[3]);
457 case 7:
458 b = (PRUint8 *)roundkeyw;
459 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
460 IMXC2(b[2]) ^ IMXC3(b[3]);
461 case 6:
462 b = (PRUint8 *)roundkeyw;
463 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
464 IMXC2(b[2]) ^ IMXC3(b[3]);
465 case 5:
466 b = (PRUint8 *)roundkeyw;
467 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
468 IMXC2(b[2]) ^ IMXC3(b[3]);
469 }
470 }
471 }
472
473 /**************************************************************************
474 *
475 * Stuff related to Rijndael encryption/decryption.
476 *
477 *************************************************************************/
478
479 #ifdef IS_LITTLE_ENDIAN
480 #define BYTE0WORD(w) ((w)&0x000000ff)
481 #define BYTE1WORD(w) ((w)&0x0000ff00)
482 #define BYTE2WORD(w) ((w)&0x00ff0000)
483 #define BYTE3WORD(w) ((w)&0xff000000)
484 #else
485 #define BYTE0WORD(w) ((w)&0xff000000)
486 #define BYTE1WORD(w) ((w)&0x00ff0000)
487 #define BYTE2WORD(w) ((w)&0x0000ff00)
488 #define BYTE3WORD(w) ((w)&0x000000ff)
489 #endif
490
491 typedef union {
492 PRUint32 w[4];
493 PRUint8 b[16];
494 } rijndael_state;
495
496 #define COLUMN_0(state) state.w[0]
497 #define COLUMN_1(state) state.w[1]
498 #define COLUMN_2(state) state.w[2]
499 #define COLUMN_3(state) state.w[3]
500
501 #define STATE_BYTE(i) state.b[i]
502
503 static void NO_SANITIZE_ALIGNMENT
rijndael_encryptBlock128(AESContext * cx,unsigned char * output,const unsigned char * input)504 rijndael_encryptBlock128(AESContext *cx,
505 unsigned char *output,
506 const unsigned char *input)
507 {
508 unsigned int r;
509 PRUint32 *roundkeyw;
510 rijndael_state state;
511 PRUint32 C0, C1, C2, C3;
512 #if defined(NSS_X86_OR_X64)
513 #define pIn input
514 #define pOut output
515 #else
516 unsigned char *pIn, *pOut;
517 PRUint32 inBuf[4], outBuf[4];
518
519 if ((ptrdiff_t)input & 0x3) {
520 memcpy(inBuf, input, sizeof inBuf);
521 pIn = (unsigned char *)inBuf;
522 } else {
523 pIn = (unsigned char *)input;
524 }
525 if ((ptrdiff_t)output & 0x3) {
526 pOut = (unsigned char *)outBuf;
527 } else {
528 pOut = (unsigned char *)output;
529 }
530 #endif
531 roundkeyw = cx->expandedKey;
532 /* Step 1: Add Round Key 0 to initial state */
533 COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++;
534 COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++;
535 COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++;
536 COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;
537 /* Step 2: Loop over rounds [1..NR-1] */
538 for (r = 1; r < cx->Nr; ++r) {
539 /* Do ShiftRow, ByteSub, and MixColumn all at once */
540 C0 = T0(STATE_BYTE(0)) ^
541 T1(STATE_BYTE(5)) ^
542 T2(STATE_BYTE(10)) ^
543 T3(STATE_BYTE(15));
544 C1 = T0(STATE_BYTE(4)) ^
545 T1(STATE_BYTE(9)) ^
546 T2(STATE_BYTE(14)) ^
547 T3(STATE_BYTE(3));
548 C2 = T0(STATE_BYTE(8)) ^
549 T1(STATE_BYTE(13)) ^
550 T2(STATE_BYTE(2)) ^
551 T3(STATE_BYTE(7));
552 C3 = T0(STATE_BYTE(12)) ^
553 T1(STATE_BYTE(1)) ^
554 T2(STATE_BYTE(6)) ^
555 T3(STATE_BYTE(11));
556 /* Round key addition */
557 COLUMN_0(state) = C0 ^ *roundkeyw++;
558 COLUMN_1(state) = C1 ^ *roundkeyw++;
559 COLUMN_2(state) = C2 ^ *roundkeyw++;
560 COLUMN_3(state) = C3 ^ *roundkeyw++;
561 }
562 /* Step 3: Do the last round */
563 /* Final round does not employ MixColumn */
564 C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) |
565 (BYTE1WORD(T3(STATE_BYTE(5)))) |
566 (BYTE2WORD(T0(STATE_BYTE(10)))) |
567 (BYTE3WORD(T1(STATE_BYTE(15))))) ^
568 *roundkeyw++;
569 C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) |
570 (BYTE1WORD(T3(STATE_BYTE(9)))) |
571 (BYTE2WORD(T0(STATE_BYTE(14)))) |
572 (BYTE3WORD(T1(STATE_BYTE(3))))) ^
573 *roundkeyw++;
574 C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) |
575 (BYTE1WORD(T3(STATE_BYTE(13)))) |
576 (BYTE2WORD(T0(STATE_BYTE(2)))) |
577 (BYTE3WORD(T1(STATE_BYTE(7))))) ^
578 *roundkeyw++;
579 C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) |
580 (BYTE1WORD(T3(STATE_BYTE(1)))) |
581 (BYTE2WORD(T0(STATE_BYTE(6)))) |
582 (BYTE3WORD(T1(STATE_BYTE(11))))) ^
583 *roundkeyw++;
584 *((PRUint32 *)pOut) = C0;
585 *((PRUint32 *)(pOut + 4)) = C1;
586 *((PRUint32 *)(pOut + 8)) = C2;
587 *((PRUint32 *)(pOut + 12)) = C3;
588 #if defined(NSS_X86_OR_X64)
589 #undef pIn
590 #undef pOut
591 #else
592 if ((ptrdiff_t)output & 0x3) {
593 memcpy(output, outBuf, sizeof outBuf);
594 }
595 #endif
596 }
597
598 static SECStatus NO_SANITIZE_ALIGNMENT
rijndael_decryptBlock128(AESContext * cx,unsigned char * output,const unsigned char * input)599 rijndael_decryptBlock128(AESContext *cx,
600 unsigned char *output,
601 const unsigned char *input)
602 {
603 int r;
604 PRUint32 *roundkeyw;
605 rijndael_state state;
606 PRUint32 C0, C1, C2, C3;
607 #if defined(NSS_X86_OR_X64)
608 #define pIn input
609 #define pOut output
610 #else
611 unsigned char *pIn, *pOut;
612 PRUint32 inBuf[4], outBuf[4];
613
614 if ((ptrdiff_t)input & 0x3) {
615 memcpy(inBuf, input, sizeof inBuf);
616 pIn = (unsigned char *)inBuf;
617 } else {
618 pIn = (unsigned char *)input;
619 }
620 if ((ptrdiff_t)output & 0x3) {
621 pOut = (unsigned char *)outBuf;
622 } else {
623 pOut = (unsigned char *)output;
624 }
625 #endif
626 roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3;
627 /* reverse the final key addition */
628 COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;
629 COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--;
630 COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--;
631 COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--;
632 /* Loop over rounds in reverse [NR..1] */
633 for (r = cx->Nr; r > 1; --r) {
634 /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
635 C0 = TInv0(STATE_BYTE(0)) ^
636 TInv1(STATE_BYTE(13)) ^
637 TInv2(STATE_BYTE(10)) ^
638 TInv3(STATE_BYTE(7));
639 C1 = TInv0(STATE_BYTE(4)) ^
640 TInv1(STATE_BYTE(1)) ^
641 TInv2(STATE_BYTE(14)) ^
642 TInv3(STATE_BYTE(11));
643 C2 = TInv0(STATE_BYTE(8)) ^
644 TInv1(STATE_BYTE(5)) ^
645 TInv2(STATE_BYTE(2)) ^
646 TInv3(STATE_BYTE(15));
647 C3 = TInv0(STATE_BYTE(12)) ^
648 TInv1(STATE_BYTE(9)) ^
649 TInv2(STATE_BYTE(6)) ^
650 TInv3(STATE_BYTE(3));
651 /* Invert the key addition step */
652 COLUMN_3(state) = C3 ^ *roundkeyw--;
653 COLUMN_2(state) = C2 ^ *roundkeyw--;
654 COLUMN_1(state) = C1 ^ *roundkeyw--;
655 COLUMN_0(state) = C0 ^ *roundkeyw--;
656 }
657 /* inverse sub */
658 pOut[0] = SINV(STATE_BYTE(0));
659 pOut[1] = SINV(STATE_BYTE(13));
660 pOut[2] = SINV(STATE_BYTE(10));
661 pOut[3] = SINV(STATE_BYTE(7));
662 pOut[4] = SINV(STATE_BYTE(4));
663 pOut[5] = SINV(STATE_BYTE(1));
664 pOut[6] = SINV(STATE_BYTE(14));
665 pOut[7] = SINV(STATE_BYTE(11));
666 pOut[8] = SINV(STATE_BYTE(8));
667 pOut[9] = SINV(STATE_BYTE(5));
668 pOut[10] = SINV(STATE_BYTE(2));
669 pOut[11] = SINV(STATE_BYTE(15));
670 pOut[12] = SINV(STATE_BYTE(12));
671 pOut[13] = SINV(STATE_BYTE(9));
672 pOut[14] = SINV(STATE_BYTE(6));
673 pOut[15] = SINV(STATE_BYTE(3));
674 /* final key addition */
675 *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;
676 *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--;
677 *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--;
678 *((PRUint32 *)pOut) ^= *roundkeyw--;
679 #if defined(NSS_X86_OR_X64)
680 #undef pIn
681 #undef pOut
682 #else
683 if ((ptrdiff_t)output & 0x3) {
684 memcpy(output, outBuf, sizeof outBuf);
685 }
686 #endif
687 return SECSuccess;
688 }
689
690 /**************************************************************************
691 *
692 * Rijndael modes of operation (ECB and CBC)
693 *
694 *************************************************************************/
695
696 static SECStatus
rijndael_encryptECB(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen)697 rijndael_encryptECB(AESContext *cx, unsigned char *output,
698 unsigned int *outputLen, unsigned int maxOutputLen,
699 const unsigned char *input, unsigned int inputLen)
700 {
701 AESBlockFunc *encryptor;
702
703 if (aesni_support()) {
704 /* Use hardware acceleration for normal AES parameters. */
705 encryptor = &rijndael_native_encryptBlock;
706 } else {
707 encryptor = &rijndael_encryptBlock128;
708 }
709 while (inputLen > 0) {
710 (*encryptor)(cx, output, input);
711 output += AES_BLOCK_SIZE;
712 input += AES_BLOCK_SIZE;
713 inputLen -= AES_BLOCK_SIZE;
714 }
715 return SECSuccess;
716 }
717
718 static SECStatus
rijndael_encryptCBC(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen)719 rijndael_encryptCBC(AESContext *cx, unsigned char *output,
720 unsigned int *outputLen, unsigned int maxOutputLen,
721 const unsigned char *input, unsigned int inputLen)
722 {
723 unsigned int j;
724 unsigned char *lastblock;
725 unsigned char inblock[AES_BLOCK_SIZE * 8];
726
727 if (!inputLen)
728 return SECSuccess;
729 lastblock = cx->iv;
730 while (inputLen > 0) {
731 /* XOR with the last block (IV if first block) */
732 for (j = 0; j < AES_BLOCK_SIZE; ++j) {
733 inblock[j] = input[j] ^ lastblock[j];
734 }
735 /* encrypt */
736 rijndael_encryptBlock128(cx, output, inblock);
737 /* move to the next block */
738 lastblock = output;
739 output += AES_BLOCK_SIZE;
740 input += AES_BLOCK_SIZE;
741 inputLen -= AES_BLOCK_SIZE;
742 }
743 memcpy(cx->iv, lastblock, AES_BLOCK_SIZE);
744 return SECSuccess;
745 }
746
747 static SECStatus
rijndael_decryptECB(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen)748 rijndael_decryptECB(AESContext *cx, unsigned char *output,
749 unsigned int *outputLen, unsigned int maxOutputLen,
750 const unsigned char *input, unsigned int inputLen)
751 {
752 while (inputLen > 0) {
753 if (rijndael_decryptBlock128(cx, output, input) != SECSuccess) {
754 return SECFailure;
755 }
756 output += AES_BLOCK_SIZE;
757 input += AES_BLOCK_SIZE;
758 inputLen -= AES_BLOCK_SIZE;
759 }
760 return SECSuccess;
761 }
762
763 static SECStatus
rijndael_decryptCBC(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen)764 rijndael_decryptCBC(AESContext *cx, unsigned char *output,
765 unsigned int *outputLen, unsigned int maxOutputLen,
766 const unsigned char *input, unsigned int inputLen)
767 {
768 const unsigned char *in;
769 unsigned char *out;
770 unsigned int j;
771 unsigned char newIV[AES_BLOCK_SIZE];
772
773 if (!inputLen)
774 return SECSuccess;
775 PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);
776 in = input + (inputLen - AES_BLOCK_SIZE);
777 memcpy(newIV, in, AES_BLOCK_SIZE);
778 out = output + (inputLen - AES_BLOCK_SIZE);
779 while (inputLen > AES_BLOCK_SIZE) {
780 if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) {
781 return SECFailure;
782 }
783 for (j = 0; j < AES_BLOCK_SIZE; ++j)
784 out[j] ^= in[(int)(j - AES_BLOCK_SIZE)];
785 out -= AES_BLOCK_SIZE;
786 in -= AES_BLOCK_SIZE;
787 inputLen -= AES_BLOCK_SIZE;
788 }
789 if (in == input) {
790 if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) {
791 return SECFailure;
792 }
793 for (j = 0; j < AES_BLOCK_SIZE; ++j)
794 out[j] ^= cx->iv[j];
795 }
796 memcpy(cx->iv, newIV, AES_BLOCK_SIZE);
797 return SECSuccess;
798 }
799
800 /************************************************************************
801 *
802 * BLAPI Interface functions
803 *
804 * The following functions implement the encryption routines defined in
805 * BLAPI for the AES cipher, Rijndael.
806 *
807 ***********************************************************************/
808
809 AESContext *
AES_AllocateContext(void)810 AES_AllocateContext(void)
811 {
812 return PORT_ZNewAligned(AESContext, 16, mem);
813 }
814
815 /*
816 ** Initialize a new AES context suitable for AES encryption/decryption in
817 ** the ECB or CBC mode.
818 ** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC
819 */
820 static SECStatus
aes_InitContext(AESContext * cx,const unsigned char * key,unsigned int keysize,const unsigned char * iv,int mode,unsigned int encrypt)821 aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
822 const unsigned char *iv, int mode, unsigned int encrypt)
823 {
824 unsigned int Nk;
825 PRBool use_hw_aes;
826 /* According to AES, block lengths are 128 and key lengths are 128, 192, or
827 * 256 bits. We support other key sizes as well [128, 256] as long as the
828 * length in bytes is divisible by 4.
829 */
830
831 if (key == NULL ||
832 keysize < AES_BLOCK_SIZE ||
833 keysize > 32 ||
834 keysize % 4 != 0) {
835 PORT_SetError(SEC_ERROR_INVALID_ARGS);
836 return SECFailure;
837 }
838 if (mode != NSS_AES && mode != NSS_AES_CBC) {
839 PORT_SetError(SEC_ERROR_INVALID_ARGS);
840 return SECFailure;
841 }
842 if (mode == NSS_AES_CBC && iv == NULL) {
843 PORT_SetError(SEC_ERROR_INVALID_ARGS);
844 return SECFailure;
845 }
846 if (!cx) {
847 PORT_SetError(SEC_ERROR_INVALID_ARGS);
848 return SECFailure;
849 }
850 use_hw_aes = aesni_support() && (keysize % 8) == 0;
851 /* Nb = (block size in bits) / 32 */
852 cx->Nb = AES_BLOCK_SIZE / 4;
853 /* Nk = (key size in bits) / 32 */
854 Nk = keysize / 4;
855 /* Obtain number of rounds from "table" */
856 cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
857 /* copy in the iv, if neccessary */
858 if (mode == NSS_AES_CBC) {
859 memcpy(cx->iv, iv, AES_BLOCK_SIZE);
860 #ifdef USE_HW_AES
861 if (use_hw_aes) {
862 cx->worker = (freeblCipherFunc)
863 intel_aes_cbc_worker(encrypt, keysize);
864 } else
865 #endif
866 {
867 cx->worker = (freeblCipherFunc)(encrypt
868 ? &rijndael_encryptCBC
869 : &rijndael_decryptCBC);
870 }
871 } else {
872 #ifdef USE_HW_AES
873 if (use_hw_aes) {
874 cx->worker = (freeblCipherFunc)
875 intel_aes_ecb_worker(encrypt, keysize);
876 } else
877 #endif
878 {
879 cx->worker = (freeblCipherFunc)(encrypt
880 ? &rijndael_encryptECB
881 : &rijndael_decryptECB);
882 }
883 }
884 PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
885 if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
886 PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
887 return SECFailure;
888 }
889 #ifdef USE_HW_AES
890 if (use_hw_aes) {
891 intel_aes_init(encrypt, keysize);
892 } else
893 #endif
894 {
895 /* Generate expanded key */
896 if (encrypt) {
897 if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES ||
898 cx->mode == NSS_AES_CTR)) {
899 PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32);
900 /* Prepare hardware key for normal AES parameters. */
901 rijndael_native_key_expansion(cx, key, Nk);
902 } else {
903 rijndael_key_expansion(cx, key, Nk);
904 }
905 } else {
906 rijndael_invkey_expansion(cx, key, Nk);
907 }
908 }
909 cx->worker_cx = cx;
910 cx->destroy = NULL;
911 cx->isBlock = PR_TRUE;
912 return SECSuccess;
913 }
914
915 SECStatus
AES_InitContext(AESContext * cx,const unsigned char * key,unsigned int keysize,const unsigned char * iv,int mode,unsigned int encrypt,unsigned int blocksize)916 AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
917 const unsigned char *iv, int mode, unsigned int encrypt,
918 unsigned int blocksize)
919 {
920 int basemode = mode;
921 PRBool baseencrypt = encrypt;
922 SECStatus rv;
923
924 if (blocksize != AES_BLOCK_SIZE) {
925 PORT_SetError(SEC_ERROR_INVALID_ARGS);
926 return SECFailure;
927 }
928
929 switch (mode) {
930 case NSS_AES_CTS:
931 basemode = NSS_AES_CBC;
932 break;
933 case NSS_AES_GCM:
934 case NSS_AES_CTR:
935 basemode = NSS_AES;
936 baseencrypt = PR_TRUE;
937 break;
938 }
939 /* Make sure enough is initialized so we can safely call Destroy. */
940 cx->worker_cx = NULL;
941 cx->destroy = NULL;
942 cx->mode = mode;
943 rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt);
944 if (rv != SECSuccess) {
945 AES_DestroyContext(cx, PR_FALSE);
946 return rv;
947 }
948
949 /* finally, set up any mode specific contexts */
950 switch (mode) {
951 case NSS_AES_CTS:
952 cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv);
953 cx->worker = (freeblCipherFunc)(encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate);
954 cx->destroy = (freeblDestroyFunc)CTS_DestroyContext;
955 cx->isBlock = PR_FALSE;
956 break;
957 case NSS_AES_GCM:
958 #if defined(INTEL_GCM) && defined(USE_HW_AES)
959 if (aesni_support() && (keysize % 8) == 0 && avx_support() &&
960 clmul_support()) {
961 cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv);
962 cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate
963 : intel_AES_GCM_DecryptUpdate);
964 cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext;
965 cx->isBlock = PR_FALSE;
966 } else
967 #endif
968 {
969 cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv);
970 cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate
971 : GCM_DecryptUpdate);
972 cx->destroy = (freeblDestroyFunc)GCM_DestroyContext;
973 cx->isBlock = PR_FALSE;
974 }
975 break;
976 case NSS_AES_CTR:
977 cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv);
978 #if defined(USE_HW_AES) && defined(_MSC_VER)
979 if (aesni_support() && (keysize % 8) == 0) {
980 cx->worker = (freeblCipherFunc)CTR_Update_HW_AES;
981 } else
982 #endif
983 {
984 cx->worker = (freeblCipherFunc)CTR_Update;
985 }
986 cx->destroy = (freeblDestroyFunc)CTR_DestroyContext;
987 cx->isBlock = PR_FALSE;
988 break;
989 default:
990 /* everything has already been set up by aes_InitContext, just
991 * return */
992 return SECSuccess;
993 }
994 /* check to see if we succeeded in getting the worker context */
995 if (cx->worker_cx == NULL) {
996 /* no, just destroy the existing context */
997 cx->destroy = NULL; /* paranoia, though you can see a dozen lines */
998 /* below that this isn't necessary */
999 AES_DestroyContext(cx, PR_FALSE);
1000 return SECFailure;
1001 }
1002 return SECSuccess;
1003 }
1004
1005 /* AES_CreateContext
1006 *
1007 * create a new context for Rijndael operations
1008 */
1009 AESContext *
AES_CreateContext(const unsigned char * key,const unsigned char * iv,int mode,int encrypt,unsigned int keysize,unsigned int blocksize)1010 AES_CreateContext(const unsigned char *key, const unsigned char *iv,
1011 int mode, int encrypt,
1012 unsigned int keysize, unsigned int blocksize)
1013 {
1014 AESContext *cx = AES_AllocateContext();
1015 if (cx) {
1016 SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt,
1017 blocksize);
1018 if (rv != SECSuccess) {
1019 AES_DestroyContext(cx, PR_TRUE);
1020 cx = NULL;
1021 }
1022 }
1023 return cx;
1024 }
1025
1026 /*
1027 * AES_DestroyContext
1028 *
1029 * Zero an AES cipher context. If freeit is true, also free the pointer
1030 * to the context.
1031 */
1032 void
AES_DestroyContext(AESContext * cx,PRBool freeit)1033 AES_DestroyContext(AESContext *cx, PRBool freeit)
1034 {
1035 if (cx->worker_cx && cx->destroy) {
1036 (*cx->destroy)(cx->worker_cx, PR_TRUE);
1037 cx->worker_cx = NULL;
1038 cx->destroy = NULL;
1039 }
1040 if (freeit) {
1041 PORT_Free(cx->mem);
1042 }
1043 }
1044
1045 /*
1046 * AES_Encrypt
1047 *
1048 * Encrypt an arbitrary-length buffer. The output buffer must already be
1049 * allocated to at least inputLen.
1050 */
1051 SECStatus
AES_Encrypt(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen)1052 AES_Encrypt(AESContext *cx, unsigned char *output,
1053 unsigned int *outputLen, unsigned int maxOutputLen,
1054 const unsigned char *input, unsigned int inputLen)
1055 {
1056 /* Check args */
1057 if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
1058 PORT_SetError(SEC_ERROR_INVALID_ARGS);
1059 return SECFailure;
1060 }
1061 if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) {
1062 PORT_SetError(SEC_ERROR_INPUT_LEN);
1063 return SECFailure;
1064 }
1065 if (maxOutputLen < inputLen) {
1066 PORT_SetError(SEC_ERROR_OUTPUT_LEN);
1067 return SECFailure;
1068 }
1069 *outputLen = inputLen;
1070 #if UINT_MAX > MP_32BIT_MAX
1071 /*
1072 * we can guarentee that GSM won't overlfow if we limit the input to
1073 * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.
1074 *
1075 * We do it here to cover both hardware and software GCM operations.
1076 */
1077 {
1078 PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
1079 }
1080 if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) {
1081 PORT_SetError(SEC_ERROR_OUTPUT_LEN);
1082 return SECFailure;
1083 }
1084 #else
1085 /* if we can't pass in a 32_bit number, then no such check needed */
1086 {
1087 PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
1088 }
1089 #endif
1090
1091 return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
1092 input, inputLen, AES_BLOCK_SIZE);
1093 }
1094
1095 /*
1096 * AES_Decrypt
1097 *
1098 * Decrypt and arbitrary-length buffer. The output buffer must already be
1099 * allocated to at least inputLen.
1100 */
1101 SECStatus
AES_Decrypt(AESContext * cx,unsigned char * output,unsigned int * outputLen,unsigned int maxOutputLen,const unsigned char * input,unsigned int inputLen)1102 AES_Decrypt(AESContext *cx, unsigned char *output,
1103 unsigned int *outputLen, unsigned int maxOutputLen,
1104 const unsigned char *input, unsigned int inputLen)
1105 {
1106 /* Check args */
1107 if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
1108 PORT_SetError(SEC_ERROR_INVALID_ARGS);
1109 return SECFailure;
1110 }
1111 if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) {
1112 PORT_SetError(SEC_ERROR_INPUT_LEN);
1113 return SECFailure;
1114 }
1115 if (maxOutputLen < inputLen) {
1116 PORT_SetError(SEC_ERROR_OUTPUT_LEN);
1117 return SECFailure;
1118 }
1119 *outputLen = inputLen;
1120 return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
1121 input, inputLen, AES_BLOCK_SIZE);
1122 }
1123