1 /*-
2 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 * Written by Michal Ludvig <michal@logix.cz>
4 * http://www.logix.cz/michal
5 *
6 * Big thanks to Andy Polyakov for a help with optimization,
7 * assembler fixes, port to MS Windows and a lot of other
8 * valuable work on this engine!
9 */
10
11 /* ====================================================================
12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 *
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in
23 * the documentation and/or other materials provided with the
24 * distribution.
25 *
26 * 3. All advertising materials mentioning features or use of this
27 * software must display the following acknowledgment:
28 * "This product includes software developed by the OpenSSL Project
29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
30 *
31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 * endorse or promote products derived from this software without
33 * prior written permission. For written permission, please contact
34 * licensing@OpenSSL.org.
35 *
36 * 5. Products derived from this software may not be called "OpenSSL"
37 * nor may "OpenSSL" appear in their names without prior written
38 * permission of the OpenSSL Project.
39 *
40 * 6. Redistributions of any form whatsoever must retain the following
41 * acknowledgment:
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 * OF THE POSSIBILITY OF SUCH DAMAGE.
57 * ====================================================================
58 *
59 * This product includes cryptographic software written by Eric Young
60 * (eay@cryptsoft.com). This product includes software written by Tim
61 * Hudson (tjh@cryptsoft.com).
62 *
63 */
64
65 #include <stdio.h>
66 #include <string.h>
67
68 #include <openssl/opensslconf.h>
69 #include <openssl/crypto.h>
70 #include <openssl/dso.h>
71 #include <openssl/engine.h>
72 #include <openssl/evp.h>
73 #ifndef OPENSSL_NO_AES
74 # include <openssl/aes.h>
75 #endif
76 #include <openssl/rand.h>
77 #include <openssl/err.h>
78
79 #ifndef OPENSSL_NO_HW
80 # ifndef OPENSSL_NO_HW_PADLOCK
81
82 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
83 # if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
84 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
85 # define DYNAMIC_ENGINE
86 # endif
87 # elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
88 # ifdef ENGINE_DYNAMIC_SUPPORT
89 # define DYNAMIC_ENGINE
90 # endif
91 # else
92 # error "Only OpenSSL >= 0.9.7 is supported"
93 # endif
94
95 /*
96 * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it
97 * doesn't exist elsewhere, but it even can't be compiled on other platforms!
98 *
99 * In addition, because of the heavy use of inline assembler, compiler choice
100 * is limited to GCC and Microsoft C.
101 */
102 # undef COMPILE_HW_PADLOCK
103 # if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104 # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
105 (defined(_MSC_VER) && defined(_M_IX86))
106 # define COMPILE_HW_PADLOCK
107 # endif
108 # endif
109
110 # ifdef OPENSSL_NO_DYNAMIC_ENGINE
111 # ifdef COMPILE_HW_PADLOCK
112 static ENGINE *ENGINE_padlock(void);
113 # endif
114
/*
 * Entry point for builds without dynamic engines: build the PadLock ENGINE
 * and add it to the ENGINE list.
 *
 * When COMPILE_HW_PADLOCK is not defined the body is empty and the call
 * simply returns.
 */
void ENGINE_load_padlock(void)
{
# ifdef COMPILE_HW_PADLOCK
    ENGINE *engine = ENGINE_padlock();

    if (engine != NULL) {
        ENGINE_add(engine);
        /* Give back the handle obtained from ENGINE_padlock() */
        ENGINE_free(engine);
        /* Do not leave anything from the calls above on the error queue */
        ERR_clear_error();
    }
# endif
}
127
128 # endif
129
130 # ifdef COMPILE_HW_PADLOCK
131 /*
132 * We do these includes here to avoid header problems on platforms that do
133 * not have the VIA padlock anyway...
134 */
135 # include <stdlib.h>
136 # ifdef _WIN32
137 # include <malloc.h>
138 # ifndef alloca
139 # define alloca _alloca
140 # endif
141 # elif defined(__GNUC__)
142 # ifndef alloca
143 # define alloca(s) __builtin_alloca(s)
144 # endif
145 # endif
146
/* Functions for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
# ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
                           const int **nids, int nid);
# endif

/* Engine names */
static const char *padlock_id = "padlock";
/* Human-readable name; formatted by padlock_bind_helper() */
static char padlock_name[100];

/* Available features; both are filled in by padlock_available() */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
# ifndef OPENSSL_NO_AES
/*
 * Non-zero while the CPU is assumed to need 16-byte aligned buffers;
 * consulted by padlock_aes_cipher() before it picks the fast path.
 */
static int padlock_aes_align_required = 1;
# endif
170
171 /* ===== Engine "management" functions ===== */
172
173 /* Prepare the ENGINE structure for registration */
padlock_bind_helper(ENGINE * e)174 static int padlock_bind_helper(ENGINE *e)
175 {
176 /* Check available features */
177 padlock_available();
178
179 # if 1 /* disable RNG for now, see commentary in
180 * vicinity of RNG code */
181 padlock_use_rng = 0;
182 # endif
183
184 /* Generate a nice engine name with available features */
185 BIO_snprintf(padlock_name, sizeof(padlock_name),
186 "VIA PadLock (%s, %s)",
187 padlock_use_rng ? "RNG" : "no-RNG",
188 padlock_use_ace ? "ACE" : "no-ACE");
189
190 /* Register everything or return with an error */
191 if (!ENGINE_set_id(e, padlock_id) ||
192 !ENGINE_set_name(e, padlock_name) ||
193 !ENGINE_set_init_function(e, padlock_init) ||
194 # ifndef OPENSSL_NO_AES
195 (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
196 # endif
197 (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
198 return 0;
199 }
200
201 /* Everything looks good */
202 return 1;
203 }
204
205 # ifdef OPENSSL_NO_DYNAMIC_ENGINE
206
207 /* Constructor */
ENGINE_padlock(void)208 static ENGINE *ENGINE_padlock(void)
209 {
210 ENGINE *eng = ENGINE_new();
211
212 if (!eng) {
213 return NULL;
214 }
215
216 if (!padlock_bind_helper(eng)) {
217 ENGINE_free(eng);
218 return NULL;
219 }
220
221 return eng;
222 }
223
224 # endif
225
226 /* Check availability of the engine */
padlock_init(ENGINE * e)227 static int padlock_init(ENGINE *e)
228 {
229 return (padlock_use_rng || padlock_use_ace);
230 }
231
232 /*
233 * This stuff is needed if this ENGINE is being compiled into a
234 * self-contained shared-library.
235 */
236 # ifdef DYNAMIC_ENGINE
/*
 * Bind callback handed to IMPLEMENT_DYNAMIC_BIND_FN().
 *
 * A NULL id matches unconditionally; a non-NULL id must equal padlock_id.
 * Returns 1 when the id matches and padlock_bind_helper() succeeds,
 * 0 otherwise.
 */
static int padlock_bind_fn(ENGINE *e, const char *id)
{
    int id_matches = (id == NULL) || (strcmp(id, padlock_id) == 0);

    /* padlock_bind_helper() is only invoked for a matching id */
    return id_matches && padlock_bind_helper(e);
}
249
250 IMPLEMENT_DYNAMIC_CHECK_FN()
251 IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
252 # endif /* DYNAMIC_ENGINE */
253 /* ===== Here comes the "real" engine ===== */
254 # ifndef OPENSSL_NO_AES
255 /* Some AES-related constants */
256 # define AES_BLOCK_SIZE 16
257 # define AES_KEY_SIZE_128 16
258 # define AES_KEY_SIZE_192 24
259 # define AES_KEY_SIZE_256 32
260 /*
261 * Here we store the status information relevant to the current context.
262 */
263 /*
264 * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
265 * the order of items in this structure. Don't blindly modify, reorder,
266 * etc!
267 */
268 struct padlock_cipher_data {
269 unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
270 union {
271 unsigned int pad[4];
272 struct {
273 int rounds:4;
274 int dgst:1; /* n/a in C3 */
275 int align:1; /* n/a in C3 */
276 int ciphr:1; /* n/a in C3 */
277 unsigned int keygen:1;
278 int interm:1;
279 unsigned int encdec:1;
280 int ksize:2;
281 } b;
282 } cword; /* Control word */
283 AES_KEY ks; /* Encryption key */
284 };
285
/*
 * Essentially this variable belongs in thread-local storage.
 * Keeping it global, on the other hand, can only cause a
 * few bogus key reloads [if any at all on a single-CPU system],
 * so we accept the penalty...
 */
292 static volatile struct padlock_cipher_data *padlock_saved_context;
293 # endif
294
295 /*-
296 * =======================================================
297 * Inline assembler section(s).
298 * =======================================================
299 * Order of arguments is chosen to facilitate Windows port
300 * using __fastcall calling convention. If you wish to add
301 * more routines, keep in mind that first __fastcall
302 * argument is passed in %ecx and second - in %edx.
303 * =======================================================
304 */
305 # if defined(__GNUC__) && __GNUC__>=2
306 /*
307 * As for excessive "push %ebx"/"pop %ebx" found all over.
308 * When generating position-independent code GCC won't let
309 * us use "b" in assembler templates nor even respect "ebx"
310 * in "clobber description." Therefore the trouble...
311 */
312
/*
 * Helper function - check whether the CPUID instruction is available on
 * this CPU.
 *
 * Returns 1 when bit #21 of EFLAGS can be toggled, 0 otherwise.
 */
static int padlock_insn_cpuid_available(void)
{
    int result = -1;

    /*
     * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
     * CPUID is available.
     *
     * ECX holds the value bit #21 was asked to take, EAX the value read
     * back after writing EFLAGS; their XOR is zero only if the write stuck.
     */
    asm volatile ("pushf\n"
                  "popl %%eax\n"
                  "xorl $0x200000, %%eax\n"
                  "movl %%eax, %%ecx\n"
                  "andl $0x200000, %%ecx\n"
                  "pushl %%eax\n"
                  "popf\n"
                  "pushf\n"
                  "popl %%eax\n"
                  "andl $0x200000, %%eax\n"
                  "xorl %%eax, %%ecx\n"
                  "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");

    return (result == 0);
}
339
340 /*
341 * Load supported features of the CPU to see if the PadLock is available.
342 */
padlock_available(void)343 static int padlock_available(void)
344 {
345 char vendor_string[16];
346 unsigned int eax, edx;
347
348 /* First check if the CPUID instruction is available at all... */
349 if (!padlock_insn_cpuid_available())
350 return 0;
351
352 /* Are we running on the Centaur (VIA) CPU? */
353 eax = 0x00000000;
354 vendor_string[12] = 0;
355 asm volatile ("pushl %%ebx\n"
356 "cpuid\n"
357 "movl %%ebx,(%%edi)\n"
358 "movl %%edx,4(%%edi)\n"
359 "movl %%ecx,8(%%edi)\n"
360 "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
361 if (strcmp(vendor_string, "CentaurHauls") != 0)
362 return 0;
363
364 /* Check for Centaur Extended Feature Flags presence */
365 eax = 0xC0000000;
366 asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
367 if (eax < 0xC0000001)
368 return 0;
369
370 /* Read the Centaur Extended Feature Flags */
371 eax = 0xC0000001;
372 asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
373 "=d"(edx)::"ecx");
374
375 /* Fill up some flags */
376 padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
377 padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));
378
379 return padlock_use_ace + padlock_use_rng;
380 }
381
382 # ifndef OPENSSL_NO_AES
383 # ifndef AES_ASM
384 /* Our own htonl()/ntohl() */
padlock_bswapl(AES_KEY * ks)385 static inline void padlock_bswapl(AES_KEY *ks)
386 {
387 size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
388 unsigned int *key = ks->rd_key;
389
390 while (i--) {
391 asm volatile ("bswapl %0":"+r" (*key));
392 key++;
393 }
394 }
395 # endif
396 # endif
397
/*
 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
 * stack clears EFLAGS[30] which does the trick.
 *
 * The push/pop pair leaves the stack pointer unchanged.
 */
static inline void padlock_reload_key(void)
{
    asm volatile ("pushfl; popfl");
}
406
407 # ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 *
 * Step by step:
 *   - push EFLAGS and test bit 30 of the saved copy;
 *   - if the bit is set and cdata differs from padlock_saved_context,
 *     pop the copy back into EFLAGS (this is the key reload, see
 *     padlock_reload_key()) and re-reserve the stack slot;
 *   - at label 1 the stack slot is discarded on every path;
 *   - finally cdata is recorded as the new padlock_saved_context.
 */
static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
{
    asm volatile ("pushfl\n"
                  " btl $30,(%%esp)\n"
                  " jnc 1f\n"
                  " cmpl %2,%1\n"
                  " je 1f\n"
                  " popfl\n"
                  " subl $4,%%esp\n"
                  "1: addl $4,%%esp\n"
                  " movl %2,%0":"+m" (padlock_saved_context)
                  :"r"(padlock_saved_context), "r"(cdata):"cc");
}
430
/* Template for padlock_xcrypt_* modes */
/*
 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
 * of the 'padlock_cipher_data' structure.
 *
 * Register set-up performed by the template:
 *   EAX = cdata (the iv sits at offset 0), ECX = cnt, EDI = out, ESI = inp,
 *   EDX = cdata + 16 (control word), EBX = cdata + 32 (key).
 * EBX is saved and restored by hand around the instruction. The generated
 * function returns whatever the instruction leaves in EAX, which callers
 * treat as a pointer to the current iv.
 */
# define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
static inline void *name(size_t cnt, \
                         struct padlock_cipher_data *cdata, \
                         void *out, const void *inp) \
{ void *iv; \
    asm volatile ( "pushl %%ebx\n" \
                   " leal 16(%0),%%edx\n" \
                   " leal 32(%0),%%ebx\n" \
                   rep_xcrypt "\n" \
                   " popl %%ebx" \
                   : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
                   : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
                   : "edx", "cc", "memory"); \
    return iv; \
}
451
452 /* Generate all functions with appropriate opcodes */
453 /* rep xcryptecb */
454 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
455 /* rep xcryptcbc */
456 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
457 /* rep xcryptcfb */
458 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
459 /* rep xcryptofb */
460 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
461 # endif
462 /* The RNG call itself */
padlock_xstore(void * addr,unsigned int edx_in)463 static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
464 {
465 unsigned int eax_out;
466
467 asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
468 :"=a" (eax_out), "=m"(*(unsigned *)addr)
469 :"D"(addr), "d"(edx_in)
470 );
471
472 return eax_out;
473 }
474
475 /*
476 * Why not inline 'rep movsd'? I failed to find information on what value in
477 * Direction Flag one can expect and consequently have to apply
478 * "better-safe-than-sorry" approach and assume "undefined." I could
479 * explicitly clear it and restore the original value upon return from
480 * padlock_aes_cipher, but it's presumably too much trouble for too little
481 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
482 * affected by the Direction Flag and pointers advance toward larger
483 * addresses unconditionally.
484 */
/*
 * Word-wise forward copy.
 *
 * dst, src - buffers, accessed as arrays of long
 * n        - byte count; it is rounded DOWN to a multiple of sizeof(long),
 *            trailing bytes are not copied
 *
 * Returns dst. A byte count smaller than sizeof(long) copies nothing (the
 * previous do/while form copied one word and then wrapped the counter).
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;

    for (n /= sizeof(*d); n != 0; n--)
        *d++ = *s++;

    return dst;
}
498
499 # elif defined(_MSC_VER)
500 /*
501 * Unlike GCC these are real functions. In order to minimize impact
502 * on performance we adhere to __fastcall calling convention in
503 * order to get two first arguments passed through %ecx and %edx.
504 * Which kind of suits very well, as instructions in question use
505 * both %ecx and %edx as input:-)
506 */
/*
 * Emit the bytes F3 0F A7 followed by the mode-specific byte 'code'
 * (the same byte sequences the GCC branch emits via .byte).
 */
# define REP_XCRYPT(code) \
    _asm _emit 0xf3 \
    _asm _emit 0x0f _asm _emit 0xa7 \
    _asm _emit code

/*
 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
 * of the 'padlock_cipher_data' structure.
 *
 * With __fastcall, cnt arrives in ECX and cdata in EDX. cdata is moved to
 * EAX, EDX is pointed at cdata+16 and EBX at cdata+32; outp/inp are loaded
 * into EDI/ESI. There is no C return statement: the generated function
 * yields whatever the instruction leaves in EAX.
 */
# define PADLOCK_XCRYPT_ASM(name,code) \
static void * __fastcall \
    name (size_t cnt, void *cdata, \
    void *outp, const void *inp) \
{   _asm mov eax,edx \
    _asm lea edx,[eax+16] \
    _asm lea ebx,[eax+32] \
    _asm mov edi,outp \
    _asm mov esi,inp \
    REP_XCRYPT(code) \
}
527
528 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8)
529 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0)
530 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0)
531 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8)
532
/*
 * The RNG call itself (MSVC flavour).
 *
 * With __fastcall, outp arrives in ECX and code in EDX; outp is moved to
 * EDI and the three emitted bytes are the same opcode the GCC branch labels
 * "xstore". There is no C return statement - the result is whatever the
 * instruction leaves in EAX.
 */
static int __fastcall padlock_xstore(void *outp, unsigned int code)
{
    _asm mov edi,ecx
    _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0
}
538
/*
 * Force a key reload by pushing EFLAGS and popping it straight back
 * (MSVC counterpart of the GCC "pushfl; popfl" helper).
 */
static void __fastcall padlock_reload_key(void)
{
    _asm pushfd
    _asm popfd
}
544
/*
 * Heuristic key context tracing (MSVC flavour); cdata arrives in ECX.
 *
 * If bit 30 of EFLAGS is set and cdata differs from padlock_saved_context,
 * EFLAGS is reloaded from the stack (forcing a key reload). On every path
 * the pushed EFLAGS slot is dropped at 'skip' and cdata is stored as the
 * new padlock_saved_context.
 */
static void __fastcall padlock_verify_context(void *cdata)
{
    _asm {
        pushfd
        bt DWORD PTR[esp],30
        jnc skip
        cmp ecx,padlock_saved_context
        je skip
        popfd
        sub esp,4
    skip: add esp,4
        mov padlock_saved_context,ecx
    }
}
559
/*
 * Detect PadLock features (MSVC flavour of padlock_available()).
 *
 * Sets padlock_use_ace / padlock_use_rng and counts the detected features
 * in EAX. There is no C return statement; the count left in EAX is
 * presumably what the caller receives - confirm against the MSVC x86
 * calling convention.
 */
static int
padlock_available(void)
{
    _asm {
        /* Is EFLAGS bit 21 toggleable, i.e. is CPUID present? */
        pushfd
        pop eax
        mov ecx,eax
        xor eax,1<<21
        push eax
        popfd
        pushfd
        pop eax
        xor eax,ecx
        bt eax,21
        jnc noluck
        /* CPUID leaf 0: vendor must read "CentaurHauls" */
        mov eax,0
        cpuid
        xor eax,eax
        cmp ebx,'tneC'
        jne noluck
        cmp edx,'Hrua'
        jne noluck
        cmp ecx,'slua'
        jne noluck
        /* Highest Centaur extended leaf must be at least 0xC0000001 */
        mov eax,0xC0000000
        cpuid
        mov edx,eax
        xor eax,eax
        cmp edx,0xC0000001
        jb noluck
        /* Read the Centaur extended feature flags into EDX */
        mov eax,0xC0000001
        cpuid
        xor eax,eax
        /* ACE needs bits 6 and 7 */
        bt edx,6
        jnc skip_a
        bt edx,7
        jnc skip_a
        mov padlock_use_ace,1
        inc eax
        /* RNG needs bits 2 and 3 */
    skip_a: bt edx,2
        jnc skip_r
        bt edx,3
        jnc skip_r
        mov padlock_use_rng,1
        inc eax
    skip_r:
    noluck:
    }
}
609
/*
 * Byte-swap 60 consecutive 32-bit words in place, starting at key (passed
 * in ECX). EFLAGS is saved and restored around the loop because the
 * direction flag is cleared so that lodsd/stosd walk upwards.
 *
 * NOTE(review): 60 is hard-coded; presumably the number of words in
 * AES_KEY.rd_key - confirm against the AES header in use.
 */
static void __fastcall padlock_bswapl(void *key)
{
    _asm {
        pushfd
        cld
        mov esi,ecx
        mov edi,ecx
        mov ecx,60
    up: lodsd
        bswap eax
        stosd
        loop up
        popfd
    }
}
625
626 /*
627 * MS actually specifies status of Direction Flag and compiler even manages
628 * to compile following as 'rep movsd' all by itself...
629 */
630 # define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
631 # endif
632 /* ===== AES encryption/decryption ===== */
633 # ifndef OPENSSL_NO_AES
634 # if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
635 # define NID_aes_128_cfb NID_aes_128_cfb128
636 # endif
637 # if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
638 # define NID_aes_128_ofb NID_aes_128_ofb128
639 # endif
640 # if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
641 # define NID_aes_192_cfb NID_aes_192_cfb128
642 # endif
643 # if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
644 # define NID_aes_192_ofb NID_aes_192_ofb128
645 # endif
646 # if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
647 # define NID_aes_256_cfb NID_aes_256_cfb128
648 # endif
649 # if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
650 # define NID_aes_256_ofb NID_aes_256_ofb128
651 # endif
652 /*
653 * List of supported ciphers.
654 */ static int padlock_cipher_nids[] = {
655 NID_aes_128_ecb,
656 NID_aes_128_cbc,
657 NID_aes_128_cfb,
658 NID_aes_128_ofb,
659
660 NID_aes_192_ecb,
661 NID_aes_192_cbc,
662 NID_aes_192_cfb,
663 NID_aes_192_ofb,
664
665 NID_aes_256_ecb,
666 NID_aes_256_cbc,
667 NID_aes_256_cfb,
668 NID_aes_256_ofb,
669 };
670
671 static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
672 sizeof(padlock_cipher_nids[0]));
673
674 /* Function prototypes ... */
675 static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
676 const unsigned char *iv, int enc);
677 static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
678 const unsigned char *in, size_t nbytes);
679
/*
 * NEAREST_ALIGNED(ptr): ptr rounded up to the next 16-byte boundary
 * (unchanged when already aligned), as an unsigned char pointer.
 *
 * ALIGNED_CIPHER_DATA(ctx): the 16-byte aligned padlock_cipher_data inside
 * ctx->cipher_data. The argument is parenthesized so that any pointer
 * expression, not just a plain identifier, can be passed.
 */
# define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
        ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
# define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
        NEAREST_ALIGNED((ctx)->cipher_data))
684
685 # define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
686 # define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
687 # define EVP_CIPHER_block_size_OFB 1
688 # define EVP_CIPHER_block_size_CFB 1
689
/*
 * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
 * of preprocessor magic :-)
 *
 * DECLARE_AES_EVP(ksize, lmode, umode) defines the static EVP_CIPHER
 * 'padlock_aes_<ksize>_<lmode>'. ksize selects NID_aes_<ksize>_<lmode> and
 * AES_KEY_SIZE_<ksize>; umode selects EVP_CIPHER_block_size_<umode> and
 * EVP_CIPH_<umode>_MODE. Every cipher shares padlock_aes_init_key and
 * padlock_aes_cipher.
 *
 * The context size is sizeof(struct padlock_cipher_data) + 16: the extra 16
 * bytes leave room for ALIGNED_CIPHER_DATA() to round the pointer up to a
 * 16-byte boundary.
 *
 * The initializer is positional; the meaning of each slot follows the
 * EVP_CIPHER declaration, which is not visible in this file.
 */
# define DECLARE_AES_EVP(ksize,lmode,umode) \
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
    NID_aes_##ksize##_##lmode, \
    EVP_CIPHER_block_size_##umode, \
    AES_KEY_SIZE_##ksize, \
    AES_BLOCK_SIZE, \
    0 | EVP_CIPH_##umode##_MODE, \
    padlock_aes_init_key, \
    padlock_aes_cipher, \
    NULL, \
    sizeof(struct padlock_cipher_data) + 16, \
    EVP_CIPHER_set_asn1_iv, \
    EVP_CIPHER_get_asn1_iv, \
    NULL, \
    NULL \
}
710
711 DECLARE_AES_EVP(128, ecb, ECB);
712 DECLARE_AES_EVP(128, cbc, CBC);
713 DECLARE_AES_EVP(128, cfb, CFB);
714 DECLARE_AES_EVP(128, ofb, OFB);
715
716 DECLARE_AES_EVP(192, ecb, ECB);
717 DECLARE_AES_EVP(192, cbc, CBC);
718 DECLARE_AES_EVP(192, cfb, CFB);
719 DECLARE_AES_EVP(192, ofb, OFB);
720
721 DECLARE_AES_EVP(256, ecb, ECB);
722 DECLARE_AES_EVP(256, cbc, CBC);
723 DECLARE_AES_EVP(256, cfb, CFB);
724 DECLARE_AES_EVP(256, ofb, OFB);
725
726 static int
padlock_ciphers(ENGINE * e,const EVP_CIPHER ** cipher,const int ** nids,int nid)727 padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
728 int nid)
729 {
730 /* No specific cipher => return a list of supported nids ... */
731 if (!cipher) {
732 *nids = padlock_cipher_nids;
733 return padlock_cipher_nids_num;
734 }
735
736 /* ... or the requested "cipher" otherwise */
737 switch (nid) {
738 case NID_aes_128_ecb:
739 *cipher = &padlock_aes_128_ecb;
740 break;
741 case NID_aes_128_cbc:
742 *cipher = &padlock_aes_128_cbc;
743 break;
744 case NID_aes_128_cfb:
745 *cipher = &padlock_aes_128_cfb;
746 break;
747 case NID_aes_128_ofb:
748 *cipher = &padlock_aes_128_ofb;
749 break;
750
751 case NID_aes_192_ecb:
752 *cipher = &padlock_aes_192_ecb;
753 break;
754 case NID_aes_192_cbc:
755 *cipher = &padlock_aes_192_cbc;
756 break;
757 case NID_aes_192_cfb:
758 *cipher = &padlock_aes_192_cfb;
759 break;
760 case NID_aes_192_ofb:
761 *cipher = &padlock_aes_192_ofb;
762 break;
763
764 case NID_aes_256_ecb:
765 *cipher = &padlock_aes_256_ecb;
766 break;
767 case NID_aes_256_cbc:
768 *cipher = &padlock_aes_256_cbc;
769 break;
770 case NID_aes_256_cfb:
771 *cipher = &padlock_aes_256_cfb;
772 break;
773 case NID_aes_256_ofb:
774 *cipher = &padlock_aes_256_ofb;
775 break;
776
777 default:
778 /* Sorry, we don't support this NID */
779 *cipher = NULL;
780 return 0;
781 }
782
783 return 1;
784 }
785
786 /* Prepare the encryption key for PadLock usage */
787 static int
padlock_aes_init_key(EVP_CIPHER_CTX * ctx,const unsigned char * key,const unsigned char * iv,int enc)788 padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
789 const unsigned char *iv, int enc)
790 {
791 struct padlock_cipher_data *cdata;
792 int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;
793
794 if (key == NULL)
795 return 0; /* ERROR */
796
797 cdata = ALIGNED_CIPHER_DATA(ctx);
798 memset(cdata, 0, sizeof(struct padlock_cipher_data));
799
800 /* Prepare Control word. */
801 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
802 cdata->cword.b.encdec = 0;
803 else
804 cdata->cword.b.encdec = (ctx->encrypt == 0);
805 cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
806 cdata->cword.b.ksize = (key_len - 128) / 64;
807
808 switch (key_len) {
809 case 128:
810 /*
811 * PadLock can generate an extended key for AES128 in hardware
812 */
813 memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
814 cdata->cword.b.keygen = 0;
815 break;
816
817 case 192:
818 case 256:
819 /*
820 * Generate an extended AES key in software. Needed for AES192/AES256
821 */
822 /*
823 * Well, the above applies to Stepping 8 CPUs and is listed as
824 * hardware errata. They most likely will fix it at some point and
825 * then a check for stepping would be due here.
826 */
827 if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
828 EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
829 AES_set_encrypt_key(key, key_len, &cdata->ks);
830 else
831 AES_set_decrypt_key(key, key_len, &cdata->ks);
832 # ifndef AES_ASM
833 /*
834 * OpenSSL C functions use byte-swapped extended key.
835 */
836 padlock_bswapl(&cdata->ks);
837 # endif
838 cdata->cword.b.keygen = 1;
839 break;
840
841 default:
842 /* ERROR */
843 return 0;
844 }
845
846 /*
847 * This is done to cover for cases when user reuses the
848 * context for new key. The catch is that if we don't do
849 * this, padlock_eas_cipher might proceed with old key...
850 */
851 padlock_reload_key();
852
853 return 1;
854 }
855
856 /*-
857 * Simplified version of padlock_aes_cipher() used when
858 * 1) both input and output buffers are at aligned addresses.
859 * or when
860 * 2) running on a newer CPU that doesn't require aligned buffers.
861 */
862 static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX * ctx,unsigned char * out_arg,const unsigned char * in_arg,size_t nbytes)863 padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
864 const unsigned char *in_arg, size_t nbytes)
865 {
866 struct padlock_cipher_data *cdata;
867 void *iv;
868
869 cdata = ALIGNED_CIPHER_DATA(ctx);
870 padlock_verify_context(cdata);
871
872 switch (EVP_CIPHER_CTX_mode(ctx)) {
873 case EVP_CIPH_ECB_MODE:
874 padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
875 break;
876
877 case EVP_CIPH_CBC_MODE:
878 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
879 iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
880 in_arg);
881 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
882 break;
883
884 case EVP_CIPH_CFB_MODE:
885 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
886 iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
887 in_arg);
888 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
889 break;
890
891 case EVP_CIPH_OFB_MODE:
892 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
893 padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
894 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
895 break;
896
897 default:
898 return 0;
899 }
900
901 memset(cdata->iv, 0, AES_BLOCK_SIZE);
902
903 return 1;
904 }
905
906 # ifndef PADLOCK_CHUNK
907 # define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */
908 # endif
909 # if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
910 # error "insane PADLOCK_CHUNK..."
911 # endif
912
913 /*
914 * Re-align the arguments to 16-Bytes boundaries and run the encryption
915 * function itself. This function is not AES-specific.
916 */
917 static int
padlock_aes_cipher(EVP_CIPHER_CTX * ctx,unsigned char * out_arg,const unsigned char * in_arg,size_t nbytes)918 padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
919 const unsigned char *in_arg, size_t nbytes)
920 {
921 struct padlock_cipher_data *cdata;
922 const void *inp;
923 unsigned char *out;
924 void *iv;
925 int inp_misaligned, out_misaligned, realign_in_loop;
926 size_t chunk, allocated = 0;
927
928 /*
929 * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
930 */
931 if ((chunk = ctx->num)) { /* borrow chunk variable */
932 unsigned char *ivp = ctx->iv;
933
934 switch (EVP_CIPHER_CTX_mode(ctx)) {
935 case EVP_CIPH_CFB_MODE:
936 if (chunk >= AES_BLOCK_SIZE)
937 return 0; /* bogus value */
938
939 if (ctx->encrypt)
940 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
941 ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
942 chunk++, nbytes--;
943 } else
944 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
945 unsigned char c = *(in_arg++);
946 *(out_arg++) = c ^ ivp[chunk];
947 ivp[chunk++] = c, nbytes--;
948 }
949
950 ctx->num = chunk % AES_BLOCK_SIZE;
951 break;
952 case EVP_CIPH_OFB_MODE:
953 if (chunk >= AES_BLOCK_SIZE)
954 return 0; /* bogus value */
955
956 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
957 *(out_arg++) = *(in_arg++) ^ ivp[chunk];
958 chunk++, nbytes--;
959 }
960
961 ctx->num = chunk % AES_BLOCK_SIZE;
962 break;
963 }
964 }
965
966 if (nbytes == 0)
967 return 1;
968 # if 0
969 if (nbytes % AES_BLOCK_SIZE)
970 return 0; /* are we expected to do tail processing? */
971 # else
972 /*
973 * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
974 * arbitrary value in byte-oriented modes, such as CFB and OFB...
975 */
976 # endif
977
978 /*
979 * VIA promises CPUs that won't require alignment in the future. For now
980 * padlock_aes_align_required is initialized to 1 and the condition is
981 * never met...
982 */
983 /*
984 * C7 core is capable to manage unaligned input in non-ECB[!] mode, but
985 * performance penalties appear to be approximately same as for software
986 * alignment below or ~3x. They promise to improve it in the future, but
987 * for now we can just as well pretend that it can only handle aligned
988 * input...
989 */
990 if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
991 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
992
993 inp_misaligned = (((size_t)in_arg) & 0x0F);
994 out_misaligned = (((size_t)out_arg) & 0x0F);
995
996 /*
997 * Note that even if output is aligned and input not, I still prefer to
998 * loop instead of copy the whole input and then encrypt in one stroke.
999 * This is done in order to improve L1 cache utilization...
1000 */
1001 realign_in_loop = out_misaligned | inp_misaligned;
1002
1003 if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
1004 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
1005
1006 /* this takes one "if" out of the loops */
1007 chunk = nbytes;
1008 chunk %= PADLOCK_CHUNK;
1009 if (chunk == 0)
1010 chunk = PADLOCK_CHUNK;
1011
1012 if (out_misaligned) {
1013 /* optmize for small input */
1014 allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
1015 out = alloca(0x10 + allocated);
1016 out = NEAREST_ALIGNED(out);
1017 } else
1018 out = out_arg;
1019
1020 cdata = ALIGNED_CIPHER_DATA(ctx);
1021 padlock_verify_context(cdata);
1022
1023 switch (EVP_CIPHER_CTX_mode(ctx)) {
1024 case EVP_CIPH_ECB_MODE:
1025 do {
1026 if (inp_misaligned)
1027 inp = padlock_memcpy(out, in_arg, chunk);
1028 else
1029 inp = in_arg;
1030 in_arg += chunk;
1031
1032 padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1033
1034 if (out_misaligned)
1035 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1036 else
1037 out = out_arg += chunk;
1038
1039 nbytes -= chunk;
1040 chunk = PADLOCK_CHUNK;
1041 } while (nbytes);
1042 break;
1043
1044 case EVP_CIPH_CBC_MODE:
1045 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1046 goto cbc_shortcut;
1047 do {
1048 if (iv != cdata->iv)
1049 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1050 chunk = PADLOCK_CHUNK;
1051 cbc_shortcut: /* optimize for small input */
1052 if (inp_misaligned)
1053 inp = padlock_memcpy(out, in_arg, chunk);
1054 else
1055 inp = in_arg;
1056 in_arg += chunk;
1057
1058 iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1059
1060 if (out_misaligned)
1061 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1062 else
1063 out = out_arg += chunk;
1064
1065 } while (nbytes -= chunk);
1066 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1067 break;
1068
1069 case EVP_CIPH_CFB_MODE:
1070 memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1071 chunk &= ~(AES_BLOCK_SIZE - 1);
1072 if (chunk)
1073 goto cfb_shortcut;
1074 else
1075 goto cfb_skiploop;
1076 do {
1077 if (iv != cdata->iv)
1078 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1079 chunk = PADLOCK_CHUNK;
1080 cfb_shortcut: /* optimize for small input */
1081 if (inp_misaligned)
1082 inp = padlock_memcpy(out, in_arg, chunk);
1083 else
1084 inp = in_arg;
1085 in_arg += chunk;
1086
1087 iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1088
1089 if (out_misaligned)
1090 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1091 else
1092 out = out_arg += chunk;
1093
1094 nbytes -= chunk;
1095 } while (nbytes >= AES_BLOCK_SIZE);
1096
1097 cfb_skiploop:
1098 if (nbytes) {
1099 unsigned char *ivp = cdata->iv;
1100
1101 if (iv != ivp) {
1102 memcpy(ivp, iv, AES_BLOCK_SIZE);
1103 iv = ivp;
1104 }
1105 ctx->num = nbytes;
1106 if (cdata->cword.b.encdec) {
1107 cdata->cword.b.encdec = 0;
1108 padlock_reload_key();
1109 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1110 cdata->cword.b.encdec = 1;
1111 padlock_reload_key();
1112 while (nbytes) {
1113 unsigned char c = *(in_arg++);
1114 *(out_arg++) = c ^ *ivp;
1115 *(ivp++) = c, nbytes--;
1116 }
1117 } else {
1118 padlock_reload_key();
1119 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1120 padlock_reload_key();
1121 while (nbytes) {
1122 *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
1123 ivp++, nbytes--;
1124 }
1125 }
1126 }
1127
1128 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1129 break;
1130
1131 case EVP_CIPH_OFB_MODE:
1132 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1133 chunk &= ~(AES_BLOCK_SIZE - 1);
1134 if (chunk)
1135 do {
1136 if (inp_misaligned)
1137 inp = padlock_memcpy(out, in_arg, chunk);
1138 else
1139 inp = in_arg;
1140 in_arg += chunk;
1141
1142 padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1143
1144 if (out_misaligned)
1145 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1146 else
1147 out = out_arg += chunk;
1148
1149 nbytes -= chunk;
1150 chunk = PADLOCK_CHUNK;
1151 } while (nbytes >= AES_BLOCK_SIZE);
1152
1153 if (nbytes) {
1154 unsigned char *ivp = cdata->iv;
1155
1156 ctx->num = nbytes;
1157 padlock_reload_key(); /* empirically found */
1158 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1159 padlock_reload_key(); /* empirically found */
1160 while (nbytes) {
1161 *(out_arg++) = *(in_arg++) ^ *ivp;
1162 ivp++, nbytes--;
1163 }
1164 }
1165
1166 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
1167 break;
1168
1169 default:
1170 return 0;
1171 }
1172
1173 /* Clean the realign buffer if it was used */
1174 if (out_misaligned) {
1175 volatile unsigned long *p = (void *)out;
1176 size_t n = allocated / sizeof(*p);
1177 while (n--)
1178 *p++ = 0;
1179 }
1180
1181 memset(cdata->iv, 0, AES_BLOCK_SIZE);
1182
1183 return 1;
1184 }
1185
1186 # endif /* OPENSSL_NO_AES */
1187
1188 /* ===== Random Number Generator ===== */
1189 /*
1190 * This code is not engaged. The reason is that it does not comply
1191 * with recommendations for VIA RNG usage for secure applications
1192 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
1193 * provide meaningful error control...
1194 */
1195 /*
1196 * Wrapper that provides an interface between the API and the raw PadLock
1197 * RNG
1198 */
/*
 * Fill |output| with |count| bytes obtained through padlock_xstore().
 *
 * Whole 8-byte groups are requested directly into |output|; any remaining
 * bytes are fetched one at a time through a local scratch word.  A call
 * that reports no data is simply repeated, so this function does not
 * return until the request is satisfied or an error status is seen.
 *
 * Returns 1 on success and 0 on failure.  On failure |output| may have
 * been partially written.  A |count| of zero or less succeeds without
 * writing anything.
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int eax, buf = 0;
    int ret = 0;

    /* Bulk phase: second argument 0, a reported byte count of 8 expected */
    while (count >= 8) {
        eax = padlock_xstore(output, 0);
        if (!(eax & (1 << 6)))
            goto done;          /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            goto done;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 8)
            goto done;          /* fatal failure... */
        output += 8;
        count -= 8;
    }

    /* Tail phase: second argument 3, a reported byte count of 1 expected */
    while (count > 0) {
        eax = padlock_xstore(&buf, 3);
        if (!(eax & (1 << 6)))
            goto done;          /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            goto done;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 1)
            goto done;          /* fatal failure... */
        *output++ = (unsigned char)buf;
        count--;
    }
    ret = 1;

 done:
    /*
     * Wipe the scratch word on every exit path, not only on success: after
     * a failure in the tail phase it may still hold a byte that was already
     * handed to the caller.  The volatile access keeps the compiler from
     * discarding the store as dead.
     */
    *(volatile unsigned int *)&buf = 0;

    return ret;
}
1235
1236 /* Dummy but necessary function */
static int padlock_rand_status(void)
{
    /* Nothing is queried here: the status is reported as 1 unconditionally. */
    const int status_ok = 1;

    return status_ok;
}
1241
1242 /* Prepare structure for registration */
1243 static RAND_METHOD padlock_rand = {
1244 NULL, /* seed */
1245 padlock_rand_bytes, /* bytes */
1246 NULL, /* cleanup */
1247 NULL, /* add */
1248 padlock_rand_bytes, /* pseudorand */
1249 padlock_rand_status, /* rand status */
1250 };
1251
1252 # else /* !COMPILE_HW_PADLOCK */
1253 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
1254 OPENSSL_EXPORT
1255 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
1256 OPENSSL_EXPORT
bind_engine(ENGINE * e,const char * id,const dynamic_fns * fns)1257 int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns)
1258 {
1259 return 0;
1260 }
1261
1262 IMPLEMENT_DYNAMIC_CHECK_FN()
1263 # endif
1264 # endif /* COMPILE_HW_PADLOCK */
1265 # endif /* !OPENSSL_NO_HW_PADLOCK */
1266 #endif /* !OPENSSL_NO_HW */
1267