1 /* serpent.c - Implementation of the Serpent encryption algorithm.
2  *	Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3  *
4  * This file is part of Libgcrypt.
5  *
6  * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
8  * published by the Free Software Foundation; either version 2.1 of
9  * the License, or (at your option) any later version.
10  *
11  * Libgcrypt is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
19  * 02111-1307, USA.
20  */
21 
22 #include <config.h>
23 
24 #include <string.h>
25 #include <stdio.h>
26 
27 #include "types.h"
28 #include "g10lib.h"
29 #include "cipher.h"
30 #include "bithelp.h"
31 #include "bufhelp.h"
32 #include "cipher-internal.h"
33 #include "cipher-selftest.h"
34 
35 
36 /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
37 #undef USE_SSE2
38 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
39     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
40 # define USE_SSE2 1
41 #endif
42 
43 /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
44 #undef USE_AVX2
45 #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
46     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
47 # if defined(ENABLE_AVX2_SUPPORT)
48 #  define USE_AVX2 1
49 # endif
50 #endif
51 
52 /* USE_NEON indicates whether to enable ARM NEON assembly code. */
53 #undef USE_NEON
54 #ifdef ENABLE_NEON_SUPPORT
55 # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
56      && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
57      && defined(HAVE_GCC_INLINE_ASM_NEON)
58 #  define USE_NEON 1
59 # endif
60 #endif /*ENABLE_NEON_SUPPORT*/
61 
62 /* Number of rounds per Serpent encrypt/decrypt operation.  */
63 #define ROUNDS 32
64 
65 /* Magic number, used during generating of the subkeys.  */
66 #define PHI 0x9E3779B9
67 
68 /* Serpent works on 128 bit blocks.  */
69 typedef u32 serpent_block_t[4];
70 
71 /* Serpent key, provided by the user.  If the original key is shorter
72    than 256 bits, it is padded.  */
73 typedef u32 serpent_key_t[8];
74 
75 /* The key schedule consists of 33 128 bit subkeys.  */
76 typedef u32 serpent_subkeys_t[ROUNDS + 1][4];
77 
/* A Serpent context.  */
typedef struct serpent_context
{
  serpent_subkeys_t keys;	/* Generated subkeys.  */

#ifdef USE_AVX2
  int use_avx2;			/* Non-zero if the AVX2 implementation
				   shall be used.  */
#endif
#ifdef USE_NEON
  int use_neon;			/* Non-zero if the NEON implementation
				   shall be used.  */
#endif
} serpent_context_t;
90 
91 
92 /* Assembly implementations use SystemV ABI, ABI conversion and additional
93  * stack to store XMM6-XMM15 needed on Win64. */
94 #undef ASM_FUNC_ABI
95 #if defined(USE_SSE2) || defined(USE_AVX2)
96 # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
97 #  define ASM_FUNC_ABI __attribute__((sysv_abi))
98 # else
99 #  define ASM_FUNC_ABI
100 # endif
101 #endif
102 
103 
104 #ifdef USE_SSE2
/* Assembler implementations of Serpent using SSE2.  Process 8 blocks in
   parallel.
 */
108 extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
109 				       unsigned char *out,
110 				       const unsigned char *in,
111 				       unsigned char *ctr) ASM_FUNC_ABI;
112 
113 extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
114 				       unsigned char *out,
115 				       const unsigned char *in,
116 				       unsigned char *iv) ASM_FUNC_ABI;
117 
118 extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
119 				       unsigned char *out,
120 				       const unsigned char *in,
121 				       unsigned char *iv) ASM_FUNC_ABI;
122 
123 extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx,
124 				       unsigned char *out,
125 				       const unsigned char *in,
126 				       unsigned char *offset,
127 				       unsigned char *checksum,
128 				       const u64 Ls[8]) ASM_FUNC_ABI;
129 
130 extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx,
131 				       unsigned char *out,
132 				       const unsigned char *in,
133 				       unsigned char *offset,
134 				       unsigned char *checksum,
135 				       const u64 Ls[8]) ASM_FUNC_ABI;
136 
137 extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx,
138 					const unsigned char *abuf,
139 					unsigned char *offset,
140 					unsigned char *checksum,
141 					const u64 Ls[8]) ASM_FUNC_ABI;
142 #endif
143 
144 #ifdef USE_AVX2
/* Assembler implementations of Serpent using AVX2.  Process 16 blocks in
   parallel.
 */
148 extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
149 				       unsigned char *out,
150 				       const unsigned char *in,
151 				       unsigned char *ctr) ASM_FUNC_ABI;
152 
153 extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
154 				       unsigned char *out,
155 				       const unsigned char *in,
156 				       unsigned char *iv) ASM_FUNC_ABI;
157 
158 extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
159 				       unsigned char *out,
160 				       const unsigned char *in,
161 				       unsigned char *iv) ASM_FUNC_ABI;
162 
163 extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx,
164 				       unsigned char *out,
165 				       const unsigned char *in,
166 				       unsigned char *offset,
167 				       unsigned char *checksum,
168 				       const u64 Ls[16]) ASM_FUNC_ABI;
169 
170 extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx,
171 				       unsigned char *out,
172 				       const unsigned char *in,
173 				       unsigned char *offset,
174 				       unsigned char *checksum,
175 				       const u64 Ls[16]) ASM_FUNC_ABI;
176 
177 extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx,
178 					const unsigned char *abuf,
179 					unsigned char *offset,
180 					unsigned char *checksum,
181 					const u64 Ls[16]) ASM_FUNC_ABI;
182 #endif
183 
184 #ifdef USE_NEON
/* Assembler implementations of Serpent using ARM NEON.  Process 8 blocks in
   parallel.
 */
188 extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx,
189 				       unsigned char *out,
190 				       const unsigned char *in,
191 				       unsigned char *ctr);
192 
193 extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx,
194 				       unsigned char *out,
195 				       const unsigned char *in,
196 				       unsigned char *iv);
197 
198 extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx,
199 				       unsigned char *out,
200 				       const unsigned char *in,
201 				       unsigned char *iv);
202 
203 extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx,
204 				       unsigned char *out,
205 				       const unsigned char *in,
206 				       unsigned char *offset,
207 				       unsigned char *checksum,
208 				       const void *Ls[8]);
209 
210 extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx,
211 				       unsigned char *out,
212 				       const unsigned char *in,
213 				       unsigned char *offset,
214 				       unsigned char *checksum,
215 				       const void *Ls[8]);
216 
217 extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx,
218 					const unsigned char *abuf,
219 					unsigned char *offset,
220 					unsigned char *checksum,
221 					const void *Ls[8]);
222 #endif
223 
224 
225 /* Prototypes.  */
226 static const char *serpent_test (void);
227 
228 static void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr,
229 				   void *outbuf_arg, const void *inbuf_arg,
230 				   size_t nblocks);
231 static void _gcry_serpent_cbc_dec (void *context, unsigned char *iv,
232 				   void *outbuf_arg, const void *inbuf_arg,
233 				   size_t nblocks);
234 static void _gcry_serpent_cfb_dec (void *context, unsigned char *iv,
235 				   void *outbuf_arg, const void *inbuf_arg,
236 				   size_t nblocks);
237 static size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
238 				       const void *inbuf_arg, size_t nblocks,
239 				       int encrypt);
240 static size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
241 				      size_t nblocks);
242 
243 
244 /*
245  * These are the S-Boxes of Serpent from following research paper.
246  *
247  *  D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
248  *   (New York, New York, USA), p. 317–329, National Institute of Standards and
249  *   Technology, 2000.
250  *
251  * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
252  *
253  */
254 
/* S-box 0: maps inputs r0..r3 to outputs w..z; r4 is scratch.  */
#define SBOX0(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r3 ^= r0; r4 =  r1; \
    r1 &= r3; r4 ^= r2; \
    r1 ^= r0; r0 |= r3; \
    r0 ^= r4; r4 ^= r3; \
    r3 ^= r2; r2 |= r1; \
    r2 ^= r4; r4 = ~r4; \
    r4 |= r1; r1 ^= r3; \
    r1 ^= r4; r3 |= r0; \
    r1 ^= r3; r4 ^= r3; \
    \
    w = r1; x = r4; y = r2; z = r0; \
  }

/* Inverse of S-box 0.  */
#define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r2 = ~r2; r4 =  r1; \
    r1 |= r0; r4 = ~r4; \
    r1 ^= r2; r2 |= r4; \
    r1 ^= r3; r0 ^= r4; \
    r2 ^= r0; r0 &= r3; \
    r4 ^= r0; r0 |= r1; \
    r0 ^= r2; r3 ^= r4; \
    r2 ^= r1; r3 ^= r0; \
    r3 ^= r1; \
    r2 &= r3; \
    r4 ^= r2; \
    \
    w = r0; x = r4; y = r1; z = r3; \
  }

/* S-box 1: maps inputs r0..r3 to outputs w..z; r4 is scratch.  */
#define SBOX1(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r0 = ~r0; r2 = ~r2; \
    r4 =  r0; r0 &= r1; \
    r2 ^= r0; r0 |= r3; \
    r3 ^= r2; r1 ^= r0; \
    r0 ^= r4; r4 |= r1; \
    r1 ^= r3; r2 |= r0; \
    r2 &= r4; r0 ^= r1; \
    r1 &= r2; \
    r1 ^= r0; r0 &= r2; \
    r0 ^= r4; \
    \
    w = r2; x = r0; y = r3; z = r1; \
  }

/* Inverse of S-box 1.  */
#define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 =  r1; r1 ^= r3; \
    r3 &= r1; r4 ^= r2; \
    r3 ^= r0; r0 |= r1; \
    r2 ^= r3; r0 ^= r4; \
    r0 |= r2; r1 ^= r3; \
    r0 ^= r1; r1 |= r3; \
    r1 ^= r0; r4 = ~r4; \
    r4 ^= r1; r1 |= r0; \
    r1 ^= r0; \
    r1 |= r4; \
    r3 ^= r1; \
    \
    w = r4; x = r0; y = r3; z = r2; \
  }

/* S-box 2: maps inputs r0..r3 to outputs w..z; r4 is scratch.  */
#define SBOX2(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 =  r0; r0 &= r2; \
    r0 ^= r3; r2 ^= r1; \
    r2 ^= r0; r3 |= r4; \
    r3 ^= r1; r4 ^= r2; \
    r1 =  r3; r3 |= r4; \
    r3 ^= r0; r0 &= r1; \
    r4 ^= r0; r1 ^= r3; \
    r1 ^= r4; r4 = ~r4; \
    \
    w = r2; x = r3; y = r1; z = r4; \
  }

/* Inverse of S-box 2.  */
#define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r2 ^= r3; r3 ^= r0; \
    r4 =  r3; r3 &= r2; \
    r3 ^= r1; r1 |= r2; \
    r1 ^= r4; r4 &= r3; \
    r2 ^= r3; r4 &= r0; \
    r4 ^= r2; r2 &= r1; \
    r2 |= r0; r3 = ~r3; \
    r2 ^= r3; r0 ^= r3; \
    r0 &= r1; r3 ^= r4; \
    r3 ^= r0; \
    \
    w = r1; x = r4; y = r2; z = r3; \
  }

/* S-box 3: maps inputs r0..r3 to outputs w..z; r4 is scratch.  */
#define SBOX3(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 =  r0; r0 |= r3; \
    r3 ^= r1; r1 &= r4; \
    r4 ^= r2; r2 ^= r3; \
    r3 &= r0; r4 |= r1; \
    r3 ^= r4; r0 ^= r1; \
    r4 &= r0; r1 ^= r3; \
    r4 ^= r2; r1 |= r0; \
    r1 ^= r2; r0 ^= r3; \
    r2 =  r1; r1 |= r3; \
    r1 ^= r0; \
    \
    w = r1; x = r2; y = r3; z = r4; \
  }

/* Inverse of S-box 3.  */
#define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 =  r2; r2 ^= r1; \
    r0 ^= r2; r4 &= r2; \
    r4 ^= r0; r0 &= r1; \
    r1 ^= r3; r3 |= r4; \
    r2 ^= r3; r0 ^= r3; \
    r1 ^= r4; r3 &= r2; \
    r3 ^= r1; r1 ^= r0; \
    r1 |= r2; r0 ^= r3; \
    r1 ^= r4; \
    r0 ^= r1; \
    \
    w = r2; x = r1; y = r3; z = r0; \
  }

/* S-box 4: maps inputs r0..r3 to outputs w..z; r4 is scratch.  */
#define SBOX4(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r1 ^= r3; r3 = ~r3; \
    r2 ^= r3; r3 ^= r0; \
    r4 =  r1; r1 &= r3; \
    r1 ^= r2; r4 ^= r3; \
    r0 ^= r4; r2 &= r4; \
    r2 ^= r0; r0 &= r1; \
    r3 ^= r0; r4 |= r1; \
    r4 ^= r0; r0 |= r3; \
    r0 ^= r2; r2 &= r3; \
    r0 = ~r0; r4 ^= r2; \
    \
    w = r1; x = r4; y = r0; z = r3; \
  }

/* Inverse of S-box 4.  */
#define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 =  r2; r2 &= r3; \
    r2 ^= r1; r1 |= r3; \
    r1 &= r0; r4 ^= r2; \
    r4 ^= r1; r1 &= r2; \
    r0 = ~r0; r3 ^= r4; \
    r1 ^= r3; r3 &= r0; \
    r3 ^= r2; r0 ^= r1; \
    r2 &= r0; r3 ^= r0; \
    r2 ^= r4; \
    r2 |= r3; r3 ^= r0; \
    r2 ^= r1; \
    \
    w = r0; x = r3; y = r2; z = r4; \
  }

/* S-box 5: maps inputs r0..r3 to outputs w..z; r4 is scratch.  */
#define SBOX5(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r0 ^= r1; r1 ^= r3; \
    r3 = ~r3; r4 =  r1; \
    r1 &= r0; r2 ^= r3; \
    r1 ^= r2; r2 |= r4; \
    r4 ^= r3; r3 &= r1; \
    r3 ^= r0; r4 ^= r1; \
    r4 ^= r2; r2 ^= r0; \
    r0 &= r3; r2 = ~r2; \
    r0 ^= r4; r4 |= r3; \
    r2 ^= r4; \
    \
    w = r1; x = r3; y = r0; z = r2; \
  }

/* Inverse of S-box 5.  */
#define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r1 = ~r1; r4 =  r3; \
    r2 ^= r1; r3 |= r0; \
    r3 ^= r2; r2 |= r1; \
    r2 &= r0; r4 ^= r3; \
    r2 ^= r4; r4 |= r0; \
    r4 ^= r1; r1 &= r2; \
    r1 ^= r3; r4 ^= r2; \
    r3 &= r4; r4 ^= r1; \
    r3 ^= r4; r4 = ~r4; \
    r3 ^= r0; \
    \
    w = r1; x = r4; y = r3; z = r2; \
  }

/* S-box 6: maps inputs r0..r3 to outputs w..z; r4 is scratch.  */
#define SBOX6(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r2 = ~r2; r4 =  r3; \
    r3 &= r0; r0 ^= r4; \
    r3 ^= r2; r2 |= r4; \
    r1 ^= r3; r2 ^= r0; \
    r0 |= r1; r2 ^= r1; \
    r4 ^= r0; r0 |= r3; \
    r0 ^= r2; r4 ^= r3; \
    r4 ^= r0; r3 = ~r3; \
    r2 &= r4; \
    r2 ^= r3; \
    \
    w = r0; x = r1; y = r4; z = r2; \
  }

/* Inverse of S-box 6.  */
#define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r0 ^= r2; r4 =  r2; \
    r2 &= r0; r4 ^= r3; \
    r2 = ~r2; r3 ^= r1; \
    r2 ^= r3; r4 |= r0; \
    r0 ^= r2; r3 ^= r4; \
    r4 ^= r1; r1 &= r3; \
    r1 ^= r0; r0 ^= r3; \
    r0 |= r2; r3 ^= r1; \
    r4 ^= r0; \
    \
    w = r1; x = r2; y = r4; z = r3; \
  }

/* S-box 7: maps inputs r0..r3 to outputs w..z; r4 is scratch.  */
#define SBOX7(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 =  r1; r1 |= r2; \
    r1 ^= r3; r4 ^= r2; \
    r2 ^= r1; r3 |= r4; \
    r3 &= r0; r4 ^= r2; \
    r3 ^= r1; r1 |= r4; \
    r1 ^= r0; r0 |= r4; \
    r0 ^= r2; r1 ^= r4; \
    r2 ^= r1; r1 &= r0; \
    r1 ^= r4; r2 = ~r2; \
    r2 |= r0; \
    r4 ^= r2; \
    \
    w = r4; x = r3; y = r1; z = r0; \
  }

/* Inverse of S-box 7.  */
#define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \
  { \
    u32 r4; \
    \
    r4 =  r2; r2 ^= r0; \
    r0 &= r3; r4 |= r3; \
    r2 = ~r2; r3 ^= r1; \
    r1 |= r0; r0 ^= r2; \
    r2 &= r4; r3 &= r4; \
    r1 ^= r2; r2 ^= r0; \
    r0 |= r2; r4 ^= r1; \
    r0 ^= r3; r3 ^= r4; \
    r4 |= r0; r3 ^= r2; \
    r4 ^= r2; \
    \
    w = r3; x = r0; y = r1; z = r4; \
  }
542 
/* XOR BLOCK1 into BLOCK0.  */
#define BLOCK_XOR(block0, block1) \
  {                               \
    block0[0] ^= block1[0];       \
    block0[1] ^= block1[1];       \
    block0[2] ^= block1[2];       \
    block0[3] ^= block1[3];       \
  }

/* Copy BLOCK_SRC to BLOCK_DST.  */
#define BLOCK_COPY(block_dst, block_src) \
  {                                      \
    block_dst[0] = block_src[0];         \
    block_dst[1] = block_src[1];         \
    block_dst[2] = block_src[2];         \
    block_dst[3] = block_src[3];         \
  }

/* Apply SBOX number WHICH to the block found in ARRAY0, writing
   the output to the block found in ARRAY1.  */
#define SBOX(which, array0, array1)                         \
  SBOX##which (array0[0], array0[1], array0[2], array0[3],  \
               array1[0], array1[1], array1[2], array1[3]);

/* Apply inverse SBOX number WHICH to the block found in ARRAY0, writing
   the output to the block found in ARRAY1.  */
#define SBOX_INVERSE(which, array0, array1)                           \
  SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3],  \
                         array1[0], array1[1], array1[2], array1[3]);

/* Apply the linear transformation to BLOCK.  */
#define LINEAR_TRANSFORMATION(block)                  \
  {                                                   \
    block[0] = rol (block[0], 13);                    \
    block[2] = rol (block[2], 3);                     \
    block[1] = block[1] ^ block[0] ^ block[2];        \
    block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
    block[1] = rol (block[1], 1);                     \
    block[3] = rol (block[3], 7);                     \
    block[0] = block[0] ^ block[1] ^ block[3];        \
    block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
    block[0] = rol (block[0], 5);                     \
    block[2] = rol (block[2], 22);                    \
  }

/* Apply the inverse linear transformation to BLOCK; undoes the steps
   of LINEAR_TRANSFORMATION in reverse order.  */
#define LINEAR_TRANSFORMATION_INVERSE(block)          \
  {                                                   \
    block[2] = ror (block[2], 22);                    \
    block[0] = ror (block[0] , 5);                    \
    block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
    block[0] = block[0] ^ block[1] ^ block[3];        \
    block[3] = ror (block[3], 7);                     \
    block[1] = ror (block[1], 1);                     \
    block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
    block[1] = block[1] ^ block[0] ^ block[2];        \
    block[2] = ror (block[2], 3);                     \
    block[0] = ror (block[0], 13);                    \
  }

/* Apply a Serpent round to BLOCK, using the SBOX number WHICH and the
   subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary storage.
   This macro increments `round'.  */
#define ROUND(which, subkeys, block, block_tmp) \
  {                                             \
    BLOCK_XOR (block, subkeys[round]);          \
    round++;                                    \
    SBOX (which, block, block_tmp);             \
    LINEAR_TRANSFORMATION (block_tmp);          \
    BLOCK_COPY (block, block_tmp);              \
  }

/* Apply the last Serpent round to BLOCK, using the SBOX number WHICH
   and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary
   storage.  The result will be stored in BLOCK_TMP.  This macro
   increments `round' twice.  */
#define ROUND_LAST(which, subkeys, block, block_tmp) \
  {                                                  \
    BLOCK_XOR (block, subkeys[round]);               \
    round++;                                         \
    SBOX (which, block, block_tmp);                  \
    BLOCK_XOR (block_tmp, subkeys[round]);           \
    round++;                                         \
  }

/* Apply an inverse Serpent round to BLOCK, using the SBOX number
   WHICH and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as
   temporary storage.  This macro decrements `round'.  */
#define ROUND_INVERSE(which, subkey, block, block_tmp) \
  {                                                    \
    LINEAR_TRANSFORMATION_INVERSE (block);             \
    SBOX_INVERSE (which, block, block_tmp);            \
    BLOCK_XOR (block_tmp, subkey[round]);              \
    round--;                                           \
    BLOCK_COPY (block, block_tmp);                     \
  }

/* Apply the first inverse Serpent round to BLOCK, using the SBOX
   number WHICH and the subkeys contained in SUBKEYS.  Use BLOCK_TMP
   as temporary storage.  The result will be stored in BLOCK_TMP.
   This macro decrements `round' twice.  */
#define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \
  {                                                           \
    BLOCK_XOR (block, subkeys[round]);                        \
    round--;                                                  \
    SBOX_INVERSE (which, block, block_tmp);                   \
    BLOCK_XOR (block_tmp, subkeys[round]);                    \
    round--;                                                  \
  }
652 
653 /* Convert the user provided key KEY of KEY_LENGTH bytes into the
654    internally used format.  */
655 static void
serpent_key_prepare(const byte * key,unsigned int key_length,serpent_key_t key_prepared)656 serpent_key_prepare (const byte *key, unsigned int key_length,
657 		     serpent_key_t key_prepared)
658 {
659   int i;
660 
661   /* Copy key.  */
662   key_length /= 4;
663   for (i = 0; i < key_length; i++)
664     key_prepared[i] = buf_get_le32 (key + i * 4);
665 
666   if (i < 8)
667     {
668       /* Key must be padded according to the Serpent
669 	 specification.  */
670       key_prepared[i] = 0x00000001;
671 
672       for (i++; i < 8; i++)
673 	key_prepared[i] = 0;
674     }
675 }
676 
/* Derive the 33 subkeys from KEY and store them in SUBKEYS.  */
static void
serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
{
  u32 w[8];		/* The `prekey'.  */
  u32 ws[4];		/* Current four prekey words.  */
  u32 wt[4];		/* Next four prekey words.  */

  /* Initialize with key values.  */
  w[0] = key[0];
  w[1] = key[1];
  w[2] = key[2];
  w[3] = key[3];
  w[4] = key[4];
  w[5] = key[5];
  w[6] = key[6];
  w[7] = key[7];

  /* Expand to intermediate key using the affine recurrence.  W is
     used as a rotating eight-word window over the prekey stream; the
     `% 8' indexing selects the words at relative offsets -8, -5, -3
     and -1 of word R.  */
#define EXPAND_KEY4(wo, r)                                                     \
  wo[0] = w[(r+0)%8] =                                                         \
    rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); \
  wo[1] = w[(r+1)%8] =                                                         \
    rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); \
  wo[2] = w[(r+2)%8] =                                                         \
    rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); \
  wo[3] = w[(r+3)%8] =                                                         \
    rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11);

/* Produce eight prekey words: four into WS, four into WT.  */
#define EXPAND_KEY(r)       \
  EXPAND_KEY4(ws, (r));     \
  EXPAND_KEY4(wt, (r + 4));

  /* Calculate subkeys via S-Boxes, in bitslice mode.  The S-box
     sequence 3,2,1,0,7,6,5,4 repeats every eight subkeys; the final
     (33rd) subkey uses S-box 3 again.  */
  EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]);
  EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]);
  EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]);
  EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]);
  EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]);
  EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]);
  EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]);
  EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]);
  EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]);
  EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]);
  EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]);
  EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]);
  EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]);
  EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]);
  EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]);
  EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]);
  EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]);

  /* The prekey material is key-dependent secret data; erase it.  */
  wipememory (ws, sizeof (ws));
  wipememory (wt, sizeof (wt));
  wipememory (w, sizeof (w));
}
733 
734 /* Initialize CONTEXT with the key KEY of KEY_LENGTH bits.  */
735 static void
serpent_setkey_internal(serpent_context_t * context,const byte * key,unsigned int key_length)736 serpent_setkey_internal (serpent_context_t *context,
737 			 const byte *key, unsigned int key_length)
738 {
739   serpent_key_t key_prepared;
740 
741   serpent_key_prepare (key, key_length, key_prepared);
742   serpent_subkeys_generate (key_prepared, context->keys);
743 
744 #ifdef USE_AVX2
745   context->use_avx2 = 0;
746   if ((_gcry_get_hw_features () & HWF_INTEL_AVX2))
747     {
748       context->use_avx2 = 1;
749     }
750 #endif
751 
752 #ifdef USE_NEON
753   context->use_neon = 0;
754   if ((_gcry_get_hw_features () & HWF_ARM_NEON))
755     {
756       context->use_neon = 1;
757     }
758 #endif
759 
760   wipememory (key_prepared, sizeof(key_prepared));
761 }
762 
763 /* Initialize CTX with the key KEY of KEY_LENGTH bytes.  */
764 static gcry_err_code_t
serpent_setkey(void * ctx,const byte * key,unsigned int key_length,cipher_bulk_ops_t * bulk_ops)765 serpent_setkey (void *ctx,
766 		const byte *key, unsigned int key_length,
767                 cipher_bulk_ops_t *bulk_ops)
768 {
769   serpent_context_t *context = ctx;
770   static const char *serpent_test_ret;
771   static int serpent_init_done;
772   gcry_err_code_t ret = GPG_ERR_NO_ERROR;
773 
774   if (! serpent_init_done)
775     {
776       /* Execute a self-test the first time, Serpent is used.  */
777       serpent_init_done = 1;
778       serpent_test_ret = serpent_test ();
779       if (serpent_test_ret)
780 	log_error ("Serpent test failure: %s\n", serpent_test_ret);
781     }
782 
783   /* Setup bulk encryption routines.  */
784   memset (bulk_ops, 0, sizeof(*bulk_ops));
785   bulk_ops->cbc_dec = _gcry_serpent_cbc_dec;
786   bulk_ops->cfb_dec = _gcry_serpent_cfb_dec;
787   bulk_ops->ctr_enc = _gcry_serpent_ctr_enc;
788   bulk_ops->ocb_crypt = _gcry_serpent_ocb_crypt;
789   bulk_ops->ocb_auth  = _gcry_serpent_ocb_auth;
790 
791   if (serpent_test_ret)
792     ret = GPG_ERR_SELFTEST_FAILED;
793   else
794     serpent_setkey_internal (context, key, key_length);
795 
796   return ret;
797 }
798 
/* Encrypt the 128-bit block at INPUT with the subkeys in CONTEXT and
   write the result to OUTPUT.  */
static void
serpent_encrypt_internal (serpent_context_t *context,
			  const byte *input, byte *output)
{
  serpent_block_t b, b_next;
  int round = 0;

  /* Load the input block as four little-endian 32-bit words.  */
  b[0] = buf_get_le32 (input + 0);
  b[1] = buf_get_le32 (input + 4);
  b[2] = buf_get_le32 (input + 8);
  b[3] = buf_get_le32 (input + 12);

  /* 31 full rounds; the eight S-boxes repeat with period eight.
     Each ROUND increments `round'.  */
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);
  ROUND (7, context->keys, b, b_next);
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);
  ROUND (7, context->keys, b, b_next);
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);
  ROUND (7, context->keys, b, b_next);
  ROUND (0, context->keys, b, b_next);
  ROUND (1, context->keys, b, b_next);
  ROUND (2, context->keys, b, b_next);
  ROUND (3, context->keys, b, b_next);
  ROUND (4, context->keys, b, b_next);
  ROUND (5, context->keys, b, b_next);
  ROUND (6, context->keys, b, b_next);

  /* Final round: no linear transformation; applies the 33rd subkey.
     Leaves the result in b_next.  */
  ROUND_LAST (7, context->keys, b, b_next);

  /* Store the result as four little-endian 32-bit words.  */
  buf_put_le32 (output + 0, b_next[0]);
  buf_put_le32 (output + 4, b_next[1]);
  buf_put_le32 (output + 8, b_next[2]);
  buf_put_le32 (output + 12, b_next[3]);
}
850 
/* Decrypt the 128-bit block at INPUT with the subkeys in CONTEXT and
   write the result to OUTPUT.  */
static void
serpent_decrypt_internal (serpent_context_t *context,
			  const byte *input, byte *output)
{
  serpent_block_t b, b_next;
  int round = ROUNDS;

  /* Load the input block as four little-endian 32-bit words.  */
  b_next[0] = buf_get_le32 (input + 0);
  b_next[1] = buf_get_le32 (input + 4);
  b_next[2] = buf_get_le32 (input + 8);
  b_next[3] = buf_get_le32 (input + 12);

  /* Undo the final encryption round; consumes subkeys 32 and 31 and
     decrements `round' twice.  */
  ROUND_FIRST_INVERSE (7, context->keys, b_next, b);

  /* The remaining 31 inverse rounds, applied in the reverse order of
     encryption; each ROUND_INVERSE decrements `round'.  */
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);
  ROUND_INVERSE (7, context->keys, b, b_next);
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);
  ROUND_INVERSE (7, context->keys, b, b_next);
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);
  ROUND_INVERSE (7, context->keys, b, b_next);
  ROUND_INVERSE (6, context->keys, b, b_next);
  ROUND_INVERSE (5, context->keys, b, b_next);
  ROUND_INVERSE (4, context->keys, b, b_next);
  ROUND_INVERSE (3, context->keys, b, b_next);
  ROUND_INVERSE (2, context->keys, b, b_next);
  ROUND_INVERSE (1, context->keys, b, b_next);
  ROUND_INVERSE (0, context->keys, b, b_next);

  /* Store the result as four little-endian 32-bit words.  */
  buf_put_le32 (output + 0, b_next[0]);
  buf_put_le32 (output + 4, b_next[1]);
  buf_put_le32 (output + 8, b_next[2]);
  buf_put_le32 (output + 12, b_next[3]);
}
902 
903 static unsigned int
serpent_encrypt(void * ctx,byte * buffer_out,const byte * buffer_in)904 serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
905 {
906   serpent_context_t *context = ctx;
907 
908   serpent_encrypt_internal (context, buffer_in, buffer_out);
909   return /*burn_stack*/ (2 * sizeof (serpent_block_t));
910 }
911 
912 static unsigned int
serpent_decrypt(void * ctx,byte * buffer_out,const byte * buffer_in)913 serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
914 {
915   serpent_context_t *context = ctx;
916 
917   serpent_decrypt_internal (context, buffer_in, buffer_out);
918   return /*burn_stack*/ (2 * sizeof (serpent_block_t));
919 }
920 
921 
922 
/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
   of size sizeof(serpent_block_t). */
static void
_gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  /* Scratch block holding the encrypted counter for the generic path.  */
  unsigned char tmpbuf[sizeof(serpent_block_t)];
  /* Worst case stack usage of the generic path; reset to zero below when
     only the stackless assembly implementations were used.  */
  int burn_stack_depth = 2 * sizeof (serpent_block_t);

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf  += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf  += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
    /* TODO: use caching instead? */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf  += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
      /* TODO: use caching instead? */
    }
#endif

  /* Generic fallback: encrypt the counter, XOR with plaintext, and bump
     the counter for each remaining block.  */
  for ( ;nblocks; nblocks-- )
    {
      /* Encrypt the counter. */
      serpent_encrypt_internal(ctx, ctr, tmpbuf);
      /* XOR the input with the encrypted counter and store in output.  */
      cipher_block_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
      outbuf += sizeof(serpent_block_t);
      inbuf  += sizeof(serpent_block_t);
      /* Increment the counter.  */
      cipher_block_add(ctr, 1, sizeof(serpent_block_t));
    }

  /* Clear the keystream scratch block before leaving.  */
  wipememory(tmpbuf, sizeof(tmpbuf));
  _gcry_burn_stack(burn_stack_depth);
}
1035 
/* Bulk decryption of complete blocks in CBC mode.  This function is only
   intended for the bulk encryption feature of cipher.c. */
static void
_gcry_serpent_cbc_dec(void *context, unsigned char *iv,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  /* Scratch block for the raw decryption result in the generic path.  */
  unsigned char savebuf[sizeof(serpent_block_t)];
  /* Worst case stack usage of the generic path; reset to zero below when
     only the stackless assembly implementations were used.  */
  int burn_stack_depth = 2 * sizeof (serpent_block_t);

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf  += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf  += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf  += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  /* Generic fallback: decrypt, then XOR with the previous ciphertext
     block (the IV), which afterwards becomes this block's ciphertext.  */
  for ( ;nblocks; nblocks-- )
    {
      /* INBUF is needed later and it may be identical to OUTBUF, so store
         the intermediate result to SAVEBUF.  */
      serpent_decrypt_internal (ctx, inbuf, savebuf);

      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
                                sizeof(serpent_block_t));
      inbuf += sizeof(serpent_block_t);
      outbuf += sizeof(serpent_block_t);
    }

  /* Clear the plaintext scratch block before leaving.  */
  wipememory(savebuf, sizeof(savebuf));
  _gcry_burn_stack(burn_stack_depth);
}
1144 
/* Bulk decryption of complete blocks in CFB mode.  This function is only
   intended for the bulk encryption feature of cipher.c. */
static void
_gcry_serpent_cfb_dec(void *context, unsigned char *iv,
                      void *outbuf_arg, const void *inbuf_arg,
                      size_t nblocks)
{
  serpent_context_t *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  /* Worst case stack usage of the generic path; reset to zero below when
     only the stackless assembly implementations were used.  */
  int burn_stack_depth = 2 * sizeof (serpent_block_t);

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;

      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * sizeof(serpent_block_t);
          inbuf  += 16 * sizeof(serpent_block_t);
          did_use_avx2 = 1;
        }

      if (did_use_avx2)
        {
          /* serpent-avx2 assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic/sse2 code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;

    /* Process data in 8 block chunks. */
    while (nblocks >= 8)
      {
        _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv);

        nblocks -= 8;
        outbuf += 8 * sizeof(serpent_block_t);
        inbuf  += 8 * sizeof(serpent_block_t);
        did_use_sse2 = 1;
      }

    if (did_use_sse2)
      {
        /* serpent-sse2 assembly code does not use stack */
        if (nblocks == 0)
          burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;

      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * sizeof(serpent_block_t);
          inbuf  += 8 * sizeof(serpent_block_t);
          did_use_neon = 1;
        }

      if (did_use_neon)
        {
          /* serpent-neon assembly code does not use stack */
          if (nblocks == 0)
            burn_stack_depth = 0;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

  /* Generic fallback: encrypt the IV to get the keystream, XOR it with
     the ciphertext, and keep the ciphertext as the next IV.  */
  for ( ;nblocks; nblocks-- )
    {
      serpent_encrypt_internal(ctx, iv, iv);
      cipher_block_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
      outbuf += sizeof(serpent_block_t);
      inbuf  += sizeof(serpent_block_t);
    }

  _gcry_burn_stack(burn_stack_depth);
}
1247 
/* Bulk encryption/decryption of complete blocks in OCB mode.  Processes as
   many blocks as the available assembly implementations can handle and
   returns the number of blocks left over for the generic code in cipher.c. */
static size_t
_gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
			const void *inbuf_arg, size_t nblocks, int encrypt)
{
#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  serpent_context_t *ctx = (void *)&c->context.c;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 2 * sizeof (serpent_block_t);
  u64 blkn = c->u_mode.ocb.data_nblocks;
#else
  (void)c;
  (void)outbuf_arg;
  (void)inbuf_arg;
  (void)encrypt;
#endif

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;
      u64 Ls[16];
      /* Rotation so that Ls[] lines up with the current block number.  */
      unsigned int n = 16 - (blkn % 16);
      u64 *l;
      int i;

      if (nblocks >= 16)
	{
	  /* Precompute the per-block OCB offsets; the L[0]/L[1]/L[0]/L[2]
	     pattern follows the number of trailing zeros of the block
	     index, with the slot at position 15 filled in per chunk.  */
	  for (i = 0; i < 16; i += 8)
	    {
	      /* Use u64 to store pointers for x32 support (assembly function
	       * assumes 64-bit pointers). */
	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	    }

	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
	  l = &Ls[(15 + n) % 16];

	  /* Process data in 16 block chunks. */
	  while (nblocks >= 16)
	    {
	      blkn += 16;
	      /* Offset for the 16th block of the chunk depends on the
	         absolute block number and is refreshed every iteration.  */
	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

	      if (encrypt)
		_gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
					  c->u_ctr.ctr, Ls);
	      else
		_gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
					  c->u_ctr.ctr, Ls);

	      nblocks -= 16;
	      outbuf += 16 * sizeof(serpent_block_t);
	      inbuf  += 16 * sizeof(serpent_block_t);
	      did_use_avx2 = 1;
	    }
	}

      if (did_use_avx2)
	{
	  /* serpent-avx2 assembly code does not use stack */
	  if (nblocks == 0)
	    burn_stack_depth = 0;
	}

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;
    u64 Ls[8];
    /* Rotation so that Ls[] lines up with the current block number.  */
    unsigned int n = 8 - (blkn % 8);
    u64 *l;

    if (nblocks >= 8)
      {
	/* Use u64 to store pointers for x32 support (assembly function
	  * assumes 64-bit pointers). */
	Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
	Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	l = &Ls[(7 + n) % 8];

	/* Process data in 8 block chunks. */
	while (nblocks >= 8)
	  {
	    blkn += 8;
	    /* Offset for the 8th block of the chunk.  */
	    *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

	    if (encrypt)
	      _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
					  c->u_ctr.ctr, Ls);
	    else
	      _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
					  c->u_ctr.ctr, Ls);

	    nblocks -= 8;
	    outbuf += 8 * sizeof(serpent_block_t);
	    inbuf  += 8 * sizeof(serpent_block_t);
	    did_use_sse2 = 1;
	  }
      }

    if (did_use_sse2)
      {
	/* serpent-sse2 assembly code does not use stack */
	if (nblocks == 0)
	  burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;
      /* NEON assembly takes native pointers; no x32 work-around needed.  */
      const void *Ls[8];
      unsigned int n = 8 - (blkn % 8);
      const void **l;

      if (nblocks >= 8)
	{
	  Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
	  Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
	  Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
	  Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
	  Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
	  Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
	  Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
	  l = &Ls[(7 + n) % 8];

	  /* Process data in 8 block chunks. */
	  while (nblocks >= 8)
	    {
	      blkn += 8;
	      *l = ocb_get_l(c,  blkn - blkn % 8);

	      if (encrypt)
		_gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
					  c->u_ctr.ctr, Ls);
	      else
		_gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
					  c->u_ctr.ctr, Ls);

	      nblocks -= 8;
	      outbuf += 8 * sizeof(serpent_block_t);
	      inbuf  += 8 * sizeof(serpent_block_t);
	      did_use_neon = 1;
	    }
	}

      if (did_use_neon)
	{
	  /* serpent-neon assembly code does not use stack */
	  if (nblocks == 0)
	    burn_stack_depth = 0;
	}

      /* Use generic code to handle smaller chunks... */
    }
#endif

#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  /* Persist the updated block counter for the blocks processed here.  */
  c->u_mode.ocb.data_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif

  /* Remaining blocks are handled by the caller's generic OCB code.  */
  return nblocks;
}
1434 
/* Bulk authentication of complete blocks in OCB mode.  Processes as many
   AAD blocks as the available assembly implementations can handle and
   returns the number of blocks left over for the generic code in cipher.c. */
static size_t
_gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
			size_t nblocks)
{
#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  serpent_context_t *ctx = (void *)&c->context.c;
  const unsigned char *abuf = abuf_arg;
  int burn_stack_depth = 2 * sizeof(serpent_block_t);
  u64 blkn = c->u_mode.ocb.aad_nblocks;
#else
  (void)c;
  (void)abuf_arg;
#endif

#ifdef USE_AVX2
  if (ctx->use_avx2)
    {
      int did_use_avx2 = 0;
      u64 Ls[16];
      /* Rotation so that Ls[] lines up with the current AAD block number.  */
      unsigned int n = 16 - (blkn % 16);
      u64 *l;
      int i;

      if (nblocks >= 16)
	{
	  /* Precompute the per-block OCB offsets; the L[0]/L[1]/L[0]/L[2]
	     pattern follows the number of trailing zeros of the block
	     index, with the slot at position 15 filled in per chunk.  */
	  for (i = 0; i < 16; i += 8)
	    {
	      /* Use u64 to store pointers for x32 support (assembly function
	       * assumes 64-bit pointers). */
	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	    }

	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
	  l = &Ls[(15 + n) % 16];

	  /* Process data in 16 block chunks. */
	  while (nblocks >= 16)
	    {
	      blkn += 16;
	      /* Offset for the 16th block of the chunk.  */
	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

	      _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
					  c->u_mode.ocb.aad_sum, Ls);

	      nblocks -= 16;
	      abuf += 16 * sizeof(serpent_block_t);
	      did_use_avx2 = 1;
	    }
	}

      if (did_use_avx2)
	{
	  /* serpent-avx2 assembly code does not use stack */
	  if (nblocks == 0)
	    burn_stack_depth = 0;
	}

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_SSE2
  {
    int did_use_sse2 = 0;
    u64 Ls[8];
    unsigned int n = 8 - (blkn % 8);
    u64 *l;

    if (nblocks >= 8)
      {
	/* Use u64 to store pointers for x32 support (assembly function
	* assumes 64-bit pointers). */
	Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
	Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	l = &Ls[(7 + n) % 8];

	/* Process data in 8 block chunks. */
	while (nblocks >= 8)
	  {
	    blkn += 8;
	    /* Offset for the 8th block of the chunk.  */
	    *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

	    _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
					c->u_mode.ocb.aad_sum, Ls);

	    nblocks -= 8;
	    abuf += 8 * sizeof(serpent_block_t);
	    did_use_sse2 = 1;
	  }
      }

    if (did_use_sse2)
      {
	/* serpent-sse2 assembly code does not use stack */
	if (nblocks == 0)
	  burn_stack_depth = 0;
      }

    /* Use generic code to handle smaller chunks... */
  }
#endif

#ifdef USE_NEON
  if (ctx->use_neon)
    {
      int did_use_neon = 0;
      /* NEON assembly takes native pointers; no x32 work-around needed.  */
      const void *Ls[8];
      unsigned int n = 8 - (blkn % 8);
      const void **l;

      if (nblocks >= 8)
	{
	  Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
	  Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
	  Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
	  Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
	  Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
	  Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
	  Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
	  l = &Ls[(7 + n) % 8];

	  /* Process data in 8 block chunks. */
	  while (nblocks >= 8)
	    {
	      blkn += 8;
	      *l = ocb_get_l(c, blkn - blkn % 8);

	      _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
					  c->u_mode.ocb.aad_sum, Ls);

	      nblocks -= 8;
	      abuf += 8 * sizeof(serpent_block_t);
	      did_use_neon = 1;
	    }
	}

      if (did_use_neon)
	{
	  /* serpent-neon assembly code does not use stack */
	  if (nblocks == 0)
	    burn_stack_depth = 0;
	}

      /* Use generic code to handle smaller chunks... */
    }
#endif

#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
  /* Persist the updated AAD block counter.  */
  c->u_mode.ocb.aad_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif

  /* Remaining blocks are handled by the caller's generic OCB code.  */
  return nblocks;
}
1603 
1604 
1605 
1606 /* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR
1607    encryption.  Returns NULL on success. */
1608 static const char*
selftest_ctr_128(void)1609 selftest_ctr_128 (void)
1610 {
1611   const int nblocks = 16+8+1;
1612   const int blocksize = sizeof(serpent_block_t);
1613   const int context_size = sizeof(serpent_context_t);
1614 
1615   return _gcry_selftest_helper_ctr("SERPENT", &serpent_setkey,
1616            &serpent_encrypt, nblocks, blocksize, context_size);
1617 }
1618 
1619 
1620 /* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption.
1621    Returns NULL on success. */
1622 static const char*
selftest_cbc_128(void)1623 selftest_cbc_128 (void)
1624 {
1625   const int nblocks = 16+8+2;
1626   const int blocksize = sizeof(serpent_block_t);
1627   const int context_size = sizeof(serpent_context_t);
1628 
1629   return _gcry_selftest_helper_cbc("SERPENT", &serpent_setkey,
1630            &serpent_encrypt, nblocks, blocksize, context_size);
1631 }
1632 
1633 
/* Run the self-tests for SERPENT-CFB-128, tests bulk CFB decryption.
   Returns NULL on success. */
static const char*
selftest_cfb_128 (void)
{
  /* 16 blocks exercise the AVX2 path, 8 the SSE2/NEON path, and the
     trailing two blocks the generic fallback.  */
  const int nblocks = 16+8+2;
  const int blocksize = sizeof(serpent_block_t);
  const int context_size = sizeof(serpent_context_t);

  return _gcry_selftest_helper_cfb("SERPENT", &serpent_setkey,
           &serpent_encrypt, nblocks, blocksize, context_size);
}
1646 
1647 
/* Serpent test.  Runs known-answer encryption/decryption tests for all
   three key sizes, then the bulk-mode (CTR/CBC/CFB) self-tests.
   Returns NULL on success, or an error description string.  */

static const char *
serpent_test (void)
{
  serpent_context_t context;
  unsigned char scratch[16];
  unsigned int i;
  const char *r;

  /* Known-answer test vectors; a key_length of 0 terminates the list.  */
  static struct test
  {
    int key_length;
    unsigned char key[32];
    unsigned char text_plain[16];
    unsigned char text_cipher[16];
  } test_data[] =
    {
      {
	16,
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
	"\xD2\x9D\x57\x6F\xCE\xA3\xA3\xA7\xED\x90\x99\xF2\x92\x73\xD7\x8E",
	"\xB2\x28\x8B\x96\x8A\xE8\xB0\x86\x48\xD1\xCE\x96\x06\xFD\x99\x2D"
      },
      {
	24,
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00",
	"\xD2\x9D\x57\x6F\xCE\xAB\xA3\xA7\xED\x98\x99\xF2\x92\x7B\xD7\x8E",
	"\x13\x0E\x35\x3E\x10\x37\xC2\x24\x05\xE8\xFA\xEF\xB2\xC3\xC3\xE9"
      },
      {
	32,
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
	"\xD0\x95\x57\x6F\xCE\xA3\xE3\xA7\xED\x98\xD9\xF2\x90\x73\xD7\x8E",
	"\xB9\x0E\xE5\x86\x2D\xE6\x91\x68\xF2\xBD\xD5\x12\x5B\x45\x47\x2B"
      },
      {
	32,
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
	"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
	"\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00",
	"\x20\x61\xA4\x27\x82\xBD\x52\xEC\x69\x1E\xC3\x83\xB0\x3B\xA7\x7C"
      },
      {
	0
      },
    };

  for (i = 0; test_data[i].key_length; i++)
    {
      /* Encrypt the known plaintext and compare against the expected
         ciphertext.  */
      serpent_setkey_internal (&context, test_data[i].key,
                               test_data[i].key_length);
      serpent_encrypt_internal (&context, test_data[i].text_plain, scratch);

      if (memcmp (scratch, test_data[i].text_cipher, sizeof (serpent_block_t)))
	switch (test_data[i].key_length)
	  {
	  case 16:
	    return "Serpent-128 test encryption failed.";
	  case  24:
	    return "Serpent-192 test encryption failed.";
	  case 32:
	    return "Serpent-256 test encryption failed.";
	  }

    /* Decrypt the known ciphertext and compare against the plaintext.  */
    serpent_decrypt_internal (&context, test_data[i].text_cipher, scratch);
    if (memcmp (scratch, test_data[i].text_plain, sizeof (serpent_block_t)))
      switch (test_data[i].key_length)
	{
	case 16:
	  return "Serpent-128 test decryption failed.";
	case  24:
	  return "Serpent-192 test decryption failed.";
	case 32:
	  return "Serpent-256 test decryption failed.";
	}
    }

  /* Bulk-mode self-tests (exercise the assembly fast paths too).  */
  if ( (r = selftest_ctr_128 ()) )
    return r;

  if ( (r = selftest_cbc_128 ()) )
    return r;

  if ( (r = selftest_cfb_128 ()) )
    return r;

  return NULL;
}
1739 
1740 
/* OIDs registered for Serpent-128 (GNU OID arc 1.3.6.1.4.1.11591).  */
static gcry_cipher_oid_spec_t serpent128_oids[] =
  {
    {"1.3.6.1.4.1.11591.13.2.1", GCRY_CIPHER_MODE_ECB },
    {"1.3.6.1.4.1.11591.13.2.2", GCRY_CIPHER_MODE_CBC },
    {"1.3.6.1.4.1.11591.13.2.3", GCRY_CIPHER_MODE_OFB },
    {"1.3.6.1.4.1.11591.13.2.4", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };

/* OIDs registered for Serpent-192.  */
static gcry_cipher_oid_spec_t serpent192_oids[] =
  {
    {"1.3.6.1.4.1.11591.13.2.21", GCRY_CIPHER_MODE_ECB },
    {"1.3.6.1.4.1.11591.13.2.22", GCRY_CIPHER_MODE_CBC },
    {"1.3.6.1.4.1.11591.13.2.23", GCRY_CIPHER_MODE_OFB },
    {"1.3.6.1.4.1.11591.13.2.24", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };

/* OIDs registered for Serpent-256.  */
static gcry_cipher_oid_spec_t serpent256_oids[] =
  {
    {"1.3.6.1.4.1.11591.13.2.41", GCRY_CIPHER_MODE_ECB },
    {"1.3.6.1.4.1.11591.13.2.42", GCRY_CIPHER_MODE_CBC },
    {"1.3.6.1.4.1.11591.13.2.43", GCRY_CIPHER_MODE_OFB },
    {"1.3.6.1.4.1.11591.13.2.44", GCRY_CIPHER_MODE_CFB },
    { NULL }
  };
1767 
/* Alternative algorithm names accepted by gcry_cipher_map_name; each
   list is NULL terminated.  */
static const char *serpent128_aliases[] =
  {
    "SERPENT",
    "SERPENT-128",
    NULL
  };
static const char *serpent192_aliases[] =
  {
    "SERPENT-192",
    NULL
  };
static const char *serpent256_aliases[] =
  {
    "SERPENT-256",
    NULL
  };
1784 
/* Cipher module descriptors exported to cipher.c: algorithm id, flags,
   name, aliases, OIDs, block size (bytes), key length (bits), context
   size, and the setkey/encrypt/decrypt handlers.  */
gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
  {
    GCRY_CIPHER_SERPENT128, {0, 0},
    "SERPENT128", serpent128_aliases, serpent128_oids, 16, 128,
    sizeof (serpent_context_t),
    serpent_setkey, serpent_encrypt, serpent_decrypt
  };

gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
  {
    GCRY_CIPHER_SERPENT192, {0, 0},
    "SERPENT192", serpent192_aliases, serpent192_oids, 16, 192,
    sizeof (serpent_context_t),
    serpent_setkey, serpent_encrypt, serpent_decrypt
  };

gcry_cipher_spec_t _gcry_cipher_spec_serpent256 =
  {
    GCRY_CIPHER_SERPENT256, {0, 0},
    "SERPENT256", serpent256_aliases, serpent256_oids, 16, 256,
    sizeof (serpent_context_t),
    serpent_setkey, serpent_encrypt, serpent_decrypt
  };
1808