/* sm4.c  -  SM4 Cipher Algorithm
 * Copyright (C) 2020 Alibaba Group.
 * Copyright (C) 2020 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>

#include "types.h"  /* for byte and u32 typedefs */
#include "bithelp.h"
#include "g10lib.h"
#include "cipher.h"
#include "bufhelp.h"
#include "cipher-internal.h"
#include "cipher-selftest.h"

/* Helper macro to force alignment to 64 bytes.  */
#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
# define ATTR_ALIGNED_64  __attribute__ ((aligned (64)))
#else
# define ATTR_ALIGNED_64
#endif

/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
#undef USE_AESNI_AVX
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
#  define USE_AESNI_AVX 1
# endif
#endif

/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
#undef USE_AESNI_AVX2
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
#  define USE_AESNI_AVX2 1
# endif
#endif

/* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and
 * additional stack space to store XMM6-XMM15 are needed. */
#undef ASM_FUNC_ABI
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
#  define ASM_FUNC_ABI __attribute__((sysv_abi))
# else
#  define ASM_FUNC_ABI
# endif
#endif

static const char *sm4_selftest (void);

static void _gcry_sm4_ctr_enc (void *context, unsigned char *ctr,
			       void *outbuf_arg, const void *inbuf_arg,
			       size_t nblocks);
static void _gcry_sm4_cbc_dec (void *context, unsigned char *iv,
			       void *outbuf_arg, const void *inbuf_arg,
			       size_t nblocks);
static void _gcry_sm4_cfb_dec (void *context, unsigned char *iv,
			       void *outbuf_arg, const void *inbuf_arg,
			       size_t nblocks);
static size_t _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
				   const void *inbuf_arg, size_t nblocks,
				   int encrypt);
static size_t _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
				  size_t nblocks);

typedef struct
{
  u32 rkey_enc[32];
  u32 rkey_dec[32];
#ifdef USE_AESNI_AVX
  unsigned int use_aesni_avx:1;
#endif
#ifdef USE_AESNI_AVX2
  unsigned int use_aesni_avx2:1;
#endif
} SM4_context;

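/* System parameter FK from the SM4 specification; it is XORed into the
 * 128-bit key words before the key schedule is run.  */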
static const u32 fk[4] =
{
  0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
};

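/* SM4 S-box, embedded between two volatile counters in a cache-line aligned
 * structure; see prefetch_sbox_table() below for why the counters exist.  */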
static struct
{
  volatile u32 counter_head;
  u32 cacheline_align[64 / 4 - 1];
  byte S[256];
  volatile u32 counter_tail;
} sbox_table ATTR_ALIGNED_64 =
  {
    0,
    { 0, },
    {
      0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
      0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
      0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
      0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
      0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
      0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
      0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
      0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
      0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
      0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
      0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
      0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
      0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
      0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
      0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
      0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
      0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
      0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
      0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
      0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
      0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
      0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
      0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
      0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
      0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
      0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
      0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
      0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
      0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
      0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
      0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
      0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
    },
    0
  };

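/* Constant key CK from the SM4 specification; byte j of ck[i] equals
 * (4 * i + j) * 7 mod 256.  */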
static const u32 ck[] =
{
  0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
  0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
  0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
  0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
  0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
  0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
  0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
  0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
};

#ifdef USE_AESNI_AVX
extern void _gcry_sm4_aesni_avx_expand_key(const byte *key, u32 *rk_enc,
					   u32 *rk_dec, const u32 *fk,
					   const u32 *ck) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx_ctr_enc(const u32 *rk_enc, byte *out,
					const byte *in, byte *ctr) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx_cbc_dec(const u32 *rk_dec, byte *out,
					const byte *in, byte *iv) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx_cfb_dec(const u32 *rk_enc, byte *out,
					const byte *in, byte *iv) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx_ocb_enc(const u32 *rk_enc,
					unsigned char *out,
					const unsigned char *in,
					unsigned char *offset,
					unsigned char *checksum,
					const u64 Ls[8]) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx_ocb_dec(const u32 *rk_dec,
					unsigned char *out,
					const unsigned char *in,
					unsigned char *offset,
					unsigned char *checksum,
					const u64 Ls[8]) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx_ocb_auth(const u32 *rk_enc,
					 const unsigned char *abuf,
					 unsigned char *offset,
					 unsigned char *checksum,
					 const u64 Ls[8]) ASM_FUNC_ABI;

extern unsigned int
_gcry_sm4_aesni_avx_crypt_blk1_8(const u32 *rk, byte *out, const byte *in,
				 unsigned int num_blks) ASM_FUNC_ABI;

static inline unsigned int
sm4_aesni_avx_crypt_blk1_8(const u32 *rk, byte *out, const byte *in,
			   unsigned int num_blks)
{
  return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks);
}

#endif /* USE_AESNI_AVX */

#ifdef USE_AESNI_AVX2
extern void _gcry_sm4_aesni_avx2_ctr_enc(const u32 *rk_enc, byte *out,
					 const byte *in,
					 byte *ctr) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx2_cbc_dec(const u32 *rk_dec, byte *out,
					 const byte *in,
					 byte *iv) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx2_cfb_dec(const u32 *rk_enc, byte *out,
					 const byte *in,
					 byte *iv) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx2_ocb_enc(const u32 *rk_enc,
					 unsigned char *out,
					 const unsigned char *in,
					 unsigned char *offset,
					 unsigned char *checksum,
					 const u64 Ls[16]) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx2_ocb_dec(const u32 *rk_dec,
					 unsigned char *out,
					 const unsigned char *in,
					 unsigned char *offset,
					 unsigned char *checksum,
					 const u64 Ls[16]) ASM_FUNC_ABI;

extern void _gcry_sm4_aesni_avx2_ocb_auth(const u32 *rk_enc,
					  const unsigned char *abuf,
					  unsigned char *offset,
					  unsigned char *checksum,
					  const u64 Ls[16]) ASM_FUNC_ABI;
#endif /* USE_AESNI_AVX2 */

static inline void prefetch_sbox_table(void)
{
  const volatile byte *vtab = (void *)&sbox_table;

  /* Modify the counters to trigger copy-on-write and unsharing if physical
   * pages of the look-up table are shared between processes.  Modifying the
   * counters also causes the page checksums to change and hints the same-page
   * merging algorithm that these pages are frequently changing.  */
  sbox_table.counter_head++;
  sbox_table.counter_tail++;

  /* Prefetch look-up table to cache.  */
  (void)vtab[0 * 32];
  (void)vtab[1 * 32];
  (void)vtab[2 * 32];
  (void)vtab[3 * 32];
  (void)vtab[4 * 32];
  (void)vtab[5 * 32];
  (void)vtab[6 * 32];
  (void)vtab[7 * 32];
  (void)vtab[8 * 32 - 1];
}

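/* Non-linear substitution tau: apply the S-box to each of the four bytes of
 * the 32-bit word.  */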
static inline u32 sm4_t_non_lin_sub(u32 x)
{
  u32 out;

  out  = (u32)sbox_table.S[(x >> 0) & 0xff] << 0;
  out |= (u32)sbox_table.S[(x >> 8) & 0xff] << 8;
  out |= (u32)sbox_table.S[(x >> 16) & 0xff] << 16;
  out |= (u32)sbox_table.S[(x >> 24) & 0xff] << 24;

  return out;
}

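/* Linear transform L' of the key schedule:
 * L'(B) = B ^ (B <<< 13) ^ (B <<< 23).  */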
static inline u32 sm4_key_lin_sub(u32 x)
{
  return x ^ rol(x, 13) ^ rol(x, 23);
}

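/* Linear transform L of the round function:
 * L(B) = B ^ (B <<< 2) ^ (B <<< 10) ^ (B <<< 18) ^ (B <<< 24).  */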
static inline u32 sm4_enc_lin_sub(u32 x)
{
  u32 xrol2 = rol(x, 2);
  return x ^ xrol2 ^ rol(xrol2, 8) ^ rol(xrol2, 16) ^ rol(x, 24);
}

static inline u32 sm4_key_sub(u32 x)
{
  return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
}

static inline u32 sm4_enc_sub(u32 x)
{
  return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
}

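/* One SM4 round: F(X0, X1, X2, X3, rk) = X0 ^ T(X1 ^ X2 ^ X3 ^ rk), where T
 * is the S-box substitution followed by the linear transform L.  */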
static inline u32
sm4_round(const u32 x0, const u32 x1, const u32 x2, const u32 x3, const u32 rk)
{
  return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
}

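/* Expand the 128-bit key into 32 round keys.  The decryption round keys are
 * the encryption round keys in reverse order.  */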
static void
sm4_expand_key (SM4_context *ctx, const byte *key)
{
  u32 rk[4];
  int i;

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      _gcry_sm4_aesni_avx_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
				      fk, ck);
      return;
    }
#endif

  rk[0] = buf_get_be32(key + 4 * 0) ^ fk[0];
  rk[1] = buf_get_be32(key + 4 * 1) ^ fk[1];
  rk[2] = buf_get_be32(key + 4 * 2) ^ fk[2];
  rk[3] = buf_get_be32(key + 4 * 3) ^ fk[3];

  for (i = 0; i < 32; i += 4)
    {
      rk[0] = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
      rk[1] = rk[1] ^ sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
      rk[2] = rk[2] ^ sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
      rk[3] = rk[3] ^ sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
      ctx->rkey_enc[i + 0] = rk[0];
      ctx->rkey_enc[i + 1] = rk[1];
      ctx->rkey_enc[i + 2] = rk[2];
      ctx->rkey_enc[i + 3] = rk[3];
      ctx->rkey_dec[31 - i - 0] = rk[0];
      ctx->rkey_dec[31 - i - 1] = rk[1];
      ctx->rkey_dec[31 - i - 2] = rk[2];
      ctx->rkey_dec[31 - i - 3] = rk[3];
    }

  wipememory (rk, sizeof(rk));
}

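/* setkey handler: run the one-time self-test, check the 128-bit key length,
 * detect usable hardware features, register the bulk mode implementations
 * and expand the key.  */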
static gcry_err_code_t
sm4_setkey (void *context, const byte *key, const unsigned keylen,
            cipher_bulk_ops_t *bulk_ops)
{
  SM4_context *ctx = context;
  static int init = 0;
  static const char *selftest_failed = NULL;
  unsigned int hwf = _gcry_get_hw_features ();

  (void)hwf;

  if (!init)
    {
      init = 1;
      selftest_failed = sm4_selftest();
      if (selftest_failed)
	log_error("%s\n", selftest_failed);
    }
  if (selftest_failed)
    return GPG_ERR_SELFTEST_FAILED;

  if (keylen != 16)
    return GPG_ERR_INV_KEYLEN;

#ifdef USE_AESNI_AVX
  ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
#endif
#ifdef USE_AESNI_AVX2
  ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
#endif

  /* Setup bulk encryption routines.  */
  memset (bulk_ops, 0, sizeof(*bulk_ops));
  bulk_ops->cbc_dec = _gcry_sm4_cbc_dec;
  bulk_ops->cfb_dec = _gcry_sm4_cfb_dec;
  bulk_ops->ctr_enc = _gcry_sm4_ctr_enc;
  bulk_ops->ocb_crypt = _gcry_sm4_ocb_crypt;
  bulk_ops->ocb_auth  = _gcry_sm4_ocb_auth;

  sm4_expand_key (ctx, key);
  return 0;
}

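/* Encrypt or decrypt one 16-byte block with the given round keys; passing
 * rkey_enc encrypts and rkey_dec decrypts.  Returns the stack burn depth.  */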
static unsigned int
sm4_do_crypt (const u32 *rk, byte *out, const byte *in)
{
  u32 x[4];
  int i;

  x[0] = buf_get_be32(in + 0 * 4);
  x[1] = buf_get_be32(in + 1 * 4);
  x[2] = buf_get_be32(in + 2 * 4);
  x[3] = buf_get_be32(in + 3 * 4);

  for (i = 0; i < 32; i += 4)
    {
      x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
      x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
      x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
      x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
    }

  buf_put_be32(out + 0 * 4, x[3 - 0]);
  buf_put_be32(out + 1 * 4, x[3 - 1]);
  buf_put_be32(out + 2 * 4, x[3 - 2]);
  buf_put_be32(out + 3 * 4, x[3 - 3]);

  return /*burn_stack*/ 4*6+sizeof(void*)*4;
}

static unsigned int
sm4_encrypt (void *context, byte *outbuf, const byte *inbuf)
{
  SM4_context *ctx = context;

  prefetch_sbox_table ();

  return sm4_do_crypt (ctx->rkey_enc, outbuf, inbuf);
}

static unsigned int
sm4_decrypt (void *context, byte *outbuf, const byte *inbuf)
{
  SM4_context *ctx = context;

  prefetch_sbox_table ();

  return sm4_do_crypt (ctx->rkey_dec, outbuf, inbuf);
}

static unsigned int
sm4_do_crypt_blks2 (const u32 *rk, byte *out, const byte *in)
{
  u32 x[4];
  u32 y[4];
  u32 k;
  int i;

  /* Encrypts/Decrypts two blocks for higher instruction level
   * parallelism. */

  x[0] = buf_get_be32(in + 0 * 4);
  x[1] = buf_get_be32(in + 1 * 4);
  x[2] = buf_get_be32(in + 2 * 4);
  x[3] = buf_get_be32(in + 3 * 4);
  y[0] = buf_get_be32(in + 4 * 4);
  y[1] = buf_get_be32(in + 5 * 4);
  y[2] = buf_get_be32(in + 6 * 4);
  y[3] = buf_get_be32(in + 7 * 4);

  for (i = 0; i < 32; i += 4)
    {
      k = rk[i + 0];
      x[0] = sm4_round(x[0], x[1], x[2], x[3], k);
      y[0] = sm4_round(y[0], y[1], y[2], y[3], k);
      k = rk[i + 1];
      x[1] = sm4_round(x[1], x[2], x[3], x[0], k);
      y[1] = sm4_round(y[1], y[2], y[3], y[0], k);
      k = rk[i + 2];
      x[2] = sm4_round(x[2], x[3], x[0], x[1], k);
      y[2] = sm4_round(y[2], y[3], y[0], y[1], k);
      k = rk[i + 3];
      x[3] = sm4_round(x[3], x[0], x[1], x[2], k);
      y[3] = sm4_round(y[3], y[0], y[1], y[2], k);
    }

  buf_put_be32(out + 0 * 4, x[3 - 0]);
  buf_put_be32(out + 1 * 4, x[3 - 1]);
  buf_put_be32(out + 2 * 4, x[3 - 2]);
  buf_put_be32(out + 3 * 4, x[3 - 3]);
  buf_put_be32(out + 4 * 4, y[3 - 0]);
  buf_put_be32(out + 5 * 4, y[3 - 1]);
  buf_put_be32(out + 6 * 4, y[3 - 2]);
  buf_put_be32(out + 7 * 4, y[3 - 3]);

  return /*burn_stack*/ 4*10+sizeof(void*)*4;
}

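/* Generic bulk fallback: process NUM_BLKS blocks with the table-based
 * implementation, two blocks at a time where possible.  */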
static unsigned int
sm4_crypt_blocks (const u32 *rk, byte *out, const byte *in,
		  unsigned int num_blks)
{
  unsigned int burn_depth = 0;
  unsigned int nburn;

  while (num_blks >= 2)
    {
      nburn = sm4_do_crypt_blks2 (rk, out, in);
      burn_depth = nburn > burn_depth ? nburn : burn_depth;
      out += 2 * 16;
      in += 2 * 16;
      num_blks -= 2;
    }

  while (num_blks)
    {
      nburn = sm4_do_crypt (rk, out, in);
      burn_depth = nburn > burn_depth ? nburn : burn_depth;
      out += 16;
      in += 16;
      num_blks--;
    }

  if (burn_depth)
    burn_depth += sizeof(void *) * 5;
  return burn_depth;
}

/* Bulk encryption of complete blocks in CTR mode.  This function is only
   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
   of size 16. */
static void
_gcry_sm4_ctr_enc(void *context, unsigned char *ctr,
                  void *outbuf_arg, const void *inbuf_arg,
                  size_t nblocks)
{
  SM4_context *ctx = context;
  byte *outbuf = outbuf_arg;
  const byte *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_sm4_aesni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_sm4_aesni_avx_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);

          nblocks -= 8;
          outbuf += 8 * 16;
          inbuf += 8 * 16;
        }
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
				   unsigned int num_blks);
      byte tmpbuf[16 * 8];
      unsigned int tmp_used = 16;

      if (0)
	;
#ifdef USE_AESNI_AVX
      else if (ctx->use_aesni_avx)
	{
	  crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
	}
#endif
      else
	{
	  prefetch_sbox_table ();
	  crypt_blk1_8 = sm4_crypt_blocks;
	}

      /* Process remaining blocks. */
      while (nblocks)
	{
	  size_t curr_blks = nblocks > 8 ? 8 : nblocks;
	  size_t i;

	  if (curr_blks * 16 > tmp_used)
	    tmp_used = curr_blks * 16;

	  cipher_block_cpy (tmpbuf + 0 * 16, ctr, 16);
	  for (i = 1; i < curr_blks; i++)
	    {
	      cipher_block_cpy (&tmpbuf[i * 16], ctr, 16);
	      cipher_block_add (&tmpbuf[i * 16], i, 16);
	    }
	  cipher_block_add (ctr, curr_blks, 16);

	  burn_stack_depth = crypt_blk1_8 (ctx->rkey_enc, tmpbuf, tmpbuf,
					   curr_blks);

	  for (i = 0; i < curr_blks; i++)
	    {
	      cipher_block_xor (outbuf, &tmpbuf[i * 16], inbuf, 16);
	      outbuf += 16;
	      inbuf += 16;
	    }

	  nblocks -= curr_blks;
	}

      wipememory(tmpbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}

/* Bulk decryption of complete blocks in CBC mode.  This function is only
   intended for the bulk encryption feature of cipher.c. */
static void
_gcry_sm4_cbc_dec(void *context, unsigned char *iv,
                  void *outbuf_arg, const void *inbuf_arg,
                  size_t nblocks)
{
  SM4_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_sm4_aesni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_sm4_aesni_avx_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * 16;
          inbuf += 8 * 16;
        }
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
				   unsigned int num_blks);
      unsigned char savebuf[16 * 8];
      unsigned int tmp_used = 16;

      if (0)
	;
#ifdef USE_AESNI_AVX
      else if (ctx->use_aesni_avx)
	{
	  crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
	}
#endif
      else
	{
	  prefetch_sbox_table ();
	  crypt_blk1_8 = sm4_crypt_blocks;
	}

      /* Process remaining blocks. */
      while (nblocks)
	{
	  size_t curr_blks = nblocks > 8 ? 8 : nblocks;
	  size_t i;

	  if (curr_blks * 16 > tmp_used)
	    tmp_used = curr_blks * 16;

	  burn_stack_depth = crypt_blk1_8 (ctx->rkey_dec, savebuf, inbuf,
					   curr_blks);

	  for (i = 0; i < curr_blks; i++)
	    {
	      cipher_block_xor_n_copy_2(outbuf, &savebuf[i * 16], iv, inbuf,
					16);
	      outbuf += 16;
	      inbuf += 16;
	    }

	  nblocks -= curr_blks;
	}

      wipememory(savebuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}

/* Bulk decryption of complete blocks in CFB mode.  This function is only
   intended for the bulk encryption feature of cipher.c. */
static void
_gcry_sm4_cfb_dec(void *context, unsigned char *iv,
                  void *outbuf_arg, const void *inbuf_arg,
                  size_t nblocks)
{
  SM4_context *ctx = context;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  int burn_stack_depth = 0;

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      /* Process data in 16 block chunks. */
      while (nblocks >= 16)
        {
          _gcry_sm4_aesni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);

          nblocks -= 16;
          outbuf += 16 * 16;
          inbuf += 16 * 16;
        }
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      /* Process data in 8 block chunks. */
      while (nblocks >= 8)
        {
          _gcry_sm4_aesni_avx_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);

          nblocks -= 8;
          outbuf += 8 * 16;
          inbuf += 8 * 16;
        }
    }
#endif

  /* Process remaining blocks. */
  if (nblocks)
    {
      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
				   unsigned int num_blks);
      unsigned char ivbuf[16 * 8];
      unsigned int tmp_used = 16;

      if (0)
	;
#ifdef USE_AESNI_AVX
      else if (ctx->use_aesni_avx)
	{
	  crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
	}
#endif
      else
	{
	  prefetch_sbox_table ();
	  crypt_blk1_8 = sm4_crypt_blocks;
	}

      /* Process remaining blocks. */
      while (nblocks)
	{
	  size_t curr_blks = nblocks > 8 ? 8 : nblocks;
	  size_t i;

	  if (curr_blks * 16 > tmp_used)
	    tmp_used = curr_blks * 16;

	  cipher_block_cpy (&ivbuf[0 * 16], iv, 16);
	  for (i = 1; i < curr_blks; i++)
	    cipher_block_cpy (&ivbuf[i * 16], &inbuf[(i - 1) * 16], 16);
	  cipher_block_cpy (iv, &inbuf[(i - 1) * 16], 16);

	  burn_stack_depth = crypt_blk1_8 (ctx->rkey_enc, ivbuf, ivbuf,
					   curr_blks);

	  for (i = 0; i < curr_blks; i++)
	    {
	      cipher_block_xor (outbuf, inbuf, &ivbuf[i * 16], 16);
	      outbuf += 16;
	      inbuf += 16;
	    }

	  nblocks -= curr_blks;
	}

      wipememory(ivbuf, tmp_used);
    }

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);
}

/* Bulk encryption/decryption of complete blocks in OCB mode. */
static size_t
_gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
		     const void *inbuf_arg, size_t nblocks, int encrypt)
{
  SM4_context *ctx = (void *)&c->context.c;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  u64 blkn = c->u_mode.ocb.data_nblocks;
  int burn_stack_depth = 0;

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      u64 Ls[16];
      unsigned int n = 16 - (blkn % 16);
      u64 *l;
      int i;

      if (nblocks >= 16)
	{
	  for (i = 0; i < 16; i += 8)
	    {
	      /* Use u64 to store pointers for x32 support (assembly function
	       * assumes 64-bit pointers). */
	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	    }

	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
	  l = &Ls[(15 + n) % 16];

	  /* Process data in 16 block chunks. */
	  while (nblocks >= 16)
	    {
	      blkn += 16;
	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

	      if (encrypt)
		_gcry_sm4_aesni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
					     c->u_iv.iv, c->u_ctr.ctr, Ls);
	      else
		_gcry_sm4_aesni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
					     c->u_iv.iv, c->u_ctr.ctr, Ls);

	      nblocks -= 16;
	      outbuf += 16 * 16;
	      inbuf += 16 * 16;
	    }
	}
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      u64 Ls[8];
      unsigned int n = 8 - (blkn % 8);
      u64 *l;

      if (nblocks >= 8)
	{
	  /* Use u64 to store pointers for x32 support (assembly function
	   * assumes 64-bit pointers). */
	  Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	  Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	  Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	  Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
	  Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	  Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	  Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	  Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
	  l = &Ls[(7 + n) % 8];

	  /* Process data in 8 block chunks. */
	  while (nblocks >= 8)
	    {
	      blkn += 8;
	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

	      if (encrypt)
		_gcry_sm4_aesni_avx_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
					    c->u_iv.iv, c->u_ctr.ctr, Ls);
	      else
		_gcry_sm4_aesni_avx_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
					    c->u_iv.iv, c->u_ctr.ctr, Ls);

	      nblocks -= 8;
	      outbuf += 8 * 16;
	      inbuf += 8 * 16;
	    }
	}
    }
#endif

  if (nblocks)
    {
      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
				   unsigned int num_blks);
      const u32 *rk = encrypt ? ctx->rkey_enc : ctx->rkey_dec;
      unsigned char tmpbuf[16 * 8];
      unsigned int tmp_used = 16;

      if (0)
	;
#ifdef USE_AESNI_AVX
      else if (ctx->use_aesni_avx)
	{
	  crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
	}
#endif
      else
	{
	  prefetch_sbox_table ();
	  crypt_blk1_8 = sm4_crypt_blocks;
	}

      while (nblocks)
	{
	  size_t curr_blks = nblocks > 8 ? 8 : nblocks;
	  size_t i;

	  if (curr_blks * 16 > tmp_used)
	    tmp_used = curr_blks * 16;

	  for (i = 0; i < curr_blks; i++)
	    {
	      const unsigned char *l = ocb_get_l(c, ++blkn);

	      /* Checksum_i = Checksum_{i-1} xor P_i  */
	      if (encrypt)
		cipher_block_xor_1(c->u_ctr.ctr, &inbuf[i * 16], 16);

	      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	      cipher_block_xor_2dst (&tmpbuf[i * 16], c->u_iv.iv, l, 16);
	      cipher_block_xor (&outbuf[i * 16], &inbuf[i * 16],
				c->u_iv.iv, 16);
	    }

	  /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
	  crypt_blk1_8 (rk, outbuf, outbuf, curr_blks);

	  for (i = 0; i < curr_blks; i++)
	    {
	      cipher_block_xor_1 (&outbuf[i * 16], &tmpbuf[i * 16], 16);

	      /* Checksum_i = Checksum_{i-1} xor P_i  */
	      if (!encrypt)
		  cipher_block_xor_1(c->u_ctr.ctr, &outbuf[i * 16], 16);
	    }

	  outbuf += curr_blks * 16;
	  inbuf  += curr_blks * 16;
	  nblocks -= curr_blks;
	}

      wipememory(tmpbuf, tmp_used);
    }

  c->u_mode.ocb.data_nblocks = blkn;

  if (burn_stack_depth)
    _gcry_burn_stack(burn_stack_depth);

  return 0;
}

/* Bulk authentication of complete blocks in OCB mode. */
static size_t
_gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
{
  SM4_context *ctx = (void *)&c->context.c;
  const unsigned char *abuf = abuf_arg;
  u64 blkn = c->u_mode.ocb.aad_nblocks;

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      u64 Ls[16];
      unsigned int n = 16 - (blkn % 16);
      u64 *l;
      int i;

      if (nblocks >= 16)
	{
	  for (i = 0; i < 16; i += 8)
	    {
	      /* Use u64 to store pointers for x32 support (assembly function
	       * assumes 64-bit pointers). */
	      Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	      Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
	      Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	      Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	      Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	    }

	  Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
	  l = &Ls[(15 + n) % 16];

	  /* Process data in 16 block chunks. */
	  while (nblocks >= 16)
	    {
	      blkn += 16;
	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);

	      _gcry_sm4_aesni_avx2_ocb_auth(ctx->rkey_enc, abuf,
					    c->u_mode.ocb.aad_offset,
					    c->u_mode.ocb.aad_sum, Ls);

	      nblocks -= 16;
	      abuf += 16 * 16;
	    }
	}
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      u64 Ls[8];
      unsigned int n = 8 - (blkn % 8);
      u64 *l;

      if (nblocks >= 8)
	{
	  /* Use u64 to store pointers for x32 support (assembly function
	   * assumes 64-bit pointers). */
	  Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	  Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	  Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	  Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
	  Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	  Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
	  Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
	  Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
	  l = &Ls[(7 + n) % 8];

	  /* Process data in 8 block chunks. */
	  while (nblocks >= 8)
	    {
	      blkn += 8;
	      *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

	      _gcry_sm4_aesni_avx_ocb_auth(ctx->rkey_enc, abuf,
					   c->u_mode.ocb.aad_offset,
					   c->u_mode.ocb.aad_sum, Ls);

	      nblocks -= 8;
	      abuf += 8 * 16;
	    }
	}
    }
#endif

  if (nblocks)
    {
      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
				   unsigned int num_blks);
      unsigned char tmpbuf[16 * 8];
      unsigned int tmp_used = 16;

      if (0)
	;
#ifdef USE_AESNI_AVX
      else if (ctx->use_aesni_avx)
	{
	  crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
	}
#endif
      else
	{
	  prefetch_sbox_table ();
	  crypt_blk1_8 = sm4_crypt_blocks;
	}

      while (nblocks)
	{
	  size_t curr_blks = nblocks > 8 ? 8 : nblocks;
	  size_t i;

	  if (curr_blks * 16 > tmp_used)
	    tmp_used = curr_blks * 16;

	  for (i = 0; i < curr_blks; i++)
	    {
	      const unsigned char *l = ocb_get_l(c, ++blkn);

	      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
	      cipher_block_xor_2dst (&tmpbuf[i * 16],
				     c->u_mode.ocb.aad_offset, l, 16);
	      cipher_block_xor_1 (&tmpbuf[i * 16], &abuf[i * 16], 16);
	    }

	  /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
	  crypt_blk1_8 (ctx->rkey_enc, tmpbuf, tmpbuf, curr_blks);

	  for (i = 0; i < curr_blks; i++)
	    {
	      cipher_block_xor_1 (c->u_mode.ocb.aad_sum, &tmpbuf[i * 16], 16);
	    }

	  abuf += curr_blks * 16;
	  nblocks -= curr_blks;
	}

      wipememory(tmpbuf, tmp_used);
    }

  c->u_mode.ocb.aad_nblocks = blkn;

  return 0;
}

/* Run the self-tests for SM4-CTR, tests IV increment of bulk CTR
   encryption.  Returns NULL on success. */
static const char*
selftest_ctr_128 (void)
{
  const int nblocks = 16 - 1;
  const int blocksize = 16;
  const int context_size = sizeof(SM4_context);

  return _gcry_selftest_helper_ctr("SM4", &sm4_setkey,
           &sm4_encrypt, nblocks, blocksize, context_size);
}

/* Run the self-tests for SM4-CBC, tests bulk CBC decryption.
   Returns NULL on success. */
static const char*
selftest_cbc_128 (void)
{
  const int nblocks = 16 - 1;
  const int blocksize = 16;
  const int context_size = sizeof(SM4_context);

  return _gcry_selftest_helper_cbc("SM4", &sm4_setkey,
           &sm4_encrypt, nblocks, blocksize, context_size);
}

/* Run the self-tests for SM4-CFB, tests bulk CFB decryption.
   Returns NULL on success. */
static const char*
selftest_cfb_128 (void)
{
  const int nblocks = 16 - 1;
  const int blocksize = 16;
  const int context_size = sizeof(SM4_context);

  return _gcry_selftest_helper_cfb("SM4", &sm4_setkey,
           &sm4_encrypt, nblocks, blocksize, context_size);
}

static const char *
sm4_selftest (void)
{
  SM4_context ctx;
  byte scratch[16];
  const char *r;

  static const byte plaintext[16] = {
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
  };
  static const byte key[16] = {
    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
  };
  static const byte ciphertext[16] = {
    0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E,
    0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46
  };

  memset (&ctx, 0, sizeof(ctx));

  sm4_expand_key (&ctx, key);
  sm4_encrypt (&ctx, scratch, plaintext);
  if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
    return "SM4 test encryption failed.";
  sm4_decrypt (&ctx, scratch, scratch);
  if (memcmp (scratch, plaintext, sizeof (plaintext)))
    return "SM4 test decryption failed.";

  if ( (r = selftest_ctr_128 ()) )
    return r;

  if ( (r = selftest_cbc_128 ()) )
    return r;

  if ( (r = selftest_cfb_128 ()) )
    return r;

  return NULL;
}

static gpg_err_code_t
run_selftests (int algo, int extended, selftest_report_func_t report)
{
  const char *what;
  const char *errtxt;

  (void)extended;

  if (algo != GCRY_CIPHER_SM4)
    return GPG_ERR_CIPHER_ALGO;

  what = "selftest";
  errtxt = sm4_selftest ();
  if (errtxt)
    goto failed;

  return 0;

 failed:
  if (report)
    report ("cipher", GCRY_CIPHER_SM4, what, errtxt);
  return GPG_ERR_SELFTEST_FAILED;
}


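/* SM4 is exposed to applications through the generic cipher API via the OID
 * list and cipher spec below.  A minimal usage sketch (illustrative only;
 * error handling omitted, and `key' and `ptext' are assumed to be 16-byte
 * buffers supplied by the caller):
 *
 *   gcry_cipher_hd_t hd;
 *   byte ctext[16];
 *
 *   gcry_cipher_open (&hd, GCRY_CIPHER_SM4, GCRY_CIPHER_MODE_ECB, 0);
 *   gcry_cipher_setkey (hd, key, 16);
 *   gcry_cipher_encrypt (hd, ctext, sizeof(ctext), ptext, 16);
 *   gcry_cipher_close (hd);
 */
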
static gcry_cipher_oid_spec_t sm4_oids[] =
  {
    { "1.2.156.10197.1.104.1", GCRY_CIPHER_MODE_ECB },
    { "1.2.156.10197.1.104.2", GCRY_CIPHER_MODE_CBC },
    { "1.2.156.10197.1.104.3", GCRY_CIPHER_MODE_OFB },
    { "1.2.156.10197.1.104.4", GCRY_CIPHER_MODE_CFB },
    { "1.2.156.10197.1.104.7", GCRY_CIPHER_MODE_CTR },
    { NULL }
  };

gcry_cipher_spec_t _gcry_cipher_spec_sm4 =
  {
    GCRY_CIPHER_SM4, {0, 0},
    "SM4", NULL, sm4_oids, 16, 128,
    sizeof (SM4_context),
    sm4_setkey, sm4_encrypt, sm4_decrypt,
    NULL, NULL,
    run_selftests
  };