1;;
2;; Copyright (c) 2018-2020, Intel Corporation
3;;
4;; Redistribution and use in source and binary forms, with or without
5;; modification, are permitted provided that the following conditions are met:
6;;
7;;     * Redistributions of source code must retain the above copyright notice,
8;;       this list of conditions and the following disclaimer.
9;;     * Redistributions in binary form must reproduce the above copyright
10;;       notice, this list of conditions and the following disclaimer in the
11;;       documentation and/or other materials provided with the distribution.
12;;     * Neither the name of Intel Corporation nor the names of its contributors
13;;       may be used to endorse or promote products derived from this software
14;;       without specific prior written permission.
15;;
16;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26;;
27
28%include "include/os.asm"
29%define NO_AESNI_RENAME
30%include "include/aesni_emu.inc"
31%include "include/clear_regs.asm"
32
33;;; Routines to generate subkeys for AES-CMAC.
34;;; See RFC 4493 for more details.
35
36;; In System V AMD64 ABI
37;;      callee saves: RBX, RBP, R12-R15
38;; Windows x64 ABI
39;;      callee saves: RBX, RBP, RDI, RSI, RSP, R12-R15
40;;
41;; Registers:           RAX RBX RCX RDX RBP RSI RDI R8  R9  R10 R11 R12 R13 R14 R15
42;;                      -----------------------------------------------------------
43;; Windows clobbers:
44;; Windows preserves:   RAX RBX RCX RDX RBP RSI RDI R8  R9  R10 R11 R12 R13 R14 R15
45;;                      -----------------------------------------------------------
46;; Linux clobbers:
47;; Linux preserves:     RAX RBX RCX RDX RBP RSI RDI R8  R9  R10 R11 R12 R13 R14 R15
48;;                      -----------------------------------------------------------
49;;
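;; Linux/Windows clobbers: xmm0, xmm1, xmm2
;; NOTE: when built with SAFE_DATA, the register clearing macros
;;       (clear_scratch_gps_asm, clear_scratch_xmms_*_asm) are also invoked
;;       before returning, so the tables above presumably only describe
;;       builds without SAFE_DATA - see include/clear_regs.asm.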
51;;
52
;; Integer argument locations for the two supported calling conventions.
;; LINUX selects the System V AMD64 registers; any other build uses the
;; Windows x64 ones (5th argument lives on the stack there).
%ifndef LINUX
%define arg1    rcx
%define arg2    rdx
%define arg3    r8
%define arg4    r9
%define arg5    [rsp + 5*8]
%else
%define arg1    rdi
%define arg2    rsi
%define arg3    rdx
%define arg4    rcx
%define arg5    r8
%endif

;; Vector working registers:
;;   XL    - L = AES(K, 0^128)
;;   XKEY1 - subkey K1
;;   XKEY2 - subkey K2
%define XL      xmm0
%define XKEY1   xmm1
%define XKEY2   xmm2

;; Function parameters, in prototype order:
;;   (const void *key_exp, void *key1, void *key2)
%define KEY_EXP arg1
%define KEY1    arg2
%define KEY2    arg3

74
75
;; Constants used by the subkey derivation. They are only ever read, so they
;; are placed in a read-only section (.rodata for ELF, .rdata for Windows
;; object files) rather than in writable .data.
%ifdef LINUX
section .rodata
%else
section .rdata
%endif
default rel

;; Each constant is a 128-bit value stored as two little-endian qwords
;; (low qword first); the 128-bit value is shown in the comment above it.

;; Bit 127: most significant bit of the byte-reversed block
align 16
xmm_bit127:
        ;ddq 0x80000000000000000000000000000000
        dq 0x0000000000000000, 0x8000000000000000

;; Bit 63: top bit of the low qword (the bit a per-qword shift drops)
align 16
xmm_bit63:
        ;ddq 0x00000000000000008000000000000000
        dq 0x8000000000000000, 0x0000000000000000

;; Bit 64: bottom bit of the high qword (where the dropped bit is re-inserted)
align 16
xmm_bit64:
        ;ddq 0x00000000000000010000000000000000
        dq 0x0000000000000000, 0x0000000000000001

;; const_Rb from RFC 4493
align 16
const_Rb:
        ;ddq 0x00000000000000000000000000000087
        dq 0x0000000000000087, 0x0000000000000000

;; pshufb control mask that reverses the order of all 16 bytes
align 16
byteswap_const:
        ;ddq 0x000102030405060708090A0B0C0D0E0F
        dq 0x08090A0B0C0D0E0F, 0x0001020304050607
103
104section .text
105
106;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
107;;;
108;;; void aes_cmac_subkey_gen(const void *key_exp, void *key1, void *key2)
109;;;
110;;; key_exp : IN  : address of expanded encryption key structure
111;;; key1    : OUT : address to store subkey 1 (16 bytes)
112;;; key2    : OUT : address to store subkey 2 (16 bytes)
113;;;
;;; RFC 4493, Figure 2.2, describes the operation of this function at a high level:
115;;;
116;;; ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
117;;; +                    Algorithm Generate_Subkey                       +
118;;; ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
119;;; +                                                                    +
120;;; +   Input    : K  (128/256-bit key)                                  +
121;;; +   Output   : K1 (128-bit first subkey)                             +
122;;; +              K2 (128-bit second subkey)                            +
123;;; +--------------------------------------------------------------------+
124;;; +                                                                    +
125;;; +   Constants: const_Zero is 0x00000000000000000000000000000000      +
126;;; +              const_Rb   is 0x00000000000000000000000000000087      +
127;;; +   Variables: L          for output of AES-128/256 applied to 0^128 +
128;;; +                                                                    +
129;;; +   Step 1.  L := AES-128/256(K, const_Zero) ;                       +
130;;; +   Step 2.  if MSB(L) is equal to 0                                 +
131;;; +            then    K1 := L << 1 ;                                  +
132;;; +            else    K1 := (L << 1) XOR const_Rb ;                   +
133;;; +   Step 3.  if MSB(K1) is equal to 0                                +
134;;; +            then    K2 := K1 << 1 ;                                 +
135;;; +            else    K2 := (K1 << 1) XOR const_Rb ;                  +
136;;; +   Step 4.  return K1, K2                        ;                  +
137;;; +                                                                    +
138;;; ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
139
;;; AES_CMAC_SUBKEY_GEN_SSE NROUNDS [, ARCH]
;;;
;;; Emits the body of a CMAC subkey generation routine (RFC 4493,
;;; Generate_Subkey) using legacy-SSE instruction encodings.
;;;
;;; NROUNDS : number of full AES rounds before the final round:
;;;           9 (AES-128), 11 (AES-192) or 13 (AES-256).
;;;           Round key i is read from [KEY_EXP + 16*i].
;;; ARCH    : optional; "no_aesni" selects the EMULATE_AESENC /
;;;           EMULATE_AESENCLAST macros instead of the AES-NI instructions.
;;;
;;; Inputs  : KEY_EXP    - expanded key schedule. It is read with movdqa and,
;;;                        in the AES-NI path, as a legacy-SSE memory operand,
;;;                        so it must be 16-byte aligned.
;;;           KEY1, KEY2 - output pointers, 16 bytes each (stored with movdqu)
;;; Clobbers: XL, XKEY1, XKEY2, flags; with SAFE_DATA also whatever the
;;;           register clearing macros touch.
;;; Note    : execution falls through the end of the macro; the caller is
;;;           expected to follow it with "ret".
%macro AES_CMAC_SUBKEY_GEN_SSE 1-2
%define %%NROUNDS       %1
%define %%ARCH          %2

;; Reject unsupported round counts at assembly time instead of silently
;; generating AES-128 code for them.
%if (%%NROUNDS != 9) && (%%NROUNDS != 11) && (%%NROUNDS != 13)
%error "AES_CMAC_SUBKEY_GEN_SSE: NROUNDS must be 9, 11 or 13"
%endif

%ifdef SAFE_PARAM
        ;; Skip all memory accesses if any of the pointers is NULL
        test    KEY_EXP, KEY_EXP
        jz      %%_aes_cmac_subkey_gen_sse_return
        test    KEY1, KEY1
        jz      %%_aes_cmac_subkey_gen_sse_return
        test    KEY2, KEY2
        jz      %%_aes_cmac_subkey_gen_sse_return
%endif

;; Select the AES round primitives. The aliases are macro-local so that one
;; invocation cannot change what a later use of the names elsewhere means.
%ifidn %%ARCH, no_aesni
%define %%AESENC        EMULATE_AESENC
%define %%AESENCLAST    EMULATE_AESENCLAST
%else
%define %%AESENC        aesenc
%define %%AESENCLAST    aesenclast
%endif

        ;; Step 1.  L := AES(K, const_Zero) ;
        ;; The plaintext is all zeroes, so the initial AddRoundKey result is
        ;; simply round key 0.
        movdqa          XL, [KEY_EXP + 16*0]            ; 0. ARK xor const_Zero
%assign %%round 1
%rep %%NROUNDS
        %%AESENC        XL, [KEY_EXP + 16*%%round]      ; full round
%assign %%round (%%round + 1)
%endrep
        %%AESENCLAST    XL, [KEY_EXP + 16*%%round]      ; final round

        ;; Step 2.  if MSB(L) is equal to 0
        ;;          then    K1 := L << 1 ;
        ;;          else    K1 := (L << 1) XOR const_Rb ;
        ;; Reverse the bytes so the block can be treated as a 128-bit integer
        ;; whose bit 127 is MSB(L). psllq shifts each qword independently, so
        ;; the bit leaving the low qword (bit 63) is re-inserted as bit 64.
        pshufb          XL, [rel byteswap_const]
        movdqa          XKEY1, XL
        psllq           XKEY1, 1
        ptest           XL, [rel xmm_bit63]
        jz              %%_K1_no_carry_bit_sse
        ;; set carry bit
        por             XKEY1, [rel xmm_bit64]
%%_K1_no_carry_bit_sse:
        ptest           XL, [rel xmm_bit127]
        jz              %%_K1_msb_is_zero_sse
        ;; XOR const_Rb
        pxor            XKEY1, [rel const_Rb]
%%_K1_msb_is_zero_sse:

        ;; Step 3.  if MSB(K1) is equal to 0
        ;;          then    K2 := K1 << 1 ;
        ;;          else    K2 := (K1 << 1) XOR const_Rb ;
        ;; Same doubling as step 2, applied to K1 (still byte-reversed).
        movdqa          XKEY2, XKEY1
        psllq           XKEY2, 1
        ptest           XKEY1, [rel xmm_bit63]
        jz              %%_K2_no_carry_bit_sse
        ;; set carry bit
        por             XKEY2, [rel xmm_bit64]
%%_K2_no_carry_bit_sse:
        ptest           XKEY1, [rel xmm_bit127]
        jz              %%_K2_msb_is_zero_sse
        ;; XOR const_Rb
        pxor            XKEY2, [rel const_Rb]
%%_K2_msb_is_zero_sse:

        ;; Step 4.  return K1, K2
        ;; Undo the byte reversal before storing the subkeys.
        pshufb          XKEY1, [rel byteswap_const]
        pshufb          XKEY2, [rel byteswap_const]
        movdqu          [KEY1], XKEY1
        movdqu          [KEY2], XKEY2

%%_aes_cmac_subkey_gen_sse_return:

%ifdef SAFE_DATA
        ;; Clear registers that may hold key-derived material
        clear_scratch_gps_asm
        clear_scratch_xmms_sse_asm
%endif
%endmacro
228
229
;;; AES_CMAC_SUBKEY_GEN_AVX NROUNDS
;;;
;;; Emits the body of a CMAC subkey generation routine (RFC 4493,
;;; Generate_Subkey) using VEX-encoded (AVX) instructions.
;;;
;;; NROUNDS : number of full AES rounds before the final round:
;;;           9 (AES-128), 11 (AES-192) or 13 (AES-256).
;;;           Round key i is read from [KEY_EXP + 16*i].
;;;
;;; Inputs  : KEY_EXP    - expanded key schedule. Round key 0 is loaded with
;;;                        vmovdqu and the remaining keys are VEX memory
;;;                        operands, so no alignment is required here.
;;;           KEY1, KEY2 - output pointers, 16 bytes each (stored with vmovdqu)
;;; Clobbers: XL, XKEY1, XKEY2, flags; with SAFE_DATA also whatever the
;;;           register clearing macros touch.
;;; Note    : execution falls through the end of the macro; the caller is
;;;           expected to follow it with "ret".
%macro AES_CMAC_SUBKEY_GEN_AVX 1
%define %%NROUNDS       %1

;; Reject unsupported round counts at assembly time instead of silently
;; generating AES-128 code for them.
%if (%%NROUNDS != 9) && (%%NROUNDS != 11) && (%%NROUNDS != 13)
%error "AES_CMAC_SUBKEY_GEN_AVX: NROUNDS must be 9, 11 or 13"
%endif

%ifdef SAFE_PARAM
        ;; Skip all memory accesses if any of the pointers is NULL
        test    KEY_EXP, KEY_EXP
        jz      %%_aes_cmac_subkey_gen_avx_return
        test    KEY1, KEY1
        jz      %%_aes_cmac_subkey_gen_avx_return
        test    KEY2, KEY2
        jz      %%_aes_cmac_subkey_gen_avx_return
%endif

        ;; Step 1.  L := AES(K, const_Zero) ;
        ;; The plaintext is all zeroes, so the initial AddRoundKey result is
        ;; simply round key 0.
        vmovdqu         XL, [KEY_EXP + 16*0]            ; 0. ARK xor const_Zero
%assign %%round 1
%rep %%NROUNDS
        vaesenc         XL, XL, [KEY_EXP + 16*%%round]  ; full round
%assign %%round (%%round + 1)
%endrep
        vaesenclast     XL, XL, [KEY_EXP + 16*%%round]  ; final round

        ;; Step 2.  if MSB(L) is equal to 0
        ;;          then    K1 := L << 1 ;
        ;;          else    K1 := (L << 1) XOR const_Rb ;
        ;; Reverse the bytes so the block can be treated as a 128-bit integer
        ;; whose bit 127 is MSB(L). vpsllq shifts each qword independently, so
        ;; the bit leaving the low qword (bit 63) is re-inserted as bit 64.
        vpshufb         XL, XL, [rel byteswap_const]
        vpsllq          XKEY1, XL, 1
        vptest          XL, [rel xmm_bit63]
        jz              %%_K1_no_carry_bit_avx
        ;; set carry bit
        vpor            XKEY1, XKEY1, [rel xmm_bit64]
%%_K1_no_carry_bit_avx:
        vptest          XL, [rel xmm_bit127]
        jz              %%_K1_msb_is_zero_avx
        ;; XOR const_Rb
        vpxor           XKEY1, XKEY1, [rel const_Rb]
%%_K1_msb_is_zero_avx:

        ;; Step 3.  if MSB(K1) is equal to 0
        ;;          then    K2 := K1 << 1 ;
        ;;          else    K2 := (K1 << 1) XOR const_Rb ;
        ;; Same doubling as step 2, applied to K1 (still byte-reversed).
        vpsllq          XKEY2, XKEY1, 1
        vptest          XKEY1, [rel xmm_bit63]
        jz              %%_K2_no_carry_bit_avx
        ;; set carry bit
        vpor            XKEY2, XKEY2, [rel xmm_bit64]
%%_K2_no_carry_bit_avx:
        vptest          XKEY1, [rel xmm_bit127]
        jz              %%_K2_msb_is_zero_avx
        ;; XOR const_Rb
        vpxor           XKEY2, XKEY2, [rel const_Rb]
%%_K2_msb_is_zero_avx:

        ;; Step 4.  return K1, K2
        ;; Undo the byte reversal before storing the subkeys.
        vpshufb         XKEY1, XKEY1, [rel byteswap_const]
        vpshufb         XKEY2, XKEY2, [rel byteswap_const]
        vmovdqu         [KEY1], XKEY1
        vmovdqu         [KEY2], XKEY2

%%_aes_cmac_subkey_gen_avx_return:

%ifdef SAFE_DATA
        ;; Clear registers that may hold key-derived material
        clear_scratch_gps_asm
        clear_scratch_xmms_avx_asm
%endif
%endmacro
309
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; void aes_cmac_subkey_gen_sse(const void *key_exp, void *key1, void *key2)
;;;
;;; AES-128 CMAC subkey generation, SSE code path using AES-NI instructions.
;;;
;;; key_exp : IN  : pointer to the expanded AES-128 encryption key structure
;;; key1    : OUT : pointer to the 16-byte buffer receiving subkey 1
;;; key2    : OUT : pointer to the 16-byte buffer receiving subkey 2
;;;
;;; The body comes from AES_CMAC_SUBKEY_GEN_SSE (9 full rounds + final round).
;;; See aes_cmac_subkey_gen() above for operation details.
align 32
MKGLOBAL(aes_cmac_subkey_gen_sse,function,)
aes_cmac_subkey_gen_sse:
        AES_CMAC_SUBKEY_GEN_SSE 9
        ret
324
;;; void aes_cmac_subkey_gen_sse_no_aesni(const void *key_exp,
;;;                                       void *key1,
;;;                                       void *key2)
;;;
;;; Same operation as aes_cmac_subkey_gen_sse (AES-128), but the AES rounds go
;;; through the EMULATE_AESENC / EMULATE_AESENCLAST macros rather than the
;;; AES-NI instructions.
align 32
MKGLOBAL(aes_cmac_subkey_gen_sse_no_aesni,function,)
aes_cmac_subkey_gen_sse_no_aesni:
        AES_CMAC_SUBKEY_GEN_SSE 9, no_aesni
        ret
330
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; void aes_cmac_256_subkey_gen_sse(const void *key_exp,
;;;                                  void *key1,
;;;                                  void *key2)
;;;
;;; AES-256 CMAC subkey generation, SSE code path using AES-NI instructions.
;;;
;;; key_exp : IN  : pointer to the expanded AES-256 encryption key structure
;;; key1    : OUT : pointer to the 16-byte buffer receiving subkey 1
;;; key2    : OUT : pointer to the 16-byte buffer receiving subkey 2
;;;
;;; The body comes from AES_CMAC_SUBKEY_GEN_SSE (13 full rounds + final round).
;;; See aes_cmac_subkey_gen() above for operation details.
align 32
MKGLOBAL(aes_cmac_256_subkey_gen_sse,function,)
aes_cmac_256_subkey_gen_sse:
        AES_CMAC_SUBKEY_GEN_SSE 13
        ret
347
;;; void aes_cmac_256_subkey_gen_sse_no_aesni(const void *key_exp,
;;;                                           void *key1,
;;;                                           void *key2)
;;;
;;; Same operation as aes_cmac_256_subkey_gen_sse (AES-256), but the AES rounds
;;; go through the EMULATE_AESENC / EMULATE_AESENCLAST macros rather than the
;;; AES-NI instructions.
align 32
MKGLOBAL(aes_cmac_256_subkey_gen_sse_no_aesni,function,)
aes_cmac_256_subkey_gen_sse_no_aesni:
        AES_CMAC_SUBKEY_GEN_SSE 13, no_aesni
        ret
353
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; void aes_cmac_subkey_gen_avx(const void *key_exp, void *key1, void *key2)
;;;
;;; AES-128 CMAC subkey generation, AVX (VEX-encoded) code path.
;;; aes_cmac_subkey_gen_avx2 and aes_cmac_subkey_gen_avx512 are additional
;;; exported names for the same entry point.
;;;
;;; key_exp : IN  : pointer to the expanded AES-128 encryption key structure
;;; key1    : OUT : pointer to the 16-byte buffer receiving subkey 1
;;; key2    : OUT : pointer to the 16-byte buffer receiving subkey 2
;;;
;;; The body comes from AES_CMAC_SUBKEY_GEN_AVX (9 full rounds + final round).
;;; See aes_cmac_subkey_gen() above for operation details.
align 32
MKGLOBAL(aes_cmac_subkey_gen_avx,function,)
MKGLOBAL(aes_cmac_subkey_gen_avx2,function,)
MKGLOBAL(aes_cmac_subkey_gen_avx512,function,)
aes_cmac_subkey_gen_avx:
aes_cmac_subkey_gen_avx2:
aes_cmac_subkey_gen_avx512:
        AES_CMAC_SUBKEY_GEN_AVX 9
        ret
372
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; void aes_cmac_256_subkey_gen_avx(const void *key_exp,
;;;                                  void *key1,
;;;                                  void *key2)
;;;
;;; AES-256 CMAC subkey generation, AVX (VEX-encoded) code path.
;;; aes_cmac_256_subkey_gen_avx2 and aes_cmac_256_subkey_gen_avx512 are
;;; additional exported names for the same entry point.
;;;
;;; key_exp : IN  : pointer to the expanded AES-256 encryption key structure
;;; key1    : OUT : pointer to the 16-byte buffer receiving subkey 1
;;; key2    : OUT : pointer to the 16-byte buffer receiving subkey 2
;;;
;;; The body comes from AES_CMAC_SUBKEY_GEN_AVX (13 full rounds + final round).
;;; See aes_cmac_subkey_gen() above for operation details.
align 32
MKGLOBAL(aes_cmac_256_subkey_gen_avx,function,)
MKGLOBAL(aes_cmac_256_subkey_gen_avx2,function,)
MKGLOBAL(aes_cmac_256_subkey_gen_avx512,function,)
aes_cmac_256_subkey_gen_avx:
aes_cmac_256_subkey_gen_avx2:
aes_cmac_256_subkey_gen_avx512:
        AES_CMAC_SUBKEY_GEN_AVX 13
        ret
393
;; ELF builds only: emit an empty .note.GNU-stack section flagged as
;; non-executable so the linker does not request an executable stack.
%ifdef LINUX
section .note.GNU-stack progbits noalloc noexec nowrite
%endif
397