/*-
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008-2010, Intel Corporation
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the
 *   distribution.
 *
 * - Neither the name of Intel Corporation nor the names of its
 *   contributors may be used to endorse or promote products
 *   derived from this software without specific prior written
 *   permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/crypto/aesni/aeskeys_amd64.S,v 1.2 2011/03/02 14:56:58 kib Exp $
 */

/*
 * AES key schedule generation using the AES-NI instructions.
 *
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 * The file is run through the C preprocessor (see the #include below).
 *
 * The two public routines take their arguments in %rdi, %rsi and %edx.
 * NOTE(review): that matches the first three integer argument registers
 * of the System V AMD64 calling convention; confirm against the C
 * prototypes used by the callers.
 *
 * The AES-NI instructions are emitted as raw .byte sequences; the
 * mnemonic each sequence encodes is kept in the // comment directly
 * above it.
 *
 * Every store into a key schedule uses an aligned move (movaps/movdqa),
 * so schedule buffers must be 16-byte aligned.
 */

#include <machine/asmacros.h>

	.text

/*
 * _key_expansion_128 / _key_expansion_256a
 *
 * Produce one 16-byte round key from the previous one.
 *
 * In:    %xmm0 = previous round key
 *        %xmm1 = aeskeygenassist result (dword 3 is used)
 *        %xmm4 = low dword must be zero (cleared once by aesni_set_enckey;
 *                both shufps below leave that dword zero again)
 *        %rsi  = where to store the new round key (16-byte aligned)
 * Out:   %xmm0 = new round key, also stored at (%rsi)
 *        %rsi  = advanced by 16
 * Clobb: %xmm1, %xmm4
 */
ENTRY(_key_expansion_128)
_key_expansion_256a:
	.cfi_startproc
	pshufd	$0b11111111,%xmm1,%xmm1		# broadcast dword 3 of xmm1
	/*
	 * The two shufps/pxor pairs turn xmm0 = {k0,k1,k2,k3} into the
	 * running XOR {k0, k0^k1, k0^k1^k2, k0^k1^k2^k3}.
	 */
	shufps	$0b00010000,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm0			# fold in the key-assist word
	movaps	%xmm0,(%rsi)
	addq	$0x10,%rsi
	retq
	.cfi_endproc
END(_key_expansion_128)

/*
 * _key_expansion_192a
 *
 * 192-bit schedule step that emits 32 bytes.  The 192-bit key state is
 * held as 16 bytes in %xmm0 plus 8 bytes in the low half of %xmm2.
 *
 * In:    %xmm0 = first 16 bytes of the current key state
 *        %xmm2 = remaining 8 bytes of the current key state (low qword)
 *        %xmm1 = aeskeygenassist result (dword 1 is used)
 *        %xmm4 = low dword must be zero
 *        %rsi  = output position (16-byte aligned)
 * Out:   (%rsi)     = old xmm2 low qword | new xmm0 low qword
 *        0x10(%rsi) = new xmm0 high qword | new xmm2 low qword
 *        %xmm0, %xmm2 = updated key state
 *        %rsi  = advanced by 32
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
ENTRY(_key_expansion_192a)
	.cfi_startproc
	pshufd	$0b01010101,%xmm1,%xmm1		# broadcast dword 1 of xmm1
	shufps	$0b00010000,%xmm0,%xmm4		# running XOR over xmm0 ...
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm0			# ... then fold in the assist word
	movaps	%xmm2,%xmm5
	movaps	%xmm2,%xmm6			# keep the old tail for the store below
	pslldq	$4,%xmm5			# tail shifted up by one dword
	pshufd	$0b11111111,%xmm0,%xmm3		# broadcast dword 3 of the new xmm0
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2			# xmm2 = updated tail
	movaps	%xmm0,%xmm1
	shufps	$0b01000100,%xmm0,%xmm6		# old tail | new xmm0 low qword
	movaps	%xmm6,(%rsi)
	shufps	$0b01001110,%xmm2,%xmm1		# new xmm0 high qword | new tail
	movaps	%xmm1,0x10(%rsi)
	addq	$0x20,%rsi
	retq
	.cfi_endproc
END(_key_expansion_192a)

/*
 * _key_expansion_192b
 *
 * 192-bit schedule step that emits 16 bytes.  Performs the same state
 * update as _key_expansion_192a but only stores the new %xmm0; the new
 * tail stays in %xmm2 and is written out by the following
 * _key_expansion_192a call.
 *
 * In:    %xmm0, %xmm2 = current key state (as for _key_expansion_192a)
 *        %xmm1 = aeskeygenassist result (dword 1 is used)
 *        %xmm4 = low dword must be zero
 *        %rsi  = output position (16-byte aligned)
 * Out:   (%rsi) = new xmm0
 *        %xmm0, %xmm2 = updated key state
 *        %rsi  = advanced by 16
 * Clobb: %xmm1, %xmm3, %xmm4, %xmm5
 */
ENTRY(_key_expansion_192b)
	.cfi_startproc
	pshufd	$0b01010101,%xmm1,%xmm1		# broadcast dword 1 of xmm1
	shufps	$0b00010000,%xmm0,%xmm4		# running XOR over xmm0 ...
	pxor	%xmm4,%xmm0
	shufps	$0b10001100,%xmm0,%xmm4
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm0			# ... then fold in the assist word
	movaps	%xmm2,%xmm5
	pslldq	$4,%xmm5			# tail shifted up by one dword
	pshufd	$0b11111111,%xmm0,%xmm3		# broadcast dword 3 of the new xmm0
	pxor	%xmm3,%xmm2
	pxor	%xmm5,%xmm2			# xmm2 = updated tail
	movaps	%xmm0,(%rsi)
	addq	$0x10,%rsi
	retq
	.cfi_endproc
END(_key_expansion_192b)

/*
 * _key_expansion_256b
 *
 * Second half-step of the 256-bit schedule: same running-XOR update as
 * _key_expansion_256a, but applied to %xmm2 and using dword 2 of the
 * aeskeygenassist result.
 *
 * In:    %xmm2 = round key from two steps back
 *        %xmm1 = aeskeygenassist result (dword 2 is used)
 *        %xmm4 = low dword must be zero
 *        %rsi  = where to store the new round key (16-byte aligned)
 * Out:   %xmm2 = new round key, also stored at (%rsi)
 *        %rsi  = advanced by 16
 * Clobb: %xmm1, %xmm4
 */
ENTRY(_key_expansion_256b)
	.cfi_startproc
	pshufd	$0b10101010,%xmm1,%xmm1		# broadcast dword 2 of xmm1
	shufps	$0b00010000,%xmm2,%xmm4
	pxor	%xmm4,%xmm2
	shufps	$0b10001100,%xmm2,%xmm4
	pxor	%xmm4,%xmm2
	pxor	%xmm1,%xmm2
	movaps	%xmm2,(%rsi)
	addq	$0x10,%rsi
	retq
	.cfi_endproc
END(_key_expansion_256b)

/*
 * aesni_set_enckey
 *
 * Expand a user key into an AES encryption key schedule.
 *
 * In:    %rdi = user key; read with unaligned loads (16 bytes, plus
 *               8 more on the 192-bit path or 16 more on the 256-bit path)
 *        %rsi = output schedule, 16-byte aligned
 *        %edx = round count, compared unsigned against 12:
 *                 below 12 -> 128-bit path, writes 11 round keys (176 bytes)
 *                 equal 12 -> 192-bit path, writes 13 round keys (208 bytes)
 *                 above 12 -> 256-bit path, writes 15 round keys (240 bytes)
 *               (AES defines 10, 12 and 14 rounds for those key sizes.)
 * Out:   schedule written starting at the incoming %rsi
 * Clobb: %rsi, %xmm0-%xmm6, flags
 */
ENTRY(aesni_set_enckey)
	.cfi_startproc
	movups	(%rdi),%xmm0		# user key (first 16 bytes)
	movaps	%xmm0,(%rsi)
	addq	$0x10,%rsi		# key addr
	pxor	%xmm4,%xmm4		# xmm4 is assumed 0 in _key_expansion_x
	cmpl	$12,%edx
	jb	.Lenc_key128
	je	.Lenc_key192
	/* 256-bit key: the second 16 bytes form the second round key. */
	movups	0x10(%rdi),%xmm2	# other user key
	movaps	%xmm2,(%rsi)
	addq	$0x10,%rsi
//	aeskeygenassist $0x1,%xmm2,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_256a
//	aeskeygenassist $0x1,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_256b
//	aeskeygenassist $0x2,%xmm2,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_256a
//	aeskeygenassist $0x2,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_256b
//	aeskeygenassist $0x4,%xmm2,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_256a
//	aeskeygenassist $0x4,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_256b
//	aeskeygenassist $0x8,%xmm2,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_256a
//	aeskeygenassist $0x8,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_256b
//	aeskeygenassist $0x10,%xmm2,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_256a
//	aeskeygenassist $0x10,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_256b
//	aeskeygenassist $0x20,%xmm2,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_256a
//	aeskeygenassist $0x20,%xmm0,%xmm1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_256b
//	aeskeygenassist $0x40,%xmm2,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_256a
	retq
.Lenc_key192:
	/* 192-bit key: the trailing 8 bytes live in the low half of xmm2. */
	movq	0x10(%rdi),%xmm2	# other user key
//	aeskeygenassist $0x1,%xmm2,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x01
	call	_key_expansion_192a
//	aeskeygenassist $0x2,%xmm2,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x02
	call	_key_expansion_192b
//	aeskeygenassist $0x4,%xmm2,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x04
	call	_key_expansion_192a
//	aeskeygenassist $0x8,%xmm2,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x08
	call	_key_expansion_192b
//	aeskeygenassist $0x10,%xmm2,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x10
	call	_key_expansion_192a
//	aeskeygenassist $0x20,%xmm2,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x20
	call	_key_expansion_192b
//	aeskeygenassist $0x40,%xmm2,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x40
	call	_key_expansion_192a
//	aeskeygenassist $0x80,%xmm2,%xmm1	# round 8
	.byte	0x66,0x0f,0x3a,0xdf,0xca,0x80
	call	_key_expansion_192b
	retq
.Lenc_key128:
//	aeskeygenassist $0x1,%xmm0,%xmm1	# round 1
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x01
	call	_key_expansion_128
//	aeskeygenassist $0x2,%xmm0,%xmm1	# round 2
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x02
	call	_key_expansion_128
//	aeskeygenassist $0x4,%xmm0,%xmm1	# round 3
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x04
	call	_key_expansion_128
//	aeskeygenassist $0x8,%xmm0,%xmm1	# round 4
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x08
	call	_key_expansion_128
//	aeskeygenassist $0x10,%xmm0,%xmm1	# round 5
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x10
	call	_key_expansion_128
//	aeskeygenassist $0x20,%xmm0,%xmm1	# round 6
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x20
	call	_key_expansion_128
//	aeskeygenassist $0x40,%xmm0,%xmm1	# round 7
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x40
	call	_key_expansion_128
//	aeskeygenassist $0x80,%xmm0,%xmm1	# round 8
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x80
	call	_key_expansion_128
//	aeskeygenassist $0x1b,%xmm0,%xmm1	# round 9
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x1b
	call	_key_expansion_128
//	aeskeygenassist $0x36,%xmm0,%xmm1	# round 10
	.byte	0x66,0x0f,0x3a,0xdf,0xc8,0x36
	call	_key_expansion_128
	retq
	.cfi_endproc
END(aesni_set_enckey)

/*
 * aesni_set_deckey
 *
 * Build a decryption key schedule from an encryption key schedule: the
 * round keys are written in reverse order, and every key except the
 * first and last one copied is passed through aesimc.
 *
 * In:    %rdi = encryption schedule (%edx + 1 round keys), 16-byte aligned
 *        %rsi = output decryption schedule, 16-byte aligned
 *        %edx = round count
 * Out:   %edx + 1 round keys written starting at the incoming %rsi
 * Clobb: %rax, %rdx, %rdi, %rsi, %xmm0, %xmm1, flags
 *
 * NOTE(review): the loop counter is decremented before it is first
 * tested, so the loop only terminates as intended for %edx >= 2.
 * Callers presumably pass 10, 12 or 14; confirm.
 */
ENTRY(aesni_set_deckey)
	.cfi_startproc
	movslq	%edx,%rax
	shlq	$4,%rax
	addq	%rax,%rdi		# rdi = last encryption round key
	movdqa	(%rdi),%xmm0
	movdqa	%xmm0,(%rsi)		# copied unchanged to the first slot
	decl	%edx			# rounds - 1 keys go through aesimc
1:
	addq	$0x10,%rsi
	subq	$0x10,%rdi
//	aesimc	(%rdi),%xmm1
	.byte	0x66,0x0f,0x38,0xdb,0x0f
	movdqa	%xmm1,(%rsi)
	decl	%edx
	jne	1b

	addq	$0x10,%rsi
	subq	$0x10,%rdi		# rdi is back at the first encryption key
	movdqa	(%rdi),%xmm0
	movdqa	%xmm0,(%rsi)		# copied unchanged to the last slot
	retq
	.cfi_endproc
END(aesni_set_deckey)