1/* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 .text 6 7#define IV_OFFSET 256 8 9/* 10 * Warning: the length values used in this module are "unsigned int" 11 * in C, which is 32-bit. When they're passed in registers, use only 12 * the low 32 bits, because the top half is unspecified. 13 * 14 * This is called from C code, so the contents of those bits can 15 * depend on the C compiler's optimization decisions. This means that 16 * mistakes might not be obvious in testing if those bits happen to be 17 * zero in your build. 18 * 19 * Exception: 32-bit lea instructions use a 64-bit address because the 20 * address size doesn't affect the result, and that form is more 21 * compactly encoded and preferred by compilers over a 32-bit address. 22 */ 23 24/* in %rdi : the key 25 in %rsi : buffer for expanded key 26*/ 27 .type intel_aes_encrypt_init_128,@function 28 .globl intel_aes_encrypt_init_128 29 .align 16 30intel_aes_encrypt_init_128: 31 movups (%rdi), %xmm1 32 movups %xmm1, (%rsi) 33 leaq 16(%rsi), %rsi 34 xorl %eax, %eax 35 36 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */ 37 call key_expansion128 38 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */ 39 call key_expansion128 40 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */ 41 call key_expansion128 42 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */ 43 call key_expansion128 44 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */ 45 call key_expansion128 46 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */ 47 call key_expansion128 48 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */ 49 call key_expansion128 50 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* 
aeskeygenassist $0x80, %xmm1, %xmm2 */ 51 call key_expansion128 52 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */ 53 call key_expansion128 54 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */ 55 call key_expansion128 56 57 ret 58 .size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128 59 60 61/* in %rdi : the key 62 in %rsi : buffer for expanded key 63*/ 64 .type intel_aes_decrypt_init_128,@function 65 .globl intel_aes_decrypt_init_128 66 .align 16 67intel_aes_decrypt_init_128: 68 movups (%rdi), %xmm1 69 movups %xmm1, (%rsi) 70 leaq 16(%rsi), %rsi 71 xorl %eax, %eax 72 73 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */ 74 call key_expansion128 75 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 76 movups %xmm2, -16(%rsi) 77 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */ 78 call key_expansion128 79 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 80 movups %xmm2, -16(%rsi) 81 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */ 82 call key_expansion128 83 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 84 movups %xmm2, -16(%rsi) 85 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */ 86 call key_expansion128 87 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 88 movups %xmm2, -16(%rsi) 89 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */ 90 call key_expansion128 91 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 92 movups %xmm2, -16(%rsi) 93 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */ 94 call key_expansion128 95 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 96 movups %xmm2, -16(%rsi) 97 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */ 98 call key_expansion128 99 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 100 movups %xmm2, -16(%rsi) 101 
.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */ 102 call key_expansion128 103 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 104 movups %xmm2, -16(%rsi) 105 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */ 106 call key_expansion128 107 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 108 movups %xmm2, -16(%rsi) 109 .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */ 110 call key_expansion128 /* the last round key is left exactly as key_expansion128 stored it: no aesimc follows this call */ 111 112 ret 113 .size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128 114 115 /* key_expansion128: helper called right after each aeskeygenassist in the two init_128 routines. in %xmm1 : previous round key; in %xmm2 : aeskeygenassist result (its top dword is broadcast by pshufd $0xff); in %eax : moved into %xmm3 as the starting value for the shufps/pxor sequence - both callers clear it with xorl before the first call and it is not modified here; in %rsi : address to store the new round key. out %xmm1 : new round key, also stored at (%rsi); %rsi is advanced by 16. Overwrites %xmm2 and %xmm3. */ 116 .type key_expansion128,@function 117 .align 16 118key_expansion128: 119 movd %eax, %xmm3 120 pshufd $0xff, %xmm2, %xmm2 121 shufps $0x10, %xmm1, %xmm3 122 pxor %xmm3, %xmm1 123 shufps $0x8c, %xmm1, %xmm3 124 pxor %xmm2, %xmm1 125 pxor %xmm3, %xmm1 126 movdqu %xmm1, (%rsi) 127 addq $16, %rsi 128 ret 129 .size key_expansion128, .-key_expansion128 130 131 132/* in %rdi : cx - context 133 in %rsi : output - pointer to output buffer 134 in %rdx : outputLen - pointer to variable for length of output 135 (already filled in by caller) 136 in %ecx : maxOutputLen - length of output buffer 137 (already checked by caller) 138 in %r8 : input - pointer to input buffer 139 in %r9d : inputLen - length of input buffer 140 on stack: blocksize - AES blocksize (always 16, unused) 141*/ 142 .type intel_aes_encrypt_ecb_128,@function 143 .globl intel_aes_encrypt_ecb_128 144 .align 16 145intel_aes_encrypt_ecb_128: 146 movdqu (%rdi), %xmm2 147 movdqu 160(%rdi), %xmm12 148 xor %eax, %eax 149// cmpl $8*16, %r9d 150 cmpl $128, %r9d 151 jb 1f 152// leal -8*16(%r9), %r11d 153 leal -128(%r9), %r11d 1542: movdqu (%r8, %rax), %xmm3 155 movdqu 16(%r8, %rax), %xmm4 156 movdqu 32(%r8, %rax), %xmm5
165 pxor %xmm2, %xmm6 166 pxor %xmm2, %xmm7 167 pxor %xmm2, %xmm8 168 pxor %xmm2, %xmm9 169 pxor %xmm2, %xmm10 170 171// complete loop unrolling 172 movdqu 16(%rdi), %xmm1 173 movdqu 32(%rdi), %xmm11 174 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 175 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 176 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 177 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 178 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 179 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 180 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 181 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 182 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ 183 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ 184 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ 185 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ 186 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ 187 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ 188 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ 189 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ 190 191 movdqu 48(%rdi), %xmm1 192 movdqu 64(%rdi), %xmm11 193 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 194 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 195 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 196 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 197 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 198 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 199 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 200 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 201 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ 202 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ 203 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, 
%xmm5 */ 204 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ 205 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ 206 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ 207 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ 208 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ 209 210 movdqu 80(%rdi), %xmm1 211 movdqu 96(%rdi), %xmm11 212 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 213 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 214 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 215 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 216 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 217 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 218 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 219 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 220 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ 221 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ 222 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ 223 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ 224 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ 225 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ 226 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ 227 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ 228 229 movdqu 112(%rdi), %xmm1 230 movdqu 128(%rdi), %xmm11 231 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 232 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 233 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 234 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 235 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 236 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 237 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 238 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, 
%xmm10 */ 239 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ 240 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ 241 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ 242 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ 243 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ 244 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ 245 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ 246 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ 247 248 movdqu 144(%rdi), %xmm1 249 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 250 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 251 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 252 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 253 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 254 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 255 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 256 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 257 .byte 0x66,0x41,0x0f,0x38,0xdd,0xdc /* aesenclast %xmm12, %xmm3 */ 258 .byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 /* aesenclast %xmm12, %xmm4 */ 259 .byte 0x66,0x41,0x0f,0x38,0xdd,0xec /* aesenclast %xmm12, %xmm5 */ 260 .byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 /* aesenclast %xmm12, %xmm6 */ 261 .byte 0x66,0x41,0x0f,0x38,0xdd,0xfc /* aesenclast %xmm12, %xmm7 */ 262 .byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 /* aesenclast %xmm12, %xmm8 */ 263 .byte 0x66,0x45,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm9 */ 264 .byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 /* aesenclast %xmm12, %xmm10 */ 265 266 movdqu %xmm3, (%rsi, %rax) 267 movdqu %xmm4, 16(%rsi, %rax) 268 movdqu %xmm5, 32(%rsi, %rax) 269 movdqu %xmm6, 48(%rsi, %rax) 270 movdqu %xmm7, 64(%rsi, %rax) 271 movdqu %xmm8, 80(%rsi, %rax) 272 movdqu %xmm9, 96(%rsi, %rax) 273 movdqu %xmm10, 112(%rsi, %rax) 274// addl $8*16, %eax 275 addl $128, %eax 276 cmpl %r11d, %eax 277 jbe 2b 
2781: cmpl %eax, %r9d 279 je 5f 280 281 movdqu 16(%rdi), %xmm3 282 movdqu 32(%rdi), %xmm4 283 movdqu 48(%rdi), %xmm5 284 movdqu 64(%rdi), %xmm6 285 movdqu 80(%rdi), %xmm7 286 movdqu 96(%rdi), %xmm8 287 movdqu 112(%rdi), %xmm9 288 movdqu 128(%rdi), %xmm10 289 movdqu 144(%rdi), %xmm11 290 2914: movdqu (%r8, %rax), %xmm1 292 pxor %xmm2, %xmm1 293 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ 294 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ 295 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ 296 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ 297 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ 298 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ 299 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ 300 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ 301 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ 302 .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */ 303 movdqu %xmm1, (%rsi, %rax) 304 addl $16, %eax 305 cmpl %eax, %r9d 306 jne 4b 307 3085: xor %eax, %eax 309 ret 310 .size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128 311 312 313/* in %rdi : cx - context 314 in %rsi : output - pointer to output buffer 315 in %rdx : outputLen - pointer to variable for length of output 316 (already filled in by caller) 317 in %ecx : maxOutputLen - length of output buffer 318 (already checked by caller) 319 in %r8 : input - pointer to input buffer 320 in %r9d : inputLen - length of input buffer 321 on stack: blocksize - AES blocksize (always 16, unused) 322*/ 323 .type intel_aes_decrypt_ecb_128,@function 324 .globl intel_aes_decrypt_ecb_128 325 .align 16 326intel_aes_decrypt_ecb_128: 327 movdqu (%rdi), %xmm2 328 movdqu 160(%rdi), %xmm12 329 xorl %eax, %eax 330// cmpl $8*16, %r9d 331 cmpl $128, %r9d 332 jb 1f 333// leal -8*16(%r9), %r11d 334 leal -128(%r9), %r11d 3352: movdqu (%r8, %rax), %xmm3 336 movdqu 16(%r8, %rax), %xmm4 337 movdqu 32(%r8, %rax), %xmm5 
338 movdqu 48(%r8, %rax), %xmm6 339 movdqu 64(%r8, %rax), %xmm7 340 movdqu 80(%r8, %rax), %xmm8 341 movdqu 96(%r8, %rax), %xmm9 342 movdqu 112(%r8, %rax), %xmm10 343 pxor %xmm12, %xmm3 344 pxor %xmm12, %xmm4 345 pxor %xmm12, %xmm5 346 pxor %xmm12, %xmm6 347 pxor %xmm12, %xmm7 348 pxor %xmm12, %xmm8 349 pxor %xmm12, %xmm9 350 pxor %xmm12, %xmm10 351 352// complete loop unrolling 353 movdqu 144(%rdi), %xmm1 354 movdqu 128(%rdi), %xmm11 355 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 356 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 357 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 358 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 359 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 360 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 361 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 362 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 363 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 364 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 365 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 366 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 367 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 368 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 369 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 370 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 371 372 movdqu 112(%rdi), %xmm1 373 movdqu 96(%rdi), %xmm11 374 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 375 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 376 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 377 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 378 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 379 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 380 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 381 .byte 
0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 382 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 383 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 384 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 385 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 386 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 387 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 388 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 389 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 390 391 movdqu 80(%rdi), %xmm1 392 movdqu 64(%rdi), %xmm11 393 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 394 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 395 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 396 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 397 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 398 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 399 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 400 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 401 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 402 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 403 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 404 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 405 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 406 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 407 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 408 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 409 410 movdqu 48(%rdi), %xmm1 411 movdqu 32(%rdi), %xmm11 412 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 413 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 414 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 415 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 
416 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 417 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 418 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 419 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 420 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 421 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 422 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 423 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 424 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 425 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 426 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 427 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 428 429 movdqu 16(%rdi), %xmm1 430 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 431 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 432 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 433 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 434 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 435 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 436 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 437 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 438 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ 439 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ 440 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ 441 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ 442 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ 443 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ 444 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ 445 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ 446 447 movdqu %xmm3, (%rsi, %rax) 448 movdqu %xmm4, 16(%rsi, %rax) 449 movdqu %xmm5, 32(%rsi, %rax) 450 movdqu %xmm6, 48(%rsi, 
%rax) 451 movdqu %xmm7, 64(%rsi, %rax) 452 movdqu %xmm8, 80(%rsi, %rax) 453 movdqu %xmm9, 96(%rsi, %rax) 454 movdqu %xmm10, 112(%rsi, %rax) 455// addl $8*16, %eax 456 addl $128, %eax 457 cmpl %r11d, %eax 458 jbe 2b 4591: cmpl %eax, %r9d 460 je 5f 461 462 movdqu 16(%rdi), %xmm3 463 movdqu 32(%rdi), %xmm4 464 movdqu 48(%rdi), %xmm5 465 movdqu 64(%rdi), %xmm6 466 movdqu 80(%rdi), %xmm7 467 movdqu 96(%rdi), %xmm8 468 movdqu 112(%rdi), %xmm9 469 movdqu 128(%rdi), %xmm10 470 movdqu 144(%rdi), %xmm11 471 /* single-block loop: one 16-byte block per iteration; round keys are applied from 144(%rdi) (%xmm11) down to 16(%rdi) (%xmm3), then aesdeclast with (%rdi) (%xmm2) */ 4724: movdqu (%r8, %rax), %xmm1 473 pxor %xmm12, %xmm1 474 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ 475 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ 476 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ 477 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ 478 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ 479 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ 480 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ 481 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ 482 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ 483 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ 484 movdqu %xmm1, (%rsi, %rax) 485 addl $16, %eax 486 cmpl %eax, %r9d 487 jne 4b 488 4895: xor %eax, %eax 490 ret 491 .size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128 492 493 494/* in %rdi : cx - context 495 in %rsi : output - pointer to output buffer 496 in %rdx : outputLen - pointer to variable for length of output 497 (already filled in by caller) 498 in %ecx : maxOutputLen - length of output buffer 499 (already checked by caller) 500 in %r8 : input - pointer to input buffer 501 in %r9d : inputLen - length of input buffer 502 on stack: blocksize - AES blocksize (always 16, unused) 503*/ 504 .type intel_aes_encrypt_cbc_128,@function 505 .globl intel_aes_encrypt_cbc_128 506 .align 16 507intel_aes_encrypt_cbc_128: 508 testl %r9d, %r9d 509 je 2f 510 511// leaq IV_OFFSET(%rdi), %rdx
512 leaq 256(%rdi), %rdx /* %rdx = cx + 256, where the IV is read and written back; this overwrites the outputLen pointer argument */ 513 514 movdqu (%rdx), %xmm0 /* iv */ 515 movdqu (%rdi), %xmm2 516 movdqu 16(%rdi), %xmm3 517 movdqu 32(%rdi), %xmm4 518 movdqu 48(%rdi), %xmm5 519 movdqu 64(%rdi), %xmm6 520 movdqu 80(%rdi), %xmm7 521 movdqu 96(%rdi), %xmm8 522 movdqu 112(%rdi), %xmm9 523 movdqu 128(%rdi), %xmm10 524 movdqu 144(%rdi), %xmm11 525 movdqu 160(%rdi), %xmm12 526 527 xorl %eax, %eax 5281: movdqu (%r8, %rax), %xmm1 529 pxor %xmm0, %xmm1 530 pxor %xmm2, %xmm1 531 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ 532 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ 533 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ 534 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ 535 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ 536 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ 537 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ 538 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ 539 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ 540 .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */ 541 movdqu %xmm1, (%rsi, %rax) 542 movdqa %xmm1, %xmm0 /* the block just written is XORed into the next input block */ 543 addl $16, %eax 544 cmpl %eax, %r9d 545 jne 1b 546 547 movdqu %xmm0, (%rdx) /* store the last output block back at cx + 256 */ 548 5492: xor %eax, %eax 550 ret 551 .size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128 552 553 554/* in %rdi : cx - context 555 in %rsi : output - pointer to output buffer 556 in %rdx : outputLen - pointer to variable for length of output 557 (already filled in by caller) 558 in %ecx : maxOutputLen - length of output buffer 559 (already checked by caller) 560 in %r8 : input - pointer to input buffer 561 in %r9d : inputLen - length of input buffer 562 on stack: blocksize - AES blocksize (always 16, unused) 563*/ 564 .type intel_aes_decrypt_cbc_128,@function 565 .globl intel_aes_decrypt_cbc_128 566 .align 16 567intel_aes_decrypt_cbc_128: 568// leaq IV_OFFSET(%rdi), %rdx 569 leaq 256(%rdi), %rdx 570 571 movdqu (%rdx), %xmm0 /* iv */ 572 movdqu (%rdi),
%xmm2 /* first key block */ 573 movdqu 160(%rdi), %xmm12 /* last key block */ 574 xorl %eax, %eax 575 cmpl $128, %r9d 576 jb 1f 577 leal -128(%r9), %r11d 5782: movdqu (%r8, %rax), %xmm3 /* 1st data block */ 579 movdqu 16(%r8, %rax), %xmm4 /* 2d data block */ 580 movdqu 32(%r8, %rax), %xmm5 581 movdqu 48(%r8, %rax), %xmm6 582 movdqu 64(%r8, %rax), %xmm7 583 movdqu 80(%r8, %rax), %xmm8 584 movdqu 96(%r8, %rax), %xmm9 585 movdqu 112(%r8, %rax), %xmm10 586 pxor %xmm12, %xmm3 587 pxor %xmm12, %xmm4 588 pxor %xmm12, %xmm5 589 pxor %xmm12, %xmm6 590 pxor %xmm12, %xmm7 591 pxor %xmm12, %xmm8 592 pxor %xmm12, %xmm9 593 pxor %xmm12, %xmm10 594 595// complete loop unrolling 596 movdqu 144(%rdi), %xmm1 597 movdqu 128(%rdi), %xmm11 598 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 599 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 600 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 601 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 602 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 603 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 604 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 605 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 606 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 607 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 608 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 609 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 610 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 611 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 612 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 613 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 614 615 movdqu 112(%rdi), %xmm1 616 movdqu 96(%rdi), %xmm11 617 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 618 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 619 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec 
%xmm1, %xmm5 */ 620 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 621 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 622 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 623 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 624 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 625 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 626 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 627 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 628 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 629 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 630 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 631 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 632 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 633 634 movdqu 80(%rdi), %xmm1 635 movdqu 64(%rdi), %xmm11 636 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 637 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 638 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 639 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 640 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 641 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 642 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 643 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 644 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 645 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 646 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 647 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 648 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 649 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 650 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 651 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 652 653 movdqu 48(%rdi), 
%xmm1 654 movdqu 32(%rdi), %xmm11 655 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 656 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 657 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 658 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 659 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 660 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 661 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 662 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 663 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 664 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 665 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 666 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 667 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 668 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 669 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 670 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 671 672 movdqu 16(%rdi), %xmm1 673 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 674 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 675 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 676 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 677 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 678 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 679 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 680 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 681 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ 682 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ 683 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ 684 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ 685 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ 686 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, 
%xmm8 */ 687 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ 688 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ 689 690 pxor %xmm0, %xmm3 691 movdqu (%r8, %rax), %xmm0 692 pxor %xmm0, %xmm4 693 movdqu 16(%r8, %rax), %xmm0 694 pxor %xmm0, %xmm5 695 movdqu 32(%r8, %rax), %xmm0 696 pxor %xmm0, %xmm6 697 movdqu 48(%r8, %rax), %xmm0 698 pxor %xmm0, %xmm7 699 movdqu 64(%r8, %rax), %xmm0 700 pxor %xmm0, %xmm8 701 movdqu 80(%r8, %rax), %xmm0 702 pxor %xmm0, %xmm9 703 movdqu 96(%r8, %rax), %xmm0 704 pxor %xmm0, %xmm10 705 movdqu 112(%r8, %rax), %xmm0 706 movdqu %xmm3, (%rsi, %rax) 707 movdqu %xmm4, 16(%rsi, %rax) 708 movdqu %xmm5, 32(%rsi, %rax) 709 movdqu %xmm6, 48(%rsi, %rax) 710 movdqu %xmm7, 64(%rsi, %rax) 711 movdqu %xmm8, 80(%rsi, %rax) 712 movdqu %xmm9, 96(%rsi, %rax) 713 movdqu %xmm10, 112(%rsi, %rax) 714 addl $128, %eax 715 cmpl %r11d, %eax 716 jbe 2b 7171: cmpl %eax, %r9d 718 je 5f 719 720 movdqu 16(%rdi), %xmm3 721 movdqu 32(%rdi), %xmm4 722 movdqu 48(%rdi), %xmm5 723 movdqu 64(%rdi), %xmm6 724 movdqu 80(%rdi), %xmm7 725 movdqu 96(%rdi), %xmm8 726 movdqu 112(%rdi), %xmm9 727 movdqu 128(%rdi), %xmm10 728 movdqu 144(%rdi), %xmm11 729 7304: movdqu (%r8, %rax), %xmm1 731 movdqa %xmm1, %xmm13 732 pxor %xmm12, %xmm1 733 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ 734 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ 735 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ 736 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ 737 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ 738 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ 739 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ 740 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ 741 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ 742 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ 743 pxor %xmm0, %xmm1 744 movdqu %xmm1, (%rsi, %rax) 745 movdqa %xmm13, %xmm0 746 addl $16, 
%eax 747 cmpl %eax, %r9d 748 jne 4b 749 7505: movdqu %xmm0, (%rdx) 751 752 xor %eax, %eax 753 ret 754 .size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128 755 756/* in %rdi : the key 757 in %rsi : buffer for expanded key 758*/ 759 .type intel_aes_encrypt_init_192,@function 760 .globl intel_aes_encrypt_init_192 761 .align 16 762intel_aes_encrypt_init_192: 763 movdqu (%rdi), %xmm1 764 movq 16(%rdi), %xmm3 765 movdqu %xmm1, (%rsi) 766 movq %xmm3, 16(%rsi) 767 leaq 24(%rsi), %rsi 768 769 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ 770 call key_expansion192 771 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ 772 call key_expansion192 773 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ 774 call key_expansion192 775 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ 776 call key_expansion192 777 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ 778 call key_expansion192 779 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ 780 call key_expansion192 781 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ 782 call key_expansion192 783 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */ 784 call key_expansion192 785 786 ret 787 .size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192 788 789 790/* in %rdi : the key 791 in %rsi : buffer for expanded key 792*/ 793 .type intel_aes_decrypt_init_192,@function 794 .globl intel_aes_decrypt_init_192 795 .align 16 796intel_aes_decrypt_init_192: 797 movdqu (%rdi), %xmm1 798 movq 16(%rdi), %xmm3 799 movdqu %xmm1, (%rsi) 800 movq %xmm3, 16(%rsi) 801 leaq 24(%rsi), %rsi 802 803 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ 804 call key_expansion192 805 movups -32(%rsi), %xmm2 806 movups -16(%rsi), %xmm4 807 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ 808 
.byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ 809 movups %xmm2, -32(%rsi) 810 movups %xmm4, -16(%rsi) 811 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ 812 call key_expansion192 813 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 814 movups %xmm2, -24(%rsi) 815 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ 816 call key_expansion192 817 movups -32(%rsi), %xmm2 818 movups -16(%rsi), %xmm4 819 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ 820 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ 821 movups %xmm2, -32(%rsi) 822 movups %xmm4, -16(%rsi) 823 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ 824 call key_expansion192 825 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 826 movups %xmm2, -24(%rsi) 827 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ 828 call key_expansion192 829 movups -32(%rsi), %xmm2 830 movups -16(%rsi), %xmm4 831 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ 832 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ 833 movups %xmm2, -32(%rsi) 834 movups %xmm4, -16(%rsi) 835 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ 836 call key_expansion192 837 .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ 838 movups %xmm2, -24(%rsi) 839 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ 840 call key_expansion192 841 movups -32(%rsi), %xmm2 842 movups -16(%rsi), %xmm4 843 .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ 844 .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ 845 movups %xmm2, -32(%rsi) 846 movups %xmm4, -16(%rsi) 847 .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */ 848 call key_expansion192 849 850 ret 851 .size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192 852 853 854 .type key_expansion192,@function 855 .align 16 856key_expansion192: 
/* key_expansion192 helper: in %xmm2 = aeskeygenassist result set by the caller, %xmm1 and low 64 bits of %xmm3 = most recent 16 + 8 bytes of key schedule (both updated in place), %rsi = write cursor; stores 16 + 8 = 24 new bytes at (%rsi) and advances %rsi by 24; overwrites %eax (left at 0), %xmm4 and %xmm5 */ 857 pshufd $0x55, %xmm2, %xmm2 858 xor %eax, %eax 859 movd %eax, %xmm4 860 shufps $0x10, %xmm1, %xmm4 861 pxor %xmm4, %xmm1 862 shufps $0x8c, %xmm1, %xmm4 863 pxor %xmm2, %xmm1 864 pxor %xmm4, %xmm1 865 movdqu %xmm1, (%rsi) 866 addq $16, %rsi 867 868 pshufd $0xff, %xmm1, %xmm4 869 movd %eax, %xmm5 870 shufps $0x00, %xmm3, %xmm5 871 shufps $0x08, %xmm3, %xmm5 872 pxor %xmm4, %xmm3 873 pxor %xmm5, %xmm3 874 movq %xmm3, (%rsi) 875 addq $8, %rsi 876 ret 877 .size key_expansion192, .-key_expansion192 878 879 880/* in %rdi : cx - context 881 in %rsi : output - pointer to output buffer 882 in %rdx : outputLen - pointer to variable for length of output 883 (already filled in by caller) 884 in %ecx : maxOutputLen - length of output buffer 885 (already checked by caller) 886 in %r8 : input - pointer to input buffer 887 in %r9d : inputLen - length of input buffer 888 on stack: blocksize - AES blocksize (always 16, unused) out %eax : always 0 (cleared just before ret); round keys are read from 0..192(%rdi), eight blocks (128 bytes) are processed per pass of loop 2 while at least 128 bytes remain 889*/ 890 .type intel_aes_encrypt_ecb_192,@function 891 .globl intel_aes_encrypt_ecb_192 892 .align 16 893intel_aes_encrypt_ecb_192: 894 movdqu (%rdi), %xmm2 895 movdqu 192(%rdi), %xmm14 896 xorl %eax, %eax 897// cmpl $8*16, %r9d 898 cmpl $128, %r9d 899 jb 1f 900// leal -8*16(%r9), %r11d 901 leal -128(%r9), %r11d 9022: movdqu (%r8, %rax), %xmm3 903 movdqu 16(%r8, %rax), %xmm4 904 movdqu 32(%r8, %rax), %xmm5 905 movdqu 48(%r8, %rax), %xmm6 906 movdqu 64(%r8, %rax), %xmm7 907 movdqu 80(%r8, %rax), %xmm8 908 movdqu 96(%r8, %rax), %xmm9 909 movdqu 112(%r8, %rax), %xmm10 910 pxor %xmm2, %xmm3 911 pxor %xmm2, %xmm4 912 pxor %xmm2, %xmm5 913 pxor %xmm2, %xmm6 914 pxor %xmm2, %xmm7 915 pxor %xmm2, %xmm8 916 pxor %xmm2, %xmm9 917 pxor %xmm2, %xmm10 918 919// complete loop unrolling 920 movdqu 16(%rdi), %xmm1 921 movdqu 32(%rdi), %xmm11 922 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 923 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 924 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 925 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, 
%xmm6 */ 926 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 927 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 928 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 929 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 930 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ 931 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ 932 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ 933 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ 934 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ 935 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ 936 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ 937 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ 938 939 movdqu 48(%rdi), %xmm1 940 movdqu 64(%rdi), %xmm11 941 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 942 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 943 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 944 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 945 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 946 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 947 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 948 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 949 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ 950 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ 951 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ 952 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ 953 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ 954 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ 955 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ 956 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ 957 958 movdqu 80(%rdi), %xmm1 959 movdqu 96(%rdi), %xmm11 960 .byte 0x66,0x0f,0x38,0xdc,0xd9 
/* aesenc %xmm1, %xmm3 */ 961 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 962 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 963 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 964 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 965 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 966 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 967 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 968 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ 969 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ 970 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ 971 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ 972 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ 973 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ 974 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ 975 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ 976 977 movdqu 112(%rdi), %xmm1 978 movdqu 128(%rdi), %xmm11 979 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 980 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 981 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 982 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 983 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 984 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 985 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 986 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 987 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ 988 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ 989 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ 990 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ 991 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ 992 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ 993 .byte 
0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ 994 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ 995 996 movdqu 144(%rdi), %xmm1 997 movdqu 160(%rdi), %xmm11 998 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 999 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 1000 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 1001 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 1002 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 1003 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 1004 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 1005 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 1006 .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ 1007 .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ 1008 .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ 1009 .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ 1010 .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ 1011 .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ 1012 .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ 1013 .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ 1014 1015 movdqu 176(%rdi), %xmm1 1016 .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ 1017 .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ 1018 .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ 1019 .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ 1020 .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ 1021 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ 1022 .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ 1023 .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ 1024 .byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */ 1025 .byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */ 1026 .byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 
*/ 1027 .byte 0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm6 */ 1028 .byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm7 */ 1029 .byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */ 1030 .byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */ 1031 .byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */ 1032 /* store the eight encrypted blocks at output offset %rax */ 1033 movdqu %xmm3, (%rsi, %rax) 1034 movdqu %xmm4, 16(%rsi, %rax) 1035 movdqu %xmm5, 32(%rsi, %rax) 1036 movdqu %xmm6, 48(%rsi, %rax) 1037 movdqu %xmm7, 64(%rsi, %rax) 1038 movdqu %xmm8, 80(%rsi, %rax) 1039 movdqu %xmm9, 96(%rsi, %rax) 1040 movdqu %xmm10, 112(%rsi, %rax) 1041// addl $8*16, %eax 1042 addl $128, %eax 1043 cmpl %r11d, %eax 1044 jbe 2b /* label 1: fewer than 128 bytes remain; finished when %eax equals inputLen */ 10451: cmpl %eax, %r9d 1046 je 5f 1047 /* load the round keys at 16..176(%rdi) once for the one-block-at-a-time loop at label 4 */ 1048 movdqu 16(%rdi), %xmm3 1049 movdqu 32(%rdi), %xmm4 1050 movdqu 48(%rdi), %xmm5 1051 movdqu 64(%rdi), %xmm6 1052 movdqu 80(%rdi), %xmm7 1053 movdqu 96(%rdi), %xmm8 1054 movdqu 112(%rdi), %xmm9 1055 movdqu 128(%rdi), %xmm10 1056 movdqu 144(%rdi), %xmm11 1057 movdqu 160(%rdi), %xmm12 1058 movdqu 176(%rdi), %xmm13 1059 10604: movdqu (%r8, %rax), %xmm1 1061 pxor %xmm2, %xmm1 1062 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ 1063 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ 1064 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ 1065 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ 1066 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ 1067 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ 1068 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ 1069 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ 1070 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ 1071 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ 1072 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ 1073 .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */ 1074 movdqu %xmm1, (%rsi, %rax) 1075 addl $16, %eax 1076 cmpl %eax, %r9d 
1077 jne 4b 1078 10795: xor %eax, %eax 1080 ret 1081 .size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192 1082 1083 1084/* in %rdi : cx - context 1085 in %rsi : output - pointer to output buffer 1086 in %rdx : outputLen - pointer to variable for length of output 1087 (already filled in by caller) 1088 in %ecx : maxOutputLen - length of output buffer 1089 (already checked by caller) 1090 in %r8 : input - pointer to input buffer 1091 in %r9d : inputLen - length of input buffer 1092 on stack: blocksize - AES blocksize (always 16, unused) 1093*/ 1094 .type intel_aes_decrypt_ecb_192,@function 1095 .globl intel_aes_decrypt_ecb_192 1096 .align 16 1097intel_aes_decrypt_ecb_192: 1098 movdqu (%rdi), %xmm2 1099 movdqu 192(%rdi), %xmm14 1100 xorl %eax, %eax 1101// cmpl $8*16, %r9d 1102 cmpl $128, %r9d 1103 jb 1f 1104// leal -8*16(%r9), %r11d 1105 leal -128(%r9), %r11d 11062: movdqu (%r8, %rax), %xmm3 1107 movdqu 16(%r8, %rax), %xmm4 1108 movdqu 32(%r8, %rax), %xmm5 1109 movdqu 48(%r8, %rax), %xmm6 1110 movdqu 64(%r8, %rax), %xmm7 1111 movdqu 80(%r8, %rax), %xmm8 1112 movdqu 96(%r8, %rax), %xmm9 1113 movdqu 112(%r8, %rax), %xmm10 1114 pxor %xmm14, %xmm3 1115 pxor %xmm14, %xmm4 1116 pxor %xmm14, %xmm5 1117 pxor %xmm14, %xmm6 1118 pxor %xmm14, %xmm7 1119 pxor %xmm14, %xmm8 1120 pxor %xmm14, %xmm9 1121 pxor %xmm14, %xmm10 1122 1123// complete loop unrolling 1124 movdqu 176(%rdi), %xmm1 1125 movdqu 160(%rdi), %xmm11 1126 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1127 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1128 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1129 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1130 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1131 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1132 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1133 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1134 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec 
%xmm11, %xmm3 */ 1135 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1136 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1137 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1138 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1139 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1140 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1141 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1142 1143 movdqu 144(%rdi), %xmm1 1144 movdqu 128(%rdi), %xmm11 1145 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1146 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1147 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1148 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1149 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1150 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1151 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1152 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1153 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 1154 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1155 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1156 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1157 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1158 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1159 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1160 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1161 1162 movdqu 112(%rdi), %xmm1 1163 movdqu 96(%rdi), %xmm11 1164 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1165 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1166 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1167 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1168 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1169 .byte 
0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1170 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1171 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1172 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 1173 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1174 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1175 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1176 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1177 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1178 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1179 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1180 1181 movdqu 80(%rdi), %xmm1 1182 movdqu 64(%rdi), %xmm11 1183 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1184 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1185 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1186 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1187 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1188 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1189 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1190 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1191 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 1192 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1193 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1194 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1195 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1196 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1197 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1198 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1199 1200 movdqu 48(%rdi), %xmm1 1201 movdqu 32(%rdi), %xmm11 1202 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1203 .byte 
0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1204 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1205 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1206 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1207 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1208 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1209 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1210 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 1211 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1212 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1213 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1214 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1215 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1216 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1217 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1218 1219 movdqu 16(%rdi), %xmm1 1220 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1221 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1222 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1223 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1224 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1225 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1226 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1227 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1228 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ 1229 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ 1230 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ 1231 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ 1232 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ 1233 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ 1234 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 
*/ 1235 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ 1236 1237 movdqu %xmm3, (%rsi, %rax) 1238 movdqu %xmm4, 16(%rsi, %rax) 1239 movdqu %xmm5, 32(%rsi, %rax) 1240 movdqu %xmm6, 48(%rsi, %rax) 1241 movdqu %xmm7, 64(%rsi, %rax) 1242 movdqu %xmm8, 80(%rsi, %rax) 1243 movdqu %xmm9, 96(%rsi, %rax) 1244 movdqu %xmm10, 112(%rsi, %rax) 1245// addl $8*16, %eax 1246 addl $128, %eax 1247 cmpl %r11d, %eax 1248 jbe 2b 12491: cmpl %eax, %r9d 1250 je 5f 1251 1252 movdqu 16(%rdi), %xmm3 1253 movdqu 32(%rdi), %xmm4 1254 movdqu 48(%rdi), %xmm5 1255 movdqu 64(%rdi), %xmm6 1256 movdqu 80(%rdi), %xmm7 1257 movdqu 96(%rdi), %xmm8 1258 movdqu 112(%rdi), %xmm9 1259 movdqu 128(%rdi), %xmm10 1260 movdqu 144(%rdi), %xmm11 1261 movdqu 160(%rdi), %xmm12 1262 movdqu 176(%rdi), %xmm13 1263 12644: movdqu (%r8, %rax), %xmm1 1265 pxor %xmm14, %xmm1 1266 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ 1267 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ 1268 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ 1269 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ 1270 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ 1271 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ 1272 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ 1273 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ 1274 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ 1275 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ 1276 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ 1277 .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ 1278 movdqu %xmm1, (%rsi, %rax) 1279 addl $16, %eax 1280 cmpl %eax, %r9d 1281 jne 4b 1282 12835: xor %eax, %eax 1284 ret 1285 .size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192 1286 1287 1288/* in %rdi : cx - context 1289 in %rsi : output - pointer to output buffer 1290 in %rdx : outputLen - pointer to variable for length of output 1291 (already filled in 
by caller) 1292 in %ecx : maxOutputLen - length of output buffer 1293 (already checked by caller) 1294 in %r8 : input - pointer to input buffer 1295 in %r9d : inputLen - length of input buffer 1296 on stack: blocksize - AES blocksize (always 16, unused) 1297*/ 1298 .type intel_aes_encrypt_cbc_192,@function 1299 .globl intel_aes_encrypt_cbc_192 1300 .align 16 1301intel_aes_encrypt_cbc_192: 1302 testl %r9d, %r9d 1303 je 2f 1304 1305// leaq IV_OFFSET(%rdi), %rdx 1306 leaq 256(%rdi), %rdx 1307 1308 movdqu (%rdx), %xmm0 1309 movdqu (%rdi), %xmm2 1310 movdqu 16(%rdi), %xmm3 1311 movdqu 32(%rdi), %xmm4 1312 movdqu 48(%rdi), %xmm5 1313 movdqu 64(%rdi), %xmm6 1314 movdqu 80(%rdi), %xmm7 1315 movdqu 96(%rdi), %xmm8 1316 movdqu 112(%rdi), %xmm9 1317 movdqu 128(%rdi), %xmm10 1318 movdqu 144(%rdi), %xmm11 1319 movdqu 160(%rdi), %xmm12 1320 movdqu 176(%rdi), %xmm13 1321 movdqu 192(%rdi), %xmm14 1322 1323 xorl %eax, %eax 13241: movdqu (%r8, %rax), %xmm1 1325 pxor %xmm0, %xmm1 1326 pxor %xmm2, %xmm1 1327 .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ 1328 .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ 1329 .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ 1330 .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ 1331 .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ 1332 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ 1333 .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ 1334 .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ 1335 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ 1336 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ 1337 .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ 1338 .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */ 1339 movdqu %xmm1, (%rsi, %rax) 1340 movdqa %xmm1, %xmm0 1341 addl $16, %eax 1342 cmpl %eax, %r9d 1343 jne 1b 1344 1345 movdqu %xmm0, (%rdx) 1346 13472: xor %eax, %eax 1348 ret 1349 .size 
intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192 1350 1351 1352/* in %rdi : cx - context 1353 in %rsi : output - pointer to output buffer 1354 in %rdx : outputLen - pointer to variable for length of output 1355 (already filled in by caller) 1356 in %ecx : maxOutputLen - length of output buffer 1357 (already checked by caller) 1358 in %r8 : input - pointer to input buffer 1359 in %r9d : inputLen - length of input buffer 1360 on stack: blocksize - AES blocksize (always 16, unused) note: the incoming %rdx is not read; it is overwritten on entry with the IV address cx + 256 (IV_OFFSET), from which the IV is loaded into %xmm0 1361*/ 1362 .type intel_aes_decrypt_cbc_192,@function 1363 .globl intel_aes_decrypt_cbc_192 1364 .align 16 1365intel_aes_decrypt_cbc_192: 1366// leaq IV_OFFSET(%rdi), %rdx 1367 leaq 256(%rdi), %rdx 1368 1369 movdqu (%rdx), %xmm0 1370 movdqu (%rdi), %xmm2 1371 movdqu 192(%rdi), %xmm14 1372 xorl %eax, %eax 1373 cmpl $128, %r9d 1374 jb 1f 1375 leal -128(%r9), %r11d 13762: movdqu (%r8, %rax), %xmm3 1377 movdqu 16(%r8, %rax), %xmm4 1378 movdqu 32(%r8, %rax), %xmm5 1379 movdqu 48(%r8, %rax), %xmm6 1380 movdqu 64(%r8, %rax), %xmm7 1381 movdqu 80(%r8, %rax), %xmm8 1382 movdqu 96(%r8, %rax), %xmm9 1383 movdqu 112(%r8, %rax), %xmm10 1384 pxor %xmm14, %xmm3 1385 pxor %xmm14, %xmm4 1386 pxor %xmm14, %xmm5 1387 pxor %xmm14, %xmm6 1388 pxor %xmm14, %xmm7 1389 pxor %xmm14, %xmm8 1390 pxor %xmm14, %xmm9 1391 pxor %xmm14, %xmm10 1392 1393// complete loop unrolling 1394 movdqu 176(%rdi), %xmm1 1395 movdqu 160(%rdi), %xmm11 1396 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1397 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1398 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1399 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1400 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1401 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1402 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1403 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1404 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 1405 .byte 
0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1406 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1407 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1408 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1409 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1410 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1411 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1412 1413 movdqu 144(%rdi), %xmm1 1414 movdqu 128(%rdi), %xmm11 1415 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1416 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1417 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1418 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1419 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1420 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1421 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1422 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1423 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 1424 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1425 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1426 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1427 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1428 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1429 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1430 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1431 1432 movdqu 112(%rdi), %xmm1 1433 movdqu 96(%rdi), %xmm11 1434 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1435 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1436 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1437 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1438 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1439 .byte 
0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1440 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1441 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1442 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 1443 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1444 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1445 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1446 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1447 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1448 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1449 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1450 1451 movdqu 80(%rdi), %xmm1 1452 movdqu 64(%rdi), %xmm11 1453 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1454 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1455 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1456 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1457 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1458 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1459 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1460 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1461 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 1462 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1463 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1464 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1465 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1466 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1467 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1468 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1469 1470 movdqu 48(%rdi), %xmm1 1471 movdqu 32(%rdi), %xmm11 1472 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1473 .byte 
0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1474 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1475 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1476 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1477 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1478 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1479 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1480 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 1481 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 1482 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 1483 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 1484 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 1485 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 1486 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 1487 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 1488 1489 movdqu 16(%rdi), %xmm1 1490 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 1491 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 1492 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 1493 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 1494 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 1495 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 1496 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 1497 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 1498 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ 1499 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ 1500 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ 1501 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ 1502 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ 1503 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ 1504 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 
*/ 1505 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ 1506 1507 pxor %xmm0, %xmm3 1508 movdqu (%r8, %rax), %xmm0 1509 pxor %xmm0, %xmm4 1510 movdqu 16(%r8, %rax), %xmm0 1511 pxor %xmm0, %xmm5 1512 movdqu 32(%r8, %rax), %xmm0 1513 pxor %xmm0, %xmm6 1514 movdqu 48(%r8, %rax), %xmm0 1515 pxor %xmm0, %xmm7 1516 movdqu 64(%r8, %rax), %xmm0 1517 pxor %xmm0, %xmm8 1518 movdqu 80(%r8, %rax), %xmm0 1519 pxor %xmm0, %xmm9 1520 movdqu 96(%r8, %rax), %xmm0 1521 pxor %xmm0, %xmm10 1522 movdqu 112(%r8, %rax), %xmm0 1523 movdqu %xmm3, (%rsi, %rax) 1524 movdqu %xmm4, 16(%rsi, %rax) 1525 movdqu %xmm5, 32(%rsi, %rax) 1526 movdqu %xmm6, 48(%rsi, %rax) 1527 movdqu %xmm7, 64(%rsi, %rax) 1528 movdqu %xmm8, 80(%rsi, %rax) 1529 movdqu %xmm9, 96(%rsi, %rax) 1530 movdqu %xmm10, 112(%rsi, %rax) 1531 addl $128, %eax 1532 cmpl %r11d, %eax 1533 jbe 2b 15341: cmpl %eax, %r9d 1535 je 5f 1536 1537 movdqu 16(%rdi), %xmm3 1538 movdqu 32(%rdi), %xmm4 1539 movdqu 48(%rdi), %xmm5 1540 movdqu 64(%rdi), %xmm6 1541 movdqu 80(%rdi), %xmm7 1542 movdqu 96(%rdi), %xmm8 1543 movdqu 112(%rdi), %xmm9 1544 movdqu 128(%rdi), %xmm10 1545 movdqu 144(%rdi), %xmm11 1546 movdqu 160(%rdi), %xmm12 1547 movdqu 176(%rdi), %xmm13 1548 15494: movdqu (%r8, %rax), %xmm1 1550 movdqa %xmm1, %xmm15 1551 pxor %xmm14, %xmm1 1552 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ 1553 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ 1554 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ 1555 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ 1556 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ 1557 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ 1558 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ 1559 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ 1560 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ 1561 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ 1562 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec 
%xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdf,0xca  /* aesdeclast %xmm2, %xmm1 */
        pxor    %xmm0, %xmm1            /* XOR with the chaining value kept in %xmm0 */
        movdqu  %xmm1, (%rsi, %rax)
        movdqa  %xmm15, %xmm0           /* saved input block becomes the next chaining value */
        addl    $16, %eax
        cmpl    %eax, %r9d
        jne     4b

5:      movdqu  %xmm0, (%rdx)

        xor     %eax, %eax
        ret
        .size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192

/* intel_aes_encrypt_init_256: expand a 256-bit key into the encryption
   key schedule.

   in %rdi : the key (32 bytes are read)
   in %rsi : buffer for expanded key (240 bytes, i.e. 15 round keys,
             are written)

   Round keys 0 and 1 are the two halves of the key itself.  Six calls to
   key_expansion256 produce round keys 2..13, two per call.  Round key 14
   needs only the first half of that helper, so it is computed inline at
   the end.

   %eax is zeroed before the first call because key_expansion256 loads
   %xmm6 from it; it is still zero on return.
   Registers written: %rsi, %eax, %xmm1-%xmm4, %xmm6.
*/
        .type intel_aes_encrypt_init_256,@function
        .globl intel_aes_encrypt_init_256
        .align 16
intel_aes_encrypt_init_256:
        movdqu  (%rdi), %xmm1           /* round key 0 */
        movdqu  16(%rdi), %xmm3         /* round key 1 */
        movdqu  %xmm1, (%rsi)
        movdqu  %xmm3, 16(%rsi)
        leaq    32(%rsi), %rsi          /* %rsi = where round key 2 goes */
        xor     %eax, %eax

        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01  /* aeskeygenassist $0x01, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02  /* aeskeygenassist $0x02, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04  /* aeskeygenassist $0x04, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08  /* aeskeygenassist $0x08, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10  /* aeskeygenassist $0x10, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20  /* aeskeygenassist $0x20, %xmm3, %xmm2 */
        call    key_expansion256
        /* Round key 14: same steps as the first half of key_expansion256. */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40  /* aeskeygenassist $0x40, %xmm3, %xmm2 */
        pxor    %xmm6, %xmm6
        pshufd  $0xff, %xmm2, %xmm2
        shufps  $0x10, %xmm1, %xmm6
        pxor    %xmm6, %xmm1
        shufps  $0x8c, %xmm1, %xmm6
        pxor    %xmm2, %xmm1
        pxor    %xmm6, %xmm1
        movdqu  %xmm1, (%rsi)

        ret
        .size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256


/* intel_aes_decrypt_init_256: expand a 256-bit key into the key schedule
   read by the aesdec-based routines.

   in %rdi : the key (32 bytes are read)
   in %rsi : buffer for expanded key (240 bytes, i.e. 15 round keys,
             are written)

   The expansion is the same as in intel_aes_encrypt_init_256, but round
   keys 1..13 are stored after passing through aesimc.  Round keys 0 and
   14 are stored unmodified.  key_expansion256 first stores the plain
   round keys; the two movdqu after each call overwrite them in place
   with the aesimc results (%rsi has already been advanced by 32, hence
   the -32/-16 offsets).

   %eax is zero on return.
   Registers written: %rsi, %eax, %xmm1-%xmm6.
*/
        .type intel_aes_decrypt_init_256,@function
        .globl intel_aes_decrypt_init_256
        .align 16
intel_aes_decrypt_init_256:
        movdqu  (%rdi), %xmm1
        movdqu  16(%rdi), %xmm3
        movdqu  %xmm1, (%rsi)           /* round key 0, stored as is */
        .byte 0x66,0x0f,0x38,0xdb,0xe3  /* aesimc %xmm3, %xmm4 */
        movdqu  %xmm4, 16(%rsi)         /* round key 1, after aesimc */
        leaq    32(%rsi), %rsi
        xor     %eax, %eax              /* key_expansion256 needs %eax = 0 */

        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01  /* aeskeygenassist $0x01, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1  /* aesimc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdb,0xeb  /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm4, -32(%rsi)
        movdqu  %xmm5, -16(%rsi)
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02  /* aeskeygenassist $0x02, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1  /* aesimc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdb,0xeb  /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm4, -32(%rsi)
        movdqu  %xmm5, -16(%rsi)
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04  /* aeskeygenassist $0x04, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1  /* aesimc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdb,0xeb  /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm4, -32(%rsi)
        movdqu  %xmm5, -16(%rsi)
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08  /* aeskeygenassist $0x08, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1  /* aesimc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdb,0xeb  /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm4, -32(%rsi)
        movdqu  %xmm5, -16(%rsi)
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10  /* aeskeygenassist $0x10, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1  /* aesimc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdb,0xeb  /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm4, -32(%rsi)
        movdqu  %xmm5, -16(%rsi)
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20  /* aeskeygenassist $0x20, %xmm3, %xmm2 */
        call    key_expansion256
        .byte 0x66,0x0f,0x38,0xdb,0xe1  /* aesimc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdb,0xeb  /* aesimc %xmm3, %xmm5 */
        movdqu  %xmm4, -32(%rsi)
        movdqu  %xmm5, -16(%rsi)
        /* Round key 14: first half of key_expansion256 inline, no aesimc. */
        .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40  /* aeskeygenassist $0x40, %xmm3, %xmm2 */
        pxor    %xmm6, %xmm6
        pshufd  $0xff, %xmm2, %xmm2
        shufps  $0x10, %xmm1, %xmm6
        pxor    %xmm6, %xmm1
        shufps  $0x8c, %xmm1, %xmm6
        pxor    %xmm2, %xmm1
        pxor    %xmm6, %xmm1
        movdqu  %xmm1, (%rsi)

        ret
        .size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256


/* key_expansion256: file-local helper (no .globl) that derives the next
   two round keys of a 256-bit key schedule.

   in  %xmm1 : previous even-numbered round key
   in  %xmm3 : previous odd-numbered round key
   in  %xmm2 : result of aeskeygenassist $rcon, %xmm3 (set up by caller)
   in  %eax  : must be 0; it is used to clear %xmm6
   in  %rsi  : where to store the two new round keys
   out %xmm1, %xmm3 : the two new round keys, also stored at (%rsi) and
                      16(%rsi)
   out %rsi  : advanced by 32
   Also overwrites %xmm2, %xmm4 and %xmm6.

   The two shufps/pxor pairs fold a running XOR of the four dwords of the
   old key into each dword (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3).  This
   relies on the low dword of %xmm6 being zero, which still holds when
   the second half starts, so %xmm6 is not cleared again there.
*/
        .type key_expansion256,@function
        .align 16
key_expansion256:
        movd    %eax, %xmm6             /* %xmm6 = 0 (caller zeroed %eax) */
        pshufd  $0xff, %xmm2, %xmm2     /* broadcast dword 3 of the keygenassist result */
        shufps  $0x10, %xmm1, %xmm6
        pxor    %xmm6, %xmm1
        shufps  $0x8c, %xmm1, %xmm6
        pxor    %xmm2, %xmm1
        pxor    %xmm6, %xmm1
        movdqu  %xmm1, (%rsi)

        addq    $16, %rsi
        .byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00  /* aeskeygenassist $0, %xmm1, %xmm4 */
        pshufd  $0xaa, %xmm4, %xmm4     /* broadcast dword 2 (no rotation, no rcon) */
        shufps  $0x10, %xmm3, %xmm6
        pxor    %xmm6, %xmm3
        shufps  $0x8c, %xmm3, %xmm6
        pxor    %xmm4, %xmm3
        pxor    %xmm6, %xmm3
        movdqu  %xmm3, (%rsi)
        addq    $16, %rsi
        ret
        .size key_expansion256, .-key_expansion256


/* intel_aes_encrypt_ecb_256: AES-256 ECB encryption.

   in %rdi : cx - context
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (already filled in by caller)
   in %ecx : maxOutputLen - length of output buffer
             (already checked by caller)
   in %r8  : input - pointer to input buffer
   in %r9d : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)

   The 15 round keys are read from offsets 0..224 of the context.
   %eax is the byte offset into input and output.  While at least 128
   bytes remain, eight blocks are processed per iteration; the rest is
   handled one block at a time.  The single-block loop stops only when
   the offset equals inputLen exactly, so inputLen has to be a multiple
   of 16.  Always returns 0 in %eax.
   Registers written: %eax, %r11d, %xmm1-%xmm15.
*/
        .type intel_aes_encrypt_ecb_256,@function
        .globl intel_aes_encrypt_ecb_256
        .align 16
intel_aes_encrypt_ecb_256:
        movdqu  (%rdi), %xmm2           /* round key 0 */
        movdqu  224(%rdi), %xmm15       /* round key 14 (last) */
        xorl    %eax, %eax
//      cmpl    $8*16, %r9d
        cmpl    $128, %r9d
        jb      1f                      /* fewer than 8 blocks: single-block loop only */
//      leal    -8*16(%r9), %r11d
        leal    -128(%r9), %r11d        /* last offset at which 8 blocks still fit */
2:      movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm2, %xmm3
        pxor    %xmm2, %xmm4
        pxor    %xmm2, %xmm5
        pxor    %xmm2, %xmm6
        pxor    %xmm2, %xmm7
        pxor    %xmm2, %xmm8
        pxor    %xmm2, %xmm9
        pxor    %xmm2, %xmm10

// complete loop unrolling
        /* Round keys are fetched two at a time into %xmm1 and %xmm11. */
        movdqu  16(%rdi), %xmm1
        movdqu  32(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9  /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1  /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9  /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1  /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9  /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1  /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9  /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1  /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb  /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3  /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb  /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3  /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb  /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3  /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3  /* aesenc %xmm11, %xmm10 */

        movdqu  48(%rdi), %xmm1
        movdqu  64(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9  /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1  /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9  /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1  /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9  /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1  /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9  /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1  /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb  /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3  /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb  /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3  /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb  /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3  /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3  /* aesenc %xmm11, %xmm10 */

        movdqu  80(%rdi), %xmm1
        movdqu  96(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9  /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1  /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9  /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1  /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9  /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1  /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9  /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1  /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb  /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3  /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb  /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3  /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb  /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3  /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3  /* aesenc %xmm11, %xmm10 */

        movdqu  112(%rdi), %xmm1
        movdqu  128(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9  /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1  /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9  /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1  /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9  /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1  /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9  /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1  /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb  /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3  /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb  /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3  /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb  /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3  /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3  /* aesenc %xmm11, %xmm10 */

        movdqu  144(%rdi), %xmm1
        movdqu  160(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9  /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1  /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9  /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1  /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9  /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1  /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9  /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1  /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb  /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3  /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb  /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3  /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb  /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3  /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3  /* aesenc %xmm11, %xmm10 */

        movdqu  176(%rdi), %xmm1
        movdqu  192(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xdc,0xd9  /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1  /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9  /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1  /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9  /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1  /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9  /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1  /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb  /* aesenc %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3  /* aesenc %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb  /* aesenc %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3  /* aesenc %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb  /* aesenc %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3  /* aesenc %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3  /* aesenc %xmm11, %xmm10 */

        movdqu  208(%rdi), %xmm1
        .byte 0x66,0x0f,0x38,0xdc,0xd9  /* aesenc %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdc,0xe1  /* aesenc %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdc,0xe9  /* aesenc %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdc,0xf1  /* aesenc %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdc,0xf9  /* aesenc %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1  /* aesenc %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9  /* aesenc %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1  /* aesenc %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xdf  /* aesenclast %xmm15, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xe7  /* aesenclast %xmm15, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xef  /* aesenclast %xmm15, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xf7  /* aesenclast %xmm15, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xff  /* aesenclast %xmm15, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xc7  /* aesenclast %xmm15, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xcf  /* aesenclast %xmm15, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xdd,0xd7  /* aesenclast %xmm15, %xmm10 */

        movdqu  %xmm3, (%rsi, %rax)
        movdqu  %xmm4, 16(%rsi, %rax)
        movdqu  %xmm5, 32(%rsi, %rax)
        movdqu  %xmm6, 48(%rsi, %rax)
        movdqu  %xmm7, 64(%rsi, %rax)
        movdqu  %xmm8, 80(%rsi, %rax)
        movdqu  %xmm9, 96(%rsi, %rax)
        movdqu  %xmm10, 112(%rsi, %rax)
//      addl    $8*16, %eax
        addl    $128, %eax
        cmpl    %r11d, %eax
        jbe     2b                      /* another 8 blocks still fit */
1:      cmpl    %eax, %r9d
        je      5f                      /* nothing left */

        /* Single-block loop.  Fifteen round keys plus the data block do
           not fit in sixteen registers, so %xmm8 alternates between
           round key 0 and round key 7 (112(%rdi)) inside the loop. */
        movdqu  (%rdi), %xmm8
        movdqu  16(%rdi), %xmm2
        movdqu  32(%rdi), %xmm3
        movdqu  48(%rdi), %xmm4
        movdqu  64(%rdi), %xmm5
        movdqu  80(%rdi), %xmm6
        movdqu  96(%rdi), %xmm7
        movdqu  128(%rdi), %xmm9
        movdqu  144(%rdi), %xmm10
        movdqu  160(%rdi), %xmm11
        movdqu  176(%rdi), %xmm12
        movdqu  192(%rdi), %xmm13
        movdqu  208(%rdi), %xmm14

4:      movdqu  (%r8, %rax), %xmm1
        pxor    %xmm8, %xmm1            /* round key 0 */
        movdqu  112(%rdi), %xmm8        /* %xmm8 = round key 7 */
        .byte 0x66,0x0f,0x38,0xdc,0xca  /* aesenc %xmm2, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcc  /* aesenc %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcd  /* aesenc %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xce  /* aesenc %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcf  /* aesenc %xmm7, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8  /* aesenc %xmm8, %xmm1 */
        movdqu  (%rdi), %xmm8           /* back to round key 0 for the next block */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9  /* aesenc %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xca  /* aesenc %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc  /* aesenc %xmm12, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd  /* aesenc %xmm13, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xce  /* aesenc %xmm14, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf  /* aesenclast %xmm15, %xmm1 */
        movdqu  %xmm1, (%rsi, %rax)
        addl    $16, %eax
        cmpl    %eax, %r9d
        jne     4b

5:      xor     %eax, %eax
        ret
        .size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256


/* intel_aes_decrypt_ecb_256: AES-256 ECB decryption.

   in %rdi : cx - context
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (already filled in by caller)
   in %ecx : maxOutputLen - length of output buffer
             (already checked by caller)
   in %r8  : input - pointer to input buffer
   in %r9d : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)

   Same structure as intel_aes_encrypt_ecb_256, but the round keys are
   applied in reverse order: 224(%rdi) is XORed in first, 208..16 are
   used with aesdec and (%rdi) with aesdeclast.  Presumably the context
   holds the schedule written by intel_aes_decrypt_init_256 - confirm
   against the caller.  inputLen has to be a multiple of 16 (the
   single-block loop exits only on exact equality).  Always returns 0 in
   %eax.
   Registers written: %eax, %r11d, %xmm1-%xmm15.
*/
        .type intel_aes_decrypt_ecb_256,@function
        .globl intel_aes_decrypt_ecb_256
        .align 16
intel_aes_decrypt_ecb_256:
        movdqu  (%rdi), %xmm2           /* round key 0, used last */
        movdqu  224(%rdi), %xmm15       /* round key 14, used first */
        xorl    %eax, %eax
//      cmpl    $8*16, %r9d
        cmpl    $128, %r9d
        jb      1f                      /* fewer than 8 blocks: single-block loop only */
//      leal    -8*16(%r9), %r11d
        leal    -128(%r9), %r11d        /* last offset at which 8 blocks still fit */
2:      movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm15, %xmm3
        pxor    %xmm15, %xmm4
        pxor    %xmm15, %xmm5
        pxor    %xmm15, %xmm6
        pxor    %xmm15, %xmm7
        pxor    %xmm15, %xmm8
        pxor    %xmm15, %xmm9
        pxor    %xmm15, %xmm10

// complete loop unrolling
        /* Round keys are fetched two at a time into %xmm1 and %xmm11. */
        movdqu  208(%rdi), %xmm1
        movdqu  192(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9  /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1  /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9  /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1  /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9  /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1  /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9  /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1  /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb  /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3  /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb  /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3  /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb  /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3  /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb  /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3  /* aesdec %xmm11, %xmm10 */

        movdqu  176(%rdi), %xmm1
        movdqu  160(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9  /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1  /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9  /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1  /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9  /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1  /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9  /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1  /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb  /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3  /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb  /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3  /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb  /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3  /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb  /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3  /* aesdec %xmm11, %xmm10 */

        movdqu  144(%rdi), %xmm1
        movdqu  128(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9  /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1  /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9  /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1  /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9  /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1  /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9  /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1  /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb  /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3  /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb  /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3  /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb  /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3  /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb  /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3  /* aesdec %xmm11, %xmm10 */

        movdqu  112(%rdi), %xmm1
        movdqu  96(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9  /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1  /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9  /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1  /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9  /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1  /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9  /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1  /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb  /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3  /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb  /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3  /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb  /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3  /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb  /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3  /* aesdec %xmm11, %xmm10 */

        movdqu  80(%rdi), %xmm1
        movdqu  64(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9  /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1  /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9  /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1  /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9  /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1  /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9  /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1  /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb  /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3  /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb  /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3  /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb  /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3  /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb  /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3  /* aesdec %xmm11, %xmm10 */

        movdqu  48(%rdi), %xmm1
        movdqu  32(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9  /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1  /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9  /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1  /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9  /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1  /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9  /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1  /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb  /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3  /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb  /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3  /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb  /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3  /* aesdec %xmm11, %xmm8 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xcb  /* aesdec %xmm11, %xmm9 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xd3  /* aesdec %xmm11, %xmm10 */

        movdqu  16(%rdi), %xmm1
        .byte 0x66,0x0f,0x38,0xde,0xd9  /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1  /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9  /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1  /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9  /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1  /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9  /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1  /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x0f,0x38,0xdf,0xda  /* aesdeclast %xmm2, %xmm3 */
        .byte 0x66,0x0f,0x38,0xdf,0xe2  /* aesdeclast %xmm2, %xmm4 */
        .byte 0x66,0x0f,0x38,0xdf,0xea  /* aesdeclast %xmm2, %xmm5 */
        .byte 0x66,0x0f,0x38,0xdf,0xf2  /* aesdeclast %xmm2, %xmm6 */
        .byte 0x66,0x0f,0x38,0xdf,0xfa  /* aesdeclast %xmm2, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2  /* aesdeclast %xmm2, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xca  /* aesdeclast %xmm2, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2  /* aesdeclast %xmm2, %xmm10 */

        movdqu  %xmm3, (%rsi, %rax)
        movdqu  %xmm4, 16(%rsi, %rax)
        movdqu  %xmm5, 32(%rsi, %rax)
        movdqu  %xmm6, 48(%rsi, %rax)
        movdqu  %xmm7, 64(%rsi, %rax)
        movdqu  %xmm8, 80(%rsi, %rax)
        movdqu  %xmm9, 96(%rsi, %rax)
        movdqu  %xmm10, 112(%rsi, %rax)
//      addl    $8*16, %eax
        addl    $128, %eax
        cmpl    %r11d, %eax
        jbe     2b                      /* another 8 blocks still fit */
1:      cmpl    %eax, %r9d
        je      5f                      /* nothing left */

        /* Single-block loop.  %xmm2..%xmm14 hold round keys 1..13 and
           %xmm15 round key 14.  Round key 0 has no register of its own,
           so %xmm8 alternates between round key 7 (112(%rdi)) and round
           key 0 inside the loop. */
        movdqu  16(%rdi), %xmm2
        movdqu  32(%rdi), %xmm3
        movdqu  48(%rdi), %xmm4
        movdqu  64(%rdi), %xmm5
        movdqu  80(%rdi), %xmm6
        movdqu  96(%rdi), %xmm7
        movdqu  112(%rdi), %xmm8
        movdqu  128(%rdi), %xmm9
        movdqu  144(%rdi), %xmm10
        movdqu  160(%rdi), %xmm11
        movdqu  176(%rdi), %xmm12
        movdqu  192(%rdi), %xmm13
        movdqu  208(%rdi), %xmm14

4:      movdqu  (%r8, %rax), %xmm1
        pxor    %xmm15, %xmm1           /* round key 14 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xce  /* aesdec %xmm14, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcd  /* aesdec %xmm13, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcc  /* aesdec %xmm12, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xcb  /* aesdec %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xca  /* aesdec %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc9  /* aesdec %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xc8  /* aesdec %xmm8, %xmm1 */
        movdqu  (%rdi), %xmm8           /* %xmm8 = round key 0 for aesdeclast */
        .byte 0x66,0x0f,0x38,0xde,0xcf  /* aesdec %xmm7, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xce  /* aesdec %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcd  /* aesdec %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcc  /* aesdec %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xcb  /* aesdec %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xde,0xca  /* aesdec %xmm2, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8  /* aesdeclast %xmm8, %xmm1 */
        movdqu  112(%rdi), %xmm8        /* back to round key 7 for the next block */
        movdqu  %xmm1, (%rsi, %rax)
        addl    $16, %eax
        cmpl    %eax, %r9d
        jne     4b

5:      xor     %eax, %eax
        ret
        .size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256


/* intel_aes_encrypt_cbc_256: AES-256 CBC encryption.

   in %rdi : cx - context
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (already filled in by caller)
   in %ecx : maxOutputLen - length of output buffer
             (already checked by caller)
   in %r8  : input - pointer to input buffer
   in %r9d : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)

   The chaining value (IV) is read from and written back to offset 256
   of the context (IV_OFFSET).  The incoming %rdx is not used; the
   register is reused as the pointer to that IV.  Blocks are processed
   one at a time because each depends on the previous ciphertext block.
   With inputLen == 0 the routine returns at once and leaves the IV
   untouched.  The loop exits only when the offset equals inputLen
   exactly, so inputLen has to be a multiple of 16.  Always returns 0 in
   %eax.
   Registers written: %eax, %rdx, %xmm0-%xmm15.
*/
        .type intel_aes_encrypt_cbc_256,@function
        .globl intel_aes_encrypt_cbc_256
        .align 16
intel_aes_encrypt_cbc_256:
        testl   %r9d, %r9d
        je      2f

//      leaq    IV_OFFSET(%rdi), %rdx
        leaq    256(%rdi), %rdx

        movdqu  (%rdx), %xmm0           /* %xmm0 = chaining value */
        movdqu  (%rdi), %xmm8           /* round key 0; %xmm8 is shared with round key 7 */
        movdqu  16(%rdi), %xmm2
        movdqu  32(%rdi), %xmm3
        movdqu  48(%rdi), %xmm4
        movdqu  64(%rdi), %xmm5
        movdqu  80(%rdi), %xmm6
        movdqu  96(%rdi), %xmm7
        movdqu  128(%rdi), %xmm9
        movdqu  144(%rdi), %xmm10
        movdqu  160(%rdi), %xmm11
        movdqu  176(%rdi), %xmm12
        movdqu  192(%rdi), %xmm13
        movdqu  208(%rdi), %xmm14
        movdqu  224(%rdi), %xmm15

        xorl    %eax, %eax
1:      movdqu  (%r8, %rax), %xmm1
        pxor    %xmm0, %xmm1            /* XOR with previous ciphertext block / IV */
        pxor    %xmm8, %xmm1            /* round key 0 */
        movdqu  112(%rdi), %xmm8        /* %xmm8 = round key 7 */
        .byte 0x66,0x0f,0x38,0xdc,0xca  /* aesenc %xmm2, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm3, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcc  /* aesenc %xmm4, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcd  /* aesenc %xmm5, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xce  /* aesenc %xmm6, %xmm1 */
        .byte 0x66,0x0f,0x38,0xdc,0xcf  /* aesenc %xmm7, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8  /* aesenc %xmm8, %xmm1 */
        movdqu  (%rdi), %xmm8           /* back to round key 0 for the next block */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9  /* aesenc %xmm9, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xca  /* aesenc %xmm10, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb  /* aesenc %xmm11, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc  /* aesenc %xmm12, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd  /* aesenc %xmm13, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdc,0xce  /* aesenc %xmm14, %xmm1 */
        .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf  /* aesenclast %xmm15, %xmm1 */
        movdqu  %xmm1, (%rsi, %rax)
        movdqa  %xmm1, %xmm0            /* ciphertext becomes the next chaining value */
        addl    $16, %eax
        cmpl    %eax, %r9d
        jne     1b

        movdqu  %xmm0, (%rdx)           /* save the last ciphertext block as the new IV */

2:      xor     %eax, %eax
        ret
        .size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256


/* in %rdi : cx - context
   in %rsi : output - pointer to output buffer
   in %rdx : outputLen - pointer to variable for length of output
             (already filled in by caller)
   in %ecx : maxOutputLen - length of output buffer
             (already checked by caller)
   in %r8  : input - pointer to input buffer
   in %r9d : inputLen - length of input buffer
   on stack: blocksize - AES blocksize (always 16, unused)
*/
        .type intel_aes_decrypt_cbc_256,@function
        .globl intel_aes_decrypt_cbc_256
        .align 16
intel_aes_decrypt_cbc_256:
//      leaq    IV_OFFSET(%rdi), %rdx
        leaq    256(%rdi), %rdx         /* %rdx is reused as the pointer to the IV */

        movdqu  (%rdx), %xmm0
        movdqu  (%rdi), %xmm2
        movdqu  224(%rdi), %xmm15
        xorl    %eax, %eax
//      cmpl    $8*16, %r9d
        cmpl    $128, %r9d
        jb      1f
//      leal    -8*16(%r9), %r11d
        leal    -128(%r9), %r11d
2:      movdqu  (%r8, %rax), %xmm3
        movdqu  16(%r8, %rax), %xmm4
        movdqu  32(%r8, %rax), %xmm5
        movdqu  48(%r8, %rax), %xmm6
        movdqu  64(%r8, %rax), %xmm7
        movdqu  80(%r8, %rax), %xmm8
        movdqu  96(%r8, %rax), %xmm9
        movdqu  112(%r8, %rax), %xmm10
        pxor    %xmm15, %xmm3
        pxor    %xmm15, %xmm4
        pxor    %xmm15, %xmm5
        pxor    %xmm15, %xmm6
        pxor    %xmm15, %xmm7
        pxor    %xmm15, %xmm8
        pxor    %xmm15, %xmm9
        pxor    %xmm15, %xmm10

// complete loop unrolling
        movdqu  208(%rdi), %xmm1
        movdqu  192(%rdi), %xmm11
        .byte 0x66,0x0f,0x38,0xde,0xd9  /* aesdec %xmm1, %xmm3 */
        .byte 0x66,0x0f,0x38,0xde,0xe1  /* aesdec %xmm1, %xmm4 */
        .byte 0x66,0x0f,0x38,0xde,0xe9  /* aesdec %xmm1, %xmm5 */
        .byte 0x66,0x0f,0x38,0xde,0xf1  /* aesdec %xmm1, %xmm6 */
        .byte 0x66,0x0f,0x38,0xde,0xf9  /* aesdec %xmm1, %xmm7 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc1  /* aesdec %xmm1, %xmm8 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xc9  /* aesdec %xmm1, %xmm9 */
        .byte 0x66,0x44,0x0f,0x38,0xde,0xd1  /* aesdec %xmm1, %xmm10 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xdb  /* aesdec %xmm11, %xmm3 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xe3  /* aesdec %xmm11, %xmm4 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xeb  /* aesdec %xmm11, %xmm5 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xf3  /* aesdec %xmm11, %xmm6 */
        .byte 0x66,0x41,0x0f,0x38,0xde,0xfb  /* aesdec %xmm11, %xmm7 */
        .byte 0x66,0x45,0x0f,0x38,0xde,0xc3  /* aesdec %xmm11, %xmm8 */
        .byte 
0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 2296 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 2297 2298 movdqu 176(%rdi), %xmm1 2299 movdqu 160(%rdi), %xmm11 2300 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 2301 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 2302 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 2303 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 2304 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 2305 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 2306 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 2307 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 2308 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 2309 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 2310 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 2311 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 2312 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 2313 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 2314 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 2315 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 2316 2317 movdqu 144(%rdi), %xmm1 2318 movdqu 128(%rdi), %xmm11 2319 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 2320 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 2321 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 2322 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 2323 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 2324 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 2325 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 2326 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 2327 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 2328 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 2329 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb 
/* aesdec %xmm11, %xmm5 */ 2330 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 2331 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 2332 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 2333 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 2334 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 2335 2336 movdqu 112(%rdi), %xmm1 2337 movdqu 96(%rdi), %xmm11 2338 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 2339 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 2340 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 2341 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 2342 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 2343 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 2344 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 2345 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 2346 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 2347 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 2348 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 2349 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 2350 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 2351 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 2352 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 2353 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 2354 2355 movdqu 80(%rdi), %xmm1 2356 movdqu 64(%rdi), %xmm11 2357 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 2358 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 2359 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 2360 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 2361 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 2362 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 2363 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 2364 
.byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 2365 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 2366 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 2367 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 2368 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 2369 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 2370 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 2371 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 2372 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 2373 2374 movdqu 48(%rdi), %xmm1 2375 movdqu 32(%rdi), %xmm11 2376 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 2377 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 2378 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 2379 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ 2380 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 2381 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 2382 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 2383 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 2384 .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ 2385 .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ 2386 .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ 2387 .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ 2388 .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ 2389 .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ 2390 .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ 2391 .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ 2392 2393 movdqu 16(%rdi), %xmm1 2394 .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 2395 .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ 2396 .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ 2397 .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, 
%xmm6 */ 2398 .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ 2399 .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ 2400 .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ 2401 .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ 2402 .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ 2403 .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ 2404 .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ 2405 .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ 2406 .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ 2407 .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ 2408 .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ 2409 .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ 2410 2411 pxor %xmm0, %xmm3 2412 movdqu (%r8, %rax), %xmm0 2413 pxor %xmm0, %xmm4 2414 movdqu 16(%r8, %rax), %xmm0 2415 pxor %xmm0, %xmm5 2416 movdqu 32(%r8, %rax), %xmm0 2417 pxor %xmm0, %xmm6 2418 movdqu 48(%r8, %rax), %xmm0 2419 pxor %xmm0, %xmm7 2420 movdqu 64(%r8, %rax), %xmm0 2421 pxor %xmm0, %xmm8 2422 movdqu 80(%r8, %rax), %xmm0 2423 pxor %xmm0, %xmm9 2424 movdqu 96(%r8, %rax), %xmm0 2425 pxor %xmm0, %xmm10 2426 movdqu 112(%r8, %rax), %xmm0 2427 movdqu %xmm3, (%rsi, %rax) 2428 movdqu %xmm4, 16(%rsi, %rax) 2429 movdqu %xmm5, 32(%rsi, %rax) 2430 movdqu %xmm6, 48(%rsi, %rax) 2431 movdqu %xmm7, 64(%rsi, %rax) 2432 movdqu %xmm8, 80(%rsi, %rax) 2433 movdqu %xmm9, 96(%rsi, %rax) 2434 movdqu %xmm10, 112(%rsi, %rax) 2435// addl $8*16, %eax 2436 addl $128, %eax 2437 cmpl %r11d, %eax 2438 jbe 2b 24391: cmpl %eax, %r9d 2440 je 5f 2441 2442 movdqu 16(%rdi), %xmm2 2443 movdqu 32(%rdi), %xmm3 2444 movdqu 48(%rdi), %xmm4 2445 movdqu 64(%rdi), %xmm5 2446 movdqu 80(%rdi), %xmm6 2447 movdqu 96(%rdi), %xmm7 2448 movdqu 112(%rdi), %xmm8 2449 movdqu 128(%rdi), %xmm9 2450 movdqu 144(%rdi), %xmm10 2451 movdqu 160(%rdi), %xmm11 2452 movdqu 176(%rdi), %xmm12 2453 movdqu 192(%rdi), 
%xmm13 2454 movdqu 208(%rdi), %xmm14 2455 24564: movdqu (%r8, %rax), %xmm1 2457 pxor %xmm15, %xmm1 2458 .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */ 2459 .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ 2460 .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ 2461 .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ 2462 .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ 2463 .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ 2464 .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ 2465 movdqu (%rdi), %xmm8 2466 .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ 2467 .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ 2468 .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ 2469 .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ 2470 .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ 2471 .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */ 2472 .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */ 2473 movdqu 112(%rdi), %xmm8 2474 pxor %xmm0, %xmm1 2475 movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */ 2476 movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */ 2477 addl $16, %eax 2478 cmpl %eax, %r9d 2479 jne 4b 2480 24815: movdqu %xmm0, (%rdx) 2482 2483 xor %eax, %eax 2484 ret 2485 .size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256 2486