#! /usr/bin/env perl
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
#
# AES-NI-CTR+GHASH stitch.
#
# February 2013
#
# OpenSSL GCM implementation is organized in such way that its
# performance is rather close to the sum of its streamed components,
# in the context parallelized AES-NI CTR and modulo-scheduled
# PCLMULQDQ-enabled GHASH. Unfortunately, as no stitch implementation
# was observed to perform significantly better than the sum of the
# components on contemporary CPUs, the effort was deemed impossible to
# justify. This module is based on combination of Intel submissions,
# [1] and [2], with MOVBE twist suggested by Ilya Albrekht and Max
# Locktyukhin of Intel Corp. who verified that it reduces shuffles
# pressure with notable relative improvement, achieving 1.0 cycle per
# byte processed with 128-bit key on Haswell processor, 0.74 - on
# Broadwell, 0.63 - on Skylake... [Mentioned results are raw profiled
# measurements for favourable packet size, one divisible by 96.
# Applications using the EVP interface will observe a few percent
# worse performance.]
#
# Knights Landing processes 1 byte in 1.25 cycles (measured with EVP).
#
# [1] http://rt.openssl.org/Ticket/Display.html?id=2900&user=guest&pass=guest
# [2] http://www.intel.com/content/dam/www/public/us/en/documents/software-support/enabling-high-performance-gcm.pdf

# Usage: aesni-gcm-x86_64.pl [flavour] output-file
# $flavour selects the perlasm dialect (elf, macosx, mingw64, nasm, masm...);
# a single dotted argument is treated as the output file name instead.
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Probe the toolchain for AVX support; $avx ends up 0 (none), 1 or 2.
# The stitched code below additionally relies on MOVBE and is only
# emitted when $avx>1; otherwise stubs are generated (see bottom).
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
		=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
	$avx = ($1>=2.20) + ($1>=2.22);
}

if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
	   `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
	$avx = ($1>=2.09) + ($1>=2.10);
}

if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
	   `ml64 2>&1` =~ /Version ([0-9]+)\./) {
	$avx = ($1>=10) + ($1>=11);
}

if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
	$avx = ($2>=3.0) + ($2>3.0);
}

# Pipe the generated code through the perlasm translator. Check the
# open explicitly: an unchecked failure here would silently produce an
# empty output file while still exiting successfully.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
	or die "can't call $xlate: $!";
*STDOUT=*OUT;

if ($avx>1) {{{

($inp,$out,$len,$key,$ivp,$Xip)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9");

($Ii,$T1,$T2,$Hkey,
 $Z0,$Z1,$Z2,$Z3,$Xi) = map("%xmm$_",(0..8));

($inout0,$inout1,$inout2,$inout3,$inout4,$inout5,$rndkey) = map("%xmm$_",(9..15));

($counter,$rounds,$ret,$const,$in0,$end0)=("%ebx","%ebp","%r10","%r11","%r14","%r15");

$code=<<___;
.text

.type	_aesni_ctr32_ghash_6x,\@abi-omnipotent
.align	32
_aesni_ctr32_ghash_6x:
	vmovdqu	0x20($const),$T2	# borrow $T2, .Lone_msb
	sub	\$6,$len
	vpxor	$Z0,$Z0,$Z0		# $Z0 = 0
	vmovdqu	0x00-0x80($key),$rndkey
	vpaddb	$T2,$T1,$inout1
	vpaddb	$T2,$inout1,$inout2
	vpaddb	$T2,$inout2,$inout3
	vpaddb	$T2,$inout3,$inout4
	vpaddb	$T2,$inout4,$inout5
	vpxor	$rndkey,$T1,$inout0
	vmovdqu	$Z0,16+8(%rsp)		# "$Z3" = 0
	jmp	.Loop6x

.align	32
.Loop6x:
	add	\$`6<<24`,$counter
	jc	.Lhandle_ctr32		# discard $inout[1-5]?
	vmovdqu	0x00-0x20($Xip),$Hkey	# $Hkey^1
	vpaddb	$T2,$inout5,$T1		# next counter value
	vpxor	$rndkey,$inout1,$inout1
	vpxor	$rndkey,$inout2,$inout2

.Lresume_ctr32:
	vmovdqu	$T1,($ivp)		# save next counter value
	vpclmulqdq	\$0x10,$Hkey,$Z3,$Z1
	vpxor	$rndkey,$inout3,$inout3
	vmovups	0x10-0x80($key),$T2	# borrow $T2 for $rndkey
	vpclmulqdq	\$0x01,$Hkey,$Z3,$Z2
	xor	%r12,%r12
	cmp	$in0,$end0

	vaesenc	$T2,$inout0,$inout0
	vmovdqu	0x30+8(%rsp),$Ii	# I[4]
	vpxor	$rndkey,$inout4,$inout4
	vpclmulqdq	\$0x00,$Hkey,$Z3,$T1
	vaesenc	$T2,$inout1,$inout1
	vpxor	$rndkey,$inout5,$inout5
	setnc	%r12b
	vpclmulqdq	\$0x11,$Hkey,$Z3,$Z3
	vaesenc	$T2,$inout2,$inout2
	vmovdqu	0x10-0x20($Xip),$Hkey	# $Hkey^2
	neg	%r12
	vaesenc	$T2,$inout3,$inout3
	vpxor	$Z1,$Z2,$Z2
	vpclmulqdq	\$0x00,$Hkey,$Ii,$Z1
	vpxor	$Z0,$Xi,$Xi		# modulo-scheduled
	vaesenc	$T2,$inout4,$inout4
	vpxor	$Z1,$T1,$Z0
	and	\$0x60,%r12
	vmovups	0x20-0x80($key),$rndkey
	vpclmulqdq	\$0x10,$Hkey,$Ii,$T1
	vaesenc	$T2,$inout5,$inout5

	vpclmulqdq	\$0x01,$Hkey,$Ii,$T2
	lea	($in0,%r12),$in0
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	16+8(%rsp),$Xi,$Xi	# modulo-scheduled [vpxor $Z3,$Xi,$Xi]
	vpclmulqdq	\$0x11,$Hkey,$Ii,$Hkey
	vmovdqu	0x40+8(%rsp),$Ii	# I[3]
	vaesenc	$rndkey,$inout1,$inout1
	movbe	0x58($in0),%r13
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x50($in0),%r12
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r13,0x20+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	mov	%r12,0x28+8(%rsp)
	vmovdqu	0x30-0x20($Xip),$Z1	# borrow $Z1 for $Hkey^3
	vaesenc	$rndkey,$inout5,$inout5

	vmovups	0x30-0x80($key),$rndkey
	vpxor	$T1,$Z2,$Z2
	vpclmulqdq	\$0x00,$Z1,$Ii,$T1
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$T2,$Z2,$Z2
	vpclmulqdq	\$0x10,$Z1,$Ii,$T2
	vaesenc	$rndkey,$inout1,$inout1
	vpxor	$Hkey,$Z3,$Z3
	vpclmulqdq	\$0x01,$Z1,$Ii,$Hkey
	vaesenc	$rndkey,$inout2,$inout2
	vpclmulqdq	\$0x11,$Z1,$Ii,$Z1
	vmovdqu	0x50+8(%rsp),$Ii	# I[2]
	vaesenc	$rndkey,$inout3,$inout3
	vaesenc	$rndkey,$inout4,$inout4
	vpxor	$T1,$Z0,$Z0
	vmovdqu	0x40-0x20($Xip),$T1	# borrow $T1 for $Hkey^4
	vaesenc	$rndkey,$inout5,$inout5

	vmovups	0x40-0x80($key),$rndkey
	vpxor	$T2,$Z2,$Z2
	vpclmulqdq	\$0x00,$T1,$Ii,$T2
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$Hkey,$Z2,$Z2
	vpclmulqdq	\$0x10,$T1,$Ii,$Hkey
	vaesenc	$rndkey,$inout1,$inout1
	movbe	0x48($in0),%r13
	vpxor	$Z1,$Z3,$Z3
	vpclmulqdq	\$0x01,$T1,$Ii,$Z1
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x40($in0),%r12
	vpclmulqdq	\$0x11,$T1,$Ii,$T1
	vmovdqu	0x60+8(%rsp),$Ii	# I[1]
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r13,0x30+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	mov	%r12,0x38+8(%rsp)
	vpxor	$T2,$Z0,$Z0
	vmovdqu	0x60-0x20($Xip),$T2	# borrow $T2 for $Hkey^5
	vaesenc	$rndkey,$inout5,$inout5

	vmovups	0x50-0x80($key),$rndkey
	vpxor	$Hkey,$Z2,$Z2
	vpclmulqdq	\$0x00,$T2,$Ii,$Hkey
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$Z1,$Z2,$Z2
	vpclmulqdq	\$0x10,$T2,$Ii,$Z1
	vaesenc	$rndkey,$inout1,$inout1
	movbe	0x38($in0),%r13
	vpxor	$T1,$Z3,$Z3
	vpclmulqdq	\$0x01,$T2,$Ii,$T1
	vpxor	0x70+8(%rsp),$Xi,$Xi	# accumulate I[0]
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x30($in0),%r12
	vpclmulqdq	\$0x11,$T2,$Ii,$T2
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r13,0x40+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	mov	%r12,0x48+8(%rsp)
	vpxor	$Hkey,$Z0,$Z0
	vmovdqu	0x70-0x20($Xip),$Hkey	# $Hkey^6
	vaesenc	$rndkey,$inout5,$inout5

	vmovups	0x60-0x80($key),$rndkey
	vpxor	$Z1,$Z2,$Z2
	vpclmulqdq	\$0x10,$Hkey,$Xi,$Z1
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$T1,$Z2,$Z2
	vpclmulqdq	\$0x01,$Hkey,$Xi,$T1
	vaesenc	$rndkey,$inout1,$inout1
	movbe	0x28($in0),%r13
	vpxor	$T2,$Z3,$Z3
	vpclmulqdq	\$0x00,$Hkey,$Xi,$T2
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x20($in0),%r12
	vpclmulqdq	\$0x11,$Hkey,$Xi,$Xi
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r13,0x50+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	mov	%r12,0x58+8(%rsp)
	vpxor	$Z1,$Z2,$Z2
	vaesenc	$rndkey,$inout5,$inout5
	vpxor	$T1,$Z2,$Z2

	vmovups	0x70-0x80($key),$rndkey
	vpslldq	\$8,$Z2,$Z1
	vpxor	$T2,$Z0,$Z0
	vmovdqu	0x10($const),$Hkey	# .Lpoly

	vaesenc	$rndkey,$inout0,$inout0
	vpxor	$Xi,$Z3,$Z3
	vaesenc	$rndkey,$inout1,$inout1
	vpxor	$Z1,$Z0,$Z0
	movbe	0x18($in0),%r13
	vaesenc	$rndkey,$inout2,$inout2
	movbe	0x10($in0),%r12
	vpalignr	\$8,$Z0,$Z0,$Ii	# 1st phase
	vpclmulqdq	\$0x10,$Hkey,$Z0,$Z0
	mov	%r13,0x60+8(%rsp)
	vaesenc	$rndkey,$inout3,$inout3
	mov	%r12,0x68+8(%rsp)
	vaesenc	$rndkey,$inout4,$inout4
	vmovups	0x80-0x80($key),$T1	# borrow $T1 for $rndkey
	vaesenc	$rndkey,$inout5,$inout5

	vaesenc	$T1,$inout0,$inout0
	vmovups	0x90-0x80($key),$rndkey
	vaesenc	$T1,$inout1,$inout1
	vpsrldq	\$8,$Z2,$Z2
	vaesenc	$T1,$inout2,$inout2
	vpxor	$Z2,$Z3,$Z3
	vaesenc	$T1,$inout3,$inout3
	vpxor	$Ii,$Z0,$Z0
	movbe	0x08($in0),%r13
	vaesenc	$T1,$inout4,$inout4
	movbe	0x00($in0),%r12
	vaesenc	$T1,$inout5,$inout5
	vmovups	0xa0-0x80($key),$T1
	cmp	\$11,$rounds
	jb	.Lenc_tail		# 128-bit key

	vaesenc	$rndkey,$inout0,$inout0
	vaesenc	$rndkey,$inout1,$inout1
	vaesenc	$rndkey,$inout2,$inout2
	vaesenc	$rndkey,$inout3,$inout3
	vaesenc	$rndkey,$inout4,$inout4
	vaesenc	$rndkey,$inout5,$inout5

	vaesenc	$T1,$inout0,$inout0
	vaesenc	$T1,$inout1,$inout1
	vaesenc	$T1,$inout2,$inout2
	vaesenc	$T1,$inout3,$inout3
	vaesenc	$T1,$inout4,$inout4
	vmovups	0xb0-0x80($key),$rndkey
	vaesenc	$T1,$inout5,$inout5
	vmovups	0xc0-0x80($key),$T1
	je	.Lenc_tail		# 192-bit key

	vaesenc	$rndkey,$inout0,$inout0
	vaesenc	$rndkey,$inout1,$inout1
	vaesenc	$rndkey,$inout2,$inout2
	vaesenc	$rndkey,$inout3,$inout3
	vaesenc	$rndkey,$inout4,$inout4
	vaesenc	$rndkey,$inout5,$inout5

	vaesenc	$T1,$inout0,$inout0
	vaesenc	$T1,$inout1,$inout1
	vaesenc	$T1,$inout2,$inout2
	vaesenc	$T1,$inout3,$inout3
	vaesenc	$T1,$inout4,$inout4
	vmovups	0xd0-0x80($key),$rndkey
	vaesenc	$T1,$inout5,$inout5
	vmovups	0xe0-0x80($key),$T1
	jmp	.Lenc_tail		# 256-bit key

.align	32
.Lhandle_ctr32:
	vmovdqu	($const),$Ii		# borrow $Ii for .Lbswap_mask
	vpshufb	$Ii,$T1,$Z2		# byte-swap counter
	vmovdqu	0x30($const),$Z1	# borrow $Z1, .Ltwo_lsb
	vpaddd	0x40($const),$Z2,$inout1	# .Lone_lsb
	vpaddd	$Z1,$Z2,$inout2
	vmovdqu	0x00-0x20($Xip),$Hkey	# $Hkey^1
	vpaddd	$Z1,$inout1,$inout3
	vpshufb	$Ii,$inout1,$inout1
	vpaddd	$Z1,$inout2,$inout4
	vpshufb	$Ii,$inout2,$inout2
	vpxor	$rndkey,$inout1,$inout1
	vpaddd	$Z1,$inout3,$inout5
	vpshufb	$Ii,$inout3,$inout3
	vpxor	$rndkey,$inout2,$inout2
	vpaddd	$Z1,$inout4,$T1		# byte-swapped next counter value
	vpshufb	$Ii,$inout4,$inout4
	vpshufb	$Ii,$inout5,$inout5
	vpshufb	$Ii,$T1,$T1		# next counter value
	jmp	.Lresume_ctr32

.align	32
.Lenc_tail:
	vaesenc	$rndkey,$inout0,$inout0
	vmovdqu	$Z3,16+8(%rsp)		# postpone vpxor $Z3,$Xi,$Xi
	vpalignr	\$8,$Z0,$Z0,$Xi	# 2nd phase
	vaesenc	$rndkey,$inout1,$inout1
	vpclmulqdq	\$0x10,$Hkey,$Z0,$Z0
	vpxor	0x00($inp),$T1,$T2
	vaesenc	$rndkey,$inout2,$inout2
	vpxor	0x10($inp),$T1,$Ii
	vaesenc	$rndkey,$inout3,$inout3
	vpxor	0x20($inp),$T1,$Z1
	vaesenc	$rndkey,$inout4,$inout4
	vpxor	0x30($inp),$T1,$Z2
	vaesenc	$rndkey,$inout5,$inout5
	vpxor	0x40($inp),$T1,$Z3
	vpxor	0x50($inp),$T1,$Hkey
	vmovdqu	($ivp),$T1		# load next counter value

	vaesenclast	$T2,$inout0,$inout0
	vmovdqu	0x20($const),$T2	# borrow $T2, .Lone_msb
	vaesenclast	$Ii,$inout1,$inout1
	vpaddb	$T2,$T1,$Ii
	mov	%r13,0x70+8(%rsp)
	lea	0x60($inp),$inp
	vaesenclast	$Z1,$inout2,$inout2
	vpaddb	$T2,$Ii,$Z1
	mov	%r12,0x78+8(%rsp)
	lea	0x60($out),$out
	vmovdqu	0x00-0x80($key),$rndkey
	vaesenclast	$Z2,$inout3,$inout3
	vpaddb	$T2,$Z1,$Z2
	vaesenclast	$Z3, $inout4,$inout4
	vpaddb	$T2,$Z2,$Z3
	vaesenclast	$Hkey,$inout5,$inout5
	vpaddb	$T2,$Z3,$Hkey

	add	\$0x60,$ret
	sub	\$0x6,$len
	jc	.L6x_done

	vmovups	$inout0,-0x60($out)	# save output
	vpxor	$rndkey,$T1,$inout0
	vmovups	$inout1,-0x50($out)
	vmovdqa	$Ii,$inout1		# 0 latency
	vmovups	$inout2,-0x40($out)
	vmovdqa	$Z1,$inout2		# 0 latency
	vmovups	$inout3,-0x30($out)
	vmovdqa	$Z2,$inout3		# 0 latency
	vmovups	$inout4,-0x20($out)
	vmovdqa	$Z3,$inout4		# 0 latency
	vmovups	$inout5,-0x10($out)
	vmovdqa	$Hkey,$inout5		# 0 latency
	vmovdqu	0x20+8(%rsp),$Z3	# I[5]
	jmp	.Loop6x

.L6x_done:
	vpxor	16+8(%rsp),$Xi,$Xi	# modulo-scheduled
	vpxor	$Z0,$Xi,$Xi		# modulo-scheduled

	ret
.size	_aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
___
######################################################################
#
# size_t aesni_gcm_[en|de]crypt(const void *inp, void *out, size_t len,
#		const AES_KEY *key, unsigned char iv[16],
#		struct { u128 Xi,H,Htbl[9]; } *Xip);
$code.=<<___;
.globl	aesni_gcm_decrypt
.type	aesni_gcm_decrypt,\@function,6
.align	32
aesni_gcm_decrypt:
.cfi_startproc
	xor	$ret,$ret
	cmp	\$0x60,$len		# minimal accepted length
	jb	.Lgcm_dec_abort

	lea	(%rsp),%rax		# save stack pointer
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
___
$code.=<<___ if ($win64);
	lea	-0xa8(%rsp),%rsp
	movaps	%xmm6,-0xd8(%rax)
	movaps	%xmm7,-0xc8(%rax)
	movaps	%xmm8,-0xb8(%rax)
	movaps	%xmm9,-0xa8(%rax)
	movaps	%xmm10,-0x98(%rax)
	movaps	%xmm11,-0x88(%rax)
	movaps	%xmm12,-0x78(%rax)
	movaps	%xmm13,-0x68(%rax)
	movaps	%xmm14,-0x58(%rax)
	movaps	%xmm15,-0x48(%rax)
.Lgcm_dec_body:
___
$code.=<<___;
	vzeroupper

	vmovdqu	($ivp),$T1		# input counter value
	add	\$-128,%rsp
	mov	12($ivp),$counter
	lea	.Lbswap_mask(%rip),$const
	lea	-0x80($key),$in0	# borrow $in0
	mov	\$0xf80,$end0		# borrow $end0
	vmovdqu	($Xip),$Xi		# load Xi
	and	\$-128,%rsp		# ensure stack alignment
	vmovdqu	($const),$Ii		# borrow $Ii for .Lbswap_mask
	lea	0x80($key),$key		# size optimization
	lea	0x20+0x20($Xip),$Xip	# size optimization
	mov	0xf0-0x80($key),$rounds
	vpshufb	$Ii,$Xi,$Xi

	and	$end0,$in0
	and	%rsp,$end0
	sub	$in0,$end0
	jc	.Ldec_no_key_aliasing
	cmp	\$768,$end0
	jnc	.Ldec_no_key_aliasing
	sub	$end0,%rsp		# avoid aliasing with key
.Ldec_no_key_aliasing:

	vmovdqu	0x50($inp),$Z3		# I[5]
	lea	($inp),$in0
	vmovdqu	0x40($inp),$Z0
	lea	-0xc0($inp,$len),$end0
	vmovdqu	0x30($inp),$Z1
	shr	\$4,$len
	xor	$ret,$ret
	vmovdqu	0x20($inp),$Z2
	vpshufb	$Ii,$Z3,$Z3		# passed to _aesni_ctr32_ghash_6x
	vmovdqu	0x10($inp),$T2
	vpshufb	$Ii,$Z0,$Z0
	vmovdqu	($inp),$Hkey
	vpshufb	$Ii,$Z1,$Z1
	vmovdqu	$Z0,0x30(%rsp)
	vpshufb	$Ii,$Z2,$Z2
	vmovdqu	$Z1,0x40(%rsp)
	vpshufb	$Ii,$T2,$T2
	vmovdqu	$Z2,0x50(%rsp)
	vpshufb	$Ii,$Hkey,$Hkey
	vmovdqu	$T2,0x60(%rsp)
	vmovdqu	$Hkey,0x70(%rsp)

	call	_aesni_ctr32_ghash_6x

	vmovups	$inout0,-0x60($out)	# save output
	vmovups	$inout1,-0x50($out)
	vmovups	$inout2,-0x40($out)
	vmovups	$inout3,-0x30($out)
	vmovups	$inout4,-0x20($out)
	vmovups	$inout5,-0x10($out)

	vpshufb	($const),$Xi,$Xi	# .Lbswap_mask
	vmovdqu	$Xi,-0x40($Xip)		# output Xi

	vzeroupper
___
$code.=<<___ if ($win64);
	movaps	-0xd8(%rax),%xmm6
	movaps	-0xc8(%rax),%xmm7
	movaps	-0xb8(%rax),%xmm8
	movaps	-0xa8(%rax),%xmm9
	movaps	-0x98(%rax),%xmm10
	movaps	-0x88(%rax),%xmm11
	movaps	-0x78(%rax),%xmm12
	movaps	-0x68(%rax),%xmm13
	movaps	-0x58(%rax),%xmm14
	movaps	-0x48(%rax),%xmm15
___
$code.=<<___;
	mov	-48(%rax),%r15
.cfi_restore	%r15
	mov	-40(%rax),%r14
.cfi_restore	%r14
	mov	-32(%rax),%r13
.cfi_restore	%r13
	mov	-24(%rax),%r12
.cfi_restore	%r12
	mov	-16(%rax),%rbp
.cfi_restore	%rbp
	mov	-8(%rax),%rbx
.cfi_restore	%rbx
	lea	(%rax),%rsp		# restore %rsp
.cfi_def_cfa_register	%rsp
.Lgcm_dec_abort:
	mov	$ret,%rax		# return value
	ret
.cfi_endproc
.size	aesni_gcm_decrypt,.-aesni_gcm_decrypt
___

$code.=<<___;
.type	_aesni_ctr32_6x,\@abi-omnipotent
.align	32
_aesni_ctr32_6x:
	vmovdqu	0x00-0x80($key),$Z0	# borrow $Z0 for $rndkey
	vmovdqu	0x20($const),$T2	# borrow $T2, .Lone_msb
	lea	-1($rounds),%r13
	vmovups	0x10-0x80($key),$rndkey
	lea	0x20-0x80($key),%r12
	vpxor	$Z0,$T1,$inout0
	add	\$`6<<24`,$counter
	jc	.Lhandle_ctr32_2
	vpaddb	$T2,$T1,$inout1
	vpaddb	$T2,$inout1,$inout2
	vpxor	$Z0,$inout1,$inout1
	vpaddb	$T2,$inout2,$inout3
	vpxor	$Z0,$inout2,$inout2
	vpaddb	$T2,$inout3,$inout4
	vpxor	$Z0,$inout3,$inout3
	vpaddb	$T2,$inout4,$inout5
	vpxor	$Z0,$inout4,$inout4
	vpaddb	$T2,$inout5,$T1
	vpxor	$Z0,$inout5,$inout5
	jmp	.Loop_ctr32

.align	16
.Loop_ctr32:
	vaesenc	$rndkey,$inout0,$inout0
	vaesenc	$rndkey,$inout1,$inout1
	vaesenc	$rndkey,$inout2,$inout2
	vaesenc	$rndkey,$inout3,$inout3
	vaesenc	$rndkey,$inout4,$inout4
	vaesenc	$rndkey,$inout5,$inout5
	vmovups	(%r12),$rndkey
	lea	0x10(%r12),%r12
	dec	%r13d
	jnz	.Loop_ctr32

	vmovdqu	(%r12),$Hkey		# last round key
	vaesenc	$rndkey,$inout0,$inout0
	vpxor	0x00($inp),$Hkey,$Z0
	vaesenc	$rndkey,$inout1,$inout1
	vpxor	0x10($inp),$Hkey,$Z1
	vaesenc	$rndkey,$inout2,$inout2
	vpxor	0x20($inp),$Hkey,$Z2
	vaesenc	$rndkey,$inout3,$inout3
	vpxor	0x30($inp),$Hkey,$Xi
	vaesenc	$rndkey,$inout4,$inout4
	vpxor	0x40($inp),$Hkey,$T2
	vaesenc	$rndkey,$inout5,$inout5
	vpxor	0x50($inp),$Hkey,$Hkey
	lea	0x60($inp),$inp

	vaesenclast	$Z0,$inout0,$inout0
	vaesenclast	$Z1,$inout1,$inout1
	vaesenclast	$Z2,$inout2,$inout2
	vaesenclast	$Xi,$inout3,$inout3
	vaesenclast	$T2,$inout4,$inout4
	vaesenclast	$Hkey,$inout5,$inout5
	vmovups	$inout0,0x00($out)
	vmovups	$inout1,0x10($out)
	vmovups	$inout2,0x20($out)
	vmovups	$inout3,0x30($out)
	vmovups	$inout4,0x40($out)
	vmovups	$inout5,0x50($out)
	lea	0x60($out),$out

	ret
.align	32
.Lhandle_ctr32_2:
	vpshufb	$Ii,$T1,$Z2		# byte-swap counter
	vmovdqu	0x30($const),$Z1	# borrow $Z1, .Ltwo_lsb
	vpaddd	0x40($const),$Z2,$inout1	# .Lone_lsb
	vpaddd	$Z1,$Z2,$inout2
	vpaddd	$Z1,$inout1,$inout3
	vpshufb	$Ii,$inout1,$inout1
	vpaddd	$Z1,$inout2,$inout4
	vpshufb	$Ii,$inout2,$inout2
	vpxor	$Z0,$inout1,$inout1
	vpaddd	$Z1,$inout3,$inout5
	vpshufb	$Ii,$inout3,$inout3
	vpxor	$Z0,$inout2,$inout2
	vpaddd	$Z1,$inout4,$T1		# byte-swapped next counter value
	vpshufb	$Ii,$inout4,$inout4
	vpxor	$Z0,$inout3,$inout3
	vpshufb	$Ii,$inout5,$inout5
	vpxor	$Z0,$inout4,$inout4
	vpshufb	$Ii,$T1,$T1		# next counter value
	vpxor	$Z0,$inout5,$inout5
	jmp	.Loop_ctr32
.size	_aesni_ctr32_6x,.-_aesni_ctr32_6x

.globl	aesni_gcm_encrypt
.type	aesni_gcm_encrypt,\@function,6
.align	32
aesni_gcm_encrypt:
.cfi_startproc
	xor	$ret,$ret
	cmp	\$0x60*3,$len		# minimal accepted length
	jb	.Lgcm_enc_abort

	lea	(%rsp),%rax		# save stack pointer
.cfi_def_cfa_register	%rax
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
___
$code.=<<___ if ($win64);
	lea	-0xa8(%rsp),%rsp
	movaps	%xmm6,-0xd8(%rax)
	movaps	%xmm7,-0xc8(%rax)
	movaps	%xmm8,-0xb8(%rax)
	movaps	%xmm9,-0xa8(%rax)
	movaps	%xmm10,-0x98(%rax)
	movaps	%xmm11,-0x88(%rax)
	movaps	%xmm12,-0x78(%rax)
	movaps	%xmm13,-0x68(%rax)
	movaps	%xmm14,-0x58(%rax)
	movaps	%xmm15,-0x48(%rax)
.Lgcm_enc_body:
___
$code.=<<___;
	vzeroupper

	vmovdqu	($ivp),$T1		# input counter value
	add	\$-128,%rsp
	mov	12($ivp),$counter
	lea	.Lbswap_mask(%rip),$const
	lea	-0x80($key),$in0	# borrow $in0
	mov	\$0xf80,$end0		# borrow $end0
	lea	0x80($key),$key		# size optimization
	vmovdqu	($const),$Ii		# borrow $Ii for .Lbswap_mask
	and	\$-128,%rsp		# ensure stack alignment
	mov	0xf0-0x80($key),$rounds

	and	$end0,$in0
	and	%rsp,$end0
	sub	$in0,$end0
	jc	.Lenc_no_key_aliasing
	cmp	\$768,$end0
	jnc	.Lenc_no_key_aliasing
	sub	$end0,%rsp		# avoid aliasing with key
.Lenc_no_key_aliasing:

	lea	($out),$in0
	lea	-0xc0($out,$len),$end0
	shr	\$4,$len

	call	_aesni_ctr32_6x
	vpshufb	$Ii,$inout0,$Xi		# save bswapped output on stack
	vpshufb	$Ii,$inout1,$T2
	vmovdqu	$Xi,0x70(%rsp)
	vpshufb	$Ii,$inout2,$Z0
	vmovdqu	$T2,0x60(%rsp)
	vpshufb	$Ii,$inout3,$Z1
	vmovdqu	$Z0,0x50(%rsp)
	vpshufb	$Ii,$inout4,$Z2
	vmovdqu	$Z1,0x40(%rsp)
	vpshufb	$Ii,$inout5,$Z3		# passed to _aesni_ctr32_ghash_6x
	vmovdqu	$Z2,0x30(%rsp)

	call	_aesni_ctr32_6x

	vmovdqu	($Xip),$Xi		# load Xi
	lea	0x20+0x20($Xip),$Xip	# size optimization
	sub	\$12,$len
	mov	\$0x60*2,$ret
	vpshufb	$Ii,$Xi,$Xi

	call	_aesni_ctr32_ghash_6x
	vmovdqu	0x20(%rsp),$Z3		# I[5]
	vmovdqu	($const),$Ii		# borrow $Ii for .Lbswap_mask
	vmovdqu	0x00-0x20($Xip),$Hkey	# $Hkey^1
	vpunpckhqdq	$Z3,$Z3,$T1
	vmovdqu	0x20-0x20($Xip),$rndkey	# borrow $rndkey for $HK
	vmovups	$inout0,-0x60($out)	# save output
	vpshufb	$Ii,$inout0,$inout0	# but keep bswapped copy
	vpxor	$Z3,$T1,$T1
	vmovups	$inout1,-0x50($out)
	vpshufb	$Ii,$inout1,$inout1
	vmovups	$inout2,-0x40($out)
	vpshufb	$Ii,$inout2,$inout2
	vmovups	$inout3,-0x30($out)
	vpshufb	$Ii,$inout3,$inout3
	vmovups	$inout4,-0x20($out)
	vpshufb	$Ii,$inout4,$inout4
	vmovups	$inout5,-0x10($out)
	vpshufb	$Ii,$inout5,$inout5
	vmovdqu	$inout0,0x10(%rsp)	# free $inout0
___
{ my ($HK,$T3)=($rndkey,$inout0);

$code.=<<___;
	vmovdqu	0x30(%rsp),$Z2		# I[4]
	vmovdqu	0x10-0x20($Xip),$Ii	# borrow $Ii for $Hkey^2
	vpunpckhqdq	$Z2,$Z2,$T2
	vpclmulqdq	\$0x00,$Hkey,$Z3,$Z1
	vpxor	$Z2,$T2,$T2
	vpclmulqdq	\$0x11,$Hkey,$Z3,$Z3
	vpclmulqdq	\$0x00,$HK,$T1,$T1

	vmovdqu	0x40(%rsp),$T3		# I[3]
	vpclmulqdq	\$0x00,$Ii,$Z2,$Z0
	vmovdqu	0x30-0x20($Xip),$Hkey	# $Hkey^3
	vpxor	$Z1,$Z0,$Z0
	vpunpckhqdq	$T3,$T3,$Z1
	vpclmulqdq	\$0x11,$Ii,$Z2,$Z2
	vpxor	$T3,$Z1,$Z1
	vpxor	$Z3,$Z2,$Z2
	vpclmulqdq	\$0x10,$HK,$T2,$T2
	vmovdqu	0x50-0x20($Xip),$HK
	vpxor	$T1,$T2,$T2

	vmovdqu	0x50(%rsp),$T1		# I[2]
	vpclmulqdq	\$0x00,$Hkey,$T3,$Z3
	vmovdqu	0x40-0x20($Xip),$Ii	# borrow $Ii for $Hkey^4
	vpxor	$Z0,$Z3,$Z3
	vpunpckhqdq	$T1,$T1,$Z0
	vpclmulqdq	\$0x11,$Hkey,$T3,$T3
	vpxor	$T1,$Z0,$Z0
	vpxor	$Z2,$T3,$T3
	vpclmulqdq	\$0x00,$HK,$Z1,$Z1
	vpxor	$T2,$Z1,$Z1

	vmovdqu	0x60(%rsp),$T2		# I[1]
	vpclmulqdq	\$0x00,$Ii,$T1,$Z2
	vmovdqu	0x60-0x20($Xip),$Hkey	# $Hkey^5
	vpxor	$Z3,$Z2,$Z2
	vpunpckhqdq	$T2,$T2,$Z3
	vpclmulqdq	\$0x11,$Ii,$T1,$T1
	vpxor	$T2,$Z3,$Z3
	vpxor	$T3,$T1,$T1
	vpclmulqdq	\$0x10,$HK,$Z0,$Z0
	vmovdqu	0x80-0x20($Xip),$HK
	vpxor	$Z1,$Z0,$Z0

	vpxor	0x70(%rsp),$Xi,$Xi	# accumulate I[0]
	vpclmulqdq	\$0x00,$Hkey,$T2,$Z1
	vmovdqu	0x70-0x20($Xip),$Ii	# borrow $Ii for $Hkey^6
	vpunpckhqdq	$Xi,$Xi,$T3
	vpxor	$Z2,$Z1,$Z1
	vpclmulqdq	\$0x11,$Hkey,$T2,$T2
	vpxor	$Xi,$T3,$T3
	vpxor	$T1,$T2,$T2
	vpclmulqdq	\$0x00,$HK,$Z3,$Z3
	vpxor	$Z0,$Z3,$Z0

	vpclmulqdq	\$0x00,$Ii,$Xi,$Z2
	vmovdqu	0x00-0x20($Xip),$Hkey	# $Hkey^1
	vpunpckhqdq	$inout5,$inout5,$T1
	vpclmulqdq	\$0x11,$Ii,$Xi,$Xi
	vpxor	$inout5,$T1,$T1
	vpxor	$Z1,$Z2,$Z1
	vpclmulqdq	\$0x10,$HK,$T3,$T3
	vmovdqu	0x20-0x20($Xip),$HK
	vpxor	$T2,$Xi,$Z3
	vpxor	$Z0,$T3,$Z2

	vmovdqu	0x10-0x20($Xip),$Ii	# borrow $Ii for $Hkey^2
	vpxor	$Z1,$Z3,$T3		# aggregated Karatsuba post-processing
	vpclmulqdq	\$0x00,$Hkey,$inout5,$Z0
	vpxor	$T3,$Z2,$Z2
	vpunpckhqdq	$inout4,$inout4,$T2
	vpclmulqdq	\$0x11,$Hkey,$inout5,$inout5
	vpxor	$inout4,$T2,$T2
	vpslldq	\$8,$Z2,$T3
	vpclmulqdq	\$0x00,$HK,$T1,$T1
	vpxor	$T3,$Z1,$Xi
	vpsrldq	\$8,$Z2,$Z2
	vpxor	$Z2,$Z3,$Z3

	vpclmulqdq	\$0x00,$Ii,$inout4,$Z1
	vmovdqu	0x30-0x20($Xip),$Hkey	# $Hkey^3
	vpxor	$Z0,$Z1,$Z1
	vpunpckhqdq	$inout3,$inout3,$T3
	vpclmulqdq	\$0x11,$Ii,$inout4,$inout4
	vpxor	$inout3,$T3,$T3
	vpxor	$inout5,$inout4,$inout4
	vpalignr	\$8,$Xi,$Xi,$inout5	# 1st phase
	vpclmulqdq	\$0x10,$HK,$T2,$T2
	vmovdqu	0x50-0x20($Xip),$HK
	vpxor	$T1,$T2,$T2

	vpclmulqdq	\$0x00,$Hkey,$inout3,$Z0
	vmovdqu	0x40-0x20($Xip),$Ii	# borrow $Ii for $Hkey^4
	vpxor	$Z1,$Z0,$Z0
	vpunpckhqdq	$inout2,$inout2,$T1
	vpclmulqdq	\$0x11,$Hkey,$inout3,$inout3
	vpxor	$inout2,$T1,$T1
	vpxor	$inout4,$inout3,$inout3
	vxorps	0x10(%rsp),$Z3,$Z3	# accumulate $inout0
	vpclmulqdq	\$0x00,$HK,$T3,$T3
	vpxor	$T2,$T3,$T3

	vpclmulqdq	\$0x10,0x10($const),$Xi,$Xi
	vxorps	$inout5,$Xi,$Xi

	vpclmulqdq	\$0x00,$Ii,$inout2,$Z1
	vmovdqu	0x60-0x20($Xip),$Hkey	# $Hkey^5
	vpxor	$Z0,$Z1,$Z1
	vpunpckhqdq	$inout1,$inout1,$T2
	vpclmulqdq	\$0x11,$Ii,$inout2,$inout2
	vpxor	$inout1,$T2,$T2
	vpalignr	\$8,$Xi,$Xi,$inout5	# 2nd phase
	vpxor	$inout3,$inout2,$inout2
	vpclmulqdq	\$0x10,$HK,$T1,$T1
	vmovdqu	0x80-0x20($Xip),$HK
	vpxor	$T3,$T1,$T1

	vxorps	$Z3,$inout5,$inout5
	vpclmulqdq	\$0x10,0x10($const),$Xi,$Xi
	vxorps	$inout5,$Xi,$Xi

	vpclmulqdq	\$0x00,$Hkey,$inout1,$Z0
	vmovdqu	0x70-0x20($Xip),$Ii	# borrow $Ii for $Hkey^6
	vpxor	$Z1,$Z0,$Z0
	vpunpckhqdq	$Xi,$Xi,$T3
	vpclmulqdq	\$0x11,$Hkey,$inout1,$inout1
	vpxor	$Xi,$T3,$T3
	vpxor	$inout2,$inout1,$inout1
	vpclmulqdq	\$0x00,$HK,$T2,$T2
	vpxor	$T1,$T2,$T2

	vpclmulqdq	\$0x00,$Ii,$Xi,$Z1
	vpclmulqdq	\$0x11,$Ii,$Xi,$Z3
	vpxor	$Z0,$Z1,$Z1
	vpclmulqdq	\$0x10,$HK,$T3,$Z2
	vpxor	$inout1,$Z3,$Z3
	vpxor	$T2,$Z2,$Z2

	vpxor	$Z1,$Z3,$Z0		# aggregated Karatsuba post-processing
	vpxor	$Z0,$Z2,$Z2
	vpslldq	\$8,$Z2,$T1
	vmovdqu	0x10($const),$Hkey	# .Lpoly
	vpsrldq	\$8,$Z2,$Z2
	vpxor	$T1,$Z1,$Xi
	vpxor	$Z2,$Z3,$Z3

	vpalignr	\$8,$Xi,$Xi,$T2	# 1st phase
	vpclmulqdq	\$0x10,$Hkey,$Xi,$Xi
	vpxor	$T2,$Xi,$Xi

	vpalignr	\$8,$Xi,$Xi,$T2	# 2nd phase
	vpclmulqdq	\$0x10,$Hkey,$Xi,$Xi
	vpxor	$Z3,$T2,$T2
	vpxor	$T2,$Xi,$Xi
___
}
$code.=<<___;
	vpshufb	($const),$Xi,$Xi	# .Lbswap_mask
	vmovdqu	$Xi,-0x40($Xip)		# output Xi

	vzeroupper
___
$code.=<<___ if ($win64);
	movaps	-0xd8(%rax),%xmm6
	movaps	-0xc8(%rax),%xmm7
	movaps	-0xb8(%rax),%xmm8
	movaps	-0xa8(%rax),%xmm9
	movaps	-0x98(%rax),%xmm10
	movaps	-0x88(%rax),%xmm11
	movaps	-0x78(%rax),%xmm12
	movaps	-0x68(%rax),%xmm13
	movaps	-0x58(%rax),%xmm14
	movaps	-0x48(%rax),%xmm15
___
$code.=<<___;
	mov	-48(%rax),%r15
.cfi_restore	%r15
	mov	-40(%rax),%r14
.cfi_restore	%r14
	mov	-32(%rax),%r13
.cfi_restore	%r13
	mov	-24(%rax),%r12
.cfi_restore	%r12
	mov	-16(%rax),%rbp
.cfi_restore	%rbp
	mov	-8(%rax),%rbx
.cfi_restore	%rbx
	lea	(%rax),%rsp		# restore %rsp
.cfi_def_cfa_register	%rsp
.Lgcm_enc_abort:
	mov	$ret,%rax		# return value
	ret
.cfi_endproc
.size	aesni_gcm_encrypt,.-aesni_gcm_encrypt
___

$code.=<<___;
.align	64
.Lbswap_mask:
	.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
	.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
	.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
	.byte	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
	.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.asciz	"AES-NI GCM module for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align	64
___

# Win64 SEH: unwind handler plus .pdata/.xdata records for the two
# public entry points.
if ($win64) {
$rec="%rcx";
$frame="%rdx";
$context="%r8";
$disp="%r9";

$code.=<<___
.extern	__imp_RtlVirtualUnwind
.type	gcm_se_handler,\@abi-omnipotent
.align	16
gcm_se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# prologue label
	cmp	%r10,%rbx		# context->Rip<prologue label
	jb	.Lcommon_seh_tail

	mov	152($context),%rax	# pull context->Rsp

	mov	4(%r11),%r10d		# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lcommon_seh_tail

	mov	120($context),%rax	# pull context->Rax

	mov	-48(%rax),%r15
	mov	-40(%rax),%r14
	mov	-32(%rax),%r13
	mov	-24(%rax),%r12
	mov	-16(%rax),%rbp
	mov	-8(%rax),%rbx
	mov	%r15,240($context)
	mov	%r14,232($context)
	mov	%r13,224($context)
	mov	%r12,216($context)
	mov	%rbp,160($context)
	mov	%rbx,144($context)

	lea	-0xd8(%rax),%rsi	# %xmm save area
	lea	512($context),%rdi	# & context.Xmm6
	mov	\$20,%ecx		# 10*sizeof(%xmm0)/sizeof(%rax)
	.long	0xa548f3fc		# cld; rep movsq

.Lcommon_seh_tail:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$154,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	gcm_se_handler,.-gcm_se_handler

.section	.pdata
.align	4
	.rva	.LSEH_begin_aesni_gcm_decrypt
	.rva	.LSEH_end_aesni_gcm_decrypt
	.rva	.LSEH_gcm_dec_info

	.rva	.LSEH_begin_aesni_gcm_encrypt
	.rva	.LSEH_end_aesni_gcm_encrypt
	.rva	.LSEH_gcm_enc_info
.section	.xdata
.align	8
.LSEH_gcm_dec_info:
	.byte	9,0,0,0
	.rva	gcm_se_handler
	.rva	.Lgcm_dec_body,.Lgcm_dec_abort
.LSEH_gcm_enc_info:
	.byte	9,0,0,0
	.rva	gcm_se_handler
	.rva	.Lgcm_enc_body,.Lgcm_enc_abort
___
}
}}} else {{{
$code=<<___;	# assembler is too old
.text

.globl	aesni_gcm_encrypt
.type	aesni_gcm_encrypt,\@abi-omnipotent
aesni_gcm_encrypt:
	xor	%eax,%eax
	ret
.size	aesni_gcm_encrypt,.-aesni_gcm_encrypt

.globl	aesni_gcm_decrypt
.type	aesni_gcm_decrypt,\@abi-omnipotent
aesni_gcm_decrypt:
	xor	%eax,%eax
	ret
.size	aesni_gcm_decrypt,.-aesni_gcm_decrypt
___
}}}

$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# Output goes through a pipe to the perlasm translator; buffered-write
# errors are only reported at close, so an unchecked close could leave
# a truncated assembly file while still exiting 0.
close STDOUT or die "error closing STDOUT: $!";