#! /usr/bin/env perl
# Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# June 2011
#
# This is RC4+MD5 "stitch" implementation. The idea, as spelled in
# http://download.intel.com/design/intarch/papers/323686.pdf, is that
# since both algorithms exhibit instruction-level parallelism, ILP,
# below theoretical maximum, interleaving them would allow to utilize
# processor resources better and achieve better performance. RC4
# instruction sequence is virtually identical to rc4-x86_64.pl, which
# is heavily based on submission by Maxim Perminov, Maxim Locktyukhin
# and Jim Guilford of Intel. MD5 is fresh implementation aiming to
# minimize register usage, which was used as "main thread" with RC4
# weaved into it, one RC4 round per one MD5 round. In addition to the
# stitched subroutine the script can generate standalone replacement
# md5_block_asm_data_order and RC4. Below are performance numbers in
# cycles per processed byte, less is better, for the standalone
# subroutines, sum of them, and stitched one:
#
#               RC4     MD5     RC4+MD5 stitch  gain
# Opteron       6.5(*)  5.4     11.9    7.0     +70%(*)
# Core2         6.5     5.8     12.3    7.7     +60%
# Westmere      4.3     5.2     9.5     7.0     +36%
# Sandy Bridge  4.2     5.5     9.7     6.8     +43%
# Ivy Bridge    4.1     5.2     9.3     6.0     +54%
# Haswell       4.0     5.0     9.0     5.7     +60%
# Skylake       6.3(**) 5.0     11.3    5.3     +110%
# Atom          9.3     6.5     15.8    11.1    +42%
# VIA Nano      6.3     5.4     11.7    8.6     +37%
# Bulldozer     4.5     5.4     9.9     7.7     +29%
#
# (*)   rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
#       is +53%...
# (**)  unidentified anomaly;

my ($rc4,$md5)=(1,1);   # what to generate?

# If set to "#", MD5 is stitched into RC4(), but its result is
# discarded. Idea here is to be able to use 'openssl speed rc4' for
# benchmarking the stitched subroutine...
#
# Declared unconditionally: "my $x = ... if COND" is documented in
# perlsyn as having undefined behaviour. The empty string is false and
# interpolates to nothing, exactly like the previously-undefined value.
my $D = $md5 ? "" : "#";

# Command line: [flavour] output.  A first argument containing a dot is
# taken to be the output file name, with no flavour given.
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the perlasm translator next to this script or in ../../perlasm.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator. Fail loudly if the pipe cannot be set up instead of
# silently producing no output.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs);

# Pick the entry point name and the argument registers for the variant
# selected by $rc4/$md5.
if ($rc4 && !$md5) {
  ($dat,$len,$in0,$out) = ("%rdi","%rsi","%rdx","%rcx");
  $func="RC4";                          $nargs=4;
} elsif ($md5 && !$rc4) {
  ($ctx,$inp,$len) = ("%rdi","%rsi","%rdx");
  $func="md5_block_asm_data_order";     $nargs=3;
} else {
  ($dat,$in0,$out,$ctx,$inp,$len) = ("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
  $func="rc4_md5_enc";                  $nargs=6;
  # void rc4_md5_enc(
  #             RC4_KEY *key,           #
  #             const void *in0,        # RC4 input
  #             void *out,              # RC4 output
  #             MD5_CTX *ctx,           #
  #             const void *inp,        # MD5 input
  #             size_t len);            # number of 64-byte blocks
}

# MD5 additive constants, one per round, indexed by round number 0..63.
my @K=( 0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,
        0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
        0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,
        0x6b901122,0xfd987193,0xa679438e,0x49b40821,

        0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,
        0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
        0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,
        0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,

        0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,
        0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
        0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
        0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,

        0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,
        0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
        0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,
        0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391     );

my @V=("%r8d","%r9d","%r10d","%r11d");  # MD5 registers
my $tmp="%r12d";

my @XX=("%rbp","%rsi");                 # RC4 registers
my @TX=("%rax","%rbx");
my $YY="%rcx";
my $TY="%rdx";

my $MOD=32;                             # 16, 32 or 64

# Notation used in the assembly templates below:
#   "#md5#" / "#rc4#" line prefixes are removed at the end of the script
#       when the corresponding algorithm is enabled; otherwise the line
#       keeps its leading "#".
#   "%reg#b", "%reg#w", "%reg#d" are rewritten by reg_part().
#   Text between backticks is evaluated as Perl and replaced by the result.
#
# Stack frame after "sub $40,%rsp" (offsets from %rsp), as used below:
#    0..15  saved MD5 hash value for the block being processed
#   16      pointer to the end of input
#   24      pointer to MD5_CTX
#   32      original $len (RC4 code paths only)
#   40..87  saved %r15,%r14,%r13,%r12,%rbp,%rbx

$code.=<<___;
.text
.align 16

.globl	$func
.type	$func,\@function,$nargs
$func:
.cfi_startproc
	cmp	\$0,$len
	je	.Labort
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	sub	\$40,%rsp
.cfi_adjust_cfa_offset	40
.Lbody:
___
if ($rc4) {
$code.=<<___;
$D#md5#	mov	$ctx,%r11		# reassign arguments
	mov	$len,%r12
	mov	$in0,%r13
	mov	$out,%r14
$D#md5#	mov	$inp,%r15
___
    $ctx="%r11" if ($md5);              # reassign arguments
    $len="%r12";
    $in0="%r13";
    $out="%r14";
    $inp="%r15" if ($md5);
    $inp=$in0   if (!$md5);
$code.=<<___;
	xor	$XX[0],$XX[0]
	xor	$YY,$YY

	lea	8($dat),$dat
	mov	-8($dat),$XX[0]#b
	mov	-4($dat),$YY#b

	inc	$XX[0]#b
	sub	$in0,$out
	movl	($dat,$XX[0],4),$TX[0]#d
___
# Standalone RC4 only: byte-at-a-time warm-up loop that runs before the
# unrolled main loop is entered.
$code.=<<___ if (!$md5);
	xor	$TX[1],$TX[1]
	test	\$-128,$len
	jz	.Loop1
	sub	$XX[0],$TX[1]
	and	\$`$MOD-1`,$TX[1]
	jz	.Loop${MOD}_is_hot
	sub	$TX[1],$len
.Loop${MOD}_warmup:
	add	$TX[0]#b,$YY#b
	movl	($dat,$YY,4),$TY#d
	movl	$TX[0]#d,($dat,$YY,4)
	movl	$TY#d,($dat,$XX[0],4)
	add	$TY#b,$TX[0]#b
	inc	$XX[0]#b
	movl	($dat,$TX[0],4),$TY#d
	movl	($dat,$XX[0],4),$TX[0]#d
	xorb	($in0),$TY#b
	movb	$TY#b,($out,$in0)
	lea	1($in0),$in0
	dec	$TX[1]
	jnz	.Loop${MOD}_warmup

	mov	$YY,$TX[1]
	xor	$YY,$YY
	mov	$TX[1]#b,$YY#b

.Loop${MOD}_is_hot:
	mov	$len,32(%rsp)		# save original $len
	shr	\$6,$len		# number of 64-byte blocks
___
  if ($D && !$md5) {                    # stitch in dummy MD5
    $md5=1;
    $ctx="%r11";
    $inp="%r15";
    $code.=<<___;
	mov	%rsp,$ctx
	mov	$in0,$inp
___
  }
}
$code.=<<___;
#rc4#	add	$TX[0]#b,$YY#b
#rc4#	lea	($dat,$XX[0],4),$XX[1]
	shl	\$6,$len
	add	$inp,$len		# pointer to the end of input
	mov	$len,16(%rsp)

#md5#	mov	$ctx,24(%rsp)		# save pointer to MD5_CTX
#md5#	mov	0*4($ctx),$V[0]		# load current hash value from MD5_CTX
#md5#	mov	1*4($ctx),$V[1]
#md5#	mov	2*4($ctx),$V[2]
#md5#	mov	3*4($ctx),$V[3]
	jmp	.Loop

.align	16
.Loop:
#md5#	mov	$V[0],0*4(%rsp)		# put aside current hash value
#md5#	mov	$V[1],1*4(%rsp)
#md5#	mov	$V[2],2*4(%rsp)
#md5#	mov	$V[3],$tmp		# forward reference
#md5#	mov	$V[3],3*4(%rsp)
___

# Round emitters R0..R3.
#
# Each call appends to $code the instructions for one MD5 round with one
# RC4 round weaved into it. Arguments:
#   $i            round number, 0..63 (selects $K[$i])
#   $a,$b,$c,$d   names of the registers holding the MD5 working
#                 variables for this round (the caller rotates @V
#                 between calls)
# Derived values:
#   $j = $i%16    position within the current group of 16 rounds
#   $k = $i%$MOD  position within the $MOD-round RC4 unroll window
#   $xmm          %xmm0 for even $j, %xmm1 for odd $j; the RC4 bytes are
#                 inserted there with pinsrw
# On the last round of a group ($j==15) the 16 input bytes at the
# group's offset from $in0 are loaded and xored with %xmm0 and the
# shifted %xmm1, giving %xmm2..%xmm5 for groups 0..3.
# The "forward reference" mov preloads $tmp with the value the next
# round starts its boolean function from.

# R0: rounds 0..15; message word index is $j, rotations 7/12/17/22.
sub R0 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot0=(7,12,17,22);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	($in0),%xmm2\n"		if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	and	$b,$tmp
#md5#	add	4*`$j`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#md5#	xor	$d,$tmp
#rc4#	movz	$TX[0]#b,$TX[0]#d
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot0[$j%4],$a
#md5#	mov	`$j==15?"$b":"$c"`,$tmp		# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm1,%xmm2
___
}
# R1: rounds 16..31; message word index is (1+5*$j)%16,
# rotations 5/9/14/20.
sub R1 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot1=(5,9,14,20);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	16($in0),%xmm3\n"	if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$b,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	and	$d,$tmp
#md5#	add	4*`((1+5*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#md5#	xor	$c,$tmp
#rc4#	movz	$TX[0]#b,$TX[0]#d
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot1[$j%4],$a
#md5#	mov	`$j==15?"$c":"$b"`,$tmp		# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
___
}
# R2: rounds 32..47; message word index is (5+3*$j)%16,
# rotations 4/11/16/23.
sub R2 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot2=(4,11,16,23);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	32($in0),%xmm4\n"	if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	xor	$b,$tmp
#md5#	add	4*`((5+3*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#rc4#	movz	$TX[0]#b,$TX[0]#d
#md5#	add	$tmp,$a
#rc4#	movl	$TY#d,4*$k($XX[1])
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot2[$j%4],$a
#md5#	mov	`$j==15?"\\\$-1":"$c"`,$tmp	# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm4
___
}
# R3: rounds 48..63; message word index is (7*$j)%16,
# rotations 6/10/15/21. Unlike R0..R2, on $j==15 this emitter always
# re-normalizes both $XX[0] and $YY to their low bytes and reloads
# $XX[1], regardless of $k.
sub R3 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot3=(6,10,15,21);
  my $j=$i%16;
  my $k=$i%$MOD;
  my $xmm="%xmm".($j&1);
    $code.="	movdqu	48($in0),%xmm5\n"	if ($rc4 && $j==15);
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$d,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	or	$b,$tmp
#md5#	add	4*`((7*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#rc4#	movz	$TX[0]#b,$TX[0]#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot3[$j%4],$a
#md5#	mov	\$-1,$tmp			# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    $code.=<<___ if ($rc4 && $j==15);
	mov	$XX[0],$XX[1]
	xor	$XX[0],$XX[0]			# keyword to partial register
	mov	$XX[1]#b,$XX[0]#b
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm5
	pxor	%xmm1,%xmm5
___
}

# Emit all 64 rounds. After every round the MD5 register roles rotate by
# one position and the two RC4 temporaries swap.
my $i=0;
for(;$i<16;$i++) { R0($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
for(;$i<32;$i++) { R1($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
for(;$i<48;$i++) { R2($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }
for(;$i<64;$i++) { R3($i,@V); unshift(@V,pop(@V)); push(@TX,shift(@TX)); }

$code.=<<___;
#md5#	add	0*4(%rsp),$V[0]		# accumulate hash value
#md5#	add	1*4(%rsp),$V[1]
#md5#	add	2*4(%rsp),$V[2]
#md5#	add	3*4(%rsp),$V[3]

#rc4#	movdqu	%xmm2,($out,$in0)	# write RC4 output
#rc4#	movdqu	%xmm3,16($out,$in0)
#rc4#	movdqu	%xmm4,32($out,$in0)
#rc4#	movdqu	%xmm5,48($out,$in0)
#md5#	lea	64($inp),$inp
#rc4#	lea	64($in0),$in0
	cmp	16(%rsp),$inp		# are we done?
	jb	.Loop

#md5#	mov	24(%rsp),$len		# restore pointer to MD5_CTX
#rc4#	sub	$TX[0]#b,$YY#b		# correct $YY
#md5#	mov	$V[0],0*4($len)		# write MD5_CTX
#md5#	mov	$V[1],1*4($len)
#md5#	mov	$V[2],2*4($len)
#md5#	mov	$V[3],3*4($len)
___
# Byte-at-a-time RC4 tail for the bytes left over after the 64-byte
# blocks; only emitted for the RC4() entry point variants.
$code.=<<___ if ($rc4 && (!$md5 || $D));
	mov	32(%rsp),$len		# restore original $len
	and	\$63,$len		# remaining bytes
	jnz	.Loop1
	jmp	.Ldone

.align	16
.Loop1:
	add	$TX[0]#b,$YY#b
	movl	($dat,$YY,4),$TY#d
	movl	$TX[0]#d,($dat,$YY,4)
	movl	$TY#d,($dat,$XX[0],4)
	add	$TY#b,$TX[0]#b
	inc	$XX[0]#b
	movl	($dat,$TX[0],4),$TY#d
	movl	($dat,$XX[0],4),$TX[0]#d
	xorb	($in0),$TY#b
	movb	$TY#b,($out,$in0)
	lea	1($in0),$in0
	dec	$len
	jnz	.Loop1

.Ldone:
___
$code.=<<___;
#rc4#	sub	\$1,$XX[0]#b
#rc4#	movl	$XX[0]#d,-8($dat)
#rc4#	movl	$YY#d,-4($dat)

	mov	40(%rsp),%r15
.cfi_restore	%r15
	mov	48(%rsp),%r14
.cfi_restore	%r14
	mov	56(%rsp),%r13
.cfi_restore	%r13
	mov	64(%rsp),%r12
.cfi_restore	%r12
	mov	72(%rsp),%rbp
.cfi_restore	%rbp
	mov	80(%rsp),%rbx
.cfi_restore	%rbx
	lea	88(%rsp),%rsp
.cfi_adjust_cfa_offset	-88
.Lepilogue:
.Labort:
	ret
.cfi_endproc
.size $func,.-$func
___

if ($rc4 && $D) {       # sole purpose of this section is to provide
                        # option to use the generated module as drop-in
                        # replacement for rc4-x86_64.pl for debugging
                        # and testing purposes...
my ($idx,$ido)=("%r8","%r9");
my ($dat,$len,$inp)=("%rdi","%rsi","%rdx");

$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function,3
.align	16
RC4_set_key:
.cfi_startproc
	lea	8($dat),$dat
	lea	($inp,$len),$inp
	neg	$len
	mov	$len,%rcx
	xor	%eax,%eax
	xor	$ido,$ido
	xor	%r10,%r10
	xor	%r11,%r11
	jmp	.Lw1stloop

.align	16
.Lw1stloop:
	mov	%eax,($dat,%rax,4)
	add	\$1,%al
	jnc	.Lw1stloop

	xor	$ido,$ido
	xor	$idx,$idx
.align	16
.Lw2ndloop:
	mov	($dat,$ido,4),%r10d
	add	($inp,$len,1),$idx#b
	add	%r10b,$idx#b
	add	\$1,$len
	mov	($dat,$idx,4),%r11d
	cmovz	%rcx,$len
	mov	%r10d,($dat,$idx,4)
	mov	%r11d,($dat,$ido,4)
	add	\$1,$ido#b
	jnc	.Lw2ndloop

	xor	%eax,%eax
	mov	%eax,-8($dat)
	mov	%eax,-4($dat)
	ret
.cfi_endproc
.size	RC4_set_key,.-RC4_set_key

.globl	RC4_options
.type	RC4_options,\@abi-omnipotent
.align	16
RC4_options:
	lea	.Lopts(%rip),%rax
	ret
.align	64
.Lopts:
.asciz	"rc4(64x,int)"
.align	64
.size	RC4_options,.-RC4_options
___
}
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
if ($win64) {
my $rec="%rcx";
my $frame="%rdx";
my $context="%r8";
my $disp="%r9";

$code.=<<___;
.extern	__imp_RtlVirtualUnwind
.type	se_handler,\@abi-omnipotent
.align	16
se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	lea	.Lbody(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lbody
	jb	.Lin_prologue

	mov	152($context),%rax	# pull context->Rsp

	lea	.Lepilogue(%rip),%r10
	cmp	%r10,%rbx		# context->Rip>=.Lepilogue
	jae	.Lin_prologue

	mov	40(%rax),%r15
	mov	48(%rax),%r14
	mov	56(%rax),%r13
	mov	64(%rax),%r12
	mov	72(%rax),%rbp
	mov	80(%rax),%rbx
	lea	88(%rax),%rax

	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

.Lin_prologue:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$154,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	se_handler,.-se_handler

.section	.pdata
.align	4
	.rva	.LSEH_begin_$func
	.rva	.LSEH_end_$func
	.rva	.LSEH_info_$func

.section	.xdata
.align	8
.LSEH_info_$func:
	.byte	9,0,0,0
	.rva	se_handler
___
}

# reg_part(REG, CONV): map a register name plus a one-letter size suffix
# to the matching sub-register name.
#   REG   register as written in the templates, e.g. "%rax" or "%r8"
#   CONV  "b" (byte), "w" (word) or "d" (dword)
# Numbered registers simply get CONV appended (%r8 + d -> %r8d). For the
# others the leading e/r is dropped and, for "b", a trailing "x" is
# replaced by "l" (%rax -> %al, %rbp -> %bpl); "w" gives %ax and "d"
# gives %eax. Returns the rewritten name.
sub reg_part {
my ($reg,$conv)=@_;
    if ($reg =~ /%r[0-9]+/)     { $reg .= $conv; }
    elsif ($conv eq "b")        { $reg =~ s/%[er]([^x]+)x?/%$1l/;       }
    elsif ($conv eq "w")        { $reg =~ s/%[er](.+)/%$1/;             }
    elsif ($conv eq "d")        { $reg =~ s/%[er](.+)/%e$1/;            }
    return $reg;
}

# Post-processing of the accumulated template text:
#   1. resolve "%reg#b/#w/#d" through reg_part();
#   2. evaluate backtick-quoted Perl expressions;
#   3. replace "pinsrw $0," with movd;
#   4. strip the "#md5#"/"#rc4#" prefixes of the enabled algorithms.
$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/pinsrw\s+\$0,/movd	/gm;

$code =~ s/#md5#//gm	if ($md5);
$code =~ s/#rc4#//gm	if ($rc4);

print $code;

close STDOUT or die "error closing STDOUT: $!";