1#! /usr/bin/env perl
2# Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# June 2011
18#
19# This is RC4+MD5 "stitch" implementation. The idea, as spelled in
20# http://download.intel.com/design/intarch/papers/323686.pdf, is that
21# since both algorithms exhibit instruction-level parallelism, ILP,
22# below theoretical maximum, interleaving them would allow to utilize
23# processor resources better and achieve better performance. RC4
24# instruction sequence is virtually identical to rc4-x86_64.pl, which
25# is heavily based on submission by Maxim Perminov, Maxim Locktyukhin
26# and Jim Guilford of Intel. MD5 is fresh implementation aiming to
27# minimize register usage, which was used as "main thread" with RC4
28# weaved into it, one RC4 round per one MD5 round. In addition to the
# stitched subroutine the script can generate standalone replacement
# md5_block_asm_data_order and RC4. Below are performance numbers in
# cycles per processed byte, less is better, for the standalone
# subroutines, sum of them, and stitched one:
33#
34#		RC4	MD5	RC4+MD5	stitch	gain
35# Opteron	6.5(*)	5.4	11.9	7.0	+70%(*)
36# Core2		6.5	5.8	12.3	7.7	+60%
37# Westmere	4.3	5.2	9.5	7.0	+36%
38# Sandy Bridge	4.2	5.5	9.7	6.8	+43%
39# Ivy Bridge	4.1	5.2	9.3	6.0	+54%
40# Haswell	4.0	5.0	9.0	5.7	+60%
41# Skylake	6.3(**)	5.0	11.3	5.3	+110%
42# Atom		9.3	6.5	15.8	11.1	+42%
43# VIA Nano	6.3	5.4	11.7	8.6	+37%
44# Bulldozer	4.5	5.4	9.9	7.7	+29%
45#
46# (*)	rc4-x86_64.pl delivers 5.3 on Opteron, so real improvement
47#	is +53%...
48# (**)	unidentified anomaly;
49
my ($rc4,$md5)=(1,1);	# what to generate?
my $D=$md5 ? "" : "#";	# if set to "#", MD5 is stitched into RC4(),
			# but its result is discarded. Idea here is
			# to be able to use 'openssl speed rc4' for
			# benchmarking the stitched subroutine...
			# Initialised unconditionally, because perlsyn
			# leaves "my $x = ... if COND" undefined.

# Command line: [flavour] output. A lone argument containing a dot is
# taken to be the output file name and the flavour is dropped.
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 specifics are generated for nasm/masm/mingw64 flavours or when
# the output file ends in .asm.
my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the perlasm translator next to this script, then in
# ../../perlasm. $dir is empty when $0 has no directory component, so a
# capture left over from an earlier match is never picked up.
my $dir=($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator. Stop right away if the pipe cannot be started, rather
# than running to the end and leaving an empty or missing output file.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
69
my ($dat,$in0,$out,$ctx,$inp,$len, $func,$nargs);

# Choose which routine is generated and bind its arguments, in order, to
# %rdi, %rsi, %rdx, %rcx, %r8 and %r9. Names that a given routine does
# not take are left undefined.
if ($md5 && !$rc4) {
  # MD5 only
  $func  = "md5_block_asm_data_order";
  $nargs = 3;
  $ctx = "%rdi";
  $inp = "%rsi";
  $len = "%rdx";
} elsif ($rc4 && !$md5) {
  # RC4 only
  $func  = "RC4";
  $nargs = 4;
  $dat = "%rdi";
  $len = "%rsi";
  $in0 = "%rdx";
  $out = "%rcx";
} else {
  # stitched RC4+MD5:
  #
  # void rc4_md5_enc(
  #		RC4_KEY *key,		#
  #		const void *in0,	# RC4 input
  #		void *out,		# RC4 output
  #		MD5_CTX *ctx,		#
  #		const void *inp,	# MD5 input
  #		size_t len);		# number of 64-byte blocks
  $func  = "rc4_md5_enc";
  $nargs = 6;
  $dat = "%rdi";
  $in0 = "%rsi";
  $out = "%rdx";
  $ctx = "%rcx";
  $inp = "%r8";
  $len = "%r9";
}
89
# MD5 additive constants, one per round: R0..R3 emit $K[$i] as the
# immediate operand of an "add" in round $i. The four groups of sixteen
# below line up with the four round subroutines.
my @K=(	0xd76aa478,0xe8c7b756,0x242070db,0xc1bdceee,
	0xf57c0faf,0x4787c62a,0xa8304613,0xfd469501,
	0x698098d8,0x8b44f7af,0xffff5bb1,0x895cd7be,
	0x6b901122,0xfd987193,0xa679438e,0x49b40821,

	0xf61e2562,0xc040b340,0x265e5a51,0xe9b6c7aa,
	0xd62f105d,0x02441453,0xd8a1e681,0xe7d3fbc8,
	0x21e1cde6,0xc33707d6,0xf4d50d87,0x455a14ed,
	0xa9e3e905,0xfcefa3f8,0x676f02d9,0x8d2a4c8a,

	0xfffa3942,0x8771f681,0x6d9d6122,0xfde5380c,
	0xa4beea44,0x4bdecfa9,0xf6bb4b60,0xbebfbc70,
	0x289b7ec6,0xeaa127fa,0xd4ef3085,0x04881d05,
	0xd9d4d039,0xe6db99e5,0x1fa27cf8,0xc4ac5665,

	0xf4292244,0x432aff97,0xab9423a7,0xfc93a039,
	0x655b59c3,0x8f0ccc92,0xffeff47d,0x85845dd1,
	0x6fa87e4f,0xfe2ce6e0,0xa3014314,0x4e0811a1,
	0xf7537e82,0xbd3af235,0x2ad7d2bb,0xeb86d391	);

# The four MD5 working variables. The list is rotated by one position
# after every round, so each round subroutine always sees them as
# ($a,$b,$c,$d).
my @V=("%r8d","%r9d","%r10d","%r11d");	# MD5 registers
# Scratch register in which the MD5 round function is evaluated.
my $tmp="%r12d";

# $XX[0] carries the RC4 x index and $XX[1] a pointer into the state
# array derived from it. @TX is a pair of registers that alternately hold
# the state word at x; the pair is swapped after every round. $YY is the
# RC4 y index and $TY receives the state word at y.
my @XX=("%rbp","%rsi");			# RC4 registers
my @TX=("%rax","%rbx");
my $YY="%rcx";
my $TY="%rdx";

# Number of RC4 steps addressed relative to $XX[1] before x is advanced
# and $XX[1] is recomputed (see the $k==$MOD-1 cases in R0..R3).
my $MOD=32;				# 16, 32 or 64
119
# Function prologue. Leave through .Labort when the length argument is
# zero; otherwise push the six callee-saved registers used below and
# reserve 40 bytes of scratch stack. Scratch layout, as used by the code
# emitted further down:
#	 0(%rsp)..15(%rsp)	MD5 state on entry to the current block
#	16(%rsp)		end-of-input pointer
#	24(%rsp)		pointer to MD5_CTX
#	32(%rsp)		original length (RC4-only build)
$code.=<<___;
.text
.align 16

.globl	$func
.type	$func,\@function,$nargs
$func:
.cfi_startproc
	cmp	\$0,$len
	je	.Labort
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
	sub	\$40,%rsp
.cfi_adjust_cfa_offset	40
.Lbody:
___
if ($rc4) {
# The RC4 working registers (@XX, $YY, $TY) coincide with argument
# registers, so the arguments are first moved to %r11..%r15. Lines
# tagged "#md5#" only become live if the final substitution pass strips
# the tag; with $D set to "#" the two tagged lines below stay commented
# out regardless.
$code.=<<___;
$D#md5#	mov	$ctx,%r11		# reassign arguments
	mov	$len,%r12
	mov	$in0,%r13
	mov	$out,%r14
$D#md5#	mov	$inp,%r15
___
    # From here on the Perl-side names refer to the new registers. Without
    # MD5, $inp is simply an alias for the RC4 input pointer.
    $ctx="%r11"	if ($md5);		# reassign arguments
    $len="%r12";
    $in0="%r13";
    $out="%r14";
    $inp="%r15"	if ($md5);
    $inp=$in0	if (!$md5);
# Load the RC4 indices stored in the two 32-bit words in front of the
# state array ($dat is advanced by 8 so they sit at -8 and -4), step x by
# one, turn $out into an offset relative to $in0, and fetch the state
# word at x.
$code.=<<___;
	xor	$XX[0],$XX[0]
	xor	$YY,$YY

	lea	8($dat),$dat
	mov	-8($dat),$XX[0]#b
	mov	-4($dat),$YY#b

	inc	$XX[0]#b
	sub	$in0,$out
	movl	($dat,$XX[0],4),$TX[0]#d
___
# RC4-only build: lengths below 128 are handled entirely by the byte loop
# at .Loop1. Otherwise (-x mod $MOD) bytes are processed one at a time in
# the warm-up loop so that x is a multiple of $MOD when the unrolled loop
# starts; the original length is then saved and converted to a count of
# 64-byte blocks.
$code.=<<___ if (!$md5);
	xor	$TX[1],$TX[1]
	test	\$-128,$len
	jz	.Loop1
	sub	$XX[0],$TX[1]
	and	\$`$MOD-1`,$TX[1]
	jz	.Loop${MOD}_is_hot
	sub	$TX[1],$len
.Loop${MOD}_warmup:
	add	$TX[0]#b,$YY#b
	movl	($dat,$YY,4),$TY#d
	movl	$TX[0]#d,($dat,$YY,4)
	movl	$TY#d,($dat,$XX[0],4)
	add	$TY#b,$TX[0]#b
	inc	$XX[0]#b
	movl	($dat,$TX[0],4),$TY#d
	movl	($dat,$XX[0],4),$TX[0]#d
	xorb	($in0),$TY#b
	movb	$TY#b,($out,$in0)
	lea	1($in0),$in0
	dec	$TX[1]
	jnz	.Loop${MOD}_warmup

	mov	$YY,$TX[1]
	xor	$YY,$YY
	mov	$TX[1]#b,$YY#b

.Loop${MOD}_is_hot:
	mov	$len,32(%rsp)		# save original $len
	shr	\$6,$len		# number of 64-byte blocks
___
  # Benchmarking variant: MD5 is generated after all, but it hashes the
  # RC4 input and uses the scratch stack in place of a real MD5_CTX.
  if ($D && !$md5) {			# stitch in dummy MD5
    $md5=1;
    $ctx="%r11";
    $inp="%r15";
    $code.=<<___;
	mov	%rsp,$ctx
	mov	$in0,$inp
___
  }
}
# Common loop set-up: prime the first RC4 step, convert the block count
# into an end-of-input pointer kept at 16(%rsp), remember the MD5_CTX
# pointer at 24(%rsp) and load the current hash value. At the head of
# .Loop the hash value is put aside on the stack and $tmp is preloaded
# with $V[3] for the first round.
$code.=<<___;
#rc4#	add	$TX[0]#b,$YY#b
#rc4#	lea	($dat,$XX[0],4),$XX[1]
	shl	\$6,$len
	add	$inp,$len		# pointer to the end of input
	mov	$len,16(%rsp)

#md5#	mov	$ctx,24(%rsp)		# save pointer to MD5_CTX
#md5#	mov	0*4($ctx),$V[0]		# load current hash value from MD5_CTX
#md5#	mov	1*4($ctx),$V[1]
#md5#	mov	2*4($ctx),$V[2]
#md5#	mov	3*4($ctx),$V[3]
	jmp	.Loop

.align	16
.Loop:
#md5#	mov	$V[0],0*4(%rsp)		# put aside current hash value
#md5#	mov	$V[1],1*4(%rsp)
#md5#	mov	$V[2],2*4(%rsp)
#md5#	mov	$V[3],$tmp		# forward reference
#md5#	mov	$V[3],3*4(%rsp)
___
235
# R0($i,$a,$b,$c,$d): append MD5 round $i of the first group of sixteen to
# $code, interleaved line by line with one RC4 step.
#
# MD5 ("#md5#" lines): $tmp arrives holding $d and is turned into
# ((c^d)&b)^d. Message word $j, constant $K[$i] and $tmp are added to $a,
# which is then rotated left by $rot0[$j%4] and has $b added. $tmp is
# finally preloaded for the next round ("forward reference"): with $c, or
# with $b after the last round of the group.
#
# RC4 ("#rc4#" lines): the state words at x and y are exchanged, the word
# selected by their 8-bit sum is inserted into %xmm0 (even $j) or %xmm1
# (odd $j), the state word for the next x is fetched and y is advanced by
# it. On the last round of the group 16 input bytes are loaded into %xmm2
# and xored with %xmm0 and with %xmm1 shifted left by 8 bits.
#
# Returns nothing useful; the only effect is on $code.
sub R0 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot0=(7,12,17,22);
  my $j=$i%16;			# position within the group of sixteen
  my $k=$i%$MOD;		# RC4 step relative to the $XX[1] base
  my $xmm="%xmm".($j&1);	# even steps collect in xmm0, odd in xmm1
    $code.="	movdqu	($in0),%xmm2\n"		if ($rc4 && $j==15);
    # x is advanced by $MOD once a full window of steps has been emitted
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    # both collectors are cleared at the start of each group
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	and	$b,$tmp
#md5#	add	4*`$j`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#md5#	xor	$d,$tmp
#rc4#	movz	$TX[0]#b,$TX[0]#d
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot0[$j%4],$a
#md5#	mov	`$j==15?"$b":"$c"`,$tmp		# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    # End of a $MOD-step window: reduce y to its low byte and recompute
    # the $XX[1] base pointer from the advanced x.
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    # Combine the two collectors with the input loaded above.
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm1,%xmm2
___
}
# R1($i,$a,$b,$c,$d): append MD5 round $i of the second group of sixteen
# to $code, interleaved line by line with one RC4 step.
#
# MD5 ("#md5#" lines): $tmp arrives holding $c and is turned into
# ((b^c)&d)^c. Message word (1+5*$j)%16, constant $K[$i] and $tmp are
# added to $a, which is then rotated left by $rot1[$j%4] and has $b added.
# $tmp is finally preloaded for the next round ("forward reference"):
# with $b, or with $c after the last round of the group.
#
# RC4 ("#rc4#" lines): the state words at x and y are exchanged, the word
# selected by their 8-bit sum is inserted into %xmm0 (even $j) or %xmm1
# (odd $j), the state word for the next x is fetched and y is advanced by
# it. On the last round of the group 16 input bytes at offset 16 are
# loaded into %xmm3 and xored with %xmm0 and with %xmm1 shifted left by
# 8 bits.
#
# Returns nothing useful; the only effect is on $code.
sub R1 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot1=(5,9,14,20);
  my $j=$i%16;			# position within the group of sixteen
  my $k=$i%$MOD;		# RC4 step relative to the $XX[1] base
  my $xmm="%xmm".($j&1);	# even steps collect in xmm0, odd in xmm1
    $code.="	movdqu	16($in0),%xmm3\n"	if ($rc4 && $j==15);
    # x is advanced by $MOD once a full window of steps has been emitted
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    # both collectors are cleared at the start of each group
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$b,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	and	$d,$tmp
#md5#	add	4*`((1+5*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#md5#	xor	$c,$tmp
#rc4#	movz	$TX[0]#b,$TX[0]#d
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot1[$j%4],$a
#md5#	mov	`$j==15?"$c":"$b"`,$tmp		# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    # End of a $MOD-step window: reduce y to its low byte and recompute
    # the $XX[1] base pointer from the advanced x.
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    # Combine the two collectors with the input loaded above.
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
___
}
# R2($i,$a,$b,$c,$d): append MD5 round $i of the third group of sixteen to
# $code, interleaved line by line with one RC4 step.
#
# MD5 ("#md5#" lines): $tmp arrives holding $d and is turned into b^c^d.
# Message word (5+3*$j)%16, constant $K[$i] and $tmp are added to $a,
# which is then rotated left by $rot2[$j%4] and has $b added. $tmp is
# finally preloaded for the next round ("forward reference"): with $c, or
# with the constant -1 after the last round of the group.
#
# RC4 ("#rc4#" lines): the state words at x and y are exchanged, the word
# selected by their 8-bit sum is inserted into %xmm0 (even $j) or %xmm1
# (odd $j), the state word for the next x is fetched and y is advanced by
# it. On the last round of the group 16 input bytes at offset 32 are
# loaded into %xmm4 and xored with %xmm0 and with %xmm1 shifted left by
# 8 bits.
#
# Returns nothing useful; the only effect is on $code.
sub R2 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot2=(4,11,16,23);
  my $j=$i%16;			# position within the group of sixteen
  my $k=$i%$MOD;		# RC4 step relative to the $XX[1] base
  my $xmm="%xmm".($j&1);	# even steps collect in xmm0, odd in xmm1
    $code.="	movdqu	32($in0),%xmm4\n"	if ($rc4 && $j==15);
    # x is advanced by $MOD once a full window of steps has been emitted
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    # both collectors are cleared at the start of each group
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	xor	$b,$tmp
#md5#	add	4*`((5+3*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#rc4#	movz	$TX[0]#b,$TX[0]#d
#md5#	add	$tmp,$a
#rc4#	movl	$TY#d,4*$k($XX[1])
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot2[$j%4],$a
#md5#	mov	`$j==15?"\\\$-1":"$c"`,$tmp	# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    # End of a $MOD-step window: reduce y to its low byte and recompute
    # the $XX[1] base pointer from the advanced x.
    $code.=<<___ if ($rc4 && $j==15 && $k==$MOD-1);
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
___
    # Combine the two collectors with the input loaded above.
    $code.=<<___ if ($rc4 && $j==15);
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm4
___
}
# R3($i,$a,$b,$c,$d): append MD5 round $i of the fourth group of sixteen
# to $code, interleaved line by line with one RC4 step.
#
# MD5 ("#md5#" lines): $tmp arrives holding -1 and is turned into
# (~d|b)^c. Message word (7*$j)%16, constant $K[$i] and $tmp are added to
# $a, which is then rotated left by $rot3[$j%4] and has $b added. $tmp is
# always reloaded with -1 ("forward reference").
#
# RC4 ("#rc4#" lines): the state words at x and y are exchanged, the word
# selected by their 8-bit sum is inserted into %xmm0 (even $j) or %xmm1
# (odd $j), the state word for the next x is fetched and y is advanced by
# it. On the last round of the group 16 input bytes at offset 48 are
# loaded into %xmm5 and xored with %xmm0 and with %xmm1 shifted left by
# 8 bits.
#
# Returns nothing useful; the only effect is on $code.
sub R3 {
  my ($i,$a,$b,$c,$d)=@_;
  my @rot3=(6,10,15,21);
  my $j=$i%16;			# position within the group of sixteen
  my $k=$i%$MOD;		# RC4 step relative to the $XX[1] base
  my $xmm="%xmm".($j&1);	# even steps collect in xmm0, odd in xmm1
    $code.="	movdqu	48($in0),%xmm5\n"	if ($rc4 && $j==15);
    # x is advanced by $MOD once a full window of steps has been emitted
    $code.="	add	\$$MOD,$XX[0]#b\n"	if ($rc4 && $j==15 && $k==$MOD-1);
    # both collectors are cleared at the start of each group
    $code.="	pxor	$xmm,$xmm\n"		if ($rc4 && $j<=1);
    $code.=<<___;
#rc4#	movl	($dat,$YY,4),$TY#d
#md5#	xor	$d,$tmp
#rc4#	movl	$TX[0]#d,($dat,$YY,4)
#md5#	or	$b,$tmp
#md5#	add	4*`((7*$j)%16)`($inp),$a
#rc4#	add	$TY#b,$TX[0]#b
#rc4#	movl	`4*(($k+1)%$MOD)`(`$k==$MOD-1?"$dat,$XX[0],4":"$XX[1]"`),$TX[1]#d
#md5#	add	\$$K[$i],$a
#rc4#	movz	$TX[0]#b,$TX[0]#d
#md5#	xor	$c,$tmp
#rc4#	movl	$TY#d,4*$k($XX[1])
#md5#	add	$tmp,$a
#rc4#	add	$TX[1]#b,$YY#b
#md5#	rol	\$$rot3[$j%4],$a
#md5#	mov	\$-1,$tmp			# forward reference
#rc4#	pinsrw	\$`($j>>1)&7`,($dat,$TX[0],4),$xmm\n
#md5#	add	$b,$a
___
    # Last round of the 64-byte block: unlike R0..R2 this is emitted
    # unconditionally on $j==15. Both x and y are reduced to their low
    # bytes and the $XX[1] base pointer is recomputed before the final
    # 16 output bytes are combined.
    $code.=<<___ if ($rc4 && $j==15);
	mov	$XX[0],$XX[1]
	xor	$XX[0],$XX[0]			# keyword to partial register
	mov	$XX[1]#b,$XX[0]#b
	mov	$YY,$XX[1]
	xor	$YY,$YY				# keyword to partial register
	mov	$XX[1]#b,$YY#b
	lea	($dat,$XX[0],4),$XX[1]
	psllq	\$8,%xmm1
	pxor	%xmm0,%xmm5
	pxor	%xmm1,%xmm5
___
}
396
# Unroll all 64 rounds: four groups of sixteen, each group produced by its
# own round subroutine. After every round the MD5 register list is
# rotated by one position (the last register becomes the first) and the
# two RC4 @TX registers swap places, so the next call sees the roles
# expected by the round subroutines.
my $i=0;
foreach my $round (\&R0,\&R1,\&R2,\&R3) {
    for (my $stop=$i+16; $i<$stop; $i++) {
	$round->($i,@V);
	unshift(@V,pop(@V));
	push(@TX,shift(@TX));
    }
}
402
# Tail of the 64-byte loop: add the hash value put aside at the loop head,
# store the four 16-byte RC4 output chunks at $out+$in0, advance both
# input pointers and loop until $inp reaches the end pointer kept at
# 16(%rsp). After the loop the MD5_CTX pointer is reloaded (into the
# register named by $len, which is free at this point) and the hash is
# written back; the y look-ahead added for the next RC4 step is undone.
$code.=<<___;
#md5#	add	0*4(%rsp),$V[0]		# accumulate hash value
#md5#	add	1*4(%rsp),$V[1]
#md5#	add	2*4(%rsp),$V[2]
#md5#	add	3*4(%rsp),$V[3]

#rc4#	movdqu	%xmm2,($out,$in0)	# write RC4 output
#rc4#	movdqu	%xmm3,16($out,$in0)
#rc4#	movdqu	%xmm4,32($out,$in0)
#rc4#	movdqu	%xmm5,48($out,$in0)
#md5#	lea	64($inp),$inp
#rc4#	lea	64($in0),$in0
	cmp	16(%rsp),$inp		# are we done?
	jb	.Loop

#md5#	mov	24(%rsp),$len		# restore pointer to MD5_CTX
#rc4#	sub	$TX[0]#b,$YY#b		# correct $YY
#md5#	mov	$V[0],0*4($len)		# write MD5_CTX
#md5#	mov	$V[1],1*4($len)
#md5#	mov	$V[2],2*4($len)
#md5#	mov	$V[3],3*4($len)
___
# Stand-alone RC4 replacement only: process the bytes left over after the
# last full 64-byte block (original length modulo 64) one at a time.
# .Loop1 is also the target for short inputs in the RC4-only set-up code.
$code.=<<___ if ($rc4 && (!$md5 || $D));
	mov	32(%rsp),$len		# restore original $len
	and	\$63,$len		# remaining bytes
	jnz	.Loop1
	jmp	.Ldone

.align	16
.Loop1:
	add	$TX[0]#b,$YY#b
	movl	($dat,$YY,4),$TY#d
	movl	$TX[0]#d,($dat,$YY,4)
	movl	$TY#d,($dat,$XX[0],4)
	add	$TY#b,$TX[0]#b
	inc	$XX[0]#b
	movl	($dat,$TX[0],4),$TY#d
	movl	($dat,$XX[0],4),$TX[0]#d
	xorb	($in0),$TY#b
	movb	$TY#b,($out,$in0)
	lea	1($in0),$in0
	dec	$len
	jnz	.Loop1

.Ldone:
___
# Epilogue: store x (stepped back by the one it was advanced on entry) and
# y in the two words in front of the RC4 state array, reload the six
# saved registers from above the 40-byte scratch area and release all
# 88 bytes (40 + 6*8) of stack. .Labort is the early exit taken for a
# zero length, before anything was pushed.
$code.=<<___;
#rc4#	sub	\$1,$XX[0]#b
#rc4#	movl	$XX[0]#d,-8($dat)
#rc4#	movl	$YY#d,-4($dat)

	mov	40(%rsp),%r15
.cfi_restore	%r15
	mov	48(%rsp),%r14
.cfi_restore	%r14
	mov	56(%rsp),%r13
.cfi_restore	%r13
	mov	64(%rsp),%r12
.cfi_restore	%r12
	mov	72(%rsp),%rbp
.cfi_restore	%rbp
	mov	80(%rsp),%rbx
.cfi_restore	%rbx
	lea	88(%rsp),%rsp
.cfi_adjust_cfa_offset	-88
.Lepilogue:
.Labort:
	ret
.cfi_endproc
.size $func,.-$func
___
474
if ($rc4 && $D) {	# sole purpose of this section is to provide
			# option to use the generated module as drop-in
			# replacement for rc4-x86_64.pl for debugging
			# and testing purposes...
# $idx and $ido are the two indices of the key-mixing loop. $dat, $len and
# $inp shadow the file-level names with the three arguments of
# RC4_set_key (first, second and third argument register).
my ($idx,$ido)=("%r8","%r9");
my ($dat,$len,$inp)=("%rdi","%rsi","%rdx");

# RC4_set_key: .Lw1stloop fills the 256 32-bit state words with 0..255
# (the loop ends when the byte counter in %al wraps). .Lw2ndloop then
# walks $ido over all 256 positions, adding the current key byte and the
# state word at $ido to $idx and exchanging the words at $ido and $idx.
# The key is indexed with a negative offset counting up towards zero,
# reloaded from %rcx by cmovz each time it gets there. Finally the two
# index words in front of the state array are cleared.
#
# RC4_options: returns the address of the string "rc4(64x,int)".
$code.=<<___;
.globl	RC4_set_key
.type	RC4_set_key,\@function,3
.align	16
RC4_set_key:
.cfi_startproc
	lea	8($dat),$dat
	lea	($inp,$len),$inp
	neg	$len
	mov	$len,%rcx
	xor	%eax,%eax
	xor	$ido,$ido
	xor	%r10,%r10
	xor	%r11,%r11
	jmp	.Lw1stloop

.align	16
.Lw1stloop:
	mov	%eax,($dat,%rax,4)
	add	\$1,%al
	jnc	.Lw1stloop

	xor	$ido,$ido
	xor	$idx,$idx
.align	16
.Lw2ndloop:
	mov	($dat,$ido,4),%r10d
	add	($inp,$len,1),$idx#b
	add	%r10b,$idx#b
	add	\$1,$len
	mov	($dat,$idx,4),%r11d
	cmovz	%rcx,$len
	mov	%r10d,($dat,$idx,4)
	mov	%r11d,($dat,$ido,4)
	add	\$1,$ido#b
	jnc	.Lw2ndloop

	xor	%eax,%eax
	mov	%eax,-8($dat)
	mov	%eax,-4($dat)
	ret
.cfi_endproc
.size	RC4_set_key,.-RC4_set_key

.globl	RC4_options
.type	RC4_options,\@abi-omnipotent
.align	16
RC4_options:
	lea	.Lopts(%rip),%rax
	ret
.align	64
.Lopts:
.asciz	"rc4(64x,int)"
.align	64
.size	RC4_options,.-RC4_options
___
}
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
#
# Win64 only: emit the structured-exception handler for $func together
# with its .pdata/.xdata records.
#
# If the faulting Rip lies between .Lbody and .Lepilogue, the handler
# takes the context's Rsp, reloads the six registers the prologue saved
# from offsets 40..80 and steps over the 88-byte frame. Outside that range
# it keeps the value pulled from context->Rax instead. In both cases Rdi
# and Rsi are fetched from offsets 8 and 16 of the resulting address, the
# context is copied into disp->ContextRecord, RtlVirtualUnwind is called
# and ExceptionContinueSearch (1) is returned.
#
# NOTE(review): in the emitted text the comment on the %r13 store
# (context offset 224) reads "R12"; it presumably should read "R13". The
# instruction itself is consistent with its neighbours.
if ($win64) {
# The handler's four arguments, in Win64 argument-register order.
my $rec="%rcx";
my $frame="%rdx";
my $context="%r8";
my $disp="%r9";

$code.=<<___;
.extern	__imp_RtlVirtualUnwind
.type	se_handler,\@abi-omnipotent
.align	16
se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	lea	.Lbody(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lbody
	jb	.Lin_prologue

	mov	152($context),%rax	# pull context->Rsp

	lea	.Lepilogue(%rip),%r10
	cmp	%r10,%rbx		# context->Rip>=.Lepilogue
	jae	.Lin_prologue

	mov	40(%rax),%r15
	mov	48(%rax),%r14
	mov	56(%rax),%r13
	mov	64(%rax),%r12
	mov	72(%rax),%rbp
	mov	80(%rax),%rbx
	lea	88(%rax),%rax

	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R12
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

.Lin_prologue:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$154,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	se_handler,.-se_handler

.section	.pdata
.align	4
	.rva	.LSEH_begin_$func
	.rva	.LSEH_end_$func
	.rva	.LSEH_info_$func

.section	.xdata
.align	8
.LSEH_info_$func:
	.byte	9,0,0,0
	.rva	se_handler
___
}
644
# reg_part($reg,$conv): map a 64-bit register name to the name of its
# byte ("b"), word ("w") or doubleword ("d") part.
#
#   $reg   register name including the leading "%"
#   $conv  one of "b", "w" or "d"
#
# Numbered registers (%r8 and up) simply get $conv appended; legacy
# registers are renamed (%rax -> %al/%ax/%eax, %rsi -> %sil/%si/%esi).
# Any other $conv returns a legacy register name unchanged.
sub reg_part {
my ($name,$size)=@_;

    return $name.$size	if ($name =~ /%r[0-9]+/);

    if    ($size eq "b")	{ $name =~ s/%[er]([^x]+)x?/%$1l/; }
    elsif ($size eq "w")	{ $name =~ s/%[er](.+)/%$1/; }
    elsif ($size eq "d")	{ $name =~ s/%[er](.+)/%e$1/; }

    return $name;
}
653
# Final passes over the generated text; the order matters.
# 1. Replace "%reg#b", "%reg#w" and "%reg#d" with the sub-register name.
$code =~ s/(%[a-z0-9]+)#([bwd])/reg_part($1,$2)/gem;
# 2. Evaluate whatever is still enclosed in backticks as a Perl expression.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
# 3. An insert into word 0 is emitted as movd instead of pinsrw.
$code =~ s/pinsrw\s+\$0,/movd	/gm;

# 4. Strip the tag from the lines of each algorithm being generated; lines
#    of a disabled algorithm keep their leading "#".
if ($md5) { $code =~ s/#md5#//gm; }
if ($rc4) { $code =~ s/#rc4#//gm; }

print $code;

# STDOUT is the pipe into the translator, so closing it is what reports
# a failure on that side.
close(STDOUT) || die "error closing STDOUT: $!";
664