11f13597dSJung-uk Kim#! /usr/bin/env perl
217f01e99SJung-uk Kim# Copyright 2008-2020 The OpenSSL Project Authors. All Rights Reserved.
3e71b7053SJung-uk Kim#
4*b077aed3SPierre Pronchery# Licensed under the Apache License 2.0 (the "License").  You may not use
5e71b7053SJung-uk Kim# this file except in compliance with the License.  You can obtain a copy
6e71b7053SJung-uk Kim# in the file LICENSE in the source distribution or at
7e71b7053SJung-uk Kim# https://www.openssl.org/source/license.html
8e71b7053SJung-uk Kim
91f13597dSJung-uk Kim
101f13597dSJung-uk Kim# ====================================================================
111f13597dSJung-uk Kim# Copyright (c) 2008 Andy Polyakov <appro@openssl.org>
121f13597dSJung-uk Kim#
131f13597dSJung-uk Kim# This module may be used under the terms of either the GNU General
141f13597dSJung-uk Kim# Public License version 2 or later, the GNU Lesser General Public
151f13597dSJung-uk Kim# License version 2.1 or later, the Mozilla Public License version
161f13597dSJung-uk Kim# 1.1 or the BSD License. The exact terms of either license are
171f13597dSJung-uk Kim# distributed along with this module. For further details see
181f13597dSJung-uk Kim# http://www.openssl.org/~appro/camellia/.
191f13597dSJung-uk Kim# ====================================================================
201f13597dSJung-uk Kim
211f13597dSJung-uk Kim# Performance in cycles per processed byte (less is better) in
221f13597dSJung-uk Kim# 'openssl speed ...' benchmark:
231f13597dSJung-uk Kim#
241f13597dSJung-uk Kim#			AMD64	Core2	EM64T
251f13597dSJung-uk Kim# -evp camellia-128-ecb	16.7	21.0	22.7
261f13597dSJung-uk Kim# + over gcc 3.4.6	+25%	+5%	0%
271f13597dSJung-uk Kim#
281f13597dSJung-uk Kim# camellia-128-cbc	15.7	20.4	21.1
291f13597dSJung-uk Kim#
301f13597dSJung-uk Kim# 128-bit key setup	128	216	205	cycles/key
311f13597dSJung-uk Kim# + over gcc 3.4.6	+54%	+39%	+15%
321f13597dSJung-uk Kim#
331f13597dSJung-uk Kim# Numbers in "+" rows represent performance improvement over compiler
341f13597dSJung-uk Kim# generated code. Key setup timings are impressive on AMD and Core2
351f13597dSJung-uk Kim# thanks to 64-bit operations being covertly deployed. Improvement on
361f13597dSJung-uk Kim# EM64T, pre-Core2 Intel x86_64 CPU, is not as impressive, because it
371f13597dSJung-uk Kim# apparently emulates some of 64-bit operations in [32-bit] microcode.
381f13597dSJung-uk Kim
# Command line is "[flavour] [output]".  The trailing argument becomes
# $output when it ends in a file extension; the leading argument becomes
# $flavour when it contains no dot at all.
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift : undef;

# A masm/nasm/mingw64 flavour, or an output file ending in .asm, sets $win64.
$win64 = ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/) ? 1 : 0;

# Look for x86_64-xlate.pl next to this script first, then in
# ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
$xlate = "${dir}x86_64-xlate.pl";
$xlate = "${dir}../../perlasm/x86_64-xlate.pl" unless -f $xlate;
die "can't locate x86_64-xlate.pl" unless -f $xlate;

# STDOUT is aliased to a pipe into the translator, so everything printed
# from here on is fed to x86_64-xlate.pl.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
541f13597dSJung-uk Kim
# hi(REG): rewrite a register written as %eXx or %rXx (X in a..d) to its
# high-byte alias, e.g. "%eax" -> "%ah".  Only the first match in the string
# is rewritten; a string without such a register is returned unchanged.
# Declared without a prototype so that it can be called with its argument
# both as hi(...) and as &hi(...).
sub hi {
    my $r=shift;
    $r =~ s/%[er]([a-d])x/%${1}h/;
    $r;
}
# lo(REG): rewrite a register name to its low-byte alias.  Three rewrites
# are attempted in turn; a string matching none of them is returned
# unchanged.  Declared without a prototype so that it can be called with
# its argument both as lo(...) and as &lo(...).
sub lo {
    my $r=shift;
    $r =~ s/%[er]([a-d])x/%${1}l/;	# %eax, %rax  -> %al
    $r =~ s/%[er]([sd]i)/%${1}l/;	# %esi, %rdi  -> %sil, %dil
    $r =~ s/%(r[0-9]+)[d]?/%${1}b/;	# %r8,  %r8d  -> %r8b
    $r;
}
591f13597dSJung-uk Kim
# Register names substituted into the assembly templates below.
($t0,$t1,$t2,$t3)=("%eax","%ebx","%ecx","%edx");	# 32-bit temporaries
@S=map { "%r${_}d" } (8..11);				# four 32-bit state words
($i0,$i1)=("%esi","%edi");				# table indices
$Tbl="%rbp";	# size optimization
($inp,$out,$key,$keyend)=("%r12","%r13","%r14","%r15");
# 32-bit name used for the leading argument by the V1.x entry points:
# %ecx when $win64 is set, %edi otherwise.
if ($win64)	{ $arg0d="%ecx"; }
else		{ $arg0d="%edi"; }
701f13597dSJung-uk Kim
# Byte offsets of the four lookup tables relative to .LCamellia_SBOX.
# Conceptually this is "const unsigned int Camellia_SBOX[4][256]", but
# table [0] is interleaved with [1], and [2] with [3], to minimize code
# size: each 8-byte entry holds one word of each table of a pair, and the
# second pair starts after 256 such entries.
$SBOX1_1110=0;			# Camellia_SBOX[0]
$SBOX4_4404=$SBOX1_1110+4;	# Camellia_SBOX[1]
$SBOX2_0222=256*8;		# Camellia_SBOX[2]
$SBOX3_3033=$SBOX2_0222+4;	# Camellia_SBOX[3]
781f13597dSJung-uk Kim
# Camellia_Feistel(i[,seed]): append the assembly for one Feistel round
# to $code.
#
#   i     round index.  Its parity selects the roles of the state words:
#         even i reads @S[0],@S[1] and updates @S[2],@S[3]; odd i the
#         other way round.
#   seed  byte offset (relative to $key) used when prefetching the next
#         round key; defaults to 0.  A negative seed makes the key offsets
#         step by -8 per round instead of +8.
#
# The generated code expects the current round key in $t0/$t1 and leaves
# the key for round i+1 there; $t2, $t3, $i0 and $i1 are overwritten.
# The back-quoted expressions are emitted literally into $code.
sub Camellia_Feistel {
my ($i,$seed)=@_;
$seed=0 unless defined($seed);
my $scale=$seed<0?-8:8;
my $j=($i&1)*2;
my ($s0,$s1,$s2,$s3)=@S[map { ($j+$_)%4 } (0..3)];

$code.=<<___;
	xor	$s0,$t0				# t0^=key[0]
	xor	$s1,$t1				# t1^=key[1]
	movz	`&hi("$t0")`,$i0		# (t0>>8)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>0)&0xff
	mov	$SBOX3_3033($Tbl,$i0,8),$t3	# t3=SBOX3_3033[0]
	mov	$SBOX1_1110($Tbl,$i1,8),$t2	# t2=SBOX1_1110[1]
	movz	`&lo("$t0")`,$i0		# (t0>>0)&0xff
	shr	\$16,$t0
	movz	`&hi("$t1")`,$i1		# (t1>>8)&0xff
	xor	$SBOX4_4404($Tbl,$i0,8),$t3	# t3^=SBOX4_4404[0]
	shr	\$16,$t1
	xor	$SBOX4_4404($Tbl,$i1,8),$t2	# t2^=SBOX4_4404[1]
	movz	`&hi("$t0")`,$i0		# (t0>>24)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>16)&0xff
	xor	$SBOX1_1110($Tbl,$i0,8),$t3	# t3^=SBOX1_1110[0]
	xor	$SBOX3_3033($Tbl,$i1,8),$t2	# t2^=SBOX3_3033[1]
	movz	`&lo("$t0")`,$i0		# (t0>>16)&0xff
	movz	`&hi("$t1")`,$i1		# (t1>>24)&0xff
	xor	$SBOX2_0222($Tbl,$i0,8),$t3	# t3^=SBOX2_0222[0]
	xor	$SBOX2_0222($Tbl,$i1,8),$t2	# t2^=SBOX2_0222[1]
	mov	`$seed+($i+1)*$scale`($key),$t1	# prefetch key[i+1]
	mov	`$seed+($i+1)*$scale+4`($key),$t0
	xor	$t3,$t2				# t2^=t3
	ror	\$8,$t3				# t3=RightRotate(t3,8)
	xor	$t2,$s2
	xor	$t2,$s3
	xor	$t3,$s3
___
}
1161f13597dSJung-uk Kim
1171f13597dSJung-uk Kim# void Camellia_EncryptBlock_Rounds(
1181f13597dSJung-uk Kim#		int grandRounds,
1191f13597dSJung-uk Kim#		const Byte plaintext[],
1201f13597dSJung-uk Kim#		const KEY_TABLE_TYPE keyTable,
1211f13597dSJung-uk Kim#		Byte ciphertext[])
1221f13597dSJung-uk Kim$code=<<___;
1231f13597dSJung-uk Kim.text
1241f13597dSJung-uk Kim
1251f13597dSJung-uk Kim# V1.x API
1261f13597dSJung-uk Kim.globl	Camellia_EncryptBlock
1271f13597dSJung-uk Kim.type	Camellia_EncryptBlock,\@abi-omnipotent
1281f13597dSJung-uk Kim.align	16
1291f13597dSJung-uk KimCamellia_EncryptBlock:
13017f01e99SJung-uk Kim.cfi_startproc
1311f13597dSJung-uk Kim	movl	\$128,%eax
1321f13597dSJung-uk Kim	subl	$arg0d,%eax
1331f13597dSJung-uk Kim	movl	\$3,$arg0d
1341f13597dSJung-uk Kim	adcl	\$0,$arg0d	# keyBitLength==128?3:4
1351f13597dSJung-uk Kim	jmp	.Lenc_rounds
13617f01e99SJung-uk Kim.cfi_endproc
1371f13597dSJung-uk Kim.size	Camellia_EncryptBlock,.-Camellia_EncryptBlock
1381f13597dSJung-uk Kim# V2
1391f13597dSJung-uk Kim.globl	Camellia_EncryptBlock_Rounds
1401f13597dSJung-uk Kim.type	Camellia_EncryptBlock_Rounds,\@function,4
1411f13597dSJung-uk Kim.align	16
1421f13597dSJung-uk Kim.Lenc_rounds:
1431f13597dSJung-uk KimCamellia_EncryptBlock_Rounds:
144e71b7053SJung-uk Kim.cfi_startproc
1451f13597dSJung-uk Kim	push	%rbx
146e71b7053SJung-uk Kim.cfi_push	%rbx
1471f13597dSJung-uk Kim	push	%rbp
148e71b7053SJung-uk Kim.cfi_push	%rbp
1491f13597dSJung-uk Kim	push	%r13
150e71b7053SJung-uk Kim.cfi_push	%r13
1511f13597dSJung-uk Kim	push	%r14
152e71b7053SJung-uk Kim.cfi_push	%r14
1531f13597dSJung-uk Kim	push	%r15
154e71b7053SJung-uk Kim.cfi_push	%r15
1551f13597dSJung-uk Kim.Lenc_prologue:
1561f13597dSJung-uk Kim
1571f13597dSJung-uk Kim	#mov	%rsi,$inp		# put away arguments
1581f13597dSJung-uk Kim	mov	%rcx,$out
1591f13597dSJung-uk Kim	mov	%rdx,$key
1601f13597dSJung-uk Kim
1611f13597dSJung-uk Kim	shl	\$6,%edi		# process grandRounds
1621f13597dSJung-uk Kim	lea	.LCamellia_SBOX(%rip),$Tbl
1631f13597dSJung-uk Kim	lea	($key,%rdi),$keyend
1641f13597dSJung-uk Kim
1651f13597dSJung-uk Kim	mov	0(%rsi),@S[0]		# load plaintext
1661f13597dSJung-uk Kim	mov	4(%rsi),@S[1]
1671f13597dSJung-uk Kim	mov	8(%rsi),@S[2]
1681f13597dSJung-uk Kim	bswap	@S[0]
1691f13597dSJung-uk Kim	mov	12(%rsi),@S[3]
1701f13597dSJung-uk Kim	bswap	@S[1]
1711f13597dSJung-uk Kim	bswap	@S[2]
1721f13597dSJung-uk Kim	bswap	@S[3]
1731f13597dSJung-uk Kim
1741f13597dSJung-uk Kim	call	_x86_64_Camellia_encrypt
1751f13597dSJung-uk Kim
1761f13597dSJung-uk Kim	bswap	@S[0]
1771f13597dSJung-uk Kim	bswap	@S[1]
1781f13597dSJung-uk Kim	bswap	@S[2]
1791f13597dSJung-uk Kim	mov	@S[0],0($out)
1801f13597dSJung-uk Kim	bswap	@S[3]
1811f13597dSJung-uk Kim	mov	@S[1],4($out)
1821f13597dSJung-uk Kim	mov	@S[2],8($out)
1831f13597dSJung-uk Kim	mov	@S[3],12($out)
1841f13597dSJung-uk Kim
1851f13597dSJung-uk Kim	mov	0(%rsp),%r15
186e71b7053SJung-uk Kim.cfi_restore	%r15
1871f13597dSJung-uk Kim	mov	8(%rsp),%r14
188e71b7053SJung-uk Kim.cfi_restore	%r14
1891f13597dSJung-uk Kim	mov	16(%rsp),%r13
190e71b7053SJung-uk Kim.cfi_restore	%r13
1911f13597dSJung-uk Kim	mov	24(%rsp),%rbp
192e71b7053SJung-uk Kim.cfi_restore	%rbp
1931f13597dSJung-uk Kim	mov	32(%rsp),%rbx
194e71b7053SJung-uk Kim.cfi_restore	%rbx
1951f13597dSJung-uk Kim	lea	40(%rsp),%rsp
196e71b7053SJung-uk Kim.cfi_adjust_cfa_offset	-40
1971f13597dSJung-uk Kim.Lenc_epilogue:
1981f13597dSJung-uk Kim	ret
199e71b7053SJung-uk Kim.cfi_endproc
2001f13597dSJung-uk Kim.size	Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds
2011f13597dSJung-uk Kim
2021f13597dSJung-uk Kim.type	_x86_64_Camellia_encrypt,\@abi-omnipotent
2031f13597dSJung-uk Kim.align	16
2041f13597dSJung-uk Kim_x86_64_Camellia_encrypt:
20517f01e99SJung-uk Kim.cfi_startproc
2061f13597dSJung-uk Kim	xor	0($key),@S[1]
2071f13597dSJung-uk Kim	xor	4($key),@S[0]		# ^=key[0-3]
2081f13597dSJung-uk Kim	xor	8($key),@S[3]
2091f13597dSJung-uk Kim	xor	12($key),@S[2]
2101f13597dSJung-uk Kim.align	16
2111f13597dSJung-uk Kim.Leloop:
2121f13597dSJung-uk Kim	mov	16($key),$t1		# prefetch key[4-5]
2131f13597dSJung-uk Kim	mov	20($key),$t0
2141f13597dSJung-uk Kim
2151f13597dSJung-uk Kim___
2161f13597dSJung-uk Kim	for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16); }
2171f13597dSJung-uk Kim$code.=<<___;
2181f13597dSJung-uk Kim	lea	16*4($key),$key
2191f13597dSJung-uk Kim	cmp	$keyend,$key
2201f13597dSJung-uk Kim	mov	8($key),$t3		# prefetch key[2-3]
2211f13597dSJung-uk Kim	mov	12($key),$t2
2221f13597dSJung-uk Kim	je	.Ledone
2231f13597dSJung-uk Kim
2241f13597dSJung-uk Kim	and	@S[0],$t0
2251f13597dSJung-uk Kim	or	@S[3],$t3
2261f13597dSJung-uk Kim	rol	\$1,$t0
2271f13597dSJung-uk Kim	xor	$t3,@S[2]		# s2^=s3|key[3];
2281f13597dSJung-uk Kim	xor	$t0,@S[1]		# s1^=LeftRotate(s0&key[0],1);
2291f13597dSJung-uk Kim	and	@S[2],$t2
2301f13597dSJung-uk Kim	or	@S[1],$t1
2311f13597dSJung-uk Kim	rol	\$1,$t2
2321f13597dSJung-uk Kim	xor	$t1,@S[0]		# s0^=s1|key[1];
2331f13597dSJung-uk Kim	xor	$t2,@S[3]		# s3^=LeftRotate(s2&key[2],1);
2341f13597dSJung-uk Kim	jmp	.Leloop
2351f13597dSJung-uk Kim
2361f13597dSJung-uk Kim.align	16
2371f13597dSJung-uk Kim.Ledone:
2381f13597dSJung-uk Kim	xor	@S[2],$t0		# SwapHalf
2391f13597dSJung-uk Kim	xor	@S[3],$t1
2401f13597dSJung-uk Kim	xor	@S[0],$t2
2411f13597dSJung-uk Kim	xor	@S[1],$t3
2421f13597dSJung-uk Kim
2431f13597dSJung-uk Kim	mov	$t0,@S[0]
2441f13597dSJung-uk Kim	mov	$t1,@S[1]
2451f13597dSJung-uk Kim	mov	$t2,@S[2]
2461f13597dSJung-uk Kim	mov	$t3,@S[3]
2471f13597dSJung-uk Kim
2481f13597dSJung-uk Kim	.byte	0xf3,0xc3		# rep ret
24917f01e99SJung-uk Kim.cfi_endproc
2501f13597dSJung-uk Kim.size	_x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt
2511f13597dSJung-uk Kim
2521f13597dSJung-uk Kim# V1.x API
2531f13597dSJung-uk Kim.globl	Camellia_DecryptBlock
2541f13597dSJung-uk Kim.type	Camellia_DecryptBlock,\@abi-omnipotent
2551f13597dSJung-uk Kim.align	16
2561f13597dSJung-uk KimCamellia_DecryptBlock:
25717f01e99SJung-uk Kim.cfi_startproc
2581f13597dSJung-uk Kim	movl	\$128,%eax
2591f13597dSJung-uk Kim	subl	$arg0d,%eax
2601f13597dSJung-uk Kim	movl	\$3,$arg0d
2611f13597dSJung-uk Kim	adcl	\$0,$arg0d	# keyBitLength==128?3:4
2621f13597dSJung-uk Kim	jmp	.Ldec_rounds
26317f01e99SJung-uk Kim.cfi_endproc
2641f13597dSJung-uk Kim.size	Camellia_DecryptBlock,.-Camellia_DecryptBlock
2651f13597dSJung-uk Kim# V2
2661f13597dSJung-uk Kim.globl	Camellia_DecryptBlock_Rounds
2671f13597dSJung-uk Kim.type	Camellia_DecryptBlock_Rounds,\@function,4
2681f13597dSJung-uk Kim.align	16
2691f13597dSJung-uk Kim.Ldec_rounds:
2701f13597dSJung-uk KimCamellia_DecryptBlock_Rounds:
271e71b7053SJung-uk Kim.cfi_startproc
2721f13597dSJung-uk Kim	push	%rbx
273e71b7053SJung-uk Kim.cfi_push	%rbx
2741f13597dSJung-uk Kim	push	%rbp
275e71b7053SJung-uk Kim.cfi_push	%rbp
2761f13597dSJung-uk Kim	push	%r13
277e71b7053SJung-uk Kim.cfi_push	%r13
2781f13597dSJung-uk Kim	push	%r14
279e71b7053SJung-uk Kim.cfi_push	%r14
2801f13597dSJung-uk Kim	push	%r15
281e71b7053SJung-uk Kim.cfi_push	%r15
2821f13597dSJung-uk Kim.Ldec_prologue:
2831f13597dSJung-uk Kim
2841f13597dSJung-uk Kim	#mov	%rsi,$inp		# put away arguments
2851f13597dSJung-uk Kim	mov	%rcx,$out
2861f13597dSJung-uk Kim	mov	%rdx,$keyend
2871f13597dSJung-uk Kim
2881f13597dSJung-uk Kim	shl	\$6,%edi		# process grandRounds
2891f13597dSJung-uk Kim	lea	.LCamellia_SBOX(%rip),$Tbl
2901f13597dSJung-uk Kim	lea	($keyend,%rdi),$key
2911f13597dSJung-uk Kim
2921f13597dSJung-uk Kim	mov	0(%rsi),@S[0]		# load plaintext
2931f13597dSJung-uk Kim	mov	4(%rsi),@S[1]
2941f13597dSJung-uk Kim	mov	8(%rsi),@S[2]
2951f13597dSJung-uk Kim	bswap	@S[0]
2961f13597dSJung-uk Kim	mov	12(%rsi),@S[3]
2971f13597dSJung-uk Kim	bswap	@S[1]
2981f13597dSJung-uk Kim	bswap	@S[2]
2991f13597dSJung-uk Kim	bswap	@S[3]
3001f13597dSJung-uk Kim
3011f13597dSJung-uk Kim	call	_x86_64_Camellia_decrypt
3021f13597dSJung-uk Kim
3031f13597dSJung-uk Kim	bswap	@S[0]
3041f13597dSJung-uk Kim	bswap	@S[1]
3051f13597dSJung-uk Kim	bswap	@S[2]
3061f13597dSJung-uk Kim	mov	@S[0],0($out)
3071f13597dSJung-uk Kim	bswap	@S[3]
3081f13597dSJung-uk Kim	mov	@S[1],4($out)
3091f13597dSJung-uk Kim	mov	@S[2],8($out)
3101f13597dSJung-uk Kim	mov	@S[3],12($out)
3111f13597dSJung-uk Kim
3121f13597dSJung-uk Kim	mov	0(%rsp),%r15
313e71b7053SJung-uk Kim.cfi_restore	%r15
3141f13597dSJung-uk Kim	mov	8(%rsp),%r14
315e71b7053SJung-uk Kim.cfi_restore	%r14
3161f13597dSJung-uk Kim	mov	16(%rsp),%r13
317e71b7053SJung-uk Kim.cfi_restore	%r13
3181f13597dSJung-uk Kim	mov	24(%rsp),%rbp
319e71b7053SJung-uk Kim.cfi_restore	%rbp
3201f13597dSJung-uk Kim	mov	32(%rsp),%rbx
321e71b7053SJung-uk Kim.cfi_restore	%rbx
3221f13597dSJung-uk Kim	lea	40(%rsp),%rsp
323e71b7053SJung-uk Kim.cfi_adjust_cfa_offset	-40
3241f13597dSJung-uk Kim.Ldec_epilogue:
3251f13597dSJung-uk Kim	ret
326e71b7053SJung-uk Kim.cfi_endproc
3271f13597dSJung-uk Kim.size	Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds
3281f13597dSJung-uk Kim
3291f13597dSJung-uk Kim.type	_x86_64_Camellia_decrypt,\@abi-omnipotent
3301f13597dSJung-uk Kim.align	16
3311f13597dSJung-uk Kim_x86_64_Camellia_decrypt:
33217f01e99SJung-uk Kim.cfi_startproc
3331f13597dSJung-uk Kim	xor	0($key),@S[1]
3341f13597dSJung-uk Kim	xor	4($key),@S[0]		# ^=key[0-3]
3351f13597dSJung-uk Kim	xor	8($key),@S[3]
3361f13597dSJung-uk Kim	xor	12($key),@S[2]
3371f13597dSJung-uk Kim.align	16
3381f13597dSJung-uk Kim.Ldloop:
3391f13597dSJung-uk Kim	mov	-8($key),$t1		# prefetch key[4-5]
3401f13597dSJung-uk Kim	mov	-4($key),$t0
3411f13597dSJung-uk Kim
3421f13597dSJung-uk Kim___
3431f13597dSJung-uk Kim	for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8); }
3441f13597dSJung-uk Kim$code.=<<___;
3451f13597dSJung-uk Kim	lea	-16*4($key),$key
3461f13597dSJung-uk Kim	cmp	$keyend,$key
3471f13597dSJung-uk Kim	mov	0($key),$t3		# prefetch key[2-3]
3481f13597dSJung-uk Kim	mov	4($key),$t2
3491f13597dSJung-uk Kim	je	.Lddone
3501f13597dSJung-uk Kim
3511f13597dSJung-uk Kim	and	@S[0],$t0
3521f13597dSJung-uk Kim	or	@S[3],$t3
3531f13597dSJung-uk Kim	rol	\$1,$t0
3541f13597dSJung-uk Kim	xor	$t3,@S[2]		# s2^=s3|key[3];
3551f13597dSJung-uk Kim	xor	$t0,@S[1]		# s1^=LeftRotate(s0&key[0],1);
3561f13597dSJung-uk Kim	and	@S[2],$t2
3571f13597dSJung-uk Kim	or	@S[1],$t1
3581f13597dSJung-uk Kim	rol	\$1,$t2
3591f13597dSJung-uk Kim	xor	$t1,@S[0]		# s0^=s1|key[1];
3601f13597dSJung-uk Kim	xor	$t2,@S[3]		# s3^=LeftRotate(s2&key[2],1);
3611f13597dSJung-uk Kim
3621f13597dSJung-uk Kim	jmp	.Ldloop
3631f13597dSJung-uk Kim
3641f13597dSJung-uk Kim.align	16
3651f13597dSJung-uk Kim.Lddone:
3661f13597dSJung-uk Kim	xor	@S[2],$t2
3671f13597dSJung-uk Kim	xor	@S[3],$t3
3681f13597dSJung-uk Kim	xor	@S[0],$t0
3691f13597dSJung-uk Kim	xor	@S[1],$t1
3701f13597dSJung-uk Kim
3711f13597dSJung-uk Kim	mov	$t2,@S[0]		# SwapHalf
3721f13597dSJung-uk Kim	mov	$t3,@S[1]
3731f13597dSJung-uk Kim	mov	$t0,@S[2]
3741f13597dSJung-uk Kim	mov	$t1,@S[3]
3751f13597dSJung-uk Kim
3761f13597dSJung-uk Kim	.byte	0xf3,0xc3		# rep ret
37717f01e99SJung-uk Kim.cfi_endproc
3781f13597dSJung-uk Kim.size	_x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt
3791f13597dSJung-uk Kim___
3801f13597dSJung-uk Kim
# _saveround(rnd, key, [bias,] regs...): append to $code the stores that
# write regs into the key table at byte offset bias+rnd*8 from register
# key.
#
#   rnd   index of the 8-byte slot to start at
#   key   name of the register holding the table address
#   bias  optional displacement; recognized when the argument after key
#         starts with an (optionally signed) digit, so an explicit 0 is
#         honoured as well.  Defaults to 0.
#   regs  exactly four registers are stored as 32-bit words in the order
#         regs[1],regs[0],regs[3],regs[2] at +0,+4,+8,+12; otherwise the
#         first register goes to +0 and the second, if present, to +8.
#
# The back-quoted offset expressions are emitted literally into $code.
sub _saveround {
my ($rnd,$key,@T)=@_;
my $bias=0;

    # Register names start with '%', displacements with a sign or a digit.
    $bias=shift(@T) if (@T && $T[0] =~ /^\s*[-+]?\d/);

    if ($#T==3) {
	$code.=<<___;
	mov	$T[1],`$bias+$rnd*8+0`($key)
	mov	$T[0],`$bias+$rnd*8+4`($key)
	mov	$T[3],`$bias+$rnd*8+8`($key)
	mov	$T[2],`$bias+$rnd*8+12`($key)
___
    } else {
	$code.="	mov	$T[0],`$bias+$rnd*8+0`($key)\n";
	$code.="	mov	$T[1],`$bias+$rnd*8+8`($key)\n"	if ($#T>=1);
    }
}
3971f13597dSJung-uk Kim
# _loadround(rnd, key, [bias,] regs...): append to $code the loads that
# read one or two registers from the key table at byte offset bias+rnd*8
# from register key: the first register from +0, the second (if given)
# from +8.
#
#   bias  optional displacement; recognized when the argument after key
#         starts with an (optionally signed) digit, so an explicit 0 is
#         honoured as well.  Defaults to 0.
#
# The back-quoted offset expressions are emitted literally into $code.
sub _loadround {
my ($rnd,$key,@T)=@_;
my $bias=0;

# Register names start with '%', displacements with a sign or a digit.
$bias=shift(@T) if (@T && $T[0] =~ /^\s*[-+]?\d/);

$code.="	mov	`$bias+$rnd*8+0`($key),$T[0]\n";
$code.="	mov	`$bias+$rnd*8+8`($key),$T[1]\n"	if ($#T>=1);
}
4051f13597dSJung-uk Kim
# __rotl128(i0, i1, rot): shld-based variant of the 128-bit left rotate
# of the register pair i0:i1 by rot bits; uses %r11 as scratch and emits
# nothing when rot is 0.
#
# shld is very slow on Intel EM64T family. Even on AMD it limits
# instruction decode rate [because it's VectorPath] and consequently
# performance...
sub __rotl128 {
my ($upper,$lower,$rot)=@_;

    return unless $rot;

    $code.="\tmov\t$upper,%r11\n".
	   "\tshld\t\$$rot,$lower,$upper\n".
	   "\tshld\t\$$rot,%r11,$lower\n";
}
4201f13597dSJung-uk Kim
# _rotl128(i0, i1, rot): append code rotating the 128-bit value held in
# the register pair i0:i1 (i0 being the more significant half) left by
# rot bits, using plain shifts and ORs.  %r9 and %r11 are used as scratch.
# Nothing is emitted when rot is 0.
#
# ... Implementing 128-bit rotate without shld gives 80% better
# performance on EM64T, +15% on AMD64 and only ~7% degradation on
# Core2. This is therefore preferred.
sub _rotl128 {
my ($upper,$lower,$rot)=@_;

    return unless $rot;

    # The complementary shift count is left as a back-quoted expression
    # in the emitted text.
    my $left="\$$rot";
    my $right="\$`64-$rot`";

    $code.=join("", map { "\t$_\n" } (
	"mov\t$upper,%r11",
	"shl\t$left,$upper",
	"mov\t$lower,%r9",
	"shr\t$right,%r9",
	"shr\t$right,%r11",
	"or\t%r9,$upper",
	"shl\t$left,$lower",
	"or\t%r11,$lower",
    ));
}
4401f13597dSJung-uk Kim
4411f13597dSJung-uk Kim{ my $step=0;
4421f13597dSJung-uk Kim
4431f13597dSJung-uk Kim$code.=<<___;
4441f13597dSJung-uk Kim.globl	Camellia_Ekeygen
4451f13597dSJung-uk Kim.type	Camellia_Ekeygen,\@function,3
4461f13597dSJung-uk Kim.align	16
4471f13597dSJung-uk KimCamellia_Ekeygen:
448e71b7053SJung-uk Kim.cfi_startproc
4491f13597dSJung-uk Kim	push	%rbx
450e71b7053SJung-uk Kim.cfi_push	%rbx
4511f13597dSJung-uk Kim	push	%rbp
452e71b7053SJung-uk Kim.cfi_push	%rbp
4531f13597dSJung-uk Kim	push	%r13
454e71b7053SJung-uk Kim.cfi_push	%r13
4551f13597dSJung-uk Kim	push	%r14
456e71b7053SJung-uk Kim.cfi_push	%r14
4571f13597dSJung-uk Kim	push	%r15
458e71b7053SJung-uk Kim.cfi_push	%r15
4591f13597dSJung-uk Kim.Lkey_prologue:
4601f13597dSJung-uk Kim
4617bded2dbSJung-uk Kim	mov	%edi,${keyend}d		# put away arguments, keyBitLength
4621f13597dSJung-uk Kim	mov	%rdx,$out		# keyTable
4631f13597dSJung-uk Kim
4641f13597dSJung-uk Kim	mov	0(%rsi),@S[0]		# load 0-127 bits
4651f13597dSJung-uk Kim	mov	4(%rsi),@S[1]
4661f13597dSJung-uk Kim	mov	8(%rsi),@S[2]
4671f13597dSJung-uk Kim	mov	12(%rsi),@S[3]
4681f13597dSJung-uk Kim
4691f13597dSJung-uk Kim	bswap	@S[0]
4701f13597dSJung-uk Kim	bswap	@S[1]
4711f13597dSJung-uk Kim	bswap	@S[2]
4721f13597dSJung-uk Kim	bswap	@S[3]
4731f13597dSJung-uk Kim___
4741f13597dSJung-uk Kim	&_saveround	(0,$out,@S);	# KL<<<0
4751f13597dSJung-uk Kim$code.=<<___;
4761f13597dSJung-uk Kim	cmp	\$128,$keyend		# check keyBitLength
4771f13597dSJung-uk Kim	je	.L1st128
4781f13597dSJung-uk Kim
4791f13597dSJung-uk Kim	mov	16(%rsi),@S[0]		# load 128-191 bits
4801f13597dSJung-uk Kim	mov	20(%rsi),@S[1]
4811f13597dSJung-uk Kim	cmp	\$192,$keyend
4821f13597dSJung-uk Kim	je	.L1st192
4831f13597dSJung-uk Kim	mov	24(%rsi),@S[2]		# load 192-255 bits
4841f13597dSJung-uk Kim	mov	28(%rsi),@S[3]
4851f13597dSJung-uk Kim	jmp	.L1st256
4861f13597dSJung-uk Kim.L1st192:
4871f13597dSJung-uk Kim	mov	@S[0],@S[2]
4881f13597dSJung-uk Kim	mov	@S[1],@S[3]
4891f13597dSJung-uk Kim	not	@S[2]
4901f13597dSJung-uk Kim	not	@S[3]
4911f13597dSJung-uk Kim.L1st256:
4921f13597dSJung-uk Kim	bswap	@S[0]
4931f13597dSJung-uk Kim	bswap	@S[1]
4941f13597dSJung-uk Kim	bswap	@S[2]
4951f13597dSJung-uk Kim	bswap	@S[3]
4961f13597dSJung-uk Kim___
4971f13597dSJung-uk Kim	&_saveround	(4,$out,@S);	# temp storage for KR!
4981f13597dSJung-uk Kim$code.=<<___;
4991f13597dSJung-uk Kim	xor	0($out),@S[1]		# KR^KL
5001f13597dSJung-uk Kim	xor	4($out),@S[0]
5011f13597dSJung-uk Kim	xor	8($out),@S[3]
5021f13597dSJung-uk Kim	xor	12($out),@S[2]
5031f13597dSJung-uk Kim
5041f13597dSJung-uk Kim.L1st128:
5051f13597dSJung-uk Kim	lea	.LCamellia_SIGMA(%rip),$key
5061f13597dSJung-uk Kim	lea	.LCamellia_SBOX(%rip),$Tbl
5071f13597dSJung-uk Kim
5081f13597dSJung-uk Kim	mov	0($key),$t1
5091f13597dSJung-uk Kim	mov	4($key),$t0
5101f13597dSJung-uk Kim___
5111f13597dSJung-uk Kim	&Camellia_Feistel($step++);
5121f13597dSJung-uk Kim	&Camellia_Feistel($step++);
5131f13597dSJung-uk Kim$code.=<<___;
5141f13597dSJung-uk Kim	xor	0($out),@S[1]		# ^KL
5151f13597dSJung-uk Kim	xor	4($out),@S[0]
5161f13597dSJung-uk Kim	xor	8($out),@S[3]
5171f13597dSJung-uk Kim	xor	12($out),@S[2]
5181f13597dSJung-uk Kim___
5191f13597dSJung-uk Kim	&Camellia_Feistel($step++);
5201f13597dSJung-uk Kim	&Camellia_Feistel($step++);
5211f13597dSJung-uk Kim$code.=<<___;
5221f13597dSJung-uk Kim	cmp	\$128,$keyend
5231f13597dSJung-uk Kim	jne	.L2nd256
5241f13597dSJung-uk Kim
5251f13597dSJung-uk Kim	lea	128($out),$out		# size optimization
5261f13597dSJung-uk Kim	shl	\$32,%r8		# @S[0]||
5271f13597dSJung-uk Kim	shl	\$32,%r10		# @S[2]||
5281f13597dSJung-uk Kim	or	%r9,%r8			# ||@S[1]
5291f13597dSJung-uk Kim	or	%r11,%r10		# ||@S[3]
5301f13597dSJung-uk Kim___
# 128-bit key schedule.  KL is reloaded into %rax:%rbx and KA sits in
# %r8:%r10; each table row rotates one of the two further left and stores
# the result in the given key-table slot.  Rows listing a register store
# only that register instead of the whole pair.
{
my %pair=(KL=>["%rax","%rbx"], KA=>["%r8","%r10"]);

&_loadround	(0,$out,-128,@{$pair{KL}});	# KL
&_saveround	(2,$out,-128,@{$pair{KA}});	# KA<<<0
foreach my $row (
    # value, rotate by, slot [, register to store]
    ["KL", 15,  4],		# KL<<<15
    ["KA", 15,  6],		# KA<<<15
    ["KA", 15,  8],		# KA<<<30  (15+15)
    ["KL", 30, 10],		# KL<<<45  (15+30)
    ["KA", 15, 12, "%r8"],	# KA<<<45  (30+15), first register only
    ["KL", 15, 13, "%rbx"],	# KL<<<60  (45+15), second register only
    ["KA", 15, 14],		# KA<<<60  (45+15)
    ["KL", 17, 16],		# KL<<<77  (60+17)
    ["KL", 17, 18],		# KL<<<94  (77+17)
    ["KA", 34, 20],		# KA<<<94  (60+34)
    ["KL", 17, 22],		# KL<<<111 (94+17)
    ["KA", 17, 24],		# KA<<<111 (94+17)
) {
    my ($name,$rot,$slot,@store)=@$row;
    @store=@{$pair{$name}} unless @store;
    &_rotl128	(@{$pair{$name}},$rot);
    &_saveround	($slot,$out,-128,@store);
}
}
5571f13597dSJung-uk Kim$code.=<<___;
5581f13597dSJung-uk Kim	mov	\$3,%eax
5591f13597dSJung-uk Kim	jmp	.Ldone
5601f13597dSJung-uk Kim.align	16
5611f13597dSJung-uk Kim.L2nd256:
5621f13597dSJung-uk Kim___
5631f13597dSJung-uk Kim	&_saveround	(6,$out,@S);	# temp storage for KA!
5641f13597dSJung-uk Kim$code.=<<___;
5651f13597dSJung-uk Kim	xor	`4*8+0`($out),@S[1]	# KA^KR
5661f13597dSJung-uk Kim	xor	`4*8+4`($out),@S[0]
5671f13597dSJung-uk Kim	xor	`5*8+0`($out),@S[3]
5681f13597dSJung-uk Kim	xor	`5*8+4`($out),@S[2]
5691f13597dSJung-uk Kim___
5701f13597dSJung-uk Kim	&Camellia_Feistel($step++);
5711f13597dSJung-uk Kim	&Camellia_Feistel($step++);
5721f13597dSJung-uk Kim
5731f13597dSJung-uk Kim	&_loadround	(0,$out,"%rax","%rbx");	# KL
5741f13597dSJung-uk Kim	&_loadround	(4,$out,"%rcx","%rdx");	# KR
5751f13597dSJung-uk Kim	&_loadround	(6,$out,"%r14","%r15");	# KA
5761f13597dSJung-uk Kim$code.=<<___;
5771f13597dSJung-uk Kim	lea	128($out),$out		# size optimization
5781f13597dSJung-uk Kim	shl	\$32,%r8		# @S[0]||
5791f13597dSJung-uk Kim	shl	\$32,%r10		# @S[2]||
5801f13597dSJung-uk Kim	or	%r9,%r8			# ||@S[1]
5811f13597dSJung-uk Kim	or	%r11,%r10		# ||@S[3]
5821f13597dSJung-uk Kim___
# 192/256-bit key schedule.  KL is in %rax:%rbx, KR in %rcx:%rdx, KA in
# %r14:%r15 and KB in %r8:%r10; each table row rotates one of them further
# left and stores the pair in the given key-table slot.
{
my %pair=(KL=>["%rax","%rbx"], KR=>["%rcx","%rdx"],
	  KA=>["%r14","%r15"], KB=>["%r8","%r10"]);

&_saveround	(2,$out,-128,@{$pair{KB}});	# KB<<<0
foreach my $row (
    # value, rotate by, slot
    ["KR", 15,  4],	# KR<<<15
    ["KA", 15,  6],	# KA<<<15
    ["KR", 15,  8],	# KR<<<30  (15+15)
    ["KB", 30, 10],	# KB<<<30
    ["KL", 45, 12],	# KL<<<45
    ["KA", 30, 14],	# KA<<<45  (15+30)
    ["KL", 15, 16],	# KL<<<60  (45+15)
    ["KR", 30, 18],	# KR<<<60  (30+30)
    ["KB", 30, 20],	# KB<<<60  (30+30)
    ["KL", 17, 22],	# KL<<<77  (60+17)
    ["KA", 32, 24],	# KA<<<77  (45+32)
    ["KR", 34, 26],	# KR<<<94  (60+34)
    ["KA", 17, 28],	# KA<<<94  (77+17)
    ["KL", 34, 30],	# KL<<<111 (77+34)
    ["KB", 51, 32],	# KB<<<111 (60+51)
) {
    my ($name,$rot,$slot)=@$row;
    &_rotl128	(@{$pair{$name}},$rot);
    &_saveround	($slot,$out,-128,@{$pair{$name}});
}
}
6141f13597dSJung-uk Kim$code.=<<___;
6151f13597dSJung-uk Kim	mov	\$4,%eax
6161f13597dSJung-uk Kim.Ldone:
6171f13597dSJung-uk Kim	mov	0(%rsp),%r15
618e71b7053SJung-uk Kim.cfi_restore	%r15
6191f13597dSJung-uk Kim	mov	8(%rsp),%r14
620e71b7053SJung-uk Kim.cfi_restore	%r14
6211f13597dSJung-uk Kim	mov	16(%rsp),%r13
622e71b7053SJung-uk Kim.cfi_restore	%r13
6231f13597dSJung-uk Kim	mov	24(%rsp),%rbp
624e71b7053SJung-uk Kim.cfi_restore	%rbp
6251f13597dSJung-uk Kim	mov	32(%rsp),%rbx
626e71b7053SJung-uk Kim.cfi_restore	%rbx
6271f13597dSJung-uk Kim	lea	40(%rsp),%rsp
628e71b7053SJung-uk Kim.cfi_adjust_cfa_offset	-40
6291f13597dSJung-uk Kim.Lkey_epilogue:
6301f13597dSJung-uk Kim	ret
631e71b7053SJung-uk Kim.cfi_endproc
6321f13597dSJung-uk Kim.size	Camellia_Ekeygen,.-Camellia_Ekeygen
6331f13597dSJung-uk Kim___
6341f13597dSJung-uk Kim}
6351f13597dSJung-uk Kim
6361f13597dSJung-uk Kim@SBOX=(
6371f13597dSJung-uk Kim112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65,
6381f13597dSJung-uk Kim 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189,
6391f13597dSJung-uk Kim134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26,
6401f13597dSJung-uk Kim166,225, 57,202,213, 71, 93, 61,217,  1, 90,214, 81, 86,108, 77,
6411f13597dSJung-uk Kim139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153,
6421f13597dSJung-uk Kim223, 76,203,194, 52,126,118,  5,109,183,169, 49,209, 23,  4,215,
6431f13597dSJung-uk Kim 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34,
6441f13597dSJung-uk Kim254, 68,207,178,195,181,122,145, 36,  8,232,168, 96,252,105, 80,
6451f13597dSJung-uk Kim170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210,
6461f13597dSJung-uk Kim 16,196,  0, 72,163,247,117,219,138,  3,230,218,  9, 63,221,148,
6471f13597dSJung-uk Kim135, 92,131,  2,205, 74,144, 51,115,103,246,243,157,127,191,226,
6481f13597dSJung-uk Kim 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46,
6491f13597dSJung-uk Kim233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89,
6501f13597dSJung-uk Kim120,152,  6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250,
6511f13597dSJung-uk Kim114,  7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164,
6521f13597dSJung-uk Kim 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158);
6531f13597dSJung-uk Kim
# S1110(i): table word for index i, formatted as "0x%08x": the byte
# $SBOX[i] replicated into the three upper byte positions, low byte zero.
sub S1110 {
    my $v=$SBOX[$_[0]];
    sprintf("0x%08x",$v<<24|$v<<16|$v<<8);
}
# S4404(i): table word for index i, formatted as "0x%08x".  The index is
# rotated left by one bit (within 8 bits) before the @SBOX lookup; the
# resulting byte fills byte positions 3, 2 and 0, with position 1 zero.
sub S4404 {
    my $i=shift;
    my $v=$SBOX[($i<<1|$i>>7)&0xff];
    sprintf("0x%08x",$v<<24|$v<<16|$v);
}
# S0222(i): table word for index i, formatted as "0x%08x".  The byte
# $SBOX[i] is rotated left by one bit (within 8 bits) and fills the three
# lower byte positions; the top byte is zero.
sub S0222 {
    my $v=$SBOX[$_[0]];
    $v=($v<<1|$v>>7)&0xff;
    sprintf("0x%08x",$v<<16|$v<<8|$v);
}
# S3033(i): table word for index i, formatted as "0x%08x".  The byte
# $SBOX[i] is rotated right by one bit (within 8 bits) and fills byte
# positions 3, 1 and 0, with position 2 zero.
sub S3033 {
    my $v=$SBOX[$_[0]];
    $v=($v>>1|$v<<7)&0xff;
    sprintf("0x%08x",$v<<24|$v<<8|$v);
}
6581f13597dSJung-uk Kim
# Constant data of the generated module: three rows of constants plus one
# all-zero row at .LCamellia_SIGMA, immediately followed by the lookup
# tables at .LCamellia_SBOX (filled in by the loops below).
$code.=<<___;
.align	64
.LCamellia_SIGMA:
.long	0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858
.long	0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5
.long	0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2
.long	0,          0,          0,          0
.LCamellia_SBOX:
___
# tables are interleaved, remember?

# Append one ".long" directive to $code; the arguments are the already
# formatted words, joined with commas.
sub data_word { $code.=".long\t".join(',',@_)."\n"; }

# First 256 lines pair S1110 with S4404, the next 256 pair S0222 with S3033,
# i.e. each line carries entry $i of two tables side by side.
for my $i (0..255) { data_word(S1110($i),S4404($i)); }
for my $i (0..255) { data_word(S0222($i),S3033($i)); }
6721f13597dSJung-uk Kim
# void Camellia_cbc_encrypt (const unsigned char *inp, unsigned char *out,
#			size_t length, const CAMELLIA_KEY *key,
#			unsigned char *ivp,const int enc);
#
# Emits the CBC entry point.  Incoming registers, as annotated in the code
# below: %rdi=inp, %rsi=out, %rdx=length, %rcx=key, %r8=ivp, %r9d=enc.
#
#  - length==0 returns at once through .Lcbc_abort, before anything is
#    pushed.
#  - enc==0 branches to .LCBC_DECRYPT; any other value runs .Lcbc_eloop.
#  - Encrypt with a partial last block (.Lcbc_enc_tail): the residue bytes
#    are copied into the zeroed 16-byte $ivec scratch area and .Lcbc_eloop
#    is entered one more time with that scratch as its input.
#  - Decrypt rounds the length up to a whole block; with a residue
#    (.Lcbc_dec_tail) the last block is stored to the scratch area and only
#    the residue bytes are copied to out.
#  - Both tails copy with "rep movsb" between pushfq and popfq.  %rsp is 8
#    lower inside that window, which is why the scratch area is addressed as
#    8+$ivec there.  The .Lcbc_{enc,dec}_{pushf,popf} labels delimit those
#    windows; cbc_se_handler in the Win64 section refers to them.
{
# Stack frame slots, relative to the realigned %rsp.  Keep them package
# globals: the Win64 handler text further down interpolates $_rsp.
$_key="0(%rsp)";	# key pointer, reloaded to "rewind" $key after each block
$_end="8(%rsp)";	# inp+len&~15 (len is rounded up first on decrypt)
$_res="16(%rsp)";	# len&15
$ivec="24(%rsp)";	# 16 bytes of scratch: previous IV / partial block
$_ivp="40(%rsp)";	# caller's ivp, for writing the IV back
$_rsp="48(%rsp)";	# %rbp, i.e. %rsp as it was right after the six pushes

$code.=<<___;
.globl	Camellia_cbc_encrypt
.type	Camellia_cbc_encrypt,\@function,6
.align	16
Camellia_cbc_encrypt:
.cfi_startproc
	endbranch
	cmp	\$0,%rdx
	je	.Lcbc_abort
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
.Lcbc_prologue:

	mov	%rsp,%rbp
.cfi_def_cfa_register	%rbp
	sub	\$64,%rsp
	and	\$-64,%rsp

	# place stack frame just "above mod 1024" the key schedule,
	# this ensures that cache associativity suffices
	lea	-64-63(%rcx),%r10
	sub	%rsp,%r10
	neg	%r10
	and	\$0x3C0,%r10
	sub	%r10,%rsp
	#add	\$8,%rsp		# 8 is reserved for callee's ra

	mov	%rdi,$inp		# inp argument
	mov	%rsi,$out		# out argument
	mov	%r8,%rbx		# ivp argument
	mov	%rcx,$key		# key argument
	mov	272(%rcx),${keyend}d	# grandRounds

	mov	%r8,$_ivp
	mov	%rbp,$_rsp
.cfi_cfa_expression	$_rsp,deref,+56

.Lcbc_body:
	lea	.LCamellia_SBOX(%rip),$Tbl

	mov	\$32,%ecx
.align	4
.Lcbc_prefetch_sbox:
	mov	0($Tbl),%rax
	mov	32($Tbl),%rsi
	mov	64($Tbl),%rdi
	mov	96($Tbl),%r11
	lea	128($Tbl),$Tbl
	loop	.Lcbc_prefetch_sbox
	sub	\$4096,$Tbl
	shl	\$6,$keyend
	mov	%rdx,%rcx		# len argument
	lea	($key,$keyend),$keyend

	cmp	\$0,%r9d		# enc argument
	je	.LCBC_DECRYPT

	and	\$-16,%rdx
	and	\$15,%rcx		# length residue
	lea	($inp,%rdx),%rdx
	mov	$key,$_key
	mov	%rdx,$_end
	mov	%rcx,$_res

	cmp	$inp,%rdx
	mov	0(%rbx),@S[0]		# load IV
	mov	4(%rbx),@S[1]
	mov	8(%rbx),@S[2]
	mov	12(%rbx),@S[3]
	je	.Lcbc_enc_tail
	jmp	.Lcbc_eloop

.align	16
.Lcbc_eloop:
	xor	0($inp),@S[0]
	xor	4($inp),@S[1]
	xor	8($inp),@S[2]
	bswap	@S[0]
	xor	12($inp),@S[3]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]

	call	_x86_64_Camellia_encrypt

	mov	$_key,$key		# "rewind" the key
	bswap	@S[0]
	mov	$_end,%rdx
	bswap	@S[1]
	mov	$_res,%rcx
	bswap	@S[2]
	mov	@S[0],0($out)
	bswap	@S[3]
	mov	@S[1],4($out)
	mov	@S[2],8($out)
	lea	16($inp),$inp
	mov	@S[3],12($out)
	cmp	%rdx,$inp
	lea	16($out),$out
	jne	.Lcbc_eloop

	cmp	\$0,%rcx
	jne	.Lcbc_enc_tail

	mov	$_ivp,$out
	mov	@S[0],0($out)		# write out IV residue
	mov	@S[1],4($out)
	mov	@S[2],8($out)
	mov	@S[3],12($out)
	jmp	.Lcbc_done

.align	16
.Lcbc_enc_tail:
	xor	%rax,%rax
	mov	%rax,0+$ivec
	mov	%rax,8+$ivec
	mov	%rax,$_res

.Lcbc_enc_pushf:
	pushfq
	cld
	mov	$inp,%rsi
	lea	8+$ivec,%rdi
	.long	0x9066A4F3		# rep movsb
	popfq
.Lcbc_enc_popf:

	lea	$ivec,$inp
	lea	16+$ivec,%rax
	mov	%rax,$_end
	jmp	.Lcbc_eloop		# one more time

.align	16
.LCBC_DECRYPT:
	xchg	$key,$keyend
	add	\$15,%rdx
	and	\$15,%rcx		# length residue
	and	\$-16,%rdx
	mov	$key,$_key
	lea	($inp,%rdx),%rdx
	mov	%rdx,$_end
	mov	%rcx,$_res

	mov	(%rbx),%rax		# load IV
	mov	8(%rbx),%rbx
	jmp	.Lcbc_dloop
.align	16
.Lcbc_dloop:
	mov	0($inp),@S[0]
	mov	4($inp),@S[1]
	mov	8($inp),@S[2]
	bswap	@S[0]
	mov	12($inp),@S[3]
	bswap	@S[1]
	mov	%rax,0+$ivec		# save IV to temporary storage
	bswap	@S[2]
	mov	%rbx,8+$ivec
	bswap	@S[3]

	call	_x86_64_Camellia_decrypt

	mov	$_key,$key		# "rewind" the key
	mov	$_end,%rdx
	mov	$_res,%rcx

	bswap	@S[0]
	mov	($inp),%rax		# load IV for next iteration
	bswap	@S[1]
	mov	8($inp),%rbx
	bswap	@S[2]
	xor	0+$ivec,@S[0]
	bswap	@S[3]
	xor	4+$ivec,@S[1]
	xor	8+$ivec,@S[2]
	lea	16($inp),$inp
	xor	12+$ivec,@S[3]
	cmp	%rdx,$inp
	je	.Lcbc_ddone

	mov	@S[0],0($out)
	mov	@S[1],4($out)
	mov	@S[2],8($out)
	mov	@S[3],12($out)

	lea	16($out),$out
	jmp	.Lcbc_dloop

.align	16
.Lcbc_ddone:
	mov	$_ivp,%rdx
	cmp	\$0,%rcx
	jne	.Lcbc_dec_tail

	mov	@S[0],0($out)
	mov	@S[1],4($out)
	mov	@S[2],8($out)
	mov	@S[3],12($out)

	mov	%rax,(%rdx)		# write out IV residue
	mov	%rbx,8(%rdx)
	jmp	.Lcbc_done
.align	16
.Lcbc_dec_tail:
	mov	@S[0],0+$ivec
	mov	@S[1],4+$ivec
	mov	@S[2],8+$ivec
	mov	@S[3],12+$ivec

.Lcbc_dec_pushf:
	pushfq
	cld
	lea	8+$ivec,%rsi
	lea	($out),%rdi
	.long	0x9066A4F3		# rep movsb
	popfq
.Lcbc_dec_popf:

	mov	%rax,(%rdx)		# write out IV residue
	mov	%rbx,8(%rdx)
	jmp	.Lcbc_done

.align	16
.Lcbc_done:
	mov	$_rsp,%rcx
.cfi_def_cfa	%rcx,56
	mov	0(%rcx),%r15
.cfi_restore	%r15
	mov	8(%rcx),%r14
.cfi_restore	%r14
	mov	16(%rcx),%r13
.cfi_restore	%r13
	mov	24(%rcx),%r12
.cfi_restore	%r12
	mov	32(%rcx),%rbp
.cfi_restore	%rbp
	mov	40(%rcx),%rbx
.cfi_restore	%rbx
	lea	48(%rcx),%rsp
.cfi_def_cfa	%rsp,8
.Lcbc_abort:
	ret
.cfi_endproc
.size	Camellia_cbc_encrypt,.-Camellia_cbc_encrypt

.asciz	"Camellia for x86_64 by <appro\@openssl.org>"
___
}
9411f13597dSJung-uk Kim
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
#
# Win64-only section (emitted when $win64 is set): two exception handlers
# plus the .pdata/.xdata records that attach them to the public functions.
#
#  - common_se_handler serves Camellia_EncryptBlock_Rounds,
#    Camellia_DecryptBlock_Rounds and Camellia_Ekeygen.  It reads a
#    prologue and an epilogue label from HandlerData[0..1]; when
#    context->Rip lies between them it reloads %rbx, %rbp, %r13, %r14 and
#    %r15 from the frame at context->Rsp into the CONTEXT record.
#  - cbc_se_handler serves Camellia_cbc_encrypt.  It compares context->Rip
#    against .Lcbc_prologue, .Lcbc_body and .Lcbc_abort, compensates by 8
#    bytes while inside either pushfq/popfq window, follows the frame's
#    saved-%rsp slot (offset 48) and reloads %rbx, %rbp and %r12-%r15.
#  - Both restore context->Rsp/Rsi/Rdi and then run .Lcommon_seh_exit, which
#    copies the CONTEXT record to disp->ContextRecord, calls
#    RtlVirtualUnwind and returns 1 (ExceptionContinueSearch).
#    common_se_handler jumps there; the label itself sits inside
#    cbc_se_handler's body.
#
# NOTE(review): 8(%rax)/16(%rax) are read back as the caller's %rdi/%rsi;
# presumably those are the slots where the translated Win64 prologue stores
# them - that prologue is not visible in this section, confirm before
# changing the offsets.
if ($win64) {
# Handler arguments by Win64 register.  Only $context and $disp are
# interpolated into the code below.
$rec="%rcx";
$frame="%rdx";
$context="%r8";
$disp="%r9";

$code.=<<___;
.extern	__imp_RtlVirtualUnwind
.type	common_se_handler,\@abi-omnipotent
.align	16
common_se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	lea	-64(%rsp),%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# prologue label
	cmp	%r10,%rbx		# context->Rip<prologue label
	jb	.Lin_prologue

	mov	152($context),%rax	# pull context->Rsp

	mov	4(%r11),%r10d		# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lin_prologue

	lea	40(%rax),%rax
	mov	-8(%rax),%rbx
	mov	-16(%rax),%rbp
	mov	-24(%rax),%r13
	mov	-32(%rax),%r14
	mov	-40(%rax),%r15
	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

.Lin_prologue:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	jmp	.Lcommon_seh_exit
.size	common_se_handler,.-common_se_handler

.type	cbc_se_handler,\@abi-omnipotent
.align	16
cbc_se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	lea	-64(%rsp),%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	lea	.Lcbc_prologue(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_prologue
	jb	.Lin_cbc_prologue

	lea	.Lcbc_body(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_body
	jb	.Lin_cbc_frame_setup

	mov	152($context),%rax	# pull context->Rsp

	lea	.Lcbc_abort(%rip),%r10
	cmp	%r10,%rbx		# context->Rip>=.Lcbc_abort
	jae	.Lin_cbc_prologue

	# handle pushf/popf in Camellia_cbc_encrypt
	lea	.Lcbc_enc_pushf(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<=.Lcbc_enc_pushf
	jbe	.Lin_cbc_no_flag
	lea	8(%rax),%rax
	lea	.Lcbc_enc_popf(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_enc_popf
	jb	.Lin_cbc_no_flag
	lea	-8(%rax),%rax
	lea	.Lcbc_dec_pushf(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<=.Lcbc_dec_pushf
	jbe	.Lin_cbc_no_flag
	lea	8(%rax),%rax
	lea	.Lcbc_dec_popf(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_dec_popf
	jb	.Lin_cbc_no_flag
	lea	-8(%rax),%rax

.Lin_cbc_no_flag:
	mov	48(%rax),%rax		# $_rsp
	lea	48(%rax),%rax

.Lin_cbc_frame_setup:
	mov	-8(%rax),%rbx
	mov	-16(%rax),%rbp
	mov	-24(%rax),%r12
	mov	-32(%rax),%r13
	mov	-40(%rax),%r14
	mov	-48(%rax),%r15
	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

.Lin_cbc_prologue:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

.align	4
.Lcommon_seh_exit:

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$`1232/8`,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	lea	64(%rsp),%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	cbc_se_handler,.-cbc_se_handler

.section	.pdata
.align	4
	.rva	.LSEH_begin_Camellia_EncryptBlock_Rounds
	.rva	.LSEH_end_Camellia_EncryptBlock_Rounds
	.rva	.LSEH_info_Camellia_EncryptBlock_Rounds

	.rva	.LSEH_begin_Camellia_DecryptBlock_Rounds
	.rva	.LSEH_end_Camellia_DecryptBlock_Rounds
	.rva	.LSEH_info_Camellia_DecryptBlock_Rounds

	.rva	.LSEH_begin_Camellia_Ekeygen
	.rva	.LSEH_end_Camellia_Ekeygen
	.rva	.LSEH_info_Camellia_Ekeygen

	.rva	.LSEH_begin_Camellia_cbc_encrypt
	.rva	.LSEH_end_Camellia_cbc_encrypt
	.rva	.LSEH_info_Camellia_cbc_encrypt

.section	.xdata
.align	8
.LSEH_info_Camellia_EncryptBlock_Rounds:
	.byte	9,0,0,0
	.rva	common_se_handler
	.rva	.Lenc_prologue,.Lenc_epilogue	# HandlerData[]
.LSEH_info_Camellia_DecryptBlock_Rounds:
	.byte	9,0,0,0
	.rva	common_se_handler
	.rva	.Ldec_prologue,.Ldec_epilogue	# HandlerData[]
.LSEH_info_Camellia_Ekeygen:
	.byte	9,0,0,0
	.rva	common_se_handler
	.rva	.Lkey_prologue,.Lkey_epilogue	# HandlerData[]
.LSEH_info_Camellia_cbc_encrypt:
	.byte	9,0,0,0
	.rva	cbc_se_handler
___
}
11531f13597dSJung-uk Kim
# Post-process and emit the generated assembly.  Every `expr` span in $code
# (for instance `1232/8` in the Win64 handler) is replaced by the result of
# evaluating expr as Perl, then the text is written to STDOUT.  Closing
# STDOUT explicitly turns a failed close into a fatal error instead of a
# silently incomplete output file.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";
1157