#! /usr/bin/env perl
# Copyright 2008-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Copyright (c) 2008 Andy Polyakov <appro@openssl.org>
#
# This module may be used under the terms of either the GNU General
# Public License version 2 or later, the GNU Lesser General Public
# License version 2.1 or later, the Mozilla Public License version
# 1.1 or the BSD License. The exact terms of either license are
# distributed along with this module. For further details see
# http://www.openssl.org/~appro/camellia/.
# ====================================================================

# Performance in cycles per processed byte (less is better) in
# 'openssl speed ...' benchmark:
#
#			AMD64	Core2	EM64T
# -evp camellia-128-ecb	16.7	21.0	22.7
# + over gcc 3.4.6	+25%	+5%	0%
#
# camellia-128-cbc	15.7	20.4	21.1
#
# 128-bit key setup	128	216	205	cycles/key
# + over gcc 3.4.6	+54%	+39%	+15%
#
# Numbers in "+" rows represent performance improvement over compiler
# generated code. Key setup timings are impressive on AMD and Core2
# thanks to 64-bit operations being covertly deployed. Improvement on
# EM64T, pre-Core2 Intel x86_64 CPU, is not as impressive, because it
# apparently emulates some of 64-bit operations in [32-bit] microcode.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 calling convention is selected by an assembler flavour of
# nasm/masm/mingw64 or by an output file ending in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script first, then in ../../perlasm.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# hi(REG): rewrite the first %eax/%rax-style name (a, b, c or d register)
# found in REG to its high-byte form, e.g. "%eax" -> "%ah".  Anything else
# is returned unchanged.  No prototype is declared: the sub takes exactly
# one argument, and all existing callers use the &hi(...) form.
sub hi {
    my $r=shift;
    $r =~ s/%[er]([a-d])x/%${1}h/;
    return $r;
}

# lo(REG): rewrite a register name to its low-byte form:
#   %eax/%rax (a-d)   -> %al
#   %esi/%edi/%rsi/.. -> %sil/%dil
#   %rN or %rNd       -> %rNb
# The three substitutions are applied in this order; names matching none
# of them are returned unchanged.
sub lo {
    my $r=shift;
    $r =~ s/%[er]([a-d])x/%${1}l/;
    $r =~ s/%[er]([sd]i)/%${1}l/;
    $r =~ s/%(r[0-9]+)[d]?/%${1}b/;
    return $r;
}

$t0="%eax";$t1="%ebx";$t2="%ecx";$t3="%edx";
@S=("%r8d","%r9d","%r10d","%r11d");
$i0="%esi";
$i1="%edi";
$Tbl="%rbp";	# size optimization
$inp="%r12";
$out="%r13";
$key="%r14";
$keyend="%r15";
$arg0d=$win64?"%ecx":"%edi";

# const unsigned int Camellia_SBOX[4][256];
# Well, sort of... Camellia_SBOX[0][] is interleaved with [1][],
# and [2][] - with [3][]. This is done to minimize code size.
# Byte displacements of the four (pairwise interleaved) tables relative
# to .LCamellia_SBOX; entries are addressed with a scale factor of 8.
$SBOX1_1110=0;		# Camellia_SBOX[0]
$SBOX4_4404=4;		# Camellia_SBOX[1]
$SBOX2_0222=2048;	# Camellia_SBOX[2]
$SBOX3_3033=2052;	# Camellia_SBOX[3]

# Camellia_Feistel(ROUND [, SEED])
#
# Appends the code for one Feistel round to $code.
#   ROUND - round index; its parity selects which pair of @S is the round
#           input ($s0,$s1) and which pair is updated ($s2,$s3), and it
#           also enters the displacement of the next-key prefetch.
#   SEED  - optional base displacement (in bytes, relative to $key) for the
#           key prefetch; defaults to 0.  A negative SEED makes successive
#           rounds step through the key schedule backwards (8 bytes per
#           round), a non-negative one forwards.
# On entry $t0/$t1 must already hold the round key; on exit they hold the
# key prefetched for round ROUND+1.  $t2, $t3, $i0 and $i1 are clobbered.
sub Camellia_Feistel {
my ($i,$seed)=@_;
$seed=0 unless defined($seed);
my $scale=$seed<0?-8:8;
my $j=($i&1)*2;
my ($s0,$s1,$s2,$s3)=@S[$j%4,($j+1)%4,($j+2)%4,($j+3)%4];

$code.=<<___;
	xor	$s0,$t0				# t0^=key[0]
	xor	$s1,$t1				# t1^=key[1]
	movz	`&hi("$t0")`,$i0		# (t0>>8)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>0)&0xff
	mov	$SBOX3_3033($Tbl,$i0,8),$t3	# t3=SBOX3_3033[0]
	mov	$SBOX1_1110($Tbl,$i1,8),$t2	# t2=SBOX1_1110[1]
	movz	`&lo("$t0")`,$i0		# (t0>>0)&0xff
	shr	\$16,$t0
	movz	`&hi("$t1")`,$i1		# (t1>>8)&0xff
	xor	$SBOX4_4404($Tbl,$i0,8),$t3	# t3^=SBOX4_4404[0]
	shr	\$16,$t1
	xor	$SBOX4_4404($Tbl,$i1,8),$t2	# t2^=SBOX4_4404[1]
	movz	`&hi("$t0")`,$i0		# (t0>>24)&0xff
	movz	`&lo("$t1")`,$i1		# (t1>>16)&0xff
	xor	$SBOX1_1110($Tbl,$i0,8),$t3	# t3^=SBOX1_1110[0]
	xor	$SBOX3_3033($Tbl,$i1,8),$t2	# t2^=SBOX3_3033[1]
	movz	`&lo("$t0")`,$i0		# (t0>>16)&0xff
	movz	`&hi("$t1")`,$i1		# (t1>>24)&0xff
	xor	$SBOX2_0222($Tbl,$i0,8),$t3	# t3^=SBOX2_0222[0]
	xor	$SBOX2_0222($Tbl,$i1,8),$t2	# t2^=SBOX2_0222[1]
	mov	`$seed+($i+1)*$scale`($key),$t1	# prefetch key[i+1]
	mov	`$seed+($i+1)*$scale+4`($key),$t0
	xor	$t3,$t2				# t2^=t3
	ror	\$8,$t3				# t3=RightRotate(t3,8)
	xor	$t2,$s2
	xor	$t2,$s3
	xor	$t3,$s3
___
}

# void Camellia_EncryptBlock_Rounds(
#		int grandRounds,
#		const Byte plaintext[],
#		const KEY_TABLE_TYPE keyTable,
#		Byte ciphertext[])
$code=<<___;
.text

# V1.x API
.globl	Camellia_EncryptBlock
.type	Camellia_EncryptBlock,\@abi-omnipotent
.align	16
Camellia_EncryptBlock:
.cfi_startproc
	movl	\$128,%eax
	subl	$arg0d,%eax
	movl	\$3,$arg0d
	adcl	\$0,$arg0d	# keyBitLength==128?3:4
	jmp	.Lenc_rounds
.cfi_endproc
.size	Camellia_EncryptBlock,.-Camellia_EncryptBlock
# V2
.globl	Camellia_EncryptBlock_Rounds
.type	Camellia_EncryptBlock_Rounds,\@function,4
.align	16
.Lenc_rounds:
Camellia_EncryptBlock_Rounds:
.cfi_startproc
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
.Lenc_prologue:

	#mov	%rsi,$inp		# put away arguments
	mov	%rcx,$out
	mov	%rdx,$key

	shl	\$6,%edi		# process grandRounds
	lea	.LCamellia_SBOX(%rip),$Tbl
	lea	($key,%rdi),$keyend

	mov	0(%rsi),@S[0]		# load plaintext
	mov	4(%rsi),@S[1]
	mov	8(%rsi),@S[2]
	bswap	@S[0]
	mov	12(%rsi),@S[3]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]

	call	_x86_64_Camellia_encrypt

	bswap	@S[0]
	bswap	@S[1]
	bswap	@S[2]
	mov	@S[0],0($out)
	bswap	@S[3]
	mov	@S[1],4($out)
	mov	@S[2],8($out)
	mov	@S[3],12($out)

	mov	0(%rsp),%r15
.cfi_restore	%r15
	mov	8(%rsp),%r14
.cfi_restore	%r14
	mov	16(%rsp),%r13
.cfi_restore	%r13
	mov	24(%rsp),%rbp
.cfi_restore	%rbp
	mov	32(%rsp),%rbx
.cfi_restore	%rbx
	lea	40(%rsp),%rsp
.cfi_adjust_cfa_offset	-40
.Lenc_epilogue:
	ret
.cfi_endproc
.size	Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds

.type	_x86_64_Camellia_encrypt,\@abi-omnipotent
.align	16
_x86_64_Camellia_encrypt:
.cfi_startproc
	xor	0($key),@S[1]
	xor	4($key),@S[0]		# ^=key[0-3]
	xor	8($key),@S[3]
	xor	12($key),@S[2]
.align	16
.Leloop:
	mov	16($key),$t1		# prefetch key[4-5]
	mov	20($key),$t0

___
	# six Feistel rounds per loop iteration, key schedule walked forwards
	for ($i=0;$i<6;$i++) { Camellia_Feistel($i,16); }
$code.=<<___;
	lea	16*4($key),$key
	cmp	$keyend,$key
	mov	8($key),$t3		# prefetch key[2-3]
	mov	12($key),$t2
	je	.Ledone

	and	@S[0],$t0
	or	@S[3],$t3
	rol	\$1,$t0
	xor	$t3,@S[2]		# s2^=s3|key[3];
	xor	$t0,@S[1]		# s1^=LeftRotate(s0&key[0],1);
	and	@S[2],$t2
	or	@S[1],$t1
	rol	\$1,$t2
	xor	$t1,@S[0]		# s0^=s1|key[1];
	xor	$t2,@S[3]		# s3^=LeftRotate(s2&key[2],1);
	jmp	.Leloop

.align	16
.Ledone:
	xor	@S[2],$t0		# SwapHalf
	xor	@S[3],$t1
	xor	@S[0],$t2
	xor	@S[1],$t3

	mov	$t0,@S[0]
	mov	$t1,@S[1]
	mov	$t2,@S[2]
	mov	$t3,@S[3]

	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	_x86_64_Camellia_encrypt,.-_x86_64_Camellia_encrypt

# V1.x API
.globl	Camellia_DecryptBlock
.type	Camellia_DecryptBlock,\@abi-omnipotent
.align	16
Camellia_DecryptBlock:
.cfi_startproc
	movl	\$128,%eax
	subl	$arg0d,%eax
	movl	\$3,$arg0d
	adcl	\$0,$arg0d	# keyBitLength==128?3:4
	jmp	.Ldec_rounds
.cfi_endproc
.size	Camellia_DecryptBlock,.-Camellia_DecryptBlock
# V2
.globl	Camellia_DecryptBlock_Rounds
.type	Camellia_DecryptBlock_Rounds,\@function,4
.align	16
.Ldec_rounds:
Camellia_DecryptBlock_Rounds:
.cfi_startproc
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
.Ldec_prologue:

	#mov	%rsi,$inp		# put away arguments
	mov	%rcx,$out
	mov	%rdx,$keyend

	shl	\$6,%edi		# process grandRounds
	lea	.LCamellia_SBOX(%rip),$Tbl
	lea	($keyend,%rdi),$key

	mov	0(%rsi),@S[0]		# load input block
	mov	4(%rsi),@S[1]
	mov	8(%rsi),@S[2]
	bswap	@S[0]
	mov	12(%rsi),@S[3]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]

	call	_x86_64_Camellia_decrypt

	bswap	@S[0]
	bswap	@S[1]
	bswap	@S[2]
	mov	@S[0],0($out)
	bswap	@S[3]
	mov	@S[1],4($out)
	mov	@S[2],8($out)
	mov	@S[3],12($out)

	mov	0(%rsp),%r15
.cfi_restore	%r15
	mov	8(%rsp),%r14
.cfi_restore	%r14
	mov	16(%rsp),%r13
.cfi_restore	%r13
	mov	24(%rsp),%rbp
.cfi_restore	%rbp
	mov	32(%rsp),%rbx
.cfi_restore	%rbx
	lea	40(%rsp),%rsp
.cfi_adjust_cfa_offset	-40
.Ldec_epilogue:
	ret
.cfi_endproc
.size	Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds

.type	_x86_64_Camellia_decrypt,\@abi-omnipotent
.align	16
_x86_64_Camellia_decrypt:
.cfi_startproc
	xor	0($key),@S[1]
	xor	4($key),@S[0]		# ^=key[0-3]
	xor	8($key),@S[3]
	xor	12($key),@S[2]
.align	16
.Ldloop:
	mov	-8($key),$t1		# prefetch key[4-5]
	mov	-4($key),$t0

___
	# six Feistel rounds per loop iteration, key schedule walked backwards
	for ($i=0;$i<6;$i++) { Camellia_Feistel($i,-8); }
$code.=<<___;
	lea	-16*4($key),$key
	cmp	$keyend,$key
	mov	0($key),$t3		# prefetch key[2-3]
	mov	4($key),$t2
	je	.Lddone

	and	@S[0],$t0
	or	@S[3],$t3
	rol	\$1,$t0
	xor	$t3,@S[2]		# s2^=s3|key[3];
	xor	$t0,@S[1]		# s1^=LeftRotate(s0&key[0],1);
	and	@S[2],$t2
	or	@S[1],$t1
	rol	\$1,$t2
	xor	$t1,@S[0]		# s0^=s1|key[1];
	xor	$t2,@S[3]		# s3^=LeftRotate(s2&key[2],1);

	jmp	.Ldloop

.align	16
.Lddone:
	xor	@S[2],$t2
	xor	@S[3],$t3
	xor	@S[0],$t0
	xor	@S[1],$t1

	mov	$t2,@S[0]		# SwapHalf
	mov	$t3,@S[1]
	mov	$t0,@S[2]
	mov	$t1,@S[3]

	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	_x86_64_Camellia_decrypt,.-_x86_64_Camellia_decrypt
___

# _saveround(RND, KEY, [BIAS,] REG...)
#
# Appends stores of REG... to the key table addressed by KEY, at byte
# displacement BIAS+RND*8.  BIAS is optional: the first element after KEY
# is taken as BIAS only if it is numerically non-zero (register names
# evaluate to 0 under int()), otherwise BIAS is 0.
#   - four registers: stored as 32-bit words, pairwise swapped, at +0,+4,+8,+12;
#   - one or two registers: stored at +0 and (if present) +8.
sub _saveround {
my ($rnd,$key,@T)=@_;
my $bias=int($T[0])?shift(@T):0;

    if ($#T==3) {
	$code.=<<___;
	mov	@T[1],`$bias+$rnd*8+0`($key)
	mov	@T[0],`$bias+$rnd*8+4`($key)
	mov	@T[3],`$bias+$rnd*8+8`($key)
	mov	@T[2],`$bias+$rnd*8+12`($key)
___
    } else {
	$code.="	mov	@T[0],`$bias+$rnd*8+0`($key)\n";
	$code.="	mov	@T[1],`$bias+$rnd*8+8`($key)\n"	if ($#T>=1);
    }
}

# _loadround(RND, KEY, [BIAS,] REG [, REG])
#
# Counterpart of _saveround for one or two registers: appends loads from
# displacement BIAS+RND*8 (and +8 for the second register) off KEY.
# BIAS is detected the same way as in _saveround.
sub _loadround {
my ($rnd,$key,@T)=@_;
my $bias=int($T[0])?shift(@T):0;

$code.="	mov	`$bias+$rnd*8+0`($key),@T[0]\n";
$code.="	mov	`$bias+$rnd*8+8`($key),@T[1]\n"	if ($#T>=1);
}

# shld is very slow on Intel EM64T family. Even on AMD it limits
# instruction decode rate [because it's VectorPath] and consequently
# performance...
#
# __rotl128(HI, LO, ROT): shld-based rotate of the register pair HI:LO
# left by ROT bits, using %r11 as scratch.  Emits nothing when ROT is 0.
sub __rotl128 {
my ($i0,$i1,$rot)=@_;

    if ($rot) {
	$code.=<<___;
	mov	$i0,%r11
	shld	\$$rot,$i1,$i0
	shld	\$$rot,%r11,$i1
___
    }
}

# ... Implementing 128-bit rotate without shld gives 80% better
# performance on EM64T, +15% on AMD64 and only ~7% degradation on
# Core2. This is therefore preferred.
# _rotl128(HI, LO, ROT): rotate the register pair HI:LO left by ROT bits
# using plain shifts.  %r9 and %r11 are used as scratch and are clobbered.
# Emits nothing when ROT is 0.
sub _rotl128 {
my ($i0,$i1,$rot)=@_;

    if ($rot) {
	$code.=<<___;
	mov	$i0,%r11
	shl	\$$rot,$i0
	mov	$i1,%r9
	shr	\$`64-$rot`,%r9
	shr	\$`64-$rot`,%r11
	or	%r9,$i0
	shl	\$$rot,$i1
	or	%r11,$i1
___
    }
}

# Camellia_Ekeygen: key schedule generator.  As emitted below, the routine
# takes keyBitLength in %edi, the raw key in %rsi and the key table in
# %rdx, and returns 3 in %eax for 128-bit keys and 4 otherwise.
# $step counts the Feistel rounds emitted so far, so that consecutive
# Camellia_Feistel calls prefetch consecutive .LCamellia_SIGMA constants.
{ my $step=0;

$code.=<<___;
.globl	Camellia_Ekeygen
.type	Camellia_Ekeygen,\@function,3
.align	16
Camellia_Ekeygen:
.cfi_startproc
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
.Lkey_prologue:

	mov	%edi,${keyend}d		# put away arguments, keyBitLength
	mov	%rdx,$out		# keyTable

	mov	0(%rsi),@S[0]		# load 0-127 bits
	mov	4(%rsi),@S[1]
	mov	8(%rsi),@S[2]
	mov	12(%rsi),@S[3]

	bswap	@S[0]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]
___
	&_saveround	(0,$out,@S);	# KL<<<0
$code.=<<___;
	cmp	\$128,$keyend		# check keyBitLength
	je	.L1st128

	mov	16(%rsi),@S[0]		# load 128-191 bits
	mov	20(%rsi),@S[1]
	cmp	\$192,$keyend
	je	.L1st192
	mov	24(%rsi),@S[2]		# load 192-255 bits
	mov	28(%rsi),@S[3]
	jmp	.L1st256
.L1st192:
	mov	@S[0],@S[2]
	mov	@S[1],@S[3]
	not	@S[2]
	not	@S[3]
.L1st256:
	bswap	@S[0]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]
___
	&_saveround	(4,$out,@S);	# temp storage for KR!
$code.=<<___;
	xor	0($out),@S[1]		# KR^KL
	xor	4($out),@S[0]
	xor	8($out),@S[3]
	xor	12($out),@S[2]

.L1st128:
	lea	.LCamellia_SIGMA(%rip),$key
	lea	.LCamellia_SBOX(%rip),$Tbl

	mov	0($key),$t1
	mov	4($key),$t0
___
	&Camellia_Feistel($step++);
	&Camellia_Feistel($step++);
$code.=<<___;
	xor	0($out),@S[1]		# ^KL
	xor	4($out),@S[0]
	xor	8($out),@S[3]
	xor	12($out),@S[2]
___
	&Camellia_Feistel($step++);
	&Camellia_Feistel($step++);
$code.=<<___;
	cmp	\$128,$keyend
	jne	.L2nd256

	lea	128($out),$out		# size optimization
	shl	\$32,%r8		# @S[0]||
	shl	\$32,%r10		# @S[2]||
	or	%r9,%r8			# ||@S[1]
	or	%r11,%r10		# ||@S[3]
___
	# 128-bit key: subkeys are rotations of KL (%rax:%rbx) and KA (%r8:%r10)
	&_loadround	(0,$out,-128,"%rax","%rbx");	# KL
	&_saveround	(2,$out,-128,"%r8","%r10");	# KA<<<0
	&_rotl128	("%rax","%rbx",15);
	&_saveround	(4,$out,-128,"%rax","%rbx");	# KL<<<15
	&_rotl128	("%r8","%r10",15);
	&_saveround	(6,$out,-128,"%r8","%r10");	# KA<<<15
	&_rotl128	("%r8","%r10",15);		# 15+15=30
	&_saveround	(8,$out,-128,"%r8","%r10");	# KA<<<30
	&_rotl128	("%rax","%rbx",30);		# 15+30=45
	&_saveround	(10,$out,-128,"%rax","%rbx");	# KL<<<45
	&_rotl128	("%r8","%r10",15);		# 30+15=45
	&_saveround	(12,$out,-128,"%r8");		# KA<<<45
	&_rotl128	("%rax","%rbx",15);		# 45+15=60
	&_saveround	(13,$out,-128,"%rbx");		# KL<<<60
	&_rotl128	("%r8","%r10",15);		# 45+15=60
	&_saveround	(14,$out,-128,"%r8","%r10");	# KA<<<60
	&_rotl128	("%rax","%rbx",17);		# 60+17=77
	&_saveround	(16,$out,-128,"%rax","%rbx");	# KL<<<77
	&_rotl128	("%rax","%rbx",17);		# 77+17=94
	&_saveround	(18,$out,-128,"%rax","%rbx");	# KL<<<94
	&_rotl128	("%r8","%r10",34);		# 60+34=94
	&_saveround	(20,$out,-128,"%r8","%r10");	# KA<<<94
	&_rotl128	("%rax","%rbx",17);		# 94+17=111
	&_saveround	(22,$out,-128,"%rax","%rbx");	# KL<<<111
	&_rotl128	("%r8","%r10",17);		# 94+17=111
	&_saveround	(24,$out,-128,"%r8","%r10");	# KA<<<111
$code.=<<___;
	mov	\$3,%eax
	jmp	.Ldone
.align	16
.L2nd256:
___
	&_saveround	(6,$out,@S);	# temp storage for KA!
$code.=<<___;
	xor	`4*8+0`($out),@S[1]	# KA^KR
	xor	`4*8+4`($out),@S[0]
	xor	`5*8+0`($out),@S[3]
	xor	`5*8+4`($out),@S[2]
___
	&Camellia_Feistel($step++);
	&Camellia_Feistel($step++);

	&_loadround	(0,$out,"%rax","%rbx");	# KL
	&_loadround	(4,$out,"%rcx","%rdx");	# KR
	&_loadround	(6,$out,"%r14","%r15");	# KA
$code.=<<___;
	lea	128($out),$out		# size optimization
	shl	\$32,%r8		# @S[0]||
	shl	\$32,%r10		# @S[2]||
	or	%r9,%r8			# ||@S[1]
	or	%r11,%r10		# ||@S[3]
___
	# 192/256-bit key: KL=%rax:%rbx, KR=%rcx:%rdx, KA=%r14:%r15, KB=%r8:%r10
	&_saveround	(2,$out,-128,"%r8","%r10");	# KB<<<0
	&_rotl128	("%rcx","%rdx",15);
	&_saveround	(4,$out,-128,"%rcx","%rdx");	# KR<<<15
	&_rotl128	("%r14","%r15",15);
	&_saveround	(6,$out,-128,"%r14","%r15");	# KA<<<15
	&_rotl128	("%rcx","%rdx",15);		# 15+15=30
	&_saveround	(8,$out,-128,"%rcx","%rdx");	# KR<<<30
	&_rotl128	("%r8","%r10",30);
	&_saveround	(10,$out,-128,"%r8","%r10");	# KB<<<30
	&_rotl128	("%rax","%rbx",45);
	&_saveround	(12,$out,-128,"%rax","%rbx");	# KL<<<45
	&_rotl128	("%r14","%r15",30);		# 15+30=45
	&_saveround	(14,$out,-128,"%r14","%r15");	# KA<<<45
	&_rotl128	("%rax","%rbx",15);		# 45+15=60
	&_saveround	(16,$out,-128,"%rax","%rbx");	# KL<<<60
	&_rotl128	("%rcx","%rdx",30);		# 30+30=60
	&_saveround	(18,$out,-128,"%rcx","%rdx");	# KR<<<60
	&_rotl128	("%r8","%r10",30);		# 30+30=60
	&_saveround	(20,$out,-128,"%r8","%r10");	# KB<<<60
	&_rotl128	("%rax","%rbx",17);		# 60+17=77
	&_saveround	(22,$out,-128,"%rax","%rbx");	# KL<<<77
	&_rotl128	("%r14","%r15",32);		# 45+32=77
	&_saveround	(24,$out,-128,"%r14","%r15");	# KA<<<77
	&_rotl128	("%rcx","%rdx",34);		# 60+34=94
	&_saveround	(26,$out,-128,"%rcx","%rdx");	# KR<<<94
	&_rotl128	("%r14","%r15",17);		# 77+17=94
	&_saveround	(28,$out,-128,"%r14","%r15");	# KA<<<94
	&_rotl128	("%rax","%rbx",34);		# 77+34=111
	&_saveround	(30,$out,-128,"%rax","%rbx");	# KL<<<111
	&_rotl128	("%r8","%r10",51);		# 60+51=111
	&_saveround	(32,$out,-128,"%r8","%r10");	# KB<<<111
$code.=<<___;
	mov	\$4,%eax
.Ldone:
	mov	0(%rsp),%r15
.cfi_restore	%r15
	mov	8(%rsp),%r14
.cfi_restore	%r14
	mov	16(%rsp),%r13
.cfi_restore	%r13
	mov	24(%rsp),%rbp
.cfi_restore	%rbp
	mov	32(%rsp),%rbx
.cfi_restore	%rbx
	lea	40(%rsp),%rsp
.cfi_adjust_cfa_offset	-40
.Lkey_epilogue:
	ret
.cfi_endproc
.size	Camellia_Ekeygen,.-Camellia_Ekeygen
___
}

# Base 8-bit substitution table; the four 32-bit lookup tables emitted
# below are all derived from it by S1110/S4404/S0222/S3033.
@SBOX=(
112,130, 44,236,179, 39,192,229,228,133, 87, 53,234, 12,174, 65,
 35,239,107,147, 69, 25,165, 33,237, 14, 79, 78, 29,101,146,189,
134,184,175,143,124,235, 31,206, 62, 48,220, 95, 94,197, 11, 26,
166,225, 57,202,213, 71, 93, 61,217,  1, 90,214, 81, 86,108, 77,
139, 13,154,102,251,204,176, 45,116, 18, 43, 32,240,177,132,153,
223, 76,203,194, 52,126,118,  5,109,183,169, 49,209, 23,  4,215,
 20, 88, 58, 97,222, 27, 17, 28, 50, 15,156, 22, 83, 24,242, 34,
254, 68,207,178,195,181,122,145, 36,  8,232,168, 96,252,105, 80,
170,208,160,125,161,137, 98,151, 84, 91, 30,149,224,255,100,210,
 16,196,  0, 72,163,247,117,219,138,  3,230,218,  9, 63,221,148,
135, 92,131,  2,205, 74,144, 51,115,103,246,243,157,127,191,226,
 82,155,216, 38,200, 55,198, 59,129,150,111, 75, 19,190, 99, 46,
233,121,167,140,159,110,188,142, 41,245,249,182, 47,253,180, 89,
120,152,  6,106,231, 70,113,186,212, 37,171, 66,136,162,141,250,
114,  7,185, 85,248,238,172, 10, 54, 73, 42,104, 60, 56,241,164,
 64, 40,211,123,187,201, 67,193, 21,227,173,244,119,199,128,158);

# Each helper takes a table index 0..255 and returns the corresponding
# 32-bit table entry formatted as "0x%08x".  The digits in the name give
# the byte lanes (most significant first); 0 marks a zero byte.
#   S1110: s=SBOX[i]                     -> s,s,s,0
#   S4404: s=SBOX[i rotated left by 1]   -> s,s,0,s
#   S0222: s=SBOX[i] rotated left by 1   -> 0,s,s,s
#   S3033: s=SBOX[i] rotated right by 1  -> s,0,s,s
sub S1110 { my $i=shift; $i=$SBOX[$i]; $i=$i<<24|$i<<16|$i<<8; sprintf("0x%08x",$i); }
sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=$SBOX[$i]; $i=$i<<24|$i<<16|$i; sprintf("0x%08x",$i); }
sub S0222 { my $i=shift; $i=$SBOX[$i]; $i=($i<<1|$i>>7)&0xff; $i=$i<<16|$i<<8|$i; sprintf("0x%08x",$i); }
sub S3033 { my $i=shift; $i=$SBOX[$i]; $i=($i>>1|$i<<7)&0xff; $i=$i<<24|$i<<8|$i; sprintf("0x%08x",$i); }

$code.=<<___;
.align	64
.LCamellia_SIGMA:
.long	0x3bcc908b, 0xa09e667f, 0x4caa73b2, 0xb67ae858
.long	0xe94f82be, 0xc6ef372f, 0xf1d36f1c, 0x54ff53a5
.long	0xde682d1d, 0x10e527fa, 0xb3e6c1fd, 0xb05688c2
.long	0, 0, 0, 0
.LCamellia_SBOX:
___
# tables are interleaved, remember?
# data_word(LIST): append one ".long" directive holding LIST to $code.
sub data_word { $code.=".long\t".join(',',@_)."\n"; }
for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); }
for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); }

# void Camellia_cbc_encrypt (const unsigned char *inp, unsigned char *out,
#			size_t length, const CAMELLIA_KEY *key,
#			unsigned char *ivp,const int enc);
{
$_key="0(%rsp)";
$_end="8(%rsp)";	# inp+len&~15
$_res="16(%rsp)";	# len&15
$ivec="24(%rsp)";
$_ivp="40(%rsp)";
$_rsp="48(%rsp)";

$code.=<<___;
.globl	Camellia_cbc_encrypt
.type	Camellia_cbc_encrypt,\@function,6
.align	16
Camellia_cbc_encrypt:
.cfi_startproc
	endbranch
	cmp	\$0,%rdx
	je	.Lcbc_abort
	push	%rbx
.cfi_push	%rbx
	push	%rbp
.cfi_push	%rbp
	push	%r12
.cfi_push	%r12
	push	%r13
.cfi_push	%r13
	push	%r14
.cfi_push	%r14
	push	%r15
.cfi_push	%r15
.Lcbc_prologue:

	mov	%rsp,%rbp
.cfi_def_cfa_register	%rbp
	sub	\$64,%rsp
	and	\$-64,%rsp

	# place stack frame just "above mod 1024" the key schedule,
	# this ensures that cache associativity suffices
	lea	-64-63(%rcx),%r10
	sub	%rsp,%r10
	neg	%r10
	and	\$0x3C0,%r10
	sub	%r10,%rsp
	#add	\$8,%rsp		# 8 is reserved for callee's ra

	mov	%rdi,$inp		# inp argument
	mov	%rsi,$out		# out argument
	mov	%r8,%rbx		# ivp argument
	mov	%rcx,$key		# key argument
	mov	272(%rcx),${keyend}d	# grandRounds

	mov	%r8,$_ivp
	mov	%rbp,$_rsp
.cfi_cfa_expression	$_rsp,deref,+56

.Lcbc_body:
	lea	.LCamellia_SBOX(%rip),$Tbl

	mov	\$32,%ecx
.align	4
.Lcbc_prefetch_sbox:
	mov	0($Tbl),%rax
	mov	32($Tbl),%rsi
	mov	64($Tbl),%rdi
	mov	96($Tbl),%r11
	lea	128($Tbl),$Tbl
	loop	.Lcbc_prefetch_sbox
	sub	\$4096,$Tbl
	shl	\$6,$keyend
	mov	%rdx,%rcx		# len argument
	lea	($key,$keyend),$keyend

	cmp	\$0,%r9d		# enc argument
	je	.LCBC_DECRYPT

	and	\$-16,%rdx
	and	\$15,%rcx		# length residue
	lea	($inp,%rdx),%rdx
	mov	$key,$_key
	mov	%rdx,$_end
	mov	%rcx,$_res

	cmp	$inp,%rdx
	mov	0(%rbx),@S[0]		# load IV
	mov	4(%rbx),@S[1]
	mov	8(%rbx),@S[2]
	mov	12(%rbx),@S[3]
	je	.Lcbc_enc_tail
	jmp	.Lcbc_eloop

.align	16
.Lcbc_eloop:
	xor	0($inp),@S[0]
	xor	4($inp),@S[1]
	xor	8($inp),@S[2]
	bswap	@S[0]
	xor	12($inp),@S[3]
	bswap	@S[1]
	bswap	@S[2]
	bswap	@S[3]

	call	_x86_64_Camellia_encrypt

	mov	$_key,$key		# "rewind" the key
	bswap	@S[0]
	mov	$_end,%rdx
	bswap	@S[1]
	mov	$_res,%rcx
	bswap	@S[2]
	mov	@S[0],0($out)
	bswap	@S[3]
	mov	@S[1],4($out)
	mov	@S[2],8($out)
	lea	16($inp),$inp
	mov	@S[3],12($out)
	cmp	%rdx,$inp
	lea	
16($out),$out 7931f13597dSJung-uk Kim jne .Lcbc_eloop 7941f13597dSJung-uk Kim 7951f13597dSJung-uk Kim cmp \$0,%rcx 7961f13597dSJung-uk Kim jne .Lcbc_enc_tail 7971f13597dSJung-uk Kim 7981f13597dSJung-uk Kim mov $_ivp,$out 7991f13597dSJung-uk Kim mov @S[0],0($out) # write out IV residue 8001f13597dSJung-uk Kim mov @S[1],4($out) 8011f13597dSJung-uk Kim mov @S[2],8($out) 8021f13597dSJung-uk Kim mov @S[3],12($out) 8031f13597dSJung-uk Kim jmp .Lcbc_done 8041f13597dSJung-uk Kim 8051f13597dSJung-uk Kim.align 16 8061f13597dSJung-uk Kim.Lcbc_enc_tail: 8071f13597dSJung-uk Kim xor %rax,%rax 8081f13597dSJung-uk Kim mov %rax,0+$ivec 8091f13597dSJung-uk Kim mov %rax,8+$ivec 8101f13597dSJung-uk Kim mov %rax,$_res 8111f13597dSJung-uk Kim 8121f13597dSJung-uk Kim.Lcbc_enc_pushf: 8131f13597dSJung-uk Kim pushfq 8141f13597dSJung-uk Kim cld 8151f13597dSJung-uk Kim mov $inp,%rsi 8161f13597dSJung-uk Kim lea 8+$ivec,%rdi 8171f13597dSJung-uk Kim .long 0x9066A4F3 # rep movsb 8181f13597dSJung-uk Kim popfq 8191f13597dSJung-uk Kim.Lcbc_enc_popf: 8201f13597dSJung-uk Kim 8211f13597dSJung-uk Kim lea $ivec,$inp 8221f13597dSJung-uk Kim lea 16+$ivec,%rax 8231f13597dSJung-uk Kim mov %rax,$_end 8241f13597dSJung-uk Kim jmp .Lcbc_eloop # one more time 8251f13597dSJung-uk Kim 8261f13597dSJung-uk Kim.align 16 8271f13597dSJung-uk Kim.LCBC_DECRYPT: 8281f13597dSJung-uk Kim xchg $key,$keyend 8291f13597dSJung-uk Kim add \$15,%rdx 8301f13597dSJung-uk Kim and \$15,%rcx # length residue 8311f13597dSJung-uk Kim and \$-16,%rdx 8321f13597dSJung-uk Kim mov $key,$_key 8331f13597dSJung-uk Kim lea ($inp,%rdx),%rdx 8341f13597dSJung-uk Kim mov %rdx,$_end 8351f13597dSJung-uk Kim mov %rcx,$_res 8361f13597dSJung-uk Kim 8371f13597dSJung-uk Kim mov (%rbx),%rax # load IV 8381f13597dSJung-uk Kim mov 8(%rbx),%rbx 8391f13597dSJung-uk Kim jmp .Lcbc_dloop 8401f13597dSJung-uk Kim.align 16 8411f13597dSJung-uk Kim.Lcbc_dloop: 8421f13597dSJung-uk Kim mov 0($inp),@S[0] 8431f13597dSJung-uk Kim mov 4($inp),@S[1] 8441f13597dSJung-uk Kim mov 
8($inp),@S[2] 8451f13597dSJung-uk Kim bswap @S[0] 8461f13597dSJung-uk Kim mov 12($inp),@S[3] 8471f13597dSJung-uk Kim bswap @S[1] 8481f13597dSJung-uk Kim mov %rax,0+$ivec # save IV to temporary storage 8491f13597dSJung-uk Kim bswap @S[2] 8501f13597dSJung-uk Kim mov %rbx,8+$ivec 8511f13597dSJung-uk Kim bswap @S[3] 8521f13597dSJung-uk Kim 8531f13597dSJung-uk Kim call _x86_64_Camellia_decrypt 8541f13597dSJung-uk Kim 8551f13597dSJung-uk Kim mov $_key,$key # "rewind" the key 8561f13597dSJung-uk Kim mov $_end,%rdx 8571f13597dSJung-uk Kim mov $_res,%rcx 8581f13597dSJung-uk Kim 8591f13597dSJung-uk Kim bswap @S[0] 8601f13597dSJung-uk Kim mov ($inp),%rax # load IV for next iteration 8611f13597dSJung-uk Kim bswap @S[1] 8621f13597dSJung-uk Kim mov 8($inp),%rbx 8631f13597dSJung-uk Kim bswap @S[2] 8641f13597dSJung-uk Kim xor 0+$ivec,@S[0] 8651f13597dSJung-uk Kim bswap @S[3] 8661f13597dSJung-uk Kim xor 4+$ivec,@S[1] 8671f13597dSJung-uk Kim xor 8+$ivec,@S[2] 8681f13597dSJung-uk Kim lea 16($inp),$inp 8691f13597dSJung-uk Kim xor 12+$ivec,@S[3] 8701f13597dSJung-uk Kim cmp %rdx,$inp 8711f13597dSJung-uk Kim je .Lcbc_ddone 8721f13597dSJung-uk Kim 8731f13597dSJung-uk Kim mov @S[0],0($out) 8741f13597dSJung-uk Kim mov @S[1],4($out) 8751f13597dSJung-uk Kim mov @S[2],8($out) 8761f13597dSJung-uk Kim mov @S[3],12($out) 8771f13597dSJung-uk Kim 8781f13597dSJung-uk Kim lea 16($out),$out 8791f13597dSJung-uk Kim jmp .Lcbc_dloop 8801f13597dSJung-uk Kim 8811f13597dSJung-uk Kim.align 16 8821f13597dSJung-uk Kim.Lcbc_ddone: 8831f13597dSJung-uk Kim mov $_ivp,%rdx 8841f13597dSJung-uk Kim cmp \$0,%rcx 8851f13597dSJung-uk Kim jne .Lcbc_dec_tail 8861f13597dSJung-uk Kim 8871f13597dSJung-uk Kim mov @S[0],0($out) 8881f13597dSJung-uk Kim mov @S[1],4($out) 8891f13597dSJung-uk Kim mov @S[2],8($out) 8901f13597dSJung-uk Kim mov @S[3],12($out) 8911f13597dSJung-uk Kim 8921f13597dSJung-uk Kim mov %rax,(%rdx) # write out IV residue 8931f13597dSJung-uk Kim mov %rbx,8(%rdx) 8941f13597dSJung-uk Kim jmp .Lcbc_done 
8951f13597dSJung-uk Kim.align 16 8961f13597dSJung-uk Kim.Lcbc_dec_tail: 8971f13597dSJung-uk Kim mov @S[0],0+$ivec 8981f13597dSJung-uk Kim mov @S[1],4+$ivec 8991f13597dSJung-uk Kim mov @S[2],8+$ivec 9001f13597dSJung-uk Kim mov @S[3],12+$ivec 9011f13597dSJung-uk Kim 9021f13597dSJung-uk Kim.Lcbc_dec_pushf: 9031f13597dSJung-uk Kim pushfq 9041f13597dSJung-uk Kim cld 9051f13597dSJung-uk Kim lea 8+$ivec,%rsi 9061f13597dSJung-uk Kim lea ($out),%rdi 9071f13597dSJung-uk Kim .long 0x9066A4F3 # rep movsb 9081f13597dSJung-uk Kim popfq 9091f13597dSJung-uk Kim.Lcbc_dec_popf: 9101f13597dSJung-uk Kim 9111f13597dSJung-uk Kim mov %rax,(%rdx) # write out IV residue 9121f13597dSJung-uk Kim mov %rbx,8(%rdx) 9131f13597dSJung-uk Kim jmp .Lcbc_done 9141f13597dSJung-uk Kim 9151f13597dSJung-uk Kim.align 16 9161f13597dSJung-uk Kim.Lcbc_done: 9171f13597dSJung-uk Kim mov $_rsp,%rcx 918e71b7053SJung-uk Kim.cfi_def_cfa %rcx,56 9191f13597dSJung-uk Kim mov 0(%rcx),%r15 920e71b7053SJung-uk Kim.cfi_restore %r15 9211f13597dSJung-uk Kim mov 8(%rcx),%r14 922e71b7053SJung-uk Kim.cfi_restore %r14 9231f13597dSJung-uk Kim mov 16(%rcx),%r13 924e71b7053SJung-uk Kim.cfi_restore %r13 9251f13597dSJung-uk Kim mov 24(%rcx),%r12 926e71b7053SJung-uk Kim.cfi_restore %r12 9271f13597dSJung-uk Kim mov 32(%rcx),%rbp 928e71b7053SJung-uk Kim.cfi_restore %rbp 9291f13597dSJung-uk Kim mov 40(%rcx),%rbx 930e71b7053SJung-uk Kim.cfi_restore %rbx 9311f13597dSJung-uk Kim lea 48(%rcx),%rsp 932e71b7053SJung-uk Kim.cfi_def_cfa %rsp,8 9331f13597dSJung-uk Kim.Lcbc_abort: 9341f13597dSJung-uk Kim ret 935e71b7053SJung-uk Kim.cfi_endproc 9361f13597dSJung-uk Kim.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt 9371f13597dSJung-uk Kim 9381f13597dSJung-uk Kim.asciz "Camellia for x86_64 by <appro\@openssl.org>" 9391f13597dSJung-uk Kim___ 9401f13597dSJung-uk Kim} 9411f13597dSJung-uk Kim 9421f13597dSJung-uk Kim# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, 9431f13597dSJung-uk Kim# CONTEXT *context,DISPATCHER_CONTEXT *disp) 
# Win64-only structured-exception support, appended to $code when $win64 is
# set.  Two handlers are emitted:
#   common_se_handler - for the routines whose .xdata entry carries a
#                       prologue/epilogue label pair in HandlerData[];
#   cbc_se_handler    - for Camellia_cbc_encrypt, which compares Rip against
#                       the .Lcbc_* labels directly and accounts for the
#                       extra 8 bytes on the stack between pushfq and popfq.
# Both fall through to .Lcommon_seh_exit, which copies the adjusted CONTEXT
# into disp->ContextRecord, calls RtlVirtualUnwind and returns
# ExceptionContinueSearch.  The .pdata/.xdata tables that register the
# handlers follow in the same heredoc.
if ($win64) {
# Registers carrying the four handler arguments named in the prototype
# comment above.
$rec="%rcx";
$frame="%rdx";
$context="%r8";
$disp="%r9";

$code.=<<___;
.extern	__imp_RtlVirtualUnwind
.type	common_se_handler,\@abi-omnipotent
.align	16
common_se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	lea	-64(%rsp),%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# prologue label
	cmp	%r10,%rbx		# context->Rip<prologue label
	jb	.Lin_prologue

	mov	152($context),%rax	# pull context->Rsp

	mov	4(%r11),%r10d		# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lin_prologue

	lea	40(%rax),%rax
	mov	-8(%rax),%rbx
	mov	-16(%rax),%rbp
	mov	-24(%rax),%r13
	mov	-32(%rax),%r14
	mov	-40(%rax),%r15
	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

.Lin_prologue:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	jmp	.Lcommon_seh_exit
.size	common_se_handler,.-common_se_handler

.type	cbc_se_handler,\@abi-omnipotent
.align	16
cbc_se_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	lea	-64(%rsp),%rsp

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	lea	.Lcbc_prologue(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_prologue
	jb	.Lin_cbc_prologue

	lea	.Lcbc_body(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_body
	jb	.Lin_cbc_frame_setup

	mov	152($context),%rax	# pull context->Rsp

	lea	.Lcbc_abort(%rip),%r10
	cmp	%r10,%rbx		# context->Rip>=.Lcbc_abort
	jae	.Lin_cbc_prologue

	# handle pushf/popf in Camellia_cbc_encrypt
	lea	.Lcbc_enc_pushf(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<=.Lcbc_enc_pushf
	jbe	.Lin_cbc_no_flag
	lea	8(%rax),%rax
	lea	.Lcbc_enc_popf(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_enc_popf
	jb	.Lin_cbc_no_flag
	lea	-8(%rax),%rax
	lea	.Lcbc_dec_pushf(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<=.Lcbc_dec_pushf
	jbe	.Lin_cbc_no_flag
	lea	8(%rax),%rax
	lea	.Lcbc_dec_popf(%rip),%r10
	cmp	%r10,%rbx		# context->Rip<.Lcbc_dec_popf
	jb	.Lin_cbc_no_flag
	lea	-8(%rax),%rax

.Lin_cbc_no_flag:
	mov	48(%rax),%rax		# $_rsp
	lea	48(%rax),%rax

.Lin_cbc_frame_setup:
	mov	-8(%rax),%rbx
	mov	-16(%rax),%rbp
	mov	-24(%rax),%r12
	mov	-32(%rax),%r13
	mov	-40(%rax),%r14
	mov	-48(%rax),%r15
	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

.Lin_cbc_prologue:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

.align	4
.Lcommon_seh_exit:

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$`1232/8`,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	lea	64(%rsp),%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	cbc_se_handler,.-cbc_se_handler

.section	.pdata
.align	4
	.rva	.LSEH_begin_Camellia_EncryptBlock_Rounds
	.rva	.LSEH_end_Camellia_EncryptBlock_Rounds
	.rva	.LSEH_info_Camellia_EncryptBlock_Rounds

	.rva	.LSEH_begin_Camellia_DecryptBlock_Rounds
	.rva	.LSEH_end_Camellia_DecryptBlock_Rounds
	.rva	.LSEH_info_Camellia_DecryptBlock_Rounds

	.rva	.LSEH_begin_Camellia_Ekeygen
	.rva	.LSEH_end_Camellia_Ekeygen
	.rva	.LSEH_info_Camellia_Ekeygen

	.rva	.LSEH_begin_Camellia_cbc_encrypt
	.rva	.LSEH_end_Camellia_cbc_encrypt
	.rva	.LSEH_info_Camellia_cbc_encrypt

.section	.xdata
.align	8
.LSEH_info_Camellia_EncryptBlock_Rounds:
	.byte	9,0,0,0
	.rva	common_se_handler
	.rva	.Lenc_prologue,.Lenc_epilogue	# HandlerData[]
.LSEH_info_Camellia_DecryptBlock_Rounds:
	.byte	9,0,0,0
	.rva	common_se_handler
	.rva	.Ldec_prologue,.Ldec_epilogue	# HandlerData[]
.LSEH_info_Camellia_Ekeygen:
	.byte	9,0,0,0
	.rva	common_se_handler
	.rva	.Lkey_prologue,.Lkey_epilogue	# HandlerData[]
.LSEH_info_Camellia_cbc_encrypt:
	.byte	9,0,0,0
	.rva	cbc_se_handler
___
}

# Replace every back-tick quoted span in the accumulated text with the result
# of evaluating it as Perl (e.g. `1232/8` above), then emit the text.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# Buffered write errors only surface at close, so treat a failed close as fatal.
close STDOUT or die "error closing STDOUT: $!";