#! /usr/bin/env perl
# Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# May 2011
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
# C for the time being... Except that it has two code paths: pure
# integer code suitable for any ARMv4 and later CPU and NEON code
# suitable for ARMv7. Pure integer 1x1 multiplication subroutine runs
# in ~45 cycles on dual-issue core such as Cortex A8, which is ~50%
# faster than compiler-generated code. For ECDH and ECDSA verify (but
# not for ECDSA sign) it means 25%-45% improvement depending on key
# length, more for longer keys. Even though NEON 1x1 multiplication
# runs in even less cycles, ~30, improvement is measurable only on
# longer keys. One has to optimize code elsewhere to get NEON glow...
#
# April 2014
#
# Double bn_GF2m_mul_2x2 performance by using algorithm from paper
# referred below, which improves ECDH and ECDSA verify benchmarks
# by 18-40%.
#
# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

if ($flavour && $flavour ne "void") {
    # Look for arm-xlate.pl next to this script first, then in the shared
    # perlasm directory.  Take the capture only when the match succeeded so
    # that $dir never picks up a stale $1.
    $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Everything printed below is piped through the translator.  Report the
    # OS error ($!) on failure; $1 would merely echo the directory captured
    # above.
    open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
        or die "can't call $xlate: $!";
} else {
    # No translation requested: write straight to $output when one was
    # given, otherwise keep the inherited STDOUT.  A failed open must abort
    # instead of silently sending the assembly to the wrong place.
    if ($output) {
        open STDOUT,">",$output
            or die "can't open $output: $!";
    }
}

$code=<<___;
#include "arm_arch.h"

#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.text
___
################
# private interface to mul_1x1_ialu
#
$a="r1";
$b="r0";

# Two sets of names for the same six registers r4-r9: the first set is used
# while the lookup table is being built, the second while the product is
# accumulated.  (The trailing 12 in the list has no variable to land in.)
($a0,$a1,$a2,$a12,$a4,$a14)=
($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12);

$mask="r12";

$code.=<<___;
.type	mul_1x1_ialu,%function
.align	5
mul_1x1_ialu:
	mov	$a0,#0
	bic	$a1,$a,#3<<30		@ a1=a&0x3fffffff
	str	$a0,[sp,#0]		@ tab[0]=0
	add	$a2,$a1,$a1		@ a2=a1<<1
	str	$a1,[sp,#4]		@ tab[1]=a1
	eor	$a12,$a1,$a2		@ a1^a2
	str	$a2,[sp,#8]		@ tab[2]=a2
	mov	$a4,$a1,lsl#2		@ a4=a1<<2
	str	$a12,[sp,#12]		@ tab[3]=a1^a2
	eor	$a14,$a1,$a4		@ a1^a4
	str	$a4,[sp,#16]		@ tab[4]=a4
	eor	$a0,$a2,$a4		@ a2^a4
	str	$a14,[sp,#20]		@ tab[5]=a1^a4
	eor	$a12,$a12,$a4		@ a1^a2^a4
	str	$a0,[sp,#24]		@ tab[6]=a2^a4
	and	$i0,$mask,$b,lsl#2
	str	$a12,[sp,#28]		@ tab[7]=a1^a2^a4

	and	$i1,$mask,$b,lsr#1
	ldr	$lo,[sp,$i0]		@ tab[b       & 0x7]
	and	$i0,$mask,$b,lsr#4
	ldr	$t1,[sp,$i1]		@ tab[b >>  3 & 0x7]
	and	$i1,$mask,$b,lsr#7
	ldr	$t0,[sp,$i0]		@ tab[b >>  6 & 0x7]
	eor	$lo,$lo,$t1,lsl#3	@ stall
	mov	$hi,$t1,lsr#29
	ldr	$t1,[sp,$i1]		@ tab[b >>  9 & 0x7]

	and	$i0,$mask,$b,lsr#10
	eor	$lo,$lo,$t0,lsl#6
	eor	$hi,$hi,$t0,lsr#26
	ldr	$t0,[sp,$i0]		@ tab[b >> 12 & 0x7]

	and	$i1,$mask,$b,lsr#13
	eor	$lo,$lo,$t1,lsl#9
	eor	$hi,$hi,$t1,lsr#23
	ldr	$t1,[sp,$i1]		@ tab[b >> 15 & 0x7]

	and	$i0,$mask,$b,lsr#16
	eor	$lo,$lo,$t0,lsl#12
	eor	$hi,$hi,$t0,lsr#20
	ldr	$t0,[sp,$i0]		@ tab[b >> 18 & 0x7]

	and	$i1,$mask,$b,lsr#19
	eor	$lo,$lo,$t1,lsl#15
	eor	$hi,$hi,$t1,lsr#17
	ldr	$t1,[sp,$i1]		@ tab[b >> 21 & 0x7]

	and	$i0,$mask,$b,lsr#22
	eor	$lo,$lo,$t0,lsl#18
	eor	$hi,$hi,$t0,lsr#14
	ldr	$t0,[sp,$i0]		@ tab[b >> 24 & 0x7]

	and	$i1,$mask,$b,lsr#25
	eor	$lo,$lo,$t1,lsl#21
	eor	$hi,$hi,$t1,lsr#11
	ldr	$t1,[sp,$i1]		@ tab[b >> 27 & 0x7]

	tst	$a,#1<<30
	and	$i0,$mask,$b,lsr#28
	eor	$lo,$lo,$t0,lsl#24
	eor	$hi,$hi,$t0,lsr#8
	ldr	$t0,[sp,$i0]		@ tab[b >> 30      ]

#ifdef	__thumb2__
	itt	ne
#endif
	eorne	$lo,$lo,$b,lsl#30
	eorne	$hi,$hi,$b,lsr#2
	tst	$a,#1<<31
	eor	$lo,$lo,$t1,lsl#27
	eor	$hi,$hi,$t1,lsr#5
#ifdef	__thumb2__
	itt	ne
#endif
	eorne	$lo,$lo,$b,lsl#31
	eorne	$hi,$hi,$b,lsr#1
	eor	$lo,$lo,$t0,lsl#30
	eor	$hi,$hi,$t0,lsr#2

	mov	pc,lr
.size	mul_1x1_ialu,.-mul_1x1_ialu
___
################
# void	bn_GF2m_mul_2x2(BN_ULONG *r,
#	BN_ULONG a1,BN_ULONG a0,
#	BN_ULONG b1,BN_ULONG b0);	# r[3..0]=a1a0·b1b0
{
$code.=<<___;
.global	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,%function
.align	5
bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
	stmdb	sp!,{r10,lr}
	ldr	r12,.LOPENSSL_armcap
# if !defined(_WIN32)
	adr	r10,.LOPENSSL_armcap
	ldr	r12,[r12,r10]
# endif
# if defined(__APPLE__) || defined(_WIN32)
	ldr	r12,[r12]
# endif
	tst	r12,#ARMV7_NEON
	itt	ne
	ldrne	r10,[sp],#8
	bne	.LNEON
	stmdb	sp!,{r4-r9}
#else
	stmdb	sp!,{r4-r10,lr}
#endif
___
$ret="r10";	# reassigned 1st argument
$code.=<<___;
	mov	$ret,r0			@ reassign 1st argument
	mov	$b,r3			@ $b=b1
	sub	r7,sp,#36
	mov	r8,sp
	and	r7,r7,#-32
	ldr	r3,[sp,#32]		@ load b0
	mov	$mask,#7<<2
	mov	sp,r7			@ allocate tab[8]
	str	r8,[r7,#32]

	bl	mul_1x1_ialu		@ a1·b1
	str	$lo,[$ret,#8]
	str	$hi,[$ret,#12]

	eor	$b,$b,r3		@ flip b0 and b1
	eor	$a,$a,r2		@ flip a0 and a1
	eor	r3,r3,$b
	eor	r2,r2,$a
	eor	$b,$b,r3
	eor	$a,$a,r2
	bl	mul_1x1_ialu		@ a0·b0
	str	$lo,[$ret]
	str	$hi,[$ret,#4]

	eor	$a,$a,r2
	eor	$b,$b,r3
	bl	mul_1x1_ialu		@ (a1+a0)·(b1+b0)
___
# Scratch registers that receive r[0..3] for the final recombination.
@r=map("r$_",(6..9));
$code.=<<___;
	ldmia	$ret,{@r[0]-@r[3]}
	eor	$lo,$lo,$hi
	ldr	sp,[sp,#32]		@ destroy tab[8]
	eor	$hi,$hi,@r[1]
	eor	$lo,$lo,@r[0]
	eor	$hi,$hi,@r[2]
	eor	$lo,$lo,@r[3]
	eor	$hi,$hi,@r[3]
	str	$hi,[$ret,#8]
	eor	$lo,$lo,$hi
	str	$lo,[$ret,#4]

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r10,pc}
#else
	ldmia	sp!,{r4-r10,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
___
}
{
# NEON path.  The q-register names below are written as "qN#lo"/"qN#hi" in
# the template and rewritten to the matching d-registers by the output loop
# at the end of the script.
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));

$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.align	5
.LNEON:
	ldr		r12, [sp]		@ 5th argument
	vmov		$a, r2, r1
	vmov		$b, r12, r3
	vmov.i64	$k48, #0x0000ffffffffffff
	vmov.i64	$k32, #0x00000000ffffffff
	vmov.i64	$k16, #0x000000000000ffff

	vext.8		$t0#lo, $a, $a, #1	@ A1
	vmull.p8	$t0, $t0#lo, $b		@ F = A1*B
	vext.8		$r#lo, $b, $b, #1	@ B1
	vmull.p8	$r, $a, $r#lo		@ E = A*B1
	vext.8		$t1#lo, $a, $a, #2	@ A2
	vmull.p8	$t1, $t1#lo, $b		@ H = A2*B
	vext.8		$t3#lo, $b, $b, #2	@ B2
	vmull.p8	$t3, $a, $t3#lo		@ G = A*B2
	vext.8		$t2#lo, $a, $a, #3	@ A3
	veor		$t0, $t0, $r		@ L = E + F
	vmull.p8	$t2, $t2#lo, $b		@ J = A3*B
	vext.8		$r#lo, $b, $b, #3	@ B3
	veor		$t1, $t1, $t3		@ M = G + H
	vmull.p8	$r, $a, $r#lo		@ I = A*B3
	veor		$t0#lo, $t0#lo, $t0#hi	@ t0 = (L) (P0 + P1) << 8
	vand		$t0#hi, $t0#hi, $k48
	vext.8		$t3#lo, $b, $b, #4	@ B4
	veor		$t1#lo, $t1#lo, $t1#hi	@ t1 = (M) (P2 + P3) << 16
	vand		$t1#hi, $t1#hi, $k32
	vmull.p8	$t3, $a, $t3#lo		@ K = A*B4
	veor		$t2, $t2, $r		@ N = I + J
	veor		$t0#lo, $t0#lo, $t0#hi
	veor		$t1#lo, $t1#lo, $t1#hi
	veor		$t2#lo, $t2#lo, $t2#hi	@ t2 = (N) (P4 + P5) << 24
	vand		$t2#hi, $t2#hi, $k16
	vext.8		$t0, $t0, $t0, #15
	veor		$t3#lo, $t3#lo, $t3#hi	@ t3 = (K) (P6 + P7) << 32
	vmov.i64	$t3#hi, #0
	vext.8		$t1, $t1, $t1, #14
	veor		$t2#lo, $t2#lo, $t2#hi
	vmull.p8	$r, $a, $b		@ D = A*B
	vext.8		$t3, $t3, $t3, #12
	vext.8		$t2, $t2, $t2, #13
	veor		$t0, $t0, $t1
	veor		$t2, $t2, $t3
	veor		$r, $r, $t0
	veor		$r, $r, $t2

	vst1.32		{$r}, [r0]
	ret		@ bx lr
#endif
___
}
$code.=<<___;
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_MAX_ARCH__>=7
.align	5
.LOPENSSL_armcap:
# ifdef	_WIN32
.word	OPENSSL_armcap_P
# else
.word	OPENSSL_armcap_P-.
# endif
#endif
.asciz	"GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align	5

#if __ARM_MAX_ARCH__>=7
.comm	OPENSSL_armcap_P,4,4
#endif
___

# Post-process the template line by line: evaluate any `...` expressions,
# then apply at most one of the three rewrites below (they are chained with
# "or", so the first one that matches wins for that line).
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/geo;

	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or
	s/\bret\b/bx	lr/go		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/go;    # make it possible to compile with -march=armv4

	print $_,"\n";
}
close STDOUT or die "error closing STDOUT: $!"; # enforce flush