#! /usr/bin/env perl
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# May 2011
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
# C for the time being... Except that it has two code paths: pure
# integer code suitable for any ARMv4 and later CPU and NEON code
# suitable for ARMv7. Pure integer 1x1 multiplication subroutine runs
# in ~45 cycles on dual-issue core such as Cortex A8, which is ~50%
# faster than compiler-generated code. For ECDH and ECDSA verify (but
# not for ECDSA sign) it means 25%-45% improvement depending on key
# length, more for longer keys. Even though NEON 1x1 multiplication
# runs in even less cycles, ~30, improvement is measurable only on
# longer keys. One has to optimize code elsewhere to get NEON glow...
#
# April 2014
#
# Double bn_GF2m_mul_2x2 performance by using algorithm from paper
# referred below, which improves ECDH and ECDSA verify benchmarks
# by 18-40%.
#
# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf

# Command line: [flavour] [...] output-file
# An argument that looks like "name.ext" is taken as the output file. If
# the very first argument already looks like a file name there is no
# flavour; otherwise remaining arguments are skipped until one matches.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }

if ($flavour && $flavour ne "void") {
    # Pipe the generated code through arm-xlate.pl, looked up next to this
    # script first and then in ../../perlasm relative to it. $dir falls
    # back to "" when $0 carries no directory part, rather than picking up
    # whatever an earlier match left in $1.
    $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Quote the output name so paths containing blanks survive the shell,
    # and fail loudly if the translator cannot be started.
    open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
        or die "can't call $xlate: $!";
} elsif (defined $output && $output ne "") {
    # No translation requested: write straight to the output file.
    open STDOUT, '>', $output or die "can't open $output: $!";
}
# With no output file at all, the code goes to the inherited STDOUT.

$code=<<___;
#include "arm_arch.h"

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif
___
################
# private interface to mul_1x1_ialu
#
$a="r1";
$b="r0";

# Both name lists alias the same six registers r4-r9: the $a* names are
# used while the lookup table is built, the $hi/$lo/$t*/$i* names while
# the product is accumulated. The trailing 12 in the map is not consumed
# by the six-element assignment; r12 is bound to $mask just below.
($a0,$a1,$a2,$a12,$a4,$a14)=
($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12);

$mask="r12";

$code.=<<___;
.type	mul_1x1_ialu,%function
.align	5
mul_1x1_ialu:
	mov	$a0,#0
	bic	$a1,$a,#3<<30		@ a1=a&0x3fffffff
	str	$a0,[sp,#0]		@ tab[0]=0
	add	$a2,$a1,$a1		@ a2=a1<<1
	str	$a1,[sp,#4]		@ tab[1]=a1
	eor	$a12,$a1,$a2		@ a1^a2
	str	$a2,[sp,#8]		@ tab[2]=a2
	mov	$a4,$a1,lsl#2		@ a4=a1<<2
	str	$a12,[sp,#12]		@ tab[3]=a1^a2
	eor	$a14,$a1,$a4		@ a1^a4
	str	$a4,[sp,#16]		@ tab[4]=a4
	eor	$a0,$a2,$a4		@ a2^a4
	str	$a14,[sp,#20]		@ tab[5]=a1^a4
	eor	$a12,$a12,$a4		@ a1^a2^a4
	str	$a0,[sp,#24]		@ tab[6]=a2^a4
	and	$i0,$mask,$b,lsl#2
	str	$a12,[sp,#28]		@ tab[7]=a1^a2^a4

	and	$i1,$mask,$b,lsr#1
	ldr	$lo,[sp,$i0]		@ tab[b & 0x7]
	and	$i0,$mask,$b,lsr#4
	ldr	$t1,[sp,$i1]		@ tab[b >> 3 & 0x7]
	and	$i1,$mask,$b,lsr#7
	ldr	$t0,[sp,$i0]		@ tab[b >> 6 & 0x7]
	eor	$lo,$lo,$t1,lsl#3	@ stall
	mov	$hi,$t1,lsr#29
	ldr	$t1,[sp,$i1]		@ tab[b >> 9 & 0x7]

	and	$i0,$mask,$b,lsr#10
	eor	$lo,$lo,$t0,lsl#6
	eor	$hi,$hi,$t0,lsr#26
	ldr	$t0,[sp,$i0]		@ tab[b >> 12 & 0x7]

	and	$i1,$mask,$b,lsr#13
	eor	$lo,$lo,$t1,lsl#9
	eor	$hi,$hi,$t1,lsr#23
	ldr	$t1,[sp,$i1]		@ tab[b >> 15 & 0x7]

	and	$i0,$mask,$b,lsr#16
	eor	$lo,$lo,$t0,lsl#12
	eor	$hi,$hi,$t0,lsr#20
	ldr	$t0,[sp,$i0]		@ tab[b >> 18 & 0x7]

	and	$i1,$mask,$b,lsr#19
	eor	$lo,$lo,$t1,lsl#15
	eor	$hi,$hi,$t1,lsr#17
	ldr	$t1,[sp,$i1]		@ tab[b >> 21 & 0x7]

	and	$i0,$mask,$b,lsr#22
	eor	$lo,$lo,$t0,lsl#18
	eor	$hi,$hi,$t0,lsr#14
	ldr	$t0,[sp,$i0]		@ tab[b >> 24 & 0x7]

	and	$i1,$mask,$b,lsr#25
	eor	$lo,$lo,$t1,lsl#21
	eor	$hi,$hi,$t1,lsr#11
	ldr	$t1,[sp,$i1]		@ tab[b >> 27 & 0x7]

	tst	$a,#1<<30
	and	$i0,$mask,$b,lsr#28
	eor	$lo,$lo,$t0,lsl#24
	eor	$hi,$hi,$t0,lsr#8
	ldr	$t0,[sp,$i0]		@ tab[b >> 30 ]

#ifdef	__thumb2__
	itt	ne
#endif
	eorne	$lo,$lo,$b,lsl#30
	eorne	$hi,$hi,$b,lsr#2
	tst	$a,#1<<31
	eor	$lo,$lo,$t1,lsl#27
	eor	$hi,$hi,$t1,lsr#5
#ifdef	__thumb2__
	itt	ne
#endif
	eorne	$lo,$lo,$b,lsl#31
	eorne	$hi,$hi,$b,lsr#1
	eor	$lo,$lo,$t0,lsl#30
	eor	$hi,$hi,$t0,lsr#2

	mov	pc,lr
.size	mul_1x1_ialu,.-mul_1x1_ialu
___
################
# void	bn_GF2m_mul_2x2(BN_ULONG *r,
#	BN_ULONG a1,BN_ULONG a0,
#	BN_ULONG b1,BN_ULONG b0);	# r[3..0]=a1a0·b1b0
{
# Entry point. When built with __ARM_MAX_ARCH__>=7 the emitted code tests
# the ARMV7_NEON bit of OPENSSL_armcap_P and branches to .LNEON if it is
# set; otherwise it continues into the integer path below.
$code.=<<___;
.global	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,%function
.align	5
bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
	stmdb	sp!,{r10,lr}
	ldr	r12,.LOPENSSL_armcap
	adr	r10,.LOPENSSL_armcap
	ldr	r12,[r12,r10]
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV7_NEON
	itt	ne
	ldrne	r10,[sp],#8
	bne	.LNEON
	stmdb	sp!,{r4-r9}
#else
	stmdb	sp!,{r4-r10,lr}
#endif
___
$ret="r10";	# reassigned 1st argument
$code.=<<___;
	mov	$ret,r0			@ reassign 1st argument
	mov	$b,r3			@ $b=b1
	sub	r7,sp,#36
	mov	r8,sp
	and	r7,r7,#-32
	ldr	r3,[sp,#32]		@ load b0
	mov	$mask,#7<<2
	mov	sp,r7			@ allocate tab[8]
	str	r8,[r7,#32]

	bl	mul_1x1_ialu		@ a1·b1
	str	$lo,[$ret,#8]
	str	$hi,[$ret,#12]

	eor	$b,$b,r3		@ flip b0 and b1
	eor	$a,$a,r2		@ flip a0 and a1
	eor	r3,r3,$b
	eor	r2,r2,$a
	eor	$b,$b,r3
	eor	$a,$a,r2
	bl	mul_1x1_ialu		@ a0·b0
	str	$lo,[$ret]
	str	$hi,[$ret,#4]

	eor	$a,$a,r2
	eor	$b,$b,r3
	bl	mul_1x1_ialu		@ (a1+a0)·(b1+b0)
___
# r6-r9 receive the four result words stored so far; they are folded
# together with the third product held in $lo/$hi.
@r=map("r$_",(6..9));
$code.=<<___;
	ldmia	$ret,{@r[0]-@r[3]}
	eor	$lo,$lo,$hi
	ldr	sp,[sp,#32]		@ destroy tab[8]
	eor	$hi,$hi,@r[1]
	eor	$lo,$lo,@r[0]
	eor	$hi,$hi,@r[2]
	eor	$lo,$lo,@r[3]
	eor	$hi,$hi,@r[3]
	str	$hi,[$ret,#8]
	eor	$lo,$lo,$hi
	str	$lo,[$ret,#4]

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r10,pc}
#else
	ldmia	sp!,{r4-r10,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
___
}
{
# Lexical register names for the NEON path; they shadow the integer-path
# globals inside this block only. Each list assignment consumes just the
# first five values of its map: q0,q1,q2,q3,q8 and d26..d30.
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));

$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.align	5
.LNEON:
	ldr	r12, [sp]		@ 5th argument
	vmov	$a, r2, r1
	vmov	$b, r12, r3
	vmov.i64	$k48, #0x0000ffffffffffff
	vmov.i64	$k32, #0x00000000ffffffff
	vmov.i64	$k16, #0x000000000000ffff

	vext.8	$t0#lo, $a, $a, #1	@ A1
	vmull.p8	$t0, $t0#lo, $b		@ F = A1*B
	vext.8	$r#lo, $b, $b, #1	@ B1
	vmull.p8	$r, $a, $r#lo		@ E = A*B1
	vext.8	$t1#lo, $a, $a, #2	@ A2
	vmull.p8	$t1, $t1#lo, $b		@ H = A2*B
	vext.8	$t3#lo, $b, $b, #2	@ B2
	vmull.p8	$t3, $a, $t3#lo		@ G = A*B2
	vext.8	$t2#lo, $a, $a, #3	@ A3
	veor	$t0, $t0, $r		@ L = E + F
	vmull.p8	$t2, $t2#lo, $b		@ J = A3*B
	vext.8	$r#lo, $b, $b, #3	@ B3
	veor	$t1, $t1, $t3		@ M = G + H
	vmull.p8	$r, $a, $r#lo		@ I = A*B3
	veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = (L) (P0 + P1) << 8
	vand	$t0#hi, $t0#hi, $k48
	vext.8	$t3#lo, $b, $b, #4	@ B4
	veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = (M) (P2 + P3) << 16
	vand	$t1#hi, $t1#hi, $k32
	vmull.p8	$t3, $a, $t3#lo		@ K = A*B4
	veor	$t2, $t2, $r		@ N = I + J
	veor	$t0#lo, $t0#lo, $t0#hi
	veor	$t1#lo, $t1#lo, $t1#hi
	veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = (N) (P4 + P5) << 24
	vand	$t2#hi, $t2#hi, $k16
	vext.8	$t0, $t0, $t0, #15
	veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = (K) (P6 + P7) << 32
	vmov.i64	$t3#hi, #0
	vext.8	$t1, $t1, $t1, #14
	veor	$t2#lo, $t2#lo, $t2#hi
	vmull.p8	$r, $a, $b		@ D = A*B
	vext.8	$t3, $t3, $t3, #12
	vext.8	$t2, $t2, $t2, #13
	veor	$t0, $t0, $t1
	veor	$t2, $t2, $t3
	veor	$r, $r, $t0
	veor	$r, $r, $t2

	vst1.32	{$r}, [r0]
	ret		@ bx lr
#endif
___
}
$code.=<<___;
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_MAX_ARCH__>=7
.align	5
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.
#endif
.asciz	"GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align	5

#if __ARM_MAX_ARCH__>=7
.comm	OPENSSL_armcap_P,4,4
#endif
___

# Post-process the accumulated text line by line before printing it:
#  - evaluate any `...` expression embedded in the text;
#  - rewrite qN#lo / qN#hi to the matching d register (d(2N) / d(2N+1));
#  - failing that, spell "ret" as "bx lr";
#  - failing that, replace a literal "bx lr" by its instruction word.
# The three rewrites are chained with "or", so at most one kind applies
# to any given line.
foreach (split("\n",$code)) {
    s/\`([^\`]*)\`/eval $1/geo;

    s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
    s/\bret\b/bx lr/go or
    s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4

    print $_,"\n";
}

# Closing enforces the flush. For the pipe case a false return also covers
# a non-zero exit of arm-xlate.pl, in which case $! is 0 and $? holds the
# child status.
close STDOUT
    or die "error closing STDOUT: ", ($! ? $! : "child exit status $?"), "\n";