#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# sha1_block for Thumb.
#
# January 2007.
#
# The code does not present direct interest to OpenSSL, because of low
# performance. Its purpose is to establish a _size_ benchmark. Pretty
# useless one I must say, because the 30% or 88 bytes larger ARMv4 code
# [available on demand] is almost _twice_ as fast. It should also be
# noted that in-lining of .Lcommon and .Lrotate improves performance
# by over 40%, while code increases by only 10% or 32 bytes. But once
# again, the goal was to establish a _size_ benchmark, not performance.

$output=shift;
open STDOUT,">$output";

$inline=0;
#$cheat_on_binutils=1;

$t0="r0";
$t1="r1";
$t2="r2";
$a="r3";
$b="r4";
$c="r5";
$d="r6";
$e="r7";
$K="r8";	# "upper" registers can be used in add/sub and mov insns
$ctx="r9";
$inp="r10";
$len="r11";
$Xi="r12";

sub common {
<<___;
	sub	$t0,#4
	ldr	$t1,[$t0]
	add	$e,$K			@ E+=K_xx_xx
	lsl	$t2,$a,#5
	add	$t2,$e
	lsr	$e,$a,#27
	add	$t2,$e			@ E+=ROR(A,27)
	add	$t2,$t1			@ E+=X[i]
___
}
sub rotate {
<<___;
	mov	$e,$d			@ E=D
	mov	$d,$c			@ D=C
	lsl	$c,$b,#30
	lsr	$b,$b,#2
	orr	$c,$b			@ C=ROR(B,2)
	mov	$b,$a			@ B=A
	add	$a,$t2,$t1		@ A=E+F_xx_xx(B,C,D)
___
}

sub BODY_00_19 {
$code.=$inline?&common():"\tbl .Lcommon\n";
$code.=<<___;
	mov	$t1,$c
	eor	$t1,$d
	and	$t1,$b
	eor	$t1,$d			@ F_00_19(B,C,D)
___
$code.=$inline?&rotate():"\tbl .Lrotate\n";
}

sub BODY_20_39 {
$code.=$inline?&common():"\tbl .Lcommon\n";
$code.=<<___;
	mov	$t1,$b
	eor	$t1,$c
	eor	$t1,$d			@ F_20_39(B,C,D)
___
$code.=$inline?&rotate():"\tbl .Lrotate\n";
}

sub BODY_40_59 {
$code.=$inline?&common():"\tbl .Lcommon\n";
$code.=<<___;
	mov	$t1,$b
	and	$t1,$c
	mov	$e,$b
	orr	$e,$c
	and	$e,$d
	orr	$t1,$e			@ F_40_59(B,C,D)
___
$code.=$inline?&rotate():"\tbl .Lrotate\n";
}

$code=<<___;
.text
.code	16

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
sha1_block_data_order:
___
if ($cheat_on_binutils) {
$code.=<<___;
.code	32
	add	r3,pc,#1
	bx	r3			@ switch to Thumb ISA
.code	16
___
}
$code.=<<___;
	push	{r4-r7}
	mov	r3,r8
	mov	r4,r9
	mov	r5,r10
	mov	r6,r11
	mov	r7,r12
	push	{r3-r7,lr}
	lsl	r2,#6
	mov	$ctx,r0			@ save context
	mov	$inp,r1			@ save inp
	mov	$len,r2			@ save len
	add	$len,$inp		@ $len to point at inp end

.Lloop:
	mov	$Xi,sp
	mov	$t2,sp
	sub	$t2,#16*4		@ [3]
.LXload:
	ldrb	$a,[$t1,#0]		@ $t1 is r1 and holds inp
	ldrb	$b,[$t1,#1]
	ldrb	$c,[$t1,#2]
	ldrb	$d,[$t1,#3]
	lsl	$a,#24
	lsl	$b,#16
	lsl	$c,#8
	orr	$a,$b
	orr	$a,$c
	orr	$a,$d
	add	$t1,#4
	push	{$a}
	cmp	sp,$t2
	bne	.LXload			@ [+14*16]

	mov	$inp,$t1		@ update $inp
	sub	$t2,#32*4
	sub	$t2,#32*4
	mov	$e,#31			@ [+4]
.LXupdate:
	ldr	$a,[sp,#15*4]
	ldr	$b,[sp,#13*4]
	ldr	$c,[sp,#7*4]
	ldr	$d,[sp,#2*4]
	eor	$a,$b
	eor	$a,$c
	eor	$a,$d
	ror	$a,$e
	push	{$a}
	cmp	sp,$t2
	bne	.LXupdate		@ [+(11+1)*64]

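@ The 80-word message schedule now sits on the stack; load the five
@ chaining values A-E from the context and run the 80 rounds in four
@ stretches of 20 below (the 20-39 body doubles for rounds 60-79).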
	ldmia	$t0!,{$a,$b,$c,$d,$e}	@ $t0 is r0 and holds ctx
	mov	$t0,$Xi

	ldr	$t2,.LK_00_19
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+7+4]
.L_00_19:
___
&BODY_00_19();
$code.=<<___;
	cmp	$Xi,$t0
	bne	.L_00_19		@ [+(2+9+4+2+8+2)*20]

	ldr	$t2,.LK_20_39
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_20_39_or_60_79:
___
&BODY_20_39();
$code.=<<___;
	cmp	$Xi,$t0
	bne	.L_20_39_or_60_79	@ [+(2+9+3+2+8+2)*20*2]
	cmp	sp,$t0
	beq	.Ldone			@ [+2]

	ldr	$t2,.LK_40_59
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_40_59:
___
&BODY_40_59();
$code.=<<___;
	cmp	$Xi,$t0
	bne	.L_40_59		@ [+(2+9+6+2+8+2)*20]

	ldr	$t2,.LK_60_79
	mov	$Xi,sp
	mov	$K,$t2
	b	.L_20_39_or_60_79	@ [+4]
.Ldone:
	mov	$t0,$ctx
	ldr	$t1,[$t0,#0]
	ldr	$t2,[$t0,#4]
	add	$a,$t1
	ldr	$t1,[$t0,#8]
	add	$b,$t2
	ldr	$t2,[$t0,#12]
	add	$c,$t1
	ldr	$t1,[$t0,#16]
	add	$d,$t2
	add	$e,$t1
	stmia	$t0!,{$a,$b,$c,$d,$e}	@ [+20]

	add	sp,#80*4		@ deallocate stack frame
	mov	$t0,$ctx		@ restore ctx
	mov	$t1,$inp		@ restore inp
	cmp	$t1,$len
	beq	.Lexit
	b	.Lloop			@ [+6] total 3212 cycles
.Lexit:
	pop	{r2-r7}
	mov	r8,r2
	mov	r9,r3
	mov	r10,r4
	mov	r11,r5
	mov	r12,r6
	mov	lr,r7
	pop	{r4-r7}
	bx	lr
.align	2
___
$code.=".Lcommon:\n".&common()."\tmov pc,lr\n" if (!$inline);
$code.=".Lrotate:\n".&rotate()."\tmov pc,lr\n" if (!$inline);
$code.=<<___;
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for Thumb, CRYPTOGAMS by <appro\@openssl.org>"
___

print $code;
close STDOUT; # enforce flush
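
# The generated routine takes r0 = pointer to the five 32-bit chaining
# values, r1 = input pointer and r2 = number of 64-byte blocks ("lsl r2,#6"
# in the prologue scales it to a byte count).  A minimal C caller, sketched
# here on the assumption of the usual OpenSSL-style prototype and with a
# hypothetical "block" buffer, could look like:
#
#	#include <stddef.h>
#
#	extern void sha1_block_data_order(unsigned int *state,
#	                                  const void *data, size_t num);
#
#	unsigned int h[5] = { 0x67452301, 0xefcdab89, 0x98badcfe,
#	                      0x10325476, 0xc3d2e1f0 };	/* SHA-1 IV */
#	unsigned char block[64];	/* fill with one padded message block */
#	sha1_block_data_order(h, block, 1);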