#! /usr/bin/env perl
# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...

# September 2012.
#
# Add MIPS32r2 code (>25% less instructions).

######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift || "o32";	# supported flavours are o32,n32,64,nubi32,nubi64

# Pick pointer-sized instruction mnemonics and register-save width for
# the selected ABI: 64-bit (and n32, whose registers are still 64-bit
# wide) use the doubleword forms.
if ($flavour =~ /64|n32/i) {
	$PTR_ADD="daddu";	# incidentally works even on n32
	$PTR_SUB="dsubu";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="addu";
	$PTR_SUB="subu";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
#
# <appro@openssl.org>
#
######################################################################

# Probe target endianness through the C preprocessor when a compiler
# is available: if the MIPSEB macro is defined, "MIPSEB" is replaced
# by the preprocessor and does not survive to the output.
$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});

for (@ARGV) {	$output=$_ if (/\w[\w\-]*\.\w+$/);	}
# Redirect generated code to the requested file; without an output
# argument keep writing to stdout. Checked 3-arg open instead of the
# former unchecked 2-arg `open STDOUT,">$output"`, which failed
# silently (with an undef warning) when no file name was supplied.
if (defined $output) {
	open STDOUT, '>', $output or die "can't open $output: $!";
}

# Fallback endianness detection on the build host itself.
if (!defined($big_endian))
	{   $big_endian=(unpack('L',pack('N',1))==1);   }

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;

@X=map("\$$_",(8..23));	# a4-a7,s0-s11; 16-word message schedule

$ctx=$a0;	# SHA_CTX pointer (1st argument)
$inp=$a1;	# input pointer   (2nd argument)
$num=$a2;	# block count     (3rd argument)
$A="\$1";
$B="\$2";
$C="\$3";
$D="\$7";
$E="\$24";	@V=($A,$B,$C,$D,$E);
$t0="\$25";
$t1=$num;	# $num is offloaded to stack
$t2="\$30";	# fp
$K="\$31";	# ra

# Rounds 0..14: F = Ch(b,c,d) = ((c^d)&b)^d. Byte-swaps word $i on
# little-endian targets and pre-loads word $j=$i+1 for the next round
# (lw on R6, which has no lwl/lwr; otherwise lwl/lwr for unaligned
# input). Separate instruction schedules for R2+ (rotr available) and
# pre-R2 cores.
sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___	if (!$big_endian);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[$i],@X[$i]		# byte swap($i)
	rotr	@X[$i],@X[$i],16
#else
	srl	$t0,@X[$i],24		# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	$t1,$t2
	or	@X[$i],$t1
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K			# $i
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	and	$t0,$b
	addu	$e,$t1
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lw	@X[$j],$j*4($inp)
#else
	lwl	@X[$j],$j*4+$MSB($inp)
	lwr	@X[$j],$j*4+$LSB($inp)
#endif
	xor	$t0,$d
	addu	$e,@X[$i]
	rotr	$b,$b,2
	addu	$e,$t0
#else
	lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5		# $i
	addu	$e,$K
	lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

# Rounds 15..19: same F = Ch(b,c,d), but from here on the schedule
# word for round $j is computed by the Xupdate recurrence
# X[j] = rol32(X[j-3]^X[j-8]^X[j-14]^X[j-16], 1), indexed mod 16.
# Round 15 also byte-swaps the last freshly-loaded input word on
# little-endian targets.
sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___	if (!$big_endian && $i==15);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	wsbh	@X[$i],@X[$i]		# byte swap($i)
	rotr	@X[$i],@X[$i],16
#else
	srl	$t0,@X[$i],24		# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
#endif
___
$code.=<<___;
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K			# $i
	xor	@X[$j%16],@X[($j+2)%16]
	xor	$t0,$c,$d
	rotr	$t1,$a,27
	xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$b
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	xor	$t0,$d
	addu	$e,@X[$i%16]
	rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5		# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

# Rounds 20..39 and 60..79: F = Parity(b,c,d) = b^c^d. The $i==79
# variant interleaves the final round with reloading the five chaining
# values from the context for the post-loop additions.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	xor	@X[$j%16],@X[($j+2)%16]
	addu	$e,$K			# $i
	rotr	$t1,$a,27
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	xor	$t0,$b
	addu	$e,@X[$i%16]
	rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5		# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	srl	$t1,@X[$j%16],31
	addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
#endif
___
$code.=<<___ if ($i==79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	lw	@X[0],0($ctx)
	addu	$e,$K			# $i
	lw	@X[1],4($ctx)
	rotr	$t1,$a,27
	lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	lw	@X[3],12($ctx)
	xor	$t0,$b
	addu	$e,@X[$i%16]
	lw	@X[4],16($ctx)
	rotr	$b,$b,2
	addu	$e,$t0
#else
	lw	@X[0],0($ctx)
	sll	$t0,$a,5		# $i
	addu	$e,$K
	lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

# Rounds 40..59: F = Maj(b,c,d), computed here as (c&d) + (b&(c^d))
# so it can be folded into $e with two additions.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
	addu	$e,$K			# $i
	and	$t0,$c,$d
	xor	@X[$j%16],@X[($j+2)%16]
	rotr	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	and	$t0,$b
	addu	$e,@X[$i%16]
	rotr	@X[$j%16],@X[$j%16],31
	rotr	$b,$b,2
	addu	$e,$t0
#else
	xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5		# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
#endif
___
}

$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";

# Function prologue: allocate the frame and spill callee-saved
# registers ($ra, $fp, $s4-$s11 always; $s0-$s3 and $gp only under
# NUBI, whose save mask differs).
$code=<<___;
#include "mips_arch.h"

.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	$sp,$FRAMESIZE*$SZREG,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_S	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
# Turn the block count into an end-of-input pointer ($num = $inp +
# $num*64), park it on the stack (its register doubles as $t1), and
# load the five chaining values.
$code.=<<___;
	$PTR_SLL $num,6
	$PTR_ADD $num,$inp
	$REG_S	$num,0($sp)
	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
	lui	$K,0x5a82
	lw	@X[0],($inp)
	ori	$K,0x7999	# K_00_19
#else
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
#endif
___
# Emit all 80 rounds, rotating the a..e register assignment after
# each one and switching the round constant $K at each 20-round
# boundary. Rounds 60..79 reuse BODY_20_39 (same Parity function).
for ($i=0;$i<15;$i++)	{ &BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++)	{ &BODY_15_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
for (;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
for (;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
for (;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Per-block epilogue: fold the round results (reloaded into @X[0..4]
# by the $i==79 round) back into the context and loop until $inp
# reaches the end-of-input pointer saved at 0($sp).
$code.=<<___;
	$PTR_ADD $inp,64
	$REG_L	$num,0($sp)

	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bne	$inp,$num,.Loop
	nop

	.set	noreorder
	$REG_L	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_L	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	sha1_block_data_order
.rdata
.asciiz	"SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___
print $code;
close STDOUT or die "error closing STDOUT: $!";