#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# October 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: one suitable
# for all SPARCv9 processors and one for VIS3-capable ones. Former
# delivers ~25-45% more, more for longer keys, heaviest DH and DSA
# verify operations on venerable UltraSPARC II. On T4 VIS3 code is
# ~100-230% faster than gcc-generated code and ~35-90% faster than
# the pure SPARCv9 code path.
#
# How this generator works:
#   * Assembly text is accumulated in $code through heredocs, with the
#     Perl variables below interpolated as register names.
#   * Expressions wrapped in backticks inside the assembly text are
#     evaluated by Perl in a final substitution pass, so the assembler
#     only ever sees their numeric results.
#   * The finished text is printed to STDOUT.
#
# Register usage of the generated routine, as visible in the code:
#   %o0      - pointer to the result, stored as four 32-bit words
#   %o1,%o2  - upper and lower 32 bits of the first operand
#   %o3,%o4  - upper and lower 32 bits of the second operand
# (the software path sees the same values as %i0-%i4 after "save").

# Stack space for the 16-entry lookup table of 8-byte words used by
# the software path.
$locals=16*8;

# Base address of the lookup table.
$tab="%l0";

# Two pairs of scratch registers. The unrolled loop below rotates both
# arrays after every step, so consecutive steps alternate registers.
@T=("%g2","%g3");
@i=("%g4","%g5");

# Registers holding shifted/combined copies of the first operand while
# the table is being filled.
($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
# $hi shares a register with $a8 and $a shares one with $lo.
($lo,$hi,$b)=("%g1",$a8,"%o7");	$a=$lo;

# Prologue, VIS3 fast path (xmulx/xmulxhi emitted as raw .word
# encodings), and the first part of the software path: table set-up
# plus handling of the first two 4-bit digits of $b.
$code.=<<___;
#include <sparc_arch.h>

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

.globl	bn_GF2m_mul_2x2
.align	16
bn_GF2m_mul_2x2:
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1			! OPENSSL_sparcv9cap_P[0]

	andcc	%g1, SPARCV9_VIS3, %g0
	bz,pn	%icc,.Lsoftware
	nop

	sllx	%o1, 32, %o1
	sllx	%o3, 32, %o3
	or	%o2, %o1, %o1
	or	%o4, %o3, %o3
	.word	0x95b262ab			! xmulx   %o1, %o3, %o2
	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
	srlx	%o2, 32, %o1			! 13 cycles later
	st	%o2, [%o0+0]
	st	%o1, [%o0+4]
	srlx	%o4, 32, %o3
	st	%o4, [%o0+8]
	retl
	st	%o3, [%o0+12]

.align	16
.Lsoftware:
	save	%sp,-STACK_FRAME-$locals,%sp

	sllx	%i1,32,$a
	mov	-1,$a12
	sllx	%i3,32,$b
	or	%i2,$a,$a
	srlx	$a12,1,$a48			! 0x7fff...
	or	%i4,$b,$b
	srlx	$a12,2,$a12			! 0x3fff...
	add	%sp,STACK_BIAS+STACK_FRAME,$tab

	sllx	$a,2,$a4
	mov	$a,$a1
	sllx	$a,1,$a2

	srax	$a4,63,@i[1]			! broadcast 61st bit
	and	$a48,$a4,$a4			! (a<<2)&0x7fff...
	srlx	$a48,2,$a48
	srax	$a2,63,@i[0]			! broadcast 62nd bit
	and	$a12,$a2,$a2			! (a<<1)&0x3fff...
	srax	$a1,63,$lo			! broadcast 63rd bit
	and	$a48,$a1,$a1			! (a<<0)&0x1fff...

	sllx	$a1,3,$a8
	and	$b,$lo,$lo
	and	$b,@i[0],@i[0]
	and	$b,@i[1],@i[1]

	stx	%g0,[$tab+0*8]			! tab[0]=0
	xor	$a1,$a2,$a12
	stx	$a1,[$tab+1*8]			! tab[1]=a1
	stx	$a2,[$tab+2*8]			! tab[2]=a2
	xor	$a4,$a8,$a48
	stx	$a12,[$tab+3*8]			! tab[3]=a1^a2
	xor	$a4,$a1,$a1

	stx	$a4,[$tab+4*8]			! tab[4]=a4
	xor	$a4,$a2,$a2
	stx	$a1,[$tab+5*8]			! tab[5]=a1^a4
	xor	$a4,$a12,$a12
	stx	$a2,[$tab+6*8]			! tab[6]=a2^a4
	xor	$a48,$a1,$a1
	stx	$a12,[$tab+7*8]			! tab[7]=a1^a2^a4
	xor	$a48,$a2,$a2

	stx	$a8,[$tab+8*8]			! tab[8]=a8
	xor	$a48,$a12,$a12
	stx	$a1,[$tab+9*8]			! tab[9]=a1^a8
	xor	$a4,$a1,$a1
	stx	$a2,[$tab+10*8]			! tab[10]=a2^a8
	xor	$a4,$a2,$a2
	stx	$a12,[$tab+11*8]		! tab[11]=a1^a2^a8

	xor	$a4,$a12,$a12
	stx	$a48,[$tab+12*8]		! tab[12]=a4^a8
	srlx	$lo,1,$hi
	stx	$a1,[$tab+13*8]			! tab[13]=a1^a4^a8
	sllx	$lo,63,$lo
	stx	$a2,[$tab+14*8]			! tab[14]=a2^a4^a8
	srlx	@i[0],2,@T[0]
	stx	$a12,[$tab+15*8]		! tab[15]=a1^a2^a4^a8

	sllx	@i[0],62,$a1
	sllx	$b,3,@i[0]
	srlx	@i[1],3,@T[1]
	and	@i[0],`0xf<<3`,@i[0]
	sllx	@i[1],61,$a2
	ldx	[$tab+@i[0]],@i[0]
	srlx	$b,4-3,@i[1]
	xor	@T[0],$hi,$hi
	and	@i[1],`0xf<<3`,@i[1]
	xor	$a1,$lo,$lo
	ldx	[$tab+@i[1]],@i[1]
	xor	@T[1],$hi,$hi

	xor	@i[0],$lo,$lo
	srlx	$b,8-3,@i[0]
	xor	$a2,$lo,$lo
	and	@i[0],`0xf<<3`,@i[0]
___

# Unrolled middle section, one step per value of $n. Each step folds the
# table entry already loaded into @i[1] into $lo/$hi at bit offset
# $n*4, loads the entry whose index is held in @i[0], and extracts the
# table offset for the digit two positions ahead. Rotating @i and @T
# makes the next step use the other register of each pair.
for($n=1;$n<14;$n++) {
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo
	srlx	$b,`($n+2)*4`-3,@i[1]
	xor	@T[1],$hi,$hi
	and	@i[1],`0xf<<3`,@i[1]
___
	push(@i,shift(@i));	push(@T,shift(@T));
}

# Tail: $n is 14 here. Fold in the last two table entries (offsets
# $n*4 and ($n+1)*4), store the result as four 32-bit words and return.
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo

	sllx	@i[0],`($n+1)*4`,@T[0]
	xor	@T[1],$hi,$hi
	srlx	@i[0],`64-($n+1)*4`,@T[1]
	xor	@T[0],$lo,$lo
	xor	@T[1],$hi,$hi

	srlx	$lo,32,%i1
	st	$lo,[%i0+0]
	st	%i1,[%i0+4]
	srlx	$hi,32,%i2
	st	$hi,[%i0+8]
	st	%i2,[%i0+12]

	ret
	restore
.type	bn_GF2m_mul_2x2,#function
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align	4
___

# Replace every backtick-quoted expression with its evaluated value.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
# Output is buffered, so a failed write (full disk, closed pipe) may only
# be reported when the handle is closed. Fail loudly instead of leaving
# a silently truncated assembly file behind for the build to pick up.
close STDOUT or die "error closing STDOUT: $!";