#include <sparc_arch.h>

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

!-----------------------------------------------------------------------
! bn_GF2m_mul_2x2
!
! Carry-less (XOR-accumulating) 64x64 -> 128 bit multiplication.
! Presumably called from C as
!   void bn_GF2m_mul_2x2(r, a_hi, a_lo, b_hi, b_lo)
! with 32-bit words -- TODO confirm against the C prototype.
!
! In:    %o0 = pointer to four 32-bit result words
!        %o1 = upper 32 bits of a,  %o2 = lower 32 bits of a
!        %o3 = upper 32 bits of b,  %o4 = lower 32 bits of b
! Out:   [%o0+0]  = product bits  31..0
!        [%o0+4]  = product bits  63..32
!        [%o0+8]  = product bits  95..64
!        [%o0+12] = product bits 127..96
! Clobb: VIS3 path (leaf, no register window): %g1, %o1-%o4, integer
!        condition codes, plus %g5 which is handed to the address-load
!        macro as a scratch register (assumed clobbered; confirm in
!        sparc_arch.h).
!        Software path: %g1-%g5, and a fresh register window with
!        128 bytes of stack for a 16-entry lookup table.
!
! NOTE(review): both paths build the 64-bit operands with a plain or
! of the low-half register, so they assume the upper 32 bits of
! %o2 and %o4 are zero on entry. Confirm that the calling ABI
! guarantees this.
!-----------------------------------------------------------------------
.globl	bn_GF2m_mul_2x2
.align	16
bn_GF2m_mul_2x2:
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1			! OPENSSL_sparcv9cap_P[0]

	andcc	%g1, SPARCV9_VIS3, %g0		! VIS3 capability bit set?
	bz,pn	%icc,.Lsoftware			! no: use the table-driven path
	nop					! (delay slot)

	! ---- VIS3 path: hardware carry-less multiply ----
	sllx	%o1, 32, %o1
	sllx	%o3, 32, %o3
	or	%o2, %o1, %o1			! %o1 = a = a_hi:a_lo
	or	%o4, %o3, %o3			! %o3 = b = b_hi:b_lo
	.word	0x95b262ab			! xmulx   %o1, %o3, %o2
	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
	srlx	%o2, 32, %o1			! 13 cycles later
	st	%o2, [%o0+0]			! bits  31..0
	st	%o1, [%o0+4]			! bits  63..32
	srlx	%o4, 32, %o3
	st	%o4, [%o0+8]			! bits  95..64
	retl
	st	%o3, [%o0+12]			! bits 127..96 (delay slot)

!-----------------------------------------------------------------------
! Software path.
!
! Register roles (several registers are reused as the code proceeds):
!   %l0  tab : base of a 16 x 8-byte table on the stack
!   %o7  b   : 64-bit multiplier
!   %g1  a, then the bit-63 mask, then lo (low 64 bits of the product)
!   %o0  a1 = a with its top three bits cleared
!   %o1  a2 = a1 shifted left by 1
!   %o2  a4 = a1 shifted left by 2
!   %o3  a8 = a1 shifted left by 3, later hi (high 64 bits of product)
!   %o4  mask 0x3fff..., then scratch for a1^a2 combinations
!   %o5  mask 0x7fff... / 0x1fff..., then a4^a8
!   %g4,%g5  b masked by bits 62 and 61 of a, later table lookups
!   %g2,%g3  shifted temporaries
!
! tab[i] holds the XOR of those of a1,a2,a4,a8 selected by the bits of
! i. The three top bits of a are removed from the table and applied
! separately, by XORing b shifted by 63, 62 and 61 into lo/hi.
! b is then consumed four bits at a time: each nibble k selects tab[n],
! which is XORed into lo shifted left by 4k and into hi shifted right
! by 64-4k.
!-----------------------------------------------------------------------
.align	16
.Lsoftware:
	save	%sp,-STACK_FRAME-128,%sp	! 128 = 16 table entries * 8

	sllx	%i1,32,%g1
	mov	-1,%o4
	sllx	%i3,32,%o7
	or	%i2,%g1,%g1			! %g1 = a = a_hi:a_lo
	srlx	%o4,1,%o5			! 0x7fff...
	or	%i4,%o7,%o7			! %o7 = b = b_hi:b_lo
	srlx	%o4,2,%o4			! 0x3fff...
	add	%sp,STACK_BIAS+STACK_FRAME,%l0	! %l0 = tab

	sllx	%g1,2,%o2
	mov	%g1,%o0
	sllx	%g1,1,%o1

	srax	%o2,63,%g5			! broadcast 61st bit
	and	%o5,%o2,%o2			! (a<<2)&0x7fff...
	srlx	%o5,2,%o5			! 0x1fff...
	srax	%o1,63,%g4			! broadcast 62nd bit
	and	%o4,%o1,%o1			! (a<<1)&0x3fff...
	srax	%o0,63,%g1			! broadcast 63rd bit
	and	%o5,%o0,%o0			! (a<<0)&0x1fff...

	sllx	%o0,3,%o3			! a8
	and	%o7,%g1,%g1			! b if bit 63 of a is set, else 0
	and	%o7,%g4,%g4			! b if bit 62 of a is set, else 0
	and	%o7,%g5,%g5			! b if bit 61 of a is set, else 0

	! Build the table; xors for later entries are interleaved with
	! the stores of earlier ones.
	stx	%g0,[%l0+0*8]			! tab[0]=0
	xor	%o0,%o1,%o4
	stx	%o0,[%l0+1*8]			! tab[1]=a1
	stx	%o1,[%l0+2*8]			! tab[2]=a2
	xor	%o2,%o3,%o5
	stx	%o4,[%l0+3*8]			! tab[3]=a1^a2
	xor	%o2,%o0,%o0

	stx	%o2,[%l0+4*8]			! tab[4]=a4
	xor	%o2,%o1,%o1
	stx	%o0,[%l0+5*8]			! tab[5]=a1^a4
	xor	%o2,%o4,%o4
	stx	%o1,[%l0+6*8]			! tab[6]=a2^a4
	xor	%o5,%o0,%o0
	stx	%o4,[%l0+7*8]			! tab[7]=a1^a2^a4
	xor	%o5,%o1,%o1

	stx	%o3,[%l0+8*8]			! tab[8]=a8
	xor	%o5,%o4,%o4
	stx	%o0,[%l0+9*8]			! tab[9]=a1^a8
	xor	%o2,%o0,%o0
	stx	%o1,[%l0+10*8]			! tab[10]=a2^a8
	xor	%o2,%o1,%o1
	stx	%o4,[%l0+11*8]			! tab[11]=a1^a2^a8

	xor	%o2,%o4,%o4
	stx	%o5,[%l0+12*8]			! tab[12]=a4^a8
	srlx	%g1,1,%o3			! hi  = (bit-63 term) >> 1
	stx	%o0,[%l0+13*8]			! tab[13]=a1^a4^a8
	sllx	%g1,63,%g1			! lo  = (bit-63 term) << 63
	stx	%o1,[%l0+14*8]			! tab[14]=a2^a4^a8
	srlx	%g4,2,%g2			! (bit-62 term) >> 2, for hi
	stx	%o4,[%l0+15*8]			! tab[15]=a1^a2^a4^a8

	! Fold in the remaining top-bit terms while starting the nibble
	! scan. The index is (nibble * 8), hence the shift by 4k-3 and
	! the mask 120 (0xf << 3).
	sllx	%g4,62,%o0			! (bit-62 term) << 62, for lo
	sllx	%o7,3,%g4			! nibble 0 of b
	srlx	%g5,3,%g3			! (bit-61 term) >> 3, for hi
	and	%g4,120,%g4
	sllx	%g5,61,%o1			! (bit-61 term) << 61, for lo
	ldx	[%l0+%g4],%g4
	srlx	%o7,4-3,%g5			! nibble 1 of b
	xor	%g2,%o3,%o3
	and	%g5,120,%g5
	xor	%o0,%g1,%g1
	ldx	[%l0+%g5],%g5
	xor	%g3,%o3,%o3

	xor	%g4,%g1,%g1			! lo ^= tab[nibble 0]
	srlx	%o7,8-3,%g4			! nibble 2 of b
	xor	%o1,%g1,%g1
	and	%g4,120,%g4
	sllx	%g5,4,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,60,%g3
	xor	%g2,%g1,%g1			! lo ^= tab[nibble 1] << 4
	srlx	%o7,12-3,%g5			! nibble 3 of b
	xor	%g3,%o3,%o3			! hi ^= tab[nibble 1] >> 60
	and	%g5,120,%g5
	sllx	%g4,8,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,56,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,16-3,%g4			! nibble 4 of b
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,12,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,52,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,20-3,%g5			! nibble 5 of b
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,16,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,48,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,24-3,%g4			! nibble 6 of b
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,20,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,44,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,28-3,%g5			! nibble 7 of b
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,24,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,40,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,32-3,%g4			! nibble 8 of b
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,28,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,36,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,36-3,%g5			! nibble 9 of b
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,32,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,32,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,40-3,%g4			! nibble 10 of b
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,36,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,28,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,44-3,%g5			! nibble 11 of b
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,40,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,24,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,48-3,%g4			! nibble 12 of b
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,44,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,20,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,52-3,%g5			! nibble 13 of b
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,48,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,16,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,56-3,%g4			! nibble 14 of b
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,52,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,12,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,60-3,%g5			! nibble 15 of b
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,56,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,8,%g2
	xor	%g3,%g1,%g1

	sllx	%g5,60,%g3
	xor	%g2,%o3,%o3
	srlx	%g5,4,%g2
	xor	%g3,%g1,%g1			! lo complete
	xor	%g2,%o3,%o3			! hi complete

	! Store the 128-bit result as four 32-bit words, lowest first.
	srlx	%g1,32,%i1
	st	%g1,[%i0+0]			! bits  31..0
	st	%i1,[%i0+4]			! bits  63..32
	srlx	%o3,32,%i2
	st	%o3,[%i0+8]			! bits  95..64
	st	%i2,[%i0+12]			! bits 127..96

	ret
	restore					! (delay slot) drop the window
.type	bn_GF2m_mul_2x2,#function
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro@openssl.org>"
.align	4