/*
 * bn_mul_mont -- Montgomery multiplication for DEC Alpha.
 *
 * C-level interface (OpenSSL bn_mul_mont convention -- confirm against bn.h):
 *   int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
 *                   const BN_ULONG *np, const BN_ULONG *n0p, int num);
 *
 * Arguments (Alpha calling convention):
 *   a0 = rp   result vector, num 64-bit words
 *   a1 = ap   multiplicand vector
 *   a2 = bp   multiplier vector
 *   a3 = np   modulus vector
 *   a4 = n0p  pointer to n0, the Montgomery reduction constant
 *             (conventionally -1/np[0] mod 2^64 -- not verifiable here)
 *   a5 = num  vector length in words (32-bit int, sign-extended below)
 *
 * Returns v0 = 1 on success; v0 = 0 (nothing computed) when num < 4.
 *
 * A temporary vector tp[] of num+2 words is carved out of the stack and
 * the stack pointer is rounded down to a 4096-byte boundary for it.
 *
 * Working registers inside the loops:
 *   t5    = bp[i], current multiplier word
 *   s5    = m = lo(tp[0]*n0), per-outer-iteration Montgomery multiplier
 *   t1:t0 = hi:lo running word of the ap[]*bp[i] partial product
 *   t3:t2 = hi:lo running word of the np[]*m partial product
 *   t7    = walking pointer into tp[]
 *   s4    = inner index j;  s3 = outer index i
 *   AT, v0, t12 = scratch carries / loop flags
 */
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

.text

.set	noat
.set	noreorder

.globl	bn_mul_mont
.align	5
.ent	bn_mul_mont
bn_mul_mont:
	lda	sp,-48(sp)	# 48-byte register-save frame
	stq	ra,0(sp)	# save return address
	stq	s3,8(sp)	# save callee-saved s3..s5 (used as i, j, m)
	stq	s4,16(sp)
	stq	s5,24(sp)
	stq	fp,32(sp)	# save frame pointer
	mov	sp,fp		# fp anchors the frame; sp is repointed below
	.mask	0x0400f000,-48	# ra($26) and s3..fp($12-$15) saved in frame
	.frame	fp,48,ra
	.prologue 0

	.align	4
	.set	reorder
	sextl	a5,a5		# num: sign-extend the 32-bit argument
	mov	0,v0		# preload failure return value
	cmplt	a5,4,AT
	bne	AT,.Lexit	# bail out (v0 = 0) if num < 4

	# Allocate tp[num+2] on the stack, page-align sp, and fetch the
	# first operand words.
	ldq	t1,0(a1)	# ap[0]
	s8addq	a5,16,AT	# AT = num*8 + 16 bytes for tp[]
	ldq	t4,8(a1)	# ap[1]
	subq	sp,AT,sp	# carve tp[] out of the stack
	ldq	t5,0(a2)	# bp[0]
	lda	AT,-4096(zero)	# mov -4096,AT
	ldq	a4,0(a4)	# n0 = *n0p
	and	sp,AT,sp	# round sp down to a 4096-byte boundary

	# First outer iteration (i = 0), with j = 0 and j = 1 peeled off.
	mulq	t1,t5,t0	# lo(ap[0]*bp[0])
	ldq	t3,0(a3)	# np[0]
	umulh	t1,t5,t1	# hi(ap[0]*bp[0])
	ldq	t6,8(a3)	# np[1]

	mulq	t0,a4,s5	# m = lo(tp[0]*n0)

	mulq	t3,s5,t2	# lo(np[0]*m)
	umulh	t3,s5,t3	# hi(np[0]*m)

	addq	t2,t0,t2	# np[0]*m + tp[0]
	cmpult	t2,t0,AT	# carry out of the addition
	addq	t3,AT,t3

	mulq	t4,t5,t8	# lo(ap[1]*bp[0])
	mov	2,s4		# j = 2
	umulh	t4,t5,t9	# hi(ap[1]*bp[0])
	mov	sp,t7		# t7 walks tp[]

	mulq	t6,s5,t10	# lo(np[1]*m)
	s8addq	s4,a1,t4	# &ap[2]
	umulh	t6,s5,t11	# hi(np[1]*m)
	s8addq	s4,a3,t6	# &np[2]
.align	4
# First-pass inner loop: tp[] = ap[]*bp[0] + np[]*m, word by word with
# carry propagation.  Hand-scheduled; noreorder keeps the interleave.
.L1st:
	.set	noreorder
	ldq	t4,0(t4)	# ap[j]
	addl	s4,1,s4		# j++
	ldq	t6,0(t6)	# np[j]
	lda	t7,8(t7)	# advance tp pointer

	addq	t8,t1,t0	# lo(ap*bp) + previous high word
	mulq	t4,t5,t8	# start lo(ap[j]*bp[0])
	cmpult	t0,t1,AT	# carry from the addition above
	addq	t10,t3,t2	# lo(np*m) + previous high word

	mulq	t6,s5,t10	# start lo(np[j]*m)
	addq	t9,AT,t1	# high word for next iteration
	cmpult	t2,t3,v0	# carry
	cmplt	s4,a5,t12	# j < num?

	umulh	t4,t5,t9	# start hi(ap[j]*bp[0])
	addq	t11,v0,t3
	addq	t2,t0,t2	# combine the two partial sums
	s8addq	s4,a1,t4	# &ap[j+1]

	umulh	t6,s5,t11	# start hi(np[j]*m)
	cmpult	t2,t0,v0	# carry
	addq	t3,v0,t3
	s8addq	s4,a3,t6	# &np[j+1]

	stq	t2,-8(t7)	# store finished tp word
	nop
	unop
	bne	t12,.L1st
	.set	reorder

	# Drain the pipeline: fold in the last partial products and store
	# the top three tp words (including the extra carry word tp[num]).
	addq	t8,t1,t0
	addq	t10,t3,t2
	cmpult	t0,t1,AT
	cmpult	t2,t3,v0
	addq	t9,AT,t1
	addq	t11,v0,t3

	addq	t2,t0,t2
	cmpult	t2,t0,v0
	addq	t3,v0,t3

	stq	t2,0(t7)	# tp[num-2]

	addq	t3,t1,t3	# combine the two high words
	cmpult	t3,t1,AT
	stq	t3,8(t7)	# tp[num-1]
	stq	AT,16(t7)	# tp[num], the carry word

	mov	1,s3		# outer index i = 1
.align	4
# Outer loop: one pass per remaining bp word, accumulating into tp[].
.Louter:
	s8addq	s3,a2,t5	# &bp[i]
	ldq	t1,0(a1)	# ap[0]
	ldq	t4,8(a1)	# ap[1]
	ldq	t5,0(t5)	# bp[i]
	ldq	t3,0(a3)	# np[0]
	ldq	t6,8(a3)	# np[1]
	ldq	t12,0(sp)	# tp[0]

	mulq	t1,t5,t0	# lo(ap[0]*bp[i])
	umulh	t1,t5,t1	# hi(ap[0]*bp[i])

	addq	t0,t12,t0	# + tp[0]
	cmpult	t0,t12,AT
	addq	t1,AT,t1

	mulq	t0,a4,s5	# fresh m = lo(tp[0]*n0)

	mulq	t3,s5,t2	# lo(np[0]*m)
	umulh	t3,s5,t3	# hi(np[0]*m)

	addq	t2,t0,t2
	cmpult	t2,t0,AT
	mov	2,s4		# j = 2
	addq	t3,AT,t3

	mulq	t4,t5,t8	# lo(ap[1]*bp[i])
	mov	sp,t7		# rewind tp pointer
	umulh	t4,t5,t9	# hi(ap[1]*bp[i])

	mulq	t6,s5,t10	# lo(np[1]*m)
	s8addq	s4,a1,t4	# &ap[2]
	umulh	t6,s5,t11	# hi(np[1]*m)
.align	4
# Inner loop: tp[] = tp[] + ap[]*bp[i] + np[]*m.  The trailing #U./#L.
# tags look like dual-issue slot annotations from the original
# scheduling -- TODO confirm; they are kept verbatim.
.Linner:
	.set	noreorder
	ldq	t12,8(t7)	#L0 tp[j]
	nop			#U1
	ldq	t4,0(t4)	#L1 ap[j]
	s8addq	s4,a3,t6	#U0 &np[j+1]

	ldq	t6,0(t6)	#L0 np[j]
	nop			#U1
	addq	t8,t1,t0	#L1 lo(ap*bp) + previous high word
	lda	t7,8(t7)	#   advance tp pointer

	mulq	t4,t5,t8	#U1 start lo(ap[j]*bp[i])
	cmpult	t0,t1,AT	#L0 carry
	addq	t10,t3,t2	#L1 lo(np*m) + previous high word
	addl	s4,1,s4		#   j++

	mulq	t6,s5,t10	#U1 start lo(np[j]*m)
	addq	t9,AT,t1	#L0
	addq	t0,t12,t0	#L1 + tp[j]
	cmpult	t2,t3,v0	#U0 carry

	umulh	t4,t5,t9	#U1 start hi(ap[j]*bp[i])
	cmpult	t0,t12,AT	#L0 carry
	addq	t2,t0,t2	#L1 combine the two partial sums
	addq	t11,v0,t3	#U0

	umulh	t6,s5,t11	#U1 start hi(np[j]*m)
	s8addq	s4,a1,t4	#L0 &ap[j+1]
	cmpult	t2,t0,v0	#L1 carry
	cmplt	s4,a5,t12	#U0 # borrow t12 as "j < num" flag

	addq	t1,AT,t1	#L0
	addq	t3,v0,t3	#U1
	stq	t2,-8(t7)	#L1 store finished tp word
	bne	t12,.Linner	#U0
	.set	reorder

	# Drain: fold the last partial products, add the carried tp[num],
	# and store the new top words.
	ldq	t12,8(t7)	# tp[num-1]
	addq	t8,t1,t0
	addq	t10,t3,t2
	cmpult	t0,t1,AT
	cmpult	t2,t3,v0
	addq	t9,AT,t1
	addq	t11,v0,t3

	addq	t0,t12,t0	# + tp[num-1]
	cmpult	t0,t12,AT
	addq	t1,AT,t1

	ldq	t12,16(t7)	# tp[num], carry word from previous pass
	addq	t2,t0,s4	# new tp[num-2] (s4 is free here)
	cmpult	s4,t0,v0
	addq	t3,v0,t3

	addq	t3,t1,t2	# new tp[num-1]
	stq	s4,0(t7)
	cmpult	t2,t1,t3	# carry
	addq	t2,t12,t2	# + carried tp[num]
	cmpult	t2,t12,AT
	addl	s3,1,s3		# i++
	addq	t3,AT,t3
	stq	t2,8(t7)
	cmplt	s3,a5,t12	# borrow t12 # i < num?
	stq	t3,16(t7)	# new carry word tp[num]
	bne	t12,.Louter

	# Final reduction: rp[] = tp[] - np[] with borrow propagation.
	s8addq	a5,sp,t12	# &tp[num]
	mov	a0,a2		# put rp aside (a2/bp is dead now)
	mov	sp,t7		# t7 walks tp[]
	mov	sp,a1		# a1 also rewound to tp[]
	mov	0,t1		# clear borrow bit

.align	4
.Lsub:	ldq	t0,0(t7)	# tp[i]
	ldq	t2,0(a3)	# np[i]
	lda	t7,8(t7)
	lda	a3,8(a3)
	subq	t0,t2,t2	# tp[i]-np[i]
	cmpult	t0,t2,AT	# borrow from the subtraction
	subq	t2,t1,t0	# minus incoming borrow
	cmpult	t2,t0,t1
	or	t1,AT,t1	# combined borrow out
	stq	t0,0(a0)	# provisional rp[i] = tp[i]-np[i]-borrow
	cmpult	t7,t12,v0	# reached &tp[num]?
	lda	a0,8(a0)
	bne	v0,.Lsub

	subq	t3,t1,t1	# handle upmost overflow bit (t3 = tp[num])
	mov	sp,t7		# rewind to tp[0]
	mov	a2,a0		# restore rp

	# Select tp[] (t1 != 0) or the subtracted value already in rp[]
	# (t1 == 0), and wipe the temporary vector.
.align	4
.Lcopy:	ldq	t4,0(t7)	# conditional copy: tp[i] ...
	ldq	t6,0(a0)	# ... vs. rp[i] from .Lsub
	lda	t7,8(t7)
	lda	a0,8(a0)
	cmoveq	t1,t6,t4	# keep subtracted value when t1 == 0
	stq	zero,-8(t7)	# zap tp
	cmpult	t7,t12,AT
	stq	t4,-8(a0)	# final rp[i]
	bne	AT,.Lcopy
	mov	1,v0		# success return value

.Lexit:
	.set	noreorder
	mov	fp,sp		# discard tp[]; restore original sp
	/*ldq	ra,0(sp)*/	# no calls were made, ra is still live
	ldq	s3,8(sp)	# restore callee-saved registers
	ldq	s4,16(sp)
	ldq	s5,24(sp)
	ldq	fp,32(sp)
	lda	sp,48(sp)	# pop the register-save frame
	ret	(ra)
.end	bn_mul_mont
.ascii	"Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro@openssl.org>"
.align	2