#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Syntax: GNU as, AT&T operand order.  Target: 32-bit x86.
# Every routine below reads its arguments from the stack, preserves
# ebx/esi/edi/ebp, and treats eax/ecx/edx as scratch.

.data
.align 4
 #
 # -1 means to call _s_mpi_is_sse2 to determine if we support sse2
 #    instructions.
 #  0 means to use x86 instructions
 #  1 means to use sse2 instructions
.type   is_sse,@object
.size   is_sse,4
is_sse: .long   -1

#
# sigh, handle the difference between -fPIC and not PIC
# default to pic, since this file seems to be exclusively
# linux right now (solaris uses mpi_i86pc.s and windows uses
# mpi_x86_asm.c)
#
# NOTE: despite the remark above, the PIC variants are commented out,
# so the absolute-address GET/PUT forms are the ones always assembled.
#
#.ifndef NO_PIC
#.macro GET var,reg
#    movl   \var@GOTOFF(%ebx),\reg
#.endm
#.macro PUT reg,var
#    movl   \reg,\var@GOTOFF(%ebx)
#.endm
#.else
.macro GET var,reg
    movl   \var,\reg
.endm
.macro PUT reg,var
    movl   \reg,\var
.endm
#.endif

.text

#
# _s_mpv_mul_d(a, a_len, b, c)
#
#   Multiplies the a_len-word vector a by the single word b and stores
#   the a_len low words plus one final carry word to c (a_len + 1 words
#   are written; with a_len == 0 only c[0] = 0 is written).
#
# Arguments, relative to ebp once the frame is built:
#   ebp +  8: a      source vector
#   ebp + 12: a_len  number of words in a
#   ebp + 16: b      multiplier word
#   ebp + 20: c      destination vector
#
# Frame, x86 path:   ebp - 4: edi   ebp - 8: esi   ebp - 12: ebx
# Frame, SSE2 path:  ebp - 4: edi   ebp - 8: esi
#
# Registers, x86 path:   ebx = carry, ecx = words left,
#                        esi = a ptr, edi = c ptr, edx:eax = product
# Registers, SSE2 path:  mm1 = b, mm2 = running sum / carry,
#                        ecx = words left, esi = a ptr, edi = c ptr
#
# The entry code picks the implementation from is_sse, probing once
# through _s_mpi_is_sse2 while the flag is still -1.
#
.globl  _s_mpv_mul_d
.type   _s_mpv_mul_d,@function
_s_mpv_mul_d:
        GET     is_sse,%eax
        testl   %eax,%eax
        jz      _s_mpv_mul_d_x86            # flag is 0: plain x86
        jg      _s_mpv_mul_d_sse2           # flag is 1: SSE2
        call    _s_mpi_is_sse2              # flag is -1: probe now
        PUT     %eax,is_sse                 # remember the answer
        testl   %eax,%eax
        jg      _s_mpv_mul_d_sse2
_s_mpv_mul_d_x86:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        xorl    %ebx,%ebx                   # carry = 0
        movl    12(%ebp),%ecx               # ecx = a_len
        movl    20(%ebp),%edi               # edi = c
        testl   %ecx,%ecx
        jz      .Lmul_d_x86_done            # nothing to multiply
        movl    8(%ebp),%esi                # esi = a
        cld                                 # lodsl/stosl must count upwards
.Lmul_d_x86_loop:
        lodsl                               # eax = *a++
        mull    16(%ebp)                    # edx:eax = a_i * b
        addl    %ebx,%eax                   # add the incoming carry
        adcl    $0,%edx
        movl    %edx,%ebx                   # high half is the next carry
        stosl                               # *c++ = low half
        decl    %ecx
        jnz     .Lmul_d_x86_loop
.Lmul_d_x86_done:
        movl    %ebx,0(%edi)                # *c = final carry
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop                                 # padding, never executed
_s_mpv_mul_d_sse2:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %edi
        pushl   %esi
        pxor    %mm2,%mm2                   # carry = 0
        movl    12(%ebp),%ecx               # ecx = a_len
        movd    16(%ebp),%mm1               # mm1 = b
        movl    20(%ebp),%edi               # edi = c
        testl   %ecx,%ecx
        jz      .Lmul_d_sse2_done           # nothing to multiply
        movl    8(%ebp),%esi                # esi = a
.Lmul_d_sse2_loop:
        movd    0(%esi),%mm0                # mm0 = *a
        addl    $4,%esi                     # a++
        pmuludq %mm1,%mm0                   # mm0 = b * a_i (full product)
        paddq   %mm0,%mm2                   # add onto the carry
        movd    %mm2,0(%edi)                # store the low word
        addl    $4,%edi                     # c++
        psrlq   $32,%mm2                    # high word is the next carry
        decl    %ecx
        jnz     .Lmul_d_sse2_loop
.Lmul_d_sse2_done:
        movd    %mm2,0(%edi)                # *c = final carry
        emms                                # hand the x87 state back
        popl    %esi
        popl    %edi
        leave
        ret
        nop                                 # padding, never executed
.size   _s_mpv_mul_d, .-_s_mpv_mul_d

#
# _s_mpv_mul_d_add(a, a_len, b, c)
#
#   For each of the a_len words: c[i] = low word of
#   (a[i] * b + carry + c[i]); the high word becomes the next carry.
#   The final carry is stored (not added) to c[a_len].
#
# Arguments, relative to ebp once the frame is built:
#   ebp +  8: a      source vector
#   ebp + 12: a_len  number of words in a
#   ebp + 16: b      multiplier word
#   ebp + 20: c      accumulator vector, updated in place
#
# Frame, x86 path:   ebp - 4: edi   ebp - 8: esi   ebp - 12: ebx
# Frame, SSE2 path:  ebp - 4: edi   ebp - 8: esi
#
# Registers, x86 path:   ebx = carry, ecx = words left,
#                        esi = a ptr, edi = c ptr, edx:eax = product
# Registers, SSE2 path:  mm1 = b, mm2 = running sum / carry,
#                        ecx = words left, esi = a ptr, edi = c ptr
#
.globl  _s_mpv_mul_d_add
.type   _s_mpv_mul_d_add,@function
_s_mpv_mul_d_add:
        GET     is_sse,%eax
        testl   %eax,%eax
        jz      _s_mpv_mul_d_add_x86        # flag is 0: plain x86
        jg      _s_mpv_mul_d_add_sse2       # flag is 1: SSE2
        call    _s_mpi_is_sse2              # flag is -1: probe now
        PUT     %eax,is_sse                 # remember the answer
        testl   %eax,%eax
        jg      _s_mpv_mul_d_add_sse2
_s_mpv_mul_d_add_x86:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        xorl    %ebx,%ebx                   # carry = 0
        movl    12(%ebp),%ecx               # ecx = a_len
        movl    20(%ebp),%edi               # edi = c
        testl   %ecx,%ecx
        jz      .Lmul_d_add_x86_done        # nothing to multiply
        movl    8(%ebp),%esi                # esi = a
        cld                                 # lodsl/stosl must count upwards
.Lmul_d_add_x86_loop:
        lodsl                               # eax = *a++
        mull    16(%ebp)                    # edx:eax = a_i * b
        addl    %ebx,%eax                   # add the incoming carry
        adcl    $0,%edx
        addl    0(%edi),%eax                # add the current word of c
        adcl    $0,%edx
        movl    %edx,%ebx                   # high half is the next carry
        stosl                               # *c++ = low half
        decl    %ecx
        jnz     .Lmul_d_add_x86_loop
.Lmul_d_add_x86_done:
        movl    %ebx,0(%edi)                # *c = final carry
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop                                 # padding, never executed
_s_mpv_mul_d_add_sse2:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %edi
        pushl   %esi
        pxor    %mm2,%mm2                   # carry = 0
        movl    12(%ebp),%ecx               # ecx = a_len
        movd    16(%ebp),%mm1               # mm1 = b
        movl    20(%ebp),%edi               # edi = c
        testl   %ecx,%ecx
        jz      .Lmul_d_add_sse2_done       # nothing to multiply
        movl    8(%ebp),%esi                # esi = a
.Lmul_d_add_sse2_loop:
        movd    0(%esi),%mm0                # mm0 = *a
        addl    $4,%esi                     # a++
        pmuludq %mm1,%mm0                   # mm0 = b * a_i (full product)
        paddq   %mm0,%mm2                   # add onto the carry
        movd    0(%edi),%mm0                # mm0 = current word of c
        paddq   %mm0,%mm2                   # add it in as well
        movd    %mm2,0(%edi)                # store the low word
        addl    $4,%edi                     # c++
        psrlq   $32,%mm2                    # high word is the next carry
        decl    %ecx
        jnz     .Lmul_d_add_sse2_loop
.Lmul_d_add_sse2_done:
        movd    %mm2,0(%edi)                # *c = final carry
        emms                                # hand the x87 state back
        popl    %esi
        popl    %edi
        leave
        ret
        nop                                 # padding, never executed
.size   _s_mpv_mul_d_add, .-_s_mpv_mul_d_add

#
# _s_mpv_mul_d_add_prop(a, a_len, b, c)
#
#   Same multiply-accumulate loop as _s_mpv_mul_d_add, but the final
#   carry is ADDED to c[a_len], and any carry out of that addition keeps
#   rippling into the following words of c until an addition produces no
#   carry.  Nothing beyond c[a_len - 1] is touched when the final carry
#   is zero.
#
# Arguments, relative to ebp once the frame is built:
#   ebp +  8: a      source vector
#   ebp + 12: a_len  number of words in a
#   ebp + 16: b      multiplier word
#   ebp + 20: c      accumulator vector, updated in place
#
# Frame (both paths): ebp - 4: edi   ebp - 8: esi   ebp - 12: ebx
#
# Registers, x86 path:   ebx = carry, ecx = words left,
#                        esi = a ptr, edi = c ptr, edx:eax = product
# Registers, SSE2 path:  mm1 = b, mm2 = running sum / carry, mm3 = c_i,
#                        ebx = final carry, ecx/esi/edi as above
#
.globl  _s_mpv_mul_d_add_prop
.type   _s_mpv_mul_d_add_prop,@function
_s_mpv_mul_d_add_prop:
        GET     is_sse,%eax
        testl   %eax,%eax
        jz      _s_mpv_mul_d_add_prop_x86   # flag is 0: plain x86
        jg      _s_mpv_mul_d_add_prop_sse2  # flag is 1: SSE2
        call    _s_mpi_is_sse2              # flag is -1: probe now
        PUT     %eax,is_sse                 # remember the answer
        testl   %eax,%eax
        jg      _s_mpv_mul_d_add_prop_sse2
_s_mpv_mul_d_add_prop_x86:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        xorl    %ebx,%ebx                   # carry = 0
        movl    12(%ebp),%ecx               # ecx = a_len
        movl    20(%ebp),%edi               # edi = c
        testl   %ecx,%ecx
        jz      .Lmul_d_add_prop_x86_tail   # nothing to multiply
        cld                                 # lodsl/stosl must count upwards
        movl    8(%ebp),%esi                # esi = a
.Lmul_d_add_prop_x86_loop:
        lodsl                               # eax = *a++
        mull    16(%ebp)                    # edx:eax = a_i * b
        addl    %ebx,%eax                   # add the incoming carry
        adcl    $0,%edx
        addl    0(%edi),%eax                # add the current word of c
        adcl    $0,%edx
        movl    %edx,%ebx                   # high half is the next carry
        stosl                               # *c++ = low half
        decl    %ecx
        jnz     .Lmul_d_add_prop_x86_loop
.Lmul_d_add_prop_x86_tail:
        testl   %ebx,%ebx                   # is the carry zero?
        jz      .Lmul_d_add_prop_x86_done
        movl    0(%edi),%eax                # fold the carry into *c
        addl    %ebx,%eax
        stosl                               # stosl leaves CF untouched
        jnc     .Lmul_d_add_prop_x86_done
.Lmul_d_add_prop_x86_ripple:
        movl    0(%edi),%eax                # CF is set here; mov keeps it
        adcl    $0,%eax
        stosl
        jc      .Lmul_d_add_prop_x86_ripple
.Lmul_d_add_prop_x86_done:
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop                                 # padding, never executed
_s_mpv_mul_d_add_prop_sse2:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        pxor    %mm2,%mm2                   # carry = 0
        movl    12(%ebp),%ecx               # ecx = a_len
        movd    16(%ebp),%mm1               # mm1 = b
        movl    20(%ebp),%edi               # edi = c
        testl   %ecx,%ecx
        jz      .Lmul_d_add_prop_sse2_tail  # nothing to multiply
        movl    8(%ebp),%esi                # esi = a
        cld                                 # the tail below uses stosl
.Lmul_d_add_prop_sse2_loop:
        movd    0(%esi),%mm0                # mm0 = *a
        movd    0(%edi),%mm3                # mm3 = current word of c
        addl    $4,%esi                     # a++
        pmuludq %mm1,%mm0                   # mm0 = b * a_i (full product)
        paddq   %mm0,%mm2                   # add onto the carry
        paddq   %mm3,%mm2                   # add the word of c
        movd    %mm2,0(%edi)                # store the low word
        addl    $4,%edi                     # c++
        psrlq   $32,%mm2                    # high word is the next carry
        decl    %ecx
        jnz     .Lmul_d_add_prop_sse2_loop
.Lmul_d_add_prop_sse2_tail:
        movd    %mm2,%ebx                   # ebx = final carry
        testl   %ebx,%ebx                   # is the carry zero?
        jz      .Lmul_d_add_prop_sse2_done
        movl    0(%edi),%eax                # fold the carry into *c
        addl    %ebx,%eax
        stosl                               # stosl leaves CF untouched
        jnc     .Lmul_d_add_prop_sse2_done
.Lmul_d_add_prop_sse2_ripple:
        movl    0(%edi),%eax                # CF is set here; mov keeps it
        adcl    $0,%eax
        stosl
        jc      .Lmul_d_add_prop_sse2_ripple
.Lmul_d_add_prop_sse2_done:
        emms                                # hand the x87 state back
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop                                 # padding, never executed
.size   _s_mpv_mul_d_add_prop, .-_s_mpv_mul_d_add_prop

#
# _s_mpv_sqr_add_prop(pa, a_len, ps)
#
#   For each of the a_len words of pa, adds the two-word square
#   pa[i] * pa[i] (plus the carry from the previous step) into the word
#   pair ps[2i], ps[2i+1].  The carry left after the last pair is added
#   to the next word of ps and ripples onward while additions keep
#   carrying out.
#
# Arguments, relative to ebp once the frame is built:
#   ebp +  8: pa     source vector
#   ebp + 12: a_len  number of words in pa
#   ebp + 16: ps     accumulator vector, updated in place
#
# Frame (both paths): ebp - 4: edi   ebp - 8: esi   ebp - 12: ebx
#
# Registers, x86 path:   ebx = carry / scratch, ecx = words left,
#                        esi = pa ptr, edi = ps ptr, edx:eax = square
# Registers, SSE2 path:  mm0 = square, mm2 = running sum / carry,
#                        mm3 = word of ps, ebx = final carry
#
.globl  _s_mpv_sqr_add_prop
.type   _s_mpv_sqr_add_prop,@function
_s_mpv_sqr_add_prop:
        GET     is_sse,%eax
        testl   %eax,%eax
        jz      _s_mpv_sqr_add_prop_x86     # flag is 0: plain x86
        jg      _s_mpv_sqr_add_prop_sse2    # flag is 1: SSE2
        call    _s_mpi_is_sse2              # flag is -1: probe now
        PUT     %eax,is_sse                 # remember the answer
        testl   %eax,%eax
        jg      _s_mpv_sqr_add_prop_sse2
_s_mpv_sqr_add_prop_x86:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        xorl    %ebx,%ebx                   # carry = 0
        movl    12(%ebp),%ecx               # ecx = a_len
        movl    16(%ebp),%edi               # edi = ps
        testl   %ecx,%ecx
        jz      .Lsqr_add_prop_x86_tail     # nothing to square
        cld                                 # lodsl/stosl must count upwards
        movl    8(%ebp),%esi                # esi = pa
.Lsqr_add_prop_x86_loop:
        lodsl                               # eax = *pa++
        mull    %eax                        # edx:eax = a_i * a_i
        addl    %ebx,%eax                   # add the incoming carry
        adcl    $0,%edx
        addl    0(%edi),%eax                # add the low word of the pair
        movl    4(%edi),%ebx                # fetch the high word; CF kept
        stosl                               # store low word; CF kept
        adcl    %ebx,%edx                   # add high word + CF from above
        movl    $0,%ebx                     # must be mov, not xor: CF is live
        movl    %edx,%eax
        adcl    $0,%ebx                     # ebx = carry out of the pair
        stosl                               # store high word
        decl    %ecx
        jnz     .Lsqr_add_prop_x86_loop
.Lsqr_add_prop_x86_tail:
        testl   %ebx,%ebx                   # is the carry zero?
        jz      .Lsqr_add_prop_x86_done
        movl    0(%edi),%eax                # fold the carry into *ps
        addl    %ebx,%eax
        stosl                               # stosl leaves CF untouched
        jnc     .Lsqr_add_prop_x86_done
.Lsqr_add_prop_x86_ripple:
        movl    0(%edi),%eax                # CF is set here; mov keeps it
        adcl    $0,%eax
        stosl
        jc      .Lsqr_add_prop_x86_ripple
.Lsqr_add_prop_x86_done:
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop                                 # padding, never executed
_s_mpv_sqr_add_prop_sse2:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        pxor    %mm2,%mm2                   # carry = 0
        movl    12(%ebp),%ecx               # ecx = a_len
        movl    16(%ebp),%edi               # edi = ps
        testl   %ecx,%ecx
        jz      .Lsqr_add_prop_sse2_tail    # nothing to square
        movl    8(%ebp),%esi                # esi = pa
        cld                                 # the tail below uses stosl
.Lsqr_add_prop_sse2_loop:
        movd    0(%esi),%mm0                # mm0 = *pa
        movd    0(%edi),%mm3                # mm3 = low word of the pair
        addl    $4,%esi                     # pa++
        pmuludq %mm0,%mm0                   # mm0 = a_i * a_i
        paddq   %mm0,%mm2                   # add onto the carry
        paddq   %mm3,%mm2                   # add the low word
        movd    4(%edi),%mm3                # mm3 = high word of the pair
        movd    %mm2,0(%edi)                # store the low word
        psrlq   $32,%mm2
        paddq   %mm3,%mm2                   # add the high word
        movd    %mm2,4(%edi)                # store the high word
        psrlq   $32,%mm2                    # what is left is the carry
        addl    $8,%edi                     # ps += 2 words
        decl    %ecx
        jnz     .Lsqr_add_prop_sse2_loop
.Lsqr_add_prop_sse2_tail:
        movd    %mm2,%ebx                   # ebx = final carry
        testl   %ebx,%ebx                   # is the carry zero?
        jz      .Lsqr_add_prop_sse2_done
        movl    0(%edi),%eax                # fold the carry into *ps
        addl    %ebx,%eax
        stosl                               # stosl leaves CF untouched
        jnc     .Lsqr_add_prop_sse2_done
.Lsqr_add_prop_sse2_ripple:
        movl    0(%edi),%eax                # CF is set here; mov keeps it
        adcl    $0,%eax
        stosl
        jc      .Lsqr_add_prop_sse2_ripple
.Lsqr_add_prop_sse2_done:
        emms                                # hand the x87 state back
        popl    %ebx
        popl    %esi
        popl    %edi
        leave
        ret
        nop                                 # padding, never executed
.size   _s_mpv_sqr_add_prop, .-_s_mpv_sqr_add_prop

#
# Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
# so its high bit is 1.   This code is from NSPR.
#
# mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
#                         mp_digit *qp, mp_digit *rp)
#
# Loads Nhi:Nlo into edx:eax, executes a single DIV by divisor, stores
# the quotient through qp and the remainder through rp, and always
# returns zero in eax.
# NOTE(review): DIV raises a divide error when divisor is zero or the
# quotient does not fit in one word (Nhi >= divisor); callers presumably
# guarantee Nhi < divisor -- confirm.
#
# Stack layout once ebx has been pushed:
#   esp +  0: caller's ebx
#   esp +  4: return address
#   esp +  8: Nhi argument
#   esp + 12: Nlo argument
#   esp + 16: divisor argument
#   esp + 20: qp argument
#   esp + 24: rp argument
# Registers:
#   eax: Nlo, then the quotient, then the zero return value
#   edx: Nhi, then the remainder
#   ebx: scratch pointer (qp, then rp); saved and restored
#

.globl  _s_mpv_div_2dx1d
.type   _s_mpv_div_2dx1d,@function
_s_mpv_div_2dx1d:
        pushl   %ebx                        # callee-saved, used as pointer
        movl    12(%esp),%eax               # eax = Nlo
        movl    8(%esp),%edx                # edx = Nhi
        divl    16(%esp)                    # eax = quotient, edx = remainder
        movl    20(%esp),%ebx
        movl    %eax,0(%ebx)                # *qp = quotient
        movl    24(%esp),%ebx
        movl    %edx,0(%ebx)                # *rp = remainder
        xorl    %eax,%eax                   # return zero
        popl    %ebx
        ret
        nop                                 # padding, never executed
