// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#include <GFp/arm_arch.h>

.text
.align  5
Lpoly:
.quad   0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
Lone_mont:
.quad   0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
Lone:
.quad   1,0,0,0
.byte   69,67,80,95,78,73,83,84,90,50,53,54,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align  2

// void GFp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4],
//                            const BN_ULONG x2[4]);
.globl  _GFp_nistz256_mul_mont
.private_extern _GFp_nistz256_mul_mont

.align  4
_GFp_nistz256_mul_mont:
    stp x29,x30,[sp,#-32]!
    add x29,sp,#0
    stp x19,x20,[sp,#16]

    ldr x3,[x2]                 // bp[0]
    ldp x4,x5,[x1]
    ldp x6,x7,[x1,#16]
    ldr x12,Lpoly+8
    ldr x13,Lpoly+24

    bl  __ecp_nistz256_mul_mont

    ldp x19,x20,[sp,#16]
    ldp x29,x30,[sp],#32
    ret


// void GFp_nistz256_sqr_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
.globl  _GFp_nistz256_sqr_mont
.private_extern _GFp_nistz256_sqr_mont

.align  4
_GFp_nistz256_sqr_mont:
    stp x29,x30,[sp,#-32]!
    add x29,sp,#0
    stp x19,x20,[sp,#16]

    ldp x4,x5,[x1]
    ldp x6,x7,[x1,#16]
    ldr x12,Lpoly+8
    ldr x13,Lpoly+24

    bl  __ecp_nistz256_sqr_mont

    ldp x19,x20,[sp,#16]
    ldp x29,x30,[sp],#32
    ret


// void GFp_nistz256_add(BN_ULONG x0[4],const BN_ULONG x1[4],
//                       const BN_ULONG x2[4]);
.globl  _GFp_nistz256_add
.private_extern _GFp_nistz256_add

.align  4
_GFp_nistz256_add:
    stp x29,x30,[sp,#-16]!
    add x29,sp,#0

    ldp x14,x15,[x1]
    ldp x8,x9,[x2]
    ldp x16,x17,[x1,#16]
    ldp x10,x11,[x2,#16]
    ldr x12,Lpoly+8
    ldr x13,Lpoly+24

    bl  __ecp_nistz256_add

    ldp x29,x30,[sp],#16
    ret


// void GFp_nistz256_neg(BN_ULONG x0[4],const BN_ULONG x1[4]);
.globl  _GFp_nistz256_neg
.private_extern _GFp_nistz256_neg

.align  4
_GFp_nistz256_neg:
    stp x29,x30,[sp,#-16]!
    add x29,sp,#0

    mov x2,x1
    mov x14,xzr                 // a = 0
    mov x15,xzr
    mov x16,xzr
    mov x17,xzr
    ldr x12,Lpoly+8
    ldr x13,Lpoly+24

    bl  __ecp_nistz256_sub_from

    ldp x29,x30,[sp],#16
    ret

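// The per-word Montgomery reduction used throughout this file relies on
// the shape of
//
//     p = 2^256 - 2^224 + 2^192 + 2^96 - 1        (Lpoly above)
//
// Because p[0] = 2^64 - 1, we have -p^(-1) mod 2^64 = 1, so each step
// simply adds acc[0]*p to the accumulator and drops the now-zero low
// word. The multiple acc[0]*p + acc[0] = acc[0]*(p + 1) is assembled
// from two shifted pieces of acc[0]:
//
//     acc[0]*(p + 1) = acc[0]*2^96
//                    + acc[0]*0xffffffff00000001 * 2^192
//
// where the 128-bit product acc[0]*0xffffffff00000001 is computed
// without a multiplier as (acc[0] - (acc[0]>>32) - borrow) :
// (acc[0] - (acc[0]<<32)), i.e. the subs/sbc pairs commented
// "*0xffff0001" below. The extra +acc[0] is exactly the carry out of
// the discarded low word ("omit acc[0]" in the comments).
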
// note that __ecp_nistz256_mul_mont expects a[0-3] input pre-loaded
// to x4-x7, and b[0] to x3

.align  4
__ecp_nistz256_mul_mont:
    mul x14,x4,x3               // a[0]*b[0]
    umulh   x8,x4,x3

    mul x15,x5,x3               // a[1]*b[0]
    umulh   x9,x5,x3

    mul x16,x6,x3               // a[2]*b[0]
    umulh   x10,x6,x3

    mul x17,x7,x3               // a[3]*b[0]
    umulh   x11,x7,x3
    ldr x3,[x2,#8]              // b[1]

    adds    x15,x15,x8          // accumulate high parts of multiplication
    lsl x8,x14,#32
    adcs    x16,x16,x9
    lsr x9,x14,#32
    adcs    x17,x17,x10
    adc x19,xzr,x11
    mov x20,xzr
    subs    x10,x14,x8          // "*0xffff0001"
    sbc x11,x14,x9
    adds    x14,x15,x8          // +=acc[0]<<96 and omit acc[0]
    mul x8,x4,x3                // lo(a[0]*b[i])
    adcs    x15,x16,x9
    mul x9,x5,x3                // lo(a[1]*b[i])
    adcs    x16,x17,x10         // +=acc[0]*0xffff0001
    mul x10,x6,x3               // lo(a[2]*b[i])
    adcs    x17,x19,x11
    mul x11,x7,x3               // lo(a[3]*b[i])
    adc x19,x20,xzr

    adds    x14,x14,x8          // accumulate low parts of multiplication
    umulh   x8,x4,x3            // hi(a[0]*b[i])
    adcs    x15,x15,x9
    umulh   x9,x5,x3            // hi(a[1]*b[i])
    adcs    x16,x16,x10
    umulh   x10,x6,x3           // hi(a[2]*b[i])
    adcs    x17,x17,x11
    umulh   x11,x7,x3           // hi(a[3]*b[i])
    adc x19,x19,xzr
    ldr x3,[x2,#8*(1+1)]        // b[1+1]
    adds    x15,x15,x8          // accumulate high parts of multiplication
    lsl x8,x14,#32
    adcs    x16,x16,x9
    lsr x9,x14,#32
    adcs    x17,x17,x10
    adcs    x19,x19,x11
    adc x20,xzr,xzr
    subs    x10,x14,x8          // "*0xffff0001"
    sbc x11,x14,x9
    adds    x14,x15,x8          // +=acc[0]<<96 and omit acc[0]
    mul x8,x4,x3                // lo(a[0]*b[i])
    adcs    x15,x16,x9
    mul x9,x5,x3                // lo(a[1]*b[i])
    adcs    x16,x17,x10         // +=acc[0]*0xffff0001
    mul x10,x6,x3               // lo(a[2]*b[i])
    adcs    x17,x19,x11
    mul x11,x7,x3               // lo(a[3]*b[i])
    adc x19,x20,xzr

    adds    x14,x14,x8          // accumulate low parts of multiplication
    umulh   x8,x4,x3            // hi(a[0]*b[i])
    adcs    x15,x15,x9
    umulh   x9,x5,x3            // hi(a[1]*b[i])
    adcs    x16,x16,x10
    umulh   x10,x6,x3           // hi(a[2]*b[i])
    adcs    x17,x17,x11
    umulh   x11,x7,x3           // hi(a[3]*b[i])
    adc x19,x19,xzr
    ldr x3,[x2,#8*(2+1)]        // b[2+1]
    adds    x15,x15,x8          // accumulate high parts of multiplication
    lsl x8,x14,#32
    adcs    x16,x16,x9
    lsr x9,x14,#32
    adcs    x17,x17,x10
    adcs    x19,x19,x11
    adc x20,xzr,xzr
    subs    x10,x14,x8          // "*0xffff0001"
    sbc x11,x14,x9
    adds    x14,x15,x8          // +=acc[0]<<96 and omit acc[0]
    mul x8,x4,x3                // lo(a[0]*b[i])
    adcs    x15,x16,x9
    mul x9,x5,x3                // lo(a[1]*b[i])
    adcs    x16,x17,x10         // +=acc[0]*0xffff0001
    mul x10,x6,x3               // lo(a[2]*b[i])
    adcs    x17,x19,x11
    mul x11,x7,x3               // lo(a[3]*b[i])
    adc x19,x20,xzr

    adds    x14,x14,x8          // accumulate low parts of multiplication
    umulh   x8,x4,x3            // hi(a[0]*b[i])
    adcs    x15,x15,x9
    umulh   x9,x5,x3            // hi(a[1]*b[i])
    adcs    x16,x16,x10
    umulh   x10,x6,x3           // hi(a[2]*b[i])
    adcs    x17,x17,x11
    umulh   x11,x7,x3           // hi(a[3]*b[i])
    adc x19,x19,xzr
    adds    x15,x15,x8          // accumulate high parts of multiplication
    lsl x8,x14,#32
    adcs    x16,x16,x9
    lsr x9,x14,#32
    adcs    x17,x17,x10
    adcs    x19,x19,x11
    adc x20,xzr,xzr
    // last reduction
    subs    x10,x14,x8          // "*0xffff0001"
    sbc x11,x14,x9
    adds    x14,x15,x8          // +=acc[0]<<96 and omit acc[0]
    adcs    x15,x16,x9
    adcs    x16,x17,x10         // +=acc[0]*0xffff0001
    adcs    x17,x19,x11
    adc x19,x20,xzr

    adds    x8,x14,#1           // subs x8,x14,#-1 // tmp = ret-modulus
    sbcs    x9,x15,x12
    sbcs    x10,x16,xzr
    sbcs    x11,x17,x13
    sbcs    xzr,x19,xzr         // did it borrow?

    csel    x14,x14,x8,lo       // ret = borrow ? ret : ret-modulus
    csel    x15,x15,x9,lo
    csel    x16,x16,x10,lo
    stp x14,x15,[x0]
    csel    x17,x17,x11,lo
    stp x16,x17,[x0,#16]

    ret

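// At the C level, __ecp_nistz256_mul_mont above computes
//
//     ret = a * b * 2^-256 mod p
//
// i.e. Montgomery multiplication with R = 2^256, one b-word at a time,
// with a reduction step folded in after each word. The epilogue's
// conditional subtraction also leans on the shape of p: subtracting
// p[0] = 0xffffffffffffffff is the same as adding 1 (hence
// "adds x8,x14,#1"), and p[2] = 0 gives "sbcs x10,x16,xzr". Only p[1]
// and p[3] ever need to live in registers, which is why the entry
// points load just Lpoly+8 (x12) and Lpoly+24 (x13).
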
// note that __ecp_nistz256_sqr_mont expects a[0-3] input pre-loaded
// to x4-x7

.align  4
__ecp_nistz256_sqr_mont:
    //  |  |  |  |  |  |a1*a0|  |
    //  |  |  |  |  |a2*a0|  |  |
    //  |  |a3*a2|a3*a0|  |  |  |
    //  |  |  |  |a2*a1|  |  |  |
    //  |  |  |a3*a1|  |  |  |  |
    // *|  |  |  |  |  |  |  | 2|
    // +|a3*a3|a2*a2|a1*a1|a0*a0|
    //  |--+--+--+--+--+--+--+--|
    //  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax denotes the x-th 64-bit
    //  accumulator word
    //
    //  The "can't overflow" notes below mark carries into the high part
    //  of a multiplication result, which cannot overflow because the
    //  high part can never be all-ones.

    mul x15,x5,x4               // a[1]*a[0]
    umulh   x9,x5,x4
    mul x16,x6,x4               // a[2]*a[0]
    umulh   x10,x6,x4
    mul x17,x7,x4               // a[3]*a[0]
    umulh   x19,x7,x4

    adds    x16,x16,x9          // accumulate high parts of multiplication
    mul x8,x6,x5                // a[2]*a[1]
    umulh   x9,x6,x5
    adcs    x17,x17,x10
    mul x10,x7,x5               // a[3]*a[1]
    umulh   x11,x7,x5
    adc x19,x19,xzr             // can't overflow

    mul x20,x7,x6               // a[3]*a[2]
    umulh   x1,x7,x6

    adds    x9,x9,x10           // accumulate high parts of multiplication
    mul x14,x4,x4               // a[0]*a[0]
    adc x10,x11,xzr             // can't overflow

    adds    x17,x17,x8          // accumulate low parts of multiplication
    umulh   x4,x4,x4
    adcs    x19,x19,x9
    mul x9,x5,x5                // a[1]*a[1]
    adcs    x20,x20,x10
    umulh   x5,x5,x5
    adc x1,x1,xzr               // can't overflow

    adds    x15,x15,x15         // acc[1-6]*=2
    mul x10,x6,x6               // a[2]*a[2]
    adcs    x16,x16,x16
    umulh   x6,x6,x6
    adcs    x17,x17,x17
    mul x11,x7,x7               // a[3]*a[3]
    adcs    x19,x19,x19
    umulh   x7,x7,x7
    adcs    x20,x20,x20
    adcs    x1,x1,x1
    adc x2,xzr,xzr

    adds    x15,x15,x4          // +a[i]*a[i]
    adcs    x16,x16,x9
    adcs    x17,x17,x5
    adcs    x19,x19,x10
    adcs    x20,x20,x6
    lsl x8,x14,#32
    adcs    x1,x1,x11
    lsr x9,x14,#32
    adc x2,x2,x7
    subs    x10,x14,x8          // "*0xffff0001"
    sbc x11,x14,x9
    adds    x14,x15,x8          // +=acc[0]<<96 and omit acc[0]
    adcs    x15,x16,x9
    lsl x8,x14,#32
    adcs    x16,x17,x10         // +=acc[0]*0xffff0001
    lsr x9,x14,#32
    adc x17,x11,xzr             // can't overflow
    subs    x10,x14,x8          // "*0xffff0001"
    sbc x11,x14,x9
    adds    x14,x15,x8          // +=acc[0]<<96 and omit acc[0]
    adcs    x15,x16,x9
    lsl x8,x14,#32
    adcs    x16,x17,x10         // +=acc[0]*0xffff0001
    lsr x9,x14,#32
    adc x17,x11,xzr             // can't overflow
    subs    x10,x14,x8          // "*0xffff0001"
    sbc x11,x14,x9
    adds    x14,x15,x8          // +=acc[0]<<96 and omit acc[0]
    adcs    x15,x16,x9
    lsl x8,x14,#32
    adcs    x16,x17,x10         // +=acc[0]*0xffff0001
    lsr x9,x14,#32
    adc x17,x11,xzr             // can't overflow
    subs    x10,x14,x8          // "*0xffff0001"
    sbc x11,x14,x9
    adds    x14,x15,x8          // +=acc[0]<<96 and omit acc[0]
    adcs    x15,x16,x9
    adcs    x16,x17,x10         // +=acc[0]*0xffff0001
    adc x17,x11,xzr             // can't overflow

    adds    x14,x14,x19         // accumulate upper half
    adcs    x15,x15,x20
    adcs    x16,x16,x1
    adcs    x17,x17,x2
    adc x19,xzr,xzr

    adds    x8,x14,#1           // subs x8,x14,#-1 // tmp = ret-modulus
    sbcs    x9,x15,x12
    sbcs    x10,x16,xzr
    sbcs    x11,x17,x13
    sbcs    xzr,x19,xzr         // did it borrow?

    csel    x14,x14,x8,lo       // ret = borrow ? ret : ret-modulus
    csel    x15,x15,x9,lo
    csel    x16,x16,x10,lo
    stp x14,x15,[x0]
    csel    x17,x17,x11,lo
    stp x16,x17,[x0,#16]

    ret

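// The squaring above follows the usual trick: compute each off-diagonal
// product a[i]*a[j] (i > j) once, double the whole partial sum
// ("acc[1-6]*=2"), then add the diagonal squares a[i]*a[i]. The four
// reduction steps then run on the low half alone; the upper half of the
// 512-bit square is accumulated afterwards ("accumulate upper half"),
// followed by the same conditional subtraction as in multiplication.
// The "can't overflow" claims hold because umulh yields at most
// 2^64 - 2, so adding a single carry bit cannot wrap.
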
// Note that __ecp_nistz256_add expects both input vectors pre-loaded to
// x14-x17 and x8-x11. This is done because it's used in multiple
// contexts, e.g. for multiplication by 2 and by 3...

.align  4
__ecp_nistz256_add:
    adds    x14,x14,x8          // ret = a+b
    adcs    x15,x15,x9
    adcs    x16,x16,x10
    adcs    x17,x17,x11
    adc x1,xzr,xzr              // x1 = carry-out

    adds    x8,x14,#1           // subs x8,x14,#-1 // tmp = ret-modulus
    sbcs    x9,x15,x12
    sbcs    x10,x16,xzr
    sbcs    x11,x17,x13
    sbcs    xzr,x1,xzr          // did subtraction borrow?

    csel    x14,x14,x8,lo       // ret = borrow ? ret : ret-modulus
    csel    x15,x15,x9,lo
    csel    x16,x16,x10,lo
    stp x14,x15,[x0]
    csel    x17,x17,x11,lo
    stp x16,x17,[x0,#16]

    ret


.align  4
__ecp_nistz256_sub_from:
    ldp x8,x9,[x2]
    ldp x10,x11,[x2,#16]
    subs    x14,x14,x8          // ret = a-b
    sbcs    x15,x15,x9
    sbcs    x16,x16,x10
    sbcs    x17,x17,x11
    sbc x1,xzr,xzr              // x1 = all-ones if it borrowed

    subs    x8,x14,#1           // adds x8,x14,#-1 // tmp = ret+modulus
    adcs    x9,x15,x12
    adcs    x10,x16,xzr
    adc x11,x17,x13
    cmp x1,xzr                  // did subtraction borrow?

    csel    x14,x14,x8,eq       // ret = borrow ? ret+modulus : ret
    csel    x15,x15,x9,eq
    csel    x16,x16,x10,eq
    stp x14,x15,[x0]
    csel    x17,x17,x11,eq
    stp x16,x17,[x0,#16]

    ret


.align  4
__ecp_nistz256_sub_morf:
    ldp x8,x9,[x2]
    ldp x10,x11,[x2,#16]
    subs    x14,x8,x14          // ret = b-a
    sbcs    x15,x9,x15
    sbcs    x16,x10,x16
    sbcs    x17,x11,x17
    sbc x1,xzr,xzr              // x1 = all-ones if it borrowed

    subs    x8,x14,#1           // adds x8,x14,#-1 // tmp = ret+modulus
    adcs    x9,x15,x12
    adcs    x10,x16,xzr
    adc x11,x17,x13
    cmp x1,xzr                  // did subtraction borrow?

    csel    x14,x14,x8,eq       // ret = borrow ? ret+modulus : ret
    csel    x15,x15,x9,eq
    csel    x16,x16,x10,eq
    stp x14,x15,[x0]
    csel    x17,x17,x11,eq
    stp x16,x17,[x0,#16]

    ret


.align  4
__ecp_nistz256_div_by_2:
    subs    x8,x14,#1           // adds x8,x14,#-1 // tmp = a+modulus
    adcs    x9,x15,x12
    adcs    x10,x16,xzr
    adcs    x11,x17,x13
    adc x1,xzr,xzr              // x1 = bit 256 of a+modulus
    tst x14,#1                  // is a even?

    csel    x14,x14,x8,eq       // ret = even ? a : a+modulus
    csel    x15,x15,x9,eq
    csel    x16,x16,x10,eq
    csel    x17,x17,x11,eq
    csel    x1,xzr,x1,eq

    lsr x14,x14,#1              // ret >>= 1
    orr x14,x14,x15,lsl#63
    lsr x15,x15,#1
    orr x15,x15,x16,lsl#63
    lsr x16,x16,#1
    orr x16,x16,x17,lsl#63
    lsr x17,x17,#1
    stp x14,x15,[x0]
    orr x17,x17,x1,lsl#63
    stp x16,x17,[x0,#16]

    ret

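// A C reference model for __ecp_nistz256_div_by_2 above (an illustrative
// sketch only; the names and helpers are ours, not part of this file):
//
//   #include <stdint.h>
//
//   static const uint64_t P[4] = {
//       0xffffffffffffffffULL, 0x00000000ffffffffULL,
//       0x0000000000000000ULL, 0xffffffff00000001ULL,
//   };
//
//   void div_by_2_ref(uint64_t r[4], const uint64_t a[4]) {
//       uint64_t t[4], top;
//       unsigned __int128 c = 0;
//       for (int i = 0; i < 4; i++) {            // t = a + p, keep carry
//           c += (unsigned __int128)a[i] + P[i];
//           t[i] = (uint64_t)c;
//           c >>= 64;
//       }
//       top = (uint64_t)c;                       // bit 256 of a + p
//       uint64_t odd = (uint64_t)0 - (a[0] & 1); // all-ones if a is odd
//       for (int i = 0; i < 4; i++)              // even: keep a; odd: a + p
//           t[i] = (a[i] & ~odd) | (t[i] & odd);
//       top &= odd;
//       for (int i = 0; i < 3; i++)              // 257-bit right shift
//           r[i] = (t[i] >> 1) | (t[i + 1] << 63);
//       r[3] = (t[3] >> 1) | (top << 63);
//   }
//
// The point-doubling routine below implements the standard Jacobian
// doubling formulas, with all values in the Montgomery domain:
//
//     S     = 4*X*Y^2
//     M     = 3*(X - Z^2)*(X + Z^2)
//     res_x = M^2 - 2*S
//     res_y = M*(S - res_x) - 8*Y^4
//     res_z = 2*Y*Z
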
.globl  _GFp_nistz256_point_double
.private_extern _GFp_nistz256_point_double

.align  5
_GFp_nistz256_point_double:
    stp x29,x30,[sp,#-80]!
    add x29,sp,#0
    stp x19,x20,[sp,#16]
    stp x21,x22,[sp,#32]
    sub sp,sp,#32*4

Ldouble_shortcut:
    ldp x14,x15,[x1,#32]
    mov x21,x0
    ldp x16,x17,[x1,#48]
    mov x22,x1
    ldr x12,Lpoly+8
    mov x8,x14
    ldr x13,Lpoly+24
    mov x9,x15
    ldp x4,x5,[x22,#64]         // forward load for p256_sqr_mont
    mov x10,x16
    mov x11,x17
    ldp x6,x7,[x22,#64+16]
    add x0,sp,#0
    bl  __ecp_nistz256_add      // p256_mul_by_2(S, in_y);

    add x0,sp,#64
    bl  __ecp_nistz256_sqr_mont // p256_sqr_mont(Zsqr, in_z);

    ldp x8,x9,[x22]
    ldp x10,x11,[x22,#16]
    mov x4,x14                  // put Zsqr aside for p256_sub
    mov x5,x15
    mov x6,x16
    mov x7,x17
    add x0,sp,#32
    bl  __ecp_nistz256_add      // p256_add(M, Zsqr, in_x);

    add x2,x22,#0
    mov x14,x4                  // restore Zsqr
    mov x15,x5
    ldp x4,x5,[sp,#0]           // forward load for p256_sqr_mont
    mov x16,x6
    mov x17,x7
    ldp x6,x7,[sp,#0+16]
    add x0,sp,#64
    bl  __ecp_nistz256_sub_morf // p256_sub(Zsqr, in_x, Zsqr);

    add x0,sp,#0
    bl  __ecp_nistz256_sqr_mont // p256_sqr_mont(S, S);

    ldr x3,[x22,#32]
    ldp x4,x5,[x22,#64]
    ldp x6,x7,[x22,#64+16]
    add x2,x22,#32
    add x0,sp,#96
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(tmp0, in_z, in_y);

    mov x8,x14
    mov x9,x15
    ldp x4,x5,[sp,#0]           // forward load for p256_sqr_mont
    mov x10,x16
    mov x11,x17
    ldp x6,x7,[sp,#0+16]
    add x0,x21,#64
    bl  __ecp_nistz256_add      // p256_mul_by_2(res_z, tmp0);

    add x0,sp,#96
    bl  __ecp_nistz256_sqr_mont // p256_sqr_mont(tmp0, S);

    ldr x3,[sp,#64]             // forward load for p256_mul_mont
    ldp x4,x5,[sp,#32]
    ldp x6,x7,[sp,#32+16]
    add x0,x21,#32
    bl  __ecp_nistz256_div_by_2 // p256_div_by_2(res_y, tmp0);

    add x2,sp,#64
    add x0,sp,#32
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(M, M, Zsqr);

    mov x8,x14                  // duplicate M
    mov x9,x15
    mov x10,x16
    mov x11,x17
    mov x4,x14                  // put M aside
    mov x5,x15
    mov x6,x16
    mov x7,x17
    add x0,sp,#32
    bl  __ecp_nistz256_add
    mov x8,x4                   // restore M
    mov x9,x5
    ldr x3,[x22]                // forward load for p256_mul_mont
    mov x10,x6
    ldp x4,x5,[sp,#0]
    mov x11,x7
    ldp x6,x7,[sp,#0+16]
    bl  __ecp_nistz256_add      // p256_mul_by_3(M, M);

    add x2,x22,#0
    add x0,sp,#0
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(S, S, in_x);

    mov x8,x14
    mov x9,x15
    ldp x4,x5,[sp,#32]          // forward load for p256_sqr_mont
    mov x10,x16
    mov x11,x17
    ldp x6,x7,[sp,#32+16]
    add x0,sp,#96
    bl  __ecp_nistz256_add      // p256_mul_by_2(tmp0, S);

    add x0,x21,#0
    bl  __ecp_nistz256_sqr_mont // p256_sqr_mont(res_x, M);

    add x2,sp,#96
    bl  __ecp_nistz256_sub_from // p256_sub(res_x, res_x, tmp0);

    add x2,sp,#0
    add x0,sp,#0
    bl  __ecp_nistz256_sub_morf // p256_sub(S, S, res_x);

    ldr x3,[sp,#32]
    mov x4,x14                  // copy S
    mov x5,x15
    mov x6,x16
    mov x7,x17
    add x2,sp,#32
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(S, S, M);

    add x2,x21,#32
    add x0,x21,#32
    bl  __ecp_nistz256_sub_from // p256_sub(res_y, S, res_y);

    add sp,x29,#0               // destroy frame
    ldp x19,x20,[x29,#16]
    ldp x21,x22,[x29,#32]
    ldp x29,x30,[sp],#80
    ret

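// _GFp_nistz256_point_add_affine below adds a Jacobian point (in1) and
// an affine point (in2, implicit Z2 = 1), using the standard mixed-
// addition formulas (all values in the Montgomery domain):
//
//     U2    = in2_x*Z1^2        H = U2 - in1_x
//     S2    = in2_y*Z1^3        R = S2 - in1_y
//     res_x = R^2 - H^3 - 2*in1_x*H^2
//     res_y = R*(in1_x*H^2 - res_x) - in1_y*H^3
//     res_z = H*Z1
//
// The orr/csetm sequences at the top compute all-ones masks
// x24 = !in1infty and x25 = !in2infty, which the tail of the function
// uses to pick the result in constant time.
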
.globl  _GFp_nistz256_point_add_affine
.private_extern _GFp_nistz256_point_add_affine

.align  5
_GFp_nistz256_point_add_affine:
    stp x29,x30,[sp,#-80]!
    add x29,sp,#0
    stp x19,x20,[sp,#16]
    stp x21,x22,[sp,#32]
    stp x23,x24,[sp,#48]
    stp x25,x26,[sp,#64]
    sub sp,sp,#32*10

    mov x21,x0
    mov x22,x1
    mov x23,x2
    ldr x12,Lpoly+8
    ldr x13,Lpoly+24

    ldp x4,x5,[x1,#64]          // in1_z
    ldp x6,x7,[x1,#64+16]
    orr x8,x4,x5
    orr x10,x6,x7
    orr x24,x8,x10
    cmp x24,#0
    csetm   x24,ne              // !in1infty

    ldp x14,x15,[x2]            // in2_x
    ldp x16,x17,[x2,#16]
    ldp x8,x9,[x2,#32]          // in2_y
    ldp x10,x11,[x2,#48]
    orr x14,x14,x15
    orr x16,x16,x17
    orr x8,x8,x9
    orr x10,x10,x11
    orr x14,x14,x16
    orr x8,x8,x10
    orr x25,x14,x8
    cmp x25,#0
    csetm   x25,ne              // !in2infty

    add x0,sp,#128
    bl  __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);

    mov x4,x14
    mov x5,x15
    mov x6,x16
    mov x7,x17
    ldr x3,[x23]
    add x2,x23,#0
    add x0,sp,#96
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(U2, Z1sqr, in2_x);

    add x2,x22,#0
    ldr x3,[x22,#64]            // forward load for p256_mul_mont
    ldp x4,x5,[sp,#128]
    ldp x6,x7,[sp,#128+16]
    add x0,sp,#160
    bl  __ecp_nistz256_sub_from // p256_sub(H, U2, in1_x);

    add x2,x22,#64
    add x0,sp,#128
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(S2, Z1sqr, in1_z);

    ldr x3,[x22,#64]
    ldp x4,x5,[sp,#160]
    ldp x6,x7,[sp,#160+16]
    add x2,x22,#64
    add x0,sp,#64
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(res_z, H, in1_z);

    ldr x3,[x23,#32]
    ldp x4,x5,[sp,#128]
    ldp x6,x7,[sp,#128+16]
    add x2,x23,#32
    add x0,sp,#128
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(S2, S2, in2_y);

    add x2,x22,#32
    ldp x4,x5,[sp,#160]         // forward load for p256_sqr_mont
    ldp x6,x7,[sp,#160+16]
    add x0,sp,#192
    bl  __ecp_nistz256_sub_from // p256_sub(R, S2, in1_y);

    add x0,sp,#224
    bl  __ecp_nistz256_sqr_mont // p256_sqr_mont(Hsqr, H);

    ldp x4,x5,[sp,#192]
    ldp x6,x7,[sp,#192+16]
    add x0,sp,#288
    bl  __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R);

    ldr x3,[sp,#160]
    ldp x4,x5,[sp,#224]
    ldp x6,x7,[sp,#224+16]
    add x2,sp,#160
    add x0,sp,#256
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(Hcub, Hsqr, H);

    ldr x3,[x22]
    ldp x4,x5,[sp,#224]
    ldp x6,x7,[sp,#224+16]
    add x2,x22,#0
    add x0,sp,#96
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(U2, in1_x, Hsqr);

    mov x8,x14
    mov x9,x15
    mov x10,x16
    mov x11,x17
    add x0,sp,#224
    bl  __ecp_nistz256_add      // p256_mul_by_2(Hsqr, U2);

    add x2,sp,#288
    add x0,sp,#0
    bl  __ecp_nistz256_sub_morf // p256_sub(res_x, Rsqr, Hsqr);

    add x2,sp,#256
    bl  __ecp_nistz256_sub_from // p256_sub(res_x, res_x, Hcub);

    add x2,sp,#96
    ldr x3,[x22,#32]            // forward load for p256_mul_mont
    ldp x4,x5,[sp,#256]
    ldp x6,x7,[sp,#256+16]
    add x0,sp,#32
    bl  __ecp_nistz256_sub_morf // p256_sub(res_y, U2, res_x);

    add x2,x22,#32
    add x0,sp,#128
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(S2, in1_y, Hcub);

    ldr x3,[sp,#192]
    ldp x4,x5,[sp,#32]
    ldp x6,x7,[sp,#32+16]
    add x2,sp,#192
    add x0,sp,#32
    bl  __ecp_nistz256_mul_mont // p256_mul_mont(res_y, res_y, R);

    add x2,sp,#128
    bl  __ecp_nistz256_sub_from // p256_sub(res_y, res_y, S2);

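    // Select the result in constant time from three candidates, using
    // the masks computed on entry:
    //
    //     if (in2infty)      ret = in1;                // in2 == O
    //     else if (in1infty) ret = (in2_x, in2_y, 1);  // in2, Z = 1
    //     else               ret = res;
    //
    // in2 is affine, so its implied Z coordinate is 1 in the Montgomery
    // domain, i.e. Lone_mont. The "adr x23,Lone_mont-64" rebases x23 so
    // that the Z-limb loads [x23,#32+32] and [x23,#32+48] read
    // Lone_mont, reusing the same offset pattern as the x/y limbs.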
    ldp x4,x5,[sp,#0]           // res
    ldp x6,x7,[sp,#0+16]
    ldp x8,x9,[x23]             // in2
    ldp x10,x11,[x23,#16]
    ldp x14,x15,[x22,#0]        // in1
    cmp x24,#0                  // x24 = !in1infty, remember?
    ldp x16,x17,[x22,#0+16]
    csel    x8,x4,x8,ne
    csel    x9,x5,x9,ne
    ldp x4,x5,[sp,#0+0+32]      // res
    csel    x10,x6,x10,ne
    csel    x11,x7,x11,ne
    cmp x25,#0                  // x25 = !in2infty, remember?
    ldp x6,x7,[sp,#0+0+48]
    csel    x14,x8,x14,ne
    csel    x15,x9,x15,ne
    ldp x8,x9,[x23,#0+32]       // in2
    csel    x16,x10,x16,ne
    csel    x17,x11,x17,ne
    ldp x10,x11,[x23,#0+48]
    stp x14,x15,[x21,#0]
    stp x16,x17,[x21,#0+16]
    adr x23,Lone_mont-64
    ldp x14,x15,[x22,#32]       // in1
    cmp x24,#0                  // x24 = !in1infty, remember?
    ldp x16,x17,[x22,#32+16]
    csel    x8,x4,x8,ne
    csel    x9,x5,x9,ne
    ldp x4,x5,[sp,#0+32+32]     // res
    csel    x10,x6,x10,ne
    csel    x11,x7,x11,ne
    cmp x25,#0                  // x25 = !in2infty, remember?
    ldp x6,x7,[sp,#0+32+48]
    csel    x14,x8,x14,ne
    csel    x15,x9,x15,ne
    ldp x8,x9,[x23,#32+32]      // in2
    csel    x16,x10,x16,ne
    csel    x17,x11,x17,ne
    ldp x10,x11,[x23,#32+48]
    stp x14,x15,[x21,#32]
    stp x16,x17,[x21,#32+16]
    ldp x14,x15,[x22,#64]       // in1
    cmp x24,#0                  // x24 = !in1infty, remember?
    ldp x16,x17,[x22,#64+16]
    csel    x8,x4,x8,ne
    csel    x9,x5,x9,ne
    csel    x10,x6,x10,ne
    csel    x11,x7,x11,ne
    cmp x25,#0                  // x25 = !in2infty, remember?
    csel    x14,x8,x14,ne
    csel    x15,x9,x15,ne
    csel    x16,x10,x16,ne
    csel    x17,x11,x17,ne
    stp x14,x15,[x21,#64]
    stp x16,x17,[x21,#64+16]

    add sp,x29,#0               // destroy frame
    ldp x19,x20,[x29,#16]
    ldp x21,x22,[x29,#32]
    ldp x23,x24,[x29,#48]
    ldp x25,x26,[x29,#64]
    ldp x29,x30,[sp],#80
    ret

#endif  // !OPENSSL_NO_ASM