#ifdef IN_SANDY2X

/*
   This file is adapted from amd64-51/fe25519_square.s:
   Adding loop to perform n squares.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
.p2align 5

#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL fe51_nsquare
ASM_HIDE_SYMBOL _fe51_nsquare
#endif
.globl fe51_nsquare
.globl _fe51_nsquare
#ifdef __ELF__
.type fe51_nsquare, @function
.type _fe51_nsquare, @function
#endif
/*
 * fe51_nsquare -- C-equivalent: void fe51_nsquare(fe51 *r, const fe51 *x, int n)
 * ABI: SysV AMD64.
 *   In:  rdi = r (output: 5 x 64-bit limbs), rsi = x (input: 5 limbs), rdx = n.
 * Squares the field element n times, writing the result limbs to r.
 * The *19 / *38 product folds and the 51-bit REDMASK51 mask show the limbs are
 * radix-2^51; presumably arithmetic mod 2^255-19 (curve25519) -- matches the
 * amd64-51 code this is adapted from.
 * NOTE(review): the loop is do-while shaped (decrement at top, test at bottom),
 * so it assumes n >= 1 -- n == 0 would loop ~2^64 times; confirm callers.
 */
fe51_nsquare:
_fe51_nsquare:

/* Align rsp down to a 32-byte boundary with >= 64 bytes of scratch; r11 keeps
   the adjustment so the epilogue can undo it.  Save callee-saved regs. */
mov %rsp,%r11
and $31,%r11
add $64,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
/* Load the 5 input limbs.  Limbs 0 and 1 live in rcx/r8 across iterations;
   limbs 2..4 are parked in r[2..4] (16/24/32(%rdi)) and used as memory
   operands by the multiplies below.  rsi is then repurposed as the counter. */
movq 0(%rsi),%rcx
movq 8(%rsi),%r8
movq 16(%rsi),%r9
movq 24(%rsi),%rax
movq 32(%rsi),%rsi
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %rsi,32(%rdi)
mov %rdx,%rsi

.p2align 4
._loop:
sub $1,%rsi
/* One squaring.  128-bit partial sums accumulate in register pairs
   (lo:hi):  h0 = r9:r10, h1 = r11:r12, h2 = r13:r14, h3 = r15:rbx,
   h4 = rcx:rbp.  Cross terms x0*xi and x1*xi are doubled by doubling x0
   (add %rcx,%rcx) and x1 (add %r8,%r8) up front; products whose degree
   exceeds 4 are folded back with a factor of 19 (38 = 2*19). */
mov %rcx,%rax
mul %rcx                        /* x0*x0 -> h0 */
add %rcx,%rcx                   /* rcx = 2*x0 */
mov %rax,%r9
mov %rdx,%r10
mov %rcx,%rax
mul %r8                         /* 2*x0*x1 -> h1 */
mov %rax,%r11
mov %rdx,%r12
mov %rcx,%rax
mulq 16(%rdi)                   /* 2*x0*x2 -> h2 */
mov %rax,%r13
mov %rdx,%r14
mov %rcx,%rax
mulq 24(%rdi)                   /* 2*x0*x3 -> h3 */
mov %rax,%r15
mov %rdx,%rbx
mov %rcx,%rax
mulq 32(%rdi)                   /* 2*x0*x4 -> h4 */
mov %rax,%rcx
mov %rdx,%rbp
mov %r8,%rax
mul %r8                         /* x1*x1 -> h2 */
add %r8,%r8                     /* r8 = 2*x1 */
add %rax,%r13
adc %rdx,%r14
mov %r8,%rax
mulq 16(%rdi)                   /* 2*x1*x2 -> h3 */
add %rax,%r15
adc %rdx,%rbx
mov %r8,%rax
imulq $19, %r8,%r8              /* r8 = 19*2*x1 for the wrap-around term */
mulq 24(%rdi)                   /* 2*x1*x3 -> h4 */
add %rax,%rcx
adc %rdx,%rbp
mov %r8,%rax
mulq 32(%rdi)                   /* 19*2*x1*x4 -> h0 (degree 5 folded) */
add %rax,%r9
adc %rdx,%r10
movq 16(%rdi),%rax
mulq 16(%rdi)                   /* x2*x2 -> h4 */
add %rax,%rcx
adc %rdx,%rbp
shld $13,%rcx,%rbp              /* rbp = carry-out of h4 (bits >= 51 of the pair) */
movq 16(%rdi),%rax
imulq $38, %rax,%rax
mulq 24(%rdi)                   /* 38*x2*x3 -> h0 (degree 5 folded) */
add %rax,%r9
adc %rdx,%r10
shld $13,%r9,%r10               /* r10 = carry-out of h0 */
movq 16(%rdi),%rax
imulq $38, %rax,%rax
mulq 32(%rdi)                   /* 38*x2*x4 -> h1 (degree 6 folded) */
add %rax,%r11
adc %rdx,%r12
movq 24(%rdi),%rax
imulq $19, %rax,%rax
mulq 24(%rdi)                   /* 19*x3*x3 -> h1 (degree 6 folded) */
add %rax,%r11
adc %rdx,%r12
shld $13,%r11,%r12              /* r12 = carry-out of h1 */
movq 24(%rdi),%rax
imulq $38, %rax,%rax
mulq 32(%rdi)                   /* 38*x3*x4 -> h2 (degree 7 folded) */
add %rax,%r13
adc %rdx,%r14
shld $13,%r13,%r14              /* r14 = carry-out of h2 */
movq 32(%rdi),%rax
imulq $19, %rax,%rax
mulq 32(%rdi)                   /* 19*x4*x4 -> h3 (degree 8 folded) */
add %rax,%r15
adc %rdx,%rbx
shld $13,%r15,%rbx              /* rbx = carry-out of h3 */
/* Reduce: mask each limb to 51 bits, add the neighbour's carry, and fold
   h4's carry back into h0 multiplied by 19. */
movq REDMASK51(%rip),%rdx       /* rdx = 2^51 - 1 for the rest of the loop */
and %rdx,%rcx
add %rbx,%rcx                   /* h4 += carry(h3) */
and %rdx,%r9
and %rdx,%r11
add %r10,%r11                   /* h1 += carry(h0) */
and %rdx,%r13
add %r12,%r13                   /* h2 += carry(h1) */
and %rdx,%r15
add %r14,%r15                   /* h3 += carry(h2) */
imulq $19, %rbp,%rbp
lea (%r9,%rbp),%r9              /* h0 += 19*carry(h4) */
/* Sequential carry chain h0 -> h1 -> h2 -> h3 -> h4 -> (x19) -> h0.
   Limbs 2..4 go back to memory; limbs 0..1 stay in rcx/r8 for the next
   iteration (or for the final store after the loop). */
mov %r9,%rax
shr $51,%r9
add %r11,%r9
and %rdx,%rax                   /* rax = limb 0 */
mov %r9,%r8
shr $51,%r9
add %r13,%r9
and %rdx,%r8                    /* r8 = limb 1 */
mov %r9,%r10
shr $51,%r9
add %r15,%r9
and %rdx,%r10
movq %r10,16(%rdi)              /* store limb 2 */
mov %r9,%r10
shr $51,%r9
add %rcx,%r9
and %rdx,%r10
movq %r10,24(%rdi)              /* store limb 3 */
mov %r9,%r10
shr $51,%r9
imulq $19, %r9,%r9
lea (%rax,%r9),%rcx             /* rcx = limb 0 + 19*final carry */
and %rdx,%r10
movq %r10,32(%rdi)              /* store limb 4 */
cmp $0,%rsi
jne ._loop

/* Loop done: flush the register-resident limbs 0..1, restore callee-saved
   registers and the original stack pointer. */
movq %rcx,0(%rdi)
movq %r8,8(%rdi)
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
add %r11,%rsp
ret

#endif