1 /* 2 This file is adapted from amd64-51/fe25519_invert.c: 3 Loops of squares are replaced by nsquares for better performance. 4 */ 5 6 #include "fe51.h" 7 8 #ifdef HAVE_AVX_ASM 9 10 #define fe51_square(x, y) fe51_nsquare(x, y, 1) 11 12 void 13 fe51_invert(fe51 *r, const fe51 *x) 14 { 15 fe51 z2; 16 fe51 z9; 17 fe51 z11; 18 fe51 z2_5_0; 19 fe51 z2_10_0; 20 fe51 z2_20_0; 21 fe51 z2_50_0; 22 fe51 z2_100_0; 23 fe51 t; 24 25 /* 2 */ fe51_square(&z2,x); 26 /* 4 */ fe51_square(&t,&z2); 27 /* 8 */ fe51_square(&t,&t); 28 /* 9 */ fe51_mul(&z9,&t,x); 29 /* 11 */ fe51_mul(&z11,&z9,&z2); 30 /* 22 */ fe51_square(&t,&z11); 31 /* 2^5 - 2^0 = 31 */ fe51_mul(&z2_5_0,&t,&z9); 32 33 /* 2^10 - 2^5 */ fe51_nsquare(&t,&z2_5_0, 5); 34 /* 2^10 - 2^0 */ fe51_mul(&z2_10_0,&t,&z2_5_0); 35 36 /* 2^20 - 2^10 */ fe51_nsquare(&t,&z2_10_0, 10); 37 /* 2^20 - 2^0 */ fe51_mul(&z2_20_0,&t,&z2_10_0); 38 39 /* 2^40 - 2^20 */ fe51_nsquare(&t,&z2_20_0, 20); 40 /* 2^40 - 2^0 */ fe51_mul(&t,&t,&z2_20_0); 41 42 /* 2^50 - 2^10 */ fe51_nsquare(&t,&t,10); 43 /* 2^50 - 2^0 */ fe51_mul(&z2_50_0,&t,&z2_10_0); 44 45 /* 2^100 - 2^50 */ fe51_nsquare(&t,&z2_50_0, 50); 46 /* 2^100 - 2^0 */ fe51_mul(&z2_100_0,&t,&z2_50_0); 47 48 /* 2^200 - 2^100 */ fe51_nsquare(&t,&z2_100_0, 100); 49 /* 2^200 - 2^0 */ fe51_mul(&t,&t,&z2_100_0); 50 51 /* 2^250 - 2^50 */ fe51_nsquare(&t,&t, 50); 52 /* 2^250 - 2^0 */ fe51_mul(&t,&t,&z2_50_0); 53 54 /* 2^255 - 2^5 */ fe51_nsquare(&t,&t,5); 55 /* 2^255 - 21 */ fe51_mul(r,&t,&z11); 56 } 57 58 #endif 59