1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
/*
This file is adapted from amd64-51/fe25519_invert.c:
Loops of squares are replaced by nsquares for better performance.
*/
#include "fe51.h"
#ifdef HAVE_AVX_ASM
#define fe51_square(x, y) fe51_nsquare(x, y, 1)
void
fe51_invert(fe51 *r, const fe51 *x)
{
fe51 z2;
fe51 z9;
fe51 z11;
fe51 z2_5_0;
fe51 z2_10_0;
fe51 z2_20_0;
fe51 z2_50_0;
fe51 z2_100_0;
fe51 t;
/* 2 */ fe51_square(&z2,x);
/* 4 */ fe51_square(&t,&z2);
/* 8 */ fe51_square(&t,&t);
/* 9 */ fe51_mul(&z9,&t,x);
/* 11 */ fe51_mul(&z11,&z9,&z2);
/* 22 */ fe51_square(&t,&z11);
/* 2^5 - 2^0 = 31 */ fe51_mul(&z2_5_0,&t,&z9);
/* 2^10 - 2^5 */ fe51_nsquare(&t,&z2_5_0, 5);
/* 2^10 - 2^0 */ fe51_mul(&z2_10_0,&t,&z2_5_0);
/* 2^20 - 2^10 */ fe51_nsquare(&t,&z2_10_0, 10);
/* 2^20 - 2^0 */ fe51_mul(&z2_20_0,&t,&z2_10_0);
/* 2^40 - 2^20 */ fe51_nsquare(&t,&z2_20_0, 20);
/* 2^40 - 2^0 */ fe51_mul(&t,&t,&z2_20_0);
/* 2^50 - 2^10 */ fe51_nsquare(&t,&t,10);
/* 2^50 - 2^0 */ fe51_mul(&z2_50_0,&t,&z2_10_0);
/* 2^100 - 2^50 */ fe51_nsquare(&t,&z2_50_0, 50);
/* 2^100 - 2^0 */ fe51_mul(&z2_100_0,&t,&z2_50_0);
/* 2^200 - 2^100 */ fe51_nsquare(&t,&z2_100_0, 100);
/* 2^200 - 2^0 */ fe51_mul(&t,&t,&z2_100_0);
/* 2^250 - 2^50 */ fe51_nsquare(&t,&t, 50);
/* 2^250 - 2^0 */ fe51_mul(&t,&t,&z2_50_0);
/* 2^255 - 2^5 */ fe51_nsquare(&t,&t,5);
/* 2^255 - 21 */ fe51_mul(r,&t,&z11);
}
#endif
|