1*36e94dc5SPeter Avalos /* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */
2*36e94dc5SPeter Avalos /*
3*36e94dc5SPeter Avalos version 20081011
4*36e94dc5SPeter Avalos Matthew Dempsky
5*36e94dc5SPeter Avalos Public domain.
6*36e94dc5SPeter Avalos Derived from public domain code by D. J. Bernstein.
7*36e94dc5SPeter Avalos */
8*36e94dc5SPeter Avalos
9*36e94dc5SPeter Avalos int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
10*36e94dc5SPeter Avalos
add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])11*36e94dc5SPeter Avalos static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
12*36e94dc5SPeter Avalos {
13*36e94dc5SPeter Avalos unsigned int j;
14*36e94dc5SPeter Avalos unsigned int u;
15*36e94dc5SPeter Avalos u = 0;
16*36e94dc5SPeter Avalos for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
17*36e94dc5SPeter Avalos u += a[31] + b[31]; out[31] = u;
18*36e94dc5SPeter Avalos }
19*36e94dc5SPeter Avalos
sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])20*36e94dc5SPeter Avalos static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
21*36e94dc5SPeter Avalos {
22*36e94dc5SPeter Avalos unsigned int j;
23*36e94dc5SPeter Avalos unsigned int u;
24*36e94dc5SPeter Avalos u = 218;
25*36e94dc5SPeter Avalos for (j = 0;j < 31;++j) {
26*36e94dc5SPeter Avalos u += a[j] + 65280 - b[j];
27*36e94dc5SPeter Avalos out[j] = u & 255;
28*36e94dc5SPeter Avalos u >>= 8;
29*36e94dc5SPeter Avalos }
30*36e94dc5SPeter Avalos u += a[31] - b[31];
31*36e94dc5SPeter Avalos out[31] = u;
32*36e94dc5SPeter Avalos }
33*36e94dc5SPeter Avalos
squeeze(unsigned int a[32])34*36e94dc5SPeter Avalos static void squeeze(unsigned int a[32])
35*36e94dc5SPeter Avalos {
36*36e94dc5SPeter Avalos unsigned int j;
37*36e94dc5SPeter Avalos unsigned int u;
38*36e94dc5SPeter Avalos u = 0;
39*36e94dc5SPeter Avalos for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
40*36e94dc5SPeter Avalos u += a[31]; a[31] = u & 127;
41*36e94dc5SPeter Avalos u = 19 * (u >> 7);
42*36e94dc5SPeter Avalos for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
43*36e94dc5SPeter Avalos u += a[31]; a[31] = u;
44*36e94dc5SPeter Avalos }
45*36e94dc5SPeter Avalos
46*36e94dc5SPeter Avalos static const unsigned int minusp[32] = {
47*36e94dc5SPeter Avalos 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
48*36e94dc5SPeter Avalos } ;
49*36e94dc5SPeter Avalos
freeze(unsigned int a[32])50*36e94dc5SPeter Avalos static void freeze(unsigned int a[32])
51*36e94dc5SPeter Avalos {
52*36e94dc5SPeter Avalos unsigned int aorig[32];
53*36e94dc5SPeter Avalos unsigned int j;
54*36e94dc5SPeter Avalos unsigned int negative;
55*36e94dc5SPeter Avalos
56*36e94dc5SPeter Avalos for (j = 0;j < 32;++j) aorig[j] = a[j];
57*36e94dc5SPeter Avalos add(a,a,minusp);
58*36e94dc5SPeter Avalos negative = -((a[31] >> 7) & 1);
59*36e94dc5SPeter Avalos for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
60*36e94dc5SPeter Avalos }
61*36e94dc5SPeter Avalos
mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])62*36e94dc5SPeter Avalos static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
63*36e94dc5SPeter Avalos {
64*36e94dc5SPeter Avalos unsigned int i;
65*36e94dc5SPeter Avalos unsigned int j;
66*36e94dc5SPeter Avalos unsigned int u;
67*36e94dc5SPeter Avalos
68*36e94dc5SPeter Avalos for (i = 0;i < 32;++i) {
69*36e94dc5SPeter Avalos u = 0;
70*36e94dc5SPeter Avalos for (j = 0;j <= i;++j) u += a[j] * b[i - j];
71*36e94dc5SPeter Avalos for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
72*36e94dc5SPeter Avalos out[i] = u;
73*36e94dc5SPeter Avalos }
74*36e94dc5SPeter Avalos squeeze(out);
75*36e94dc5SPeter Avalos }
76*36e94dc5SPeter Avalos
mult121665(unsigned int out[32],const unsigned int a[32])77*36e94dc5SPeter Avalos static void mult121665(unsigned int out[32],const unsigned int a[32])
78*36e94dc5SPeter Avalos {
79*36e94dc5SPeter Avalos unsigned int j;
80*36e94dc5SPeter Avalos unsigned int u;
81*36e94dc5SPeter Avalos
82*36e94dc5SPeter Avalos u = 0;
83*36e94dc5SPeter Avalos for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
84*36e94dc5SPeter Avalos u += 121665 * a[31]; out[31] = u & 127;
85*36e94dc5SPeter Avalos u = 19 * (u >> 7);
86*36e94dc5SPeter Avalos for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
87*36e94dc5SPeter Avalos u += out[j]; out[j] = u;
88*36e94dc5SPeter Avalos }
89*36e94dc5SPeter Avalos
square(unsigned int out[32],const unsigned int a[32])90*36e94dc5SPeter Avalos static void square(unsigned int out[32],const unsigned int a[32])
91*36e94dc5SPeter Avalos {
92*36e94dc5SPeter Avalos unsigned int i;
93*36e94dc5SPeter Avalos unsigned int j;
94*36e94dc5SPeter Avalos unsigned int u;
95*36e94dc5SPeter Avalos
96*36e94dc5SPeter Avalos for (i = 0;i < 32;++i) {
97*36e94dc5SPeter Avalos u = 0;
98*36e94dc5SPeter Avalos for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
99*36e94dc5SPeter Avalos for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
100*36e94dc5SPeter Avalos u *= 2;
101*36e94dc5SPeter Avalos if ((i & 1) == 0) {
102*36e94dc5SPeter Avalos u += a[i / 2] * a[i / 2];
103*36e94dc5SPeter Avalos u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
104*36e94dc5SPeter Avalos }
105*36e94dc5SPeter Avalos out[i] = u;
106*36e94dc5SPeter Avalos }
107*36e94dc5SPeter Avalos squeeze(out);
108*36e94dc5SPeter Avalos }
109*36e94dc5SPeter Avalos
select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)110*36e94dc5SPeter Avalos static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
111*36e94dc5SPeter Avalos {
112*36e94dc5SPeter Avalos unsigned int j;
113*36e94dc5SPeter Avalos unsigned int t;
114*36e94dc5SPeter Avalos unsigned int bminus1;
115*36e94dc5SPeter Avalos
116*36e94dc5SPeter Avalos bminus1 = b - 1;
117*36e94dc5SPeter Avalos for (j = 0;j < 64;++j) {
118*36e94dc5SPeter Avalos t = bminus1 & (r[j] ^ s[j]);
119*36e94dc5SPeter Avalos p[j] = s[j] ^ t;
120*36e94dc5SPeter Avalos q[j] = r[j] ^ t;
121*36e94dc5SPeter Avalos }
122*36e94dc5SPeter Avalos }
123*36e94dc5SPeter Avalos
mainloop(unsigned int work[64],const unsigned char e[32])124*36e94dc5SPeter Avalos static void mainloop(unsigned int work[64],const unsigned char e[32])
125*36e94dc5SPeter Avalos {
126*36e94dc5SPeter Avalos unsigned int xzm1[64];
127*36e94dc5SPeter Avalos unsigned int xzm[64];
128*36e94dc5SPeter Avalos unsigned int xzmb[64];
129*36e94dc5SPeter Avalos unsigned int xzm1b[64];
130*36e94dc5SPeter Avalos unsigned int xznb[64];
131*36e94dc5SPeter Avalos unsigned int xzn1b[64];
132*36e94dc5SPeter Avalos unsigned int a0[64];
133*36e94dc5SPeter Avalos unsigned int a1[64];
134*36e94dc5SPeter Avalos unsigned int b0[64];
135*36e94dc5SPeter Avalos unsigned int b1[64];
136*36e94dc5SPeter Avalos unsigned int c1[64];
137*36e94dc5SPeter Avalos unsigned int r[32];
138*36e94dc5SPeter Avalos unsigned int s[32];
139*36e94dc5SPeter Avalos unsigned int t[32];
140*36e94dc5SPeter Avalos unsigned int u[32];
141*36e94dc5SPeter Avalos unsigned int j;
142*36e94dc5SPeter Avalos unsigned int b;
143*36e94dc5SPeter Avalos int pos;
144*36e94dc5SPeter Avalos
145*36e94dc5SPeter Avalos for (j = 0;j < 32;++j) xzm1[j] = work[j];
146*36e94dc5SPeter Avalos xzm1[32] = 1;
147*36e94dc5SPeter Avalos for (j = 33;j < 64;++j) xzm1[j] = 0;
148*36e94dc5SPeter Avalos
149*36e94dc5SPeter Avalos xzm[0] = 1;
150*36e94dc5SPeter Avalos for (j = 1;j < 64;++j) xzm[j] = 0;
151*36e94dc5SPeter Avalos
152*36e94dc5SPeter Avalos for (pos = 254;pos >= 0;--pos) {
153*36e94dc5SPeter Avalos b = e[pos / 8] >> (pos & 7);
154*36e94dc5SPeter Avalos b &= 1;
155*36e94dc5SPeter Avalos select(xzmb,xzm1b,xzm,xzm1,b);
156*36e94dc5SPeter Avalos add(a0,xzmb,xzmb + 32);
157*36e94dc5SPeter Avalos sub(a0 + 32,xzmb,xzmb + 32);
158*36e94dc5SPeter Avalos add(a1,xzm1b,xzm1b + 32);
159*36e94dc5SPeter Avalos sub(a1 + 32,xzm1b,xzm1b + 32);
160*36e94dc5SPeter Avalos square(b0,a0);
161*36e94dc5SPeter Avalos square(b0 + 32,a0 + 32);
162*36e94dc5SPeter Avalos mult(b1,a1,a0 + 32);
163*36e94dc5SPeter Avalos mult(b1 + 32,a1 + 32,a0);
164*36e94dc5SPeter Avalos add(c1,b1,b1 + 32);
165*36e94dc5SPeter Avalos sub(c1 + 32,b1,b1 + 32);
166*36e94dc5SPeter Avalos square(r,c1 + 32);
167*36e94dc5SPeter Avalos sub(s,b0,b0 + 32);
168*36e94dc5SPeter Avalos mult121665(t,s);
169*36e94dc5SPeter Avalos add(u,t,b0);
170*36e94dc5SPeter Avalos mult(xznb,b0,b0 + 32);
171*36e94dc5SPeter Avalos mult(xznb + 32,s,u);
172*36e94dc5SPeter Avalos square(xzn1b,c1);
173*36e94dc5SPeter Avalos mult(xzn1b + 32,r,work);
174*36e94dc5SPeter Avalos select(xzm,xzm1,xznb,xzn1b,b);
175*36e94dc5SPeter Avalos }
176*36e94dc5SPeter Avalos
177*36e94dc5SPeter Avalos for (j = 0;j < 64;++j) work[j] = xzm[j];
178*36e94dc5SPeter Avalos }
179*36e94dc5SPeter Avalos
recip(unsigned int out[32],const unsigned int z[32])180*36e94dc5SPeter Avalos static void recip(unsigned int out[32],const unsigned int z[32])
181*36e94dc5SPeter Avalos {
182*36e94dc5SPeter Avalos unsigned int z2[32];
183*36e94dc5SPeter Avalos unsigned int z9[32];
184*36e94dc5SPeter Avalos unsigned int z11[32];
185*36e94dc5SPeter Avalos unsigned int z2_5_0[32];
186*36e94dc5SPeter Avalos unsigned int z2_10_0[32];
187*36e94dc5SPeter Avalos unsigned int z2_20_0[32];
188*36e94dc5SPeter Avalos unsigned int z2_50_0[32];
189*36e94dc5SPeter Avalos unsigned int z2_100_0[32];
190*36e94dc5SPeter Avalos unsigned int t0[32];
191*36e94dc5SPeter Avalos unsigned int t1[32];
192*36e94dc5SPeter Avalos int i;
193*36e94dc5SPeter Avalos
194*36e94dc5SPeter Avalos /* 2 */ square(z2,z);
195*36e94dc5SPeter Avalos /* 4 */ square(t1,z2);
196*36e94dc5SPeter Avalos /* 8 */ square(t0,t1);
197*36e94dc5SPeter Avalos /* 9 */ mult(z9,t0,z);
198*36e94dc5SPeter Avalos /* 11 */ mult(z11,z9,z2);
199*36e94dc5SPeter Avalos /* 22 */ square(t0,z11);
200*36e94dc5SPeter Avalos /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
201*36e94dc5SPeter Avalos
202*36e94dc5SPeter Avalos /* 2^6 - 2^1 */ square(t0,z2_5_0);
203*36e94dc5SPeter Avalos /* 2^7 - 2^2 */ square(t1,t0);
204*36e94dc5SPeter Avalos /* 2^8 - 2^3 */ square(t0,t1);
205*36e94dc5SPeter Avalos /* 2^9 - 2^4 */ square(t1,t0);
206*36e94dc5SPeter Avalos /* 2^10 - 2^5 */ square(t0,t1);
207*36e94dc5SPeter Avalos /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
208*36e94dc5SPeter Avalos
209*36e94dc5SPeter Avalos /* 2^11 - 2^1 */ square(t0,z2_10_0);
210*36e94dc5SPeter Avalos /* 2^12 - 2^2 */ square(t1,t0);
211*36e94dc5SPeter Avalos /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
212*36e94dc5SPeter Avalos /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
213*36e94dc5SPeter Avalos
214*36e94dc5SPeter Avalos /* 2^21 - 2^1 */ square(t0,z2_20_0);
215*36e94dc5SPeter Avalos /* 2^22 - 2^2 */ square(t1,t0);
216*36e94dc5SPeter Avalos /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
217*36e94dc5SPeter Avalos /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
218*36e94dc5SPeter Avalos
219*36e94dc5SPeter Avalos /* 2^41 - 2^1 */ square(t1,t0);
220*36e94dc5SPeter Avalos /* 2^42 - 2^2 */ square(t0,t1);
221*36e94dc5SPeter Avalos /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
222*36e94dc5SPeter Avalos /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
223*36e94dc5SPeter Avalos
224*36e94dc5SPeter Avalos /* 2^51 - 2^1 */ square(t0,z2_50_0);
225*36e94dc5SPeter Avalos /* 2^52 - 2^2 */ square(t1,t0);
226*36e94dc5SPeter Avalos /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
227*36e94dc5SPeter Avalos /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
228*36e94dc5SPeter Avalos
229*36e94dc5SPeter Avalos /* 2^101 - 2^1 */ square(t1,z2_100_0);
230*36e94dc5SPeter Avalos /* 2^102 - 2^2 */ square(t0,t1);
231*36e94dc5SPeter Avalos /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
232*36e94dc5SPeter Avalos /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
233*36e94dc5SPeter Avalos
234*36e94dc5SPeter Avalos /* 2^201 - 2^1 */ square(t0,t1);
235*36e94dc5SPeter Avalos /* 2^202 - 2^2 */ square(t1,t0);
236*36e94dc5SPeter Avalos /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
237*36e94dc5SPeter Avalos /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
238*36e94dc5SPeter Avalos
239*36e94dc5SPeter Avalos /* 2^251 - 2^1 */ square(t1,t0);
240*36e94dc5SPeter Avalos /* 2^252 - 2^2 */ square(t0,t1);
241*36e94dc5SPeter Avalos /* 2^253 - 2^3 */ square(t1,t0);
242*36e94dc5SPeter Avalos /* 2^254 - 2^4 */ square(t0,t1);
243*36e94dc5SPeter Avalos /* 2^255 - 2^5 */ square(t1,t0);
244*36e94dc5SPeter Avalos /* 2^255 - 21 */ mult(out,t1,z11);
245*36e94dc5SPeter Avalos }
246*36e94dc5SPeter Avalos
crypto_scalarmult_curve25519(unsigned char * q,const unsigned char * n,const unsigned char * p)247*36e94dc5SPeter Avalos int crypto_scalarmult_curve25519(unsigned char *q,
248*36e94dc5SPeter Avalos const unsigned char *n,
249*36e94dc5SPeter Avalos const unsigned char *p)
250*36e94dc5SPeter Avalos {
251*36e94dc5SPeter Avalos unsigned int work[96];
252*36e94dc5SPeter Avalos unsigned char e[32];
253*36e94dc5SPeter Avalos unsigned int i;
254*36e94dc5SPeter Avalos for (i = 0;i < 32;++i) e[i] = n[i];
255*36e94dc5SPeter Avalos e[0] &= 248;
256*36e94dc5SPeter Avalos e[31] &= 127;
257*36e94dc5SPeter Avalos e[31] |= 64;
258*36e94dc5SPeter Avalos for (i = 0;i < 32;++i) work[i] = p[i];
259*36e94dc5SPeter Avalos mainloop(work,e);
260*36e94dc5SPeter Avalos recip(work + 32,work + 32);
261*36e94dc5SPeter Avalos mult(work + 64,work,work + 32);
262*36e94dc5SPeter Avalos freeze(work + 64);
263*36e94dc5SPeter Avalos for (i = 0;i < 32;++i) q[i] = work[64 + i];
264*36e94dc5SPeter Avalos return 0;
265*36e94dc5SPeter Avalos }
266