1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package edwards25519
6
7import "encoding/binary"
8
9// This code is a port of the public domain, “ref10” implementation of ed25519
10// from SUPERCOP.
11
12// FieldElement represents an element of the field GF(2^255 - 19).  An element
13// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
14// t[3]+2^102 t[4]+...+2^230 t[9].  Bounds on each t[i] vary depending on
15// context.
16type FieldElement [10]int32
17
18var zero FieldElement
19
20func FeZero(fe *FieldElement) {
21	copy(fe[:], zero[:])
22}
23
24func FeOne(fe *FieldElement) {
25	FeZero(fe)
26	fe[0] = 1
27}
28
29func FeAdd(dst, a, b *FieldElement) {
30	dst[0] = a[0] + b[0]
31	dst[1] = a[1] + b[1]
32	dst[2] = a[2] + b[2]
33	dst[3] = a[3] + b[3]
34	dst[4] = a[4] + b[4]
35	dst[5] = a[5] + b[5]
36	dst[6] = a[6] + b[6]
37	dst[7] = a[7] + b[7]
38	dst[8] = a[8] + b[8]
39	dst[9] = a[9] + b[9]
40}
41
42func FeSub(dst, a, b *FieldElement) {
43	dst[0] = a[0] - b[0]
44	dst[1] = a[1] - b[1]
45	dst[2] = a[2] - b[2]
46	dst[3] = a[3] - b[3]
47	dst[4] = a[4] - b[4]
48	dst[5] = a[5] - b[5]
49	dst[6] = a[6] - b[6]
50	dst[7] = a[7] - b[7]
51	dst[8] = a[8] - b[8]
52	dst[9] = a[9] - b[9]
53}
54
55func FeCopy(dst, src *FieldElement) {
56	copy(dst[:], src[:])
57}
58
59// Replace (f,g) with (g,g) if b == 1;
60// replace (f,g) with (f,g) if b == 0.
61//
62// Preconditions: b in {0,1}.
63func FeCMove(f, g *FieldElement, b int32) {
64	b = -b
65	f[0] ^= b & (f[0] ^ g[0])
66	f[1] ^= b & (f[1] ^ g[1])
67	f[2] ^= b & (f[2] ^ g[2])
68	f[3] ^= b & (f[3] ^ g[3])
69	f[4] ^= b & (f[4] ^ g[4])
70	f[5] ^= b & (f[5] ^ g[5])
71	f[6] ^= b & (f[6] ^ g[6])
72	f[7] ^= b & (f[7] ^ g[7])
73	f[8] ^= b & (f[8] ^ g[8])
74	f[9] ^= b & (f[9] ^ g[9])
75}
76
77func load3(in []byte) int64 {
78	var r int64
79	r = int64(in[0])
80	r |= int64(in[1]) << 8
81	r |= int64(in[2]) << 16
82	return r
83}
84
85func load4(in []byte) int64 {
86	var r int64
87	r = int64(in[0])
88	r |= int64(in[1]) << 8
89	r |= int64(in[2]) << 16
90	r |= int64(in[3]) << 24
91	return r
92}
93
94func FeFromBytes(dst *FieldElement, src *[32]byte) {
95	h0 := load4(src[:])
96	h1 := load3(src[4:]) << 6
97	h2 := load3(src[7:]) << 5
98	h3 := load3(src[10:]) << 3
99	h4 := load3(src[13:]) << 2
100	h5 := load4(src[16:])
101	h6 := load3(src[20:]) << 7
102	h7 := load3(src[23:]) << 5
103	h8 := load3(src[26:]) << 4
104	h9 := (load3(src[29:]) & 8388607) << 2
105
106	FeCombine(dst, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
107}
108
109// FeToBytes marshals h to s.
110// Preconditions:
111//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
112//
113// Write p=2^255-19; q=floor(h/p).
114// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
115//
116// Proof:
117//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
118//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
119//
120//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
121//   Then 0<y<1.
122//
123//   Write r=h-pq.
124//   Have 0<=r<=p-1=2^255-20.
125//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
126//
127//   Write x=r+19(2^-255)r+y.
128//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
129//
130//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
131//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
132func FeToBytes(s *[32]byte, h *FieldElement) {
133	var carry [10]int32
134
135	q := (19*h[9] + (1 << 24)) >> 25
136	q = (h[0] + q) >> 26
137	q = (h[1] + q) >> 25
138	q = (h[2] + q) >> 26
139	q = (h[3] + q) >> 25
140	q = (h[4] + q) >> 26
141	q = (h[5] + q) >> 25
142	q = (h[6] + q) >> 26
143	q = (h[7] + q) >> 25
144	q = (h[8] + q) >> 26
145	q = (h[9] + q) >> 25
146
147	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
148	h[0] += 19 * q
149	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
150
151	carry[0] = h[0] >> 26
152	h[1] += carry[0]
153	h[0] -= carry[0] << 26
154	carry[1] = h[1] >> 25
155	h[2] += carry[1]
156	h[1] -= carry[1] << 25
157	carry[2] = h[2] >> 26
158	h[3] += carry[2]
159	h[2] -= carry[2] << 26
160	carry[3] = h[3] >> 25
161	h[4] += carry[3]
162	h[3] -= carry[3] << 25
163	carry[4] = h[4] >> 26
164	h[5] += carry[4]
165	h[4] -= carry[4] << 26
166	carry[5] = h[5] >> 25
167	h[6] += carry[5]
168	h[5] -= carry[5] << 25
169	carry[6] = h[6] >> 26
170	h[7] += carry[6]
171	h[6] -= carry[6] << 26
172	carry[7] = h[7] >> 25
173	h[8] += carry[7]
174	h[7] -= carry[7] << 25
175	carry[8] = h[8] >> 26
176	h[9] += carry[8]
177	h[8] -= carry[8] << 26
178	carry[9] = h[9] >> 25
179	h[9] -= carry[9] << 25
180	// h10 = carry9
181
182	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
183	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
184	// evidently 2^255 h10-2^255 q = 0.
185	// Goal: Output h[0]+...+2^230 h[9].
186
187	s[0] = byte(h[0] >> 0)
188	s[1] = byte(h[0] >> 8)
189	s[2] = byte(h[0] >> 16)
190	s[3] = byte((h[0] >> 24) | (h[1] << 2))
191	s[4] = byte(h[1] >> 6)
192	s[5] = byte(h[1] >> 14)
193	s[6] = byte((h[1] >> 22) | (h[2] << 3))
194	s[7] = byte(h[2] >> 5)
195	s[8] = byte(h[2] >> 13)
196	s[9] = byte((h[2] >> 21) | (h[3] << 5))
197	s[10] = byte(h[3] >> 3)
198	s[11] = byte(h[3] >> 11)
199	s[12] = byte((h[3] >> 19) | (h[4] << 6))
200	s[13] = byte(h[4] >> 2)
201	s[14] = byte(h[4] >> 10)
202	s[15] = byte(h[4] >> 18)
203	s[16] = byte(h[5] >> 0)
204	s[17] = byte(h[5] >> 8)
205	s[18] = byte(h[5] >> 16)
206	s[19] = byte((h[5] >> 24) | (h[6] << 1))
207	s[20] = byte(h[6] >> 7)
208	s[21] = byte(h[6] >> 15)
209	s[22] = byte((h[6] >> 23) | (h[7] << 3))
210	s[23] = byte(h[7] >> 5)
211	s[24] = byte(h[7] >> 13)
212	s[25] = byte((h[7] >> 21) | (h[8] << 4))
213	s[26] = byte(h[8] >> 4)
214	s[27] = byte(h[8] >> 12)
215	s[28] = byte((h[8] >> 20) | (h[9] << 6))
216	s[29] = byte(h[9] >> 2)
217	s[30] = byte(h[9] >> 10)
218	s[31] = byte(h[9] >> 18)
219}
220
221func FeIsNegative(f *FieldElement) byte {
222	var s [32]byte
223	FeToBytes(&s, f)
224	return s[0] & 1
225}
226
227func FeIsNonZero(f *FieldElement) int32 {
228	var s [32]byte
229	FeToBytes(&s, f)
230	var x uint8
231	for _, b := range s {
232		x |= b
233	}
234	x |= x >> 4
235	x |= x >> 2
236	x |= x >> 1
237	return int32(x & 1)
238}
239
240// FeNeg sets h = -f
241//
242// Preconditions:
243//    |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
244//
245// Postconditions:
246//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
247func FeNeg(h, f *FieldElement) {
248	h[0] = -f[0]
249	h[1] = -f[1]
250	h[2] = -f[2]
251	h[3] = -f[3]
252	h[4] = -f[4]
253	h[5] = -f[5]
254	h[6] = -f[6]
255	h[7] = -f[7]
256	h[8] = -f[8]
257	h[9] = -f[9]
258}
259
260func FeCombine(h *FieldElement, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) {
261	var c0, c1, c2, c3, c4, c5, c6, c7, c8, c9 int64
262
263	/*
264	  |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
265	    i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
266	  |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
267	    i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
268	*/
269
270	c0 = (h0 + (1 << 25)) >> 26
271	h1 += c0
272	h0 -= c0 << 26
273	c4 = (h4 + (1 << 25)) >> 26
274	h5 += c4
275	h4 -= c4 << 26
276	/* |h0| <= 2^25 */
277	/* |h4| <= 2^25 */
278	/* |h1| <= 1.51*2^58 */
279	/* |h5| <= 1.51*2^58 */
280
281	c1 = (h1 + (1 << 24)) >> 25
282	h2 += c1
283	h1 -= c1 << 25
284	c5 = (h5 + (1 << 24)) >> 25
285	h6 += c5
286	h5 -= c5 << 25
287	/* |h1| <= 2^24; from now on fits into int32 */
288	/* |h5| <= 2^24; from now on fits into int32 */
289	/* |h2| <= 1.21*2^59 */
290	/* |h6| <= 1.21*2^59 */
291
292	c2 = (h2 + (1 << 25)) >> 26
293	h3 += c2
294	h2 -= c2 << 26
295	c6 = (h6 + (1 << 25)) >> 26
296	h7 += c6
297	h6 -= c6 << 26
298	/* |h2| <= 2^25; from now on fits into int32 unchanged */
299	/* |h6| <= 2^25; from now on fits into int32 unchanged */
300	/* |h3| <= 1.51*2^58 */
301	/* |h7| <= 1.51*2^58 */
302
303	c3 = (h3 + (1 << 24)) >> 25
304	h4 += c3
305	h3 -= c3 << 25
306	c7 = (h7 + (1 << 24)) >> 25
307	h8 += c7
308	h7 -= c7 << 25
309	/* |h3| <= 2^24; from now on fits into int32 unchanged */
310	/* |h7| <= 2^24; from now on fits into int32 unchanged */
311	/* |h4| <= 1.52*2^33 */
312	/* |h8| <= 1.52*2^33 */
313
314	c4 = (h4 + (1 << 25)) >> 26
315	h5 += c4
316	h4 -= c4 << 26
317	c8 = (h8 + (1 << 25)) >> 26
318	h9 += c8
319	h8 -= c8 << 26
320	/* |h4| <= 2^25; from now on fits into int32 unchanged */
321	/* |h8| <= 2^25; from now on fits into int32 unchanged */
322	/* |h5| <= 1.01*2^24 */
323	/* |h9| <= 1.51*2^58 */
324
325	c9 = (h9 + (1 << 24)) >> 25
326	h0 += c9 * 19
327	h9 -= c9 << 25
328	/* |h9| <= 2^24; from now on fits into int32 unchanged */
329	/* |h0| <= 1.8*2^37 */
330
331	c0 = (h0 + (1 << 25)) >> 26
332	h1 += c0
333	h0 -= c0 << 26
334	/* |h0| <= 2^25; from now on fits into int32 unchanged */
335	/* |h1| <= 1.01*2^24 */
336
337	h[0] = int32(h0)
338	h[1] = int32(h1)
339	h[2] = int32(h2)
340	h[3] = int32(h3)
341	h[4] = int32(h4)
342	h[5] = int32(h5)
343	h[6] = int32(h6)
344	h[7] = int32(h7)
345	h[8] = int32(h8)
346	h[9] = int32(h9)
347}
348
349// FeMul calculates h = f * g
350// Can overlap h with f or g.
351//
352// Preconditions:
353//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
354//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
355//
356// Postconditions:
357//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
358//
359// Notes on implementation strategy:
360//
361// Using schoolbook multiplication.
362// Karatsuba would save a little in some cost models.
363//
364// Most multiplications by 2 and 19 are 32-bit precomputations;
365// cheaper than 64-bit postcomputations.
366//
367// There is one remaining multiplication by 19 in the carry chain;
368// one *19 precomputation can be merged into this,
369// but the resulting data flow is considerably less clean.
370//
371// There are 12 carries below.
372// 10 of them are 2-way parallelizable and vectorizable.
373// Can get away with 11 carries, but then data flow is much deeper.
374//
375// With tighter constraints on inputs, can squeeze carries into int32.
376func FeMul(h, f, g *FieldElement) {
377	f0 := int64(f[0])
378	f1 := int64(f[1])
379	f2 := int64(f[2])
380	f3 := int64(f[3])
381	f4 := int64(f[4])
382	f5 := int64(f[5])
383	f6 := int64(f[6])
384	f7 := int64(f[7])
385	f8 := int64(f[8])
386	f9 := int64(f[9])
387
388	f1_2 := int64(2 * f[1])
389	f3_2 := int64(2 * f[3])
390	f5_2 := int64(2 * f[5])
391	f7_2 := int64(2 * f[7])
392	f9_2 := int64(2 * f[9])
393
394	g0 := int64(g[0])
395	g1 := int64(g[1])
396	g2 := int64(g[2])
397	g3 := int64(g[3])
398	g4 := int64(g[4])
399	g5 := int64(g[5])
400	g6 := int64(g[6])
401	g7 := int64(g[7])
402	g8 := int64(g[8])
403	g9 := int64(g[9])
404
405	g1_19 := int64(19 * g[1]) /* 1.4*2^29 */
406	g2_19 := int64(19 * g[2]) /* 1.4*2^30; still ok */
407	g3_19 := int64(19 * g[3])
408	g4_19 := int64(19 * g[4])
409	g5_19 := int64(19 * g[5])
410	g6_19 := int64(19 * g[6])
411	g7_19 := int64(19 * g[7])
412	g8_19 := int64(19 * g[8])
413	g9_19 := int64(19 * g[9])
414
415	h0 := f0*g0 + f1_2*g9_19 + f2*g8_19 + f3_2*g7_19 + f4*g6_19 + f5_2*g5_19 + f6*g4_19 + f7_2*g3_19 + f8*g2_19 + f9_2*g1_19
416	h1 := f0*g1 + f1*g0 + f2*g9_19 + f3*g8_19 + f4*g7_19 + f5*g6_19 + f6*g5_19 + f7*g4_19 + f8*g3_19 + f9*g2_19
417	h2 := f0*g2 + f1_2*g1 + f2*g0 + f3_2*g9_19 + f4*g8_19 + f5_2*g7_19 + f6*g6_19 + f7_2*g5_19 + f8*g4_19 + f9_2*g3_19
418	h3 := f0*g3 + f1*g2 + f2*g1 + f3*g0 + f4*g9_19 + f5*g8_19 + f6*g7_19 + f7*g6_19 + f8*g5_19 + f9*g4_19
419	h4 := f0*g4 + f1_2*g3 + f2*g2 + f3_2*g1 + f4*g0 + f5_2*g9_19 + f6*g8_19 + f7_2*g7_19 + f8*g6_19 + f9_2*g5_19
420	h5 := f0*g5 + f1*g4 + f2*g3 + f3*g2 + f4*g1 + f5*g0 + f6*g9_19 + f7*g8_19 + f8*g7_19 + f9*g6_19
421	h6 := f0*g6 + f1_2*g5 + f2*g4 + f3_2*g3 + f4*g2 + f5_2*g1 + f6*g0 + f7_2*g9_19 + f8*g8_19 + f9_2*g7_19
422	h7 := f0*g7 + f1*g6 + f2*g5 + f3*g4 + f4*g3 + f5*g2 + f6*g1 + f7*g0 + f8*g9_19 + f9*g8_19
423	h8 := f0*g8 + f1_2*g7 + f2*g6 + f3_2*g5 + f4*g4 + f5_2*g3 + f6*g2 + f7_2*g1 + f8*g0 + f9_2*g9_19
424	h9 := f0*g9 + f1*g8 + f2*g7 + f3*g6 + f4*g5 + f5*g4 + f6*g3 + f7*g2 + f8*g1 + f9*g0
425
426	FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
427}
428
429func feSquare(f *FieldElement) (h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 int64) {
430	f0 := int64(f[0])
431	f1 := int64(f[1])
432	f2 := int64(f[2])
433	f3 := int64(f[3])
434	f4 := int64(f[4])
435	f5 := int64(f[5])
436	f6 := int64(f[6])
437	f7 := int64(f[7])
438	f8 := int64(f[8])
439	f9 := int64(f[9])
440	f0_2 := int64(2 * f[0])
441	f1_2 := int64(2 * f[1])
442	f2_2 := int64(2 * f[2])
443	f3_2 := int64(2 * f[3])
444	f4_2 := int64(2 * f[4])
445	f5_2 := int64(2 * f[5])
446	f6_2 := int64(2 * f[6])
447	f7_2 := int64(2 * f[7])
448	f5_38 := 38 * f5 // 1.31*2^30
449	f6_19 := 19 * f6 // 1.31*2^30
450	f7_38 := 38 * f7 // 1.31*2^30
451	f8_19 := 19 * f8 // 1.31*2^30
452	f9_38 := 38 * f9 // 1.31*2^30
453
454	h0 = f0*f0 + f1_2*f9_38 + f2_2*f8_19 + f3_2*f7_38 + f4_2*f6_19 + f5*f5_38
455	h1 = f0_2*f1 + f2*f9_38 + f3_2*f8_19 + f4*f7_38 + f5_2*f6_19
456	h2 = f0_2*f2 + f1_2*f1 + f3_2*f9_38 + f4_2*f8_19 + f5_2*f7_38 + f6*f6_19
457	h3 = f0_2*f3 + f1_2*f2 + f4*f9_38 + f5_2*f8_19 + f6*f7_38
458	h4 = f0_2*f4 + f1_2*f3_2 + f2*f2 + f5_2*f9_38 + f6_2*f8_19 + f7*f7_38
459	h5 = f0_2*f5 + f1_2*f4 + f2_2*f3 + f6*f9_38 + f7_2*f8_19
460	h6 = f0_2*f6 + f1_2*f5_2 + f2_2*f4 + f3_2*f3 + f7_2*f9_38 + f8*f8_19
461	h7 = f0_2*f7 + f1_2*f6 + f2_2*f5 + f3_2*f4 + f8*f9_38
462	h8 = f0_2*f8 + f1_2*f7_2 + f2_2*f6 + f3_2*f5_2 + f4*f4 + f9*f9_38
463	h9 = f0_2*f9 + f1_2*f8 + f2_2*f7 + f3_2*f6 + f4_2*f5
464
465	return
466}
467
468// FeSquare calculates h = f*f. Can overlap h with f.
469//
470// Preconditions:
471//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
472//
473// Postconditions:
474//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
475func FeSquare(h, f *FieldElement) {
476	h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f)
477	FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
478}
479
480// FeSquare2 sets h = 2 * f * f
481//
482// Can overlap h with f.
483//
484// Preconditions:
485//    |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
486//
487// Postconditions:
488//    |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
489// See fe_mul.c for discussion of implementation strategy.
490func FeSquare2(h, f *FieldElement) {
491	h0, h1, h2, h3, h4, h5, h6, h7, h8, h9 := feSquare(f)
492
493	h0 += h0
494	h1 += h1
495	h2 += h2
496	h3 += h3
497	h4 += h4
498	h5 += h5
499	h6 += h6
500	h7 += h7
501	h8 += h8
502	h9 += h9
503
504	FeCombine(h, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9)
505}
506
507func FeInvert(out, z *FieldElement) {
508	var t0, t1, t2, t3 FieldElement
509	var i int
510
511	FeSquare(&t0, z)        // 2^1
512	FeSquare(&t1, &t0)      // 2^2
513	for i = 1; i < 2; i++ { // 2^3
514		FeSquare(&t1, &t1)
515	}
516	FeMul(&t1, z, &t1)      // 2^3 + 2^0
517	FeMul(&t0, &t0, &t1)    // 2^3 + 2^1 + 2^0
518	FeSquare(&t2, &t0)      // 2^4 + 2^2 + 2^1
519	FeMul(&t1, &t1, &t2)    // 2^4 + 2^3 + 2^2 + 2^1 + 2^0
520	FeSquare(&t2, &t1)      // 5,4,3,2,1
521	for i = 1; i < 5; i++ { // 9,8,7,6,5
522		FeSquare(&t2, &t2)
523	}
524	FeMul(&t1, &t2, &t1)     // 9,8,7,6,5,4,3,2,1,0
525	FeSquare(&t2, &t1)       // 10..1
526	for i = 1; i < 10; i++ { // 19..10
527		FeSquare(&t2, &t2)
528	}
529	FeMul(&t2, &t2, &t1)     // 19..0
530	FeSquare(&t3, &t2)       // 20..1
531	for i = 1; i < 20; i++ { // 39..20
532		FeSquare(&t3, &t3)
533	}
534	FeMul(&t2, &t3, &t2)     // 39..0
535	FeSquare(&t2, &t2)       // 40..1
536	for i = 1; i < 10; i++ { // 49..10
537		FeSquare(&t2, &t2)
538	}
539	FeMul(&t1, &t2, &t1)     // 49..0
540	FeSquare(&t2, &t1)       // 50..1
541	for i = 1; i < 50; i++ { // 99..50
542		FeSquare(&t2, &t2)
543	}
544	FeMul(&t2, &t2, &t1)      // 99..0
545	FeSquare(&t3, &t2)        // 100..1
546	for i = 1; i < 100; i++ { // 199..100
547		FeSquare(&t3, &t3)
548	}
549	FeMul(&t2, &t3, &t2)     // 199..0
550	FeSquare(&t2, &t2)       // 200..1
551	for i = 1; i < 50; i++ { // 249..50
552		FeSquare(&t2, &t2)
553	}
554	FeMul(&t1, &t2, &t1)    // 249..0
555	FeSquare(&t1, &t1)      // 250..1
556	for i = 1; i < 5; i++ { // 254..5
557		FeSquare(&t1, &t1)
558	}
559	FeMul(out, &t1, &t0) // 254..5,3,1,0
560}
561
562func fePow22523(out, z *FieldElement) {
563	var t0, t1, t2 FieldElement
564	var i int
565
566	FeSquare(&t0, z)
567	for i = 1; i < 1; i++ {
568		FeSquare(&t0, &t0)
569	}
570	FeSquare(&t1, &t0)
571	for i = 1; i < 2; i++ {
572		FeSquare(&t1, &t1)
573	}
574	FeMul(&t1, z, &t1)
575	FeMul(&t0, &t0, &t1)
576	FeSquare(&t0, &t0)
577	for i = 1; i < 1; i++ {
578		FeSquare(&t0, &t0)
579	}
580	FeMul(&t0, &t1, &t0)
581	FeSquare(&t1, &t0)
582	for i = 1; i < 5; i++ {
583		FeSquare(&t1, &t1)
584	}
585	FeMul(&t0, &t1, &t0)
586	FeSquare(&t1, &t0)
587	for i = 1; i < 10; i++ {
588		FeSquare(&t1, &t1)
589	}
590	FeMul(&t1, &t1, &t0)
591	FeSquare(&t2, &t1)
592	for i = 1; i < 20; i++ {
593		FeSquare(&t2, &t2)
594	}
595	FeMul(&t1, &t2, &t1)
596	FeSquare(&t1, &t1)
597	for i = 1; i < 10; i++ {
598		FeSquare(&t1, &t1)
599	}
600	FeMul(&t0, &t1, &t0)
601	FeSquare(&t1, &t0)
602	for i = 1; i < 50; i++ {
603		FeSquare(&t1, &t1)
604	}
605	FeMul(&t1, &t1, &t0)
606	FeSquare(&t2, &t1)
607	for i = 1; i < 100; i++ {
608		FeSquare(&t2, &t2)
609	}
610	FeMul(&t1, &t2, &t1)
611	FeSquare(&t1, &t1)
612	for i = 1; i < 50; i++ {
613		FeSquare(&t1, &t1)
614	}
615	FeMul(&t0, &t1, &t0)
616	FeSquare(&t0, &t0)
617	for i = 1; i < 2; i++ {
618		FeSquare(&t0, &t0)
619	}
620	FeMul(out, &t0, z)
621}
622
623// Group elements are members of the elliptic curve -x^2 + y^2 = 1 + d * x^2 *
624// y^2 where d = -121665/121666.
625//
626// Several representations are used:
627//   ProjectiveGroupElement: (X:Y:Z) satisfying x=X/Z, y=Y/Z
628//   ExtendedGroupElement: (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
629//   CompletedGroupElement: ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
630//   PreComputedGroupElement: (y+x,y-x,2dxy)
631
632type ProjectiveGroupElement struct {
633	X, Y, Z FieldElement
634}
635
636type ExtendedGroupElement struct {
637	X, Y, Z, T FieldElement
638}
639
640type CompletedGroupElement struct {
641	X, Y, Z, T FieldElement
642}
643
644type PreComputedGroupElement struct {
645	yPlusX, yMinusX, xy2d FieldElement
646}
647
648type CachedGroupElement struct {
649	yPlusX, yMinusX, Z, T2d FieldElement
650}
651
652func (p *ProjectiveGroupElement) Zero() {
653	FeZero(&p.X)
654	FeOne(&p.Y)
655	FeOne(&p.Z)
656}
657
658func (p *ProjectiveGroupElement) Double(r *CompletedGroupElement) {
659	var t0 FieldElement
660
661	FeSquare(&r.X, &p.X)
662	FeSquare(&r.Z, &p.Y)
663	FeSquare2(&r.T, &p.Z)
664	FeAdd(&r.Y, &p.X, &p.Y)
665	FeSquare(&t0, &r.Y)
666	FeAdd(&r.Y, &r.Z, &r.X)
667	FeSub(&r.Z, &r.Z, &r.X)
668	FeSub(&r.X, &t0, &r.Y)
669	FeSub(&r.T, &r.T, &r.Z)
670}
671
672func (p *ProjectiveGroupElement) ToBytes(s *[32]byte) {
673	var recip, x, y FieldElement
674
675	FeInvert(&recip, &p.Z)
676	FeMul(&x, &p.X, &recip)
677	FeMul(&y, &p.Y, &recip)
678	FeToBytes(s, &y)
679	s[31] ^= FeIsNegative(&x) << 7
680}
681
682func (p *ExtendedGroupElement) Zero() {
683	FeZero(&p.X)
684	FeOne(&p.Y)
685	FeOne(&p.Z)
686	FeZero(&p.T)
687}
688
689func (p *ExtendedGroupElement) Double(r *CompletedGroupElement) {
690	var q ProjectiveGroupElement
691	p.ToProjective(&q)
692	q.Double(r)
693}
694
695func (p *ExtendedGroupElement) ToCached(r *CachedGroupElement) {
696	FeAdd(&r.yPlusX, &p.Y, &p.X)
697	FeSub(&r.yMinusX, &p.Y, &p.X)
698	FeCopy(&r.Z, &p.Z)
699	FeMul(&r.T2d, &p.T, &d2)
700}
701
702func (p *ExtendedGroupElement) ToProjective(r *ProjectiveGroupElement) {
703	FeCopy(&r.X, &p.X)
704	FeCopy(&r.Y, &p.Y)
705	FeCopy(&r.Z, &p.Z)
706}
707
708func (p *ExtendedGroupElement) ToBytes(s *[32]byte) {
709	var recip, x, y FieldElement
710
711	FeInvert(&recip, &p.Z)
712	FeMul(&x, &p.X, &recip)
713	FeMul(&y, &p.Y, &recip)
714	FeToBytes(s, &y)
715	s[31] ^= FeIsNegative(&x) << 7
716}
717
718func (p *ExtendedGroupElement) FromBytes(s *[32]byte) bool {
719	var u, v, v3, vxx, check FieldElement
720
721	FeFromBytes(&p.Y, s)
722	FeOne(&p.Z)
723	FeSquare(&u, &p.Y)
724	FeMul(&v, &u, &d)
725	FeSub(&u, &u, &p.Z) // y = y^2-1
726	FeAdd(&v, &v, &p.Z) // v = dy^2+1
727
728	FeSquare(&v3, &v)
729	FeMul(&v3, &v3, &v) // v3 = v^3
730	FeSquare(&p.X, &v3)
731	FeMul(&p.X, &p.X, &v)
732	FeMul(&p.X, &p.X, &u) // x = uv^7
733
734	fePow22523(&p.X, &p.X) // x = (uv^7)^((q-5)/8)
735	FeMul(&p.X, &p.X, &v3)
736	FeMul(&p.X, &p.X, &u) // x = uv^3(uv^7)^((q-5)/8)
737
738	var tmpX, tmp2 [32]byte
739
740	FeSquare(&vxx, &p.X)
741	FeMul(&vxx, &vxx, &v)
742	FeSub(&check, &vxx, &u) // vx^2-u
743	if FeIsNonZero(&check) == 1 {
744		FeAdd(&check, &vxx, &u) // vx^2+u
745		if FeIsNonZero(&check) == 1 {
746			return false
747		}
748		FeMul(&p.X, &p.X, &SqrtM1)
749
750		FeToBytes(&tmpX, &p.X)
751		for i, v := range tmpX {
752			tmp2[31-i] = v
753		}
754	}
755
756	if FeIsNegative(&p.X) != (s[31] >> 7) {
757		FeNeg(&p.X, &p.X)
758	}
759
760	FeMul(&p.T, &p.X, &p.Y)
761	return true
762}
763
764func (p *CompletedGroupElement) ToProjective(r *ProjectiveGroupElement) {
765	FeMul(&r.X, &p.X, &p.T)
766	FeMul(&r.Y, &p.Y, &p.Z)
767	FeMul(&r.Z, &p.Z, &p.T)
768}
769
770func (p *CompletedGroupElement) ToExtended(r *ExtendedGroupElement) {
771	FeMul(&r.X, &p.X, &p.T)
772	FeMul(&r.Y, &p.Y, &p.Z)
773	FeMul(&r.Z, &p.Z, &p.T)
774	FeMul(&r.T, &p.X, &p.Y)
775}
776
777func (p *PreComputedGroupElement) Zero() {
778	FeOne(&p.yPlusX)
779	FeOne(&p.yMinusX)
780	FeZero(&p.xy2d)
781}
782
783func geAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
784	var t0 FieldElement
785
786	FeAdd(&r.X, &p.Y, &p.X)
787	FeSub(&r.Y, &p.Y, &p.X)
788	FeMul(&r.Z, &r.X, &q.yPlusX)
789	FeMul(&r.Y, &r.Y, &q.yMinusX)
790	FeMul(&r.T, &q.T2d, &p.T)
791	FeMul(&r.X, &p.Z, &q.Z)
792	FeAdd(&t0, &r.X, &r.X)
793	FeSub(&r.X, &r.Z, &r.Y)
794	FeAdd(&r.Y, &r.Z, &r.Y)
795	FeAdd(&r.Z, &t0, &r.T)
796	FeSub(&r.T, &t0, &r.T)
797}
798
799func geSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *CachedGroupElement) {
800	var t0 FieldElement
801
802	FeAdd(&r.X, &p.Y, &p.X)
803	FeSub(&r.Y, &p.Y, &p.X)
804	FeMul(&r.Z, &r.X, &q.yMinusX)
805	FeMul(&r.Y, &r.Y, &q.yPlusX)
806	FeMul(&r.T, &q.T2d, &p.T)
807	FeMul(&r.X, &p.Z, &q.Z)
808	FeAdd(&t0, &r.X, &r.X)
809	FeSub(&r.X, &r.Z, &r.Y)
810	FeAdd(&r.Y, &r.Z, &r.Y)
811	FeSub(&r.Z, &t0, &r.T)
812	FeAdd(&r.T, &t0, &r.T)
813}
814
815func geMixedAdd(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
816	var t0 FieldElement
817
818	FeAdd(&r.X, &p.Y, &p.X)
819	FeSub(&r.Y, &p.Y, &p.X)
820	FeMul(&r.Z, &r.X, &q.yPlusX)
821	FeMul(&r.Y, &r.Y, &q.yMinusX)
822	FeMul(&r.T, &q.xy2d, &p.T)
823	FeAdd(&t0, &p.Z, &p.Z)
824	FeSub(&r.X, &r.Z, &r.Y)
825	FeAdd(&r.Y, &r.Z, &r.Y)
826	FeAdd(&r.Z, &t0, &r.T)
827	FeSub(&r.T, &t0, &r.T)
828}
829
830func geMixedSub(r *CompletedGroupElement, p *ExtendedGroupElement, q *PreComputedGroupElement) {
831	var t0 FieldElement
832
833	FeAdd(&r.X, &p.Y, &p.X)
834	FeSub(&r.Y, &p.Y, &p.X)
835	FeMul(&r.Z, &r.X, &q.yMinusX)
836	FeMul(&r.Y, &r.Y, &q.yPlusX)
837	FeMul(&r.T, &q.xy2d, &p.T)
838	FeAdd(&t0, &p.Z, &p.Z)
839	FeSub(&r.X, &r.Z, &r.Y)
840	FeAdd(&r.Y, &r.Z, &r.Y)
841	FeSub(&r.Z, &t0, &r.T)
842	FeAdd(&r.T, &t0, &r.T)
843}
844
845func slide(r *[256]int8, a *[32]byte) {
846	for i := range r {
847		r[i] = int8(1 & (a[i>>3] >> uint(i&7)))
848	}
849
850	for i := range r {
851		if r[i] != 0 {
852			for b := 1; b <= 6 && i+b < 256; b++ {
853				if r[i+b] != 0 {
854					if r[i]+(r[i+b]<<uint(b)) <= 15 {
855						r[i] += r[i+b] << uint(b)
856						r[i+b] = 0
857					} else if r[i]-(r[i+b]<<uint(b)) >= -15 {
858						r[i] -= r[i+b] << uint(b)
859						for k := i + b; k < 256; k++ {
860							if r[k] == 0 {
861								r[k] = 1
862								break
863							}
864							r[k] = 0
865						}
866					} else {
867						break
868					}
869				}
870			}
871		}
872	}
873}
874
875// GeDoubleScalarMultVartime sets r = a*A + b*B
876// where a = a[0]+256*a[1]+...+256^31 a[31].
877// and b = b[0]+256*b[1]+...+256^31 b[31].
878// B is the Ed25519 base point (x,4/5) with x positive.
879func GeDoubleScalarMultVartime(r *ProjectiveGroupElement, a *[32]byte, A *ExtendedGroupElement, b *[32]byte) {
880	var aSlide, bSlide [256]int8
881	var Ai [8]CachedGroupElement // A,3A,5A,7A,9A,11A,13A,15A
882	var t CompletedGroupElement
883	var u, A2 ExtendedGroupElement
884	var i int
885
886	slide(&aSlide, a)
887	slide(&bSlide, b)
888
889	A.ToCached(&Ai[0])
890	A.Double(&t)
891	t.ToExtended(&A2)
892
893	for i := 0; i < 7; i++ {
894		geAdd(&t, &A2, &Ai[i])
895		t.ToExtended(&u)
896		u.ToCached(&Ai[i+1])
897	}
898
899	r.Zero()
900
901	for i = 255; i >= 0; i-- {
902		if aSlide[i] != 0 || bSlide[i] != 0 {
903			break
904		}
905	}
906
907	for ; i >= 0; i-- {
908		r.Double(&t)
909
910		if aSlide[i] > 0 {
911			t.ToExtended(&u)
912			geAdd(&t, &u, &Ai[aSlide[i]/2])
913		} else if aSlide[i] < 0 {
914			t.ToExtended(&u)
915			geSub(&t, &u, &Ai[(-aSlide[i])/2])
916		}
917
918		if bSlide[i] > 0 {
919			t.ToExtended(&u)
920			geMixedAdd(&t, &u, &bi[bSlide[i]/2])
921		} else if bSlide[i] < 0 {
922			t.ToExtended(&u)
923			geMixedSub(&t, &u, &bi[(-bSlide[i])/2])
924		}
925
926		t.ToProjective(r)
927	}
928}
929
930// equal returns 1 if b == c and 0 otherwise, assuming that b and c are
931// non-negative.
932func equal(b, c int32) int32 {
933	x := uint32(b ^ c)
934	x--
935	return int32(x >> 31)
936}
937
938// negative returns 1 if b < 0 and 0 otherwise.
939func negative(b int32) int32 {
940	return (b >> 31) & 1
941}
942
943func PreComputedGroupElementCMove(t, u *PreComputedGroupElement, b int32) {
944	FeCMove(&t.yPlusX, &u.yPlusX, b)
945	FeCMove(&t.yMinusX, &u.yMinusX, b)
946	FeCMove(&t.xy2d, &u.xy2d, b)
947}
948
949func selectPoint(t *PreComputedGroupElement, pos int32, b int32) {
950	var minusT PreComputedGroupElement
951	bNegative := negative(b)
952	bAbs := b - (((-bNegative) & b) << 1)
953
954	t.Zero()
955	for i := int32(0); i < 8; i++ {
956		PreComputedGroupElementCMove(t, &base[pos][i], equal(bAbs, i+1))
957	}
958	FeCopy(&minusT.yPlusX, &t.yMinusX)
959	FeCopy(&minusT.yMinusX, &t.yPlusX)
960	FeNeg(&minusT.xy2d, &t.xy2d)
961	PreComputedGroupElementCMove(t, &minusT, bNegative)
962}
963
964// GeScalarMultBase computes h = a*B, where
965//   a = a[0]+256*a[1]+...+256^31 a[31]
966//   B is the Ed25519 base point (x,4/5) with x positive.
967//
968// Preconditions:
969//   a[31] <= 127
970func GeScalarMultBase(h *ExtendedGroupElement, a *[32]byte) {
971	var e [64]int8
972
973	for i, v := range a {
974		e[2*i] = int8(v & 15)
975		e[2*i+1] = int8((v >> 4) & 15)
976	}
977
978	// each e[i] is between 0 and 15 and e[63] is between 0 and 7.
979
980	carry := int8(0)
981	for i := 0; i < 63; i++ {
982		e[i] += carry
983		carry = (e[i] + 8) >> 4
984		e[i] -= carry << 4
985	}
986	e[63] += carry
987	// each e[i] is between -8 and 8.
988
989	h.Zero()
990	var t PreComputedGroupElement
991	var r CompletedGroupElement
992	for i := int32(1); i < 64; i += 2 {
993		selectPoint(&t, i/2, int32(e[i]))
994		geMixedAdd(&r, h, &t)
995		r.ToExtended(h)
996	}
997
998	var s ProjectiveGroupElement
999
1000	h.Double(&r)
1001	r.ToProjective(&s)
1002	s.Double(&r)
1003	r.ToProjective(&s)
1004	s.Double(&r)
1005	r.ToProjective(&s)
1006	s.Double(&r)
1007	r.ToExtended(h)
1008
1009	for i := int32(0); i < 64; i += 2 {
1010		selectPoint(&t, i/2, int32(e[i]))
1011		geMixedAdd(&r, h, &t)
1012		r.ToExtended(h)
1013	}
1014}
1015
1016// The scalars are GF(2^252 + 27742317777372353535851937790883648493).
1017
1018// Input:
1019//   a[0]+256*a[1]+...+256^31*a[31] = a
1020//   b[0]+256*b[1]+...+256^31*b[31] = b
1021//   c[0]+256*c[1]+...+256^31*c[31] = c
1022//
1023// Output:
1024//   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
1025//   where l = 2^252 + 27742317777372353535851937790883648493.
1026func ScMulAdd(s, a, b, c *[32]byte) {
1027	a0 := 2097151 & load3(a[:])
1028	a1 := 2097151 & (load4(a[2:]) >> 5)
1029	a2 := 2097151 & (load3(a[5:]) >> 2)
1030	a3 := 2097151 & (load4(a[7:]) >> 7)
1031	a4 := 2097151 & (load4(a[10:]) >> 4)
1032	a5 := 2097151 & (load3(a[13:]) >> 1)
1033	a6 := 2097151 & (load4(a[15:]) >> 6)
1034	a7 := 2097151 & (load3(a[18:]) >> 3)
1035	a8 := 2097151 & load3(a[21:])
1036	a9 := 2097151 & (load4(a[23:]) >> 5)
1037	a10 := 2097151 & (load3(a[26:]) >> 2)
1038	a11 := (load4(a[28:]) >> 7)
1039	b0 := 2097151 & load3(b[:])
1040	b1 := 2097151 & (load4(b[2:]) >> 5)
1041	b2 := 2097151 & (load3(b[5:]) >> 2)
1042	b3 := 2097151 & (load4(b[7:]) >> 7)
1043	b4 := 2097151 & (load4(b[10:]) >> 4)
1044	b5 := 2097151 & (load3(b[13:]) >> 1)
1045	b6 := 2097151 & (load4(b[15:]) >> 6)
1046	b7 := 2097151 & (load3(b[18:]) >> 3)
1047	b8 := 2097151 & load3(b[21:])
1048	b9 := 2097151 & (load4(b[23:]) >> 5)
1049	b10 := 2097151 & (load3(b[26:]) >> 2)
1050	b11 := (load4(b[28:]) >> 7)
1051	c0 := 2097151 & load3(c[:])
1052	c1 := 2097151 & (load4(c[2:]) >> 5)
1053	c2 := 2097151 & (load3(c[5:]) >> 2)
1054	c3 := 2097151 & (load4(c[7:]) >> 7)
1055	c4 := 2097151 & (load4(c[10:]) >> 4)
1056	c5 := 2097151 & (load3(c[13:]) >> 1)
1057	c6 := 2097151 & (load4(c[15:]) >> 6)
1058	c7 := 2097151 & (load3(c[18:]) >> 3)
1059	c8 := 2097151 & load3(c[21:])
1060	c9 := 2097151 & (load4(c[23:]) >> 5)
1061	c10 := 2097151 & (load3(c[26:]) >> 2)
1062	c11 := (load4(c[28:]) >> 7)
1063	var carry [23]int64
1064
1065	s0 := c0 + a0*b0
1066	s1 := c1 + a0*b1 + a1*b0
1067	s2 := c2 + a0*b2 + a1*b1 + a2*b0
1068	s3 := c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0
1069	s4 := c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0
1070	s5 := c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0
1071	s6 := c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0
1072	s7 := c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0
1073	s8 := c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0
1074	s9 := c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0
1075	s10 := c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0
1076	s11 := c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0
1077	s12 := a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1
1078	s13 := a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2
1079	s14 := a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3
1080	s15 := a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4
1081	s16 := a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5
1082	s17 := a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6
1083	s18 := a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7
1084	s19 := a8*b11 + a9*b10 + a10*b9 + a11*b8
1085	s20 := a9*b11 + a10*b10 + a11*b9
1086	s21 := a10*b11 + a11*b10
1087	s22 := a11 * b11
1088	s23 := int64(0)
1089
1090	carry[0] = (s0 + (1 << 20)) >> 21
1091	s1 += carry[0]
1092	s0 -= carry[0] << 21
1093	carry[2] = (s2 + (1 << 20)) >> 21
1094	s3 += carry[2]
1095	s2 -= carry[2] << 21
1096	carry[4] = (s4 + (1 << 20)) >> 21
1097	s5 += carry[4]
1098	s4 -= carry[4] << 21
1099	carry[6] = (s6 + (1 << 20)) >> 21
1100	s7 += carry[6]
1101	s6 -= carry[6] << 21
1102	carry[8] = (s8 + (1 << 20)) >> 21
1103	s9 += carry[8]
1104	s8 -= carry[8] << 21
1105	carry[10] = (s10 + (1 << 20)) >> 21
1106	s11 += carry[10]
1107	s10 -= carry[10] << 21
1108	carry[12] = (s12 + (1 << 20)) >> 21
1109	s13 += carry[12]
1110	s12 -= carry[12] << 21
1111	carry[14] = (s14 + (1 << 20)) >> 21
1112	s15 += carry[14]
1113	s14 -= carry[14] << 21
1114	carry[16] = (s16 + (1 << 20)) >> 21
1115	s17 += carry[16]
1116	s16 -= carry[16] << 21
1117	carry[18] = (s18 + (1 << 20)) >> 21
1118	s19 += carry[18]
1119	s18 -= carry[18] << 21
1120	carry[20] = (s20 + (1 << 20)) >> 21
1121	s21 += carry[20]
1122	s20 -= carry[20] << 21
1123	carry[22] = (s22 + (1 << 20)) >> 21
1124	s23 += carry[22]
1125	s22 -= carry[22] << 21
1126
1127	carry[1] = (s1 + (1 << 20)) >> 21
1128	s2 += carry[1]
1129	s1 -= carry[1] << 21
1130	carry[3] = (s3 + (1 << 20)) >> 21
1131	s4 += carry[3]
1132	s3 -= carry[3] << 21
1133	carry[5] = (s5 + (1 << 20)) >> 21
1134	s6 += carry[5]
1135	s5 -= carry[5] << 21
1136	carry[7] = (s7 + (1 << 20)) >> 21
1137	s8 += carry[7]
1138	s7 -= carry[7] << 21
1139	carry[9] = (s9 + (1 << 20)) >> 21
1140	s10 += carry[9]
1141	s9 -= carry[9] << 21
1142	carry[11] = (s11 + (1 << 20)) >> 21
1143	s12 += carry[11]
1144	s11 -= carry[11] << 21
1145	carry[13] = (s13 + (1 << 20)) >> 21
1146	s14 += carry[13]
1147	s13 -= carry[13] << 21
1148	carry[15] = (s15 + (1 << 20)) >> 21
1149	s16 += carry[15]
1150	s15 -= carry[15] << 21
1151	carry[17] = (s17 + (1 << 20)) >> 21
1152	s18 += carry[17]
1153	s17 -= carry[17] << 21
1154	carry[19] = (s19 + (1 << 20)) >> 21
1155	s20 += carry[19]
1156	s19 -= carry[19] << 21
1157	carry[21] = (s21 + (1 << 20)) >> 21
1158	s22 += carry[21]
1159	s21 -= carry[21] << 21
1160
1161	s11 += s23 * 666643
1162	s12 += s23 * 470296
1163	s13 += s23 * 654183
1164	s14 -= s23 * 997805
1165	s15 += s23 * 136657
1166	s16 -= s23 * 683901
1167	s23 = 0
1168
1169	s10 += s22 * 666643
1170	s11 += s22 * 470296
1171	s12 += s22 * 654183
1172	s13 -= s22 * 997805
1173	s14 += s22 * 136657
1174	s15 -= s22 * 683901
1175	s22 = 0
1176
1177	s9 += s21 * 666643
1178	s10 += s21 * 470296
1179	s11 += s21 * 654183
1180	s12 -= s21 * 997805
1181	s13 += s21 * 136657
1182	s14 -= s21 * 683901
1183	s21 = 0
1184
1185	s8 += s20 * 666643
1186	s9 += s20 * 470296
1187	s10 += s20 * 654183
1188	s11 -= s20 * 997805
1189	s12 += s20 * 136657
1190	s13 -= s20 * 683901
1191	s20 = 0
1192
1193	s7 += s19 * 666643
1194	s8 += s19 * 470296
1195	s9 += s19 * 654183
1196	s10 -= s19 * 997805
1197	s11 += s19 * 136657
1198	s12 -= s19 * 683901
1199	s19 = 0
1200
1201	s6 += s18 * 666643
1202	s7 += s18 * 470296
1203	s8 += s18 * 654183
1204	s9 -= s18 * 997805
1205	s10 += s18 * 136657
1206	s11 -= s18 * 683901
1207	s18 = 0
1208
1209	carry[6] = (s6 + (1 << 20)) >> 21
1210	s7 += carry[6]
1211	s6 -= carry[6] << 21
1212	carry[8] = (s8 + (1 << 20)) >> 21
1213	s9 += carry[8]
1214	s8 -= carry[8] << 21
1215	carry[10] = (s10 + (1 << 20)) >> 21
1216	s11 += carry[10]
1217	s10 -= carry[10] << 21
1218	carry[12] = (s12 + (1 << 20)) >> 21
1219	s13 += carry[12]
1220	s12 -= carry[12] << 21
1221	carry[14] = (s14 + (1 << 20)) >> 21
1222	s15 += carry[14]
1223	s14 -= carry[14] << 21
1224	carry[16] = (s16 + (1 << 20)) >> 21
1225	s17 += carry[16]
1226	s16 -= carry[16] << 21
1227
1228	carry[7] = (s7 + (1 << 20)) >> 21
1229	s8 += carry[7]
1230	s7 -= carry[7] << 21
1231	carry[9] = (s9 + (1 << 20)) >> 21
1232	s10 += carry[9]
1233	s9 -= carry[9] << 21
1234	carry[11] = (s11 + (1 << 20)) >> 21
1235	s12 += carry[11]
1236	s11 -= carry[11] << 21
1237	carry[13] = (s13 + (1 << 20)) >> 21
1238	s14 += carry[13]
1239	s13 -= carry[13] << 21
1240	carry[15] = (s15 + (1 << 20)) >> 21
1241	s16 += carry[15]
1242	s15 -= carry[15] << 21
1243
1244	s5 += s17 * 666643
1245	s6 += s17 * 470296
1246	s7 += s17 * 654183
1247	s8 -= s17 * 997805
1248	s9 += s17 * 136657
1249	s10 -= s17 * 683901
1250	s17 = 0
1251
1252	s4 += s16 * 666643
1253	s5 += s16 * 470296
1254	s6 += s16 * 654183
1255	s7 -= s16 * 997805
1256	s8 += s16 * 136657
1257	s9 -= s16 * 683901
1258	s16 = 0
1259
1260	s3 += s15 * 666643
1261	s4 += s15 * 470296
1262	s5 += s15 * 654183
1263	s6 -= s15 * 997805
1264	s7 += s15 * 136657
1265	s8 -= s15 * 683901
1266	s15 = 0
1267
1268	s2 += s14 * 666643
1269	s3 += s14 * 470296
1270	s4 += s14 * 654183
1271	s5 -= s14 * 997805
1272	s6 += s14 * 136657
1273	s7 -= s14 * 683901
1274	s14 = 0
1275
1276	s1 += s13 * 666643
1277	s2 += s13 * 470296
1278	s3 += s13 * 654183
1279	s4 -= s13 * 997805
1280	s5 += s13 * 136657
1281	s6 -= s13 * 683901
1282	s13 = 0
1283
1284	s0 += s12 * 666643
1285	s1 += s12 * 470296
1286	s2 += s12 * 654183
1287	s3 -= s12 * 997805
1288	s4 += s12 * 136657
1289	s5 -= s12 * 683901
1290	s12 = 0
1291
1292	carry[0] = (s0 + (1 << 20)) >> 21
1293	s1 += carry[0]
1294	s0 -= carry[0] << 21
1295	carry[2] = (s2 + (1 << 20)) >> 21
1296	s3 += carry[2]
1297	s2 -= carry[2] << 21
1298	carry[4] = (s4 + (1 << 20)) >> 21
1299	s5 += carry[4]
1300	s4 -= carry[4] << 21
1301	carry[6] = (s6 + (1 << 20)) >> 21
1302	s7 += carry[6]
1303	s6 -= carry[6] << 21
1304	carry[8] = (s8 + (1 << 20)) >> 21
1305	s9 += carry[8]
1306	s8 -= carry[8] << 21
1307	carry[10] = (s10 + (1 << 20)) >> 21
1308	s11 += carry[10]
1309	s10 -= carry[10] << 21
1310
1311	carry[1] = (s1 + (1 << 20)) >> 21
1312	s2 += carry[1]
1313	s1 -= carry[1] << 21
1314	carry[3] = (s3 + (1 << 20)) >> 21
1315	s4 += carry[3]
1316	s3 -= carry[3] << 21
1317	carry[5] = (s5 + (1 << 20)) >> 21
1318	s6 += carry[5]
1319	s5 -= carry[5] << 21
1320	carry[7] = (s7 + (1 << 20)) >> 21
1321	s8 += carry[7]
1322	s7 -= carry[7] << 21
1323	carry[9] = (s9 + (1 << 20)) >> 21
1324	s10 += carry[9]
1325	s9 -= carry[9] << 21
1326	carry[11] = (s11 + (1 << 20)) >> 21
1327	s12 += carry[11]
1328	s11 -= carry[11] << 21
1329
1330	s0 += s12 * 666643
1331	s1 += s12 * 470296
1332	s2 += s12 * 654183
1333	s3 -= s12 * 997805
1334	s4 += s12 * 136657
1335	s5 -= s12 * 683901
1336	s12 = 0
1337
1338	carry[0] = s0 >> 21
1339	s1 += carry[0]
1340	s0 -= carry[0] << 21
1341	carry[1] = s1 >> 21
1342	s2 += carry[1]
1343	s1 -= carry[1] << 21
1344	carry[2] = s2 >> 21
1345	s3 += carry[2]
1346	s2 -= carry[2] << 21
1347	carry[3] = s3 >> 21
1348	s4 += carry[3]
1349	s3 -= carry[3] << 21
1350	carry[4] = s4 >> 21
1351	s5 += carry[4]
1352	s4 -= carry[4] << 21
1353	carry[5] = s5 >> 21
1354	s6 += carry[5]
1355	s5 -= carry[5] << 21
1356	carry[6] = s6 >> 21
1357	s7 += carry[6]
1358	s6 -= carry[6] << 21
1359	carry[7] = s7 >> 21
1360	s8 += carry[7]
1361	s7 -= carry[7] << 21
1362	carry[8] = s8 >> 21
1363	s9 += carry[8]
1364	s8 -= carry[8] << 21
1365	carry[9] = s9 >> 21
1366	s10 += carry[9]
1367	s9 -= carry[9] << 21
1368	carry[10] = s10 >> 21
1369	s11 += carry[10]
1370	s10 -= carry[10] << 21
1371	carry[11] = s11 >> 21
1372	s12 += carry[11]
1373	s11 -= carry[11] << 21
1374
1375	s0 += s12 * 666643
1376	s1 += s12 * 470296
1377	s2 += s12 * 654183
1378	s3 -= s12 * 997805
1379	s4 += s12 * 136657
1380	s5 -= s12 * 683901
1381	s12 = 0
1382
1383	carry[0] = s0 >> 21
1384	s1 += carry[0]
1385	s0 -= carry[0] << 21
1386	carry[1] = s1 >> 21
1387	s2 += carry[1]
1388	s1 -= carry[1] << 21
1389	carry[2] = s2 >> 21
1390	s3 += carry[2]
1391	s2 -= carry[2] << 21
1392	carry[3] = s3 >> 21
1393	s4 += carry[3]
1394	s3 -= carry[3] << 21
1395	carry[4] = s4 >> 21
1396	s5 += carry[4]
1397	s4 -= carry[4] << 21
1398	carry[5] = s5 >> 21
1399	s6 += carry[5]
1400	s5 -= carry[5] << 21
1401	carry[6] = s6 >> 21
1402	s7 += carry[6]
1403	s6 -= carry[6] << 21
1404	carry[7] = s7 >> 21
1405	s8 += carry[7]
1406	s7 -= carry[7] << 21
1407	carry[8] = s8 >> 21
1408	s9 += carry[8]
1409	s8 -= carry[8] << 21
1410	carry[9] = s9 >> 21
1411	s10 += carry[9]
1412	s9 -= carry[9] << 21
1413	carry[10] = s10 >> 21
1414	s11 += carry[10]
1415	s10 -= carry[10] << 21
1416
1417	s[0] = byte(s0 >> 0)
1418	s[1] = byte(s0 >> 8)
1419	s[2] = byte((s0 >> 16) | (s1 << 5))
1420	s[3] = byte(s1 >> 3)
1421	s[4] = byte(s1 >> 11)
1422	s[5] = byte((s1 >> 19) | (s2 << 2))
1423	s[6] = byte(s2 >> 6)
1424	s[7] = byte((s2 >> 14) | (s3 << 7))
1425	s[8] = byte(s3 >> 1)
1426	s[9] = byte(s3 >> 9)
1427	s[10] = byte((s3 >> 17) | (s4 << 4))
1428	s[11] = byte(s4 >> 4)
1429	s[12] = byte(s4 >> 12)
1430	s[13] = byte((s4 >> 20) | (s5 << 1))
1431	s[14] = byte(s5 >> 7)
1432	s[15] = byte((s5 >> 15) | (s6 << 6))
1433	s[16] = byte(s6 >> 2)
1434	s[17] = byte(s6 >> 10)
1435	s[18] = byte((s6 >> 18) | (s7 << 3))
1436	s[19] = byte(s7 >> 5)
1437	s[20] = byte(s7 >> 13)
1438	s[21] = byte(s8 >> 0)
1439	s[22] = byte(s8 >> 8)
1440	s[23] = byte((s8 >> 16) | (s9 << 5))
1441	s[24] = byte(s9 >> 3)
1442	s[25] = byte(s9 >> 11)
1443	s[26] = byte((s9 >> 19) | (s10 << 2))
1444	s[27] = byte(s10 >> 6)
1445	s[28] = byte((s10 >> 14) | (s11 << 7))
1446	s[29] = byte(s11 >> 1)
1447	s[30] = byte(s11 >> 9)
1448	s[31] = byte(s11 >> 17)
1449}
1450
1451// Input:
1452//   s[0]+256*s[1]+...+256^63*s[63] = s
1453//
1454// Output:
1455//   s[0]+256*s[1]+...+256^31*s[31] = s mod l
1456//   where l = 2^252 + 27742317777372353535851937790883648493.
1457func ScReduce(out *[32]byte, s *[64]byte) {
1458	s0 := 2097151 & load3(s[:])
1459	s1 := 2097151 & (load4(s[2:]) >> 5)
1460	s2 := 2097151 & (load3(s[5:]) >> 2)
1461	s3 := 2097151 & (load4(s[7:]) >> 7)
1462	s4 := 2097151 & (load4(s[10:]) >> 4)
1463	s5 := 2097151 & (load3(s[13:]) >> 1)
1464	s6 := 2097151 & (load4(s[15:]) >> 6)
1465	s7 := 2097151 & (load3(s[18:]) >> 3)
1466	s8 := 2097151 & load3(s[21:])
1467	s9 := 2097151 & (load4(s[23:]) >> 5)
1468	s10 := 2097151 & (load3(s[26:]) >> 2)
1469	s11 := 2097151 & (load4(s[28:]) >> 7)
1470	s12 := 2097151 & (load4(s[31:]) >> 4)
1471	s13 := 2097151 & (load3(s[34:]) >> 1)
1472	s14 := 2097151 & (load4(s[36:]) >> 6)
1473	s15 := 2097151 & (load3(s[39:]) >> 3)
1474	s16 := 2097151 & load3(s[42:])
1475	s17 := 2097151 & (load4(s[44:]) >> 5)
1476	s18 := 2097151 & (load3(s[47:]) >> 2)
1477	s19 := 2097151 & (load4(s[49:]) >> 7)
1478	s20 := 2097151 & (load4(s[52:]) >> 4)
1479	s21 := 2097151 & (load3(s[55:]) >> 1)
1480	s22 := 2097151 & (load4(s[57:]) >> 6)
1481	s23 := (load4(s[60:]) >> 3)
1482
1483	s11 += s23 * 666643
1484	s12 += s23 * 470296
1485	s13 += s23 * 654183
1486	s14 -= s23 * 997805
1487	s15 += s23 * 136657
1488	s16 -= s23 * 683901
1489	s23 = 0
1490
1491	s10 += s22 * 666643
1492	s11 += s22 * 470296
1493	s12 += s22 * 654183
1494	s13 -= s22 * 997805
1495	s14 += s22 * 136657
1496	s15 -= s22 * 683901
1497	s22 = 0
1498
1499	s9 += s21 * 666643
1500	s10 += s21 * 470296
1501	s11 += s21 * 654183
1502	s12 -= s21 * 997805
1503	s13 += s21 * 136657
1504	s14 -= s21 * 683901
1505	s21 = 0
1506
1507	s8 += s20 * 666643
1508	s9 += s20 * 470296
1509	s10 += s20 * 654183
1510	s11 -= s20 * 997805
1511	s12 += s20 * 136657
1512	s13 -= s20 * 683901
1513	s20 = 0
1514
1515	s7 += s19 * 666643
1516	s8 += s19 * 470296
1517	s9 += s19 * 654183
1518	s10 -= s19 * 997805
1519	s11 += s19 * 136657
1520	s12 -= s19 * 683901
1521	s19 = 0
1522
1523	s6 += s18 * 666643
1524	s7 += s18 * 470296
1525	s8 += s18 * 654183
1526	s9 -= s18 * 997805
1527	s10 += s18 * 136657
1528	s11 -= s18 * 683901
1529	s18 = 0
1530
1531	var carry [17]int64
1532
1533	carry[6] = (s6 + (1 << 20)) >> 21
1534	s7 += carry[6]
1535	s6 -= carry[6] << 21
1536	carry[8] = (s8 + (1 << 20)) >> 21
1537	s9 += carry[8]
1538	s8 -= carry[8] << 21
1539	carry[10] = (s10 + (1 << 20)) >> 21
1540	s11 += carry[10]
1541	s10 -= carry[10] << 21
1542	carry[12] = (s12 + (1 << 20)) >> 21
1543	s13 += carry[12]
1544	s12 -= carry[12] << 21
1545	carry[14] = (s14 + (1 << 20)) >> 21
1546	s15 += carry[14]
1547	s14 -= carry[14] << 21
1548	carry[16] = (s16 + (1 << 20)) >> 21
1549	s17 += carry[16]
1550	s16 -= carry[16] << 21
1551
1552	carry[7] = (s7 + (1 << 20)) >> 21
1553	s8 += carry[7]
1554	s7 -= carry[7] << 21
1555	carry[9] = (s9 + (1 << 20)) >> 21
1556	s10 += carry[9]
1557	s9 -= carry[9] << 21
1558	carry[11] = (s11 + (1 << 20)) >> 21
1559	s12 += carry[11]
1560	s11 -= carry[11] << 21
1561	carry[13] = (s13 + (1 << 20)) >> 21
1562	s14 += carry[13]
1563	s13 -= carry[13] << 21
1564	carry[15] = (s15 + (1 << 20)) >> 21
1565	s16 += carry[15]
1566	s15 -= carry[15] << 21
1567
1568	s5 += s17 * 666643
1569	s6 += s17 * 470296
1570	s7 += s17 * 654183
1571	s8 -= s17 * 997805
1572	s9 += s17 * 136657
1573	s10 -= s17 * 683901
1574	s17 = 0
1575
1576	s4 += s16 * 666643
1577	s5 += s16 * 470296
1578	s6 += s16 * 654183
1579	s7 -= s16 * 997805
1580	s8 += s16 * 136657
1581	s9 -= s16 * 683901
1582	s16 = 0
1583
1584	s3 += s15 * 666643
1585	s4 += s15 * 470296
1586	s5 += s15 * 654183
1587	s6 -= s15 * 997805
1588	s7 += s15 * 136657
1589	s8 -= s15 * 683901
1590	s15 = 0
1591
1592	s2 += s14 * 666643
1593	s3 += s14 * 470296
1594	s4 += s14 * 654183
1595	s5 -= s14 * 997805
1596	s6 += s14 * 136657
1597	s7 -= s14 * 683901
1598	s14 = 0
1599
1600	s1 += s13 * 666643
1601	s2 += s13 * 470296
1602	s3 += s13 * 654183
1603	s4 -= s13 * 997805
1604	s5 += s13 * 136657
1605	s6 -= s13 * 683901
1606	s13 = 0
1607
1608	s0 += s12 * 666643
1609	s1 += s12 * 470296
1610	s2 += s12 * 654183
1611	s3 -= s12 * 997805
1612	s4 += s12 * 136657
1613	s5 -= s12 * 683901
1614	s12 = 0
1615
1616	carry[0] = (s0 + (1 << 20)) >> 21
1617	s1 += carry[0]
1618	s0 -= carry[0] << 21
1619	carry[2] = (s2 + (1 << 20)) >> 21
1620	s3 += carry[2]
1621	s2 -= carry[2] << 21
1622	carry[4] = (s4 + (1 << 20)) >> 21
1623	s5 += carry[4]
1624	s4 -= carry[4] << 21
1625	carry[6] = (s6 + (1 << 20)) >> 21
1626	s7 += carry[6]
1627	s6 -= carry[6] << 21
1628	carry[8] = (s8 + (1 << 20)) >> 21
1629	s9 += carry[8]
1630	s8 -= carry[8] << 21
1631	carry[10] = (s10 + (1 << 20)) >> 21
1632	s11 += carry[10]
1633	s10 -= carry[10] << 21
1634
1635	carry[1] = (s1 + (1 << 20)) >> 21
1636	s2 += carry[1]
1637	s1 -= carry[1] << 21
1638	carry[3] = (s3 + (1 << 20)) >> 21
1639	s4 += carry[3]
1640	s3 -= carry[3] << 21
1641	carry[5] = (s5 + (1 << 20)) >> 21
1642	s6 += carry[5]
1643	s5 -= carry[5] << 21
1644	carry[7] = (s7 + (1 << 20)) >> 21
1645	s8 += carry[7]
1646	s7 -= carry[7] << 21
1647	carry[9] = (s9 + (1 << 20)) >> 21
1648	s10 += carry[9]
1649	s9 -= carry[9] << 21
1650	carry[11] = (s11 + (1 << 20)) >> 21
1651	s12 += carry[11]
1652	s11 -= carry[11] << 21
1653
1654	s0 += s12 * 666643
1655	s1 += s12 * 470296
1656	s2 += s12 * 654183
1657	s3 -= s12 * 997805
1658	s4 += s12 * 136657
1659	s5 -= s12 * 683901
1660	s12 = 0
1661
1662	carry[0] = s0 >> 21
1663	s1 += carry[0]
1664	s0 -= carry[0] << 21
1665	carry[1] = s1 >> 21
1666	s2 += carry[1]
1667	s1 -= carry[1] << 21
1668	carry[2] = s2 >> 21
1669	s3 += carry[2]
1670	s2 -= carry[2] << 21
1671	carry[3] = s3 >> 21
1672	s4 += carry[3]
1673	s3 -= carry[3] << 21
1674	carry[4] = s4 >> 21
1675	s5 += carry[4]
1676	s4 -= carry[4] << 21
1677	carry[5] = s5 >> 21
1678	s6 += carry[5]
1679	s5 -= carry[5] << 21
1680	carry[6] = s6 >> 21
1681	s7 += carry[6]
1682	s6 -= carry[6] << 21
1683	carry[7] = s7 >> 21
1684	s8 += carry[7]
1685	s7 -= carry[7] << 21
1686	carry[8] = s8 >> 21
1687	s9 += carry[8]
1688	s8 -= carry[8] << 21
1689	carry[9] = s9 >> 21
1690	s10 += carry[9]
1691	s9 -= carry[9] << 21
1692	carry[10] = s10 >> 21
1693	s11 += carry[10]
1694	s10 -= carry[10] << 21
1695	carry[11] = s11 >> 21
1696	s12 += carry[11]
1697	s11 -= carry[11] << 21
1698
1699	s0 += s12 * 666643
1700	s1 += s12 * 470296
1701	s2 += s12 * 654183
1702	s3 -= s12 * 997805
1703	s4 += s12 * 136657
1704	s5 -= s12 * 683901
1705	s12 = 0
1706
1707	carry[0] = s0 >> 21
1708	s1 += carry[0]
1709	s0 -= carry[0] << 21
1710	carry[1] = s1 >> 21
1711	s2 += carry[1]
1712	s1 -= carry[1] << 21
1713	carry[2] = s2 >> 21
1714	s3 += carry[2]
1715	s2 -= carry[2] << 21
1716	carry[3] = s3 >> 21
1717	s4 += carry[3]
1718	s3 -= carry[3] << 21
1719	carry[4] = s4 >> 21
1720	s5 += carry[4]
1721	s4 -= carry[4] << 21
1722	carry[5] = s5 >> 21
1723	s6 += carry[5]
1724	s5 -= carry[5] << 21
1725	carry[6] = s6 >> 21
1726	s7 += carry[6]
1727	s6 -= carry[6] << 21
1728	carry[7] = s7 >> 21
1729	s8 += carry[7]
1730	s7 -= carry[7] << 21
1731	carry[8] = s8 >> 21
1732	s9 += carry[8]
1733	s8 -= carry[8] << 21
1734	carry[9] = s9 >> 21
1735	s10 += carry[9]
1736	s9 -= carry[9] << 21
1737	carry[10] = s10 >> 21
1738	s11 += carry[10]
1739	s10 -= carry[10] << 21
1740
1741	out[0] = byte(s0 >> 0)
1742	out[1] = byte(s0 >> 8)
1743	out[2] = byte((s0 >> 16) | (s1 << 5))
1744	out[3] = byte(s1 >> 3)
1745	out[4] = byte(s1 >> 11)
1746	out[5] = byte((s1 >> 19) | (s2 << 2))
1747	out[6] = byte(s2 >> 6)
1748	out[7] = byte((s2 >> 14) | (s3 << 7))
1749	out[8] = byte(s3 >> 1)
1750	out[9] = byte(s3 >> 9)
1751	out[10] = byte((s3 >> 17) | (s4 << 4))
1752	out[11] = byte(s4 >> 4)
1753	out[12] = byte(s4 >> 12)
1754	out[13] = byte((s4 >> 20) | (s5 << 1))
1755	out[14] = byte(s5 >> 7)
1756	out[15] = byte((s5 >> 15) | (s6 << 6))
1757	out[16] = byte(s6 >> 2)
1758	out[17] = byte(s6 >> 10)
1759	out[18] = byte((s6 >> 18) | (s7 << 3))
1760	out[19] = byte(s7 >> 5)
1761	out[20] = byte(s7 >> 13)
1762	out[21] = byte(s8 >> 0)
1763	out[22] = byte(s8 >> 8)
1764	out[23] = byte((s8 >> 16) | (s9 << 5))
1765	out[24] = byte(s9 >> 3)
1766	out[25] = byte(s9 >> 11)
1767	out[26] = byte((s9 >> 19) | (s10 << 2))
1768	out[27] = byte(s10 >> 6)
1769	out[28] = byte((s10 >> 14) | (s11 << 7))
1770	out[29] = byte(s11 >> 1)
1771	out[30] = byte(s11 >> 9)
1772	out[31] = byte(s11 >> 17)
1773}
1774
1775// order is the order of Curve25519 in little-endian form.
1776var order = [4]uint64{0x5812631a5cf5d3ed, 0x14def9dea2f79cd6, 0, 0x1000000000000000}
1777
1778// ScMinimal returns true if the given scalar is less than the order of the
1779// curve.
1780func ScMinimal(scalar *[32]byte) bool {
1781	for i := 3; ; i-- {
1782		v := binary.LittleEndian.Uint64(scalar[i*8:])
1783		if v > order[i] {
1784			return false
1785		} else if v < order[i] {
1786			break
1787		} else if i == 0 {
1788			return false
1789		}
1790	}
1791
1792	return true
1793}
1794