1// Copyright 2020 ConsenSys Software Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package amd64
16
17import (
18	"io"
19
20	"github.com/consensys/bavard"
21	ramd64 "github.com/consensys/bavard/amd64"
22	"github.com/consensys/gnark-crypto/ecc"
23	"github.com/consensys/gnark-crypto/field"
24	"github.com/consensys/gnark-crypto/field/asm/amd64"
25	"github.com/consensys/gnark-crypto/internal/generator/config"
26)
27
28// Fq2Amd64 ...
29type Fq2Amd64 struct {
30	*amd64.FFAmd64
31	config config.Curve
32	w      io.Writer
33	F      *field.Field
34}
35
36// NewFq2Amd64 ...
37func NewFq2Amd64(w io.Writer, F *field.Field, config config.Curve) *Fq2Amd64 {
38	return &Fq2Amd64{
39		amd64.NewFFAmd64(w, F),
40		config,
41		w,
42		F,
43	}
44}
45
46// Generate ...
47func (fq2 *Fq2Amd64) Generate(forceADXCheck bool) error {
48	fq2.WriteLn(bavard.Apache2Header("ConsenSys Software Inc.", 2020))
49
50	fq2.WriteLn("#include \"textflag.h\"")
51	fq2.WriteLn("#include \"funcdata.h\"")
52
53	fq2.GenerateDefines()
54	if fq2.config.ID() == ecc.BN254 {
55		fq2.generateMulDefine()
56	}
57
58	fq2.generateAddE2()
59	fq2.generateDoubleE2()
60	fq2.generateSubE2()
61	fq2.generateNegE2()
62
63	switch fq2.config.ID() {
64	case ecc.BN254:
65		fq2.generateMulByNonResidueE2BN254()
66		fq2.generateMulE2BN254(forceADXCheck)
67		fq2.generateSquareE2BN254(forceADXCheck)
68	case ecc.BLS12_381:
69		fq2.generateMulByNonResidueE2BLS381()
70		fq2.generateSquareE2BLS381(forceADXCheck)
71		fq2.generateMulE2BLS381(forceADXCheck)
72	}
73
74	return nil
75}
76
77func (fq2 *Fq2Amd64) generateAddE2() {
78	registers := fq2.FnHeader("addE2", 0, 24)
79
80	// registers
81	x := registers.Pop()
82	y := registers.Pop()
83	r := registers.Pop()
84	t := registers.PopN(fq2.NbWords)
85
86	fq2.MOVQ("x+8(FP)", x)
87
88	// move t = x
89	fq2.Mov(x, t)
90
91	fq2.MOVQ("y+16(FP)", y)
92
93	// t = t + y = x + y
94	fq2.Add(y, t)
95
96	// reduce
97	fq2.Reduce(&registers, t)
98	fq2.MOVQ("res+0(FP)", r)
99	fq2.Mov(t, r)
100
101	// move x+offset(fq2.NbWords) into t
102	fq2.Mov(x, t, fq2.NbWords)
103
104	// add y+offset(fq2.NbWords) into t
105	fq2.Add(y, t, fq2.NbWords)
106
107	// reduce t into r with offset fq2.NbWords
108	fq2.Reduce(&registers, t)
109	fq2.Mov(t, r, 0, fq2.NbWords)
110
111	fq2.RET()
112
113}
114
115func (fq2 *Fq2Amd64) generateDoubleE2() {
116	// func header
117	registers := fq2.FnHeader("doubleE2", 0, 16)
118
119	// registers
120	x := registers.Pop()
121	r := registers.Pop()
122	t := registers.PopN(fq2.NbWords)
123
124	fq2.MOVQ("res+0(FP)", r)
125	fq2.MOVQ("x+8(FP)", x)
126
127	fq2.Mov(x, t)
128	fq2.Add(t, t)
129	fq2.Reduce(&registers, t)
130	fq2.Mov(t, r)
131	fq2.Mov(x, t, fq2.NbWords)
132	fq2.Add(t, t)
133	fq2.Reduce(&registers, t)
134	fq2.Mov(t, r, 0, fq2.NbWords)
135
136	fq2.RET()
137}
138
139func (fq2 *Fq2Amd64) generateNegE2() {
140	registers := fq2.FnHeader("negE2", 0, 16)
141
142	nonZeroA := fq2.NewLabel()
143	nonZeroB := fq2.NewLabel()
144	B := fq2.NewLabel()
145
146	// registers
147	x := registers.Pop()
148	r := registers.Pop()
149	q := registers.Pop()
150	t := registers.PopN(fq2.NbWords)
151
152	fq2.MOVQ("res+0(FP)", r)
153	fq2.MOVQ("x+8(FP)", x)
154
155	// t = x
156	fq2.Mov(x, t)
157
158	// x = t[0] | ... | t[n]
159	fq2.MOVQ(t[0], x)
160	for i := 1; i < fq2.NbWords; i++ {
161		fq2.ORQ(t[i], x)
162	}
163
164	fq2.TESTQ(x, x)
165
166	// if x != 0, we jump to nonzero label
167	fq2.JNE(nonZeroA)
168
169	// if x == 0, we set the result to zero and continue
170	for i := 0; i < fq2.NbWords; i++ {
171		fq2.MOVQ(x, r.At(i+fq2.NbWords))
172	}
173	fq2.JMP(B)
174
175	fq2.LABEL(nonZeroA)
176
177	// z = x - q
178	for i := 0; i < fq2.NbWords; i++ {
179		fq2.MOVQ(fq2.Q[i], q)
180		if i == 0 {
181			fq2.SUBQ(t[i], q)
182		} else {
183			fq2.SBBQ(t[i], q)
184		}
185		fq2.MOVQ(q, r.At(i))
186	}
187
188	fq2.LABEL(B)
189	fq2.MOVQ("x+8(FP)", x)
190	fq2.Mov(x, t, fq2.NbWords)
191
192	// x = t[0] | ... | t[n]
193	fq2.MOVQ(t[0], x)
194	for i := 1; i < fq2.NbWords; i++ {
195		fq2.ORQ(t[i], x)
196	}
197
198	fq2.TESTQ(x, x)
199
200	// if x != 0, we jump to nonzero label
201	fq2.JNE(nonZeroB)
202
203	// if x == 0, we set the result to zero and return
204	for i := 0; i < fq2.NbWords; i++ {
205		fq2.MOVQ(x, r.At(i+fq2.NbWords))
206	}
207	fq2.RET()
208
209	fq2.LABEL(nonZeroB)
210
211	// z = x - q
212	for i := 0; i < fq2.NbWords; i++ {
213		fq2.MOVQ(fq2.Q[i], q)
214		if i == 0 {
215			fq2.SUBQ(t[i], q)
216		} else {
217			fq2.SBBQ(t[i], q)
218		}
219		fq2.MOVQ(q, r.At(i+fq2.NbWords))
220	}
221
222	fq2.RET()
223
224}
225
226func (fq2 *Fq2Amd64) generateSubE2() {
227	registers := fq2.FnHeader("subE2", 0, 24)
228
229	// registers
230	t := registers.PopN(fq2.NbWords)
231	xy := registers.Pop()
232
233	zero := registers.Pop()
234	fq2.XORQ(zero, zero)
235
236	fq2.MOVQ("x+8(FP)", xy)
237	fq2.Mov(xy, t)
238
239	// z = x - y mod q
240	// move t = x
241	fq2.MOVQ("y+16(FP)", xy)
242	fq2.Sub(xy, t)
243	fq2.MOVQ("x+8(FP)", xy)
244
245	fq2.modReduceAfterSub(&registers, zero, t)
246
247	r := registers.Pop()
248	fq2.MOVQ("res+0(FP)", r)
249	fq2.Mov(t, r)
250	registers.Push(r)
251
252	fq2.Mov(xy, t, fq2.NbWords)
253
254	// z = x - y mod q
255	// move t = x
256	fq2.MOVQ("y+16(FP)", xy)
257	fq2.Sub(xy, t, fq2.NbWords)
258
259	fq2.modReduceAfterSub(&registers, zero, t)
260
261	r = xy
262	fq2.MOVQ("res+0(FP)", r)
263
264	fq2.Mov(t, r, 0, fq2.NbWords)
265
266	fq2.RET()
267
268}
269
270func (fq2 *Fq2Amd64) modReduceAfterSub(registers *ramd64.Registers, zero ramd64.Register, t []ramd64.Register) {
271	q := registers.PopN(fq2.NbWords)
272	fq2.modReduceAfterSubScratch(zero, t, q)
273	registers.Push(q...)
274}
275
276func (fq2 *Fq2Amd64) modReduceAfterSubScratch(zero ramd64.Register, t, scratch []ramd64.Register) {
277	fq2.Mov(fq2.Q, scratch)
278	for i := 0; i < fq2.NbWords; i++ {
279		fq2.CMOVQCC(zero, scratch[i])
280	}
281	// add registers (q or 0) to t, and set to result
282	fq2.Add(scratch, t)
283}
284