1// Copyright 2020 ConsenSys Software Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "textflag.h"
16#include "funcdata.h"
17
// q<> is the modulus q, stored as six 64-bit limbs with the least-significant
// limb at offset 0 (the order in which REDUCE's SUBQ/SBBQ chain consumes them).
DATA q<>+0x00(SB)/8, $0xb9feffffffffaaab
DATA q<>+0x08(SB)/8, $0x1eabfffeb153ffff
DATA q<>+0x10(SB)/8, $0x6730d2a0f6b0f624
DATA q<>+0x18(SB)/8, $0x64774b84f38512bf
DATA q<>+0x20(SB)/8, $0x4b1ba7b6434bacd7
DATA q<>+0x28(SB)/8, $0x1a0111ea397fe69a
GLOBL q<>(SB), RODATA|NOPTR, $48
26
// qInv0<> is one 64-bit word, labelled q'[0] by the original author.
// NOTE(review): presumably -q^{-1} mod 2^64 for Montgomery reduction; nothing
// in this section reads it, so confirm against the routines that do.
DATA qInv0<>+0x00(SB)/8, $0x89f3fffcfffcfffd
GLOBL qInv0<>(SB), RODATA|NOPTR, $8
30
// REDUCE(a0..a5, t0..t5)
//
// Conditional subtraction of q from the six-limb value a0..a5 (a0 is the
// least-significant limb). The input is first copied into t0..t5, then q is
// subtracted in place; if that subtraction borrows (the input was below q)
// the copy is moved back with CMOVQCS. Net effect:
//     a = (a >= q) ? a - q : a
// Clobbers t0..t5 and the flags. The a and t operand sets must be disjoint.
// MOVQ leaves the flags alone, so taking all copies before the SUBQ/SBBQ
// chain does not disturb borrow propagation.
#define REDUCE(a0, a1, a2, a3, a4, a5, t0, t1, t2, t3, t4, t5) \
	MOVQ    a0, t0;          \
	MOVQ    a1, t1;          \
	MOVQ    a2, t2;          \
	MOVQ    a3, t3;          \
	MOVQ    a4, t4;          \
	MOVQ    a5, t5;          \
	SUBQ    q<>+0(SB), a0;   \
	SBBQ    q<>+8(SB), a1;   \
	SBBQ    q<>+16(SB), a2;  \
	SBBQ    q<>+24(SB), a3;  \
	SBBQ    q<>+32(SB), a4;  \
	SBBQ    q<>+40(SB), a5;  \
	CMOVQCS t0, a0;          \
	CMOVQCS t1, a1;          \
	CMOVQCS t2, a2;          \
	CMOVQCS t3, a3;          \
	CMOVQCS t4, a4;          \
	CMOVQCS t5, a5
50
// add(res, x, y *Element)
//
// Writes x + y, followed by one conditional subtraction of q, to res.
// Both operands are fully read before res is written, so res may alias
// x or y. A carry out of the top limb of the addition is discarded.
// NOTE(review): a single REDUCE only yields a value below q when x and y
// are themselves below q; that precondition is not checked here.
TEXT ·add(SB), NOSPLIT, $0-24
	MOVQ x+8(FP), SI
	MOVQ y+16(FP), DI

	// (R8,R9,R10,R11,R12,R13) = x, least-significant limb first
	MOVQ 0(SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11
	MOVQ 32(SI), R12
	MOVQ 40(SI), R13

	// (R8..R13) += y, carry rippling upward through the limbs
	ADDQ 0(DI), R8
	ADCQ 8(DI), R9
	ADCQ 16(DI), R10
	ADCQ 24(DI), R11
	ADCQ 32(DI), R12
	ADCQ 40(DI), R13

	// subtract q once if the sum is >= q; SI and DI are dead pointers by now
	// and serve as scratch together with AX,BX,CX,DX
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,DI)

	MOVQ res+0(FP), AX
	MOVQ R8, 0(AX)
	MOVQ R9, 8(AX)
	MOVQ R10, 16(AX)
	MOVQ R11, 24(AX)
	MOVQ R12, 32(AX)
	MOVQ R13, 40(AX)
	RET
79
// sub(res, x, y *Element)
//
// Writes x - y to res; when the six-limb subtraction borrows (x < y as
// unsigned integers) q is added back once. The selection is branch-free:
// a copy of q is replaced by zero with CMOVQCC when there was no borrow,
// and that value is then added unconditionally.
// Both operands are fully read before res is written, so res may alias
// x or y.
TEXT ·sub(SB), NOSPLIT, $0-24
	MOVQ    x+8(FP), SI
	MOVQ    y+16(FP), DI

	// (R8,R9,R10,R11,R12,R13) = x, least-significant limb first
	MOVQ    0(SI), R8
	MOVQ    8(SI), R9
	MOVQ    16(SI), R10
	MOVQ    24(SI), R11
	MOVQ    32(SI), R12
	MOVQ    40(SI), R13

	// AX = 0. XORQ rewrites the flags, so it must come before the borrow chain.
	XORQ    AX, AX

	// (R8..R13) -= y; CF after the last SBBQ is the overall borrow
	SUBQ    0(DI), R8
	SBBQ    8(DI), R9
	SBBQ    16(DI), R10
	SBBQ    24(DI), R11
	SBBQ    32(DI), R12
	SBBQ    40(DI), R13

	// (BX,CX,DX,SI,DI,R14) = q; MOVQ does not touch CF, and the pointers in
	// SI/DI are no longer needed
	MOVQ    $0xb9feffffffffaaab, BX
	MOVQ    $0x1eabfffeb153ffff, CX
	MOVQ    $0x6730d2a0f6b0f624, DX
	MOVQ    $0x64774b84f38512bf, SI
	MOVQ    $0x4b1ba7b6434bacd7, DI
	MOVQ    $0x1a0111ea397fe69a, R14

	// no borrow => replace the addend with zero
	CMOVQCC AX, BX
	CMOVQCC AX, CX
	CMOVQCC AX, DX
	CMOVQCC AX, SI
	CMOVQCC AX, DI
	CMOVQCC AX, R14

	// (R8..R13) += q or 0
	ADDQ    BX, R8
	ADCQ    CX, R9
	ADCQ    DX, R10
	ADCQ    SI, R11
	ADCQ    DI, R12
	ADCQ    R14, R13

	MOVQ    res+0(FP), AX
	MOVQ    R8, 0(AX)
	MOVQ    R9, 8(AX)
	MOVQ    R10, 16(AX)
	MOVQ    R11, 24(AX)
	MOVQ    R12, 32(AX)
	MOVQ    R13, 40(AX)
	RET
123
// double(res, x *Element)
//
// Writes x + x, followed by one conditional subtraction of q, to res.
// x is fully read before res is written, so res may alias x.
// NOTE(review): the result is only below q when x is below q on entry;
// that precondition is not checked here.
TEXT ·double(SB), NOSPLIT, $0-16
	MOVQ x+8(FP), SI

	// (R8,R9,R10,R11,R12,R13) = x, least-significant limb first
	MOVQ 0(SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11
	MOVQ 32(SI), R12
	MOVQ 40(SI), R13

	// (R8..R13) <<= 1, implemented as a self-add with carry
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	ADCQ R12, R12
	ADCQ R13, R13

	// subtract q once if the doubled value is >= q; scratch: AX,BX,CX,DX,SI,DI
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,DI)

	MOVQ res+0(FP), AX
	MOVQ R8, 0(AX)
	MOVQ R9, 8(AX)
	MOVQ R10, 16(AX)
	MOVQ R11, 24(AX)
	MOVQ R12, 32(AX)
	MOVQ R13, 40(AX)
	RET
151
// neg(res, x *Element)
//
// Writes the additive inverse of x to res: zero when every limb of x is
// zero, otherwise the six-limb difference q - x.
// x is fully read before res is written, so res may alias x.
TEXT ·neg(SB), NOSPLIT, $0-16
	MOVQ x+8(FP), SI
	MOVQ res+0(FP), DI

	// (R8,R9,R10,R11,R12,R13) = x, least-significant limb first
	MOVQ 0(SI), R8
	MOVQ 8(SI), R9
	MOVQ 16(SI), R10
	MOVQ 24(SI), R11
	MOVQ 32(SI), R12
	MOVQ 40(SI), R13

	// AX = OR of all limbs; the last ORQ sets ZF exactly when x == 0,
	// so no separate TESTQ is needed before the branch
	MOVQ R8, AX
	ORQ  R9, AX
	ORQ  R10, AX
	ORQ  R11, AX
	ORQ  R12, AX
	ORQ  R13, AX
	JEQ  zero

	// (AX,BX,CX,DX,SI,R14) = q
	MOVQ $0xb9feffffffffaaab, AX
	MOVQ $0x1eabfffeb153ffff, BX
	MOVQ $0x6730d2a0f6b0f624, CX
	MOVQ $0x64774b84f38512bf, DX
	MOVQ $0x4b1ba7b6434bacd7, SI
	MOVQ $0x1a0111ea397fe69a, R14

	// (AX..R14) -= x
	SUBQ R8, AX
	SBBQ R9, BX
	SBBQ R10, CX
	SBBQ R11, DX
	SBBQ R12, SI
	SBBQ R13, R14

	MOVQ AX, 0(DI)
	MOVQ BX, 8(DI)
	MOVQ CX, 16(DI)
	MOVQ DX, 24(DI)
	MOVQ SI, 32(DI)
	MOVQ R14, 40(DI)
	RET

zero:
	// AX is zero on this path; use it to clear res
	MOVQ AX, 0(DI)
	MOVQ AX, 8(DI)
	MOVQ AX, 16(DI)
	MOVQ AX, 24(DI)
	MOVQ AX, 32(DI)
	MOVQ AX, 40(DI)
	RET
198
// reduce(res)
//
// In-place conditional subtraction: loads the six limbs res points to,
// subtracts q once if the value is >= q, and stores the limbs back.
// NOTE(review): the Go prototype is not visible here; res is presumably a
// *Element like the sibling routines' arguments.
TEXT ·reduce(SB), NOSPLIT, $0-8
	MOVQ res+0(FP), DI

	// (R8,R9,R10,R11,R12,R13) = *res, least-significant limb first
	MOVQ 0(DI), R8
	MOVQ 8(DI), R9
	MOVQ 16(DI), R10
	MOVQ 24(DI), R11
	MOVQ 32(DI), R12
	MOVQ 40(DI), R13

	// scratch: AX,BX,CX,DX,SI,R14 (DI keeps the destination pointer)
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	MOVQ R12, 32(DI)
	MOVQ R13, 40(DI)
	RET
218
// MulBy3(x *Element)
//
// Replaces x with 3x in place, computed as (2x reduced) + x, reduced again.
// Each step is followed by one conditional subtraction of q.
// NOTE(review): each single REDUCE only brings the value below q when x is
// below q on entry; that precondition is not checked here.
TEXT ·MulBy3(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), DI

	// (R8,R9,R10,R11,R12,R13) = x, least-significant limb first
	MOVQ 0(DI), R8
	MOVQ 8(DI), R9
	MOVQ 16(DI), R10
	MOVQ 24(DI), R11
	MOVQ 32(DI), R12
	MOVQ 40(DI), R13

	// 2x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	ADCQ R12, R12
	ADCQ R13, R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	// 3x = 2x + x; x is re-read from memory, which has not been written yet
	ADDQ 0(DI), R8
	ADCQ 8(DI), R9
	ADCQ 16(DI), R10
	ADCQ 24(DI), R11
	ADCQ 32(DI), R12
	ADCQ 40(DI), R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	MOVQ R12, 32(DI)
	MOVQ R13, 40(DI)
	RET
255
// MulBy5(x *Element)
//
// Replaces x with 5x in place, computed as 2x -> 4x -> 4x + x, with one
// conditional subtraction of q after every step.
// NOTE(review): each single REDUCE only brings the value below q when x is
// below q on entry; that precondition is not checked here.
TEXT ·MulBy5(SB), NOSPLIT, $0-8
	MOVQ x+0(FP), DI

	// (R8,R9,R10,R11,R12,R13) = x, least-significant limb first
	MOVQ 0(DI), R8
	MOVQ 8(DI), R9
	MOVQ 16(DI), R10
	MOVQ 24(DI), R11
	MOVQ 32(DI), R12
	MOVQ 40(DI), R13

	// 2x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	ADCQ R12, R12
	ADCQ R13, R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	// 4x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	ADCQ R12, R12
	ADCQ R13, R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	// 5x = 4x + x; x is re-read from memory, which has not been written yet
	ADDQ 0(DI), R8
	ADCQ 8(DI), R9
	ADCQ 16(DI), R10
	ADCQ 24(DI), R11
	ADCQ 32(DI), R12
	ADCQ 40(DI), R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	MOVQ R12, 32(DI)
	MOVQ R13, 40(DI)
	RET
302
// MulBy13(x *Element)
//
// Replaces x with 13x in place using the chain
//     2x -> 4x (saved) -> 8x -> 8x + 4x = 12x -> 12x + x = 13x
// with one conditional subtraction of q after every step.
// The saved copy of 4x lives in R15 plus five 8-byte stack slots, which is
// why this routine declares a 40-byte frame.
// NOTE(review): each single REDUCE only brings the value below q when x is
// below q on entry; that precondition is not checked here.
TEXT ·MulBy13(SB), $40-8
	MOVQ x+0(FP), DI

	// (R8,R9,R10,R11,R12,R13) = x, least-significant limb first
	MOVQ 0(DI), R8
	MOVQ 8(DI), R9
	MOVQ 16(DI), R10
	MOVQ 24(DI), R11
	MOVQ 32(DI), R12
	MOVQ 40(DI), R13

	// 2x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	ADCQ R12, R12
	ADCQ R13, R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	// 4x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	ADCQ R12, R12
	ADCQ R13, R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	// keep the reduced 4x: low limb in R15, the other five on the stack
	MOVQ R8, R15
	MOVQ R9, s0-8(SP)
	MOVQ R10, s1-16(SP)
	MOVQ R11, s2-24(SP)
	MOVQ R12, s3-32(SP)
	MOVQ R13, s4-40(SP)

	// 8x
	ADDQ R8, R8
	ADCQ R9, R9
	ADCQ R10, R10
	ADCQ R11, R11
	ADCQ R12, R12
	ADCQ R13, R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	// 12x = 8x + saved 4x
	ADDQ R15, R8
	ADCQ s0-8(SP), R9
	ADCQ s1-16(SP), R10
	ADCQ s2-24(SP), R11
	ADCQ s3-32(SP), R12
	ADCQ s4-40(SP), R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	// 13x = 12x + x; x is re-read from memory, which has not been written yet
	ADDQ 0(DI), R8
	ADCQ 8(DI), R9
	ADCQ 16(DI), R10
	ADCQ 24(DI), R11
	ADCQ 32(DI), R12
	ADCQ 40(DI), R13
	REDUCE(R8,R9,R10,R11,R12,R13,AX,BX,CX,DX,SI,R14)

	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	MOVQ R11, 24(DI)
	MOVQ R12, 32(DI)
	MOVQ R13, 40(DI)
	RET
375