1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// This code was translated into a form compatible with 6a from the public
6// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
7
8// +build amd64,!gccgo,!appengine
9
10// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
//
// NOTE(review): the prototype comment previously read `key *[32]key`; that
// looks like a typo for `*[32]byte` -- confirm against the Go declaration,
// which is not part of this file.
//
// poly1305 stores 16 bytes at out, computed from the mlen bytes at m and the
// 32 bytes at key. The four 32-bit words at key[0:16] are masked and used as
// the multiplier ("r" below); the four words at key[16:32] are added to the
// result just before it is stored.
//
// Implementation notes:
//   - All field arithmetic runs on the x87 register stack. In this assembler
//     syntax "FMOVD mem, F0" pushes a value, "FMOVDP F0, mem" stores and pops,
//     and the ...P arithmetic forms pop after operating, so the meaning of
//     F0..F7 shifts from instruction to instruction.
//   - 32-bit integers are converted to float64 by writing them into the low
//     dword of an 8-byte stack slot whose high dword holds fixed exponent bits
//     and then subtracting the matching ·DOFFSETn constant.
//   - ·ROUNDING, ·SCALE, ·ALPHA32/64/96/130, ·DOFFSET*, ·HOFFSET* are defined
//     outside this chunk; statements about their values are assumptions.
//   - NOTE(review): FLDCW replaces the x87 control word and nothing in this
//     function restores the caller's value -- confirm that this is acceptable.
//
// Scratch layout, relative to the realigned SP:
//     0..15     zero-padded copy of a trailing partial block
//     32..87    saved original SP (via R11), R12, R13, R14, R15, BX, BP
//     88, 96    out pointer, key pointer
//     104..135  four 8-byte integer<->float conversion slots
//     136..191  r0, r1, SCALE*r1, r2, SCALE*r2, r3, SCALE*r3 (float64 each);
//               136 is reused as an integer temporary during finalisation
11TEXT ·poly1305(SB),0,$224-32
	// Load the four arguments from the caller's frame.
12	MOVQ out+0(FP),DI
13	MOVQ m+8(FP),SI
14	MOVQ mlen+16(FP),DX
15	MOVQ key+24(FP),CX
16
	// Keep the incoming SP in R11, then move SP up to the next 32-byte
	// boundary: SP = (SP &^ 31) + 32. This skips at most 32 bytes; the highest
	// byte touched below is 191(SP), so the accesses fit the 224-byte frame
	// declared on the TEXT line.
17	MOVQ SP,R11
18	MOVQ $31,R9
19	NOTQ R9
20	ANDQ R9,SP
21	ADDQ $32,SP
22
	// Spill the original SP and R12-R15, BX, BP; they are reloaded before RET.
23	MOVQ R11,32(SP)
24	MOVQ R12,40(SP)
25	MOVQ R13,48(SP)
26	MOVQ R14,56(SP)
27	MOVQ R15,64(SP)
28	MOVQ BX,72(SP)
29	MOVQ BP,80(SP)
	// Install the x87 control word stored at ·ROUNDING (presumably selects the
	// precision/rounding mode the ALPHA add/subtract steps rely on).
30	FLDCW ·ROUNDING(SB)
	// Read the four 32-bit words of key[0:16].
31	MOVL 0(CX),R8
32	MOVL 4(CX),R9
33	MOVL 8(CX),AX
34	MOVL 12(CX),R10
	// Remember out and key for the final addition and store.
35	MOVQ DI,88(SP)
36	MOVQ CX,96(SP)
	// Prime the high dwords of the conversion slots. As the top half of a
	// float64 these bit patterns are 2^52, 2^84, 2^116 and 2^148, so a 32-bit
	// word w placed in the low dword of slot n reads back as 2^(52+32n) + w*2^(32n).
	// These high dwords are left untouched until the finalisation stores.
37	MOVL $0X43300000,108(SP)
38	MOVL $0X45300000,116(SP)
39	MOVL $0X47300000,124(SP)
40	MOVL $0X49300000,132(SP)
	// Mask the key words: clear the top 4 bits of every word and the low 2 bits
	// of words 1..3 (the Poly1305 clamp of r).
41	ANDL $0X0FFFFFFF,R8
42	ANDL $0X0FFFFFFC,R9
43	ANDL $0X0FFFFFFC,AX
44	ANDL $0X0FFFFFFC,R10
45	MOVL R8,104(SP)
46	MOVL R9,112(SP)
47	MOVL AX,120(SP)
48	MOVL R10,128(SP)
	// Push each slot and subtract its offset, leaving r0..r3 as floats on the
	// x87 stack (r3 on top). Assumes ·DOFFSETn equals the 2^(52+32n) bias.
49	FMOVD 104(SP), F0
50	FSUBD ·DOFFSET0(SB), F0
51	FMOVD 112(SP), F0
52	FSUBD ·DOFFSET1(SB), F0
53	FMOVD 120(SP), F0
54	FSUBD ·DOFFSET2(SB), F0
55	FMOVD 128(SP), F0
56	FSUBD ·DOFFSET3(SB), F0
	// Pop the limbs into memory: r0 -> 136; r1 -> 144 and SCALE*r1 -> 152;
	// r2 -> 160 and SCALE*r2 -> 168; r3 -> 176 and SCALE*r3 -> 184.
	// The x87 stack is empty again after L68.
57	FXCHD F0, F3
58	FMOVDP F0, 136(SP)
59	FXCHD F0, F1
60	FMOVD F0, 144(SP)
61	FMULD ·SCALE(SB), F0
62	FMOVDP F0, 152(SP)
63	FMOVD F0, 160(SP)
64	FMULD ·SCALE(SB), F0
65	FMOVDP F0, 168(SP)
66	FMOVD F0, 176(SP)
67	FMULD ·SCALE(SB), F0
68	FMOVDP F0, 184(SP)
	// Accumulator h starts as four zero limbs on the x87 stack.
69	FLDZ
70	FLDZ
71	FLDZ
72	FLDZ
	// Fewer than 16 input bytes: skip straight to the partial-block path.
73	CMPQ DX,$16
74	JB ADDATMOST15BYTES
75	INITIALATLEAST16BYTES:
	// First full block: drop its four words into the low dwords of the
	// conversion slots, then advance m and decrease mlen by 16.
76	MOVL 12(SI),DI
77	MOVL 8(SI),CX
78	MOVL 4(SI),R8
79	MOVL 0(SI),R9
80	MOVL DI,128(SP)
81	MOVL CX,120(SP)
82	MOVL R8,112(SP)
83	MOVL R9,104(SP)
84	ADDQ $16,SI
85	SUBQ $16,DX
	// Add each converted word to one accumulator limb and remove the bias.
	// The top word uses ·DOFFSET3MINUSTWO128 instead of ·DOFFSET3; going by its
	// name this also adds the 2^128 bit for a full block -- TODO confirm.
86	FXCHD F0, F3
87	FADDD 128(SP), F0
88	FSUBD ·DOFFSET3MINUSTWO128(SB), F0
89	FXCHD F0, F1
90	FADDD 112(SP), F0
91	FSUBD ·DOFFSET1(SB), F0
92	FXCHD F0, F2
93	FADDD 120(SP), F0
94	FSUBD ·DOFFSET2(SB), F0
95	FXCHD F0, F3
96	FADDD 104(SP), F0
97	FSUBD ·DOFFSET0(SB), F0
	// No second full block available: finish with the tail multiply.
98	CMPQ DX,$16
99	JB MULTIPLYADDATMOST15BYTES
100	MULTIPLYADDATLEAST16BYTES:
	// Main loop. Stage the next block's words in the conversion slots first;
	// they are added to h at the bottom of the loop, after the multiply.
101	MOVL 12(SI),DI
102	MOVL 8(SI),CX
103	MOVL 4(SI),R8
104	MOVL 0(SI),R9
105	MOVL DI,128(SP)
106	MOVL CX,120(SP)
107	MOVL R8,112(SP)
108	MOVL R9,104(SP)
109	ADDQ $16,SI
110	SUBQ $16,DX
	// Carry step. For each limb, (limb + ALPHAk) - ALPHAk yields a carry term
	// that is subtracted from the limb and later added to another limb; the
	// ALPHA130 term is multiplied by ·SCALE before being folded back in.
	// Presumably the add/subtract rounds the limb to a multiple of 2^k, with
	// SCALE wrapping the overflow modulo 2^130-5 -- depends on constants not
	// visible here.
111	FMOVD ·ALPHA130(SB), F0
112	FADDD F2,F0
113	FSUBD ·ALPHA130(SB), F0
114	FSUBD F0,F2
115	FMULD ·SCALE(SB), F0
116	FMOVD ·ALPHA32(SB), F0
117	FADDD F2,F0
118	FSUBD ·ALPHA32(SB), F0
119	FSUBD F0,F2
120	FXCHD F0, F2
121	FADDDP F0,F1
122	FMOVD ·ALPHA64(SB), F0
123	FADDD F4,F0
124	FSUBD ·ALPHA64(SB), F0
125	FSUBD F0,F4
126	FMOVD ·ALPHA96(SB), F0
127	FADDD F6,F0
128	FSUBD ·ALPHA96(SB), F0
129	FSUBD F0,F6
130	FXCHD F0, F6
131	FADDDP F0,F1
132	FXCHD F0, F3
133	FADDDP F0,F5
134	FXCHD F0, F3
135	FADDDP F0,F1
	// Multiply step: form products of the accumulator limbs with the stored
	// r limbs (136, 144, 160, 176) and their SCALE multiples (152, 168, 184),
	// summing them into four new limbs.
136	FMOVD 176(SP), F0
137	FMULD F3,F0
138	FMOVD 160(SP), F0
139	FMULD F4,F0
140	FMOVD 144(SP), F0
141	FMULD F5,F0
142	FMOVD 136(SP), F0
143	FMULDP F0,F6
144	FMOVD 160(SP), F0
145	FMULD F4,F0
146	FADDDP F0,F3
147	FMOVD 144(SP), F0
148	FMULD F4,F0
149	FADDDP F0,F2
150	FMOVD 136(SP), F0
151	FMULD F4,F0
152	FADDDP F0,F1
153	FMOVD 184(SP), F0
154	FMULDP F0,F4
155	FXCHD F0, F3
156	FADDDP F0,F5
157	FMOVD 144(SP), F0
158	FMULD F4,F0
159	FADDDP F0,F2
160	FMOVD 136(SP), F0
161	FMULD F4,F0
162	FADDDP F0,F1
163	FMOVD 184(SP), F0
164	FMULD F4,F0
165	FADDDP F0,F3
166	FMOVD 168(SP), F0
167	FMULDP F0,F4
168	FXCHD F0, F3
169	FADDDP F0,F4
170	FMOVD 136(SP), F0
171	FMULD F5,F0
172	FADDDP F0,F1
173	FXCHD F0, F3
174	FMOVD 184(SP), F0
175	FMULD F5,F0
176	FADDDP F0,F3
177	FXCHD F0, F1
178	FMOVD 168(SP), F0
179	FMULD F5,F0
180	FADDDP F0,F1
181	FMOVD 152(SP), F0
182	FMULDP F0,F5
183	FXCHD F0, F4
184	FADDDP F0,F1
	// Set the flags for the JAE at L202 now; only x87 instructions sit between
	// this compare and that branch.
185	CMPQ DX,$16
	// Add the block staged at L101-L108 into the accumulator, removing the
	// conversion bias from each word.
186	FXCHD F0, F2
187	FMOVD 128(SP), F0
188	FSUBD ·DOFFSET3MINUSTWO128(SB), F0
189	FADDDP F0,F1
190	FXCHD F0, F1
191	FMOVD 120(SP), F0
192	FSUBD ·DOFFSET2(SB), F0
193	FADDDP F0,F1
194	FXCHD F0, F3
195	FMOVD 112(SP), F0
196	FSUBD ·DOFFSET1(SB), F0
197	FADDDP F0,F1
198	FXCHD F0, F2
199	FMOVD 104(SP), F0
200	FSUBD ·DOFFSET0(SB), F0
201	FADDDP F0,F1
	// Loop while at least 16 bytes remain (flags from L185).
202	JAE MULTIPLYADDATLEAST16BYTES
203	MULTIPLYADDATMOST15BYTES:
	// Reached with the last full block already added to h and fewer than 16
	// bytes left: one more carry step followed by one more multiply by r,
	// without loading further input.
204	FMOVD ·ALPHA130(SB), F0
205	FADDD F2,F0
206	FSUBD ·ALPHA130(SB), F0
207	FSUBD F0,F2
208	FMULD ·SCALE(SB), F0
209	FMOVD ·ALPHA32(SB), F0
210	FADDD F2,F0
211	FSUBD ·ALPHA32(SB), F0
212	FSUBD F0,F2
213	FMOVD ·ALPHA64(SB), F0
214	FADDD F5,F0
215	FSUBD ·ALPHA64(SB), F0
216	FSUBD F0,F5
217	FMOVD ·ALPHA96(SB), F0
218	FADDD F7,F0
219	FSUBD ·ALPHA96(SB), F0
220	FSUBD F0,F7
221	FXCHD F0, F7
222	FADDDP F0,F1
223	FXCHD F0, F5
224	FADDDP F0,F1
225	FXCHD F0, F3
226	FADDDP F0,F5
227	FADDDP F0,F1
	// Multiply by r (same table of r / SCALE*r values as in the main loop).
228	FMOVD 176(SP), F0
229	FMULD F1,F0
230	FMOVD 160(SP), F0
231	FMULD F2,F0
232	FMOVD 144(SP), F0
233	FMULD F3,F0
234	FMOVD 136(SP), F0
235	FMULDP F0,F4
236	FMOVD 160(SP), F0
237	FMULD F5,F0
238	FADDDP F0,F3
239	FMOVD 144(SP), F0
240	FMULD F5,F0
241	FADDDP F0,F2
242	FMOVD 136(SP), F0
243	FMULD F5,F0
244	FADDDP F0,F1
245	FMOVD 184(SP), F0
246	FMULDP F0,F5
247	FXCHD F0, F4
248	FADDDP F0,F3
249	FMOVD 144(SP), F0
250	FMULD F5,F0
251	FADDDP F0,F2
252	FMOVD 136(SP), F0
253	FMULD F5,F0
254	FADDDP F0,F1
255	FMOVD 184(SP), F0
256	FMULD F5,F0
257	FADDDP F0,F4
258	FMOVD 168(SP), F0
259	FMULDP F0,F5
260	FXCHD F0, F4
261	FADDDP F0,F2
262	FMOVD 136(SP), F0
263	FMULD F5,F0
264	FADDDP F0,F1
265	FMOVD 184(SP), F0
266	FMULD F5,F0
267	FADDDP F0,F4
268	FMOVD 168(SP), F0
269	FMULD F5,F0
270	FADDDP F0,F3
271	FMOVD 152(SP), F0
272	FMULDP F0,F5
273	FXCHD F0, F4
274	FADDDP F0,F1
275	ADDATMOST15BYTES:
	// Trailing partial block (0..15 bytes left in DX). Nothing to do if empty.
276	CMPQ DX,$0
277	JE NOMOREBYTES
	// Build a padded 16-byte block at 0(SP): zero it, copy the DX remaining
	// bytes from m with REP MOVSB, then write a single 1 byte right after them
	// (DI points just past the copied bytes when the copy finishes).
278	MOVL $0,0(SP)
279	MOVL $0, 4 (SP)
280	MOVL $0, 8 (SP)
281	MOVL $0, 12 (SP)
282	LEAQ 0(SP),DI
283	MOVQ DX,CX
284	REP; MOVSB
285	MOVB $1,0(DI)
	// Reload the padded block as four words and stage them in the conversion
	// slots. DI, SI, DX and CX are free to reuse: the input is fully consumed.
286	MOVL  12 (SP),DI
287	MOVL  8 (SP),SI
288	MOVL  4 (SP),DX
289	MOVL 0(SP),CX
290	MOVL DI,128(SP)
291	MOVL SI,120(SP)
292	MOVL DX,112(SP)
293	MOVL CX,104(SP)
	// Add the padded block to h. The top word uses plain ·DOFFSET3 here, unlike
	// the full-block paths, which use ·DOFFSET3MINUSTWO128.
294	FXCHD F0, F3
295	FADDD 128(SP), F0
296	FSUBD ·DOFFSET3(SB), F0
297	FXCHD F0, F2
298	FADDD 120(SP), F0
299	FSUBD ·DOFFSET2(SB), F0
300	FXCHD F0, F1
301	FADDD 112(SP), F0
302	FSUBD ·DOFFSET1(SB), F0
303	FXCHD F0, F3
304	FADDD 104(SP), F0
305	FSUBD ·DOFFSET0(SB), F0
	// Carry step (same ALPHA add/subtract pattern as in the main loop).
306	FMOVD ·ALPHA130(SB), F0
307	FADDD F3,F0
308	FSUBD ·ALPHA130(SB), F0
309	FSUBD F0,F3
310	FMULD ·SCALE(SB), F0
311	FMOVD ·ALPHA32(SB), F0
312	FADDD F2,F0
313	FSUBD ·ALPHA32(SB), F0
314	FSUBD F0,F2
315	FMOVD ·ALPHA64(SB), F0
316	FADDD F6,F0
317	FSUBD ·ALPHA64(SB), F0
318	FSUBD F0,F6
319	FMOVD ·ALPHA96(SB), F0
320	FADDD F5,F0
321	FSUBD ·ALPHA96(SB), F0
322	FSUBD F0,F5
323	FXCHD F0, F4
324	FADDDP F0,F3
325	FXCHD F0, F6
326	FADDDP F0,F1
327	FXCHD F0, F3
328	FADDDP F0,F5
329	FXCHD F0, F3
330	FADDDP F0,F1
	// Multiply by r for the final block.
331	FMOVD 176(SP), F0
332	FMULD F3,F0
333	FMOVD 160(SP), F0
334	FMULD F4,F0
335	FMOVD 144(SP), F0
336	FMULD F5,F0
337	FMOVD 136(SP), F0
338	FMULDP F0,F6
339	FMOVD 160(SP), F0
340	FMULD F5,F0
341	FADDDP F0,F3
342	FMOVD 144(SP), F0
343	FMULD F5,F0
344	FADDDP F0,F2
345	FMOVD 136(SP), F0
346	FMULD F5,F0
347	FADDDP F0,F1
348	FMOVD 184(SP), F0
349	FMULDP F0,F5
350	FXCHD F0, F4
351	FADDDP F0,F5
352	FMOVD 144(SP), F0
353	FMULD F6,F0
354	FADDDP F0,F2
355	FMOVD 136(SP), F0
356	FMULD F6,F0
357	FADDDP F0,F1
358	FMOVD 184(SP), F0
359	FMULD F6,F0
360	FADDDP F0,F4
361	FMOVD 168(SP), F0
362	FMULDP F0,F6
363	FXCHD F0, F5
364	FADDDP F0,F4
365	FMOVD 136(SP), F0
366	FMULD F2,F0
367	FADDDP F0,F1
368	FMOVD 184(SP), F0
369	FMULD F2,F0
370	FADDDP F0,F5
371	FMOVD 168(SP), F0
372	FMULD F2,F0
373	FADDDP F0,F3
374	FMOVD 152(SP), F0
375	FMULDP F0,F2
376	FXCHD F0, F1
377	FADDDP F0,F3
	// Reorder the limbs into the stack order NOMOREBYTES expects.
378	FXCHD F0, F3
379	FXCHD F0, F2
380	NOMOREBYTES:
	// Finalisation. R10 is a zero source for the carry chains below (MOVL
	// R10,DI clears DI without touching the carry flag).
381	MOVL $0,R10
	// Last carry step over the four limbs.
382	FMOVD ·ALPHA130(SB), F0
383	FADDD F4,F0
384	FSUBD ·ALPHA130(SB), F0
385	FSUBD F0,F4
386	FMULD ·SCALE(SB), F0
387	FMOVD ·ALPHA32(SB), F0
388	FADDD F2,F0
389	FSUBD ·ALPHA32(SB), F0
390	FSUBD F0,F2
391	FMOVD ·ALPHA64(SB), F0
392	FADDD F4,F0
393	FSUBD ·ALPHA64(SB), F0
394	FSUBD F0,F4
395	FMOVD ·ALPHA96(SB), F0
396	FADDD F6,F0
397	FSUBD ·ALPHA96(SB), F0
398	FXCHD F0, F6
399	FSUBD F6,F0
400	FXCHD F0, F4
401	FADDDP F0,F3
402	FXCHD F0, F4
403	FADDDP F0,F1
404	FXCHD F0, F2
405	FADDDP F0,F3
406	FXCHD F0, F4
407	FADDDP F0,F3
	// Add ·HOFFSETn to each limb and pop the four floats into the conversion
	// slots, so the limbs can be read back as integers from the float64 bit
	// patterns (presumably HOFFSETn biases each limb so that its integer value
	// sits in the mantissa -- constants not visible here).
408	FXCHD F0, F3
409	FADDD ·HOFFSET0(SB), F0
410	FXCHD F0, F3
411	FADDD ·HOFFSET1(SB), F0
412	FXCHD F0, F1
413	FADDD ·HOFFSET2(SB), F0
414	FXCHD F0, F2
415	FADDD ·HOFFSET3(SB), F0
416	FXCHD F0, F3
417	FMOVDP F0, 104(SP)
418	FMOVDP F0, 112(SP)
419	FMOVDP F0, 120(SP)
420	FMOVDP F0, 128(SP)
	// The low 6 bits of each slot's high dword are taken as that limb's
	// overflow above 32 bits.
421	MOVL 108(SP),DI
422	ANDL $63,DI
423	MOVL 116(SP),SI
424	ANDL $63,SI
425	MOVL 124(SP),DX
426	ANDL $63,DX
427	MOVL 132(SP),CX
428	ANDL $63,CX
	// Ripple the overflows upward with an ADDL/ADCL chain: limb1 += over0,
	// limb2 += over1 + CF, limb3 += over2 + CF, top = over3 + CF. The results
	// are written back zero-extended to 64 bits; top goes to 136(SP).
429	MOVL 112(SP),R8
430	ADDL DI,R8
431	MOVQ R8,112(SP)
432	MOVL 120(SP),DI
433	ADCL SI,DI
434	MOVQ DI,120(SP)
435	MOVL 128(SP),DI
436	ADCL DX,DI
437	MOVQ DI,128(SP)
438	MOVL R10,DI
439	ADCL CX,DI
440	MOVQ DI,136(SP)
	// Compute h + 5 limb by limb into the slots while keeping the unmodified
	// limbs of h in SI, DX, CX, R8.
441	MOVQ $5,DI
442	MOVL 104(SP),SI
443	ADDL SI,DI
444	MOVQ DI,104(SP)
445	MOVL R10,DI
446	MOVQ 112(SP),DX
447	ADCL DX,DI
448	MOVQ DI,112(SP)
449	MOVL R10,DI
450	MOVQ 120(SP),CX
451	ADCL CX,DI
452	MOVQ DI,120(SP)
453	MOVL R10,DI
454	MOVQ 128(SP),R8
455	ADCL R8,DI
456	MOVQ DI,128(SP)
	// DI = top - 4 + carry (0xFFFFFFFC is -4 as a 32-bit value). The arithmetic
	// shift turns its sign into a mask: all ones when the sum is negative,
	// i.e. h + 5 did not reach bit 130; zero otherwise (assuming top is small).
	// R9 is the complementary mask.
457	MOVQ $0XFFFFFFFC,DI
458	MOVQ 136(SP),R9
459	ADCL R9,DI
460	SARL $16,DI
461	MOVQ DI,R9
462	XORL $0XFFFFFFFF,R9
	// Branch-free select per limb: (h & DI) | ((h+5) & R9).
463	ANDQ DI,SI
464	MOVQ 104(SP),AX
465	ANDQ R9,AX
466	ORQ AX,SI
467	ANDQ DI,DX
468	MOVQ 112(SP),AX
469	ANDQ R9,AX
470	ORQ AX,DX
471	ANDQ DI,CX
472	MOVQ 120(SP),AX
473	ANDQ R9,AX
474	ORQ AX,CX
475	ANDQ DI,R8
476	MOVQ 128(SP),DI
477	ANDQ R9,DI
478	ORQ DI,R8
	// Reload out and key, add the four words at key[16:32] with carry
	// propagation (the carry out of the top word is dropped), and store the
	// 16-byte result to out.
479	MOVQ 88(SP),DI
480	MOVQ 96(SP),R9
481	ADDL 16(R9),SI
482	ADCL 20(R9),DX
483	ADCL 24(R9),CX
484	ADCL 28(R9),R8
485	MOVL SI,0(DI)
486	MOVL DX,4(DI)
487	MOVL CX,8(DI)
488	MOVL R8,12(DI)
	// Restore the spilled registers, then the original SP, and return.
489	MOVQ 32(SP),R11
490	MOVQ 40(SP),R12
491	MOVQ 48(SP),R13
492	MOVQ 56(SP),R14
493	MOVQ 64(SP),R15
494	MOVQ 72(SP),BX
495	MOVQ 80(SP),BP
496	MOVQ R11,SP
497	RET
498