#include "x86_arch.h"
.text

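/*
 * _mul_1x1: carry-less (GF(2)[x]) multiplication of two 64-bit
 * polynomials.  Inputs: a in %rax, b in %rbp, and the nibble mask
 * 0xf in %r8 (set by the caller, see bn_GF2m_mul_2x2 below).
 * Output: the 128-bit product in %rdx:%rax.  Strategy: a 16-entry
 * lookup table of small multiples of a is built on the stack, then
 * b is consumed four bits at a time.
 */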
.type	_mul_1x1,@function
.align	16
_mul_1x1:
	subq	$128+8,%rsp		# room for the 16-entry lookup table
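	/*
	 * The table entries hold multiples of a with its top three
	 * bits cleared, so those bits are handled here: each of a's
	 * bits 61-63 is broadcast with sar, ANDed with b, and the
	 * resulting conditional copy of b (shifted left by 61, 62 or
	 * 63 as a 128-bit quantity) is XORed into %rdx:%rax.
	 */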
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi
	shrq	$3,%r9
	leaq	(,%rax,4),%rdi
	andq	%rax,%r9
	leaq	(,%rax,8),%r12
	sarq	$63,%rax
	leaq	(%r9,%r9,1),%r10
	sarq	$63,%rsi
	leaq	(,%r9,4),%r11
	andq	%rbp,%rax
	sarq	$63,%rdi
	movq	%rax,%rdx
	shlq	$63,%rax
	andq	%rbp,%rsi
	shrq	$1,%rdx
	movq	%rsi,%rcx
	shlq	$62,%rsi
	andq	%rbp,%rdi
	shrq	$2,%rcx
	xorq	%rsi,%rax
	movq	%rdi,%rbx
	shlq	$61,%rdi
	xorq	%rcx,%rdx
	shrq	$3,%rbx
	xorq	%rdi,%rax
	xorq	%rbx,%rdx

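	/*
	 * Build the table tbl[i] = lo*i (carry-less) for i = 0..15 at
	 * (%rsp), where lo = a & 0x1fffffffffffffff.  %r9, %r10, %r11
	 * and %r12 hold lo*1, lo*2, lo*4 and lo*8; the remaining
	 * entries are XOR combinations of those four.
	 */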
	movq	%r9,%r13
	movq	$0,0(%rsp)
	xorq	%r10,%r13
	movq	%r9,8(%rsp)
	movq	%r11,%r14
	movq	%r10,16(%rsp)
	xorq	%r12,%r14
	movq	%r13,24(%rsp)

	xorq	%r11,%r9
	movq	%r11,32(%rsp)
	xorq	%r11,%r10
	movq	%r9,40(%rsp)
	xorq	%r11,%r13
	movq	%r10,48(%rsp)
	xorq	%r14,%r9
	movq	%r13,56(%rsp)
	xorq	%r14,%r10

	movq	%r12,64(%rsp)
	xorq	%r14,%r13
	movq	%r9,72(%rsp)
	xorq	%r11,%r9
	movq	%r10,80(%rsp)
	xorq	%r11,%r10
	movq	%r13,88(%rsp)

	xorq	%r11,%r13
	movq	%r14,96(%rsp)
	movq	%r8,%rsi
	movq	%r9,104(%rsp)
	andq	%rbp,%rsi
	movq	%r10,112(%rsp)
	shrq	$4,%rbp
	movq	%r13,120(%rsp)
	movq	%r8,%rdi
	andq	%rbp,%rdi
	shrq	$4,%rbp

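	/*
	 * Fully unrolled main loop: b (%rbp) is consumed one nibble at
	 * a time; the first two nibbles were extracted above,
	 * interleaved with the final table stores.  Odd-nibble lookups
	 * are shifted into %rdx:%rax with shl/shr pairs; even-nibble
	 * lookups are accumulated in %xmm0, where pslldq provides the
	 * needed byte-sized (multiple-of-8) shifts.
	 */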
	movq	(%rsp,%rsi,8),%xmm0	# tbl[nibble 0] seeds the xmm accumulator
	movq	%r8,%rsi
	andq	%rbp,%rsi
	shrq	$4,%rbp
	movq	(%rsp,%rdi,8),%rcx	# tbl[nibble 1]
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx			# low part of tbl[nibble 1]<<4
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1	# tbl[nibble 2]
	shrq	$60,%rbx		# bits that spill into the high word
	xorq	%rcx,%rax
	pslldq	$1,%xmm1		# byte shift: tbl[nibble 2]<<8
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	movq	(%rsp,%rdi,8),%rcx	# tbl[nibble 15]
	movq	%rcx,%rbx
	shlq	$60,%rcx
	movd	%xmm0,%rsi		# low half of the xmm accumulator
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
	movd	%xmm0,%rdi		# high half of the xmm accumulator
	xorq	%rsi,%rax		# fold the xmm accumulator into %rdx:%rax
	xorq	%rdi,%rdx

	addq	$128+8,%rsp
	retq
.Lend_mul_1x1:
.size	_mul_1x1,.-_mul_1x1

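/*
 * bn_GF2m_mul_2x2(r, a1, a0, b1, b0): r[0..3] = (a1:a0) * (b1:b0)
 * over GF(2).  One Karatsuba step reduces the 2x2-word product to
 * three 1x1-word multiplications, performed with PCLMULQDQ when the
 * CPU supports it and with the table-driven _mul_1x1 otherwise.
 */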
.hidden	OPENSSL_ia32cap_P
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
	movl	OPENSSL_ia32cap_P+4(%rip),%eax
	btl	$IA32CAP_BIT1_PCLMUL,%eax
	jnc	.Lvanilla_mul_2x2	# no PCLMULQDQ: take the scalar path

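	/*
	 * PCLMULQDQ path: the three carry-less products are computed
	 * directly in xmm registers (the .byte sequences below are
	 * pclmulqdq instructions spelled out for older assemblers),
	 * then the middle Karatsuba term is split and folded into the
	 * low and high 128-bit halves of the result.
	 */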
	movd	%rsi,%xmm0		# a1
	movd	%rcx,%xmm1		# b1
	movd	%rdx,%xmm2		# a0
	movd	%r8,%xmm3		# b0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
.byte	102,15,58,68,193,0	# pclmulqdq $0,%xmm1,%xmm0 = a1*b1
	pxor	%xmm2,%xmm4		# a1^a0
	pxor	%xmm3,%xmm5		# b1^b0
.byte	102,15,58,68,211,0	# pclmulqdq $0,%xmm3,%xmm2 = a0*b0
.byte	102,15,58,68,229,0	# pclmulqdq $0,%xmm5,%xmm4 = (a1^a0)*(b1^b0)
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4		# middle Karatsuba term
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4
	psrldq	$8,%xmm5
	pxor	%xmm4,%xmm2		# low 128 bits of the product
	pxor	%xmm5,%xmm0		# high 128 bits of the product
	movdqu	%xmm2,0(%rdi)
	movdqu	%xmm0,16(%rdi)
	retq

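	/*
	 * Scalar fallback: spill the callee-saved registers and the
	 * arguments into a local frame, then perform the three
	 * Karatsuba half-products with _mul_1x1.
	 */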
.align	16
.Lvanilla_mul_2x2:
	leaq	-136(%rsp),%rsp
	movq	%r14,80(%rsp)
	movq	%r13,88(%rsp)
	movq	%r12,96(%rsp)
	movq	%rbp,104(%rsp)
	movq	%rbx,112(%rsp)
.Lbody_mul_2x2:
	movq	%rdi,32(%rsp)		# r
	movq	%rsi,40(%rsp)		# a1
	movq	%rdx,48(%rsp)		# a0
	movq	%rcx,56(%rsp)		# b1
	movq	%r8,64(%rsp)		# b0

	movq	$15,%r8			# nibble mask expected by _mul_1x1
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1		# a1*b1
	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1		# a0*b0
	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax		# a1^a0
	xorq	64(%rsp),%rbp		# b1^b0
	call	_mul_1x1		# (a1^a0)*(b1^b0)
	movq	0(%rsp),%rbx		# reload a0*b0
	movq	8(%rsp),%rcx
	movq	16(%rsp),%rdi		# reload a1*b1
	movq	24(%rsp),%rsi
	movq	32(%rsp),%rbp		# r

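	/*
	 * Karatsuba recombination: with hi = a1*b1, lo = a0*b0 and
	 * mid = (a1^a0)*(b1^b0) in %rdx:%rax, the middle term
	 * mid ^ hi ^ lo is XORed into the two central words of the
	 * 4-word result.
	 */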
	xorq	%rdx,%rax
	xorq	%rcx,%rdx
	xorq	%rbx,%rax
	movq	%rbx,0(%rbp)		# r[0] = lo(a0*b0)
	xorq	%rdi,%rdx
	movq	%rsi,24(%rbp)		# r[3] = hi(a1*b1)
	xorq	%rsi,%rax
	xorq	%rsi,%rdx
	xorq	%rdx,%rax
	movq	%rdx,16(%rbp)		# r[2]
	movq	%rax,8(%rbp)		# r[1]

	movq	80(%rsp),%r14
	movq	88(%rsp),%r13
	movq	96(%rsp),%r12
	movq	104(%rsp),%rbp
	movq	112(%rsp),%rbx
	leaq	136(%rsp),%rsp
	retq
.Lend_mul_2x2:
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.align	16
#if defined(HAVE_GNU_STACK)
.section .note.GNU-stack,"",%progbits
#endif