/* xref: /freebsd/sys/crypto/openssl/amd64/x86_64-gf2m.S (revision 9768746b) */
/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from x86_64-gf2m.pl. */
.text

/*
 * _mul_1x1 -- 64 x 64 -> 128-bit carry-less (XOR-accumulate) multiply.
 *
 * Private helper with a register-based convention; it is NOT callable
 * through the System V C ABI.
 *
 *   In:    %rax = multiplicand a
 *          %rbp = multiplier b
 *          %r8  = nibble mask (the caller in this file passes 0xf)
 *   Out:   %rax = low  64 bits of the product
 *          %rdx = high 64 bits of the product
 *   Clobb: %rbx %rcx %rsi %rdi %rbp %r9 %r10 %r11 %r12 %r13 %r14
 *          %xmm0 %xmm1 flags.  %r8 is left unchanged.
 *          %rbx, %rbp and %r12-%r14 are callee-saved in the C ABI, so the
 *          caller has to preserve them itself.
 *   Stack: 136 bytes; 0..127(%rsp) hold a 16-entry table of 64-bit words.
 *
 * Method:
 *   a1 = a with bits 63..61 cleared.  tab[i] (i = 0..15) is the XOR of
 *   a1<<k over the set bits k of i, which fits in 64 bits because a1 has
 *   its top three bits clear.  The three top bits of a are folded in
 *   separately up front using sign-broadcast masks.  b is then consumed
 *   four bits at a time: even nibbles are accumulated in %xmm0 with byte
 *   shifts, odd nibbles in %rdx:%rax with bit shifts of 8n-4.
 */
.type	_mul_1x1,@function
.align	16
_mul_1x1:
.cfi_startproc
	subq	$128+8,%rsp		/* 128-byte table + 8 bytes of padding */
.cfi_adjust_cfa_offset	128+8
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi	/* rsi = a<<1 (bit 62 moved to bit 63) */
	shrq	$3,%r9			/* r9 = 0x1fffffffffffffff */
	leaq	(,%rax,4),%rdi		/* rdi = a<<2 (bit 61 moved to bit 63) */
	andq	%rax,%r9		/* r9 = a1 = a with bits 63..61 cleared */
	leaq	(,%rax,8),%r12		/* r12 = a8 = a<<3 (top bits fall off) */
	sarq	$63,%rax		/* rax = all-ones if bit 63 of a is set */
	leaq	(%r9,%r9,1),%r10	/* r10 = a2 = a1<<1 */
	sarq	$63,%rsi		/* rsi = all-ones if bit 62 of a is set */
	leaq	(,%r9,4),%r11		/* r11 = a4 = a1<<2 */
	andq	%rbp,%rax		/* rax = b if bit 63 set, else 0 */
	sarq	$63,%rdi		/* rdi = all-ones if bit 61 of a is set */
	movq	%rax,%rdx
	shlq	$63,%rax		/* lo  = (bit63 ? b : 0) << 63 */
	andq	%rbp,%rsi		/* rsi = b if bit 62 set, else 0 */
	shrq	$1,%rdx			/* hi  = (bit63 ? b : 0) >> 1 */
	movq	%rsi,%rcx
	shlq	$62,%rsi
	andq	%rbp,%rdi		/* rdi = b if bit 61 set, else 0 */
	shrq	$2,%rcx
	xorq	%rsi,%rax		/* lo ^= (bit62 ? b : 0) << 62 */
	movq	%rdi,%rbx
	shlq	$61,%rdi
	xorq	%rcx,%rdx		/* hi ^= (bit62 ? b : 0) >> 2 */
	shrq	$3,%rbx
	xorq	%rdi,%rax		/* lo ^= (bit61 ? b : 0) << 61 */
	xorq	%rbx,%rdx		/* hi ^= (bit61 ? b : 0) >> 3 */

	/* Build tab[0..15] at 0..120(%rsp), interleaved with the XORs. */
	movq	%r9,%r13
	movq	$0,0(%rsp)		/* tab[0]  = 0 */
	xorq	%r10,%r13		/* r13 = a1^a2 */
	movq	%r9,8(%rsp)		/* tab[1]  = a1 */
	movq	%r11,%r14
	movq	%r10,16(%rsp)		/* tab[2]  = a2 */
	xorq	%r12,%r14		/* r14 = a4^a8 */
	movq	%r13,24(%rsp)		/* tab[3]  = a1^a2 */

	xorq	%r11,%r9		/* r9  = a1^a4 */
	movq	%r11,32(%rsp)		/* tab[4]  = a4 */
	xorq	%r11,%r10		/* r10 = a2^a4 */
	movq	%r9,40(%rsp)		/* tab[5]  = a1^a4 */
	xorq	%r11,%r13		/* r13 = a1^a2^a4 */
	movq	%r10,48(%rsp)		/* tab[6]  = a2^a4 */
	xorq	%r14,%r9		/* r9  = a1^a8 (a4 cancels) */
	movq	%r13,56(%rsp)		/* tab[7]  = a1^a2^a4 */
	xorq	%r14,%r10		/* r10 = a2^a8 (a4 cancels) */

	movq	%r12,64(%rsp)		/* tab[8]  = a8 */
	xorq	%r14,%r13		/* r13 = a1^a2^a8 (a4 cancels) */
	movq	%r9,72(%rsp)		/* tab[9]  = a1^a8 */
	xorq	%r11,%r9		/* r9  = a1^a4^a8 */
	movq	%r10,80(%rsp)		/* tab[10] = a2^a8 */
	xorq	%r11,%r10		/* r10 = a2^a4^a8 */
	movq	%r13,88(%rsp)		/* tab[11] = a1^a2^a8 */

	xorq	%r11,%r13		/* r13 = a1^a2^a4^a8 */
	movq	%r14,96(%rsp)		/* tab[12] = a4^a8 */
	movq	%r8,%rsi
	movq	%r9,104(%rsp)		/* tab[13] = a1^a4^a8 */
	andq	%rbp,%rsi		/* rsi = nibble 0 of b */
	movq	%r10,112(%rsp)		/* tab[14] = a2^a4^a8 */
	shrq	$4,%rbp
	movq	%r13,120(%rsp)		/* tab[15] = a1^a2^a4^a8 */
	movq	%r8,%rdi
	andq	%rbp,%rdi		/* rdi = nibble 1 of b */
	shrq	$4,%rbp

	movq	(%rsp,%rsi,8),%xmm0	/* xmm0 = tab[nibble 0] (SSE accumulator) */
	movq	%r8,%rsi
	andq	%rbp,%rsi		/* rsi = nibble 2 of b */
	shrq	$4,%rbp

	/* Round 1: nibble 1 -> rdx:rax at bit 4, nibble 2 -> xmm0 at byte 1. */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx
	andq	%rbp,%rdi		/* rdi = nibble 3 of b */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$60,%rbx
	xorq	%rcx,%rax
	pslldq	$1,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* rsi = nibble 4 of b */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	/* Round 2: nibble 3 -> rdx:rax at bit 12, nibble 4 -> xmm0 at byte 2. */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi		/* rdi = nibble 5 of b */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* rsi = nibble 6 of b */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	/* Round 3: nibble 5 -> rdx:rax at bit 20, nibble 6 -> xmm0 at byte 3. */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi		/* rdi = nibble 7 of b */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* rsi = nibble 8 of b */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	/* Round 4: nibble 7 -> rdx:rax at bit 28, nibble 8 -> xmm0 at byte 4. */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi		/* rdi = nibble 9 of b */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* rsi = nibble 10 of b */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	/* Round 5: nibble 9 -> rdx:rax at bit 36, nibble 10 -> xmm0 at byte 5. */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi		/* rdi = nibble 11 of b */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* rsi = nibble 12 of b */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	/* Round 6: nibble 11 -> rdx:rax at bit 44, nibble 12 -> xmm0 at byte 6. */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi		/* rdi = nibble 13 of b */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* rsi = nibble 14 of b */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	/* Round 7: nibble 13 -> rdx:rax at bit 52, nibble 14 -> xmm0 at byte 7. */
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi		/* rdi = nibble 15 of b */
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi		/* b is exhausted; rsi is overwritten below */
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0

	/* Final: nibble 15 -> rdx:rax at bit 60, then merge the SSE half. */
	movq	(%rsp,%rdi,8),%rcx
	movq	%rcx,%rbx
	shlq	$60,%rcx
.byte	102,72,15,126,198		/* movq %xmm0,%rsi  (low 64 bits of xmm0) */
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
.byte	102,72,15,126,199		/* movq %xmm0,%rdi  (former high 64 bits) */
	xorq	%rsi,%rax		/* lo ^= SSE low half */
	xorq	%rdi,%rdx		/* hi ^= SSE high half */

	addq	$128+8,%rsp
.cfi_adjust_cfa_offset	-128-8
	.byte	0xf3,0xc3		/* repz ret */
.Lend_mul_1x1:
.cfi_endproc
.size	_mul_1x1,.-_mul_1x1

/*
 * bn_GF2m_mul_2x2 -- 128 x 128 -> 256-bit carry-less multiply (Karatsuba).
 *
 * ABI: System V AMD64 register arguments.
 *   In:    %rdi = r, pointer to four 64-bit result words
 *          %rsi = a1 (high word of A)    %rdx = a0 (low word of A)
 *          %rcx = b1 (high word of B)    %r8  = b0 (low word of B)
 *   Out:   r[0..3] = A * B over GF(2)[x], least significant word first.
 *   NOTE(review): presumably declared in C as
 *          void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
 *                               BN_ULONG b1, BN_ULONG b0);
 *          confirm against the C caller.
 *
 * Two code paths, chosen by bit 33 of the 64-bit word loaded from
 * OPENSSL_ia32cap_P:
 *   - bit set:   three PCLMULQDQ multiplications (emitted as .byte so that
 *                assemblers without the mnemonic still accept the file);
 *   - bit clear: three calls to _mul_1x1 using a 136-byte stack frame.
 *   NOTE(review): bit 33 is expected to be the PCLMULQDQ capability flag;
 *   confirm against the OPENSSL_ia32cap documentation.
 *
 * Frame layout on the _mul_1x1 path (offsets from %rsp after the lea):
 *     0  lo(a0*b0)     8  hi(a0*b0)    16  lo(a1*b1)    24  hi(a1*b1)
 *    32  r            40  a1           48  a0           56  b1    64  b0
 *    80  %r14         88  %r13         96  %r12        104  %rbp 112  %rbx
 */
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
.cfi_startproc
	movq	%rsp,%rax		/* not read on either path visible here */
	movq	OPENSSL_ia32cap_P(%rip),%r10
	btq	$33,%r10
	jnc	.Lvanilla_mul_2x2	/* bit clear: use the table-driven path */

.byte	102,72,15,110,198		/* movq %rsi,%xmm0   xmm0 = a1 */
.byte	102,72,15,110,201		/* movq %rcx,%xmm1   xmm1 = b1 */
.byte	102,72,15,110,210		/* movq %rdx,%xmm2   xmm2 = a0 */
.byte	102,73,15,110,216		/* movq %r8,%xmm3    xmm3 = b0 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
.byte	102,15,58,68,193,0		/* pclmulqdq $0,%xmm1,%xmm0   a1*b1 */
	pxor	%xmm2,%xmm4		/* xmm4 = a0^a1 */
	pxor	%xmm3,%xmm5		/* xmm5 = b0^b1 */
.byte	102,15,58,68,211,0		/* pclmulqdq $0,%xmm3,%xmm2   a0*b0 */
.byte	102,15,58,68,229,0		/* pclmulqdq $0,%xmm5,%xmm4   (a0^a1)*(b0^b1) */
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4		/* xmm4 = middle term */
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4		/* low half of middle term, moved up 64 bits */
	psrldq	$8,%xmm5		/* high half of middle term, moved down 64 bits */
	pxor	%xmm4,%xmm2		/* xmm2 = result bits 0..127 */
	pxor	%xmm5,%xmm0		/* xmm0 = result bits 128..255 */
	movdqu	%xmm2,0(%rdi)		/* r[0..1] */
	movdqu	%xmm0,16(%rdi)		/* r[2..3] */
	.byte	0xf3,0xc3		/* repz ret */

.align	16
.Lvanilla_mul_2x2:
	leaq	-136(%rsp),%rsp		/* 8*17-byte frame */
.cfi_adjust_cfa_offset	8*17
	/* _mul_1x1 overwrites these callee-saved registers. */
	movq	%r14,80(%rsp)
.cfi_rel_offset	%r14,8*10
	movq	%r13,88(%rsp)
.cfi_rel_offset	%r13,8*11
	movq	%r12,96(%rsp)
.cfi_rel_offset	%r12,8*12
	movq	%rbp,104(%rsp)
.cfi_rel_offset	%rbp,8*13
	movq	%rbx,112(%rsp)
.cfi_rel_offset	%rbx,8*14
.Lbody_mul_2x2:
	/* Spill the arguments; _mul_1x1 overwrites their registers. */
	movq	%rdi,32(%rsp)		/* r  */
	movq	%rsi,40(%rsp)		/* a1 */
	movq	%rdx,48(%rsp)		/* a0 */
	movq	%rcx,56(%rsp)		/* b1 */
	movq	%r8,64(%rsp)		/* b0 */

	movq	$0xf,%r8		/* nibble mask, left unchanged by _mul_1x1 */
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1		/* rdx:rax = a1*b1 */
	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1		/* rdx:rax = a0*b0 */
	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax		/* a1^a0 */
	xorq	64(%rsp),%rbp		/* b1^b0 */
	call	_mul_1x1		/* rdx:rax = (a0^a1)*(b0^b1) = mh:ml */
	movq	0(%rsp),%rbx		/* l0 = lo(a0*b0) */
	movq	8(%rsp),%rcx		/* h0 = hi(a0*b0) */
	movq	16(%rsp),%rdi		/* l1 = lo(a1*b1) */
	movq	24(%rsp),%rsi		/* h1 = hi(a1*b1) */
	movq	32(%rsp),%rbp		/* rbp = r */

	/* Karatsuba recombination; every addition is XOR. */
	xorq	%rdx,%rax		/* rax = ml^mh */
	xorq	%rcx,%rdx		/* rdx = mh^h0 */
	xorq	%rbx,%rax		/* rax = ml^mh^l0 */
	movq	%rbx,0(%rbp)		/* r[0] = l0 */
	xorq	%rdi,%rdx		/* rdx = mh^h0^l1 */
	movq	%rsi,24(%rbp)		/* r[3] = h1 */
	xorq	%rsi,%rax		/* rax = ml^mh^l0^h1 */
	xorq	%rsi,%rdx		/* rdx = mh^h0^l1^h1 */
	xorq	%rdx,%rax		/* rax = ml^l0^h0^l1 */
	movq	%rdx,16(%rbp)		/* r[2] */
	movq	%rax,8(%rbp)		/* r[1] */

	movq	80(%rsp),%r14
.cfi_restore	%r14
	movq	88(%rsp),%r13
.cfi_restore	%r13
	movq	96(%rsp),%r12
.cfi_restore	%r12
	movq	104(%rsp),%rbp
.cfi_restore	%rbp
	movq	112(%rsp),%rbx
.cfi_restore	%rbx
	leaq	136(%rsp),%rsp
.cfi_adjust_cfa_offset	-8*17
.Lepilogue_mul_2x2:
	.byte	0xf3,0xc3		/* repz ret */
.Lend_mul_2x2:
.cfi_endproc
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
/*
 * NUL-terminated identification string placed after the code:
 * "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16