1dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
3
4dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
5dnl
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of the GNU Lesser General Public License as published
10dnl  by the Free Software Foundation; either version 3 of the License, or (at
11dnl  your option) any later version.
12dnl
13dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16dnl  License for more details.
17dnl
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23C           cycles/limb
24C Itanium:      2
25C Itanium 2:    1
26
27C TODO
28C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
29C    wind-down code).
30
31C INPUT PARAMETERS
dnl  Argument registers.  The code below stores through rp, loads through up
dnl  and vp (each access steps the pointer by 8 bytes, i.e. one limb) and uses
dnl  n as the limb count.
define(`rp', `r32')
define(`up', `r33')
define(`vp', `r34')
define(`n', `r35')

dnl  Operation selection.
dnl
dnl  One OPERATION_xxx symbol picks the function to build.  Each choice defines
dnl  three macros:
dnl    func      name of the generated entry point
dnl    logop     two-input logical instruction, dst = src1 op src2
dnl    notormov  second pipeline step: a plain copy, or the bitwise complement
dnl              written as -1 - src for the variants that invert the result
dnl
dnl  mpn_iorn_n has no single matching instruction: it uses andcm with the
dnl  sources swapped, giving src2 AND NOT src1, and then complements that,
dnl  which equals src1 OR NOT src2.
dnl
dnl  OPERATION_ior_n is only a fallback for the case where nothing was selected
dnl  from outside.  Defining it unconditionally would make the ior_n block
dnl  below redefine func, logop and notormov after the and_n, andn_n and nand_n
dnl  blocks have run, so those three functions could never be generated.
ifdef(`OPERATION_and_n',,
`ifdef(`OPERATION_andn_n',,
`ifdef(`OPERATION_nand_n',,
`ifdef(`OPERATION_iorn_n',,
`ifdef(`OPERATION_nior_n',,
`ifdef(`OPERATION_xor_n',,
`ifdef(`OPERATION_xnor_n',,
`define(`OPERATION_ior_n',1)')')')')')')')

ifdef(`OPERATION_and_n',
`	define(`func',`mpn_and_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_andn_n',
`	define(`func',`mpn_andn_n')
	define(`logop',		`andcm	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_nand_n',
`	define(`func',`mpn_nand_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_ior_n',
`	define(`func',`mpn_ior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_iorn_n',
`	define(`func',`mpn_iorn_n')
	define(`logop',		`andcm	$1 = $3, $2')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_nior_n',
`	define(`func',`mpn_nior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_xor_n',
`	define(`func',`mpn_xor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_xnor_n',
`	define(`func',`mpn_xnor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
71
C func -- apply the selected bitwise operation to the n-limb operands at up
C and vp, storing n result limbs at rp.  All three pointers advance by 8 bytes
C per limb.
C
C Register roles in this routine:
C   r2        copy of ar.lc taken on entry, written back before the return
C   r10, r11  limb 0 of up and vp, loaded before the dispatch
C   r16-r19   limbs loaded from up
C   r20-r23   limbs loaded from vp
C   r14, r15  logop results, used alternately
C   r8, r9    notormov results, used alternately; these are the stored values
C   p6-p8     n mod 4 is 1, 2, 3
C   p15       n is greater than 4 (signed compare); p14 is written, never read
C
C Limb 0 is loaded unconditionally and n = 0 follows the n = 4 path, so a zero
C count is not handled: it would read and write four limbs.
72ASM_START()
73PROLOGUE(func)
74	.prologue
C Unwind annotation matching the copy of ar.lc into r2 made below.
75	.save	ar.lc, r2
76	.body
C With the 32-bit ABI, widen the three pointers and zero-extend n first.
77ifdef(`HAVE_ABI_32',
78`	addp4	rp = 0, rp			C			M I
79	addp4	up = 0, up			C			M I
80	addp4	vp = 0, vp			C			M I
81	zxt4	n = n				C			I
82	;;
83')
C Load limb 0 of both operands and keep the incoming loop counter in r2.
84{.mmi
85	ld8		r10 = [up], 8		C			M
86	ld8		r11 = [vp], 8		C			M
87	mov.i		r2 = ar.lc		C			I0
88}
C r14 = n mod 4, p15 = (n > 4), n = n / 4 rounded down.
89{.mmi
90	and		r14 = 3, n		C			M I
91	cmp.lt		p15, p14 = 4, n		C			M I
92	shr.u		n = n, 2		C			I0
93	;;
94}
C Dispatch on n mod 4.  Remainder 0 matches none of the compares and falls
C through to .Lb00.
95{.mmi
96	cmp.eq		p6, p0 = 1, r14		C			M I
97	cmp.eq		p7, p0 = 2, r14		C			M I
98	cmp.eq		p8, p0 = 3, r14		C			M I
99}
100{.bbb
101   (p6)	br.dptk		.Lb01			C			B
102   (p7)	br.dptk		.Lb10			C			B
103   (p8)	br.dptk		.Lb11			C			B
104}
105
C n mod 4 = 0.  Load limbs 1-3 and precompute n/4 - 2 as the loop count.
C With p15 clear the four loaded limbs are finished in the wind-down code
C starting at .Lcj4; otherwise .grt4 loads two more limb pairs and enters the
C loop at .LL00.
106.Lb00:	ld8		r17 = [up], 8		C			M
107	ld8		r21 = [vp], 8		C			M
108	add		n = -2, n		C			M I
109	;;
110	ld8		r18 = [up], 8		C			M
111	ld8		r22 = [vp], 8		C			M
112	;;
113	ld8		r19 = [up], 8		C			M
114	ld8		r23 = [vp], 8		C			M
115  (p15)	br.cond.dpnt	.grt4			C			B
116
117	logop(		r14, r10, r11)		C			M I
118	;;
119	logop(		r15, r17, r21)		C			M I
120	notormov(	r8, r14)		C			M I
121	br		.Lcj4			C			B
122
123.grt4:	logop(		r14, r10, r11)		C			M I
124	ld8		r16 = [up], 8		C			M
125	ld8		r20 = [vp], 8		C			M
126	;;
127	logop(		r15, r17, r21)		C			M I
128	ld8		r17 = [up], 8		C			M
129	mov.i		ar.lc = n		C			I0
130	notormov(	r8, r14)		C			M I
131	ld8		r21 = [vp], 8		C			M
132	br		.LL00			C			B
133
C n mod 4 = 1.  Loop count candidate is n/4 - 1.  With p15 clear only limb 0
C is pending and it is stored at .Lcj1.
134.Lb01:	add		n = -1, n		C			M I
135	logop(		r15, r10, r11)		C			M I
136  (p15)	br.cond.dpnt	.grt1			C			B
137	;;
138
139	notormov(	r9, r15)		C			M I
140	br		.Lcj1			C			B
141
C Load limbs 1-4 and set ar.lc.  br.cloop goes on to .grt5 and the loop while
C the count is nonzero; with a zero count the five limbs in flight are
C finished from .Lcj5.
142.grt1:	ld8		r16 = [up], 8		C			M
143	ld8		r20 = [vp], 8		C			M
144	;;
145	ld8		r17 = [up], 8		C			M
146	ld8		r21 = [vp], 8		C			M
147	mov.i		ar.lc = n		C			I0
148	;;
149	ld8		r18 = [up], 8		C			M
150	ld8		r22 = [vp], 8		C			M
151	;;
152	ld8		r19 = [up], 8		C			M
153	ld8		r23 = [vp], 8		C			M
154	br.cloop.dptk	.grt5			C			B
155	;;
156
157	logop(		r14, r16, r20)		C			M I
158	notormov(	r9, r15)		C			M I
159	br		.Lcj5			C			B
160
161.grt5:	logop(		r14, r16, r20)		C			M I
162	ld8		r16 = [up], 8		C			M
163	notormov(	r9, r15)		C			M I
164	ld8		r20 = [vp], 8		C			M
165	br		.LL01			C			B
166
C n mod 4 = 2.  Load limb 1.  With p15 clear the two limbs are finished from
C .Lcj2.
167.Lb10:	ld8		r19 = [up], 8		C			M
168	ld8		r23 = [vp], 8		C			M
169  (p15)	br.cond.dpnt	.grt2			C			B
170
171	logop(		r14, r10, r11)		C			M I
172	;;
173	logop(		r15, r19, r23)		C			M I
174	notormov(	r8, r14)		C			M I
175	br		.Lcj2			C			B
176
C Load four more limb pairs, set ar.lc to n/4 - 1 and enter at the top of the
C loop.  If the count is already zero, br.cloop falls through and the six
C limbs in flight are finished from .Lcj6.
177.grt2:	ld8		r16 = [up], 8		C			M
178	ld8		r20 = [vp], 8		C			M
179	add		n = -1, n		C			M I
180	;;
181	ld8		r17 = [up], 8		C			M
182	ld8		r21 = [vp], 8		C			M
183	logop(		r14, r10, r11)		C			M I
184	;;
185	ld8		r18 = [up], 8		C			M
186	ld8		r22 = [vp], 8		C			M
187	mov.i		ar.lc = n		C			I0
188	;;
189	logop(		r15, r19, r23)		C			M I
190	ld8		r19 = [up], 8		C			M
191	notormov(	r8, r14)		C			M I
192	ld8		r23 = [vp], 8		C			M
193	br.cloop.dptk	.Loop			C			B
194	br		.Lcj6			C			B
195
C n mod 4 = 3.  Load limbs 1 and 2; loop count candidate is n/4 - 1.  With
C p15 clear the three limbs are finished from .Lcj3.
196.Lb11:	ld8		r18 = [up], 8		C			M
197	ld8		r22 = [vp], 8		C			M
198	add		n = -1, n		C			M I
199	;;
200	ld8		r19 = [up], 8		C			M
201	ld8		r23 = [vp], 8		C			M
202	logop(		r15, r10, r11)		C			M I
203  (p15)	br.cond.dpnt	.grt3			C			B
204	;;
205
206	logop(		r14, r18, r22)		C			M I
207	notormov(	r9, r15)		C			M I
208	br		.Lcj3			C			B
209
C Load three more limb pairs, set ar.lc and enter the loop at .LL11.
210.grt3:	ld8		r16 = [up], 8		C			M
211	ld8		r20 = [vp], 8		C			M
212	;;
213	ld8		r17 = [up], 8		C			M
214	ld8		r21 = [vp], 8		C			M
215	mov.i		ar.lc = n		C			I0
216	;;
217	logop(		r14, r18, r22)		C			M I
218	ld8		r18 = [up], 8		C			M
219	notormov(	r9, r15)		C			M I
220	ld8		r22 = [vp], 8		C			M
221	br		.LL11			C			B
222
C Four stages per trip, one limb each.  Every stage stores a finished limb
C from r8 or r9, applies logop to a limb pair loaded earlier, applies notormov
C to the previous logop result, and loads the next limb pair.  The entry
C points .Loop, .LL01, .LL00 and .LL11 are the four stages; which one is used
C depends on n mod 4 as set up above.
223C *** MAIN LOOP START ***
224	ALIGN(32)
225.Loop:	st8		[rp] = r8, 8		C			M
226	logop(		r14, r16, r20)		C			M I
227	notormov(	r9, r15)		C			M I
228	ld8		r16 = [up], 8		C			M
229	ld8		r20 = [vp], 8		C			M
230	nop.b		0
231	;;
232.LL01:	st8		[rp] = r9, 8		C			M
233	logop(		r15, r17, r21)		C			M I
234	notormov(	r8, r14)		C			M I
235	ld8		r17 = [up], 8		C			M
236	ld8		r21 = [vp], 8		C			M
237	nop.b		0
238	;;
239.LL00:	st8		[rp] = r8, 8		C			M
240	logop(		r14, r18, r22)		C			M I
241	notormov(	r9, r15)		C			M I
242	ld8		r18 = [up], 8		C			M
243	ld8		r22 = [vp], 8		C			M
244	nop.b		0
245	;;
246.LL11:	st8		[rp] = r9, 8		C			M
247	logop(		r15, r19, r23)		C			M I
248	notormov(	r8, r14)		C			M I
249	ld8		r19 = [up], 8		C			M
250	ld8		r23 = [vp], 8		C			M
251	br.cloop.dptk	.Loop	;;		C			B
252C *** MAIN LOOP END ***
253
C Wind-down: same stages as the loop but without further loads.  Entering at
C .LcjK leaves exactly K limbs still to be stored.
254.Lcj6:	st8		[rp] = r8, 8		C			M
255	logop(		r14, r16, r20)		C			M I
256	notormov(	r9, r15)		C			M I
257	;;
258.Lcj5:	st8		[rp] = r9, 8		C			M
259	logop(		r15, r17, r21)		C			M I
260	notormov(	r8, r14)		C			M I
261	;;
262.Lcj4:	st8		[rp] = r8, 8		C			M
263	logop(		r14, r18, r22)		C			M I
264	notormov(	r9, r15)		C			M I
265	;;
266.Lcj3:	st8		[rp] = r9, 8		C			M
267	logop(		r15, r19, r23)		C			M I
268	notormov(	r8, r14)		C			M I
269	;;
270.Lcj2:	st8		[rp] = r8, 8		C			M
271	notormov(	r9, r15)		C			M I
272	;;
C Store the last limb, put the saved loop counter back and return.
273.Lcj1:	st8		[rp] = r9, 8		C			M
274	mov.i		ar.lc = r2		C			I0
275	br.ret.sptk.many b0			C			B
276EPILOGUE()
277ASM_END()
278