/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * arch/ia64/lib/xor.S
 *
 * Optimized RAID-5 checksumming functions for IA-64.
 */

#include <asm/asmmacro.h>
#include <asm/export.h>

/*
 * xor_ia64_2(in0, in1, in2)
 *
 * XOR two buffers together one 64-bit word at a time and store the result
 * over the first buffer:  in1[i] = in1[i] ^ in2[i].
 *
 * In:
 *	in0	length in bytes; it is shifted right by 3, so only whole
 *		8-byte words are processed
 *	in1	first source buffer, also the destination
 *	in2	second source buffer
 *
 * Register use:
 *	r8	store pointer (starts at in1)
 *	r16,r17	load pointers for in1 and in2
 *	r29-r31	saved pr, ar.lc and ar.pfs (recorded by the .save directives)
 *
 * The input registers belong to the rotating region requested by alloc, so
 * the pointers are copied to static registers before the loop starts.
 *
 * The loop is software-pipelined with rotating registers and br.ctop:
 *	stage 0	(p[0])	 load one word from each source
 *	stage 6	(p[6])	 xor the words that were loaded six iterations earlier
 *	stage 7	(p[6+1]) store the result computed one iteration earlier
 *
 * NOTE(review): if in0 < 8, (in0 >> 3) - 1 is -1 and that value is written
 * to ar.lc.  Callers presumably always pass a positive multiple of 8 --
 * confirm.
 */
GLOBAL_ENTRY(xor_ia64_2)
	.prologue
	.fframe 0			/* fixed memory stack frame size is 0 */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 3, 0, 13, 16	/* 3 in, 0 local, 13 out, 16 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1			/* r8 = store pointer */
	mov ar.ec = 6 + 2		/* epilogue count = number of pipeline stages */
	shr in0 = in0, 3		/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0		/* ar.lc is loaded with (word count - 1) */
	mov r16 = in1
	mov r17 = in2
	;;
	mov ar.lc = in0
	mov pr.rot = 1 << 16		/* p16 (= p[0]) set, other rotating predicates clear */
	;;
	.rotr s1[6+1], s2[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]
(p[6+1])st8.nta [r8] = d[1], 8	/* d[1] is the d[0] written one iteration ago */
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30			/* restore the caller's loop count */
	mov pr = r29, -1		/* restore every predicate register */
	br.ret.sptk.few rp
END(xor_ia64_2)
EXPORT_SYMBOL(xor_ia64_2)

/*
 * xor_ia64_3(in0, in1, in2, in3)
 *
 * XOR three buffers together one 64-bit word at a time and store the result
 * over the first buffer:  in1[i] = in1[i] ^ in2[i] ^ in3[i].
 *
 * In:
 *	in0	length in bytes; it is shifted right by 3, so only whole
 *		8-byte words are processed
 *	in1	first source buffer, also the destination
 *	in2,in3	remaining source buffers
 *
 * Register use:
 *	r8	store pointer (starts at in1)
 *	r16-r18	load pointers for in1..in3
 *	r29-r31	saved pr, ar.lc and ar.pfs (recorded by the .save directives)
 *
 * The input registers belong to the rotating region requested by alloc, so
 * the pointers are copied to static registers before the loop starts.
 *
 * The loop is software-pipelined with rotating registers and br.ctop:
 *	stage 0	(p[0])	 load one word from each source
 *	stage 6	(p[6])	 xor the words that were loaded six iterations earlier
 *	stage 7	(p[6+1]) store the result completed one iteration earlier
 * The loop body is split into two instruction groups because the second xor
 * consumes the d[0] produced by the first.
 *
 * NOTE(review): if in0 < 8, (in0 >> 3) - 1 is -1 and that value is written
 * to ar.lc.  Callers presumably always pass a positive multiple of 8 --
 * confirm.
 */
GLOBAL_ENTRY(xor_ia64_3)
	.prologue
	.fframe 0			/* fixed memory stack frame size is 0 */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 4, 0, 20, 24	/* 4 in, 0 local, 20 out, 24 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1			/* r8 = store pointer */
	mov ar.ec = 6 + 2		/* epilogue count = number of pipeline stages */
	shr in0 = in0, 3		/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0		/* ar.lc is loaded with (word count - 1) */
	mov r16 = in1
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16		/* p16 (= p[0]) set, other rotating predicates clear */
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]	/* d = s1 ^ s2 */
	;;
(p[0])	ld8.nta s3[0] = [r18], 8
(p[6+1])st8.nta [r8] = d[1], 8	/* d[1] is the d[0] completed one iteration ago */
(p[6])	xor d[0] = d[0], s3[6]	/* d ^= s3 */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30			/* restore the caller's loop count */
	mov pr = r29, -1		/* restore every predicate register */
	br.ret.sptk.few rp
END(xor_ia64_3)
EXPORT_SYMBOL(xor_ia64_3)

/*
 * xor_ia64_4(in0, in1, in2, in3, in4)
 *
 * XOR four buffers together one 64-bit word at a time and store the result
 * over the first buffer:  in1[i] = in1[i] ^ in2[i] ^ in3[i] ^ in4[i].
 *
 * In:
 *	in0	length in bytes; it is shifted right by 3, so only whole
 *		8-byte words are processed
 *	in1	first source buffer, also the destination
 *	in2-in4	remaining source buffers
 *
 * Register use:
 *	r8	store pointer (starts at in1)
 *	r16-r19	load pointers for in1..in4
 *	r20	static scratch: s3 ^ s4, produced and consumed within one
 *		loop iteration
 *	r29-r31	saved pr, ar.lc and ar.pfs (recorded by the .save directives)
 *
 * The input registers belong to the rotating region requested by alloc, so
 * the pointers are copied to static registers before the loop starts.
 *
 * The loop is software-pipelined with rotating registers and br.ctop:
 *	stage 0	(p[0])	 load one word from each source
 *	stage 6	(p[6])	 xor the words that were loaded six iterations earlier
 *	stage 7	(p[6+1]) store the result completed one iteration earlier
 * The two pairwise xors share the first instruction group; the second group
 * combines their results.
 *
 * NOTE(review): if in0 < 8, (in0 >> 3) - 1 is -1 and that value is written
 * to ar.lc.  Callers presumably always pass a positive multiple of 8 --
 * confirm.
 */
GLOBAL_ENTRY(xor_ia64_4)
	.prologue
	.fframe 0			/* fixed memory stack frame size is 0 */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 5, 0, 27, 32	/* 5 in, 0 local, 27 out, 32 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1			/* r8 = store pointer */
	mov ar.ec = 6 + 2		/* epilogue count = number of pipeline stages */
	shr in0 = in0, 3		/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0		/* ar.lc is loaded with (word count - 1) */
	mov r16 = in1
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16		/* p16 (= p[0]) set, other rotating predicates clear */
	mov r19 = in4
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]	/* d = s1 ^ s2 */
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r20 = s3[6], s4[6]	/* r20 = s3 ^ s4 */
	;;
(p[6+1])st8.nta [r8] = d[1], 8	/* d[1] is the d[0] completed one iteration ago */
(p[6])	xor d[0] = d[0], r20	/* d ^= r20 */
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30			/* restore the caller's loop count */
	mov pr = r29, -1		/* restore every predicate register */
	br.ret.sptk.few rp
END(xor_ia64_4)
EXPORT_SYMBOL(xor_ia64_4)

/*
 * xor_ia64_5(in0, in1, in2, in3, in4, in5)
 *
 * XOR five buffers together one 64-bit word at a time and store the result
 * over the first buffer:
 *	in1[i] = in1[i] ^ in2[i] ^ in3[i] ^ in4[i] ^ in5[i].
 *
 * In:
 *	in0	length in bytes; it is shifted right by 3, so only whole
 *		8-byte words are processed
 *	in1	first source buffer, also the destination
 *	in2-in5	remaining source buffers
 *
 * Register use:
 *	r8	store pointer (starts at in1)
 *	r16-r20	load pointers for in1..in5
 *	r21	static scratch: s3 ^ s4, produced and consumed within one
 *		loop iteration
 *	r29-r31	saved pr, ar.lc and ar.pfs (recorded by the .save directives)
 *
 * The input registers belong to the rotating region requested by alloc, so
 * the pointers are copied to static registers before the loop starts.
 *
 * The loop is software-pipelined with rotating registers and br.ctop:
 *	stage 0	(p[0])	 load one word from each source
 *	stage 6	(p[6])	 xor the words that were loaded six iterations earlier
 *	stage 7	(p[6+1]) store the result completed one iteration earlier
 * The loop body spans three instruction groups: two pairwise xors, then the
 * combination of their results, then the fold-in of the fifth source.
 *
 * NOTE(review): if in0 < 8, (in0 >> 3) - 1 is -1 and that value is written
 * to ar.lc.  Callers presumably always pass a positive multiple of 8 --
 * confirm.
 */
GLOBAL_ENTRY(xor_ia64_5)
	.prologue
	.fframe 0			/* fixed memory stack frame size is 0 */
	.save ar.pfs, r31
	alloc r31 = ar.pfs, 6, 0, 34, 40	/* 6 in, 0 local, 34 out, 40 rotating */
	.save ar.lc, r30
	mov r30 = ar.lc
	.save pr, r29
	mov r29 = pr
	;;
	.body
	mov r8 = in1			/* r8 = store pointer */
	mov ar.ec = 6 + 2		/* epilogue count = number of pipeline stages */
	shr in0 = in0, 3		/* bytes -> 8-byte words */
	;;
	adds in0 = -1, in0		/* ar.lc is loaded with (word count - 1) */
	mov r16 = in1
	mov r17 = in2
	;;
	mov r18 = in3
	mov ar.lc = in0
	mov pr.rot = 1 << 16		/* p16 (= p[0]) set, other rotating predicates clear */
	mov r19 = in4
	mov r20 = in5
	;;
	.rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
	.rotp p[6+2]
0:
(p[0])	ld8.nta s1[0] = [r16], 8
(p[0])	ld8.nta s2[0] = [r17], 8
(p[6])	xor d[0] = s1[6], s2[6]	/* d = s1 ^ s2 */
(p[0])	ld8.nta s3[0] = [r18], 8
(p[0])	ld8.nta s4[0] = [r19], 8
(p[6])	xor r21 = s3[6], s4[6]	/* r21 = s3 ^ s4 */
	;;
(p[0])	ld8.nta s5[0] = [r20], 8
(p[6+1])st8.nta [r8] = d[1], 8	/* d[1] is the d[0] completed one iteration ago */
(p[6])	xor d[0] = d[0], r21	/* d ^= r21 */
	;;
(p[6])	xor d[0] = d[0], s5[6]	/* d ^= s5 */
	nop.f 0
	br.ctop.dptk.few 0b
	;;
	mov ar.lc = r30			/* restore the caller's loop count */
	mov pr = r29, -1		/* restore every predicate register */
	br.ret.sptk.few rp
END(xor_ia64_5)
EXPORT_SYMBOL(xor_ia64_5)
