! Preprocessed SPARC assembly. The macros and constants used below
! (SPARC_PIC_THUNK, SPARC_LOAD_ADDRESS_LEAF, SPARCV9_VIS3, STACK_FRAME,
! STACK_BIAS) are not defined in this file; presumably they come from
! this header - TODO confirm.
#include <sparc_arch.h>

! In 64-bit builds declare %g2 and %g3 as scratch: the table-lookup
! path of bn_GF2m_mul_2x2 uses both as temporaries.
#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

! Position-independent builds only: instantiate the thunk macro on %g1.
#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

! ----------------------------------------------------------------------
! bn_GF2m_mul_2x2
!
! Carry-less (XOR-accumulated) product of two 64-bit polynomials over
! GF(2). Each operand arrives as two 32-bit halves; the 128-bit result
! is written as four 32-bit words.
!
! In:   %o0         result pointer r (16 bytes are stored)
!       %o1, %o2    high and low 32-bit half of a
!       %o3, %o4    high and low 32-bit half of b
! Out:  r[0], r[1]  low and high word of the low 64 bits of the product
!       r[2], r[3]  low and high word of the high 64 bits of the product
!
! The implementation is chosen at run time from the VIS3 capability bit
! in word 0 of OPENSSL_sparcv9cap_P:
!   - VIS3 path: leaf code; xmulx/xmulxhi are emitted as raw .word
!     opcodes.
!   - .Lsoftware: opens a register window with 128 bytes of locals
!     holding a 16-entry table of 64-bit multiples of a, then consumes
!     b four bits at a time.
!
! NOTE(review): the low halves (%o2, %o4) are ORed in without masking,
! so their upper 32 bits are assumed to be zero on entry - confirm
! against the calling convention in use.
! ----------------------------------------------------------------------
.globl	bn_GF2m_mul_2x2
.align	16
bn_GF2m_mul_2x2:
        SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
        ld	[%g1+0],%g1             	! OPENSSL_sparcv9cap_P[0]

        andcc	%g1, SPARCV9_VIS3, %g0
        bz,pn	%icc,.Lsoftware
        nop

	! VIS3 path: a = %o1:%o2 and b = %o3:%o4 are packed into 64-bit
	! registers and multiplied in hardware.
	sllx	%o1, 32, %o1
	sllx	%o3, 32, %o3
	or	%o2, %o1, %o1
	or	%o4, %o3, %o3
	.word	0x95b262ab			! xmulx   %o1, %o3, %o2
	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
	srlx	%o2, 32, %o1			! 13 cycles later
	st	%o2, [%o0+0]
	st	%o1, [%o0+4]
	srlx	%o4, 32, %o3
	st	%o4, [%o0+8]
	retl
	st	%o3, [%o0+12]			! delay slot: last result word

! ----------------------------------------------------------------------
! Table-lookup path. Register roles after the save:
!   %i0        result pointer
!   %g1        a, later the low 64 bits of the product (lo)
!   %o7        b
!   %l0        base of the 16-entry table, tab
!   %o0..%o3   a1, a2, a4, a8 (a with its top three bits cleared,
!              shifted left by 0, 1, 2, 3); %o3 later becomes the
!              high 64 bits of the product (hi)
!   %o4, %o5   masks, later a1^a2 and a4^a8
!   %g4, %g5   broadcast masks, later table byte offsets and entries
!   %g2, %g3   shifted temporaries
! ----------------------------------------------------------------------
.align	16
.Lsoftware:
	save	%sp,-STACK_FRAME-128,%sp

	sllx	%i1,32,%g1
	mov	-1,%o4
	sllx	%i3,32,%o7
	or	%i2,%g1,%g1
	srlx	%o4,1,%o5			! 0x7fff...
	or	%i4,%o7,%o7
	srlx	%o4,2,%o4			! 0x3fff...
	add	%sp,STACK_BIAS+STACK_FRAME,%l0

	sllx	%g1,2,%o2
	mov	%g1,%o0
	sllx	%g1,1,%o1

	! The top three bits of a are kept out of the table so that
	! a8 = a1<<3 cannot overflow 64 bits. Each one is turned into an
	! all-ones or all-zeros mask and handled separately further down.
	srax	%o2,63,%g5			! broadcast 61st bit
	and	%o5,%o2,%o2			! (a<<2)&0x7fff...
	srlx	%o5,2,%o5
	srax	%o1,63,%g4			! broadcast 62nd bit
	and	%o4,%o1,%o1			! (a<<1)&0x3fff...
	srax	%o0,63,%g1			! broadcast 63rd bit
	and	%o5,%o0,%o0			! (a<<0)&0x1fff...

	sllx	%o0,3,%o3
	and	%o7,%g1,%g1			! b if bit 63 of a is set, else 0
	and	%o7,%g4,%g4			! b if bit 62 of a is set, else 0
	and	%o7,%g5,%g5			! b if bit 61 of a is set, else 0

	! Build tab[i] = XOR of the a1/a2/a4/a8 terms selected by the bits
	! of i. The indented xor instructions prepare values for stores
	! that come later.
	stx	%g0,[%l0+0*8]			! tab[0]=0
	xor	%o0,%o1,%o4
	stx	%o0,[%l0+1*8]			! tab[1]=a1
	stx	%o1,[%l0+2*8]			! tab[2]=a2
	 xor	%o2,%o3,%o5
	stx	%o4,[%l0+3*8]			! tab[3]=a1^a2
	 xor	%o2,%o0,%o0

	stx	%o2,[%l0+4*8]			! tab[4]=a4
	xor	%o2,%o1,%o1
	stx	%o0,[%l0+5*8]			! tab[5]=a1^a4
	xor	%o2,%o4,%o4
	stx	%o1,[%l0+6*8]			! tab[6]=a2^a4
	 xor	%o5,%o0,%o0
	stx	%o4,[%l0+7*8]			! tab[7]=a1^a2^a4
	 xor	%o5,%o1,%o1

	stx	%o3,[%l0+8*8]			! tab[8]=a8
	xor	%o5,%o4,%o4
	stx	%o0,[%l0+9*8]			! tab[9]=a1^a8
	 xor	%o2,%o0,%o0
	stx	%o1,[%l0+10*8]			! tab[10]=a2^a8
	 xor	%o2,%o1,%o1
	stx	%o4,[%l0+11*8]		! tab[11]=a1^a2^a8

	! Interleaved with the last stores: fold in the three masked copies
	! of b, shifted by 63, 62 and 61 bits, split across lo and hi.
	xor	%o2,%o4,%o4
	stx	%o5,[%l0+12*8]		! tab[12]=a4^a8
	 srlx	%g1,1,%o3			! hi  = (bit 63 term) >> 1
	stx	%o0,[%l0+13*8]			! tab[13]=a1^a4^a8
	 sllx	%g1,63,%g1			! lo  = (bit 63 term) << 63
	stx	%o1,[%l0+14*8]			! tab[14]=a2^a4^a8
	 srlx	%g4,2,%g2
	stx	%o4,[%l0+15*8]		! tab[15]=a1^a2^a4^a8

	! Table byte offset for nibble k of b is ((b >> 4k) & 0xf) * 8,
	! computed as (b >> (4k-3)) & 120. Nibbles 0 and 1 are looked up
	! here while the remaining bit-62 and bit-61 terms are folded in.
	sllx	%g4,62,%o0
	 sllx	%o7,3,%g4
	srlx	%g5,3,%g3
	 and	%g4,120,%g4
	sllx	%g5,61,%o1
	 ldx	[%l0+%g4],%g4			! tab entry for nibble 0
	 srlx	%o7,4-3,%g5
	xor	%g2,%o3,%o3
	 and	%g5,120,%g5
	xor	%o0,%g1,%g1
	 ldx	[%l0+%g5],%g5			! tab entry for nibble 1
	xor	%g3,%o3,%o3

	xor	%g4,%g1,%g1			! nibble 0: lo ^= entry, no shift
	srlx	%o7,8-3,%g4
	 xor	%o1,%g1,%g1
	and	%g4,120,%g4

	! Unrolled steps for nibbles 1..13. Step n does three things:
	!   lo ^= entry(n) << 4n ; hi ^= entry(n) >> (64-4n)
	!   load the entry for nibble n+1
	!   compute the table offset for nibble n+2
	! %g4/%g5 and %g2/%g3 swap roles from one step to the next.
	! nibble 1
	sllx	%g5,4,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,60,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,12-3,%g5
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	! nibble 2
	sllx	%g4,8,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,56,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,16-3,%g4
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	! nibble 3
	sllx	%g5,12,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,52,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,20-3,%g5
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	! nibble 4
	sllx	%g4,16,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,48,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,24-3,%g4
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	! nibble 5
	sllx	%g5,20,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,44,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,28-3,%g5
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	! nibble 6
	sllx	%g4,24,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,40,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,32-3,%g4
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	! nibble 7
	sllx	%g5,28,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,36,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,36-3,%g5
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	! nibble 8
	sllx	%g4,32,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,32,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,40-3,%g4
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	! nibble 9
	sllx	%g5,36,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,28,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,44-3,%g5
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	! nibble 10
	sllx	%g4,40,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,24,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,48-3,%g4
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	! nibble 11
	sllx	%g5,44,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,20,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,52-3,%g5
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	! nibble 12
	sllx	%g4,48,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,16,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,56-3,%g4
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	! nibble 13
	sllx	%g5,52,%g2
	ldx	[%l0+%g4],%g4
	srlx	%g5,12,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,60-3,%g5
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	! nibble 14: fold it and issue the final load (nibble 15)
	sllx	%g4,56,%g3
	ldx	[%l0+%g5],%g5
	srlx	%g4,8,%g2
	xor	%g3,%g1,%g1

	! nibble 15, plus the pending hi contribution of nibble 14
	sllx	%g5,60,%g3
	 xor	%g2,%o3,%o3
	srlx	%g5,4,%g2
	xor	%g3,%g1,%g1
	xor	%g2,%o3,%o3

	! Store lo (%g1) and hi (%o3) as four 32-bit words, low word first.
	srlx	%g1,32,%i1
	st	%g1,[%i0+0]
	st	%i1,[%i0+4]
	srlx	%o3,32,%i2
	st	%o3,[%i0+8]
	st	%i2,[%i0+12]

	ret
	restore
.type	bn_GF2m_mul_2x2,#function
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
! Attribution string emitted into the object after the routine, followed
! by re-alignment to a 4-byte boundary.
.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro@openssl.org>"
.align	4
