1#ifndef __ASSEMBLER__
2# define __ASSEMBLER__ 1
3#endif
4#include "crypto/sparc_arch.h"
5
6#ifdef	__arch64__
7.register	%g2,#scratch
8.register	%g3,#scratch
9#endif
10
11.section	".text",#alloc,#execinstr
12
13#ifdef	__PIC__
14SPARC_PIC_THUNK(%g1)
15#endif
!-----------------------------------------------------------------------
! int bn_mul_mont_t4_8(u64 *rp, const u64 *ap, const u64 *bp,
!                      const u64 *np, const u32 *n0)
!
! Montgomery multiplication (or squaring when ap==bp) of fixed 8-limb
! (512-bit) operands using the SPARC T4 MONTMUL/MONTSQR instructions
! (emitted below as raw .word opcodes for assemblers that lack the
! mnemonics).  Operands are staged across consecutive register windows
! via save; *n0 is parked in %f60.  Returns 1 in %i0 on success, 0 when
! the hardware path aborted (caller falls back to a software routine).
!
! NOTE(review): on 32-bit builds %g5 carries a 0xffffffff00000000 mask
! that is OR-ed into every %fp; the "and %fp,%g5" checks after each
! restore detect whether an OS window spill/refill destroyed those high
! bits, in which case the result is untrustworthy and we abort.
!-----------------------------------------------------------------------
.globl	bn_mul_mont_t4_8
.align	32
bn_mul_mont_t4_8:
#ifdef	__arch64__
	mov	0,%g5		! 64-bit ABI: no %fp canary needed
	mov	-128,%g4	! frame size
#elif defined(SPARCV9_64BIT_STACK)
	! 32-bit ABI on a 64-bit-capable system: probe the capability word
	! to decide whether a biased 64-bit stack frame may be used.
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4	! 64-bit stack bias
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! no 64-bit stack -> bias 0
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = 0 or 0xffffffff00000000 (%fp canary)
	save	%sp,%g4,%sp
#ifndef	__arch64__
	! cycle through the register windows once so the save/restore
	! sequences below are less likely to trap mid-computation
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4	! %sp bit 0 set <=> biased (64-bit) stack
	or	%g5,%fp,%fp	! plant the canary in %fp's high half
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1		! rp
	mov	%i1,%g2		! ap
	mov	%i2,%g3		! bp
	mov	%i3,%g4		! np
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! stage ap[0..7] in %l0-%l7 of a fresh window; each 64-bit limb is
	! assembled as (word[+4]<<32)|word[+0], i.e. with its 32-bit halves
	! swapped relative to big-endian memory order
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	! stage np[0..7] in %l0-%l7 of the next window, same limb format
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_8	! ap==bp -> squaring path
	nop
	! stage bp[0..7] in %i0-%i5,%l0-%l1 of a fresh window
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word	0x81b02920+8-1	! montmul	8-1
.Lmresume_8:
	fbu,pn	%fcc3,.Lmabort_8	! hardware signals failure via %fcc3
#ifndef	__arch64__
	and	%fp,%g5,%g5		! canary gone -> window was clobbered
	brz,pn	%g5,.Lmabort_8
#endif
	nop
	! unwind the operand windows (re-checking the canary on 32-bit)
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_8
	restore
#endif
	! result limbs are in %l0-%l7; move them to FP regs so they can be
	! stored back as 32-bit halves below
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_8
	mov	0,%i0		! return failure
#endif
	! store rp[0..7], swapping the 32-bit halves back to memory order
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	mov	1,%i0		! return success
.Lmdone_8:
	ret
	restore

.Lmabort_8:
	! hardware/canary failure with operand windows still stacked:
	! pop them all, then report failure
	restore
	restore
	restore
	restore
	restore
.Lmabort1_8:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_8:
	! squaring path (ap==bp): no bp staging, use montsqr instead
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+8-1	! montsqr	8-1
	ba	.Lmresume_8
	nop
.type	bn_mul_mont_t4_8, #function
.size	bn_mul_mont_t4_8, .-bn_mul_mont_t4_8
!-----------------------------------------------------------------------
! int bn_mul_mont_t4_16(u64 *rp, const u64 *ap, const u64 *bp,
!                       const u64 *np, const u32 *n0)
!
! Montgomery multiplication (or squaring when ap==bp) of fixed 16-limb
! (1024-bit) operands via the T4 MONTMUL/MONTSQR instructions (raw
! .word opcodes below).  Same window choreography as bn_mul_mont_t4_8,
! except ap[14..15] are parked in %f24/%f26 (integer regs of the window
! are exhausted) and result limbs 12..15 come back through %f60/%f62
! and %f24/%f26.  Returns 1 in %i0 on success, 0 on hardware abort
! (caller falls back to software).
!
! NOTE(review): on 32-bit builds the high half of %fp acts as a canary
! (see the "and %fp,%g5" checks) detecting destroyed register windows.
!-----------------------------------------------------------------------
.globl	bn_mul_mont_t4_16
.align	32
bn_mul_mont_t4_16:
#ifdef	__arch64__
	mov	0,%g5		! 64-bit ABI: no %fp canary needed
	mov	-128,%g4	! frame size
#elif defined(SPARCV9_64BIT_STACK)
	! 32-bit ABI: probe capability word for 64-bit (biased) stack
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4	! 64-bit stack bias
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! no 64-bit stack -> bias 0
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = 0 or 0xffffffff00000000 (%fp canary)
	save	%sp,%g4,%sp
#ifndef	__arch64__
	! cycle the register windows once before the real save sequence
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4	! %sp bit 0 set <=> biased (64-bit) stack
	or	%g5,%fp,%fp	! plant the canary in %fp's high half
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1		! rp
	mov	%i1,%g2		! ap
	mov	%i2,%g3		! bp
	mov	%i3,%g4		! np
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! stage ap[0..13] in %l0-%l7,%o0-%o5; limbs are half-swapped
	! ((word[+4]<<32)|word[+0]); ap[14..15] go to %f24/%f26
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2	%f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2	%f0,%f6,%f26
	! stage np[0..13] in the next window
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	! np[14..15] in yet another window
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_16	! ap==bp -> squaring path
	nop
	! stage bp[0..13] in %i0-%i5,%l0-%l7 of a fresh window
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	! bp[14..15] in the final window before the multiply
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%o7
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%o7,%i1,%i1
	.word	0x81b02920+16-1	! montmul	16-1
.Lmresume_16:
	fbu,pn	%fcc3,.Lmabort_16	! hardware signals failure via %fcc3
#ifndef	__arch64__
	and	%fp,%g5,%g5		! canary gone -> window was clobbered
	brz,pn	%g5,.Lmabort_16
#endif
	nop
	! unwind the operand windows (re-checking the canary on 32-bit)
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_16
	restore
#endif
	! result limbs 0..13 are in %l0-%l7,%o0-%o5; move to FP regs for
	! the 32-bit stores (limbs 12..13 detour via %f60/%f62)
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_16
	mov	0,%i0		! return failure
#endif
	! store rp[0..15], swapping 32-bit halves back to memory order;
	! limbs 12..15 are fetched from %f60/%f62/%f24/%f26 via fsrc2
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2	%f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2	%f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2	%f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2	%f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	mov	1,%i0		! return success
.Lmdone_16:
	ret
	restore

.Lmabort_16:
	! failure with operand windows still stacked: pop them all
	restore
	restore
	restore
	restore
	restore
.Lmabort1_16:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_16:
	! squaring path (ap==bp): no bp staging, use montsqr instead
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+16-1	! montsqr	16-1
	ba	.Lmresume_16
	nop
.type	bn_mul_mont_t4_16, #function
.size	bn_mul_mont_t4_16, .-bn_mul_mont_t4_16
!-----------------------------------------------------------------------
! int bn_mul_mont_t4_24(u64 *rp, const u64 *ap, const u64 *bp,
!                       const u64 *np, const u32 *n0)
!
! Montgomery multiplication (or squaring when ap==bp) of fixed 24-limb
! (1536-bit) operands via the T4 MONTMUL/MONTSQR instructions (raw
! .word opcodes below).  Same window choreography as bn_mul_mont_t4_8,
! except ap[14..23] are parked in %f24-%f42 and result limbs 12..23 are
! read back through %f60/%f62 and %f24-%f42.  Returns 1 in %i0 on
! success, 0 on hardware abort (caller falls back to software).
!
! NOTE(review): on 32-bit builds the high half of %fp acts as a canary
! (see the "and %fp,%g5" checks) detecting destroyed register windows.
!-----------------------------------------------------------------------
.globl	bn_mul_mont_t4_24
.align	32
bn_mul_mont_t4_24:
#ifdef	__arch64__
	mov	0,%g5		! 64-bit ABI: no %fp canary needed
	mov	-128,%g4	! frame size
#elif defined(SPARCV9_64BIT_STACK)
	! 32-bit ABI: probe capability word for 64-bit (biased) stack
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4	! 64-bit stack bias
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! no 64-bit stack -> bias 0
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = 0 or 0xffffffff00000000 (%fp canary)
	save	%sp,%g4,%sp
#ifndef	__arch64__
	! cycle the register windows once before the real save sequence
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4	! %sp bit 0 set <=> biased (64-bit) stack
	or	%g5,%fp,%fp	! plant the canary in %fp's high half
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1		! rp
	mov	%i1,%g2		! ap
	mov	%i2,%g3		! bp
	mov	%i3,%g4		! np
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! stage ap[0..13] in %l0-%l7,%o0-%o5; limbs are half-swapped
	! ((word[+4]<<32)|word[+0]); ap[14..23] go to %f24-%f42
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2	%f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2	%f0,%f6,%f26
	ld	[%g2+16*8+0],%f1
	ld	[%g2+16*8+4],%f0
	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
	ld	[%g2+17*8+0],%f3
	ld	[%g2+17*8+4],%f2
	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
	ld	[%g2+18*8+0],%f5
	ld	[%g2+18*8+4],%f4
	.word	0x83b00f04 !fsrc2	%f0,%f4,%f32
	ld	[%g2+19*8+0],%f7
	ld	[%g2+19*8+4],%f6
	.word	0x87b00f06 !fsrc2	%f0,%f6,%f34
	ld	[%g2+20*8+0],%f1
	ld	[%g2+20*8+4],%f0
	.word	0x8bb00f00 !fsrc2	%f0,%f0,%f36
	ld	[%g2+21*8+0],%f3
	ld	[%g2+21*8+4],%f2
	.word	0x8fb00f02 !fsrc2	%f0,%f2,%f38
	ld	[%g2+22*8+0],%f5
	ld	[%g2+22*8+4],%f4
	.word	0x93b00f04 !fsrc2	%f0,%f4,%f40
	ld	[%g2+23*8+0],%f7
	ld	[%g2+23*8+4],%f6
	.word	0x97b00f06 !fsrc2	%f0,%f6,%f42
	! stage np[0..13] in the next window
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	! np[14..23] in yet another window
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+16*8+0],%l3
	ld	[%g4+16*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+17*8+0],%l4
	ld	[%g4+17*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+18*8+0],%l5
	ld	[%g4+18*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+19*8+0],%l6
	ld	[%g4+19*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+20*8+0],%l7
	ld	[%g4+20*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+21*8+0],%o0
	ld	[%g4+21*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+22*8+0],%o1
	ld	[%g4+22*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+23*8+0],%o2
	ld	[%g4+23*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_24	! ap==bp -> squaring path
	nop
	! stage bp[0..13] in %i0-%i5,%l0-%l7 of a fresh window
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	! bp[14..23] in the final window before the multiply
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%i2
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+16*8+0],%i3
	ld	[%g3+16*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+17*8+0],%i4
	ld	[%g3+17*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+18*8+0],%i5
	ld	[%g3+18*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+19*8+0],%l0
	ld	[%g3+19*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+20*8+0],%l1
	ld	[%g3+20*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+21*8+0],%l2
	ld	[%g3+21*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+22*8+0],%l3
	ld	[%g3+22*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+23*8+0],%o7
	ld	[%g3+23*8+4],%l3
	sllx	%l3,32,%l3
	or	%o7,%l3,%l3
	.word	0x81b02920+24-1	! montmul	24-1
.Lmresume_24:
	fbu,pn	%fcc3,.Lmabort_24	! hardware signals failure via %fcc3
#ifndef	__arch64__
	and	%fp,%g5,%g5		! canary gone -> window was clobbered
	brz,pn	%g5,.Lmabort_24
#endif
	nop
	! unwind the operand windows (re-checking the canary on 32-bit)
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_24
	restore
#endif
	! result limbs 0..13 are in %l0-%l7,%o0-%o5; move to FP regs for
	! the 32-bit stores (limbs 12..13 detour via %f60/%f62)
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_24
	mov	0,%i0		! return failure
#endif
	! store rp[0..23], swapping 32-bit halves back to memory order;
	! limbs 12..23 are fetched from %f60/%f62 and %f24-%f42 via fsrc2
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2	%f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2	%f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2	%f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2	%f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	.word	0x81b00f1c !fsrc2	%f0,%f28,%f0
	st	%f1,[%g1+16*8+0]
	st	%f0,[%g1+16*8+4]
	.word	0x85b00f1e !fsrc2	%f0,%f30,%f2
	st	%f3,[%g1+17*8+0]
	st	%f2,[%g1+17*8+4]
	.word	0x89b00f01 !fsrc2	%f0,%f32,%f4
	st	%f5,[%g1+18*8+0]
	st	%f4,[%g1+18*8+4]
	.word	0x8db00f03 !fsrc2	%f0,%f34,%f6
	st	%f7,[%g1+19*8+0]
	st	%f6,[%g1+19*8+4]
	.word	0x81b00f05 !fsrc2	%f0,%f36,%f0
	st	%f1,[%g1+20*8+0]
	st	%f0,[%g1+20*8+4]
	.word	0x85b00f07 !fsrc2	%f0,%f38,%f2
	st	%f3,[%g1+21*8+0]
	st	%f2,[%g1+21*8+4]
	.word	0x89b00f09 !fsrc2	%f0,%f40,%f4
	st	%f5,[%g1+22*8+0]
	st	%f4,[%g1+22*8+4]
	.word	0x8db00f0b !fsrc2	%f0,%f42,%f6
	st	%f7,[%g1+23*8+0]
	st	%f6,[%g1+23*8+4]
	mov	1,%i0		! return success
.Lmdone_24:
	ret
	restore

.Lmabort_24:
	! failure with operand windows still stacked: pop them all
	restore
	restore
	restore
	restore
	restore
.Lmabort1_24:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_24:
	! squaring path (ap==bp): no bp staging, use montsqr instead
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+24-1	! montsqr	24-1
	ba	.Lmresume_24
	nop
.type	bn_mul_mont_t4_24, #function
.size	bn_mul_mont_t4_24, .-bn_mul_mont_t4_24
1076.globl	bn_mul_mont_t4_32
1077.align	32
1078bn_mul_mont_t4_32:
1079#ifdef	__arch64__
1080	mov	0,%g5
1081	mov	-128,%g4
1082#elif defined(SPARCV9_64BIT_STACK)
1083	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
1084	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
1085	mov	-2047,%g4
1086	and	%g1,SPARCV9_64BIT_STACK,%g1
1087	movrz	%g1,0,%g4
1088	mov	-1,%g5
1089	add	%g4,-128,%g4
1090#else
1091	mov	-1,%g5
1092	mov	-128,%g4
1093#endif
1094	sllx	%g5,32,%g5
1095	save	%sp,%g4,%sp
1096#ifndef	__arch64__
1097	save	%sp,-128,%sp	! warm it up
1098	save	%sp,-128,%sp
1099	save	%sp,-128,%sp
1100	save	%sp,-128,%sp
1101	save	%sp,-128,%sp
1102	save	%sp,-128,%sp
1103	restore
1104	restore
1105	restore
1106	restore
1107	restore
1108	restore
1109#endif
1110	and	%sp,1,%g4
1111	or	%g5,%fp,%fp
1112	or	%g4,%g5,%g5
1113
1114	! copy arguments to global registers
1115	mov	%i0,%g1
1116	mov	%i1,%g2
1117	mov	%i2,%g3
1118	mov	%i3,%g4
1119	ld	[%i4+0],%f1	! load *n0
1120	ld	[%i4+4],%f0
1121	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
1122	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1123	ld	[%g2+0*8+0],%l1
1124	ld	[%g2+0*8+4],%l0
1125	sllx	%l0,32,%l0
1126	or	%l1,%l0,%l0
1127	ld	[%g2+1*8+0],%l2
1128	ld	[%g2+1*8+4],%l1
1129	sllx	%l1,32,%l1
1130	or	%l2,%l1,%l1
1131	ld	[%g2+2*8+0],%l3
1132	ld	[%g2+2*8+4],%l2
1133	sllx	%l2,32,%l2
1134	or	%l3,%l2,%l2
1135	ld	[%g2+3*8+0],%l4
1136	ld	[%g2+3*8+4],%l3
1137	sllx	%l3,32,%l3
1138	or	%l4,%l3,%l3
1139	ld	[%g2+4*8+0],%l5
1140	ld	[%g2+4*8+4],%l4
1141	sllx	%l4,32,%l4
1142	or	%l5,%l4,%l4
1143	ld	[%g2+5*8+0],%l6
1144	ld	[%g2+5*8+4],%l5
1145	sllx	%l5,32,%l5
1146	or	%l6,%l5,%l5
1147	ld	[%g2+6*8+0],%l7
1148	ld	[%g2+6*8+4],%l6
1149	sllx	%l6,32,%l6
1150	or	%l7,%l6,%l6
1151	ld	[%g2+7*8+0],%o0
1152	ld	[%g2+7*8+4],%l7
1153	sllx	%l7,32,%l7
1154	or	%o0,%l7,%l7
1155	ld	[%g2+8*8+0],%o1
1156	ld	[%g2+8*8+4],%o0
1157	sllx	%o0,32,%o0
1158	or	%o1,%o0,%o0
1159	ld	[%g2+9*8+0],%o2
1160	ld	[%g2+9*8+4],%o1
1161	sllx	%o1,32,%o1
1162	or	%o2,%o1,%o1
1163	ld	[%g2+10*8+0],%o3
1164	ld	[%g2+10*8+4],%o2
1165	sllx	%o2,32,%o2
1166	or	%o3,%o2,%o2
1167	ld	[%g2+11*8+0],%o4
1168	ld	[%g2+11*8+4],%o3
1169	sllx	%o3,32,%o3
1170	or	%o4,%o3,%o3
1171	ld	[%g2+12*8+0],%o5
1172	ld	[%g2+12*8+4],%o4
1173	sllx	%o4,32,%o4
1174	or	%o5,%o4,%o4
1175	ld	[%g2+13*8+0],%o7
1176	ld	[%g2+13*8+4],%o5
1177	sllx	%o5,32,%o5
1178	or	%o7,%o5,%o5
1179	ld	[%g2+14*8+0],%f5
1180	ld	[%g2+14*8+4],%f4
1181	.word	0xb1b00f04 !fsrc2	%f0,%f4,%f24
1182	ld	[%g2+15*8+0],%f7
1183	ld	[%g2+15*8+4],%f6
1184	.word	0xb5b00f06 !fsrc2	%f0,%f6,%f26
1185	ld	[%g2+16*8+0],%f1
1186	ld	[%g2+16*8+4],%f0
1187	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
1188	ld	[%g2+17*8+0],%f3
1189	ld	[%g2+17*8+4],%f2
1190	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
1191	ld	[%g2+18*8+0],%f5
1192	ld	[%g2+18*8+4],%f4
1193	.word	0x83b00f04 !fsrc2	%f0,%f4,%f32
1194	ld	[%g2+19*8+0],%f7
1195	ld	[%g2+19*8+4],%f6
1196	.word	0x87b00f06 !fsrc2	%f0,%f6,%f34
1197	ld	[%g2+20*8+0],%f1
1198	ld	[%g2+20*8+4],%f0
1199	.word	0x8bb00f00 !fsrc2	%f0,%f0,%f36
1200	ld	[%g2+21*8+0],%f3
1201	ld	[%g2+21*8+4],%f2
1202	.word	0x8fb00f02 !fsrc2	%f0,%f2,%f38
1203	ld	[%g2+22*8+0],%f5
1204	ld	[%g2+22*8+4],%f4
1205	.word	0x93b00f04 !fsrc2	%f0,%f4,%f40
1206	ld	[%g2+23*8+0],%f7
1207	ld	[%g2+23*8+4],%f6
1208	.word	0x97b00f06 !fsrc2	%f0,%f6,%f42
1209	ld	[%g2+24*8+0],%f1
1210	ld	[%g2+24*8+4],%f0
1211	.word	0x9bb00f00 !fsrc2	%f0,%f0,%f44
1212	ld	[%g2+25*8+0],%f3
1213	ld	[%g2+25*8+4],%f2
1214	.word	0x9fb00f02 !fsrc2	%f0,%f2,%f46
1215	ld	[%g2+26*8+0],%f5
1216	ld	[%g2+26*8+4],%f4
1217	.word	0xa3b00f04 !fsrc2	%f0,%f4,%f48
1218	ld	[%g2+27*8+0],%f7
1219	ld	[%g2+27*8+4],%f6
1220	.word	0xa7b00f06 !fsrc2	%f0,%f6,%f50
1221	ld	[%g2+28*8+0],%f1
1222	ld	[%g2+28*8+4],%f0
1223	.word	0xabb00f00 !fsrc2	%f0,%f0,%f52
1224	ld	[%g2+29*8+0],%f3
1225	ld	[%g2+29*8+4],%f2
1226	.word	0xafb00f02 !fsrc2	%f0,%f2,%f54
1227	ld	[%g2+30*8+0],%f5
1228	ld	[%g2+30*8+4],%f4
1229	.word	0xb3b00f04 !fsrc2	%f0,%f4,%f56
1230	ld	[%g2+31*8+0],%f7
1231	ld	[%g2+31*8+4],%f6
1232	.word	0xb7b00f06 !fsrc2	%f0,%f6,%f58
1233	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1234	ld	[%g4+0*8+0],%l1
1235	ld	[%g4+0*8+4],%l0
1236	sllx	%l0,32,%l0
1237	or	%l1,%l0,%l0
1238	ld	[%g4+1*8+0],%l2
1239	ld	[%g4+1*8+4],%l1
1240	sllx	%l1,32,%l1
1241	or	%l2,%l1,%l1
1242	ld	[%g4+2*8+0],%l3
1243	ld	[%g4+2*8+4],%l2
1244	sllx	%l2,32,%l2
1245	or	%l3,%l2,%l2
1246	ld	[%g4+3*8+0],%l4
1247	ld	[%g4+3*8+4],%l3
1248	sllx	%l3,32,%l3
1249	or	%l4,%l3,%l3
1250	ld	[%g4+4*8+0],%l5
1251	ld	[%g4+4*8+4],%l4
1252	sllx	%l4,32,%l4
1253	or	%l5,%l4,%l4
1254	ld	[%g4+5*8+0],%l6
1255	ld	[%g4+5*8+4],%l5
1256	sllx	%l5,32,%l5
1257	or	%l6,%l5,%l5
1258	ld	[%g4+6*8+0],%l7
1259	ld	[%g4+6*8+4],%l6
1260	sllx	%l6,32,%l6
1261	or	%l7,%l6,%l6
1262	ld	[%g4+7*8+0],%o0
1263	ld	[%g4+7*8+4],%l7
1264	sllx	%l7,32,%l7
1265	or	%o0,%l7,%l7
1266	ld	[%g4+8*8+0],%o1
1267	ld	[%g4+8*8+4],%o0
1268	sllx	%o0,32,%o0
1269	or	%o1,%o0,%o0
1270	ld	[%g4+9*8+0],%o2
1271	ld	[%g4+9*8+4],%o1
1272	sllx	%o1,32,%o1
1273	or	%o2,%o1,%o1
1274	ld	[%g4+10*8+0],%o3
1275	ld	[%g4+10*8+4],%o2
1276	sllx	%o2,32,%o2
1277	or	%o3,%o2,%o2
1278	ld	[%g4+11*8+0],%o4
1279	ld	[%g4+11*8+4],%o3
1280	sllx	%o3,32,%o3
1281	or	%o4,%o3,%o3
1282	ld	[%g4+12*8+0],%o5
1283	ld	[%g4+12*8+4],%o4
1284	sllx	%o4,32,%o4
1285	or	%o5,%o4,%o4
1286	ld	[%g4+13*8+0],%o7
1287	ld	[%g4+13*8+4],%o5
1288	sllx	%o5,32,%o5
1289	or	%o7,%o5,%o5
1290	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1291	ld	[%g4+14*8+0],%l1
1292	ld	[%g4+14*8+4],%l0
1293	sllx	%l0,32,%l0
1294	or	%l1,%l0,%l0
1295	ld	[%g4+15*8+0],%l2
1296	ld	[%g4+15*8+4],%l1
1297	sllx	%l1,32,%l1
1298	or	%l2,%l1,%l1
1299	ld	[%g4+16*8+0],%l3
1300	ld	[%g4+16*8+4],%l2
1301	sllx	%l2,32,%l2
1302	or	%l3,%l2,%l2
1303	ld	[%g4+17*8+0],%l4
1304	ld	[%g4+17*8+4],%l3
1305	sllx	%l3,32,%l3
1306	or	%l4,%l3,%l3
1307	ld	[%g4+18*8+0],%l5
1308	ld	[%g4+18*8+4],%l4
1309	sllx	%l4,32,%l4
1310	or	%l5,%l4,%l4
1311	ld	[%g4+19*8+0],%l6
1312	ld	[%g4+19*8+4],%l5
1313	sllx	%l5,32,%l5
1314	or	%l6,%l5,%l5
1315	ld	[%g4+20*8+0],%l7
1316	ld	[%g4+20*8+4],%l6
1317	sllx	%l6,32,%l6
1318	or	%l7,%l6,%l6
1319	ld	[%g4+21*8+0],%o0
1320	ld	[%g4+21*8+4],%l7
1321	sllx	%l7,32,%l7
1322	or	%o0,%l7,%l7
1323	ld	[%g4+22*8+0],%o1
1324	ld	[%g4+22*8+4],%o0
1325	sllx	%o0,32,%o0
1326	or	%o1,%o0,%o0
1327	ld	[%g4+23*8+0],%o2
1328	ld	[%g4+23*8+4],%o1
1329	sllx	%o1,32,%o1
1330	or	%o2,%o1,%o1
1331	ld	[%g4+24*8+0],%o3
1332	ld	[%g4+24*8+4],%o2
1333	sllx	%o2,32,%o2
1334	or	%o3,%o2,%o2
1335	ld	[%g4+25*8+0],%o4
1336	ld	[%g4+25*8+4],%o3
1337	sllx	%o3,32,%o3
1338	or	%o4,%o3,%o3
1339	ld	[%g4+26*8+0],%o5
1340	ld	[%g4+26*8+4],%o4
1341	sllx	%o4,32,%o4
1342	or	%o5,%o4,%o4
1343	ld	[%g4+27*8+0],%o7
1344	ld	[%g4+27*8+4],%o5
1345	sllx	%o5,32,%o5
1346	or	%o7,%o5,%o5
1347	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1348	ld	[%g4+28*8+0],%l1
1349	ld	[%g4+28*8+4],%l0
1350	sllx	%l0,32,%l0
1351	or	%l1,%l0,%l0
1352	ld	[%g4+29*8+0],%l2
1353	ld	[%g4+29*8+4],%l1
1354	sllx	%l1,32,%l1
1355	or	%l2,%l1,%l1
1356	ld	[%g4+30*8+0],%l3
1357	ld	[%g4+30*8+4],%l2
1358	sllx	%l2,32,%l2
1359	or	%l3,%l2,%l2
1360	ld	[%g4+31*8+0],%o7
1361	ld	[%g4+31*8+4],%l3
1362	sllx	%l3,32,%l3
1363	or	%o7,%l3,%l3
1364	cmp	%g2,%g3
1365	be	SIZE_T_CC,.Lmsquare_32
1366	nop
1367	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1368	ld	[%g3+0*8+0],%i1
1369	ld	[%g3+0*8+4],%i0
1370	sllx	%i0,32,%i0
1371	or	%i1,%i0,%i0
1372	ld	[%g3+1*8+0],%i2
1373	ld	[%g3+1*8+4],%i1
1374	sllx	%i1,32,%i1
1375	or	%i2,%i1,%i1
1376	ld	[%g3+2*8+0],%i3
1377	ld	[%g3+2*8+4],%i2
1378	sllx	%i2,32,%i2
1379	or	%i3,%i2,%i2
1380	ld	[%g3+3*8+0],%i4
1381	ld	[%g3+3*8+4],%i3
1382	sllx	%i3,32,%i3
1383	or	%i4,%i3,%i3
1384	ld	[%g3+4*8+0],%i5
1385	ld	[%g3+4*8+4],%i4
1386	sllx	%i4,32,%i4
1387	or	%i5,%i4,%i4
1388	ld	[%g3+5*8+0],%l0
1389	ld	[%g3+5*8+4],%i5
1390	sllx	%i5,32,%i5
1391	or	%l0,%i5,%i5
1392	ld	[%g3+6*8+0],%l1
1393	ld	[%g3+6*8+4],%l0
1394	sllx	%l0,32,%l0
1395	or	%l1,%l0,%l0
1396	ld	[%g3+7*8+0],%l2
1397	ld	[%g3+7*8+4],%l1
1398	sllx	%l1,32,%l1
1399	or	%l2,%l1,%l1
1400	ld	[%g3+8*8+0],%l3
1401	ld	[%g3+8*8+4],%l2
1402	sllx	%l2,32,%l2
1403	or	%l3,%l2,%l2
1404	ld	[%g3+9*8+0],%l4
1405	ld	[%g3+9*8+4],%l3
1406	sllx	%l3,32,%l3
1407	or	%l4,%l3,%l3
1408	ld	[%g3+10*8+0],%l5
1409	ld	[%g3+10*8+4],%l4
1410	sllx	%l4,32,%l4
1411	or	%l5,%l4,%l4
1412	ld	[%g3+11*8+0],%l6
1413	ld	[%g3+11*8+4],%l5
1414	sllx	%l5,32,%l5
1415	or	%l6,%l5,%l5
1416	ld	[%g3+12*8+0],%l7
1417	ld	[%g3+12*8+4],%l6
1418	sllx	%l6,32,%l6
1419	or	%l7,%l6,%l6
1420	ld	[%g3+13*8+0],%o7
1421	ld	[%g3+13*8+4],%l7
1422	sllx	%l7,32,%l7
1423	or	%o7,%l7,%l7
1424	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1425	ld	[%g3+14*8+0],%i1
1426	ld	[%g3+14*8+4],%i0
1427	sllx	%i0,32,%i0
1428	or	%i1,%i0,%i0
1429	ld	[%g3+15*8+0],%i2
1430	ld	[%g3+15*8+4],%i1
1431	sllx	%i1,32,%i1
1432	or	%i2,%i1,%i1
1433	ld	[%g3+16*8+0],%i3
1434	ld	[%g3+16*8+4],%i2
1435	sllx	%i2,32,%i2
1436	or	%i3,%i2,%i2
1437	ld	[%g3+17*8+0],%i4
1438	ld	[%g3+17*8+4],%i3
1439	sllx	%i3,32,%i3
1440	or	%i4,%i3,%i3
1441	ld	[%g3+18*8+0],%i5
1442	ld	[%g3+18*8+4],%i4
1443	sllx	%i4,32,%i4
1444	or	%i5,%i4,%i4
1445	ld	[%g3+19*8+0],%l0
1446	ld	[%g3+19*8+4],%i5
1447	sllx	%i5,32,%i5
1448	or	%l0,%i5,%i5
1449	ld	[%g3+20*8+0],%l1
1450	ld	[%g3+20*8+4],%l0
1451	sllx	%l0,32,%l0
1452	or	%l1,%l0,%l0
1453	ld	[%g3+21*8+0],%l2
1454	ld	[%g3+21*8+4],%l1
1455	sllx	%l1,32,%l1
1456	or	%l2,%l1,%l1
1457	ld	[%g3+22*8+0],%l3
1458	ld	[%g3+22*8+4],%l2
1459	sllx	%l2,32,%l2
1460	or	%l3,%l2,%l2
1461	ld	[%g3+23*8+0],%l4
1462	ld	[%g3+23*8+4],%l3
1463	sllx	%l3,32,%l3
1464	or	%l4,%l3,%l3
1465	ld	[%g3+24*8+0],%l5
1466	ld	[%g3+24*8+4],%l4
1467	sllx	%l4,32,%l4
1468	or	%l5,%l4,%l4
1469	ld	[%g3+25*8+0],%l6
1470	ld	[%g3+25*8+4],%l5
1471	sllx	%l5,32,%l5
1472	or	%l6,%l5,%l5
1473	ld	[%g3+26*8+0],%l7
1474	ld	[%g3+26*8+4],%l6
1475	sllx	%l6,32,%l6
1476	or	%l7,%l6,%l6
1477	ld	[%g3+27*8+0],%o0
1478	ld	[%g3+27*8+4],%l7
1479	sllx	%l7,32,%l7
1480	or	%o0,%l7,%l7
1481	ld	[%g3+28*8+0],%o1
1482	ld	[%g3+28*8+4],%o0
1483	sllx	%o0,32,%o0
1484	or	%o1,%o0,%o0
1485	ld	[%g3+29*8+0],%o2
1486	ld	[%g3+29*8+4],%o1
1487	sllx	%o1,32,%o1
1488	or	%o2,%o1,%o1
1489	ld	[%g3+30*8+0],%o3
1490	ld	[%g3+30*8+4],%o2
1491	sllx	%o2,32,%o2
1492	or	%o3,%o2,%o2
1493	ld	[%g3+31*8+0],%o7
1494	ld	[%g3+31*8+4],%o3
1495	sllx	%o3,32,%o3
1496	or	%o7,%o3,%o3
1497	.word	0x81b02920+32-1	! montmul	32-1
1498.Lmresume_32:
1499	fbu,pn	%fcc3,.Lmabort_32
1500#ifndef	__arch64__
1501	and	%fp,%g5,%g5
1502	brz,pn	%g5,.Lmabort_32
1503#endif
1504	nop
1505#ifdef	__arch64__
1506	restore
1507	restore
1508	restore
1509	restore
1510	restore
1511#else
1512	restore;		and	%fp,%g5,%g5
1513	restore;		and	%fp,%g5,%g5
1514	restore;		and	%fp,%g5,%g5
1515	restore;		and	%fp,%g5,%g5
1516	 brz,pn	%g5,.Lmabort1_32
1517	restore
1518#endif
1519	.word	0x81b02310 !movxtod	%l0,%f0
1520	.word	0x85b02311 !movxtod	%l1,%f2
1521	.word	0x89b02312 !movxtod	%l2,%f4
1522	.word	0x8db02313 !movxtod	%l3,%f6
1523	.word	0x91b02314 !movxtod	%l4,%f8
1524	.word	0x95b02315 !movxtod	%l5,%f10
1525	.word	0x99b02316 !movxtod	%l6,%f12
1526	.word	0x9db02317 !movxtod	%l7,%f14
1527	.word	0xa1b02308 !movxtod	%o0,%f16
1528	.word	0xa5b02309 !movxtod	%o1,%f18
1529	.word	0xa9b0230a !movxtod	%o2,%f20
1530	.word	0xadb0230b !movxtod	%o3,%f22
1531	.word	0xbbb0230c !movxtod	%o4,%f60
1532	.word	0xbfb0230d !movxtod	%o5,%f62
1533#ifdef	__arch64__
1534	restore
1535#else
1536	 and	%fp,%g5,%g5
1537	restore
1538	 and	%g5,1,%o7
1539	 and	%fp,%g5,%g5
1540	 srl	%fp,0,%fp		! just in case?
1541	 or	%o7,%g5,%g5
1542	brz,a,pn %g5,.Lmdone_32
1543	mov	0,%i0		! return failure
1544#endif
1545	st	%f1,[%g1+0*8+0]
1546	st	%f0,[%g1+0*8+4]
1547	st	%f3,[%g1+1*8+0]
1548	st	%f2,[%g1+1*8+4]
1549	st	%f5,[%g1+2*8+0]
1550	st	%f4,[%g1+2*8+4]
1551	st	%f7,[%g1+3*8+0]
1552	st	%f6,[%g1+3*8+4]
1553	st	%f9,[%g1+4*8+0]
1554	st	%f8,[%g1+4*8+4]
1555	st	%f11,[%g1+5*8+0]
1556	st	%f10,[%g1+5*8+4]
1557	st	%f13,[%g1+6*8+0]
1558	st	%f12,[%g1+6*8+4]
1559	st	%f15,[%g1+7*8+0]
1560	st	%f14,[%g1+7*8+4]
1561	st	%f17,[%g1+8*8+0]
1562	st	%f16,[%g1+8*8+4]
1563	st	%f19,[%g1+9*8+0]
1564	st	%f18,[%g1+9*8+4]
1565	st	%f21,[%g1+10*8+0]
1566	st	%f20,[%g1+10*8+4]
1567	st	%f23,[%g1+11*8+0]
1568	st	%f22,[%g1+11*8+4]
1569	.word	0x81b00f1d !fsrc2	%f0,%f60,%f0
1570	st	%f1,[%g1+12*8+0]
1571	st	%f0,[%g1+12*8+4]
1572	.word	0x85b00f1f !fsrc2	%f0,%f62,%f2
1573	st	%f3,[%g1+13*8+0]
1574	st	%f2,[%g1+13*8+4]
1575	.word	0x89b00f18 !fsrc2	%f0,%f24,%f4
1576	st	%f5,[%g1+14*8+0]
1577	st	%f4,[%g1+14*8+4]
1578	.word	0x8db00f1a !fsrc2	%f0,%f26,%f6
1579	st	%f7,[%g1+15*8+0]
1580	st	%f6,[%g1+15*8+4]
1581	.word	0x81b00f1c !fsrc2	%f0,%f28,%f0
1582	st	%f1,[%g1+16*8+0]
1583	st	%f0,[%g1+16*8+4]
1584	.word	0x85b00f1e !fsrc2	%f0,%f30,%f2
1585	st	%f3,[%g1+17*8+0]
1586	st	%f2,[%g1+17*8+4]
1587	.word	0x89b00f01 !fsrc2	%f0,%f32,%f4
1588	st	%f5,[%g1+18*8+0]
1589	st	%f4,[%g1+18*8+4]
1590	.word	0x8db00f03 !fsrc2	%f0,%f34,%f6
1591	st	%f7,[%g1+19*8+0]
1592	st	%f6,[%g1+19*8+4]
1593	.word	0x81b00f05 !fsrc2	%f0,%f36,%f0
1594	st	%f1,[%g1+20*8+0]
1595	st	%f0,[%g1+20*8+4]
1596	.word	0x85b00f07 !fsrc2	%f0,%f38,%f2
1597	st	%f3,[%g1+21*8+0]
1598	st	%f2,[%g1+21*8+4]
1599	.word	0x89b00f09 !fsrc2	%f0,%f40,%f4
1600	st	%f5,[%g1+22*8+0]
1601	st	%f4,[%g1+22*8+4]
1602	.word	0x8db00f0b !fsrc2	%f0,%f42,%f6
1603	st	%f7,[%g1+23*8+0]
1604	st	%f6,[%g1+23*8+4]
1605	.word	0x81b00f0d !fsrc2	%f0,%f44,%f0
1606	st	%f1,[%g1+24*8+0]
1607	st	%f0,[%g1+24*8+4]
1608	.word	0x85b00f0f !fsrc2	%f0,%f46,%f2
1609	st	%f3,[%g1+25*8+0]
1610	st	%f2,[%g1+25*8+4]
1611	.word	0x89b00f11 !fsrc2	%f0,%f48,%f4
1612	st	%f5,[%g1+26*8+0]
1613	st	%f4,[%g1+26*8+4]
1614	.word	0x8db00f13 !fsrc2	%f0,%f50,%f6
1615	st	%f7,[%g1+27*8+0]
1616	st	%f6,[%g1+27*8+4]
1617	.word	0x81b00f15 !fsrc2	%f0,%f52,%f0
1618	st	%f1,[%g1+28*8+0]
1619	st	%f0,[%g1+28*8+4]
1620	.word	0x85b00f17 !fsrc2	%f0,%f54,%f2
1621	st	%f3,[%g1+29*8+0]
1622	st	%f2,[%g1+29*8+4]
1623	.word	0x89b00f19 !fsrc2	%f0,%f56,%f4
1624	st	%f5,[%g1+30*8+0]
1625	st	%f4,[%g1+30*8+4]
1626	.word	0x8db00f1b !fsrc2	%f0,%f58,%f6
1627	st	%f7,[%g1+31*8+0]
1628	st	%f6,[%g1+31*8+4]
1629	mov	1,%i0		! return success
1630.Lmdone_32:
1631	ret
1632	restore
1633
1634.Lmabort_32:
1635	restore
1636	restore
1637	restore
1638	restore
1639	restore
1640.Lmabort1_32:
1641	restore
1642
1643	mov	0,%i0		! return failure
1644	ret
1645	restore
1646
1647.align	32
1648.Lmsquare_32:
1649	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1650	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1651	.word   0x81b02940+32-1	! montsqr	32-1
1652	ba	.Lmresume_32
1653	nop
1654.type	bn_mul_mont_t4_32, #function
1655.size	bn_mul_mont_t4_32, .-bn_mul_mont_t4_32
!-----------------------------------------------------------------------
! bn_pwr5_mont_t4_8 - 5-bit-window Montgomery exponentiation step for
! 8-limb (512-bit) operands on SPARC T4. Each pass of .Lstride_8 does
! five Montgomery squarings followed by one Montgomery multiplication
! by an entry gathered from a power table, using the T4 montsqr/montmul
! instructions (emitted below as raw .word constants).
! In:	%i0 = 8-limb operand/result vector	(kept in %g1)
!	%i1 = 8-limb modulus vector		(kept in %g2)
!	%i2 = pointer to n0, the 64-bit Montgomery constant (parked in %f60)
!	%i3 = power table "pwrtbl"		(kept in %g3)
!	%i4,%i5 = packed into %g4: exponent-window bits (low half) and a
!		  remaining-bits counter (high half, reduced by 5 per pass)
! Out:	%i0 = 1 on success, 0 on failure (a T4 instruction aborted, as
!	signalled through %fcc3, or - 32-bit ABI only - the %g5/%fp
!	canary was lost, see notes below)
! NOTE(review): the exact C prototype is defined by the perl generator
! of this file - confirm argument roles against the callers.
!-----------------------------------------------------------------------
.globl	bn_pwr5_mont_t4_8
.align	32
bn_pwr5_mont_t4_8:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	! 32-bit build that may still run on a 64-bit stack: probe the
	! capability word and choose frame size/stack bias accordingly.
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = 0 (64-bit) or 0xffffffff00000000 canary mask
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	! Tag %fp with the high-half canary; after every restore the code
	! re-checks it (and %fp,%g5,%g5). If a register window was flushed
	! by a 32-bit kernel the upper halves are presumably truncated and
	! the canary vanishes, in which case the routine reports failure.
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! Operand and modulus limbs are staged across consecutive register
	! windows, where the T4 montsqr/montmul instructions expect them.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp

	! Split %g4 into bit counter (%o4) and window bits (%o5), consume 5
	! bits, and derive: %o7 = table pointer biased to the in-line slot,
	! %o4 = one-hot selector written to %ccr to steer the gather below.
	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	wr	%o4,	%g0,	%ccr
	b	.Lstride_8
	nop
.align	16
.Lstride_8:
	! Gather the selected 8-limb table entry two limbs at a time.
	! Every pass reads all 16 candidate lines and keeps exactly one via
	! conditional moves keyed off the one-hot %ccr bit - presumably a
	! cache-timing-neutral lookup; verify against the table layout.
	! limbs 0-1 -> %i0/%i1:
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	! limbs 2-3 -> %i2/%i3:
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	! limbs 4-5 -> %i4/%i5:
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	! limbs 6-7 -> %l0/%l1:
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	! Recompute the selector for the NEXT pass ahead of time. Note the
	! register-window trick: %i7 written here is the outer window's
	! %o7, so the restore on loop-back re-seeds the gather pointer.
	! (srax vs the srlx used above is equivalent while the counter in
	! the high half is still non-negative.)
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	! Five Montgomery squarings; each montsqr flags failure in %fcc3,
	! and 32-bit builds additionally re-check the %fp canary.
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	! Multiply by the gathered table entry; %ccr selector armed first.
	wr	%o4,	%g0,	%ccr
	.word	0x81b02920+8-1	! montmul	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif

	! Loop while the bit counter (high half of %g4) is still >= 0; the
	! restore in the brgez delay slot unwinds the window saved above.
	srax	%g4,	32,	%o4
#ifdef	__arch64__
	brgez	%o4,.Lstride_8
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_8
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_8
	restore
#endif
	! Move the 8 result limbs from integer registers to FP registers
	! (they survive the remaining restore there).
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_8
	mov	0,%i0		! return failure
#endif
	! Store the result back to the output vector at %g1.
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	mov	1,%i0		! return success
.Ldone_8:
	ret
	restore

.Labort_8:
	! Failure path: unwind the six extra register windows, then return 0.
	restore
	restore
	restore
	restore
	restore
.Labort1_8:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_8, #function
.size	bn_pwr5_mont_t4_8, .-bn_pwr5_mont_t4_8
!-----------------------------------------------------------------------
! bn_pwr5_mont_t4_16 - 5-bit-window Montgomery exponentiation step for
! 16-limb (1024-bit) operands on SPARC T4. Same structure as the 8-limb
! variant: each .Lstride_16 pass performs five Montgomery squarings and
! one Montgomery multiplication by a table entry gathered in constant
! touch pattern, via the T4 montsqr/montmul instructions (.word forms).
! In:	%i0 = 16-limb operand/result vector	(kept in %g1)
!	%i1 = 16-limb modulus vector		(kept in %g2)
!	%i2 = pointer to n0, the 64-bit Montgomery constant (parked in %f60)
!	%i3 = power table "pwrtbl"		(kept in %g3)
!	%i4,%i5 = packed into %g4: exponent-window bits (low half) and a
!		  remaining-bits counter (high half, reduced by 5 per pass)
! Out:	%i0 = 1 on success, 0 on failure (T4 instruction abort via %fcc3,
!	or - 32-bit ABI only - loss of the %g5/%fp canary)
! NOTE(review): the exact C prototype is defined by the perl generator
! of this file - confirm argument roles against the callers.
!-----------------------------------------------------------------------
.globl	bn_pwr5_mont_t4_16
.align	32
bn_pwr5_mont_t4_16:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	! 32-bit build that may still run on a 64-bit stack: probe the
	! capability word and choose frame size/stack bias accordingly.
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = 0 (64-bit) or 0xffffffff00000000 canary mask
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	! Tag %fp with the high-half canary; it is re-checked after every
	! restore and, if lost (window flushed by a 32-bit kernel,
	! presumably truncating upper register halves), the call fails.
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! Stage the 16 operand limbs (last two in %f24/%f26) and the 16
	! modulus limbs across register windows for montsqr/montmul.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	ldx	[%g1+8*8],%o0
	ldx	[%g1+9*8],%o1
	ldx	[%g1+10*8],%o2
	ldx	[%g1+11*8],%o3
	ldx	[%g1+12*8],%o4
	ldx	[%g1+13*8],%o5
	ldd	[%g1+14*8],%f24
	ldd	[%g1+15*8],%f26
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	ldx	[%g2+8*8],%o0
	ldx	[%g2+9*8],%o1
	ldx	[%g2+10*8],%o2
	ldx	[%g2+11*8],%o3
	ldx	[%g2+12*8],%o4
	ldx	[%g2+13*8],%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+14*8],%l0
	ldx	[%g2+15*8],%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp

	! Split %g4 into bit counter (%o4) and window bits (%o5), consume 5
	! bits, and derive: %o7 = table pointer biased to the in-line slot,
	! %o4 = one-hot selector written to %ccr to steer the gather below.
	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	wr	%o4,	%g0,	%ccr
	b	.Lstride_16
	nop
.align	16
.Lstride_16:
	! Gather the selected 16-limb table entry two limbs at a time.
	! All 16 candidate lines are read each time and exactly one is kept
	! via conditional moves keyed off the one-hot %ccr bit - presumably
	! a cache-timing-neutral lookup; verify against the table layout.
	! limbs 0-1 -> %i0/%i1:
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	! limbs 2-3 -> %i2/%i3:
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	! limbs 4-5 -> %i4/%i5:
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	! limbs 6-7 -> %l0/%l1:
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	! limbs 8-9 -> %l2/%l3:
	ldx	[%o7+0*32],	%l2
	ldx	[%o7+8*32],	%l3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l3
	! limbs 10-11 -> %l4/%l5:
	ldx	[%o7+0*32],	%l4
	ldx	[%o7+8*32],	%l5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l5
	! limbs 12-13 -> %l6/%l7:
	ldx	[%o7+0*32],	%l6
	ldx	[%o7+8*32],	%l7
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	! limbs 14-15 land in the INNER window's %i0/%i1; the gather base
	! is %i7, i.e. the outer window's %o7 left by the loop above.
	ldx	[%i7+0*32],	%i0
	ldx	[%i7+8*32],	%i1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i1
	! Pre-compute the selector for the NEXT pass; %i7 written here is
	! the outer window's %o7, re-seeding the gather pointer on loop-back.
	! (srax vs the srlx used above is equivalent while the counter in
	! the high half is still non-negative.)
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	! Five Montgomery squarings; each montsqr flags failure in %fcc3,
	! and 32-bit builds additionally re-check the %fp canary.
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	! Multiply by the gathered table entry; %ccr selector armed first.
	wr	%o4,	%g0,	%ccr
	.word	0x81b02920+16-1	! montmul	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif

	! Loop while the bit counter (high half of %g4) is still >= 0; the
	! restore in the brgez delay slot unwinds the window saved above.
	srax	%g4,	32,	%o4
#ifdef	__arch64__
	brgez	%o4,.Lstride_16
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_16
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_16
	restore
#endif
	! Move the 16 result limbs from integer registers to FP registers
	! (they survive the remaining restore there; %f24/%f26 already hold
	! limbs 14-15).
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_16
	mov	0,%i0		! return failure
#endif
	! Store the result back to the output vector at %g1.
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	std	%f16,[%g1+8*8]
	std	%f18,[%g1+9*8]
	std	%f20,[%g1+10*8]
	std	%f22,[%g1+11*8]
	std	%f60,[%g1+12*8]
	std	%f62,[%g1+13*8]
	std	%f24,[%g1+14*8]
	std	%f26,[%g1+15*8]
	mov	1,%i0		! return success
.Ldone_16:
	ret
	restore

.Labort_16:
	! Failure path: unwind the six extra register windows, then return 0.
	restore
	restore
	restore
	restore
	restore
.Labort1_16:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_16, #function
.size	bn_pwr5_mont_t4_16, .-bn_pwr5_mont_t4_16
2480.globl	bn_pwr5_mont_t4_24
2481.align	32
2482bn_pwr5_mont_t4_24:
2483#ifdef	__arch64__
2484	mov	0,%g5
2485	mov	-128,%g4
2486#elif defined(SPARCV9_64BIT_STACK)
2487	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
2488	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
2489	mov	-2047,%g4
2490	and	%g1,SPARCV9_64BIT_STACK,%g1
2491	movrz	%g1,0,%g4
2492	mov	-1,%g5
2493	add	%g4,-128,%g4
2494#else
2495	mov	-1,%g5
2496	mov	-128,%g4
2497#endif
2498	sllx	%g5,32,%g5
2499	save	%sp,%g4,%sp
2500#ifndef	__arch64__
2501	save	%sp,-128,%sp	! warm it up
2502	save	%sp,-128,%sp
2503	save	%sp,-128,%sp
2504	save	%sp,-128,%sp
2505	save	%sp,-128,%sp
2506	save	%sp,-128,%sp
2507	restore
2508	restore
2509	restore
2510	restore
2511	restore
2512	restore
2513#endif
2514	and	%sp,1,%g4
2515	or	%g5,%fp,%fp
2516	or	%g4,%g5,%g5
2517
2518	! copy arguments to global registers
2519	mov	%i0,%g1
2520	mov	%i1,%g2
2521	ld	[%i2+0],%f1	! load *n0
2522	ld	[%i2+4],%f0
2523	mov	%i3,%g3
2524	srl	%i4,%g0,%i4	! pack last arguments
2525	sllx	%i5,32,%g4
2526	or	%i4,%g4,%g4
2527	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
2528	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2529	ldx	[%g1+0*8],%l0
2530	ldx	[%g1+1*8],%l1
2531	ldx	[%g1+2*8],%l2
2532	ldx	[%g1+3*8],%l3
2533	ldx	[%g1+4*8],%l4
2534	ldx	[%g1+5*8],%l5
2535	ldx	[%g1+6*8],%l6
2536	ldx	[%g1+7*8],%l7
2537	ldx	[%g1+8*8],%o0
2538	ldx	[%g1+9*8],%o1
2539	ldx	[%g1+10*8],%o2
2540	ldx	[%g1+11*8],%o3
2541	ldx	[%g1+12*8],%o4
2542	ldx	[%g1+13*8],%o5
2543	ldd	[%g1+14*8],%f24
2544	ldd	[%g1+15*8],%f26
2545	ldd	[%g1+16*8],%f28
2546	ldd	[%g1+17*8],%f30
2547	ldd	[%g1+18*8],%f32
2548	ldd	[%g1+19*8],%f34
2549	ldd	[%g1+20*8],%f36
2550	ldd	[%g1+21*8],%f38
2551	ldd	[%g1+22*8],%f40
2552	ldd	[%g1+23*8],%f42
2553	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2554	ldx	[%g2+0*8],%l0
2555	ldx	[%g2+1*8],%l1
2556	ldx	[%g2+2*8],%l2
2557	ldx	[%g2+3*8],%l3
2558	ldx	[%g2+4*8],%l4
2559	ldx	[%g2+5*8],%l5
2560	ldx	[%g2+6*8],%l6
2561	ldx	[%g2+7*8],%l7
2562	ldx	[%g2+8*8],%o0
2563	ldx	[%g2+9*8],%o1
2564	ldx	[%g2+10*8],%o2
2565	ldx	[%g2+11*8],%o3
2566	ldx	[%g2+12*8],%o4
2567	ldx	[%g2+13*8],%o5
2568	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2569	ldx	[%g2+14*8],%l0
2570	ldx	[%g2+15*8],%l1
2571	ldx	[%g2+16*8],%l2
2572	ldx	[%g2+17*8],%l3
2573	ldx	[%g2+18*8],%l4
2574	ldx	[%g2+19*8],%l5
2575	ldx	[%g2+20*8],%l6
2576	ldx	[%g2+21*8],%l7
2577	ldx	[%g2+22*8],%o0
2578	ldx	[%g2+23*8],%o1
2579	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2580	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2581
2582	srlx	%g4,	32,	%o4		! unpack %g4
2583	srl	%g4,	%g0,	%o5
2584	sub	%o4,	5,	%o4
2585	mov	%g3,	%o7
2586	sllx	%o4,	32,	%g4		! re-pack %g4
2587	or	%o5,	%g4,	%g4
2588	srl	%o5,	%o4,	%o5
2589	srl	%o5,	2,	%o4
2590	and	%o5,	3,	%o5
2591	and	%o4,	7,	%o4
2592	sll	%o5,	3,	%o5	! offset within first cache line
2593	add	%o5,	%o7,	%o7	! of the pwrtbl
2594	or	%g0,	1,	%o5
2595	sll	%o5,	%o4,	%o4
2596	wr	%o4,	%g0,	%ccr
2597	b	.Lstride_24
2598	nop
2599.align	16
2600.Lstride_24:
2601	ldx	[%o7+0*32],	%i0
2602	ldx	[%o7+8*32],	%i1
2603	ldx	[%o7+1*32],	%o4
2604	ldx	[%o7+9*32],	%o5
2605	movvs	%icc,	%o4,	%i0
2606	ldx	[%o7+2*32],	%o4
2607	movvs	%icc,	%o5,	%i1
2608	ldx	[%o7+10*32],%o5
2609	move	%icc,	%o4,	%i0
2610	ldx	[%o7+3*32],	%o4
2611	move	%icc,	%o5,	%i1
2612	ldx	[%o7+11*32],%o5
2613	movneg	%icc,	%o4,	%i0
2614	ldx	[%o7+4*32],	%o4
2615	movneg	%icc,	%o5,	%i1
2616	ldx	[%o7+12*32],%o5
2617	movcs	%xcc,	%o4,	%i0
2618	ldx	[%o7+5*32],%o4
2619	movcs	%xcc,	%o5,	%i1
2620	ldx	[%o7+13*32],%o5
2621	movvs	%xcc,	%o4,	%i0
2622	ldx	[%o7+6*32],	%o4
2623	movvs	%xcc,	%o5,	%i1
2624	ldx	[%o7+14*32],%o5
2625	move	%xcc,	%o4,	%i0
2626	ldx	[%o7+7*32],	%o4
2627	move	%xcc,	%o5,	%i1
2628	ldx	[%o7+15*32],%o5
2629	movneg	%xcc,	%o4,	%i0
2630	add	%o7,16*32,	%o7
2631	movneg	%xcc,	%o5,	%i1
2632	ldx	[%o7+0*32],	%i2
2633	ldx	[%o7+8*32],	%i3
2634	ldx	[%o7+1*32],	%o4
2635	ldx	[%o7+9*32],	%o5
2636	movvs	%icc,	%o4,	%i2
2637	ldx	[%o7+2*32],	%o4
2638	movvs	%icc,	%o5,	%i3
2639	ldx	[%o7+10*32],%o5
2640	move	%icc,	%o4,	%i2
2641	ldx	[%o7+3*32],	%o4
2642	move	%icc,	%o5,	%i3
2643	ldx	[%o7+11*32],%o5
2644	movneg	%icc,	%o4,	%i2
2645	ldx	[%o7+4*32],	%o4
2646	movneg	%icc,	%o5,	%i3
2647	ldx	[%o7+12*32],%o5
2648	movcs	%xcc,	%o4,	%i2
2649	ldx	[%o7+5*32],%o4
2650	movcs	%xcc,	%o5,	%i3
2651	ldx	[%o7+13*32],%o5
2652	movvs	%xcc,	%o4,	%i2
2653	ldx	[%o7+6*32],	%o4
2654	movvs	%xcc,	%o5,	%i3
2655	ldx	[%o7+14*32],%o5
2656	move	%xcc,	%o4,	%i2
2657	ldx	[%o7+7*32],	%o4
2658	move	%xcc,	%o5,	%i3
2659	ldx	[%o7+15*32],%o5
2660	movneg	%xcc,	%o4,	%i2
2661	add	%o7,16*32,	%o7
2662	movneg	%xcc,	%o5,	%i3
2663	ldx	[%o7+0*32],	%i4
2664	ldx	[%o7+8*32],	%i5
2665	ldx	[%o7+1*32],	%o4
2666	ldx	[%o7+9*32],	%o5
2667	movvs	%icc,	%o4,	%i4
2668	ldx	[%o7+2*32],	%o4
2669	movvs	%icc,	%o5,	%i5
2670	ldx	[%o7+10*32],%o5
2671	move	%icc,	%o4,	%i4
2672	ldx	[%o7+3*32],	%o4
2673	move	%icc,	%o5,	%i5
2674	ldx	[%o7+11*32],%o5
2675	movneg	%icc,	%o4,	%i4
2676	ldx	[%o7+4*32],	%o4
2677	movneg	%icc,	%o5,	%i5
2678	ldx	[%o7+12*32],%o5
2679	movcs	%xcc,	%o4,	%i4
2680	ldx	[%o7+5*32],%o4
2681	movcs	%xcc,	%o5,	%i5
2682	ldx	[%o7+13*32],%o5
2683	movvs	%xcc,	%o4,	%i4
2684	ldx	[%o7+6*32],	%o4
2685	movvs	%xcc,	%o5,	%i5
2686	ldx	[%o7+14*32],%o5
2687	move	%xcc,	%o4,	%i4
2688	ldx	[%o7+7*32],	%o4
2689	move	%xcc,	%o5,	%i5
2690	ldx	[%o7+15*32],%o5
2691	movneg	%xcc,	%o4,	%i4
2692	add	%o7,16*32,	%o7
2693	movneg	%xcc,	%o5,	%i5
2694	ldx	[%o7+0*32],	%l0
2695	ldx	[%o7+8*32],	%l1
2696	ldx	[%o7+1*32],	%o4
2697	ldx	[%o7+9*32],	%o5
2698	movvs	%icc,	%o4,	%l0
2699	ldx	[%o7+2*32],	%o4
2700	movvs	%icc,	%o5,	%l1
2701	ldx	[%o7+10*32],%o5
2702	move	%icc,	%o4,	%l0
2703	ldx	[%o7+3*32],	%o4
2704	move	%icc,	%o5,	%l1
2705	ldx	[%o7+11*32],%o5
2706	movneg	%icc,	%o4,	%l0
2707	ldx	[%o7+4*32],	%o4
2708	movneg	%icc,	%o5,	%l1
2709	ldx	[%o7+12*32],%o5
2710	movcs	%xcc,	%o4,	%l0
2711	ldx	[%o7+5*32],%o4
2712	movcs	%xcc,	%o5,	%l1
2713	ldx	[%o7+13*32],%o5
2714	movvs	%xcc,	%o4,	%l0
2715	ldx	[%o7+6*32],	%o4
2716	movvs	%xcc,	%o5,	%l1
2717	ldx	[%o7+14*32],%o5
2718	move	%xcc,	%o4,	%l0
2719	ldx	[%o7+7*32],	%o4
2720	move	%xcc,	%o5,	%l1
2721	ldx	[%o7+15*32],%o5
2722	movneg	%xcc,	%o4,	%l0
2723	add	%o7,16*32,	%o7
2724	movneg	%xcc,	%o5,	%l1
2725	ldx	[%o7+0*32],	%l2
2726	ldx	[%o7+8*32],	%l3
2727	ldx	[%o7+1*32],	%o4
2728	ldx	[%o7+9*32],	%o5
2729	movvs	%icc,	%o4,	%l2
2730	ldx	[%o7+2*32],	%o4
2731	movvs	%icc,	%o5,	%l3
2732	ldx	[%o7+10*32],%o5
2733	move	%icc,	%o4,	%l2
2734	ldx	[%o7+3*32],	%o4
2735	move	%icc,	%o5,	%l3
2736	ldx	[%o7+11*32],%o5
2737	movneg	%icc,	%o4,	%l2
2738	ldx	[%o7+4*32],	%o4
2739	movneg	%icc,	%o5,	%l3
2740	ldx	[%o7+12*32],%o5
2741	movcs	%xcc,	%o4,	%l2
2742	ldx	[%o7+5*32],%o4
2743	movcs	%xcc,	%o5,	%l3
2744	ldx	[%o7+13*32],%o5
2745	movvs	%xcc,	%o4,	%l2
2746	ldx	[%o7+6*32],	%o4
2747	movvs	%xcc,	%o5,	%l3
2748	ldx	[%o7+14*32],%o5
2749	move	%xcc,	%o4,	%l2
2750	ldx	[%o7+7*32],	%o4
2751	move	%xcc,	%o5,	%l3
2752	ldx	[%o7+15*32],%o5
2753	movneg	%xcc,	%o4,	%l2
2754	add	%o7,16*32,	%o7
2755	movneg	%xcc,	%o5,	%l3
2756	ldx	[%o7+0*32],	%l4
2757	ldx	[%o7+8*32],	%l5
2758	ldx	[%o7+1*32],	%o4
2759	ldx	[%o7+9*32],	%o5
2760	movvs	%icc,	%o4,	%l4
2761	ldx	[%o7+2*32],	%o4
2762	movvs	%icc,	%o5,	%l5
2763	ldx	[%o7+10*32],%o5
2764	move	%icc,	%o4,	%l4
2765	ldx	[%o7+3*32],	%o4
2766	move	%icc,	%o5,	%l5
2767	ldx	[%o7+11*32],%o5
2768	movneg	%icc,	%o4,	%l4
2769	ldx	[%o7+4*32],	%o4
2770	movneg	%icc,	%o5,	%l5
2771	ldx	[%o7+12*32],%o5
2772	movcs	%xcc,	%o4,	%l4
2773	ldx	[%o7+5*32],%o4
2774	movcs	%xcc,	%o5,	%l5
2775	ldx	[%o7+13*32],%o5
2776	movvs	%xcc,	%o4,	%l4
2777	ldx	[%o7+6*32],	%o4
2778	movvs	%xcc,	%o5,	%l5
2779	ldx	[%o7+14*32],%o5
2780	move	%xcc,	%o4,	%l4
2781	ldx	[%o7+7*32],	%o4
2782	move	%xcc,	%o5,	%l5
2783	ldx	[%o7+15*32],%o5
2784	movneg	%xcc,	%o4,	%l4
2785	add	%o7,16*32,	%o7
2786	movneg	%xcc,	%o5,	%l5
2787	ldx	[%o7+0*32],	%l6
2788	ldx	[%o7+8*32],	%l7
2789	ldx	[%o7+1*32],	%o4
2790	ldx	[%o7+9*32],	%o5
2791	movvs	%icc,	%o4,	%l6
2792	ldx	[%o7+2*32],	%o4
2793	movvs	%icc,	%o5,	%l7
2794	ldx	[%o7+10*32],%o5
2795	move	%icc,	%o4,	%l6
2796	ldx	[%o7+3*32],	%o4
2797	move	%icc,	%o5,	%l7
2798	ldx	[%o7+11*32],%o5
2799	movneg	%icc,	%o4,	%l6
2800	ldx	[%o7+4*32],	%o4
2801	movneg	%icc,	%o5,	%l7
2802	ldx	[%o7+12*32],%o5
2803	movcs	%xcc,	%o4,	%l6
2804	ldx	[%o7+5*32],%o4
2805	movcs	%xcc,	%o5,	%l7
2806	ldx	[%o7+13*32],%o5
2807	movvs	%xcc,	%o4,	%l6
2808	ldx	[%o7+6*32],	%o4
2809	movvs	%xcc,	%o5,	%l7
2810	ldx	[%o7+14*32],%o5
2811	move	%xcc,	%o4,	%l6
2812	ldx	[%o7+7*32],	%o4
2813	move	%xcc,	%o5,	%l7
2814	ldx	[%o7+15*32],%o5
2815	movneg	%xcc,	%o4,	%l6
2816	add	%o7,16*32,	%o7
2817	movneg	%xcc,	%o5,	%l7
2818	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2819	ldx	[%i7+0*32],	%i0
2820	ldx	[%i7+8*32],	%i1
2821	ldx	[%i7+1*32],	%o4
2822	ldx	[%i7+9*32],	%o5
2823	movvs	%icc,	%o4,	%i0
2824	ldx	[%i7+2*32],	%o4
2825	movvs	%icc,	%o5,	%i1
2826	ldx	[%i7+10*32],%o5
2827	move	%icc,	%o4,	%i0
2828	ldx	[%i7+3*32],	%o4
2829	move	%icc,	%o5,	%i1
2830	ldx	[%i7+11*32],%o5
2831	movneg	%icc,	%o4,	%i0
2832	ldx	[%i7+4*32],	%o4
2833	movneg	%icc,	%o5,	%i1
2834	ldx	[%i7+12*32],%o5
2835	movcs	%xcc,	%o4,	%i0
2836	ldx	[%i7+5*32],%o4
2837	movcs	%xcc,	%o5,	%i1
2838	ldx	[%i7+13*32],%o5
2839	movvs	%xcc,	%o4,	%i0
2840	ldx	[%i7+6*32],	%o4
2841	movvs	%xcc,	%o5,	%i1
2842	ldx	[%i7+14*32],%o5
2843	move	%xcc,	%o4,	%i0
2844	ldx	[%i7+7*32],	%o4
2845	move	%xcc,	%o5,	%i1
2846	ldx	[%i7+15*32],%o5
2847	movneg	%xcc,	%o4,	%i0
2848	add	%i7,16*32,	%i7
2849	movneg	%xcc,	%o5,	%i1
2850	ldx	[%i7+0*32],	%i2
2851	ldx	[%i7+8*32],	%i3
2852	ldx	[%i7+1*32],	%o4
2853	ldx	[%i7+9*32],	%o5
2854	movvs	%icc,	%o4,	%i2
2855	ldx	[%i7+2*32],	%o4
2856	movvs	%icc,	%o5,	%i3
2857	ldx	[%i7+10*32],%o5
2858	move	%icc,	%o4,	%i2
2859	ldx	[%i7+3*32],	%o4
2860	move	%icc,	%o5,	%i3
2861	ldx	[%i7+11*32],%o5
2862	movneg	%icc,	%o4,	%i2
2863	ldx	[%i7+4*32],	%o4
2864	movneg	%icc,	%o5,	%i3
2865	ldx	[%i7+12*32],%o5
2866	movcs	%xcc,	%o4,	%i2
2867	ldx	[%i7+5*32],%o4
2868	movcs	%xcc,	%o5,	%i3
2869	ldx	[%i7+13*32],%o5
2870	movvs	%xcc,	%o4,	%i2
2871	ldx	[%i7+6*32],	%o4
2872	movvs	%xcc,	%o5,	%i3
2873	ldx	[%i7+14*32],%o5
2874	move	%xcc,	%o4,	%i2
2875	ldx	[%i7+7*32],	%o4
2876	move	%xcc,	%o5,	%i3
2877	ldx	[%i7+15*32],%o5
2878	movneg	%xcc,	%o4,	%i2
2879	add	%i7,16*32,	%i7
2880	movneg	%xcc,	%o5,	%i3
2881	ldx	[%i7+0*32],	%i4
2882	ldx	[%i7+8*32],	%i5
2883	ldx	[%i7+1*32],	%o4
2884	ldx	[%i7+9*32],	%o5
2885	movvs	%icc,	%o4,	%i4
2886	ldx	[%i7+2*32],	%o4
2887	movvs	%icc,	%o5,	%i5
2888	ldx	[%i7+10*32],%o5
2889	move	%icc,	%o4,	%i4
2890	ldx	[%i7+3*32],	%o4
2891	move	%icc,	%o5,	%i5
2892	ldx	[%i7+11*32],%o5
2893	movneg	%icc,	%o4,	%i4
2894	ldx	[%i7+4*32],	%o4
2895	movneg	%icc,	%o5,	%i5
2896	ldx	[%i7+12*32],%o5
2897	movcs	%xcc,	%o4,	%i4
2898	ldx	[%i7+5*32],%o4
2899	movcs	%xcc,	%o5,	%i5
2900	ldx	[%i7+13*32],%o5
2901	movvs	%xcc,	%o4,	%i4
2902	ldx	[%i7+6*32],	%o4
2903	movvs	%xcc,	%o5,	%i5
2904	ldx	[%i7+14*32],%o5
2905	move	%xcc,	%o4,	%i4
2906	ldx	[%i7+7*32],	%o4
2907	move	%xcc,	%o5,	%i5
2908	ldx	[%i7+15*32],%o5
2909	movneg	%xcc,	%o4,	%i4
2910	add	%i7,16*32,	%i7
2911	movneg	%xcc,	%o5,	%i5
2912	ldx	[%i7+0*32],	%l0
2913	ldx	[%i7+8*32],	%l1
2914	ldx	[%i7+1*32],	%o4
2915	ldx	[%i7+9*32],	%o5
2916	movvs	%icc,	%o4,	%l0
2917	ldx	[%i7+2*32],	%o4
2918	movvs	%icc,	%o5,	%l1
2919	ldx	[%i7+10*32],%o5
2920	move	%icc,	%o4,	%l0
2921	ldx	[%i7+3*32],	%o4
2922	move	%icc,	%o5,	%l1
2923	ldx	[%i7+11*32],%o5
2924	movneg	%icc,	%o4,	%l0
2925	ldx	[%i7+4*32],	%o4
2926	movneg	%icc,	%o5,	%l1
2927	ldx	[%i7+12*32],%o5
2928	movcs	%xcc,	%o4,	%l0
2929	ldx	[%i7+5*32],%o4
2930	movcs	%xcc,	%o5,	%l1
2931	ldx	[%i7+13*32],%o5
2932	movvs	%xcc,	%o4,	%l0
2933	ldx	[%i7+6*32],	%o4
2934	movvs	%xcc,	%o5,	%l1
2935	ldx	[%i7+14*32],%o5
2936	move	%xcc,	%o4,	%l0
2937	ldx	[%i7+7*32],	%o4
2938	move	%xcc,	%o5,	%l1
2939	ldx	[%i7+15*32],%o5
2940	movneg	%xcc,	%o4,	%l0
2941	add	%i7,16*32,	%i7
2942	movneg	%xcc,	%o5,	%l1
2943	ldx	[%i7+0*32],	%l2
2944	ldx	[%i7+8*32],	%l3
2945	ldx	[%i7+1*32],	%o4
2946	ldx	[%i7+9*32],	%o5
2947	movvs	%icc,	%o4,	%l2
2948	ldx	[%i7+2*32],	%o4
2949	movvs	%icc,	%o5,	%l3
2950	ldx	[%i7+10*32],%o5
2951	move	%icc,	%o4,	%l2
2952	ldx	[%i7+3*32],	%o4
2953	move	%icc,	%o5,	%l3
2954	ldx	[%i7+11*32],%o5
2955	movneg	%icc,	%o4,	%l2
2956	ldx	[%i7+4*32],	%o4
2957	movneg	%icc,	%o5,	%l3
2958	ldx	[%i7+12*32],%o5
2959	movcs	%xcc,	%o4,	%l2
2960	ldx	[%i7+5*32],%o4
2961	movcs	%xcc,	%o5,	%l3
2962	ldx	[%i7+13*32],%o5
2963	movvs	%xcc,	%o4,	%l2
2964	ldx	[%i7+6*32],	%o4
2965	movvs	%xcc,	%o5,	%l3
2966	ldx	[%i7+14*32],%o5
2967	move	%xcc,	%o4,	%l2
2968	ldx	[%i7+7*32],	%o4
2969	move	%xcc,	%o5,	%l3
2970	ldx	[%i7+15*32],%o5
2971	movneg	%xcc,	%o4,	%l2
2972	add	%i7,16*32,	%i7
2973	movneg	%xcc,	%o5,	%l3
2974	srax	%g4,	32,	%o4		! unpack %g4
2975	srl	%g4,	%g0,	%o5
2976	sub	%o4,	5,	%o4
2977	mov	%g3,	%i7
2978	sllx	%o4,	32,	%g4		! re-pack %g4
2979	or	%o5,	%g4,	%g4
2980	srl	%o5,	%o4,	%o5
2981	srl	%o5,	2,	%o4
2982	and	%o5,	3,	%o5
2983	and	%o4,	7,	%o4
2984	sll	%o5,	3,	%o5	! offset within first cache line
2985	add	%o5,	%i7,	%i7	! of the pwrtbl
2986	or	%g0,	1,	%o5
2987	sll	%o5,	%o4,	%o4
2988	.word	0x81b02940+24-1	! montsqr	24-1
2989	fbu,pn	%fcc3,.Labort_24
2990#ifndef	__arch64__
2991	and	%fp,%g5,%g5
2992	brz,pn	%g5,.Labort_24
2993#endif
2994	nop
2995	.word	0x81b02940+24-1	! montsqr	24-1
2996	fbu,pn	%fcc3,.Labort_24
2997#ifndef	__arch64__
2998	and	%fp,%g5,%g5
2999	brz,pn	%g5,.Labort_24
3000#endif
3001	nop
3002	.word	0x81b02940+24-1	! montsqr	24-1
3003	fbu,pn	%fcc3,.Labort_24
3004#ifndef	__arch64__
3005	and	%fp,%g5,%g5
3006	brz,pn	%g5,.Labort_24
3007#endif
3008	nop
3009	.word	0x81b02940+24-1	! montsqr	24-1
3010	fbu,pn	%fcc3,.Labort_24
3011#ifndef	__arch64__
3012	and	%fp,%g5,%g5
3013	brz,pn	%g5,.Labort_24
3014#endif
3015	nop
3016	.word	0x81b02940+24-1	! montsqr	24-1
3017	fbu,pn	%fcc3,.Labort_24
3018#ifndef	__arch64__
3019	and	%fp,%g5,%g5
3020	brz,pn	%g5,.Labort_24
3021#endif
3022	nop
3023	wr	%o4,	%g0,	%ccr
3024	.word	0x81b02920+24-1	! montmul	24-1
3025	fbu,pn	%fcc3,.Labort_24
3026#ifndef	__arch64__
3027	and	%fp,%g5,%g5
3028	brz,pn	%g5,.Labort_24
3029#endif
3030
3031	srax	%g4,	32,	%o4
3032#ifdef	__arch64__
3033	brgez	%o4,.Lstride_24
3034	restore
3035	restore
3036	restore
3037	restore
3038	restore
3039#else
3040	brgez	%o4,.Lstride_24
3041	restore;		and	%fp,%g5,%g5
3042	restore;		and	%fp,%g5,%g5
3043	restore;		and	%fp,%g5,%g5
3044	restore;		and	%fp,%g5,%g5
3045	 brz,pn	%g5,.Labort1_24
3046	restore
3047#endif
3048	.word	0x81b02310 !movxtod	%l0,%f0
3049	.word	0x85b02311 !movxtod	%l1,%f2
3050	.word	0x89b02312 !movxtod	%l2,%f4
3051	.word	0x8db02313 !movxtod	%l3,%f6
3052	.word	0x91b02314 !movxtod	%l4,%f8
3053	.word	0x95b02315 !movxtod	%l5,%f10
3054	.word	0x99b02316 !movxtod	%l6,%f12
3055	.word	0x9db02317 !movxtod	%l7,%f14
3056	.word	0xa1b02308 !movxtod	%o0,%f16
3057	.word	0xa5b02309 !movxtod	%o1,%f18
3058	.word	0xa9b0230a !movxtod	%o2,%f20
3059	.word	0xadb0230b !movxtod	%o3,%f22
3060	.word	0xbbb0230c !movxtod	%o4,%f60
3061	.word	0xbfb0230d !movxtod	%o5,%f62
3062#ifdef	__arch64__
3063	restore
3064#else
3065	 and	%fp,%g5,%g5
3066	restore
3067	 and	%g5,1,%o7
3068	 and	%fp,%g5,%g5
3069	 srl	%fp,0,%fp		! just in case?
3070	 or	%o7,%g5,%g5
3071	brz,a,pn %g5,.Ldone_24
3072	mov	0,%i0		! return failure
3073#endif
3074	std	%f0,[%g1+0*8]
3075	std	%f2,[%g1+1*8]
3076	std	%f4,[%g1+2*8]
3077	std	%f6,[%g1+3*8]
3078	std	%f8,[%g1+4*8]
3079	std	%f10,[%g1+5*8]
3080	std	%f12,[%g1+6*8]
3081	std	%f14,[%g1+7*8]
3082	std	%f16,[%g1+8*8]
3083	std	%f18,[%g1+9*8]
3084	std	%f20,[%g1+10*8]
3085	std	%f22,[%g1+11*8]
3086	std	%f60,[%g1+12*8]
3087	std	%f62,[%g1+13*8]
3088	std	%f24,[%g1+14*8]
3089	std	%f26,[%g1+15*8]
3090	std	%f28,[%g1+16*8]
3091	std	%f30,[%g1+17*8]
3092	std	%f32,[%g1+18*8]
3093	std	%f34,[%g1+19*8]
3094	std	%f36,[%g1+20*8]
3095	std	%f38,[%g1+21*8]
3096	std	%f40,[%g1+22*8]
3097	std	%f42,[%g1+23*8]
3098	mov	1,%i0		! return success
3099.Ldone_24:
3100	ret
3101	restore
3102
3103.Labort_24:
3104	restore
3105	restore
3106	restore
3107	restore
3108	restore
3109.Labort1_24:
3110	restore
3111
3112	mov	0,%i0		! return failure
3113	ret
3114	restore
3115.type	bn_pwr5_mont_t4_24, #function
3116.size	bn_pwr5_mont_t4_24, .-bn_pwr5_mont_t4_24
! bn_pwr5_mont_t4_32 -- fixed-window (5-bit) Montgomery exponentiation step
! for 32-limb (2048-bit) operands using the SPARC T4 montmul/montsqr
! instructions (emitted below as raw .word opcodes for old assemblers).
!
! Entry (inferred from the register usage below -- NOTE(review): confirm
! against the C prototype):
!   %i0 = in/out buffer of 32 64-bit limbs (loaded at entry, result stored
!         back on success)
!   %i1 = second 32-limb vector (presumably the modulus np)
!   %i2 = pointer to n0 (two 32-bit words, loaded into %f0/%f1)
!   %i3 = power table base ("pwrtbl"), entries strided across 32-byte rows
!   %i4/%i5 = packed control word: low half selects the current 5-bit
!         window, high half is the remaining bit counter (packed into %g4)
! Returns 1 in %i0 on success, 0 on failure (aborted hardware sequence or,
! on 32-bit ABIs, a detected register-window spill/fill).
.globl	bn_pwr5_mont_t4_32
.align	32
bn_pwr5_mont_t4_32:
! Frame setup. On 64-bit ABI %g5=0 (no poison needed) and a plain -128
! frame is used. On 32-bit ABIs the kernel preserves only the low 32 bits
! of window registers across spill/fill, so %g5 is built as an upper-half
! all-ones mask used to "poison" %fp; loss of the poison below signals
! that a window was spilled and the cached wide state is gone.
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
! Pre-touch a few register windows so later save/restore pairs in the hot
! path do not fault mid-sequence.
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
! Poison %fp's upper half with %g5 and keep a copy (plus the stack-parity
! bit) in %g5 for the spill/fill checks below.
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
! First data window: pull the 32-limb operand from [%g1] -- limbs 0-13
! into %l0-%l7/%o0-%o5, limbs 14-31 into FP regs %f24-%f58.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	ldx	[%g1+8*8],%o0
	ldx	[%g1+9*8],%o1
	ldx	[%g1+10*8],%o2
	ldx	[%g1+11*8],%o3
	ldx	[%g1+12*8],%o4
	ldx	[%g1+13*8],%o5
	ldd	[%g1+14*8],%f24
	ldd	[%g1+15*8],%f26
	ldd	[%g1+16*8],%f28
	ldd	[%g1+17*8],%f30
	ldd	[%g1+18*8],%f32
	ldd	[%g1+19*8],%f34
	ldd	[%g1+20*8],%f36
	ldd	[%g1+21*8],%f38
	ldd	[%g1+22*8],%f40
	ldd	[%g1+23*8],%f42
	ldd	[%g1+24*8],%f44
	ldd	[%g1+25*8],%f46
	ldd	[%g1+26*8],%f48
	ldd	[%g1+27*8],%f50
	ldd	[%g1+28*8],%f52
	ldd	[%g1+29*8],%f54
	ldd	[%g1+30*8],%f56
	ldd	[%g1+31*8],%f58
! Next windows: spread the second 32-limb vector from [%g2] across three
! register windows (14 + 14 + 4 limbs) for the hardware opcodes to find.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	ldx	[%g2+8*8],%o0
	ldx	[%g2+9*8],%o1
	ldx	[%g2+10*8],%o2
	ldx	[%g2+11*8],%o3
	ldx	[%g2+12*8],%o4
	ldx	[%g2+13*8],%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+14*8],%l0
	ldx	[%g2+15*8],%l1
	ldx	[%g2+16*8],%l2
	ldx	[%g2+17*8],%l3
	ldx	[%g2+18*8],%l4
	ldx	[%g2+19*8],%l5
	ldx	[%g2+20*8],%l6
	ldx	[%g2+21*8],%l7
	ldx	[%g2+22*8],%o0
	ldx	[%g2+23*8],%o1
	ldx	[%g2+24*8],%o2
	ldx	[%g2+25*8],%o3
	ldx	[%g2+26*8],%o4
	ldx	[%g2+27*8],%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+28*8],%l0
	ldx	[%g2+29*8],%l1
	ldx	[%g2+30*8],%l2
	ldx	[%g2+31*8],%l3
	save	%sp,-128,%sp;		or	%g5,%fp,%fp

! Unpack %g4: %o4 = remaining bit counter, %o5 = window bits. Consume one
! 5-bit window (sub 5), re-pack, then shift the bit string right by the
! updated counter so the current 5 bits land at the bottom of %o5. Those
! 5 bits split as: (bits>>2)&7 -> one-hot mask for %ccr (row select) and
! (bits&3)*8 -> byte offset of the column within the 32-byte cache line.
	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	wr	%o4,	%g0,	%ccr
	b	.Lstride_32
	nop
.align	16
.Lstride_32:
! Constant-time gather of one precomputed power (32 limbs) from the table.
! Each limb pair lives strided across eight 32-byte rows; the one-hot mask
! written to %ccr makes exactly one of the eight candidates survive the
! chain of conditional moves (default, then vs/e/neg on %icc and
! cs/vs/e/neg on %xcc). Every row is loaded unconditionally and no
! data-dependent branch or address is used, so the access pattern does not
! leak the exponent window. The same 32-line stanza repeats below for each
! of the 16 limb pairs (8 per register window).
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	ldx	[%o7+0*32],	%l2
	ldx	[%o7+8*32],	%l3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l3
	ldx	[%o7+0*32],	%l4
	ldx	[%o7+8*32],	%l5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l5
	ldx	[%o7+0*32],	%l6
	ldx	[%o7+8*32],	%l7
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l7
! Second selection window: after save, the outer %o7 table pointer is
! visible here as %i7. The remaining eight limb pairs are gathered with
! the same constant-time pattern.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%i7+0*32],	%i0
	ldx	[%i7+8*32],	%i1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i1
	ldx	[%i7+0*32],	%i2
	ldx	[%i7+8*32],	%i3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i3
	ldx	[%i7+0*32],	%i4
	ldx	[%i7+8*32],	%i5
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i5
	ldx	[%i7+0*32],	%l0
	ldx	[%i7+8*32],	%l1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l1
	ldx	[%i7+0*32],	%l2
	ldx	[%i7+8*32],	%l3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l3
	ldx	[%i7+0*32],	%l4
	ldx	[%i7+8*32],	%l5
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l5
	ldx	[%i7+0*32],	%l6
	ldx	[%i7+8*32],	%l7
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l7
	ldx	[%i7+0*32],	%o0
	ldx	[%i7+8*32],	%o1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%o0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%o1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%o0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%o1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%o0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%o1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%o0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%o1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%o0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%o1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%o0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%o1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%o0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%o1
	ldx	[%i7+0*32],	%o2
	ldx	[%i7+8*32],	%o3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%o2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%o3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%o2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%o3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%o2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%o3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%o2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%o3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%o2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%o3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%o2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%o3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%o2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%o3
! Prepare the NEXT window while this one is processed: same unpack/split
! as above, but with an arithmetic shift (srax) so that once the counter
! goes negative the brgez tests below fall out of the loop. %i7 is reset
! to the table base for the next pass.
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
! One 5-bit window: five Montgomery squarings then one Montgomery multiply
! by the selected power. The .word forms are T4 montsqr/montmul opcodes
! ("+32-1" encodes the 32-limb operand size). These sequences are
! interruptible: %fcc3 appears to flag an aborted sequence (checked after
! every opcode) -- NOTE(review): confirm the exact %fcc3 protocol against
! the T4 ISA documentation. On 32-bit ABIs, the loss of the %fp poison
! (and %fp,%g5 turning zero) additionally detects a window spill/fill that
! destroyed the wide register state.
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
! Commit the next window's one-hot row mask to %ccr, then multiply by the
! power selected above.
	wr	%o4,	%g0,	%ccr
	.word	0x81b02920+32-1	! montmul	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif

! Loop while the bit counter (high half of %g4) is still non-negative;
! the restores in the delay slot / fall-through pop the selection windows.
	srax	%g4,	32,	%o4
#ifdef	__arch64__
	brgez	%o4,.Lstride_32
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_32
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_32
	restore
#endif
! All windows consumed: move the integer half of the result into FP regs
! so it survives the final restore (FP regs are not windowed).
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
! 32-bit ABI: final spill/fill check across the last restore; if the %fp
! poison (or the stack-parity bit) was lost, report failure.
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_32
	mov	0,%i0		! return failure
#endif
! Store the full 32-limb result back to the caller's buffer: limbs 0-13
! from %f0-%f22/%f60-%f62 (just moved from integer regs), limbs 14-31
! from %f24-%f58 (resident in FP regs since entry).
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	std	%f16,[%g1+8*8]
	std	%f18,[%g1+9*8]
	std	%f20,[%g1+10*8]
	std	%f22,[%g1+11*8]
	std	%f60,[%g1+12*8]
	std	%f62,[%g1+13*8]
	std	%f24,[%g1+14*8]
	std	%f26,[%g1+15*8]
	std	%f28,[%g1+16*8]
	std	%f30,[%g1+17*8]
	std	%f32,[%g1+18*8]
	std	%f34,[%g1+19*8]
	std	%f36,[%g1+20*8]
	std	%f38,[%g1+21*8]
	std	%f40,[%g1+22*8]
	std	%f42,[%g1+23*8]
	std	%f44,[%g1+24*8]
	std	%f46,[%g1+25*8]
	std	%f48,[%g1+26*8]
	std	%f50,[%g1+27*8]
	std	%f52,[%g1+28*8]
	std	%f54,[%g1+29*8]
	std	%f56,[%g1+30*8]
	std	%f58,[%g1+31*8]
	mov	1,%i0		! return success
.Ldone_32:
	ret
	restore

! Abort path: unwind the data/selection windows entered above, then return
! failure so the C caller can fall back to a software implementation.
.Labort_32:
	restore
	restore
	restore
	restore
	restore
.Labort1_32:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_32, #function
.size	bn_pwr5_mont_t4_32, .-bn_pwr5_mont_t4_32
3902.globl	bn_mul_mont_t4
3903.align	32
3904bn_mul_mont_t4:
3905	add	%sp,	STACK_BIAS,	%g4	! real top of stack
3906	sll	%o5,	3,	%o5		! size in bytes
3907	add	%o5,	63,	%g1
3908	andn	%g1,	63,	%g1		! buffer size rounded up to 64 bytes
3909	sub	%g4,	%g1,	%g1
3910	andn	%g1,	63,	%g1		! align at 64 byte
3911	sub	%g1,	STACK_FRAME,	%g1	! new top of stack
3912	sub	%g1,	%g4,	%g1
3913
3914	save	%sp,	%g1,	%sp
3915	ld	[%i4+0],	%l0	! pull n0[0..1] value
3916	ld	[%i4+4],	%l1
3917	add	%sp, STACK_BIAS+STACK_FRAME, %l5
3918	ldx	[%i2+0],	%g2	! m0=bp[0]
3919	sllx	%l1,	32,	%g1
3920	add	%i2,	8,	%i2
3921	or	%l0,	%g1,	%g1
3922
3923	ldx	[%i1+0],	%o2	! ap[0]
3924
3925	mulx	%o2,	%g2,	%g4	! ap[0]*bp[0]
3926	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5
3927
3928	ldx	[%i1+8],	%o2	! ap[1]
3929	add	%i1,	16,	%i1
3930	ldx	[%i3+0],	%o4	! np[0]
3931
3932	mulx	%g4,	%g1,	%g3	! "tp[0]"*n0
3933
3934	mulx	%o2,	%g2,	%o3	! ap[1]*bp[0]
3935	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
3936
3937	mulx	%o4,	%g3,	%o0	! np[0]*m1
3938	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1
3939
3940	ldx	[%i3+8],	%o4	! np[1]
3941
3942	addcc	%g4,	%o0,	%o0
3943	add	%i3,	16,	%i3
3944	.word	0x93b00229 !addxc	%g0,%o1,%o1
3945
3946	mulx	%o4,	%g3,	%o5	! np[1]*m1
3947	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
3948
3949	ba	.L1st
3950	sub	%i5,	24,	%l4	! cnt=num-3
3951
3952.align	16
3953.L1st:
3954	addcc	%o3,	%g5,	%g4
3955	.word	0x8bb28220 !addxc	%o2,%g0,%g5
3956
3957	ldx	[%i1+0],	%o2	! ap[j]
3958	addcc	%o5,	%o1,	%o0
3959	add	%i1,	8,	%i1
3960	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
3961
3962	ldx	[%i3+0],	%o4	! np[j]
3963	mulx	%o2,	%g2,	%o3	! ap[j]*bp[0]
3964	add	%i3,	8,	%i3
3965	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
3966
3967	mulx	%o4,	%g3,	%o5	! np[j]*m1
3968	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
3969	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
3970	.word	0x93b00229 !addxc	%g0,%o1,%o1
3971	stxa	%o0,	[%l5]0xe2	! tp[j-1]
3972	add	%l5,	8,	%l5	! tp++
3973
3974	brnz,pt	%l4,	.L1st
3975	sub	%l4,	8,	%l4	! j--
3976!.L1st
3977	addcc	%o3,	%g5,	%g4
3978	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
3979
3980	addcc	%o5,	%o1,	%o0
3981	.word	0x93b30220 !addxc	%o4,%g0,%o1
3982	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
3983	.word	0x93b00229 !addxc	%g0,%o1,%o1
3984	stxa	%o0,	[%l5]0xe2	! tp[j-1]
3985	add	%l5,	8,	%l5
3986
3987	addcc	%g5,	%o1,	%o1
3988	.word	0xa1b00220 !addxc	%g0,%g0,%l0	! upmost overflow bit
3989	stxa	%o1,	[%l5]0xe2
3990	add	%l5,	8,	%l5
3991
3992	ba	.Louter
3993	sub	%i5,	16,	%l1	! i=num-2
3994
3995.align	16
3996.Louter:
3997	ldx	[%i2+0],	%g2	! m0=bp[i]
3998	add	%i2,	8,	%i2
3999
4000	sub	%i1,	%i5,	%i1	! rewind
4001	sub	%i3,	%i5,	%i3
4002	sub	%l5,	%i5,	%l5
4003
4004	ldx	[%i1+0],	%o2	! ap[0]
4005	ldx	[%i3+0],	%o4	! np[0]
4006
4007	mulx	%o2,	%g2,	%g4	! ap[0]*bp[i]
4008	ldx	[%l5],		%o7	! tp[0]
4009	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5
4010	ldx	[%i1+8],	%o2	! ap[1]
4011	addcc	%g4,	%o7,	%g4	! ap[0]*bp[i]+tp[0]
4012	mulx	%o2,	%g2,	%o3	! ap[1]*bp[i]
4013	.word	0x8bb00225 !addxc	%g0,%g5,%g5
4014	mulx	%g4,	%g1,	%g3	! tp[0]*n0
4015	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
4016	mulx	%o4,	%g3,	%o0	! np[0]*m1
4017	add	%i1,	16,	%i1
4018	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1
4019	ldx	[%i3+8],	%o4	! np[1]
4020	add	%i3,	16,	%i3
4021	addcc	%o0,	%g4,	%o0
4022	mulx	%o4,	%g3,	%o5	! np[1]*m1
4023	.word	0x93b00229 !addxc	%g0,%o1,%o1
4024	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
4025
4026	ba	.Linner
4027	sub	%i5,	24,	%l4	! cnt=num-3
4028.align	16
4029.Linner:
4030	addcc	%o3,	%g5,	%g4
4031	ldx	[%l5+8],	%o7	! tp[j]
4032	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
4033	ldx	[%i1+0],	%o2	! ap[j]
4034	add	%i1,	8,	%i1
4035	addcc	%o5,	%o1,	%o0
4036	mulx	%o2,	%g2,	%o3	! ap[j]*bp[i]
4037	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
4038	ldx	[%i3+0],	%o4	! np[j]
4039	add	%i3,	8,	%i3
4040	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
4041	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
4042	mulx	%o4,	%g3,	%o5	! np[j]*m1
4043	.word	0x8bb00225 !addxc	%g0,%g5,%g5
4044	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
4045	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
4046	.word	0x93b00229 !addxc	%g0,%o1,%o1
4047	stx	%o0,	[%l5]		! tp[j-1]
4048	add	%l5,	8,	%l5
4049	brnz,pt	%l4,	.Linner
4050	sub	%l4,	8,	%l4
4051!.Linner
4052	ldx	[%l5+8],	%o7	! tp[j]
4053	addcc	%o3,	%g5,	%g4
4054	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
4055	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
4056	.word	0x8bb00225 !addxc	%g0,%g5,%g5
4057
4058	addcc	%o5,	%o1,	%o0
4059	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
4060	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
4061	.word	0x93b00229 !addxc	%g0,%o1,%o1
4062	stx	%o0,	[%l5]		! tp[j-1]
4063
4064	subcc	%g0,	%l0,	%g0	! move upmost overflow to CCR.xcc
4065	.word	0x93b24265 !addxccc	%o1,%g5,%o1
4066	.word	0xa1b00220 !addxc	%g0,%g0,%l0
4067	stx	%o1,	[%l5+8]
4068	add	%l5,	16,	%l5
4069
4070	brnz,pt	%l1,	.Louter
4071	sub	%l1,	8,	%l1
4072
4073	sub	%i1,	%i5,	%i1	! rewind
4074	sub	%i3,	%i5,	%i3
4075	sub	%l5,	%i5,	%l5
4076	ba	.Lsub
4077	subcc	%i5,	8,	%l4	! cnt=num-1 and clear CCR.xcc
4078
4079.align	16
4080.Lsub:
4081	ldx	[%l5],		%o7
4082	add	%l5,	8,	%l5
4083	ldx	[%i3+0],	%o4
4084	add	%i3,	8,	%i3
4085	subccc	%o7,	%o4,	%l2	! tp[j]-np[j]
4086	srlx	%o7,	32,	%o7
4087	srlx	%o4,	32,	%o4
4088	subccc	%o7,	%o4,	%l3
4089	add	%i0,	8,	%i0
4090	st	%l2,	[%i0-4]		! reverse order
4091	st	%l3,	[%i0-8]
4092	brnz,pt	%l4,	.Lsub
4093	sub	%l4,	8,	%l4
4094
4095	sub	%i3,	%i5,	%i3	! rewind
4096	sub	%l5,	%i5,	%l5
4097	sub	%i0,	%i5,	%i0
4098
4099	subccc	%l0,	%g0,	%l0	! handle upmost overflow bit
4100	ba	.Lcopy
4101	sub	%i5,	8,	%l4
4102
4103.align	16
4104.Lcopy:					! conditional copy
4105	ldx	[%l5],		%o7
4106	ldx	[%i0+0],	%l2
4107	stx	%g0,	[%l5]		! zap
4108	add	%l5,	8,	%l5
4109	movcs	%icc,	%o7,	%l2
4110	stx	%l2,	[%i0+0]
4111	add	%i0,	8,	%i0
4112	brnz	%l4,	.Lcopy
4113	sub	%l4,	8,	%l4
4114
4115	mov	1,	%o0
4116	ret
4117	restore
4118.type	bn_mul_mont_t4, #function
4119.size	bn_mul_mont_t4, .-bn_mul_mont_t4
4120.globl	bn_mul_mont_gather5_t4
4121.align	32
4122bn_mul_mont_gather5_t4:
4123	add	%sp,	STACK_BIAS,	%g4	! real top of stack
4124	sll	%o5,	3,	%o5		! size in bytes
4125	add	%o5,	63,	%g1
4126	andn	%g1,	63,	%g1		! buffer size rounded up to 64 bytes
4127	sub	%g4,	%g1,	%g1
4128	andn	%g1,	63,	%g1		! align at 64 byte
4129	sub	%g1,	STACK_FRAME,	%g1	! new top of stack
4130	sub	%g1,	%g4,	%g1
4131	LDPTR	[%sp+STACK_7thARG],	%g4	! load power, 7th argument
4132
4133	save	%sp,	%g1,	%sp
4134	srl	%g4,	2,	%o4
4135	and	%g4,	3,	%o5
4136	and	%o4,	7,	%o4
4137	sll	%o5,	3,	%o5	! offset within first cache line
4138	add	%o5,	%i2,	%i2	! of the pwrtbl
4139	or	%g0,	1,	%o5
4140	sll	%o5,	%o4,	%l7
4141	wr	%l7,	%g0,	%ccr
4142	ldx	[%i2+0*32],	%g2
4143	ldx	[%i2+1*32],	%o4
4144	ldx	[%i2+2*32],	%o5
4145	movvs	%icc,	%o4,	%g2
4146	ldx	[%i2+3*32],	%o4
4147	move	%icc,	%o5,	%g2
4148	ldx	[%i2+4*32],	%o5
4149	movneg	%icc,	%o4,	%g2
4150	ldx	[%i2+5*32],	%o4
4151	movcs	%xcc,	%o5,	%g2
4152	ldx	[%i2+6*32],	%o5
4153	movvs	%xcc,	%o4,	%g2
4154	ldx	[%i2+7*32],	%o4
4155	move	%xcc,	%o5,	%g2
4156	add	%i2,8*32,	%i2
4157	movneg	%xcc,	%o4,	%g2
4158	ld	[%i4+0],	%l0	! pull n0[0..1] value
4159	ld	[%i4+4],	%l1
4160	add	%sp, STACK_BIAS+STACK_FRAME, %l5
4161	sllx	%l1,	32,	%g1
4162	or	%l0,	%g1,	%g1
4163
4164	ldx	[%i1+0],	%o2	! ap[0]
4165
4166	mulx	%o2,	%g2,	%g4	! ap[0]*bp[0]
4167	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5
4168
4169	ldx	[%i1+8],	%o2	! ap[1]
4170	add	%i1,	16,	%i1
4171	ldx	[%i3+0],	%o4	! np[0]
4172
4173	mulx	%g4,	%g1,	%g3	! "tp[0]"*n0
4174
4175	mulx	%o2,	%g2,	%o3	! ap[1]*bp[0]
4176	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
4177
4178	mulx	%o4,	%g3,	%o0	! np[0]*m1
4179	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1
4180
4181	ldx	[%i3+8],	%o4	! np[1]
4182
4183	addcc	%g4,	%o0,	%o0
4184	add	%i3,	16,	%i3
4185	.word	0x93b00229 !addxc	%g0,%o1,%o1
4186
4187	mulx	%o4,	%g3,	%o5	! np[1]*m1
4188	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
4189
4190	ba	.L1st_g5
4191	sub	%i5,	24,	%l4	! cnt=num-3
4192
4193.align	16
4194.L1st_g5:
4195	addcc	%o3,	%g5,	%g4
4196	.word	0x8bb28220 !addxc	%o2,%g0,%g5
4197
4198	ldx	[%i1+0],	%o2	! ap[j]
4199	addcc	%o5,	%o1,	%o0
4200	add	%i1,	8,	%i1
4201	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
4202
4203	ldx	[%i3+0],	%o4	! np[j]
4204	mulx	%o2,	%g2,	%o3	! ap[j]*bp[0]
4205	add	%i3,	8,	%i3
4206	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
4207
4208	mulx	%o4,	%g3,	%o5	! np[j]*m1
4209	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
4210	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
4211	.word	0x93b00229 !addxc	%g0,%o1,%o1
4212	stxa	%o0,	[%l5]0xe2	! tp[j-1]
4213	add	%l5,	8,	%l5	! tp++
4214
4215	brnz,pt	%l4,	.L1st_g5
4216	sub	%l4,	8,	%l4	! j--
4217!.L1st_g5
4218	addcc	%o3,	%g5,	%g4
4219	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
4220
4221	addcc	%o5,	%o1,	%o0
4222	.word	0x93b30220 !addxc	%o4,%g0,%o1
4223	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
4224	.word	0x93b00229 !addxc	%g0,%o1,%o1
4225	stxa	%o0,	[%l5]0xe2	! tp[j-1]
4226	add	%l5,	8,	%l5
4227
4228	addcc	%g5,	%o1,	%o1
4229	.word	0xa1b00220 !addxc	%g0,%g0,%l0	! upmost overflow bit
4230	stxa	%o1,	[%l5]0xe2
4231	add	%l5,	8,	%l5
4232
4233	ba	.Louter_g5
4234	sub	%i5,	16,	%l1	! i=num-2
4235
4236.align	16
4237.Louter_g5:
4238	wr	%l7,	%g0,	%ccr
4239	ldx	[%i2+0*32],	%g2
4240	ldx	[%i2+1*32],	%o4
4241	ldx	[%i2+2*32],	%o5
4242	movvs	%icc,	%o4,	%g2
4243	ldx	[%i2+3*32],	%o4
4244	move	%icc,	%o5,	%g2
4245	ldx	[%i2+4*32],	%o5
4246	movneg	%icc,	%o4,	%g2
4247	ldx	[%i2+5*32],	%o4
4248	movcs	%xcc,	%o5,	%g2
4249	ldx	[%i2+6*32],	%o5
4250	movvs	%xcc,	%o4,	%g2
4251	ldx	[%i2+7*32],	%o4
4252	move	%xcc,	%o5,	%g2
4253	add	%i2,8*32,	%i2
4254	movneg	%xcc,	%o4,	%g2
4255	sub	%i1,	%i5,	%i1	! rewind
4256	sub	%i3,	%i5,	%i3
4257	sub	%l5,	%i5,	%l5
4258
4259	ldx	[%i1+0],	%o2	! ap[0]
4260	ldx	[%i3+0],	%o4	! np[0]
4261
4262	mulx	%o2,	%g2,	%g4	! ap[0]*bp[i]
4263	ldx	[%l5],		%o7	! tp[0]
4264	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5
4265	ldx	[%i1+8],	%o2	! ap[1]
4266	addcc	%g4,	%o7,	%g4	! ap[0]*bp[i]+tp[0]
4267	mulx	%o2,	%g2,	%o3	! ap[1]*bp[i]
4268	.word	0x8bb00225 !addxc	%g0,%g5,%g5
4269	mulx	%g4,	%g1,	%g3	! tp[0]*n0
4270	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
4271	mulx	%o4,	%g3,	%o0	! np[0]*m1
4272	add	%i1,	16,	%i1
4273	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1
4274	ldx	[%i3+8],	%o4	! np[1]
4275	add	%i3,	16,	%i3
4276	addcc	%o0,	%g4,	%o0
4277	mulx	%o4,	%g3,	%o5	! np[1]*m1
4278	.word	0x93b00229 !addxc	%g0,%o1,%o1
4279	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
4280
4281	ba	.Linner_g5
4282	sub	%i5,	24,	%l4	! cnt=num-3
4283.align	16
4284.Linner_g5:
4285	addcc	%o3,	%g5,	%g4
4286	ldx	[%l5+8],	%o7	! tp[j]
4287	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
4288	ldx	[%i1+0],	%o2	! ap[j]
4289	add	%i1,	8,	%i1
4290	addcc	%o5,	%o1,	%o0
4291	mulx	%o2,	%g2,	%o3	! ap[j]*bp[i]
4292	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
4293	ldx	[%i3+0],	%o4	! np[j]
4294	add	%i3,	8,	%i3
4295	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
4296	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
4297	mulx	%o4,	%g3,	%o5	! np[j]*m1
4298	.word	0x8bb00225 !addxc	%g0,%g5,%g5
4299	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
4300	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
4301	.word	0x93b00229 !addxc	%g0,%o1,%o1
4302	stx	%o0,	[%l5]		! tp[j-1]
4303	add	%l5,	8,	%l5
4304	brnz,pt	%l4,	.Linner_g5
4305	sub	%l4,	8,	%l4
4306!.Linner_g5
4307	ldx	[%l5+8],	%o7	! tp[j]
4308	addcc	%o3,	%g5,	%g4
4309	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
4310	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
4311	.word	0x8bb00225 !addxc	%g0,%g5,%g5
4312
4313	addcc	%o5,	%o1,	%o0
4314	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
4315	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
4316	.word	0x93b00229 !addxc	%g0,%o1,%o1
4317	stx	%o0,	[%l5]		! tp[j-1]
4318
4319	subcc	%g0,	%l0,	%g0	! move upmost overflow to CCR.xcc
4320	.word	0x93b24265 !addxccc	%o1,%g5,%o1
4321	.word	0xa1b00220 !addxc	%g0,%g0,%l0
4322	stx	%o1,	[%l5+8]
4323	add	%l5,	16,	%l5
4324
4325	brnz,pt	%l1,	.Louter_g5
4326	sub	%l1,	8,	%l1
4327
4328	sub	%i1,	%i5,	%i1	! rewind
4329	sub	%i3,	%i5,	%i3
4330	sub	%l5,	%i5,	%l5
4331	ba	.Lsub_g5
4332	subcc	%i5,	8,	%l4	! cnt=num-1 and clear CCR.xcc
4333
4334.align	16
4335.Lsub_g5:
4336	ldx	[%l5],		%o7
4337	add	%l5,	8,	%l5
4338	ldx	[%i3+0],	%o4
4339	add	%i3,	8,	%i3
4340	subccc	%o7,	%o4,	%l2	! tp[j]-np[j]
4341	srlx	%o7,	32,	%o7
4342	srlx	%o4,	32,	%o4
4343	subccc	%o7,	%o4,	%l3
4344	add	%i0,	8,	%i0
4345	st	%l2,	[%i0-4]		! reverse order
4346	st	%l3,	[%i0-8]
4347	brnz,pt	%l4,	.Lsub_g5
4348	sub	%l4,	8,	%l4
4349
4350	sub	%i3,	%i5,	%i3	! rewind
4351	sub	%l5,	%i5,	%l5
4352	sub	%i0,	%i5,	%i0
4353
4354	subccc	%l0,	%g0,	%l0	! handle upmost overflow bit
4355	ba	.Lcopy_g5
4356	sub	%i5,	8,	%l4
4357
4358.align	16
4359.Lcopy_g5:				! conditional copy
4360	ldx	[%l5],		%o7
4361	ldx	[%i0+0],	%l2
4362	stx	%g0,	[%l5]		! zap
4363	add	%l5,	8,	%l5
4364	movcs	%icc,	%o7,	%l2
4365	stx	%l2,	[%i0+0]
4366	add	%i0,	8,	%i0
4367	brnz	%l4,	.Lcopy_g5
4368	sub	%l4,	8,	%l4
4369
4370	mov	1,	%o0
4371	ret
4372	restore
4373.type	bn_mul_mont_gather5_t4, #function
4374.size	bn_mul_mont_gather5_t4, .-bn_mul_mont_gather5_t4
4375.globl	bn_flip_t4
4376.align	32
4377bn_flip_t4:
4378.Loop_flip:
4379	ld	[%o1+0],	%o4
4380	sub	%o2,	1,	%o2
4381	ld	[%o1+4],	%o5
4382	add	%o1,	8,	%o1
4383	st	%o5,	[%o0+0]
4384	st	%o4,	[%o0+4]
4385	brnz	%o2,	.Loop_flip
4386	add	%o0,	8,	%o0
4387	retl
4388	nop
4389.type	bn_flip_t4, #function
4390.size	bn_flip_t4, .-bn_flip_t4
4391
4392.globl	bn_flip_n_scatter5_t4
4393.align	32
4394bn_flip_n_scatter5_t4:
4395	sll	%o3,	3,	%o3
4396	srl	%o1,	1,	%o1
4397	add	%o3,	%o2,	%o2	! &pwrtbl[pwr]
4398	sub	%o1,	1,	%o1
4399.Loop_flip_n_scatter5:
4400	ld	[%o0+0],	%o4	! inp[i]
4401	ld	[%o0+4],	%o5
4402	add	%o0,	8,	%o0
4403	sllx	%o5,	32,	%o5
4404	or	%o4,	%o5,	%o5
4405	stx	%o5,	[%o2]
4406	add	%o2,	32*8,	%o2
4407	brnz	%o1,	.Loop_flip_n_scatter5
4408	sub	%o1,	1,	%o1
4409	retl
4410	nop
4411.type	bn_flip_n_scatter5_t4, #function
4412.size	bn_flip_n_scatter5_t4, .-bn_flip_n_scatter5_t4
4413
4414.globl	bn_gather5_t4
4415.align	32
4416bn_gather5_t4:
4417	srl	%o3,	2,	%o4
4418	and	%o3,	3,	%o5
4419	and	%o4,	7,	%o4
4420	sll	%o5,	3,	%o5	! offset within first cache line
4421	add	%o5,	%o2,	%o2	! of the pwrtbl
4422	or	%g0,	1,	%o5
4423	sll	%o5,	%o4,	%g1
4424	wr	%g1,	%g0,	%ccr
4425	sub	%o1,	1,	%o1
4426.Loop_gather5:
4427	ldx	[%o2+0*32],	%g1
4428	ldx	[%o2+1*32],	%o4
4429	ldx	[%o2+2*32],	%o5
4430	movvs	%icc,	%o4,	%g1
4431	ldx	[%o2+3*32],	%o4
4432	move	%icc,	%o5,	%g1
4433	ldx	[%o2+4*32],	%o5
4434	movneg	%icc,	%o4,	%g1
4435	ldx	[%o2+5*32],	%o4
4436	movcs	%xcc,	%o5,	%g1
4437	ldx	[%o2+6*32],	%o5
4438	movvs	%xcc,	%o4,	%g1
4439	ldx	[%o2+7*32],	%o4
4440	move	%xcc,	%o5,	%g1
4441	add	%o2,8*32,	%o2
4442	movneg	%xcc,	%o4,	%g1
4443	stx	%g1,	[%o0]
4444	add	%o0,	8,	%o0
4445	brnz	%o1,	.Loop_gather5
4446	sub	%o1,	1,	%o1
4447
4448	retl
4449	nop
4450.type	bn_gather5_t4, #function
4451.size	bn_gather5_t4, .-bn_gather5_t4
4452
4453.asciz	"Montgomery Multiplication for SPARC T4, David S. Miller, Andy Polyakov"
4454.align	4
4455