1#include "sparc_arch.h"
2
3#ifdef	__arch64__
4.register	%g2,#scratch
5.register	%g3,#scratch
6#endif
7
8.section	".text",#alloc,#execinstr
9
10#ifdef	__PIC__
11SPARC_PIC_THUNK(%g1)
12#endif
!-----------------------------------------------------------------------
! bn_mul_mont_t4_8(rp, ap, bp, np, n0)
!   %i0 = rp   result vector (8 limbs, stored at the end)
!   %i1 = ap   multiplicand
!   %i2 = bp   multiplier (if bp == ap the squaring path is taken)
!   %i3 = np   modulus
!   %i4 = n0   pointer to -1/n0 mod 2^64 (loaded into %f60/%f1:%f0)
! Fixed 8-limb Montgomery multiplication using the SPARC T4
! "montmul"/"montsqr" instructions, emitted below as .word constants
! because older assemblers do not know the mnemonics.  Operands are
! handed to the hardware through consecutive register windows, hence
! the chains of "save" while loading and "restore" afterwards.
! Returns 1 in %i0 on success, 0 if the operation could not complete
! (32-bit builds only; caller falls back to a software path).
!-----------------------------------------------------------------------
.globl	bn_mul_mont_t4_8
.align	32
bn_mul_mont_t4_8:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	! 32-bit build that may run on a 64-bit stack: probe the
	! capability word and choose stack bias/frame size at run time.
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! no 64-bit stack -> plain frame
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = 0 (64-bit ABI) or 0xffffffff00000000
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	! NOTE(review): on 32-bit runs the upper half of %fp is unused by
	! the ABI; it is tagged with %g5 after every "save" below and
	! re-checked after montmul — losing the tag apparently signals that
	! the register windows were spilled (operands clobbered) -> abort.
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! Load ap[0..7]: each 64-bit limb is assembled from two 32-bit
	! loads (low word into the "next" reg, high word shifted up).
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	! Next window: load np[0..7] the same way.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_8	! ap == bp -> use montsqr instead
	nop
	! Multiply path: load bp[0..7] into yet another window.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word	0x81b02920+8-1	! montmul	8-1
.Lmresume_8:
	! %fcc3 set by the hardware flags an unfinished operation.
	fbu,pn	%fcc3,.Lmabort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5	! frame tag lost -> windows were spilled
	brz,pn	%g5,.Lmabort_8
#endif
	nop
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	! Unwind the operand windows, re-verifying the %fp tag each time.
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_8
	restore
#endif
	! Move the 8 result limbs from integer to FP registers so they
	! survive the remaining restores (FP regs are not windowed).
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_8
	mov	0,%i0		! return failure
#endif
	! Store result limbs to rp, high word first within each limb.
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	mov	1,%i0		! return success
.Lmdone_8:
	ret
	restore

.Lmabort_8:
	! Operation aborted while 5 operand windows were still live.
	restore
	restore
	restore
	restore
	restore
.Lmabort1_8:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_8:
	! Squaring path (ap == bp): montsqr needs no bp windows.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+8-1	! montsqr	8-1
	ba	.Lmresume_8
	nop
.type	bn_mul_mont_t4_8, #function
.size	bn_mul_mont_t4_8, .-bn_mul_mont_t4_8
!-----------------------------------------------------------------------
! bn_mul_mont_t4_16(rp, ap, bp, np, n0)
!   %i0 = rp, %i1 = ap, %i2 = bp, %i3 = np, %i4 = n0 (see _t4_8 above).
! Fixed 16-limb (1024-bit) Montgomery multiplication via the T4
! montmul/montsqr instructions (.word-encoded).  With 16 limbs the
! integer windows are not enough: ap[14..15] travel in FP regs
! %f24/%f26, and np/bp limbs 14..15 get an extra register window.
! Returns 1 in %i0 on success, 0 on abort (32-bit builds only).
!-----------------------------------------------------------------------
.globl	bn_mul_mont_t4_16
.align	32
bn_mul_mont_t4_16:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	! Run-time choice of stack bias/frame for 32-bit-on-64-bit-stack.
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! window-spill detection tag (32-bit only)
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp	! tag upper half of %fp (see _t4_8 note)
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! Load ap[0..13] into %l0-%l7/%o0-%o5 (two 32-bit halves each).
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	! ap[14..15] overflow the integer window -> park them in %f24/%f26.
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2	%f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2	%f0,%f6,%f26
	! Next window: np[0..13].
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	! Extra window for np[14..15].
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_16	! ap == bp -> montsqr path
	nop
	! Multiply path: bp[0..13] in one window...
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	! ...and bp[14..15] in the final window.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%o7
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%o7,%i1,%i1
	.word	0x81b02920+16-1	! montmul	16-1
.Lmresume_16:
	! %fcc3 set by the hardware flags an unfinished operation.
	fbu,pn	%fcc3,.Lmabort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5	! frame tag lost -> windows were spilled
	brz,pn	%g5,.Lmabort_16
#endif
	nop
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	! Unwind the operand windows, re-verifying the %fp tag each time.
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_16
	restore
#endif
	! Move the 16 result limbs into FP registers (not windowed) so
	! they survive the remaining restores.
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_16
	mov	0,%i0		! return failure
#endif
	! Store result limbs to rp; limbs 12..15 come from %f60/%f62 and
	! the parked %f24/%f26, shuffled through %f0-%f7 first.
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2	%f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2	%f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2	%f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2	%f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	mov	1,%i0		! return success
.Lmdone_16:
	ret
	restore

.Lmabort_16:
	! Operation aborted while 5 operand windows were still live.
	restore
	restore
	restore
	restore
	restore
.Lmabort1_16:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_16:
	! Squaring path (ap == bp): montsqr needs no bp windows.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+16-1	! montsqr	16-1
	ba	.Lmresume_16
	nop
.type	bn_mul_mont_t4_16, #function
.size	bn_mul_mont_t4_16, .-bn_mul_mont_t4_16
!-----------------------------------------------------------------------
! bn_mul_mont_t4_24(rp, ap, bp, np, n0)
!   %i0 = rp, %i1 = ap, %i2 = bp, %i3 = np, %i4 = n0 (see _t4_8 above).
! Fixed 24-limb (1536-bit) Montgomery multiplication via the T4
! montmul/montsqr instructions (.word-encoded).  ap[14..23] are parked
! in FP registers %f24-%f42; np and bp each take two register windows
! (limbs 0..13 and 14..23).
! Returns 1 in %i0 on success, 0 on abort (32-bit builds only).
!-----------------------------------------------------------------------
.globl	bn_mul_mont_t4_24
.align	32
bn_mul_mont_t4_24:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	! Run-time choice of stack bias/frame for 32-bit-on-64-bit-stack.
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! window-spill detection tag (32-bit only)
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp	! tag upper half of %fp (see _t4_8 note)
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	mov	%i2,%g3
	mov	%i3,%g4
	ld	[%i4+0],%f1	! load *n0
	ld	[%i4+4],%f0
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! Load ap[0..13] into %l0-%l7/%o0-%o5 (two 32-bit halves each).
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g2+0*8+0],%l1
	ld	[%g2+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g2+1*8+0],%l2
	ld	[%g2+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g2+2*8+0],%l3
	ld	[%g2+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g2+3*8+0],%l4
	ld	[%g2+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g2+4*8+0],%l5
	ld	[%g2+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g2+5*8+0],%l6
	ld	[%g2+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g2+6*8+0],%l7
	ld	[%g2+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g2+7*8+0],%o0
	ld	[%g2+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g2+8*8+0],%o1
	ld	[%g2+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g2+9*8+0],%o2
	ld	[%g2+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g2+10*8+0],%o3
	ld	[%g2+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g2+11*8+0],%o4
	ld	[%g2+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g2+12*8+0],%o5
	ld	[%g2+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g2+13*8+0],%o7
	ld	[%g2+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	! ap[14..23] overflow the integer window -> park in %f24-%f42.
	ld	[%g2+14*8+0],%f5
	ld	[%g2+14*8+4],%f4
	.word	0xb1b00f04 !fsrc2	%f0,%f4,%f24
	ld	[%g2+15*8+0],%f7
	ld	[%g2+15*8+4],%f6
	.word	0xb5b00f06 !fsrc2	%f0,%f6,%f26
	ld	[%g2+16*8+0],%f1
	ld	[%g2+16*8+4],%f0
	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
	ld	[%g2+17*8+0],%f3
	ld	[%g2+17*8+4],%f2
	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
	ld	[%g2+18*8+0],%f5
	ld	[%g2+18*8+4],%f4
	.word	0x83b00f04 !fsrc2	%f0,%f4,%f32
	ld	[%g2+19*8+0],%f7
	ld	[%g2+19*8+4],%f6
	.word	0x87b00f06 !fsrc2	%f0,%f6,%f34
	ld	[%g2+20*8+0],%f1
	ld	[%g2+20*8+4],%f0
	.word	0x8bb00f00 !fsrc2	%f0,%f0,%f36
	ld	[%g2+21*8+0],%f3
	ld	[%g2+21*8+4],%f2
	.word	0x8fb00f02 !fsrc2	%f0,%f2,%f38
	ld	[%g2+22*8+0],%f5
	ld	[%g2+22*8+4],%f4
	.word	0x93b00f04 !fsrc2	%f0,%f4,%f40
	ld	[%g2+23*8+0],%f7
	ld	[%g2+23*8+4],%f6
	.word	0x97b00f06 !fsrc2	%f0,%f6,%f42
	! Next window: np[0..13].
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+0*8+0],%l1
	ld	[%g4+0*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+1*8+0],%l2
	ld	[%g4+1*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+2*8+0],%l3
	ld	[%g4+2*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+3*8+0],%l4
	ld	[%g4+3*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+4*8+0],%l5
	ld	[%g4+4*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+5*8+0],%l6
	ld	[%g4+5*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+6*8+0],%l7
	ld	[%g4+6*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+7*8+0],%o0
	ld	[%g4+7*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+8*8+0],%o1
	ld	[%g4+8*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+9*8+0],%o2
	ld	[%g4+9*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	ld	[%g4+10*8+0],%o3
	ld	[%g4+10*8+4],%o2
	sllx	%o2,32,%o2
	or	%o3,%o2,%o2
	ld	[%g4+11*8+0],%o4
	ld	[%g4+11*8+4],%o3
	sllx	%o3,32,%o3
	or	%o4,%o3,%o3
	ld	[%g4+12*8+0],%o5
	ld	[%g4+12*8+4],%o4
	sllx	%o4,32,%o4
	or	%o5,%o4,%o4
	ld	[%g4+13*8+0],%o7
	ld	[%g4+13*8+4],%o5
	sllx	%o5,32,%o5
	or	%o7,%o5,%o5
	! Extra window for np[14..23].
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g4+14*8+0],%l1
	ld	[%g4+14*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g4+15*8+0],%l2
	ld	[%g4+15*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g4+16*8+0],%l3
	ld	[%g4+16*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g4+17*8+0],%l4
	ld	[%g4+17*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g4+18*8+0],%l5
	ld	[%g4+18*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g4+19*8+0],%l6
	ld	[%g4+19*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g4+20*8+0],%l7
	ld	[%g4+20*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g4+21*8+0],%o0
	ld	[%g4+21*8+4],%l7
	sllx	%l7,32,%l7
	or	%o0,%l7,%l7
	ld	[%g4+22*8+0],%o1
	ld	[%g4+22*8+4],%o0
	sllx	%o0,32,%o0
	or	%o1,%o0,%o0
	ld	[%g4+23*8+0],%o2
	ld	[%g4+23*8+4],%o1
	sllx	%o1,32,%o1
	or	%o2,%o1,%o1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	cmp	%g2,%g3
	be	SIZE_T_CC,.Lmsquare_24	! ap == bp -> montsqr path
	nop
	! Multiply path: bp[0..13] in one window...
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+0*8+0],%i1
	ld	[%g3+0*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+1*8+0],%i2
	ld	[%g3+1*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+2*8+0],%i3
	ld	[%g3+2*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+3*8+0],%i4
	ld	[%g3+3*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+4*8+0],%i5
	ld	[%g3+4*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+5*8+0],%l0
	ld	[%g3+5*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+6*8+0],%l1
	ld	[%g3+6*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+7*8+0],%l2
	ld	[%g3+7*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+8*8+0],%l3
	ld	[%g3+8*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+9*8+0],%l4
	ld	[%g3+9*8+4],%l3
	sllx	%l3,32,%l3
	or	%l4,%l3,%l3
	ld	[%g3+10*8+0],%l5
	ld	[%g3+10*8+4],%l4
	sllx	%l4,32,%l4
	or	%l5,%l4,%l4
	ld	[%g3+11*8+0],%l6
	ld	[%g3+11*8+4],%l5
	sllx	%l5,32,%l5
	or	%l6,%l5,%l5
	ld	[%g3+12*8+0],%l7
	ld	[%g3+12*8+4],%l6
	sllx	%l6,32,%l6
	or	%l7,%l6,%l6
	ld	[%g3+13*8+0],%o7
	ld	[%g3+13*8+4],%l7
	sllx	%l7,32,%l7
	or	%o7,%l7,%l7
	! ...and bp[14..23] in the final window.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ld	[%g3+14*8+0],%i1
	ld	[%g3+14*8+4],%i0
	sllx	%i0,32,%i0
	or	%i1,%i0,%i0
	ld	[%g3+15*8+0],%i2
	ld	[%g3+15*8+4],%i1
	sllx	%i1,32,%i1
	or	%i2,%i1,%i1
	ld	[%g3+16*8+0],%i3
	ld	[%g3+16*8+4],%i2
	sllx	%i2,32,%i2
	or	%i3,%i2,%i2
	ld	[%g3+17*8+0],%i4
	ld	[%g3+17*8+4],%i3
	sllx	%i3,32,%i3
	or	%i4,%i3,%i3
	ld	[%g3+18*8+0],%i5
	ld	[%g3+18*8+4],%i4
	sllx	%i4,32,%i4
	or	%i5,%i4,%i4
	ld	[%g3+19*8+0],%l0
	ld	[%g3+19*8+4],%i5
	sllx	%i5,32,%i5
	or	%l0,%i5,%i5
	ld	[%g3+20*8+0],%l1
	ld	[%g3+20*8+4],%l0
	sllx	%l0,32,%l0
	or	%l1,%l0,%l0
	ld	[%g3+21*8+0],%l2
	ld	[%g3+21*8+4],%l1
	sllx	%l1,32,%l1
	or	%l2,%l1,%l1
	ld	[%g3+22*8+0],%l3
	ld	[%g3+22*8+4],%l2
	sllx	%l2,32,%l2
	or	%l3,%l2,%l2
	ld	[%g3+23*8+0],%o7
	ld	[%g3+23*8+4],%l3
	sllx	%l3,32,%l3
	or	%o7,%l3,%l3
	.word	0x81b02920+24-1	! montmul	24-1
.Lmresume_24:
	! %fcc3 set by the hardware flags an unfinished operation.
	fbu,pn	%fcc3,.Lmabort_24
#ifndef	__arch64__
	and	%fp,%g5,%g5	! frame tag lost -> windows were spilled
	brz,pn	%g5,.Lmabort_24
#endif
	nop
#ifdef	__arch64__
	restore
	restore
	restore
	restore
	restore
#else
	! Unwind the operand windows, re-verifying the %fp tag each time.
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Lmabort1_24
	restore
#endif
	! Move integer result limbs into FP registers (not windowed) so
	! they survive the remaining restores.
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Lmdone_24
	mov	0,%i0		! return failure
#endif
	! Store result limbs to rp; limbs 12..23 come from %f60/%f62 and
	! the parked %f24-%f42, shuffled through %f0-%f7 first.
	st	%f1,[%g1+0*8+0]
	st	%f0,[%g1+0*8+4]
	st	%f3,[%g1+1*8+0]
	st	%f2,[%g1+1*8+4]
	st	%f5,[%g1+2*8+0]
	st	%f4,[%g1+2*8+4]
	st	%f7,[%g1+3*8+0]
	st	%f6,[%g1+3*8+4]
	st	%f9,[%g1+4*8+0]
	st	%f8,[%g1+4*8+4]
	st	%f11,[%g1+5*8+0]
	st	%f10,[%g1+5*8+4]
	st	%f13,[%g1+6*8+0]
	st	%f12,[%g1+6*8+4]
	st	%f15,[%g1+7*8+0]
	st	%f14,[%g1+7*8+4]
	st	%f17,[%g1+8*8+0]
	st	%f16,[%g1+8*8+4]
	st	%f19,[%g1+9*8+0]
	st	%f18,[%g1+9*8+4]
	st	%f21,[%g1+10*8+0]
	st	%f20,[%g1+10*8+4]
	st	%f23,[%g1+11*8+0]
	st	%f22,[%g1+11*8+4]
	.word	0x81b00f1d !fsrc2	%f0,%f60,%f0
	st	%f1,[%g1+12*8+0]
	st	%f0,[%g1+12*8+4]
	.word	0x85b00f1f !fsrc2	%f0,%f62,%f2
	st	%f3,[%g1+13*8+0]
	st	%f2,[%g1+13*8+4]
	.word	0x89b00f18 !fsrc2	%f0,%f24,%f4
	st	%f5,[%g1+14*8+0]
	st	%f4,[%g1+14*8+4]
	.word	0x8db00f1a !fsrc2	%f0,%f26,%f6
	st	%f7,[%g1+15*8+0]
	st	%f6,[%g1+15*8+4]
	.word	0x81b00f1c !fsrc2	%f0,%f28,%f0
	st	%f1,[%g1+16*8+0]
	st	%f0,[%g1+16*8+4]
	.word	0x85b00f1e !fsrc2	%f0,%f30,%f2
	st	%f3,[%g1+17*8+0]
	st	%f2,[%g1+17*8+4]
	.word	0x89b00f01 !fsrc2	%f0,%f32,%f4
	st	%f5,[%g1+18*8+0]
	st	%f4,[%g1+18*8+4]
	.word	0x8db00f03 !fsrc2	%f0,%f34,%f6
	st	%f7,[%g1+19*8+0]
	st	%f6,[%g1+19*8+4]
	.word	0x81b00f05 !fsrc2	%f0,%f36,%f0
	st	%f1,[%g1+20*8+0]
	st	%f0,[%g1+20*8+4]
	.word	0x85b00f07 !fsrc2	%f0,%f38,%f2
	st	%f3,[%g1+21*8+0]
	st	%f2,[%g1+21*8+4]
	.word	0x89b00f09 !fsrc2	%f0,%f40,%f4
	st	%f5,[%g1+22*8+0]
	st	%f4,[%g1+22*8+4]
	.word	0x8db00f0b !fsrc2	%f0,%f42,%f6
	st	%f7,[%g1+23*8+0]
	st	%f6,[%g1+23*8+4]
	mov	1,%i0		! return success
.Lmdone_24:
	ret
	restore

.Lmabort_24:
	! Operation aborted while 5 operand windows were still live.
	restore
	restore
	restore
	restore
	restore
.Lmabort1_24:
	restore

	mov	0,%i0		! return failure
	ret
	restore

.align	32
.Lmsquare_24:
	! Squaring path (ap == bp): montsqr needs no bp windows.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	.word   0x81b02940+24-1	! montsqr	24-1
	ba	.Lmresume_24
	nop
.type	bn_mul_mont_t4_24, #function
.size	bn_mul_mont_t4_24, .-bn_mul_mont_t4_24
1073.globl	bn_mul_mont_t4_32
1074.align	32
1075bn_mul_mont_t4_32:
1076#ifdef	__arch64__
1077	mov	0,%g5
1078	mov	-128,%g4
1079#elif defined(SPARCV9_64BIT_STACK)
1080	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
1081	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
1082	mov	-2047,%g4
1083	and	%g1,SPARCV9_64BIT_STACK,%g1
1084	movrz	%g1,0,%g4
1085	mov	-1,%g5
1086	add	%g4,-128,%g4
1087#else
1088	mov	-1,%g5
1089	mov	-128,%g4
1090#endif
1091	sllx	%g5,32,%g5
1092	save	%sp,%g4,%sp
1093#ifndef	__arch64__
1094	save	%sp,-128,%sp	! warm it up
1095	save	%sp,-128,%sp
1096	save	%sp,-128,%sp
1097	save	%sp,-128,%sp
1098	save	%sp,-128,%sp
1099	save	%sp,-128,%sp
1100	restore
1101	restore
1102	restore
1103	restore
1104	restore
1105	restore
1106#endif
1107	and	%sp,1,%g4
1108	or	%g5,%fp,%fp
1109	or	%g4,%g5,%g5
1110
1111	! copy arguments to global registers
1112	mov	%i0,%g1
1113	mov	%i1,%g2
1114	mov	%i2,%g3
1115	mov	%i3,%g4
1116	ld	[%i4+0],%f1	! load *n0
1117	ld	[%i4+4],%f0
1118	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
1119	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1120	ld	[%g2+0*8+0],%l1
1121	ld	[%g2+0*8+4],%l0
1122	sllx	%l0,32,%l0
1123	or	%l1,%l0,%l0
1124	ld	[%g2+1*8+0],%l2
1125	ld	[%g2+1*8+4],%l1
1126	sllx	%l1,32,%l1
1127	or	%l2,%l1,%l1
1128	ld	[%g2+2*8+0],%l3
1129	ld	[%g2+2*8+4],%l2
1130	sllx	%l2,32,%l2
1131	or	%l3,%l2,%l2
1132	ld	[%g2+3*8+0],%l4
1133	ld	[%g2+3*8+4],%l3
1134	sllx	%l3,32,%l3
1135	or	%l4,%l3,%l3
1136	ld	[%g2+4*8+0],%l5
1137	ld	[%g2+4*8+4],%l4
1138	sllx	%l4,32,%l4
1139	or	%l5,%l4,%l4
1140	ld	[%g2+5*8+0],%l6
1141	ld	[%g2+5*8+4],%l5
1142	sllx	%l5,32,%l5
1143	or	%l6,%l5,%l5
1144	ld	[%g2+6*8+0],%l7
1145	ld	[%g2+6*8+4],%l6
1146	sllx	%l6,32,%l6
1147	or	%l7,%l6,%l6
1148	ld	[%g2+7*8+0],%o0
1149	ld	[%g2+7*8+4],%l7
1150	sllx	%l7,32,%l7
1151	or	%o0,%l7,%l7
1152	ld	[%g2+8*8+0],%o1
1153	ld	[%g2+8*8+4],%o0
1154	sllx	%o0,32,%o0
1155	or	%o1,%o0,%o0
1156	ld	[%g2+9*8+0],%o2
1157	ld	[%g2+9*8+4],%o1
1158	sllx	%o1,32,%o1
1159	or	%o2,%o1,%o1
1160	ld	[%g2+10*8+0],%o3
1161	ld	[%g2+10*8+4],%o2
1162	sllx	%o2,32,%o2
1163	or	%o3,%o2,%o2
1164	ld	[%g2+11*8+0],%o4
1165	ld	[%g2+11*8+4],%o3
1166	sllx	%o3,32,%o3
1167	or	%o4,%o3,%o3
1168	ld	[%g2+12*8+0],%o5
1169	ld	[%g2+12*8+4],%o4
1170	sllx	%o4,32,%o4
1171	or	%o5,%o4,%o4
1172	ld	[%g2+13*8+0],%o7
1173	ld	[%g2+13*8+4],%o5
1174	sllx	%o5,32,%o5
1175	or	%o7,%o5,%o5
1176	ld	[%g2+14*8+0],%f5
1177	ld	[%g2+14*8+4],%f4
1178	.word	0xb1b00f04 !fsrc2	%f0,%f4,%f24
1179	ld	[%g2+15*8+0],%f7
1180	ld	[%g2+15*8+4],%f6
1181	.word	0xb5b00f06 !fsrc2	%f0,%f6,%f26
1182	ld	[%g2+16*8+0],%f1
1183	ld	[%g2+16*8+4],%f0
1184	.word	0xb9b00f00 !fsrc2	%f0,%f0,%f28
1185	ld	[%g2+17*8+0],%f3
1186	ld	[%g2+17*8+4],%f2
1187	.word	0xbdb00f02 !fsrc2	%f0,%f2,%f30
1188	ld	[%g2+18*8+0],%f5
1189	ld	[%g2+18*8+4],%f4
1190	.word	0x83b00f04 !fsrc2	%f0,%f4,%f32
1191	ld	[%g2+19*8+0],%f7
1192	ld	[%g2+19*8+4],%f6
1193	.word	0x87b00f06 !fsrc2	%f0,%f6,%f34
1194	ld	[%g2+20*8+0],%f1
1195	ld	[%g2+20*8+4],%f0
1196	.word	0x8bb00f00 !fsrc2	%f0,%f0,%f36
1197	ld	[%g2+21*8+0],%f3
1198	ld	[%g2+21*8+4],%f2
1199	.word	0x8fb00f02 !fsrc2	%f0,%f2,%f38
1200	ld	[%g2+22*8+0],%f5
1201	ld	[%g2+22*8+4],%f4
1202	.word	0x93b00f04 !fsrc2	%f0,%f4,%f40
1203	ld	[%g2+23*8+0],%f7
1204	ld	[%g2+23*8+4],%f6
1205	.word	0x97b00f06 !fsrc2	%f0,%f6,%f42
1206	ld	[%g2+24*8+0],%f1
1207	ld	[%g2+24*8+4],%f0
1208	.word	0x9bb00f00 !fsrc2	%f0,%f0,%f44
1209	ld	[%g2+25*8+0],%f3
1210	ld	[%g2+25*8+4],%f2
1211	.word	0x9fb00f02 !fsrc2	%f0,%f2,%f46
1212	ld	[%g2+26*8+0],%f5
1213	ld	[%g2+26*8+4],%f4
1214	.word	0xa3b00f04 !fsrc2	%f0,%f4,%f48
1215	ld	[%g2+27*8+0],%f7
1216	ld	[%g2+27*8+4],%f6
1217	.word	0xa7b00f06 !fsrc2	%f0,%f6,%f50
1218	ld	[%g2+28*8+0],%f1
1219	ld	[%g2+28*8+4],%f0
1220	.word	0xabb00f00 !fsrc2	%f0,%f0,%f52
1221	ld	[%g2+29*8+0],%f3
1222	ld	[%g2+29*8+4],%f2
1223	.word	0xafb00f02 !fsrc2	%f0,%f2,%f54
1224	ld	[%g2+30*8+0],%f5
1225	ld	[%g2+30*8+4],%f4
1226	.word	0xb3b00f04 !fsrc2	%f0,%f4,%f56
1227	ld	[%g2+31*8+0],%f7
1228	ld	[%g2+31*8+4],%f6
1229	.word	0xb7b00f06 !fsrc2	%f0,%f6,%f58
1230	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1231	ld	[%g4+0*8+0],%l1
1232	ld	[%g4+0*8+4],%l0
1233	sllx	%l0,32,%l0
1234	or	%l1,%l0,%l0
1235	ld	[%g4+1*8+0],%l2
1236	ld	[%g4+1*8+4],%l1
1237	sllx	%l1,32,%l1
1238	or	%l2,%l1,%l1
1239	ld	[%g4+2*8+0],%l3
1240	ld	[%g4+2*8+4],%l2
1241	sllx	%l2,32,%l2
1242	or	%l3,%l2,%l2
1243	ld	[%g4+3*8+0],%l4
1244	ld	[%g4+3*8+4],%l3
1245	sllx	%l3,32,%l3
1246	or	%l4,%l3,%l3
1247	ld	[%g4+4*8+0],%l5
1248	ld	[%g4+4*8+4],%l4
1249	sllx	%l4,32,%l4
1250	or	%l5,%l4,%l4
1251	ld	[%g4+5*8+0],%l6
1252	ld	[%g4+5*8+4],%l5
1253	sllx	%l5,32,%l5
1254	or	%l6,%l5,%l5
1255	ld	[%g4+6*8+0],%l7
1256	ld	[%g4+6*8+4],%l6
1257	sllx	%l6,32,%l6
1258	or	%l7,%l6,%l6
1259	ld	[%g4+7*8+0],%o0
1260	ld	[%g4+7*8+4],%l7
1261	sllx	%l7,32,%l7
1262	or	%o0,%l7,%l7
1263	ld	[%g4+8*8+0],%o1
1264	ld	[%g4+8*8+4],%o0
1265	sllx	%o0,32,%o0
1266	or	%o1,%o0,%o0
1267	ld	[%g4+9*8+0],%o2
1268	ld	[%g4+9*8+4],%o1
1269	sllx	%o1,32,%o1
1270	or	%o2,%o1,%o1
1271	ld	[%g4+10*8+0],%o3
1272	ld	[%g4+10*8+4],%o2
1273	sllx	%o2,32,%o2
1274	or	%o3,%o2,%o2
1275	ld	[%g4+11*8+0],%o4
1276	ld	[%g4+11*8+4],%o3
1277	sllx	%o3,32,%o3
1278	or	%o4,%o3,%o3
1279	ld	[%g4+12*8+0],%o5
1280	ld	[%g4+12*8+4],%o4
1281	sllx	%o4,32,%o4
1282	or	%o5,%o4,%o4
1283	ld	[%g4+13*8+0],%o7
1284	ld	[%g4+13*8+4],%o5
1285	sllx	%o5,32,%o5
1286	or	%o7,%o5,%o5
1287	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1288	ld	[%g4+14*8+0],%l1
1289	ld	[%g4+14*8+4],%l0
1290	sllx	%l0,32,%l0
1291	or	%l1,%l0,%l0
1292	ld	[%g4+15*8+0],%l2
1293	ld	[%g4+15*8+4],%l1
1294	sllx	%l1,32,%l1
1295	or	%l2,%l1,%l1
1296	ld	[%g4+16*8+0],%l3
1297	ld	[%g4+16*8+4],%l2
1298	sllx	%l2,32,%l2
1299	or	%l3,%l2,%l2
1300	ld	[%g4+17*8+0],%l4
1301	ld	[%g4+17*8+4],%l3
1302	sllx	%l3,32,%l3
1303	or	%l4,%l3,%l3
1304	ld	[%g4+18*8+0],%l5
1305	ld	[%g4+18*8+4],%l4
1306	sllx	%l4,32,%l4
1307	or	%l5,%l4,%l4
1308	ld	[%g4+19*8+0],%l6
1309	ld	[%g4+19*8+4],%l5
1310	sllx	%l5,32,%l5
1311	or	%l6,%l5,%l5
1312	ld	[%g4+20*8+0],%l7
1313	ld	[%g4+20*8+4],%l6
1314	sllx	%l6,32,%l6
1315	or	%l7,%l6,%l6
1316	ld	[%g4+21*8+0],%o0
1317	ld	[%g4+21*8+4],%l7
1318	sllx	%l7,32,%l7
1319	or	%o0,%l7,%l7
1320	ld	[%g4+22*8+0],%o1
1321	ld	[%g4+22*8+4],%o0
1322	sllx	%o0,32,%o0
1323	or	%o1,%o0,%o0
1324	ld	[%g4+23*8+0],%o2
1325	ld	[%g4+23*8+4],%o1
1326	sllx	%o1,32,%o1
1327	or	%o2,%o1,%o1
1328	ld	[%g4+24*8+0],%o3
1329	ld	[%g4+24*8+4],%o2
1330	sllx	%o2,32,%o2
1331	or	%o3,%o2,%o2
1332	ld	[%g4+25*8+0],%o4
1333	ld	[%g4+25*8+4],%o3
1334	sllx	%o3,32,%o3
1335	or	%o4,%o3,%o3
1336	ld	[%g4+26*8+0],%o5
1337	ld	[%g4+26*8+4],%o4
1338	sllx	%o4,32,%o4
1339	or	%o5,%o4,%o4
1340	ld	[%g4+27*8+0],%o7
1341	ld	[%g4+27*8+4],%o5
1342	sllx	%o5,32,%o5
1343	or	%o7,%o5,%o5
1344	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1345	ld	[%g4+28*8+0],%l1
1346	ld	[%g4+28*8+4],%l0
1347	sllx	%l0,32,%l0
1348	or	%l1,%l0,%l0
1349	ld	[%g4+29*8+0],%l2
1350	ld	[%g4+29*8+4],%l1
1351	sllx	%l1,32,%l1
1352	or	%l2,%l1,%l1
1353	ld	[%g4+30*8+0],%l3
1354	ld	[%g4+30*8+4],%l2
1355	sllx	%l2,32,%l2
1356	or	%l3,%l2,%l2
1357	ld	[%g4+31*8+0],%o7
1358	ld	[%g4+31*8+4],%l3
1359	sllx	%l3,32,%l3
1360	or	%o7,%l3,%l3
1361	cmp	%g2,%g3
1362	be	SIZE_T_CC,.Lmsquare_32
1363	nop
1364	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1365	ld	[%g3+0*8+0],%i1
1366	ld	[%g3+0*8+4],%i0
1367	sllx	%i0,32,%i0
1368	or	%i1,%i0,%i0
1369	ld	[%g3+1*8+0],%i2
1370	ld	[%g3+1*8+4],%i1
1371	sllx	%i1,32,%i1
1372	or	%i2,%i1,%i1
1373	ld	[%g3+2*8+0],%i3
1374	ld	[%g3+2*8+4],%i2
1375	sllx	%i2,32,%i2
1376	or	%i3,%i2,%i2
1377	ld	[%g3+3*8+0],%i4
1378	ld	[%g3+3*8+4],%i3
1379	sllx	%i3,32,%i3
1380	or	%i4,%i3,%i3
1381	ld	[%g3+4*8+0],%i5
1382	ld	[%g3+4*8+4],%i4
1383	sllx	%i4,32,%i4
1384	or	%i5,%i4,%i4
1385	ld	[%g3+5*8+0],%l0
1386	ld	[%g3+5*8+4],%i5
1387	sllx	%i5,32,%i5
1388	or	%l0,%i5,%i5
1389	ld	[%g3+6*8+0],%l1
1390	ld	[%g3+6*8+4],%l0
1391	sllx	%l0,32,%l0
1392	or	%l1,%l0,%l0
1393	ld	[%g3+7*8+0],%l2
1394	ld	[%g3+7*8+4],%l1
1395	sllx	%l1,32,%l1
1396	or	%l2,%l1,%l1
1397	ld	[%g3+8*8+0],%l3
1398	ld	[%g3+8*8+4],%l2
1399	sllx	%l2,32,%l2
1400	or	%l3,%l2,%l2
1401	ld	[%g3+9*8+0],%l4
1402	ld	[%g3+9*8+4],%l3
1403	sllx	%l3,32,%l3
1404	or	%l4,%l3,%l3
1405	ld	[%g3+10*8+0],%l5
1406	ld	[%g3+10*8+4],%l4
1407	sllx	%l4,32,%l4
1408	or	%l5,%l4,%l4
1409	ld	[%g3+11*8+0],%l6
1410	ld	[%g3+11*8+4],%l5
1411	sllx	%l5,32,%l5
1412	or	%l6,%l5,%l5
1413	ld	[%g3+12*8+0],%l7
1414	ld	[%g3+12*8+4],%l6
1415	sllx	%l6,32,%l6
1416	or	%l7,%l6,%l6
1417	ld	[%g3+13*8+0],%o7
1418	ld	[%g3+13*8+4],%l7
1419	sllx	%l7,32,%l7
1420	or	%o7,%l7,%l7
1421	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1422	ld	[%g3+14*8+0],%i1
1423	ld	[%g3+14*8+4],%i0
1424	sllx	%i0,32,%i0
1425	or	%i1,%i0,%i0
1426	ld	[%g3+15*8+0],%i2
1427	ld	[%g3+15*8+4],%i1
1428	sllx	%i1,32,%i1
1429	or	%i2,%i1,%i1
1430	ld	[%g3+16*8+0],%i3
1431	ld	[%g3+16*8+4],%i2
1432	sllx	%i2,32,%i2
1433	or	%i3,%i2,%i2
1434	ld	[%g3+17*8+0],%i4
1435	ld	[%g3+17*8+4],%i3
1436	sllx	%i3,32,%i3
1437	or	%i4,%i3,%i3
1438	ld	[%g3+18*8+0],%i5
1439	ld	[%g3+18*8+4],%i4
1440	sllx	%i4,32,%i4
1441	or	%i5,%i4,%i4
1442	ld	[%g3+19*8+0],%l0
1443	ld	[%g3+19*8+4],%i5
1444	sllx	%i5,32,%i5
1445	or	%l0,%i5,%i5
1446	ld	[%g3+20*8+0],%l1
1447	ld	[%g3+20*8+4],%l0
1448	sllx	%l0,32,%l0
1449	or	%l1,%l0,%l0
1450	ld	[%g3+21*8+0],%l2
1451	ld	[%g3+21*8+4],%l1
1452	sllx	%l1,32,%l1
1453	or	%l2,%l1,%l1
1454	ld	[%g3+22*8+0],%l3
1455	ld	[%g3+22*8+4],%l2
1456	sllx	%l2,32,%l2
1457	or	%l3,%l2,%l2
1458	ld	[%g3+23*8+0],%l4
1459	ld	[%g3+23*8+4],%l3
1460	sllx	%l3,32,%l3
1461	or	%l4,%l3,%l3
1462	ld	[%g3+24*8+0],%l5
1463	ld	[%g3+24*8+4],%l4
1464	sllx	%l4,32,%l4
1465	or	%l5,%l4,%l4
1466	ld	[%g3+25*8+0],%l6
1467	ld	[%g3+25*8+4],%l5
1468	sllx	%l5,32,%l5
1469	or	%l6,%l5,%l5
1470	ld	[%g3+26*8+0],%l7
1471	ld	[%g3+26*8+4],%l6
1472	sllx	%l6,32,%l6
1473	or	%l7,%l6,%l6
1474	ld	[%g3+27*8+0],%o0
1475	ld	[%g3+27*8+4],%l7
1476	sllx	%l7,32,%l7
1477	or	%o0,%l7,%l7
1478	ld	[%g3+28*8+0],%o1
1479	ld	[%g3+28*8+4],%o0
1480	sllx	%o0,32,%o0
1481	or	%o1,%o0,%o0
1482	ld	[%g3+29*8+0],%o2
1483	ld	[%g3+29*8+4],%o1
1484	sllx	%o1,32,%o1
1485	or	%o2,%o1,%o1
1486	ld	[%g3+30*8+0],%o3
1487	ld	[%g3+30*8+4],%o2
1488	sllx	%o2,32,%o2
1489	or	%o3,%o2,%o2
1490	ld	[%g3+31*8+0],%o7
1491	ld	[%g3+31*8+4],%o3
1492	sllx	%o3,32,%o3
1493	or	%o7,%o3,%o3
1494	.word	0x81b02920+32-1	! montmul	32-1
1495.Lmresume_32:
1496	fbu,pn	%fcc3,.Lmabort_32
1497#ifndef	__arch64__
1498	and	%fp,%g5,%g5
1499	brz,pn	%g5,.Lmabort_32
1500#endif
1501	nop
1502#ifdef	__arch64__
1503	restore
1504	restore
1505	restore
1506	restore
1507	restore
1508#else
1509	restore;		and	%fp,%g5,%g5
1510	restore;		and	%fp,%g5,%g5
1511	restore;		and	%fp,%g5,%g5
1512	restore;		and	%fp,%g5,%g5
1513	 brz,pn	%g5,.Lmabort1_32
1514	restore
1515#endif
1516	.word	0x81b02310 !movxtod	%l0,%f0
1517	.word	0x85b02311 !movxtod	%l1,%f2
1518	.word	0x89b02312 !movxtod	%l2,%f4
1519	.word	0x8db02313 !movxtod	%l3,%f6
1520	.word	0x91b02314 !movxtod	%l4,%f8
1521	.word	0x95b02315 !movxtod	%l5,%f10
1522	.word	0x99b02316 !movxtod	%l6,%f12
1523	.word	0x9db02317 !movxtod	%l7,%f14
1524	.word	0xa1b02308 !movxtod	%o0,%f16
1525	.word	0xa5b02309 !movxtod	%o1,%f18
1526	.word	0xa9b0230a !movxtod	%o2,%f20
1527	.word	0xadb0230b !movxtod	%o3,%f22
1528	.word	0xbbb0230c !movxtod	%o4,%f60
1529	.word	0xbfb0230d !movxtod	%o5,%f62
1530#ifdef	__arch64__
1531	restore
1532#else
1533	 and	%fp,%g5,%g5
1534	restore
1535	 and	%g5,1,%o7
1536	 and	%fp,%g5,%g5
1537	 srl	%fp,0,%fp		! just in case?
1538	 or	%o7,%g5,%g5
1539	brz,a,pn %g5,.Lmdone_32
1540	mov	0,%i0		! return failure
1541#endif
1542	st	%f1,[%g1+0*8+0]
1543	st	%f0,[%g1+0*8+4]
1544	st	%f3,[%g1+1*8+0]
1545	st	%f2,[%g1+1*8+4]
1546	st	%f5,[%g1+2*8+0]
1547	st	%f4,[%g1+2*8+4]
1548	st	%f7,[%g1+3*8+0]
1549	st	%f6,[%g1+3*8+4]
1550	st	%f9,[%g1+4*8+0]
1551	st	%f8,[%g1+4*8+4]
1552	st	%f11,[%g1+5*8+0]
1553	st	%f10,[%g1+5*8+4]
1554	st	%f13,[%g1+6*8+0]
1555	st	%f12,[%g1+6*8+4]
1556	st	%f15,[%g1+7*8+0]
1557	st	%f14,[%g1+7*8+4]
1558	st	%f17,[%g1+8*8+0]
1559	st	%f16,[%g1+8*8+4]
1560	st	%f19,[%g1+9*8+0]
1561	st	%f18,[%g1+9*8+4]
1562	st	%f21,[%g1+10*8+0]
1563	st	%f20,[%g1+10*8+4]
1564	st	%f23,[%g1+11*8+0]
1565	st	%f22,[%g1+11*8+4]
1566	.word	0x81b00f1d !fsrc2	%f0,%f60,%f0
1567	st	%f1,[%g1+12*8+0]
1568	st	%f0,[%g1+12*8+4]
1569	.word	0x85b00f1f !fsrc2	%f0,%f62,%f2
1570	st	%f3,[%g1+13*8+0]
1571	st	%f2,[%g1+13*8+4]
1572	.word	0x89b00f18 !fsrc2	%f0,%f24,%f4
1573	st	%f5,[%g1+14*8+0]
1574	st	%f4,[%g1+14*8+4]
1575	.word	0x8db00f1a !fsrc2	%f0,%f26,%f6
1576	st	%f7,[%g1+15*8+0]
1577	st	%f6,[%g1+15*8+4]
1578	.word	0x81b00f1c !fsrc2	%f0,%f28,%f0
1579	st	%f1,[%g1+16*8+0]
1580	st	%f0,[%g1+16*8+4]
1581	.word	0x85b00f1e !fsrc2	%f0,%f30,%f2
1582	st	%f3,[%g1+17*8+0]
1583	st	%f2,[%g1+17*8+4]
1584	.word	0x89b00f01 !fsrc2	%f0,%f32,%f4
1585	st	%f5,[%g1+18*8+0]
1586	st	%f4,[%g1+18*8+4]
1587	.word	0x8db00f03 !fsrc2	%f0,%f34,%f6
1588	st	%f7,[%g1+19*8+0]
1589	st	%f6,[%g1+19*8+4]
1590	.word	0x81b00f05 !fsrc2	%f0,%f36,%f0
1591	st	%f1,[%g1+20*8+0]
1592	st	%f0,[%g1+20*8+4]
1593	.word	0x85b00f07 !fsrc2	%f0,%f38,%f2
1594	st	%f3,[%g1+21*8+0]
1595	st	%f2,[%g1+21*8+4]
1596	.word	0x89b00f09 !fsrc2	%f0,%f40,%f4
1597	st	%f5,[%g1+22*8+0]
1598	st	%f4,[%g1+22*8+4]
1599	.word	0x8db00f0b !fsrc2	%f0,%f42,%f6
1600	st	%f7,[%g1+23*8+0]
1601	st	%f6,[%g1+23*8+4]
1602	.word	0x81b00f0d !fsrc2	%f0,%f44,%f0
1603	st	%f1,[%g1+24*8+0]
1604	st	%f0,[%g1+24*8+4]
1605	.word	0x85b00f0f !fsrc2	%f0,%f46,%f2
1606	st	%f3,[%g1+25*8+0]
1607	st	%f2,[%g1+25*8+4]
1608	.word	0x89b00f11 !fsrc2	%f0,%f48,%f4
1609	st	%f5,[%g1+26*8+0]
1610	st	%f4,[%g1+26*8+4]
1611	.word	0x8db00f13 !fsrc2	%f0,%f50,%f6
1612	st	%f7,[%g1+27*8+0]
1613	st	%f6,[%g1+27*8+4]
1614	.word	0x81b00f15 !fsrc2	%f0,%f52,%f0
1615	st	%f1,[%g1+28*8+0]
1616	st	%f0,[%g1+28*8+4]
1617	.word	0x85b00f17 !fsrc2	%f0,%f54,%f2
1618	st	%f3,[%g1+29*8+0]
1619	st	%f2,[%g1+29*8+4]
1620	.word	0x89b00f19 !fsrc2	%f0,%f56,%f4
1621	st	%f5,[%g1+30*8+0]
1622	st	%f4,[%g1+30*8+4]
1623	.word	0x8db00f1b !fsrc2	%f0,%f58,%f6
1624	st	%f7,[%g1+31*8+0]
1625	st	%f6,[%g1+31*8+4]
1626	mov	1,%i0		! return success
1627.Lmdone_32:
1628	ret
1629	restore
1630
1631.Lmabort_32:
1632	restore
1633	restore
1634	restore
1635	restore
1636	restore
1637.Lmabort1_32:
1638	restore
1639
1640	mov	0,%i0		! return failure
1641	ret
1642	restore
1643
1644.align	32
1645.Lmsquare_32:
1646	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1647	save	%sp,-128,%sp;		or	%g5,%fp,%fp
1648	.word   0x81b02940+32-1	! montsqr	32-1
1649	ba	.Lmresume_32
1650	nop
1651.type	bn_mul_mont_t4_32, #function
1652.size	bn_mul_mont_t4_32, .-bn_mul_mont_t4_32
!----------------------------------------------------------------------
! bn_pwr5_mont_t4_8 -- one fixed-window (5-bit) Montgomery
! exponentiation pass for 8x64-bit (512-bit) moduli on SPARC T4.
! Repeats { 5 x montsqr, constant-time table gather, montmul } until the
! packed counter underflows, using the T4 montmul/montsqr instructions
! (emitted as .word; operands live in stacked register windows).
! In (per usage below): %i0 = tp (result, 8 limbs), %i1 = np (modulus,
!   8 limbs), %i2 = &n0 (Montgomery n0), %i3 = pwrtbl, %i4/%i5 = packed
!   bit counter / window state.
!   NOTE(review): argument names inferred from usage and the "pwrtbl"
!   comments below -- confirm against the C caller.
! Out: return value = 1 on success, 0 if the hardware sequence aborted
!   (tp is then left unmodified).
!----------------------------------------------------------------------
.globl	bn_pwr5_mont_t4_8
.align	32
bn_pwr5_mont_t4_8:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! no 64-bit stack: drop the 2047 bias
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = 0 (v9) or 0xffffffff00000000 mask
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4	! stack-bias bit of %sp
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers (they survive save/restore)
	mov	%i0,%g1		! tp
	mov	%i1,%g2		! np
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3		! pwrtbl
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4	! %g4 = %i5<<32 | %i4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! window #1: tp[0..7]
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	! window #2: np[0..7]
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp

	! split next 5-bit window out of the packed state:
	! low 2 bits -> byte offset within a 32-byte cache line,
	! high 3 bits -> one-hot mask programmed into %ccr below
	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4	! one-hot selector
	wr	%o4,	%g0,	%ccr	! drive the gather's cond. moves
	b	.Lstride_8
	nop
.align	16
.Lstride_8:
	! constant-time gather: pick 1 of 16 table entries per register
	! via icc/xcc conditional moves -- no data-dependent loads.
	! entry pair -> %i0/%i1
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	! entry pair -> %i2/%i3
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	! entry pair -> %i4/%i5
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	! entry pair -> %l0/%l1
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	! prepare NEXT window's selector while the squarings run
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	! 5 squarings; %fcc3 unordered = hardware abort, bail out.
	! On 32-bit a trashed %fp bias (%g5 goes 0) also means abort.
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	.word	0x81b02940+8-1	! montsqr	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif
	nop
	wr	%o4,	%g0,	%ccr	! arm selector for next gather
	.word	0x81b02920+8-1	! montmul	8-1
	fbu,pn	%fcc3,.Labort_8
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_8
#endif

	srax	%g4,	32,	%o4	! remaining bit count
#ifdef	__arch64__
	brgez	%o4,.Lstride_8
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_8
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_8
	restore
#endif
	! counter underflowed: result limbs are in %l0-%l7, move to FP
	! registers so one final restore can't clobber them
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_8
	mov	0,%i0		! return failure
#endif
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	mov	1,%i0		! return success
.Ldone_8:
	ret
	restore

.Labort_8:
	! unwind the per-operand windows before failing
	restore
	restore
	restore
	restore
	restore
.Labort1_8:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_8, #function
.size	bn_pwr5_mont_t4_8, .-bn_pwr5_mont_t4_8
!----------------------------------------------------------------------
! bn_pwr5_mont_t4_16 -- one fixed-window (5-bit) Montgomery
! exponentiation pass for 16x64-bit (1024-bit) moduli on SPARC T4.
! Same scheme as bn_pwr5_mont_t4_8, with wider operands: 14 limbs go in
! integer registers per window, the overflow limbs ride in FP registers
! (%f24/%f26) and extra windows.
! In (per usage below): %i0 = tp (16 limbs), %i1 = np (16 limbs),
!   %i2 = &n0, %i3 = pwrtbl, %i4/%i5 = packed bit counter / window state.
!   NOTE(review): argument names inferred from usage -- confirm against
!   the C caller.
! Out: return value = 1 on success, 0 if the hardware sequence aborted
!   (tp is then left unmodified).
!----------------------------------------------------------------------
.globl	bn_pwr5_mont_t4_16
.align	32
bn_pwr5_mont_t4_16:
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4	! no 64-bit stack: drop the 2047 bias
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5	! %g5 = 0 (v9) or 0xffffffff00000000 mask
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4	! stack-bias bit of %sp
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers (they survive save/restore)
	mov	%i0,%g1		! tp
	mov	%i1,%g2		! np
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3		! pwrtbl
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4	! %g4 = %i5<<32 | %i4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
	! window #1: tp[0..13] in registers, tp[14..15] in %f24/%f26
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	ldx	[%g1+8*8],%o0
	ldx	[%g1+9*8],%o1
	ldx	[%g1+10*8],%o2
	ldx	[%g1+11*8],%o3
	ldx	[%g1+12*8],%o4
	ldx	[%g1+13*8],%o5
	ldd	[%g1+14*8],%f24
	ldd	[%g1+15*8],%f26
	! window #2: np[0..13]
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	ldx	[%g2+8*8],%o0
	ldx	[%g2+9*8],%o1
	ldx	[%g2+10*8],%o2
	ldx	[%g2+11*8],%o3
	ldx	[%g2+12*8],%o4
	ldx	[%g2+13*8],%o5
	! window #3: np[14..15]
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+14*8],%l0
	ldx	[%g2+15*8],%l1
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	save	%sp,-128,%sp;		or	%g5,%fp,%fp

	! split next 5-bit window out of the packed state:
	! low 2 bits -> byte offset within a 32-byte cache line,
	! high 3 bits -> one-hot mask programmed into %ccr below
	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4	! one-hot selector
	wr	%o4,	%g0,	%ccr	! drive the gather's cond. moves
	b	.Lstride_16
	nop
.align	16
.Lstride_16:
	! constant-time gather: pick 1 of 16 table entries per register
	! via icc/xcc conditional moves -- no data-dependent loads.
	! entry pair -> %i0/%i1
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	! entry pair -> %i2/%i3
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	! entry pair -> %i4/%i5
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	! entry pair -> %l0/%l1
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	! entry pair -> %l2/%l3
	ldx	[%o7+0*32],	%l2
	ldx	[%o7+8*32],	%l3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l3
	! entry pair -> %l4/%l5
	ldx	[%o7+0*32],	%l4
	ldx	[%o7+8*32],	%l5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l5
	! entry pair -> %l6/%l7
	ldx	[%o7+0*32],	%l6
	ldx	[%o7+8*32],	%l7
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l7
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	! after save the previous window's %o7 (advanced table pointer)
	! is visible here as %i7; gather the last entry pair -> %i0/%i1
	ldx	[%i7+0*32],	%i0
	ldx	[%i7+8*32],	%i1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i1
	! prepare NEXT window's selector while the squarings run
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	! 5 squarings; %fcc3 unordered = hardware abort, bail out.
	! On 32-bit a trashed %fp bias (%g5 goes 0) also means abort.
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	.word	0x81b02940+16-1	! montsqr	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif
	nop
	wr	%o4,	%g0,	%ccr	! arm selector for next gather
	.word	0x81b02920+16-1	! montmul	16-1
	fbu,pn	%fcc3,.Labort_16
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_16
#endif

	srax	%g4,	32,	%o4	! remaining bit count
#ifdef	__arch64__
	brgez	%o4,.Lstride_16
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_16
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_16
	restore
#endif
	! counter underflowed: move result limbs to FP registers so the
	! final restore can't clobber them (limbs 12/13 go to %f60/%f62)
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_16
	mov	0,%i0		! return failure
#endif
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	std	%f16,[%g1+8*8]
	std	%f18,[%g1+9*8]
	std	%f20,[%g1+10*8]
	std	%f22,[%g1+11*8]
	std	%f60,[%g1+12*8]
	std	%f62,[%g1+13*8]
	std	%f24,[%g1+14*8]
	std	%f26,[%g1+15*8]
	mov	1,%i0		! return success
.Ldone_16:
	ret
	restore

.Labort_16:
	! unwind the per-operand windows before failing
	restore
	restore
	restore
	restore
	restore
.Labort1_16:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_16, #function
.size	bn_pwr5_mont_t4_16, .-bn_pwr5_mont_t4_16
2477.globl	bn_pwr5_mont_t4_24
2478.align	32
2479bn_pwr5_mont_t4_24:
2480#ifdef	__arch64__
2481	mov	0,%g5
2482	mov	-128,%g4
2483#elif defined(SPARCV9_64BIT_STACK)
2484	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
2485	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
2486	mov	-2047,%g4
2487	and	%g1,SPARCV9_64BIT_STACK,%g1
2488	movrz	%g1,0,%g4
2489	mov	-1,%g5
2490	add	%g4,-128,%g4
2491#else
2492	mov	-1,%g5
2493	mov	-128,%g4
2494#endif
2495	sllx	%g5,32,%g5
2496	save	%sp,%g4,%sp
2497#ifndef	__arch64__
2498	save	%sp,-128,%sp	! warm it up
2499	save	%sp,-128,%sp
2500	save	%sp,-128,%sp
2501	save	%sp,-128,%sp
2502	save	%sp,-128,%sp
2503	save	%sp,-128,%sp
2504	restore
2505	restore
2506	restore
2507	restore
2508	restore
2509	restore
2510#endif
2511	and	%sp,1,%g4
2512	or	%g5,%fp,%fp
2513	or	%g4,%g5,%g5
2514
2515	! copy arguments to global registers
2516	mov	%i0,%g1
2517	mov	%i1,%g2
2518	ld	[%i2+0],%f1	! load *n0
2519	ld	[%i2+4],%f0
2520	mov	%i3,%g3
2521	srl	%i4,%g0,%i4	! pack last arguments
2522	sllx	%i5,32,%g4
2523	or	%i4,%g4,%g4
2524	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
2525	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2526	ldx	[%g1+0*8],%l0
2527	ldx	[%g1+1*8],%l1
2528	ldx	[%g1+2*8],%l2
2529	ldx	[%g1+3*8],%l3
2530	ldx	[%g1+4*8],%l4
2531	ldx	[%g1+5*8],%l5
2532	ldx	[%g1+6*8],%l6
2533	ldx	[%g1+7*8],%l7
2534	ldx	[%g1+8*8],%o0
2535	ldx	[%g1+9*8],%o1
2536	ldx	[%g1+10*8],%o2
2537	ldx	[%g1+11*8],%o3
2538	ldx	[%g1+12*8],%o4
2539	ldx	[%g1+13*8],%o5
2540	ldd	[%g1+14*8],%f24
2541	ldd	[%g1+15*8],%f26
2542	ldd	[%g1+16*8],%f28
2543	ldd	[%g1+17*8],%f30
2544	ldd	[%g1+18*8],%f32
2545	ldd	[%g1+19*8],%f34
2546	ldd	[%g1+20*8],%f36
2547	ldd	[%g1+21*8],%f38
2548	ldd	[%g1+22*8],%f40
2549	ldd	[%g1+23*8],%f42
2550	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2551	ldx	[%g2+0*8],%l0
2552	ldx	[%g2+1*8],%l1
2553	ldx	[%g2+2*8],%l2
2554	ldx	[%g2+3*8],%l3
2555	ldx	[%g2+4*8],%l4
2556	ldx	[%g2+5*8],%l5
2557	ldx	[%g2+6*8],%l6
2558	ldx	[%g2+7*8],%l7
2559	ldx	[%g2+8*8],%o0
2560	ldx	[%g2+9*8],%o1
2561	ldx	[%g2+10*8],%o2
2562	ldx	[%g2+11*8],%o3
2563	ldx	[%g2+12*8],%o4
2564	ldx	[%g2+13*8],%o5
2565	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2566	ldx	[%g2+14*8],%l0
2567	ldx	[%g2+15*8],%l1
2568	ldx	[%g2+16*8],%l2
2569	ldx	[%g2+17*8],%l3
2570	ldx	[%g2+18*8],%l4
2571	ldx	[%g2+19*8],%l5
2572	ldx	[%g2+20*8],%l6
2573	ldx	[%g2+21*8],%l7
2574	ldx	[%g2+22*8],%o0
2575	ldx	[%g2+23*8],%o1
2576	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2577	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2578
2579	srlx	%g4,	32,	%o4		! unpack %g4
2580	srl	%g4,	%g0,	%o5
2581	sub	%o4,	5,	%o4
2582	mov	%g3,	%o7
2583	sllx	%o4,	32,	%g4		! re-pack %g4
2584	or	%o5,	%g4,	%g4
2585	srl	%o5,	%o4,	%o5
2586	srl	%o5,	2,	%o4
2587	and	%o5,	3,	%o5
2588	and	%o4,	7,	%o4
2589	sll	%o5,	3,	%o5	! offset within first cache line
2590	add	%o5,	%o7,	%o7	! of the pwrtbl
2591	or	%g0,	1,	%o5
2592	sll	%o5,	%o4,	%o4
2593	wr	%o4,	%g0,	%ccr
2594	b	.Lstride_24
2595	nop
2596.align	16
2597.Lstride_24:
2598	ldx	[%o7+0*32],	%i0
2599	ldx	[%o7+8*32],	%i1
2600	ldx	[%o7+1*32],	%o4
2601	ldx	[%o7+9*32],	%o5
2602	movvs	%icc,	%o4,	%i0
2603	ldx	[%o7+2*32],	%o4
2604	movvs	%icc,	%o5,	%i1
2605	ldx	[%o7+10*32],%o5
2606	move	%icc,	%o4,	%i0
2607	ldx	[%o7+3*32],	%o4
2608	move	%icc,	%o5,	%i1
2609	ldx	[%o7+11*32],%o5
2610	movneg	%icc,	%o4,	%i0
2611	ldx	[%o7+4*32],	%o4
2612	movneg	%icc,	%o5,	%i1
2613	ldx	[%o7+12*32],%o5
2614	movcs	%xcc,	%o4,	%i0
2615	ldx	[%o7+5*32],%o4
2616	movcs	%xcc,	%o5,	%i1
2617	ldx	[%o7+13*32],%o5
2618	movvs	%xcc,	%o4,	%i0
2619	ldx	[%o7+6*32],	%o4
2620	movvs	%xcc,	%o5,	%i1
2621	ldx	[%o7+14*32],%o5
2622	move	%xcc,	%o4,	%i0
2623	ldx	[%o7+7*32],	%o4
2624	move	%xcc,	%o5,	%i1
2625	ldx	[%o7+15*32],%o5
2626	movneg	%xcc,	%o4,	%i0
2627	add	%o7,16*32,	%o7
2628	movneg	%xcc,	%o5,	%i1
2629	ldx	[%o7+0*32],	%i2
2630	ldx	[%o7+8*32],	%i3
2631	ldx	[%o7+1*32],	%o4
2632	ldx	[%o7+9*32],	%o5
2633	movvs	%icc,	%o4,	%i2
2634	ldx	[%o7+2*32],	%o4
2635	movvs	%icc,	%o5,	%i3
2636	ldx	[%o7+10*32],%o5
2637	move	%icc,	%o4,	%i2
2638	ldx	[%o7+3*32],	%o4
2639	move	%icc,	%o5,	%i3
2640	ldx	[%o7+11*32],%o5
2641	movneg	%icc,	%o4,	%i2
2642	ldx	[%o7+4*32],	%o4
2643	movneg	%icc,	%o5,	%i3
2644	ldx	[%o7+12*32],%o5
2645	movcs	%xcc,	%o4,	%i2
2646	ldx	[%o7+5*32],%o4
2647	movcs	%xcc,	%o5,	%i3
2648	ldx	[%o7+13*32],%o5
2649	movvs	%xcc,	%o4,	%i2
2650	ldx	[%o7+6*32],	%o4
2651	movvs	%xcc,	%o5,	%i3
2652	ldx	[%o7+14*32],%o5
2653	move	%xcc,	%o4,	%i2
2654	ldx	[%o7+7*32],	%o4
2655	move	%xcc,	%o5,	%i3
2656	ldx	[%o7+15*32],%o5
2657	movneg	%xcc,	%o4,	%i2
2658	add	%o7,16*32,	%o7
2659	movneg	%xcc,	%o5,	%i3
2660	ldx	[%o7+0*32],	%i4
2661	ldx	[%o7+8*32],	%i5
2662	ldx	[%o7+1*32],	%o4
2663	ldx	[%o7+9*32],	%o5
2664	movvs	%icc,	%o4,	%i4
2665	ldx	[%o7+2*32],	%o4
2666	movvs	%icc,	%o5,	%i5
2667	ldx	[%o7+10*32],%o5
2668	move	%icc,	%o4,	%i4
2669	ldx	[%o7+3*32],	%o4
2670	move	%icc,	%o5,	%i5
2671	ldx	[%o7+11*32],%o5
2672	movneg	%icc,	%o4,	%i4
2673	ldx	[%o7+4*32],	%o4
2674	movneg	%icc,	%o5,	%i5
2675	ldx	[%o7+12*32],%o5
2676	movcs	%xcc,	%o4,	%i4
2677	ldx	[%o7+5*32],%o4
2678	movcs	%xcc,	%o5,	%i5
2679	ldx	[%o7+13*32],%o5
2680	movvs	%xcc,	%o4,	%i4
2681	ldx	[%o7+6*32],	%o4
2682	movvs	%xcc,	%o5,	%i5
2683	ldx	[%o7+14*32],%o5
2684	move	%xcc,	%o4,	%i4
2685	ldx	[%o7+7*32],	%o4
2686	move	%xcc,	%o5,	%i5
2687	ldx	[%o7+15*32],%o5
2688	movneg	%xcc,	%o4,	%i4
2689	add	%o7,16*32,	%o7
2690	movneg	%xcc,	%o5,	%i5
2691	ldx	[%o7+0*32],	%l0
2692	ldx	[%o7+8*32],	%l1
2693	ldx	[%o7+1*32],	%o4
2694	ldx	[%o7+9*32],	%o5
2695	movvs	%icc,	%o4,	%l0
2696	ldx	[%o7+2*32],	%o4
2697	movvs	%icc,	%o5,	%l1
2698	ldx	[%o7+10*32],%o5
2699	move	%icc,	%o4,	%l0
2700	ldx	[%o7+3*32],	%o4
2701	move	%icc,	%o5,	%l1
2702	ldx	[%o7+11*32],%o5
2703	movneg	%icc,	%o4,	%l0
2704	ldx	[%o7+4*32],	%o4
2705	movneg	%icc,	%o5,	%l1
2706	ldx	[%o7+12*32],%o5
2707	movcs	%xcc,	%o4,	%l0
2708	ldx	[%o7+5*32],%o4
2709	movcs	%xcc,	%o5,	%l1
2710	ldx	[%o7+13*32],%o5
2711	movvs	%xcc,	%o4,	%l0
2712	ldx	[%o7+6*32],	%o4
2713	movvs	%xcc,	%o5,	%l1
2714	ldx	[%o7+14*32],%o5
2715	move	%xcc,	%o4,	%l0
2716	ldx	[%o7+7*32],	%o4
2717	move	%xcc,	%o5,	%l1
2718	ldx	[%o7+15*32],%o5
2719	movneg	%xcc,	%o4,	%l0
2720	add	%o7,16*32,	%o7
2721	movneg	%xcc,	%o5,	%l1
2722	ldx	[%o7+0*32],	%l2
2723	ldx	[%o7+8*32],	%l3
2724	ldx	[%o7+1*32],	%o4
2725	ldx	[%o7+9*32],	%o5
2726	movvs	%icc,	%o4,	%l2
2727	ldx	[%o7+2*32],	%o4
2728	movvs	%icc,	%o5,	%l3
2729	ldx	[%o7+10*32],%o5
2730	move	%icc,	%o4,	%l2
2731	ldx	[%o7+3*32],	%o4
2732	move	%icc,	%o5,	%l3
2733	ldx	[%o7+11*32],%o5
2734	movneg	%icc,	%o4,	%l2
2735	ldx	[%o7+4*32],	%o4
2736	movneg	%icc,	%o5,	%l3
2737	ldx	[%o7+12*32],%o5
2738	movcs	%xcc,	%o4,	%l2
2739	ldx	[%o7+5*32],%o4
2740	movcs	%xcc,	%o5,	%l3
2741	ldx	[%o7+13*32],%o5
2742	movvs	%xcc,	%o4,	%l2
2743	ldx	[%o7+6*32],	%o4
2744	movvs	%xcc,	%o5,	%l3
2745	ldx	[%o7+14*32],%o5
2746	move	%xcc,	%o4,	%l2
2747	ldx	[%o7+7*32],	%o4
2748	move	%xcc,	%o5,	%l3
2749	ldx	[%o7+15*32],%o5
2750	movneg	%xcc,	%o4,	%l2
2751	add	%o7,16*32,	%o7
2752	movneg	%xcc,	%o5,	%l3
2753	ldx	[%o7+0*32],	%l4
2754	ldx	[%o7+8*32],	%l5
2755	ldx	[%o7+1*32],	%o4
2756	ldx	[%o7+9*32],	%o5
2757	movvs	%icc,	%o4,	%l4
2758	ldx	[%o7+2*32],	%o4
2759	movvs	%icc,	%o5,	%l5
2760	ldx	[%o7+10*32],%o5
2761	move	%icc,	%o4,	%l4
2762	ldx	[%o7+3*32],	%o4
2763	move	%icc,	%o5,	%l5
2764	ldx	[%o7+11*32],%o5
2765	movneg	%icc,	%o4,	%l4
2766	ldx	[%o7+4*32],	%o4
2767	movneg	%icc,	%o5,	%l5
2768	ldx	[%o7+12*32],%o5
2769	movcs	%xcc,	%o4,	%l4
2770	ldx	[%o7+5*32],%o4
2771	movcs	%xcc,	%o5,	%l5
2772	ldx	[%o7+13*32],%o5
2773	movvs	%xcc,	%o4,	%l4
2774	ldx	[%o7+6*32],	%o4
2775	movvs	%xcc,	%o5,	%l5
2776	ldx	[%o7+14*32],%o5
2777	move	%xcc,	%o4,	%l4
2778	ldx	[%o7+7*32],	%o4
2779	move	%xcc,	%o5,	%l5
2780	ldx	[%o7+15*32],%o5
2781	movneg	%xcc,	%o4,	%l4
2782	add	%o7,16*32,	%o7
2783	movneg	%xcc,	%o5,	%l5
2784	ldx	[%o7+0*32],	%l6
2785	ldx	[%o7+8*32],	%l7
2786	ldx	[%o7+1*32],	%o4
2787	ldx	[%o7+9*32],	%o5
2788	movvs	%icc,	%o4,	%l6
2789	ldx	[%o7+2*32],	%o4
2790	movvs	%icc,	%o5,	%l7
2791	ldx	[%o7+10*32],%o5
2792	move	%icc,	%o4,	%l6
2793	ldx	[%o7+3*32],	%o4
2794	move	%icc,	%o5,	%l7
2795	ldx	[%o7+11*32],%o5
2796	movneg	%icc,	%o4,	%l6
2797	ldx	[%o7+4*32],	%o4
2798	movneg	%icc,	%o5,	%l7
2799	ldx	[%o7+12*32],%o5
2800	movcs	%xcc,	%o4,	%l6
2801	ldx	[%o7+5*32],%o4
2802	movcs	%xcc,	%o5,	%l7
2803	ldx	[%o7+13*32],%o5
2804	movvs	%xcc,	%o4,	%l6
2805	ldx	[%o7+6*32],	%o4
2806	movvs	%xcc,	%o5,	%l7
2807	ldx	[%o7+14*32],%o5
2808	move	%xcc,	%o4,	%l6
2809	ldx	[%o7+7*32],	%o4
2810	move	%xcc,	%o5,	%l7
2811	ldx	[%o7+15*32],%o5
2812	movneg	%xcc,	%o4,	%l6
2813	add	%o7,16*32,	%o7
2814	movneg	%xcc,	%o5,	%l7
2815	save	%sp,-128,%sp;		or	%g5,%fp,%fp
2816	ldx	[%i7+0*32],	%i0
2817	ldx	[%i7+8*32],	%i1
2818	ldx	[%i7+1*32],	%o4
2819	ldx	[%i7+9*32],	%o5
2820	movvs	%icc,	%o4,	%i0
2821	ldx	[%i7+2*32],	%o4
2822	movvs	%icc,	%o5,	%i1
2823	ldx	[%i7+10*32],%o5
2824	move	%icc,	%o4,	%i0
2825	ldx	[%i7+3*32],	%o4
2826	move	%icc,	%o5,	%i1
2827	ldx	[%i7+11*32],%o5
2828	movneg	%icc,	%o4,	%i0
2829	ldx	[%i7+4*32],	%o4
2830	movneg	%icc,	%o5,	%i1
2831	ldx	[%i7+12*32],%o5
2832	movcs	%xcc,	%o4,	%i0
2833	ldx	[%i7+5*32],%o4
2834	movcs	%xcc,	%o5,	%i1
2835	ldx	[%i7+13*32],%o5
2836	movvs	%xcc,	%o4,	%i0
2837	ldx	[%i7+6*32],	%o4
2838	movvs	%xcc,	%o5,	%i1
2839	ldx	[%i7+14*32],%o5
2840	move	%xcc,	%o4,	%i0
2841	ldx	[%i7+7*32],	%o4
2842	move	%xcc,	%o5,	%i1
2843	ldx	[%i7+15*32],%o5
2844	movneg	%xcc,	%o4,	%i0
2845	add	%i7,16*32,	%i7
2846	movneg	%xcc,	%o5,	%i1
2847	ldx	[%i7+0*32],	%i2
2848	ldx	[%i7+8*32],	%i3
2849	ldx	[%i7+1*32],	%o4
2850	ldx	[%i7+9*32],	%o5
2851	movvs	%icc,	%o4,	%i2
2852	ldx	[%i7+2*32],	%o4
2853	movvs	%icc,	%o5,	%i3
2854	ldx	[%i7+10*32],%o5
2855	move	%icc,	%o4,	%i2
2856	ldx	[%i7+3*32],	%o4
2857	move	%icc,	%o5,	%i3
2858	ldx	[%i7+11*32],%o5
2859	movneg	%icc,	%o4,	%i2
2860	ldx	[%i7+4*32],	%o4
2861	movneg	%icc,	%o5,	%i3
2862	ldx	[%i7+12*32],%o5
2863	movcs	%xcc,	%o4,	%i2
2864	ldx	[%i7+5*32],%o4
2865	movcs	%xcc,	%o5,	%i3
2866	ldx	[%i7+13*32],%o5
2867	movvs	%xcc,	%o4,	%i2
2868	ldx	[%i7+6*32],	%o4
2869	movvs	%xcc,	%o5,	%i3
2870	ldx	[%i7+14*32],%o5
2871	move	%xcc,	%o4,	%i2
2872	ldx	[%i7+7*32],	%o4
2873	move	%xcc,	%o5,	%i3
2874	ldx	[%i7+15*32],%o5
2875	movneg	%xcc,	%o4,	%i2
2876	add	%i7,16*32,	%i7
2877	movneg	%xcc,	%o5,	%i3
2878	ldx	[%i7+0*32],	%i4
2879	ldx	[%i7+8*32],	%i5
2880	ldx	[%i7+1*32],	%o4
2881	ldx	[%i7+9*32],	%o5
2882	movvs	%icc,	%o4,	%i4
2883	ldx	[%i7+2*32],	%o4
2884	movvs	%icc,	%o5,	%i5
2885	ldx	[%i7+10*32],%o5
2886	move	%icc,	%o4,	%i4
2887	ldx	[%i7+3*32],	%o4
2888	move	%icc,	%o5,	%i5
2889	ldx	[%i7+11*32],%o5
2890	movneg	%icc,	%o4,	%i4
2891	ldx	[%i7+4*32],	%o4
2892	movneg	%icc,	%o5,	%i5
2893	ldx	[%i7+12*32],%o5
2894	movcs	%xcc,	%o4,	%i4
2895	ldx	[%i7+5*32],%o4
2896	movcs	%xcc,	%o5,	%i5
2897	ldx	[%i7+13*32],%o5
2898	movvs	%xcc,	%o4,	%i4
2899	ldx	[%i7+6*32],	%o4
2900	movvs	%xcc,	%o5,	%i5
2901	ldx	[%i7+14*32],%o5
2902	move	%xcc,	%o4,	%i4
2903	ldx	[%i7+7*32],	%o4
2904	move	%xcc,	%o5,	%i5
2905	ldx	[%i7+15*32],%o5
2906	movneg	%xcc,	%o4,	%i4
2907	add	%i7,16*32,	%i7
2908	movneg	%xcc,	%o5,	%i5
2909	ldx	[%i7+0*32],	%l0
2910	ldx	[%i7+8*32],	%l1
2911	ldx	[%i7+1*32],	%o4
2912	ldx	[%i7+9*32],	%o5
2913	movvs	%icc,	%o4,	%l0
2914	ldx	[%i7+2*32],	%o4
2915	movvs	%icc,	%o5,	%l1
2916	ldx	[%i7+10*32],%o5
2917	move	%icc,	%o4,	%l0
2918	ldx	[%i7+3*32],	%o4
2919	move	%icc,	%o5,	%l1
2920	ldx	[%i7+11*32],%o5
2921	movneg	%icc,	%o4,	%l0
2922	ldx	[%i7+4*32],	%o4
2923	movneg	%icc,	%o5,	%l1
2924	ldx	[%i7+12*32],%o5
2925	movcs	%xcc,	%o4,	%l0
2926	ldx	[%i7+5*32],%o4
2927	movcs	%xcc,	%o5,	%l1
2928	ldx	[%i7+13*32],%o5
2929	movvs	%xcc,	%o4,	%l0
2930	ldx	[%i7+6*32],	%o4
2931	movvs	%xcc,	%o5,	%l1
2932	ldx	[%i7+14*32],%o5
2933	move	%xcc,	%o4,	%l0
2934	ldx	[%i7+7*32],	%o4
2935	move	%xcc,	%o5,	%l1
2936	ldx	[%i7+15*32],%o5
2937	movneg	%xcc,	%o4,	%l0
2938	add	%i7,16*32,	%i7
2939	movneg	%xcc,	%o5,	%l1
2940	ldx	[%i7+0*32],	%l2
2941	ldx	[%i7+8*32],	%l3
2942	ldx	[%i7+1*32],	%o4
2943	ldx	[%i7+9*32],	%o5
2944	movvs	%icc,	%o4,	%l2
2945	ldx	[%i7+2*32],	%o4
2946	movvs	%icc,	%o5,	%l3
2947	ldx	[%i7+10*32],%o5
2948	move	%icc,	%o4,	%l2
2949	ldx	[%i7+3*32],	%o4
2950	move	%icc,	%o5,	%l3
2951	ldx	[%i7+11*32],%o5
2952	movneg	%icc,	%o4,	%l2
2953	ldx	[%i7+4*32],	%o4
2954	movneg	%icc,	%o5,	%l3
2955	ldx	[%i7+12*32],%o5
2956	movcs	%xcc,	%o4,	%l2
2957	ldx	[%i7+5*32],%o4
2958	movcs	%xcc,	%o5,	%l3
2959	ldx	[%i7+13*32],%o5
2960	movvs	%xcc,	%o4,	%l2
2961	ldx	[%i7+6*32],	%o4
2962	movvs	%xcc,	%o5,	%l3
2963	ldx	[%i7+14*32],%o5
2964	move	%xcc,	%o4,	%l2
2965	ldx	[%i7+7*32],	%o4
2966	move	%xcc,	%o5,	%l3
2967	ldx	[%i7+15*32],%o5
2968	movneg	%xcc,	%o4,	%l2
2969	add	%i7,16*32,	%i7
2970	movneg	%xcc,	%o5,	%l3
2971	srax	%g4,	32,	%o4		! unpack %g4
2972	srl	%g4,	%g0,	%o5
2973	sub	%o4,	5,	%o4
2974	mov	%g3,	%i7
2975	sllx	%o4,	32,	%g4		! re-pack %g4
2976	or	%o5,	%g4,	%g4
2977	srl	%o5,	%o4,	%o5
2978	srl	%o5,	2,	%o4
2979	and	%o5,	3,	%o5
2980	and	%o4,	7,	%o4
2981	sll	%o5,	3,	%o5	! offset within first cache line
2982	add	%o5,	%i7,	%i7	! of the pwrtbl
2983	or	%g0,	1,	%o5
2984	sll	%o5,	%o4,	%o4
2985	.word	0x81b02940+24-1	! montsqr	24-1
2986	fbu,pn	%fcc3,.Labort_24
2987#ifndef	__arch64__
2988	and	%fp,%g5,%g5
2989	brz,pn	%g5,.Labort_24
2990#endif
2991	nop
2992	.word	0x81b02940+24-1	! montsqr	24-1
2993	fbu,pn	%fcc3,.Labort_24
2994#ifndef	__arch64__
2995	and	%fp,%g5,%g5
2996	brz,pn	%g5,.Labort_24
2997#endif
2998	nop
2999	.word	0x81b02940+24-1	! montsqr	24-1
3000	fbu,pn	%fcc3,.Labort_24
3001#ifndef	__arch64__
3002	and	%fp,%g5,%g5
3003	brz,pn	%g5,.Labort_24
3004#endif
3005	nop
3006	.word	0x81b02940+24-1	! montsqr	24-1
3007	fbu,pn	%fcc3,.Labort_24
3008#ifndef	__arch64__
3009	and	%fp,%g5,%g5
3010	brz,pn	%g5,.Labort_24
3011#endif
3012	nop
3013	.word	0x81b02940+24-1	! montsqr	24-1
3014	fbu,pn	%fcc3,.Labort_24
3015#ifndef	__arch64__
3016	and	%fp,%g5,%g5
3017	brz,pn	%g5,.Labort_24
3018#endif
3019	nop
3020	wr	%o4,	%g0,	%ccr
3021	.word	0x81b02920+24-1	! montmul	24-1
3022	fbu,pn	%fcc3,.Labort_24
3023#ifndef	__arch64__
3024	and	%fp,%g5,%g5
3025	brz,pn	%g5,.Labort_24
3026#endif
3027
3028	srax	%g4,	32,	%o4
3029#ifdef	__arch64__
3030	brgez	%o4,.Lstride_24
3031	restore
3032	restore
3033	restore
3034	restore
3035	restore
3036#else
3037	brgez	%o4,.Lstride_24
3038	restore;		and	%fp,%g5,%g5
3039	restore;		and	%fp,%g5,%g5
3040	restore;		and	%fp,%g5,%g5
3041	restore;		and	%fp,%g5,%g5
3042	 brz,pn	%g5,.Labort1_24
3043	restore
3044#endif
3045	.word	0x81b02310 !movxtod	%l0,%f0
3046	.word	0x85b02311 !movxtod	%l1,%f2
3047	.word	0x89b02312 !movxtod	%l2,%f4
3048	.word	0x8db02313 !movxtod	%l3,%f6
3049	.word	0x91b02314 !movxtod	%l4,%f8
3050	.word	0x95b02315 !movxtod	%l5,%f10
3051	.word	0x99b02316 !movxtod	%l6,%f12
3052	.word	0x9db02317 !movxtod	%l7,%f14
3053	.word	0xa1b02308 !movxtod	%o0,%f16
3054	.word	0xa5b02309 !movxtod	%o1,%f18
3055	.word	0xa9b0230a !movxtod	%o2,%f20
3056	.word	0xadb0230b !movxtod	%o3,%f22
3057	.word	0xbbb0230c !movxtod	%o4,%f60
3058	.word	0xbfb0230d !movxtod	%o5,%f62
3059#ifdef	__arch64__
3060	restore
3061#else
3062	 and	%fp,%g5,%g5
3063	restore
3064	 and	%g5,1,%o7
3065	 and	%fp,%g5,%g5
3066	 srl	%fp,0,%fp		! just in case?
3067	 or	%o7,%g5,%g5
3068	brz,a,pn %g5,.Ldone_24
3069	mov	0,%i0		! return failure
3070#endif
3071	std	%f0,[%g1+0*8]
3072	std	%f2,[%g1+1*8]
3073	std	%f4,[%g1+2*8]
3074	std	%f6,[%g1+3*8]
3075	std	%f8,[%g1+4*8]
3076	std	%f10,[%g1+5*8]
3077	std	%f12,[%g1+6*8]
3078	std	%f14,[%g1+7*8]
3079	std	%f16,[%g1+8*8]
3080	std	%f18,[%g1+9*8]
3081	std	%f20,[%g1+10*8]
3082	std	%f22,[%g1+11*8]
3083	std	%f60,[%g1+12*8]
3084	std	%f62,[%g1+13*8]
3085	std	%f24,[%g1+14*8]
3086	std	%f26,[%g1+15*8]
3087	std	%f28,[%g1+16*8]
3088	std	%f30,[%g1+17*8]
3089	std	%f32,[%g1+18*8]
3090	std	%f34,[%g1+19*8]
3091	std	%f36,[%g1+20*8]
3092	std	%f38,[%g1+21*8]
3093	std	%f40,[%g1+22*8]
3094	std	%f42,[%g1+23*8]
3095	mov	1,%i0		! return success
3096.Ldone_24:
3097	ret
3098	restore
3099
3100.Labort_24:
3101	restore
3102	restore
3103	restore
3104	restore
3105	restore
3106.Labort1_24:
3107	restore
3108
3109	mov	0,%i0		! return failure
3110	ret
3111	restore
3112.type	bn_pwr5_mont_t4_24, #function
3113.size	bn_pwr5_mont_t4_24, .-bn_pwr5_mont_t4_24
!-----------------------------------------------------------------------
! bn_pwr5_mont_t4_32 - one 5-bit-window step of Montgomery exponentiation
! for 32-limb (32x64-bit, i.e. 2048-bit) operands, built on the SPARC T4
! montsqr/montmul instructions (emitted via .word because most assemblers
! do not know these opcodes).
!
! In:  %i0 = tp, 32-limb work value (also receives the result)
!      %i1 = np, the 32-limb modulus
!      %i2 = pointer to n0 (two 32-bit halves, loaded into %f1/%f0)
!      %i3 = pwrtbl, table of pre-computed powers (selected 16-way below)
!      %i4/%i5 = packed into %g4, %i5 in the upper 32 bits
!               NOTE(review): appears to encode iteration count (upper)
!               and accumulated window/index bits (lower) - confirm
!               against the C caller
! Out: %i0 = 1 on success; 0 on failure (montsqr/montmul apparently
!      signal inability via %fcc3, and on the 32-bit ABI a clobbered
!      register window is detected through the %g5/%fp tag bits)
!-----------------------------------------------------------------------
.globl	bn_pwr5_mont_t4_32
.align	32
bn_pwr5_mont_t4_32:
! Pick stack adjustment and the window-corruption tag mask:
! 64-bit ABI needs no tagging (%g5=0); otherwise %fp is tagged with
! %g5 = -1<<32 so a spilled/restored window can be detected.
#ifdef	__arch64__
	mov	0,%g5
	mov	-128,%g4
#elif defined(SPARCV9_64BIT_STACK)
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1	! OPENSSL_sparcv9_P[0]
	mov	-2047,%g4
	and	%g1,SPARCV9_64BIT_STACK,%g1
	movrz	%g1,0,%g4
	mov	-1,%g5
	add	%g4,-128,%g4
#else
	mov	-1,%g5
	mov	-128,%g4
#endif
	sllx	%g5,32,%g5
	save	%sp,%g4,%sp
#ifndef	__arch64__
	save	%sp,-128,%sp	! warm it up
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	save	%sp,-128,%sp
	restore
	restore
	restore
	restore
	restore
	restore
#endif
	and	%sp,1,%g4
	or	%g5,%fp,%fp
	or	%g4,%g5,%g5

	! copy arguments to global registers
	mov	%i0,%g1
	mov	%i1,%g2
	ld	[%i2+0],%f1	! load *n0
	ld	[%i2+4],%f0
	mov	%i3,%g3
	srl	%i4,%g0,%i4	! pack last arguments
	sllx	%i5,32,%g4
	or	%i4,%g4,%g4
	.word	0xbbb00f00 !fsrc2	%f0,%f0,%f60
! Load tp[0..31]: limbs 0-13 into the %l/%o registers of a fresh window,
! limbs 14-31 into even double FP registers %f24-%f58.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g1+0*8],%l0
	ldx	[%g1+1*8],%l1
	ldx	[%g1+2*8],%l2
	ldx	[%g1+3*8],%l3
	ldx	[%g1+4*8],%l4
	ldx	[%g1+5*8],%l5
	ldx	[%g1+6*8],%l6
	ldx	[%g1+7*8],%l7
	ldx	[%g1+8*8],%o0
	ldx	[%g1+9*8],%o1
	ldx	[%g1+10*8],%o2
	ldx	[%g1+11*8],%o3
	ldx	[%g1+12*8],%o4
	ldx	[%g1+13*8],%o5
	ldd	[%g1+14*8],%f24
	ldd	[%g1+15*8],%f26
	ldd	[%g1+16*8],%f28
	ldd	[%g1+17*8],%f30
	ldd	[%g1+18*8],%f32
	ldd	[%g1+19*8],%f34
	ldd	[%g1+20*8],%f36
	ldd	[%g1+21*8],%f38
	ldd	[%g1+22*8],%f40
	ldd	[%g1+23*8],%f42
	ldd	[%g1+24*8],%f44
	ldd	[%g1+25*8],%f46
	ldd	[%g1+26*8],%f48
	ldd	[%g1+27*8],%f50
	ldd	[%g1+28*8],%f52
	ldd	[%g1+29*8],%f54
	ldd	[%g1+30*8],%f56
	ldd	[%g1+31*8],%f58
! Stash np[0..31] across successive register windows: 14 limbs per
! window (np[0..13], np[14..27], np[28..31]) where the T4 Montgomery
! instructions expect them.
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+0*8],%l0
	ldx	[%g2+1*8],%l1
	ldx	[%g2+2*8],%l2
	ldx	[%g2+3*8],%l3
	ldx	[%g2+4*8],%l4
	ldx	[%g2+5*8],%l5
	ldx	[%g2+6*8],%l6
	ldx	[%g2+7*8],%l7
	ldx	[%g2+8*8],%o0
	ldx	[%g2+9*8],%o1
	ldx	[%g2+10*8],%o2
	ldx	[%g2+11*8],%o3
	ldx	[%g2+12*8],%o4
	ldx	[%g2+13*8],%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+14*8],%l0
	ldx	[%g2+15*8],%l1
	ldx	[%g2+16*8],%l2
	ldx	[%g2+17*8],%l3
	ldx	[%g2+18*8],%l4
	ldx	[%g2+19*8],%l5
	ldx	[%g2+20*8],%l6
	ldx	[%g2+21*8],%l7
	ldx	[%g2+22*8],%o0
	ldx	[%g2+23*8],%o1
	ldx	[%g2+24*8],%o2
	ldx	[%g2+25*8],%o3
	ldx	[%g2+26*8],%o4
	ldx	[%g2+27*8],%o5
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%g2+28*8],%l0
	ldx	[%g2+29*8],%l1
	ldx	[%g2+30*8],%l2
	ldx	[%g2+31*8],%l3
	save	%sp,-128,%sp;		or	%g5,%fp,%fp

! Unpack %g4: upper half counts down by 5 each iteration, lower half
! supplies the window bits.  A single bit (1<<index) is deposited into
! %ccr so the cmov ladders below can do a data-independent 16-way select.
	srlx	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%o7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o7,	%o7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
	wr	%o4,	%g0,	%ccr
	b	.Lstride_32
	nop
.align	16
! Main loop: constant-time selection of one pre-computed power from
! pwrtbl (16 ladders of two limbs each; every one of the 16 interleaved
! candidates is loaded, and movcc on the %icc/%xcc bits written to %ccr
! keeps exactly one), then 5x montsqr + 1x montmul.
.Lstride_32:
	ldx	[%o7+0*32],	%i0
	ldx	[%o7+8*32],	%i1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i1
	ldx	[%o7+0*32],	%i2
	ldx	[%o7+8*32],	%i3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i3
	ldx	[%o7+0*32],	%i4
	ldx	[%o7+8*32],	%i5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%i5
	ldx	[%o7+0*32],	%l0
	ldx	[%o7+8*32],	%l1
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l1
	ldx	[%o7+0*32],	%l2
	ldx	[%o7+8*32],	%l3
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l3
	ldx	[%o7+0*32],	%l4
	ldx	[%o7+8*32],	%l5
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l5
	ldx	[%o7+0*32],	%l6
	ldx	[%o7+8*32],	%l7
	ldx	[%o7+1*32],	%o4
	ldx	[%o7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%o7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%o7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%o7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%o7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%o7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%o7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%o7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%o7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%o7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%o7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%o7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%o7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%o7,16*32,	%o7
	movneg	%xcc,	%o5,	%l7
! First 14 limbs of the selected power are now in this window; the
! remaining 18 limbs are selected into a fresh window below (%i7 carries
! the advanced table pointer across the save).
	save	%sp,-128,%sp;		or	%g5,%fp,%fp
	ldx	[%i7+0*32],	%i0
	ldx	[%i7+8*32],	%i1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i1
	ldx	[%i7+0*32],	%i2
	ldx	[%i7+8*32],	%i3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i3
	ldx	[%i7+0*32],	%i4
	ldx	[%i7+8*32],	%i5
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%i4
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%i5
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%i4
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%i5
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%i4
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%i5
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%i4
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%i5
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%i4
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%i5
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%i4
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%i5
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%i4
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%i5
	ldx	[%i7+0*32],	%l0
	ldx	[%i7+8*32],	%l1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l1
	ldx	[%i7+0*32],	%l2
	ldx	[%i7+8*32],	%l3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l3
	ldx	[%i7+0*32],	%l4
	ldx	[%i7+8*32],	%l5
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l4
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l5
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l4
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l5
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l4
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l5
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l4
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l5
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l4
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l5
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l4
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l5
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l4
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l5
	ldx	[%i7+0*32],	%l6
	ldx	[%i7+8*32],	%l7
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%l6
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%l7
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%l6
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%l7
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%l6
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%l7
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%l6
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%l7
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%l6
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%l7
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%l6
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%l7
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%l6
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%l7
	ldx	[%i7+0*32],	%o0
	ldx	[%i7+8*32],	%o1
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%o0
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%o1
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%o0
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%o1
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%o0
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%o1
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%o0
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%o1
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%o0
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%o1
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%o0
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%o1
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%o0
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%o1
	ldx	[%i7+0*32],	%o2
	ldx	[%i7+8*32],	%o3
	ldx	[%i7+1*32],	%o4
	ldx	[%i7+9*32],	%o5
	movvs	%icc,	%o4,	%o2
	ldx	[%i7+2*32],	%o4
	movvs	%icc,	%o5,	%o3
	ldx	[%i7+10*32],%o5
	move	%icc,	%o4,	%o2
	ldx	[%i7+3*32],	%o4
	move	%icc,	%o5,	%o3
	ldx	[%i7+11*32],%o5
	movneg	%icc,	%o4,	%o2
	ldx	[%i7+4*32],	%o4
	movneg	%icc,	%o5,	%o3
	ldx	[%i7+12*32],%o5
	movcs	%xcc,	%o4,	%o2
	ldx	[%i7+5*32],%o4
	movcs	%xcc,	%o5,	%o3
	ldx	[%i7+13*32],%o5
	movvs	%xcc,	%o4,	%o2
	ldx	[%i7+6*32],	%o4
	movvs	%xcc,	%o5,	%o3
	ldx	[%i7+14*32],%o5
	move	%xcc,	%o4,	%o2
	ldx	[%i7+7*32],	%o4
	move	%xcc,	%o5,	%o3
	ldx	[%i7+15*32],%o5
	movneg	%xcc,	%o4,	%o2
	add	%i7,16*32,	%i7
	movneg	%xcc,	%o5,	%o3
! Advance the packed state in %g4 (counter down by 5, arithmetic shift
! so the sign bit terminates the loop), recompute next window's %ccr
! selector bit in %o4, and rewind %i7 to the start of pwrtbl.
	srax	%g4,	32,	%o4		! unpack %g4
	srl	%g4,	%g0,	%o5
	sub	%o4,	5,	%o4
	mov	%g3,	%i7
	sllx	%o4,	32,	%g4		! re-pack %g4
	or	%o5,	%g4,	%g4
	srl	%o5,	%o4,	%o5
	srl	%o5,	2,	%o4
	and	%o5,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i7,	%i7	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%o4
! Five Montgomery squarings.  After each one %fcc3 is tested: an
! "unordered" result appears to flag that the hardware could not
! complete the operation, in which case we bail out; on 32-bit ABI also
! verify the %fp tag (%g5) to detect a clobbered register window.
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
	.word	0x81b02940+32-1	! montsqr	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif
	nop
! Deposit next iteration's selector bit in %ccr, then multiply by the
! power selected above.
	wr	%o4,	%g0,	%ccr
	.word	0x81b02920+32-1	! montmul	32-1
	fbu,pn	%fcc3,.Labort_32
#ifndef	__arch64__
	and	%fp,%g5,%g5
	brz,pn	%g5,.Labort_32
#endif

! Loop back while the (sign-extended) counter half of %g4 is still
! non-negative; the restores in/after the delay slot unwind the two
! selection windows plus the three np windows re-entered per iteration.
	srax	%g4,	32,	%o4
#ifdef	__arch64__
	brgez	%o4,.Lstride_32
	restore
	restore
	restore
	restore
	restore
#else
	brgez	%o4,.Lstride_32
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	restore;		and	%fp,%g5,%g5
	 brz,pn	%g5,.Labort1_32
	restore
#endif
! Done: move the 14 low result limbs from integer registers into FP
! registers so the whole 32-limb result can be stored with std below.
	.word	0x81b02310 !movxtod	%l0,%f0
	.word	0x85b02311 !movxtod	%l1,%f2
	.word	0x89b02312 !movxtod	%l2,%f4
	.word	0x8db02313 !movxtod	%l3,%f6
	.word	0x91b02314 !movxtod	%l4,%f8
	.word	0x95b02315 !movxtod	%l5,%f10
	.word	0x99b02316 !movxtod	%l6,%f12
	.word	0x9db02317 !movxtod	%l7,%f14
	.word	0xa1b02308 !movxtod	%o0,%f16
	.word	0xa5b02309 !movxtod	%o1,%f18
	.word	0xa9b0230a !movxtod	%o2,%f20
	.word	0xadb0230b !movxtod	%o3,%f22
	.word	0xbbb0230c !movxtod	%o4,%f60
	.word	0xbfb0230d !movxtod	%o5,%f62
#ifdef	__arch64__
	restore
#else
	 and	%fp,%g5,%g5
	restore
	 and	%g5,1,%o7
	 and	%fp,%g5,%g5
	 srl	%fp,0,%fp		! just in case?
	 or	%o7,%g5,%g5
	brz,a,pn %g5,.Ldone_32
	mov	0,%i0		! return failure
#endif
! Store result tp[0..31]: limbs 0-13 from %f0-%f22/%f60/%f62 (moved
! above), limbs 14-31 still live in %f24-%f58 from montsqr/montmul.
	std	%f0,[%g1+0*8]
	std	%f2,[%g1+1*8]
	std	%f4,[%g1+2*8]
	std	%f6,[%g1+3*8]
	std	%f8,[%g1+4*8]
	std	%f10,[%g1+5*8]
	std	%f12,[%g1+6*8]
	std	%f14,[%g1+7*8]
	std	%f16,[%g1+8*8]
	std	%f18,[%g1+9*8]
	std	%f20,[%g1+10*8]
	std	%f22,[%g1+11*8]
	std	%f60,[%g1+12*8]
	std	%f62,[%g1+13*8]
	std	%f24,[%g1+14*8]
	std	%f26,[%g1+15*8]
	std	%f28,[%g1+16*8]
	std	%f30,[%g1+17*8]
	std	%f32,[%g1+18*8]
	std	%f34,[%g1+19*8]
	std	%f36,[%g1+20*8]
	std	%f38,[%g1+21*8]
	std	%f40,[%g1+22*8]
	std	%f42,[%g1+23*8]
	std	%f44,[%g1+24*8]
	std	%f46,[%g1+25*8]
	std	%f48,[%g1+26*8]
	std	%f50,[%g1+27*8]
	std	%f52,[%g1+28*8]
	std	%f54,[%g1+29*8]
	std	%f56,[%g1+30*8]
	std	%f58,[%g1+31*8]
	mov	1,%i0		! return success
.Ldone_32:
	ret
	restore

! Failure paths: unwind the extra register windows and return 0 so the
! caller can fall back to a non-T4 code path.
.Labort_32:
	restore
	restore
	restore
	restore
	restore
.Labort1_32:
	restore

	mov	0,%i0		! return failure
	ret
	restore
.type	bn_pwr5_mont_t4_32, #function
.size	bn_pwr5_mont_t4_32, .-bn_pwr5_mont_t4_32
3899.globl	bn_mul_mont_t4
3900.align	32
3901bn_mul_mont_t4:
3902	add	%sp,	STACK_BIAS,	%g4	! real top of stack
	sll	%o5,	3,	%o5		! size in bytes
	add	%o5,	63,	%g1
	andn	%g1,	63,	%g1		! buffer size rounded up to 64 bytes
	sub	%g4,	%g1,	%g1
	andn	%g1,	63,	%g1		! align at 64 byte
	sub	%g1,	STACK_FRAME,	%g1	! new top of stack
	sub	%g1,	%g4,	%g1

	save	%sp,	%g1,	%sp
	! Register map after save: %i0=rp, %i1=ap, %i2=bp, %i3=np,
	! %i4=&n0, %i5=num*8 (operand length in BYTES, set by the sll
	! above); %l5 walks tp[], the temporary product buffer carved out
	! of the enlarged stack frame.
	! NOTE(review): VIS3 instructions (umulxhi/addxc/addxccc) are
	! emitted as raw .word opcodes so the file still assembles with
	! pre-T4 assemblers; the intended mnemonic follows each one in the
	! trailing comment.
	ld	[%i4+0],	%l0	! pull n0[0..1] value
	ld	[%i4+4],	%l1
	add	%sp, STACK_BIAS+STACK_FRAME, %l5
	ldx	[%i2+0],	%g2	! m0=bp[0]
	sllx	%l1,	32,	%g1
	add	%i2,	8,	%i2
	or	%l0,	%g1,	%g1	! %g1 = 64-bit n0

	ldx	[%i1+0],	%o2	! ap[0]

	mulx	%o2,	%g2,	%g4	! ap[0]*bp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5

	ldx	[%i1+8],	%o2	! ap[1]
	add	%i1,	16,	%i1
	ldx	[%i3+0],	%o4	! np[0]

	mulx	%g4,	%g1,	%g3	! "tp[0]"*n0

	mulx	%o2,	%g2,	%o3	! ap[1]*bp[0]
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4,	%g3,	%o0	! np[0]*m1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1

	ldx	[%i3+8],	%o4	! np[1]

	addcc	%g4,	%o0,	%o0
	add	%i3,	16,	%i3
	.word	0x93b00229 !addxc	%g0,%o1,%o1

	mulx	%o4,	%g3,	%o5	! np[1]*m1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.L1st
	sub	%i5,	24,	%l4	! cnt=num-3 (executes in delay slot)

.align	16
! First pass (i=0): tp[] = ap[]*bp[0] + np[]*m1, propagating the two
! carry chains (a* in %g4/%g5, n* in %o0/%o1) in lockstep.
.L1st:
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5

	ldx	[%i1+0],	%o2	! ap[j]
	addcc	%o5,	%o1,	%o0
	add	%i1,	8,	%i1
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj

	ldx	[%i3+0],	%o4	! np[j]
	mulx	%o2,	%g2,	%o3	! ap[j]*bp[0]
	add	%i3,	8,	%i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4,	%g3,	%o5	! np[j]*m1
	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	! ASI 0xe2 store: block-initializing variant (presumably
	! ASI_BLK_INIT_QUAD_LDD_P) so the freshly written tp cache lines
	! need not be read in first — TODO(review): confirm ASI name.
	stxa	%o0,	[%l5]0xe2	! tp[j-1]
	add	%l5,	8,	%l5	! tp++

	brnz,pt	%l4,	.L1st
	sub	%l4,	8,	%l4	! j-- (delay slot)
!.L1st
	! Loop tail: flush the last two limbs plus the top carry word.
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj

	addcc	%o5,	%o1,	%o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1
	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stxa	%o0,	[%l5]0xe2	! tp[j-1]
	add	%l5,	8,	%l5

	addcc	%g5,	%o1,	%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0	! upmost overflow bit
	stxa	%o1,	[%l5]0xe2
	add	%l5,	8,	%l5

	ba	.Louter
	sub	%i5,	16,	%l1	! i=num-2 (delay slot)

.align	16
! Outer loop (i=1..num-1): tp[] = (tp[] + ap[]*bp[i] + np[]*m1) >> 64.
.Louter:
	ldx	[%i2+0],	%g2	! m0=bp[i]
	add	%i2,	8,	%i2

	sub	%i1,	%i5,	%i1	! rewind
	sub	%i3,	%i5,	%i3
	sub	%l5,	%i5,	%l5

	ldx	[%i1+0],	%o2	! ap[0]
	ldx	[%i3+0],	%o4	! np[0]

	mulx	%o2,	%g2,	%g4	! ap[0]*bp[i]
	ldx	[%l5],		%o7	! tp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5
	ldx	[%i1+8],	%o2	! ap[1]
	addcc	%g4,	%o7,	%g4	! ap[0]*bp[i]+tp[0]
	mulx	%o2,	%g2,	%o3	! ap[1]*bp[i]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	mulx	%g4,	%g1,	%g3	! tp[0]*n0
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	mulx	%o4,	%g3,	%o0	! np[0]*m1
	add	%i1,	16,	%i1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1
	ldx	[%i3+8],	%o4	! np[1]
	add	%i3,	16,	%i3
	addcc	%o0,	%g4,	%o0
	mulx	%o4,	%g3,	%o5	! np[1]*m1
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.Linner
	sub	%i5,	24,	%l4	! cnt=num-3 (delay slot)
.align	16
.Linner:
	addcc	%o3,	%g5,	%g4
	ldx	[%l5+8],	%o7	! tp[j]
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	ldx	[%i1+0],	%o2	! ap[j]
	add	%i1,	8,	%i1
	addcc	%o5,	%o1,	%o0
	mulx	%o2,	%g2,	%o3	! ap[j]*bp[i]
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	ldx	[%i3+0],	%o4	! np[j]
	add	%i3,	8,	%i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
	mulx	%o4,	%g3,	%o5	! np[j]*m1
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0,	[%l5]		! tp[j-1]
	add	%l5,	8,	%l5
	brnz,pt	%l4,	.Linner
	sub	%l4,	8,	%l4	! delay slot: j--
!.Linner
	ldx	[%l5+8],	%o7	! tp[j]
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5

	addcc	%o5,	%o1,	%o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0,	[%l5]		! tp[j-1]

	! Fold the overflow bit saved in %l0 back into the top limb.
	subcc	%g0,	%l0,	%g0	! move upmost overflow to CCR.xcc
	.word	0x93b24265 !addxccc	%o1,%g5,%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0
	stx	%o1,	[%l5+8]
	add	%l5,	16,	%l5

	brnz,pt	%l1,	.Louter
	sub	%l1,	8,	%l1	! delay slot: i--

	sub	%i1,	%i5,	%i1	! rewind
	sub	%i3,	%i5,	%i3
	sub	%l5,	%i5,	%l5
	ba	.Lsub
	subcc	%i5,	8,	%l4	! cnt=num-1 and clear CCR.xcc (delay slot)

.align	16
! Conditional final reduction, part 1: rp[] = tp[] - np[], computed in
! 32-bit halves with a borrow chain; the halves are stored to rp in
! swapped ("reverse") order — presumably to produce the 32-bit word
! layout used by bn_flip_t4 below; verify against callers.
.Lsub:
	ldx	[%l5],		%o7
	add	%l5,	8,	%l5
	ldx	[%i3+0],	%o4
	add	%i3,	8,	%i3
	subccc	%o7,	%o4,	%l2	! tp[j]-np[j]
	srlx	%o7,	32,	%o7
	srlx	%o4,	32,	%o4
	subccc	%o7,	%o4,	%l3
	add	%i0,	8,	%i0
	st	%l2,	[%i0-4]		! reverse order
	st	%l3,	[%i0-8]
	brnz,pt	%l4,	.Lsub
	sub	%l4,	8,	%l4	! delay slot: j--

	sub	%i3,	%i5,	%i3	! rewind
	sub	%l5,	%i5,	%l5
	sub	%i0,	%i5,	%i0

	subccc	%l0,	%g0,	%l0	! handle upmost overflow bit
	ba	.Lcopy
	sub	%i5,	8,	%l4	! delay slot

.align	16
! Part 2: if the subtraction borrowed (carry set, i.e. tp < np and no
! overflow bit), keep tp; otherwise keep tp-np.  tp is zeroed as it is
! consumed so no secret intermediate survives on the stack.
.Lcopy:					! conditional copy
	ldx	[%l5],		%o7
	ldx	[%i0+0],	%l2
	stx	%g0,	[%l5]		! zap
	add	%l5,	8,	%l5
	movcs	%icc,	%o7,	%l2	! carry => take tp[j]
	stx	%l2,	[%i0+0]
	add	%i0,	8,	%i0
	brnz	%l4,	.Lcopy
	sub	%l4,	8,	%l4	! delay slot: j--

	mov	1,	%o0		! return 1 (success)
	ret
	restore
.type	bn_mul_mont_t4, #function
.size	bn_mul_mont_t4, .-bn_mul_mont_t4
! bn_mul_mont_gather5_t4(rp=%o0, ap=%o1, pwrtbl=%o2, np=%o3, n0=%o4,
!                        num=%o5, power=7th stack argument)
! Same Montgomery multiplication as bn_mul_mont_t4, except that bp[i]
! is not read from a linear array: each limb of the multiplier is
! gathered from an interleaved power table in constant time (all eight
! candidate cache lines are loaded and one is selected with
! conditional moves), so the memory access pattern does not leak
! 'power' through the cache.
.globl	bn_mul_mont_gather5_t4
.align	32
bn_mul_mont_gather5_t4:
	add	%sp,	STACK_BIAS,	%g4	! real top of stack
	sll	%o5,	3,	%o5		! size in bytes
	add	%o5,	63,	%g1
	andn	%g1,	63,	%g1		! buffer size rounded up to 64 bytes
	sub	%g4,	%g1,	%g1
	andn	%g1,	63,	%g1		! align at 64 byte
	sub	%g1,	STACK_FRAME,	%g1	! new top of stack
	sub	%g1,	%g4,	%g1
	LDPTR	[%sp+STACK_7thARG],	%g4	! load power, 7th argument

	save	%sp,	%g1,	%sp
	! Selector setup: power&3 picks the 8-byte slot within a 32-byte
	! line (folded into the table pointer); 1<<((power>>2)&7) is
	! written verbatim into %ccr so that exactly one of the eight
	! movcc predicates below is true (the first load is the default
	! when only icc.c is set).
	srl	%g4,	2,	%o4
	and	%g4,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%i2,	%i2	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%l7
	wr	%l7,	%g0,	%ccr
	! Constant-time gather of bp[0]: read all 8 candidates, keep one.
	ldx	[%i2+0*32],	%g2
	ldx	[%i2+1*32],	%o4
	ldx	[%i2+2*32],	%o5
	movvs	%icc,	%o4,	%g2
	ldx	[%i2+3*32],	%o4
	move	%icc,	%o5,	%g2
	ldx	[%i2+4*32],	%o5
	movneg	%icc,	%o4,	%g2
	ldx	[%i2+5*32],	%o4
	movcs	%xcc,	%o5,	%g2
	ldx	[%i2+6*32],	%o5
	movvs	%xcc,	%o4,	%g2
	ldx	[%i2+7*32],	%o4
	move	%xcc,	%o5,	%g2
	add	%i2,8*32,	%i2	! advance to next table row
	movneg	%xcc,	%o4,	%g2
	ld	[%i4+0],	%l0	! pull n0[0..1] value
	ld	[%i4+4],	%l1
	add	%sp, STACK_BIAS+STACK_FRAME, %l5
	sllx	%l1,	32,	%g1
	or	%l0,	%g1,	%g1	! %g1 = 64-bit n0

	ldx	[%i1+0],	%o2	! ap[0]

	mulx	%o2,	%g2,	%g4	! ap[0]*bp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5

	ldx	[%i1+8],	%o2	! ap[1]
	add	%i1,	16,	%i1
	ldx	[%i3+0],	%o4	! np[0]

	mulx	%g4,	%g1,	%g3	! "tp[0]"*n0

	mulx	%o2,	%g2,	%o3	! ap[1]*bp[0]
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4,	%g3,	%o0	! np[0]*m1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1

	ldx	[%i3+8],	%o4	! np[1]

	addcc	%g4,	%o0,	%o0
	add	%i3,	16,	%i3
	.word	0x93b00229 !addxc	%g0,%o1,%o1

	mulx	%o4,	%g3,	%o5	! np[1]*m1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.L1st_g5
	sub	%i5,	24,	%l4	! cnt=num-3 (delay slot)

.align	16
! First pass (i=0): tp[] = ap[]*bp[0] + np[]*m1.
.L1st_g5:
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5

	ldx	[%i1+0],	%o2	! ap[j]
	addcc	%o5,	%o1,	%o0
	add	%i1,	8,	%i1
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj

	ldx	[%i3+0],	%o4	! np[j]
	mulx	%o2,	%g2,	%o3	! ap[j]*bp[0]
	add	%i3,	8,	%i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj

	mulx	%o4,	%g3,	%o5	! np[j]*m1
	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stxa	%o0,	[%l5]0xe2	! tp[j-1] (block-init store ASI)
	add	%l5,	8,	%l5	! tp++

	brnz,pt	%l4,	.L1st_g5
	sub	%l4,	8,	%l4	! j-- (delay slot)
!.L1st_g5
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj

	addcc	%o5,	%o1,	%o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1
	addcc	%g4,	%o0,	%o0	! np[j]*m1+ap[j]*bp[0]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stxa	%o0,	[%l5]0xe2	! tp[j-1]
	add	%l5,	8,	%l5

	addcc	%g5,	%o1,	%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0	! upmost overflow bit
	stxa	%o1,	[%l5]0xe2
	add	%l5,	8,	%l5

	ba	.Louter_g5
	sub	%i5,	16,	%l1	! i=num-2 (delay slot)

.align	16
! Outer loop: gather bp[i] (same movcc trick, %l7 mask still live),
! then tp[] = (tp[] + ap[]*bp[i] + np[]*m1) >> 64.
.Louter_g5:
	wr	%l7,	%g0,	%ccr
	ldx	[%i2+0*32],	%g2
	ldx	[%i2+1*32],	%o4
	ldx	[%i2+2*32],	%o5
	movvs	%icc,	%o4,	%g2
	ldx	[%i2+3*32],	%o4
	move	%icc,	%o5,	%g2
	ldx	[%i2+4*32],	%o5
	movneg	%icc,	%o4,	%g2
	ldx	[%i2+5*32],	%o4
	movcs	%xcc,	%o5,	%g2
	ldx	[%i2+6*32],	%o5
	movvs	%xcc,	%o4,	%g2
	ldx	[%i2+7*32],	%o4
	move	%xcc,	%o5,	%g2
	add	%i2,8*32,	%i2	! next table row
	movneg	%xcc,	%o4,	%g2
	sub	%i1,	%i5,	%i1	! rewind
	sub	%i3,	%i5,	%i3
	sub	%l5,	%i5,	%l5

	ldx	[%i1+0],	%o2	! ap[0]
	ldx	[%i3+0],	%o4	! np[0]

	mulx	%o2,	%g2,	%g4	! ap[0]*bp[i]
	ldx	[%l5],		%o7	! tp[0]
	.word	0x8bb282c2 !umulxhi	%o2,%g2,%g5
	ldx	[%i1+8],	%o2	! ap[1]
	addcc	%g4,	%o7,	%g4	! ap[0]*bp[i]+tp[0]
	mulx	%o2,	%g2,	%o3	! ap[1]*bp[i]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	mulx	%g4,	%g1,	%g3	! tp[0]*n0
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	mulx	%o4,	%g3,	%o0	! np[0]*m1
	add	%i1,	16,	%i1
	.word	0x93b302c3 !umulxhi	%o4,%g3,%o1
	ldx	[%i3+8],	%o4	! np[1]
	add	%i3,	16,	%i3
	addcc	%o0,	%g4,	%o0
	mulx	%o4,	%g3,	%o5	! np[1]*m1
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj

	ba	.Linner_g5
	sub	%i5,	24,	%l4	! cnt=num-3 (delay slot)
.align	16
.Linner_g5:
	addcc	%o3,	%g5,	%g4
	ldx	[%l5+8],	%o7	! tp[j]
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	ldx	[%i1+0],	%o2	! ap[j]
	add	%i1,	8,	%i1
	addcc	%o5,	%o1,	%o0
	mulx	%o2,	%g2,	%o3	! ap[j]*bp[i]
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	ldx	[%i3+0],	%o4	! np[j]
	add	%i3,	8,	%i3
	.word	0x95b282c2 !umulxhi	%o2,%g2,%o2	! ahi=aj
	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
	mulx	%o4,	%g3,	%o5	! np[j]*m1
	.word	0x8bb00225 !addxc	%g0,%g5,%g5
	.word	0x99b302c3 !umulxhi	%o4,%g3,%o4	! nhi=nj
	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0,	[%l5]		! tp[j-1]
	add	%l5,	8,	%l5
	brnz,pt	%l4,	.Linner_g5
	sub	%l4,	8,	%l4	! delay slot: j--
!.Linner_g5
	ldx	[%l5+8],	%o7	! tp[j]
	addcc	%o3,	%g5,	%g4
	.word	0x8bb28220 !addxc	%o2,%g0,%g5	! ahi=aj
	addcc	%g4,	%o7,	%g4	! ap[j]*bp[i]+tp[j]
	.word	0x8bb00225 !addxc	%g0,%g5,%g5

	addcc	%o5,	%o1,	%o0
	.word	0x93b30220 !addxc	%o4,%g0,%o1	! nhi=nj
	addcc	%o0,	%g4,	%o0	! np[j]*m1+ap[j]*bp[i]+tp[j]
	.word	0x93b00229 !addxc	%g0,%o1,%o1
	stx	%o0,	[%l5]		! tp[j-1]

	subcc	%g0,	%l0,	%g0	! move upmost overflow to CCR.xcc
	.word	0x93b24265 !addxccc	%o1,%g5,%o1
	.word	0xa1b00220 !addxc	%g0,%g0,%l0
	stx	%o1,	[%l5+8]
	add	%l5,	16,	%l5

	brnz,pt	%l1,	.Louter_g5
	sub	%l1,	8,	%l1	! delay slot: i--

	sub	%i1,	%i5,	%i1	! rewind
	sub	%i3,	%i5,	%i3
	sub	%l5,	%i5,	%l5
	ba	.Lsub_g5
	subcc	%i5,	8,	%l4	! cnt=num-1 and clear CCR.xcc (delay slot)

.align	16
! Conditional final reduction: rp[] = tp[] - np[] in 32-bit halves
! (stored swapped for the 32-bit word layout), then conditionally keep
! tp[] if the subtraction borrowed.
.Lsub_g5:
	ldx	[%l5],		%o7
	add	%l5,	8,	%l5
	ldx	[%i3+0],	%o4
	add	%i3,	8,	%i3
	subccc	%o7,	%o4,	%l2	! tp[j]-np[j]
	srlx	%o7,	32,	%o7
	srlx	%o4,	32,	%o4
	subccc	%o7,	%o4,	%l3
	add	%i0,	8,	%i0
	st	%l2,	[%i0-4]		! reverse order
	st	%l3,	[%i0-8]
	brnz,pt	%l4,	.Lsub_g5
	sub	%l4,	8,	%l4	! delay slot: j--

	sub	%i3,	%i5,	%i3	! rewind
	sub	%l5,	%i5,	%l5
	sub	%i0,	%i5,	%i0

	subccc	%l0,	%g0,	%l0	! handle upmost overflow bit
	ba	.Lcopy_g5
	sub	%i5,	8,	%l4	! delay slot

.align	16
.Lcopy_g5:				! conditional copy
	ldx	[%l5],		%o7
	ldx	[%i0+0],	%l2
	stx	%g0,	[%l5]		! zap (scrub secret tp from stack)
	add	%l5,	8,	%l5
	movcs	%icc,	%o7,	%l2	! borrow => result was tp, keep it
	stx	%l2,	[%i0+0]
	add	%i0,	8,	%i0
	brnz	%l4,	.Lcopy_g5
	sub	%l4,	8,	%l4	! delay slot: j--

	mov	1,	%o0		! return 1 (success)
	ret
	restore
.type	bn_mul_mont_gather5_t4, #function
.size	bn_mul_mont_gather5_t4, .-bn_mul_mont_gather5_t4
! bn_flip_t4(dst=%o0, src=%o1, num=%o2)
! Copies num 8-byte units from src to dst, swapping the two 32-bit
! halves of each unit (converts between the 32-bit word order and the
! 64-bit limb order used by the T4 code paths above).
! NOTE(review): loop tests at the bottom, so num is assumed > 0 —
! confirm against callers.  Clobbers %o0-%o2, %o4, %o5.
.globl	bn_flip_t4
.align	32
bn_flip_t4:
.Loop_flip:
	ld	[%o1+0],	%o4
	sub	%o2,	1,	%o2
	ld	[%o1+4],	%o5
	add	%o1,	8,	%o1
	st	%o5,	[%o0+0]		! halves stored swapped
	st	%o4,	[%o0+4]
	brnz	%o2,	.Loop_flip
	add	%o0,	8,	%o0	! delay slot: dst++
	retl
	nop
.type	bn_flip_t4, #function
.size	bn_flip_t4, .-bn_flip_t4
4388
! bn_flip_n_scatter5_t4(inp=%o0, num=%o1, pwrtbl=%o2, pwr=%o3)
! Reads num 32-bit words (num/2 64-bit units) from inp, swaps the
! 32-bit halves of each unit while recombining them into a 64-bit
! value, and scatters the units into column 'pwr' of the interleaved
! power table: one stx every 32*8 bytes starting at &pwrtbl[pwr].
! This is the write-side counterpart of the constant-time gather in
! bn_gather5_t4.  NOTE(review): num assumed >= 2; clobbers %o0-%o5.
.globl	bn_flip_n_scatter5_t4
.align	32
bn_flip_n_scatter5_t4:
	sll	%o3,	3,	%o3
	srl	%o1,	1,	%o1	! num/2 = count of 64-bit units
	add	%o3,	%o2,	%o2	! &pwrtbl[pwr]
	sub	%o1,	1,	%o1
.Loop_flip_n_scatter5:
	ld	[%o0+0],	%o4	! inp[i]
	ld	[%o0+4],	%o5
	add	%o0,	8,	%o0
	sllx	%o5,	32,	%o5
	or	%o4,	%o5,	%o5	! 64-bit unit, halves swapped
	stx	%o5,	[%o2]
	add	%o2,	32*8,	%o2	! next row of interleaved table
	brnz	%o1,	.Loop_flip_n_scatter5
	sub	%o1,	1,	%o1	! delay slot: i--
	retl
	nop
.type	bn_flip_n_scatter5_t4, #function
.size	bn_flip_n_scatter5_t4, .-bn_flip_n_scatter5_t4
4410
! bn_gather5_t4(out=%o0, num=%o1, pwrtbl=%o2, power=%o3)
! Constant-time gather of one num-limb entry out of 32 interleaved
! table entries.  power&3 selects the 8-byte slot within a 32-byte
! line (folded into the table pointer); 1<<((power>>2)&7) is written
! verbatim into %ccr, so exactly one of the eight movcc predicates in
! the loop is true (per the SPARC V9 CCR bit layout icc.{c,v,z,n} =
! bits 0-3, xcc.{c,v,z,n} = bits 4-7; the first load is the default
! when only icc.c is set).  All eight candidate lines are loaded on
! every iteration, so the access pattern is independent of 'power'
! (cache-timing defence).  NOTE(review): num assumed > 0.
.globl	bn_gather5_t4
.align	32
bn_gather5_t4:
	srl	%o3,	2,	%o4
	and	%o3,	3,	%o5
	and	%o4,	7,	%o4
	sll	%o5,	3,	%o5	! offset within first cache line
	add	%o5,	%o2,	%o2	! of the pwrtbl
	or	%g0,	1,	%o5
	sll	%o5,	%o4,	%g1
	wr	%g1,	%g0,	%ccr	! selector mask -> condition codes
	sub	%o1,	1,	%o1
.Loop_gather5:
	ldx	[%o2+0*32],	%g1	! default candidate (mask bit 0)
	ldx	[%o2+1*32],	%o4
	ldx	[%o2+2*32],	%o5
	movvs	%icc,	%o4,	%g1	! taken iff mask bit 1 set
	ldx	[%o2+3*32],	%o4
	move	%icc,	%o5,	%g1	! mask bit 2
	ldx	[%o2+4*32],	%o5
	movneg	%icc,	%o4,	%g1	! mask bit 3
	ldx	[%o2+5*32],	%o4
	movcs	%xcc,	%o5,	%g1	! mask bit 4
	ldx	[%o2+6*32],	%o5
	movvs	%xcc,	%o4,	%g1	! mask bit 5
	ldx	[%o2+7*32],	%o4
	move	%xcc,	%o5,	%g1	! mask bit 6
	add	%o2,8*32,	%o2	! next row of interleaved table
	movneg	%xcc,	%o4,	%g1	! mask bit 7
	stx	%g1,	[%o0]
	add	%o0,	8,	%o0
	brnz	%o1,	.Loop_gather5
	sub	%o1,	1,	%o1	! delay slot: i--

	retl
	nop
.type	bn_gather5_t4, #function
.size	bn_gather5_t4, .-bn_gather5_t4
4449
4450.asciz	"Montgomery Multiplication for SPARC T4, David S. Miller, Andy Polyakov"
4451.align	4
4452