xref: /freebsd/sys/crypto/openssl/powerpc/ppc-mont.S (revision 1d386b48)
1/* Do not modify. This file is auto-generated from ppc-mont.pl. */
2.machine	"any"
3.text
4
/*
 * bn_mul_mont_int -- word-at-a-time (32-bit) multiply/reduce loop.
 *
 * Register use that is visible in this routine:
 *   r3      incoming pointer, copied to r9 and used as the destination of
 *           the final stores; on return r3 is 0 (early exit) or 1.
 *   r4, r6  base pointers of word arrays read with lwz/lwzx.
 *   r5      base pointer of the array whose words are used one at a time
 *           as the multiplier r23.
 *   r7      pointer to a single word, loaded once and used as a multiplier.
 *   r8      word count.
 * NOTE(review): presumably these are rp, ap, bp, np, &n0 and num of
 * OpenSSL's bn_mul_mont() -- confirm against the perlasm generator.
 *
 * Returns immediately with r3 = 0 when r8 >= 32 (signed compare).
 */
5.globl	bn_mul_mont_int
6.type	bn_mul_mont_int,@function
7.align	5
8bn_mul_mont_int:
9	mr	9,3
10	li	3,0
11	cmpwi	8,32
12	bgelr
/*
 * Allocate the frame: new r1 = (r1 - (4*r8 + 256)) & -4096, expressed as an
 * offset in r3 so that stwux both stores the old r1 at the new stack top and
 * updates r1.  r12 keeps the old r1; r8 is converted back to a word count.
 */
13	slwi	8,8,2
14	li	12,-4096
15	addi	3,8,256
16	subf	3,3,1
17	and	3,3,12
18	subf	3,1,3
19	mr	12,1
20	srwi	8,8,2
21	stwux	1,1,3
22
/* Save r20-r31 just below the caller's stack pointer (r12). */
23	stw	20,-48(12)
24	stw	21,-44(12)
25	stw	22,-40(12)
26	stw	23,-36(12)
27	stw	24,-32(12)
28	stw	25,-28(12)
29	stw	26,-24(12)
30	stw	27,-20(12)
31	stw	28,-16(12)
32	stw	29,-12(12)
33	stw	30,-8(12)
34	stw	31,-4(12)
35
/* r7 = word that r7 pointed to; r8 = count - 2, used as the CTR value. */
36	lwz	7,0(7)
37	addi	8,8,-2
38
/*
 * First pass.  r23 = first word at r5; r22 = r1+32 is the write pointer into
 * the temporary area.  r24 = low32(low32(r4[0]*r23) * r7) is the second
 * multiplier, applied to the words read via r6.
 */
39	lwz	23,0(5)
40	lwz	10,0(4)
41	addi	22,1,32
42	mullw	25,10,23
43	mulhwu	26,10,23
44
45	lwz	10,4(4)
46	lwz	11,0(6)
47
48	mullw	24,25,7
49
50	mullw	29,10,23
51	mulhwu	30,10,23
52
53	mullw	27,11,24
54	mulhwu	28,11,24
55	lwz	11,4(6)
56	addc	27,27,25
57	addze	28,28
58
59	mullw	31,11,24
60	mulhwu	0,11,24
61
62	mtctr	8
63	li	21,8
64.align	4
/*
 * r21 is the byte index into r4/r6 (starts at 8).  Each iteration combines
 * the two product streams with their carries and stores one word at 0(r22).
 */
65.L1st:
66	lwzx	10,4,21
67	addc	25,29,26
68	lwzx	11,6,21
69	addze	26,30
70	mullw	29,10,23
71	addc	27,31,28
72	mulhwu	30,10,23
73	addze	28,0
74	mullw	31,11,24
75	addc	27,27,25
76	mulhwu	0,11,24
77	addze	28,28
78	stw	27,0(22)
79
80	addi	21,21,4
81	addi	22,22,4
82	bdnz	.L1st
83
/* Drain the products still in flight after the loop. */
84	addc	25,29,26
85	addze	26,30
86
87	addc	27,31,28
88	addze	28,0
89	addc	27,27,25
90	addze	28,28
91	stw	27,0(22)
92
/* Store the top word; r3 receives the carry out of that last addition. */
93	li	3,0
94	addc	28,28,26
95	addze	3,3
96	stw	28,4(22)
97
/* r20 = byte index of the next multiplier word at r5. */
98	li	20,4
99.align	4
/*
 * Outer loop: same computation as the first pass, but the words already in
 * the temporary area (32(r1), then 4(r22)) are added in as well.
 */
100.Louter:
101	lwzx	23,5,20
102	lwz	10,0(4)
103	addi	22,1,32
104	lwz	12,32(1)
105	mullw	25,10,23
106	mulhwu	26,10,23
107	lwz	10,4(4)
108	lwz	11,0(6)
109	addc	25,25,12
110	mullw	29,10,23
111	addze	26,26
112	mullw	24,25,7
113	mulhwu	30,10,23
114	mullw	27,11,24
115	mulhwu	28,11,24
116	lwz	11,4(6)
117	addc	27,27,25
118	mullw	31,11,24
119	addze	28,28
120	mulhwu	0,11,24
121
122	mtctr	8
123	li	21,8
124.align	4
125.Linner:
126	lwzx	10,4,21
127	addc	25,29,26
128	lwz	12,4(22)
129	addze	26,30
130	lwzx	11,6,21
131	addc	27,31,28
132	mullw	29,10,23
133	addze	28,0
134	mulhwu	30,10,23
135	addc	25,25,12
136	mullw	31,11,24
137	addze	26,26
138	mulhwu	0,11,24
139	addc	27,27,25
140	addi	21,21,4
141	addze	28,28
142	stw	27,0(22)
143	addi	22,22,4
144	bdnz	.Linner
145
146	lwz	12,4(22)
147	addc	25,29,26
148	addze	26,30
149	addc	25,25,12
150	addze	26,26
151
152	addc	27,31,28
153	addze	28,0
154	addc	27,27,25
155	addze	28,28
156	stw	27,0(22)
157
/*
 * addic r3,r3,-1 sets XER[CA] exactly when r3 was non-zero, so the carry
 * saved by the previous pass is fed into the adde below; r3 then receives
 * the new carry out.
 */
158	addic	3,3,-1
159	li	3,0
160	adde	28,28,26
161	addze	3,3
162	stw	28,4(22)
163
/*
 * The raw word 0x7c146040 encodes "cmplw r20,r12": continue while the byte
 * index just used is <= 4*r8 (unsigned).  r20 is advanced after the compare.
 */
164	slwi	12,8,2
165	.long	0x7c146040
166	addi	20,20,4
167	ble	.Louter
168
/*
 * Restore r8 to the full word count.  subfc r21,r21,r21 yields r21 = 0 and
 * sets XER[CA], i.e. "no borrow" for the subfe chain that follows.
 */
169	addi	8,8,2
170	subfc	21,21,21
171	addi	22,1,32
172	mtctr	8
173
174.align	4
/* r9[j] = temp[j] - r6[j], with the borrow carried between words. */
175.Lsub:	lwzx	12,22,21
176	lwzx	11,6,21
177	subfe	10,11,12
178	stwx	10,9,21
179	addi	21,21,4
180	bdnz	.Lsub
181
/* r3 = r3 - 1 + CA: turns the saved carry and final borrow into a mask. */
182	li	21,0
183	mtctr	8
184	subfe	3,21,3
185
186.align	4
/*
 * Branch-free select per word: r9[j] = (temp[j] & r3) | (r9[j] & ~r3).
 * The temporary word is overwritten in the same pass (with the index r21).
 */
187.Lcopy:
188	lwzx	12,22,21
189	lwzx	10,9,21
190	and	12,12,3
191	andc	10,10,3
192	stwx	21,22,21
193	or	10,10,12
194	stwx	10,9,21
195	addi	21,21,4
196	bdnz	.Lcopy
197
/* r12 = saved stack pointer from 0(r1); return value 1; restore r20-r31. */
198	lwz	12,0(1)
199	li	3,1
200	lwz	20,-48(12)
201	lwz	21,-44(12)
202	lwz	22,-40(12)
203	lwz	23,-36(12)
204	lwz	24,-32(12)
205	lwz	25,-28(12)
206	lwz	26,-24(12)
207	lwz	27,-20(12)
208	lwz	28,-16(12)
209	lwz	29,-12(12)
210	lwz	30,-8(12)
211	lwz	31,-4(12)
212	mr	1,12
213	blr
/* NOTE(review): the data words below look like a traceback table -- confirm. */
214.long	0
215.byte	0,12,4,0,0x80,12,6,0
216.long	0
217.size	bn_mul_mont_int,.-bn_mul_mont_int
/*
 * bn_mul4x_mont_int -- multiply/reduce processing four 32-bit words per pass.
 *
 * Entry dispatch: if (r8 & 7) != 0 or r4 != r5 the code continues at
 * .Lmul4x_do; otherwise it branches to .Lsqr8x_do, the squaring path.
 * The raw word 0x7c042840 encodes "cmplw r4,r5".
 *
 * Register use that is visible in this routine (after the -4 pointer bias):
 *   r3  destination pointer, parked at 4*6(r1)
 *   r4  first operand pointer, r30 = its end (r4 + 4*count)
 *   r5  pointer to the multiplier words, loaded one at a time into r27
 *   r6  pointer to the words multiplied by r28
 *   r7  replaced by the word it pointed to
 *   r8  word count, converted to a byte count
 *   r22-r26 accumulator, r14-r17 product scratch, r29 write pointer into
 *   the temporary area on the stack, r31 rotating index (4,8,12,0), r0 = 0.
 * NOTE(review): presumably rp, ap, bp, np, &n0, num of OpenSSL's
 * bn_mul_mont() -- confirm against the perlasm generator.
 */
218.globl	bn_mul4x_mont_int
219.type	bn_mul4x_mont_int,@function
220.align	5
221bn_mul4x_mont_int:
222	andi.	0,8,7
223	bne	.Lmul4x_do
224	.long	0x7c042840
225	bne	.Lmul4x_do
226	b	.Lsqr8x_do
/*
 * Allocate 32*4 + 4*count bytes of stack (stwux also stores the old r1 at
 * the new stack top) and save r14-r31 below the old stack pointer (r9).
 */
227.Lmul4x_do:
228	slwi	8,8,2
229	mr	9,1
230	li	10,-32*4
231	sub	10,10,8
232	stwux	1,1,10
233
234	stw	14,-4*18(9)
235	stw	15,-4*17(9)
236	stw	16,-4*16(9)
237	stw	17,-4*15(9)
238	stw	18,-4*14(9)
239	stw	19,-4*13(9)
240	stw	20,-4*12(9)
241	stw	21,-4*11(9)
242	stw	22,-4*10(9)
243	stw	23,-4*9(9)
244	stw	24,-4*8(9)
245	stw	25,-4*7(9)
246	stw	26,-4*6(9)
247	stw	27,-4*5(9)
248	stw	28,-4*4(9)
249	stw	29,-4*3(9)
250	stw	30,-4*2(9)
251	stw	31,-4*1(9)
252
/* Bias the pointers by one word so that 4*1(rX) addresses element 0. */
253	subi	4,4,4
254	subi	6,6,4
255	subi	3,3,4
256	lwz	7,0(7)
257
/* r14 = r5 + bytes - 16 (address of the last group of four), r30 = end of r4. */
258	add	14,5,8
259	add	30,4,8
260	subi	14,14,4*4
261
/* Load the first multiplier word and the first four words of r4 and r6. */
262	lwz	27,4*0(5)
263	li	22,0
264	lwz	9,4*1(4)
265	li	23,0
266	lwz	10,4*2(4)
267	li	24,0
268	lwz	11,4*3(4)
269	li	25,0
270	lwzu	12,4*4(4)
271	lwz	18,4*1(6)
272	lwz	19,4*2(6)
273	lwz	20,4*3(6)
274	lwzu	21,4*4(6)
275
/*
 * Park r3 and r14 in the frame.  r29 = r1 + 4*7 is formed with addic.
 * NOTE(review): presumably addic is chosen so that XER[CA] is clear for the
 * addze at the head of the loop -- confirm.
 */
276	stw	3,4*6(1)
277	stw	14,4*7(1)
278	li	3,0
279	addic	29,1,4*7
280	li	31,0
281	li	0,0
282	b	.Loop_mul4x_1st_reduction
283
284.align	5
/*
 * Four iterations (r31 cycles 4,8,12,0).  Each one adds r9..r12 * r27 into
 * the accumulator, computes r28 = low32(r22 * r7), stores r28 at 4(r29) for
 * later passes, and adds r18..r21 * r28.  The low product r18*r28 is never
 * computed; "addic r22,r22,-1" reproduces only its carry (set when r22 != 0).
 */
285.Loop_mul4x_1st_reduction:
286	mullw	14,9,27
287	addze	3,3
288	mullw	15,10,27
289	addi	31,31,4
290	mullw	16,11,27
291	andi.	31,31,4*4-1
292	mullw	17,12,27
293	addc	22,22,14
294	mulhwu	14,9,27
295	adde	23,23,15
296	mulhwu	15,10,27
297	adde	24,24,16
298	mullw	28,22,7
299	adde	25,25,17
300	mulhwu	16,11,27
301	addze	26,0
302	mulhwu	17,12,27
303	lwzx	27,5,31
304	addc	23,23,14
305
306	stwu	28,4(29)
307	adde	24,24,15
308	mullw	15,19,28
309	adde	25,25,16
310	mullw	16,20,28
311	adde	26,26,17
312	mullw	17,21,28
313
314
315
316
317
318
319
320
321
322
323	addic	22,22,-1
324	mulhwu	14,18,28
325	adde	22,23,15
326	mulhwu	15,19,28
327	adde	23,24,16
328	mulhwu	16,20,28
329	adde	24,25,17
330	mulhwu	17,21,28
331	adde	25,26,3
332	addze	3,0
333	addc	22,22,14
334	adde	23,23,15
335	adde	24,24,16
336	adde	25,25,17
337
338	bne	.Loop_mul4x_1st_reduction
339
/* 0x7c1e2040 encodes "cmplw r30,r4": r4 at its end means only four words. */
340	.long	0x7c1e2040
341	beq	.Lmul4x4_post_condition
342
343	lwz	9,4*1(4)
344	lwz	10,4*2(4)
345	lwz	11,4*3(4)
346	lwzu	12,4*4(4)
347	lwz	28,4*8(1)
348	lwz	18,4*1(6)
349	lwz	19,4*2(6)
350	lwz	20,4*3(6)
351	lwzu	21,4*4(6)
352	b	.Loop_mul4x_1st_tail
353
354.align	5
/*
 * Tail of the first pass: applies the four multiplier words and the four
 * saved r28 values (re-read from r1 + 4*8 + r31) to the next four words of
 * r4 and r6, storing one finished word per iteration at 4(r29).
 */
355.Loop_mul4x_1st_tail:
356	mullw	14,9,27
357	addze	3,3
358	mullw	15,10,27
359	addi	31,31,4
360	mullw	16,11,27
361	andi.	31,31,4*4-1
362	mullw	17,12,27
363	addc	22,22,14
364	mulhwu	14,9,27
365	adde	23,23,15
366	mulhwu	15,10,27
367	adde	24,24,16
368	mulhwu	16,11,27
369	adde	25,25,17
370	mulhwu	17,12,27
371	addze	26,0
372	lwzx	27,5,31
373	addc	23,23,14
374	mullw	14,18,28
375	adde	24,24,15
376	mullw	15,19,28
377	adde	25,25,16
378	mullw	16,20,28
379	adde	26,26,17
380	mullw	17,21,28
381	addc	22,22,14
382	mulhwu	14,18,28
383	adde	23,23,15
384	mulhwu	15,19,28
385	adde	24,24,16
386	mulhwu	16,20,28
387	adde	25,25,17
388	adde	26,26,3
389	mulhwu	17,21,28
390	addze	3,0
391	addi	28,1,4*8
392	lwzx	28,28,31
393	stwu	22,4(29)
394	addc	22,23,14
395	adde	23,24,15
396	adde	24,25,16
397	adde	25,26,17
398
399	bne	.Loop_mul4x_1st_tail
400
/* r15 = r30 - bytes (start of r4 again); cmplw r30,r4 tests for the end. */
401	sub	15,30,8
402	.long	0x7c1e2040
403	beq	.Lmul4x_proceed
404
405	lwz	9,4*1(4)
406	lwz	10,4*2(4)
407	lwz	11,4*3(4)
408	lwzu	12,4*4(4)
409	lwz	18,4*1(6)
410	lwz	19,4*2(6)
411	lwz	20,4*3(6)
412	lwzu	21,4*4(6)
413	b	.Loop_mul4x_1st_tail
414
415.align	5
/*
 * Start the next group of four multiplier words: advance r5, rewind r4 and
 * r6, store the top four words plus the carry word (r3), and reload the
 * first four temporary words from 4*12..4*15(r1).
 */
416.Lmul4x_proceed:
417	lwzu	27,4*4(5)
418	addze	3,3
419	lwz	9,4*1(15)
420	lwz	10,4*2(15)
421	lwz	11,4*3(15)
422	lwz	12,4*4(15)
423	addi	4,15,4*4
424	sub	6,6,8
425
426	stw	22,4*1(29)
427	stw	23,4*2(29)
428	stw	24,4*3(29)
429	stw	25,4*4(29)
430	stw	3,4*5(29)
431	lwz	22,4*12(1)
432	lwz	23,4*13(1)
433	lwz	24,4*14(1)
434	lwz	25,4*15(1)
435
436	lwz	18,4*1(6)
437	lwz	19,4*2(6)
438	lwz	20,4*3(6)
439	lwzu	21,4*4(6)
440	addic	29,1,4*7
441	li	3,0
442	b	.Loop_mul4x_reduction
443
444.align	5
/* Same instruction sequence as .Loop_mul4x_1st_reduction, on reloaded state. */
445.Loop_mul4x_reduction:
446	mullw	14,9,27
447	addze	3,3
448	mullw	15,10,27
449	addi	31,31,4
450	mullw	16,11,27
451	andi.	31,31,4*4-1
452	mullw	17,12,27
453	addc	22,22,14
454	mulhwu	14,9,27
455	adde	23,23,15
456	mulhwu	15,10,27
457	adde	24,24,16
458	mullw	28,22,7
459	adde	25,25,17
460	mulhwu	16,11,27
461	addze	26,0
462	mulhwu	17,12,27
463	lwzx	27,5,31
464	addc	23,23,14
465
466	stwu	28,4(29)
467	adde	24,24,15
468	mullw	15,19,28
469	adde	25,25,16
470	mullw	16,20,28
471	adde	26,26,17
472	mullw	17,21,28
473
474	addic	22,22,-1
475	mulhwu	14,18,28
476	adde	22,23,15
477	mulhwu	15,19,28
478	adde	23,24,16
479	mulhwu	16,20,28
480	adde	24,25,17
481	mulhwu	17,21,28
482	adde	25,26,3
483	addze	3,0
484	addc	22,22,14
485	adde	23,23,15
486	adde	24,24,16
487	adde	25,25,17
488
489	bne	.Loop_mul4x_reduction
490
/* Add the next four temporary words and fetch the next operand words. */
491	lwz	14,4*5(29)
492	addze	3,3
493	lwz	15,4*6(29)
494	lwz	16,4*7(29)
495	lwz	17,4*8(29)
496	lwz	9,4*1(4)
497	lwz	10,4*2(4)
498	lwz	11,4*3(4)
499	lwzu	12,4*4(4)
500	addc	22,22,14
501	adde	23,23,15
502	adde	24,24,16
503	adde	25,25,17
504
505
506	lwz	28,4*8(1)
507	lwz	18,4*1(6)
508	lwz	19,4*2(6)
509	lwz	20,4*3(6)
510	lwzu	21,4*4(6)
511	b	.Loop_mul4x_tail
512
513.align	5
/* Same arithmetic as .Loop_mul4x_1st_tail; the scheduling differs slightly. */
514.Loop_mul4x_tail:
515	mullw	14,9,27
516	addze	3,3
517	mullw	15,10,27
518	addi	31,31,4
519	mullw	16,11,27
520	andi.	31,31,4*4-1
521	mullw	17,12,27
522	addc	22,22,14
523	mulhwu	14,9,27
524	adde	23,23,15
525	mulhwu	15,10,27
526	adde	24,24,16
527	mulhwu	16,11,27
528	adde	25,25,17
529	mulhwu	17,12,27
530	addze	26,0
531	lwzx	27,5,31
532	addc	23,23,14
533	mullw	14,18,28
534	adde	24,24,15
535	mullw	15,19,28
536	adde	25,25,16
537	mullw	16,20,28
538	adde	26,26,17
539	mullw	17,21,28
540	addc	22,22,14
541	mulhwu	14,18,28
542	adde	23,23,15
543	mulhwu	15,19,28
544	adde	24,24,16
545	mulhwu	16,20,28
546	adde	25,25,17
547	mulhwu	17,21,28
548	adde	26,26,3
549	addi	28,1,4*8
550	lwzx	28,28,31
551	addze	3,0
552	stwu	22,4(29)
553	addc	22,23,14
554	adde	23,24,15
555	adde	24,25,16
556	adde	25,26,17
557
558	bne	.Loop_mul4x_tail
559
/* r14 = next temporary word, r15 = r6 - bytes; cmplw r30,r4 ends the row. */
560	lwz	14,4*5(29)
561	sub	15,6,8
562	addze	3,3
563	.long	0x7c1e2040
564	beq	.Loop_mul4x_break
565
566	lwz	15,4*6(29)
567	lwz	16,4*7(29)
568	lwz	17,4*8(29)
569	lwz	9,4*1(4)
570	lwz	10,4*2(4)
571	lwz	11,4*3(4)
572	lwzu	12,4*4(4)
573	addc	22,22,14
574	adde	23,23,15
575	adde	24,24,16
576	adde	25,25,17
577
578
579	lwz	18,4*1(6)
580	lwz	19,4*2(6)
581	lwz	20,4*3(6)
582	lwzu	21,4*4(6)
583	b	.Loop_mul4x_tail
584
585.align	5
/*
 * End of a row: reload the values parked at 4*6(r1) and 4*7(r1) into r16 and
 * r17, add the word in r14 into the top four words, store them with the
 * carry word, and rewind r4 and r6.  0x7c058840 encodes "cmplw r5,r17":
 * equal means the last group of multiplier words has been processed.
 */
586.Loop_mul4x_break:
587	lwz	16,4*6(1)
588	lwz	17,4*7(1)
589	addc	9,22,14
590	lwz	22,4*12(1)
591	addze	10,23
592	lwz	23,4*13(1)
593	addze	11,24
594	lwz	24,4*14(1)
595	addze	12,25
596	lwz	25,4*15(1)
597	addze	3,3
598	stw	9,4*1(29)
599	sub	4,30,8
600	stw	10,4*2(29)
601	stw	11,4*3(29)
602	stw	12,4*4(29)
603	stw	3,4*5(29)
604
605	lwz	18,4*1(15)
606	lwz	19,4*2(15)
607	lwz	20,4*3(15)
608	lwz	21,4*4(15)
609	addi	6,15,4*4
610	.long	0x7c058840
611	beq	.Lmul4x_post
612
613	lwzu	27,4*4(5)
614	lwz	9,4*1(4)
615	lwz	10,4*2(4)
616	lwz	11,4*3(4)
617	lwzu	12,4*4(4)
618	li	3,0
619	addic	29,1,4*7
620	b	.Loop_mul4x_reduction
621
622.align	5
/*
 * Final step: subtract the words read via r6 from the temporary words, with
 * borrow, writing the difference through r5 (= r16, the parked destination).
 * r31 = bytes/16 - 1 is the CTR value; r30 keeps a copy of the destination.
 */
623.Lmul4x_post:
624
625
626
627
628	srwi	31,8,4
629	mr	5,16
630	subi	31,31,1
631	mr	30,16
632	subfc	14,18,22
633	addi	29,1,4*15
634	subfe	15,19,23
635
636	mtctr	31
637.Lmul4x_sub:
638	lwz	18,4*1(6)
639	lwz	22,4*1(29)
640	subfe	16,20,24
641	lwz	19,4*2(6)
642	lwz	23,4*2(29)
643	subfe	17,21,25
644	lwz	20,4*3(6)
645	lwz	24,4*3(29)
646	lwzu	21,4*4(6)
647	lwzu	25,4*4(29)
648	stw	14,4*1(5)
649	stw	15,4*2(5)
650	subfe	14,18,22
651	stw	16,4*3(5)
652	stwu	17,4*4(5)
653	subfe	15,19,23
654	bdnz	.Lmul4x_sub
655
/*
 * Finish the subtraction, then "subfe r3,r0,r3" folds the final borrow and
 * the carry word into r3, which the copy loop below uses as a select mask.
 */
656	lwz	9,4*1(30)
657	stw	14,4*1(5)
658	lwz	14,4*12(1)
659	subfe	16,20,24
660	lwz	10,4*2(30)
661	stw	15,4*2(5)
662	lwz	15,4*13(1)
663	subfe	17,21,25
664	subfe	3,0,3
665	addi	29,1,4*12
666	lwz	11,4*3(30)
667	stw	16,4*3(5)
668	lwz	16,4*14(1)
669	lwz	12,4*4(30)
670	stw	17,4*4(5)
671	lwz	17,4*15(1)
672
673	mtctr	31
/*
 * Branch-free select, four words per iteration:
 *   destination = (temporary & r3) | (difference & ~r3)
 * Each temporary word is overwritten with zero (r0) after it has been read.
 */
674.Lmul4x_cond_copy:
675	and	14,14,3
676	andc	9,9,3
677	stw	0,4*0(29)
678	and	15,15,3
679	andc	10,10,3
680	stw	0,4*1(29)
681	and	16,16,3
682	andc	11,11,3
683	stw	0,4*2(29)
684	and	17,17,3
685	andc	12,12,3
686	stw	0,4*3(29)
687	or	22,14,9
688	lwz	9,4*5(30)
689	lwz	14,4*4(29)
690	or	23,15,10
691	lwz	10,4*6(30)
692	lwz	15,4*5(29)
693	or	24,16,11
694	lwz	11,4*7(30)
695	lwz	16,4*6(29)
696	or	25,17,12
697	lwz	12,4*8(30)
698	lwz	17,4*7(29)
699	addi	29,29,4*4
700	stw	22,4*1(30)
701	stw	23,4*2(30)
702	stw	24,4*3(30)
703	stwu	25,4*4(30)
704	bdnz	.Lmul4x_cond_copy
705
/* Last group of four; r5 = saved stack pointer from 0(r1) for the epilogue. */
706	lwz	5,0(1)
707	and	14,14,3
708	andc	9,9,3
709	stw	0,4*0(29)
710	and	15,15,3
711	andc	10,10,3
712	stw	0,4*1(29)
713	and	16,16,3
714	andc	11,11,3
715	stw	0,4*2(29)
716	and	17,17,3
717	andc	12,12,3
718	stw	0,4*3(29)
719	or	22,14,9
720	or	23,15,10
721	stw	0,4*4(29)
722	or	24,16,11
723	or	25,17,12
724	stw	22,4*1(30)
725	stw	23,4*2(30)
726	stw	24,4*3(30)
727	stw	25,4*4(30)
728
729	b	.Lmul4x_done
730
731.align	4
/*
 * Four-word case: the result is still in r22-r25 with the carry word in r3.
 * Subtract r18-r21, turn the borrow into a mask in r3, then add r18-r21 back
 * under that mask, so the subtraction is undone without a branch.
 */
732.Lmul4x4_post_condition:
733	lwz	4,4*6(1)
734	lwz	5,0(1)
735	addze	3,3
736
737	subfc	9,18,22
738	subfe	10,19,23
739	subfe	11,20,24
740	subfe	12,21,25
741	subfe	3,0,3
742
743	and	18,18,3
744	and	19,19,3
745	addc	9,9,18
746	and	20,20,3
747	adde	10,10,19
748	and	21,21,3
749	adde	11,11,20
750	adde	12,12,21
751
752	stw	9,4*1(4)
753	stw	10,4*2(4)
754	stw	11,4*3(4)
755	stw	12,4*4(4)
756
/*
 * Zero the four words at 4*8..4*11(r1), set the return value to 1, restore
 * r14-r31 relative to the saved stack pointer in r5, and pop the frame.
 */
757.Lmul4x_done:
758	stw	0,4*8(1)
759	stw	0,4*9(1)
760	stw	0,4*10(1)
761	stw	0,4*11(1)
762	li	3,1
763	lwz	14,-4*18(5)
764	lwz	15,-4*17(5)
765	lwz	16,-4*16(5)
766	lwz	17,-4*15(5)
767	lwz	18,-4*14(5)
768	lwz	19,-4*13(5)
769	lwz	20,-4*12(5)
770	lwz	21,-4*11(5)
771	lwz	22,-4*10(5)
772	lwz	23,-4*9(5)
773	lwz	24,-4*8(5)
774	lwz	25,-4*7(5)
775	lwz	26,-4*6(5)
776	lwz	27,-4*5(5)
777	lwz	28,-4*4(5)
778	lwz	29,-4*3(5)
779	lwz	30,-4*2(5)
780	lwz	31,-4*1(5)
781	mr	1,5
782	blr
/* NOTE(review): the data words below look like a traceback table -- confirm. */
783.long	0
784.byte	0,12,4,0x20,0x80,18,6,0
785.long	0
786.size	bn_mul4x_mont_int,.-bn_mul4x_mont_int
/*
 * __bn_sqr8x_mont / .Lsqr8x_do -- squaring path, eight 32-bit words per pass.
 *
 * Reached by a branch from bn_mul4x_mont_int when (r8 & 7) == 0 and r4 == r5,
 * so it starts with the same register contents as that routine.  It has no
 * .globl directive in this file.
 *
 * Phases, in order:
 *   1. allocate and zero a temporary area on the stack;
 *   2. accumulate the products of distinct word pairs (outer_loop / mul);
 *   3. double that result and add the squares of the individual words
 *      (shift_n_add);
 *   4. reduce eight words at a time using the words read via the pointer
 *      parked at 4*7(r1) and the multiplier parked at 4*8(r1)
 *      (reduction / tail);
 *   5. subtract and select without branching (sub / cond_copy, or the
 *      eight-word special case), then restore registers and return 1.
 *
 * Register use: r9-r12,r14-r17 operand words, r18-r21 product scratch,
 * r22-r29 accumulator, r5 pointer into the temporary area, r30 counter,
 * r31 carry word, r0 = 0.
 */
787.align	5
788__bn_sqr8x_mont:
789.Lsqr8x_do:
/* Frame size is 32*4 + 8*count bytes; r8 becomes the operand byte count. */
790	mr	9,1
791	slwi	10,8,3
792	li	11,-32*4
793	sub	10,11,10
794	slwi	8,8,2
795	stwux	1,1,10
796
/* Save r14-r31 below the old stack pointer (r9). */
797	stw	14,-4*18(9)
798	stw	15,-4*17(9)
799	stw	16,-4*16(9)
800	stw	17,-4*15(9)
801	stw	18,-4*14(9)
802	stw	19,-4*13(9)
803	stw	20,-4*12(9)
804	stw	21,-4*11(9)
805	stw	22,-4*10(9)
806	stw	23,-4*9(9)
807	stw	24,-4*8(9)
808	stw	25,-4*7(9)
809	stw	26,-4*6(9)
810	stw	27,-4*5(9)
811	stw	28,-4*4(9)
812	stw	29,-4*3(9)
813	stw	30,-4*2(9)
814	stw	31,-4*1(9)
815
/* Bias pointers by one word; r18 holds the biased r6 until it is parked. */
816	subi	4,4,4
817	subi	18,6,4
818	subi	3,3,4
819	lwz	7,0(7)
820	li	0,0
821
/* r6 = end of the operand; load its first eight words, clear r23-r29. */
822	add	6,4,8
823	lwz	9,4*1(4)
824
825	lwz	10,4*2(4)
826	li	23,0
827	lwz	11,4*3(4)
828	li	24,0
829	lwz	12,4*4(4)
830	li	25,0
831	lwz	14,4*5(4)
832	li	26,0
833	lwz	15,4*6(4)
834	li	27,0
835	lwz	16,4*7(4)
836	li	28,0
837	lwzu	17,4*8(4)
838	li	29,0
839
840	addi	5,1,4*11
841	subic.	30,8,4*8
842	b	.Lsqr8x_zero_start
843
844.align	5
/*
 * Zero the temporary area sixteen words per iteration.  The first pass enters
 * at .Lsqr8x_zero_start and therefore skips the first eight word slots.
 */
845.Lsqr8x_zero:
846	subic.	30,30,4*8
847	stw	0,4*1(5)
848	stw	0,4*2(5)
849	stw	0,4*3(5)
850	stw	0,4*4(5)
851	stw	0,4*5(5)
852	stw	0,4*6(5)
853	stw	0,4*7(5)
854	stw	0,4*8(5)
855.Lsqr8x_zero_start:
856	stw	0,4*9(5)
857	stw	0,4*10(5)
858	stw	0,4*11(5)
859	stw	0,4*12(5)
860	stw	0,4*13(5)
861	stw	0,4*14(5)
862	stw	0,4*15(5)
863	stwu	0,4*16(5)
864	bne	.Lsqr8x_zero
865
/*
 * Park values in the frame: r3 at 4*6, r18 at 4*7, r7 at 4*8, the end of the
 * zeroed area (r5) at 4*9, and a zero carry word at 4*10.
 */
866	stw	3,4*6(1)
867	stw	18,4*7(1)
868	stw	7,4*8(1)
869	stw	5,4*9(1)
870	stw	0,4*10(1)
871	addi	5,1,4*11
872
873
874.align	5
/*
 * Products of distinct word pairs within the current eight words:
 * r9 * r10..r17, then r10 * r11..r17, and so on down to r16 * r17.
 * Finished words are stored through r5 as soon as they are complete.
 */
875.Lsqr8x_outer_loop:
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905	mullw	18,10,9
906	mullw	19,11,9
907	mullw	20,12,9
908	mullw	21,14,9
909	addc	23,23,18
910	mullw	18,15,9
911	adde	24,24,19
912	mullw	19,16,9
913	adde	25,25,20
914	mullw	20,17,9
915	adde	26,26,21
916	mulhwu	21,10,9
917	adde	27,27,18
918	mulhwu	18,11,9
919	adde	28,28,19
920	mulhwu	19,12,9
921	adde	29,29,20
922	mulhwu	20,14,9
923	stw	22,4*1(5)
924	addze	22,0
925	stw	23,4*2(5)
926	addc	24,24,21
927	mulhwu	21,15,9
928	adde	25,25,18
929	mulhwu	18,16,9
930	adde	26,26,19
931	mulhwu	19,17,9
932	adde	27,27,20
933	mullw	20,11,10
934	adde	28,28,21
935	mullw	21,12,10
936	adde	29,29,18
937	mullw	18,14,10
938	adde	22,22,19
939
940	mullw	19,15,10
941	addc	25,25,20
942	mullw	20,16,10
943	adde	26,26,21
944	mullw	21,17,10
945	adde	27,27,18
946	mulhwu	18,11,10
947	adde	28,28,19
948	mulhwu	19,12,10
949	adde	29,29,20
950	mulhwu	20,14,10
951	adde	22,22,21
952	mulhwu	21,15,10
953	stw	24,4*3(5)
954	addze	23,0
955	stw	25,4*4(5)
956	addc	26,26,18
957	mulhwu	18,16,10
958	adde	27,27,19
959	mulhwu	19,17,10
960	adde	28,28,20
961	mullw	20,12,11
962	adde	29,29,21
963	mullw	21,14,11
964	adde	22,22,18
965	mullw	18,15,11
966	adde	23,23,19
967
968	mullw	19,16,11
969	addc	27,27,20
970	mullw	20,17,11
971	adde	28,28,21
972	mulhwu	21,12,11
973	adde	29,29,18
974	mulhwu	18,14,11
975	adde	22,22,19
976	mulhwu	19,15,11
977	adde	23,23,20
978	mulhwu	20,16,11
979	stw	26,4*5(5)
980	addze	24,0
981	stw	27,4*6(5)
982	addc	28,28,21
983	mulhwu	21,17,11
984	adde	29,29,18
985	mullw	18,14,12
986	adde	22,22,19
987	mullw	19,15,12
988	adde	23,23,20
989	mullw	20,16,12
990	adde	24,24,21
991
992	mullw	21,17,12
993	addc	29,29,18
994	mulhwu	18,14,12
995	adde	22,22,19
996	mulhwu	19,15,12
997	adde	23,23,20
998	mulhwu	20,16,12
999	adde	24,24,21
1000	mulhwu	21,17,12
1001	stw	28,4*7(5)
1002	addze	25,0
1003	stwu	29,4*8(5)
1004	addc	22,22,18
1005	mullw	18,15,14
1006	adde	23,23,19
1007	mullw	19,16,14
1008	adde	24,24,20
1009	mullw	20,17,14
1010	adde	25,25,21
1011
1012	mulhwu	21,15,14
1013	addc	23,23,18
1014	mulhwu	18,16,14
1015	adde	24,24,19
1016	mulhwu	19,17,14
1017	adde	25,25,20
1018	mullw	20,16,15
1019	addze	26,0
1020	addc	24,24,21
1021	mullw	21,17,15
1022	adde	25,25,18
1023	mulhwu	18,16,15
1024	adde	26,26,19
1025
1026	mulhwu	19,17,15
1027	addc	25,25,20
1028	mullw	20,17,16
1029	adde	26,26,21
1030	mulhwu	21,17,16
1031	addze	27,0
1032	addc	26,26,18
/* 0x7c062040 encodes "cmplw r6,r4"; the result is consumed by the beq below. */
1033	.long	0x7c062040
1034	adde	27,27,19
1035
1036	addc	27,27,20
1037	sub	18,6,8
1038	addze	28,0
1039	add	28,28,21
1040
1041	beq	.Lsqr8x_outer_break
1042
/*
 * More operand words remain.  Keep the current first word in r7 as the
 * multiplier, add the next eight temporary words into the accumulator, load
 * the next eight operand words, and set r3 to index the multiplier words.
 */
1043	mr	7,9
1044	lwz	9,4*1(5)
1045	lwz	10,4*2(5)
1046	lwz	11,4*3(5)
1047	lwz	12,4*4(5)
1048	lwz	14,4*5(5)
1049	lwz	15,4*6(5)
1050	lwz	16,4*7(5)
1051	lwz	17,4*8(5)
1052	addc	22,22,9
1053	lwz	9,4*1(4)
1054	adde	23,23,10
1055	lwz	10,4*2(4)
1056	adde	24,24,11
1057	lwz	11,4*3(4)
1058	adde	25,25,12
1059	lwz	12,4*4(4)
1060	adde	26,26,14
1061	lwz	14,4*5(4)
1062	adde	27,27,15
1063	lwz	15,4*6(4)
1064	adde	28,28,16
1065	lwz	16,4*7(4)
1066	subi	3,4,4*7
1067	addze	29,17
1068	lwzu	17,4*8(4)
1069
1070	li	30,0
1071	b	.Lsqr8x_mul
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095.align	5
/*
 * Eight iterations (r30 cycles 4..28,0).  Each multiplies r9..r17 by r7, adds
 * the products into the accumulator, stores one finished word at 4(r5), and
 * loads the next multiplier word from r3 + r30.
 */
1096.Lsqr8x_mul:
1097	mullw	18,9,7
1098	addze	31,0
1099	mullw	19,10,7
1100	addi	30,30,4
1101	mullw	20,11,7
1102	andi.	30,30,4*8-1
1103	mullw	21,12,7
1104	addc	22,22,18
1105	mullw	18,14,7
1106	adde	23,23,19
1107	mullw	19,15,7
1108	adde	24,24,20
1109	mullw	20,16,7
1110	adde	25,25,21
1111	mullw	21,17,7
1112	adde	26,26,18
1113	mulhwu	18,9,7
1114	adde	27,27,19
1115	mulhwu	19,10,7
1116	adde	28,28,20
1117	mulhwu	20,11,7
1118	adde	29,29,21
1119	mulhwu	21,12,7
1120	addze	31,31
1121	stwu	22,4(5)
1122	addc	22,23,18
1123	mulhwu	18,14,7
1124	adde	23,24,19
1125	mulhwu	19,15,7
1126	adde	24,25,20
1127	mulhwu	20,16,7
1128	adde	25,26,21
1129	mulhwu	21,17,7
1130	lwzx	7,3,30
1131	adde	26,27,18
1132	adde	27,28,19
1133	adde	28,29,20
1134	adde	29,31,21
1135
1136	bne	.Lsqr8x_mul
1137
1138
/* 0x7c043040 encodes "cmplw r4,r6": equal means the operand end is reached. */
1139	.long	0x7c043040
1140	beq	.Lsqr8x_break
1141
1142	lwz	9,4*1(5)
1143	lwz	10,4*2(5)
1144	lwz	11,4*3(5)
1145	lwz	12,4*4(5)
1146	lwz	14,4*5(5)
1147	lwz	15,4*6(5)
1148	lwz	16,4*7(5)
1149	lwz	17,4*8(5)
1150	addc	22,22,9
1151	lwz	9,4*1(4)
1152	adde	23,23,10
1153	lwz	10,4*2(4)
1154	adde	24,24,11
1155	lwz	11,4*3(4)
1156	adde	25,25,12
1157	lwz	12,4*4(4)
1158	adde	26,26,14
1159	lwz	14,4*5(4)
1160	adde	27,27,15
1161	lwz	15,4*6(4)
1162	adde	28,28,16
1163	lwz	16,4*7(4)
1164	adde	29,29,17
1165	lwzu	17,4*8(4)
1166
1167	b	.Lsqr8x_mul
1168
1169.align	5
/*
 * End of a row.  Load the next eight operand words (offsets 4*8..4*15 from
 * r3) and set r4 = r3 + 4*15.  r18 = r6 - r4; when it is zero the accumulator
 * is kept in registers, otherwise it is stored and r5 is moved back by r18
 * bytes, with r22-r29 reloaded from the new position.
 */
1170.Lsqr8x_break:
1171	lwz	9,4*8(3)
1172	addi	4,3,4*15
1173	lwz	10,4*9(3)
1174	sub.	18,6,4
1175	lwz	11,4*10(3)
1176	sub	19,5,18
1177	lwz	12,4*11(3)
1178	lwz	14,4*12(3)
1179	lwz	15,4*13(3)
1180	lwz	16,4*14(3)
1181	lwz	17,4*15(3)
1182	beq	.Lsqr8x_outer_loop
1183
1184	stw	22,4*1(5)
1185	lwz	22,4*1(19)
1186	stw	23,4*2(5)
1187	lwz	23,4*2(19)
1188	stw	24,4*3(5)
1189	lwz	24,4*3(19)
1190	stw	25,4*4(5)
1191	lwz	25,4*4(19)
1192	stw	26,4*5(5)
1193	lwz	26,4*5(19)
1194	stw	27,4*6(5)
1195	lwz	27,4*6(19)
1196	stw	28,4*7(5)
1197	lwz	28,4*7(19)
1198	stw	29,4*8(5)
1199	lwz	29,4*8(19)
1200	mr	5,19
1201	b	.Lsqr8x_outer_loop
1202
1203.align	5
/*
 * All cross products are done.  r18 = r6 - r8 points at the start of the
 * operand again; load its first four words, reload temporary words from
 * 4*13..4*16(r1), and store r22-r28.  r29 is not stored here.
 */
1204.Lsqr8x_outer_break:
1205
1206
1207	lwz	10,4*1(18)
1208	lwz	12,4*2(18)
1209	lwz	15,4*3(18)
1210	lwz	17,4*4(18)
1211	addi	4,18,4*4
1212
1213	lwz	19,4*13(1)
1214	lwz	20,4*14(1)
1215	lwz	21,4*15(1)
1216	lwz	18,4*16(1)
1217
1218	stw	22,4*1(5)
1219	srwi	30,8,4
1220	stw	23,4*2(5)
1221	subi	30,30,1
1222	stw	24,4*3(5)
1223	stw	25,4*4(5)
1224	stw	26,4*5(5)
1225	stw	27,4*6(5)
1226	stw	28,4*7(5)
1227
/*
 * Doubling is done word by word: "add x,t,t" shifts left by one and
 * "srwi t,t,32-1" extracts the bit that moves into the next word.  The
 * squares come from mullw/mulhwu of a word with itself.
 */
1228	addi	5,1,4*11
1229	mullw	22,10,10
1230	mulhwu	10,10,10
1231	add	23,19,19
1232	srwi	19,19,32-1
1233	mullw	11,12,12
1234	mulhwu	12,12,12
1235	addc	23,23,10
1236	add	24,20,20
1237	srwi	20,20,32-1
1238	add	25,21,21
1239	srwi	21,21,32-1
1240	or	24,24,19
1241
1242	mtctr	30
/* Each iteration consumes four operand words and writes eight result words. */
1243.Lsqr4x_shift_n_add:
1244	mullw	14,15,15
1245	mulhwu	15,15,15
1246	lwz	19,4*6(5)
1247	lwz	10,4*1(4)
1248	adde	24,24,11
1249	add	26,18,18
1250	srwi	18,18,32-1
1251	or	25,25,20
1252	lwz	20,4*7(5)
1253	adde	25,25,12
1254	lwz	12,4*2(4)
1255	add	27,19,19
1256	srwi	19,19,32-1
1257	or	26,26,21
1258	lwz	21,4*8(5)
1259	mullw	16,17,17
1260	mulhwu	17,17,17
1261	adde	26,26,14
1262	add	28,20,20
1263	srwi	20,20,32-1
1264	or	27,27,18
1265	lwz	18,4*9(5)
1266	adde	27,27,15
1267	lwz	15,4*3(4)
1268	add	29,21,21
1269	srwi	21,21,32-1
1270	or	28,28,19
1271	lwz	19,4*10(5)
1272	mullw	9,10,10
1273	mulhwu	10,10,10
1274	adde	28,28,16
1275	stw	22,4*1(5)
1276	add	22,18,18
1277	srwi	18,18,32-1
1278	or	29,29,20
1279	lwz	20,4*11(5)
1280	adde	29,29,17
1281	lwzu	17,4*4(4)
1282	stw	23,4*2(5)
1283	add	23,19,19
1284	srwi	19,19,32-1
1285	or	22,22,21
1286	lwz	21,4*12(5)
1287	mullw	11,12,12
1288	mulhwu	12,12,12
1289	adde	22,22,9
1290	stw	24,4*3(5)
1291	add	24,20,20
1292	srwi	20,20,32-1
1293	or	23,23,18
1294	lwz	18,4*13(5)
1295	adde	23,23,10
1296	stw	25,4*4(5)
1297	stw	26,4*5(5)
1298	stw	27,4*6(5)
1299	stw	28,4*7(5)
1300	stwu	29,4*8(5)
1301	add	25,21,21
1302	srwi	21,21,32-1
1303	or	24,24,19
1304	bdnz	.Lsqr4x_shift_n_add
/*
 * Reload the pointer parked at 4*7(r1) into r4 and the multiplier parked at
 * 4*8(r1) into r7, finish the last eight words of the doubling, and start
 * loading words via r4 into r9..r17 for the reduction.
 */
1305	lwz	4,4*7(1)
1306	lwz	7,4*8(1)
1307
1308	mullw	14,15,15
1309	mulhwu	15,15,15
1310	stw	22,4*1(5)
1311	lwz	22,4*12(1)
1312	lwz	19,4*6(5)
1313	adde	24,24,11
1314	add	26,18,18
1315	srwi	18,18,32-1
1316	or	25,25,20
1317	lwz	20,4*7(5)
1318	adde	25,25,12
1319	add	27,19,19
1320	srwi	19,19,32-1
1321	or	26,26,21
1322	mullw	16,17,17
1323	mulhwu	17,17,17
1324	adde	26,26,14
1325	add	28,20,20
1326	srwi	20,20,32-1
1327	or	27,27,18
1328	stw	23,4*2(5)
1329	lwz	23,4*13(1)
1330	adde	27,27,15
1331	or	28,28,19
1332	lwz	9,4*1(4)
1333	lwz	10,4*2(4)
1334	adde	28,28,16
1335	lwz	11,4*3(4)
1336	lwz	12,4*4(4)
1337	adde	29,17,20
1338	lwz	14,4*5(4)
1339	lwz	15,4*6(4)
1340
1341
1342
/*
 * r31 = low32(r7 * r22) is the multiplier for the first reduction step.
 * CTR = 8; r6 = r4 + bytes marks the end of the words read via r4.
 */
1343	mullw	31,7,22
1344	li	30,8
1345	lwz	16,4*7(4)
1346	add	6,4,8
1347	lwzu	17,4*8(4)
1348	stw	24,4*3(5)
1349	lwz	24,4*14(1)
1350	stw	25,4*4(5)
1351	lwz	25,4*15(1)
1352	stw	26,4*5(5)
1353	lwz	26,4*16(1)
1354	stw	27,4*6(5)
1355	lwz	27,4*17(1)
1356	stw	28,4*7(5)
1357	lwz	28,4*18(1)
1358	stw	29,4*8(5)
1359	lwz	29,4*19(1)
1360	addi	5,1,4*11
1361	mtctr	30
1362	b	.Lsqr8x_reduction
1363
1364.align	5
/*
 * Eight steps.  Each stores the current multiplier r31 at 4(r5), adds
 * r9..r17 * r31 while shifting the accumulator down one word, and computes
 * the next r31.  The low product r9*r31 is never computed;
 * "addic r22,r22,-1" reproduces only its carry (set when r22 != 0).
 */
1365.Lsqr8x_reduction:
1366
1367	mullw	19,10,31
1368	mullw	20,11,31
1369	stwu	31,4(5)
1370	mullw	21,12,31
1371
1372	addic	22,22,-1
1373	mullw	18,14,31
1374	adde	22,23,19
1375	mullw	19,15,31
1376	adde	23,24,20
1377	mullw	20,16,31
1378	adde	24,25,21
1379	mullw	21,17,31
1380	adde	25,26,18
1381	mulhwu	18,9,31
1382	adde	26,27,19
1383	mulhwu	19,10,31
1384	adde	27,28,20
1385	mulhwu	20,11,31
1386	adde	28,29,21
1387	mulhwu	21,12,31
1388	addze	29,0
1389	addc	22,22,18
1390	mulhwu	18,14,31
1391	adde	23,23,19
1392	mulhwu	19,15,31
1393	adde	24,24,20
1394	mulhwu	20,16,31
1395	adde	25,25,21
1396	mulhwu	21,17,31
1397	mullw	31,7,22
1398	adde	26,26,18
1399	adde	27,27,19
1400	adde	28,28,20
1401	adde	29,29,21
1402	bdnz	.Lsqr8x_reduction
1403
/*
 * Add the next eight temporary words.  r3 = r5 - 4*7 addresses the eight
 * multipliers just stored.  0x7c062040 encodes "cmplw r6,r4": equal means
 * there are only eight words in total.
 */
1404	lwz	18,4*1(5)
1405	lwz	19,4*2(5)
1406	lwz	20,4*3(5)
1407	lwz	21,4*4(5)
1408	subi	3,5,4*7
1409	.long	0x7c062040
1410	addc	22,22,18
1411	lwz	18,4*5(5)
1412	adde	23,23,19
1413	lwz	19,4*6(5)
1414	adde	24,24,20
1415	lwz	20,4*7(5)
1416	adde	25,25,21
1417	lwz	21,4*8(5)
1418	adde	26,26,18
1419	adde	27,27,19
1420	adde	28,28,20
1421	adde	29,29,21
1422
1423	beq	.Lsqr8x8_post_condition
1424
1425	lwz	7,4*0(3)
1426	lwz	9,4*1(4)
1427	lwz	10,4*2(4)
1428	lwz	11,4*3(4)
1429	lwz	12,4*4(4)
1430	lwz	14,4*5(4)
1431	lwz	15,4*6(4)
1432	lwz	16,4*7(4)
1433	lwzu	17,4*8(4)
1434	li	30,0
1435
1436.align	5
/*
 * Same instruction sequence as .Lsqr8x_mul: applies the eight stored
 * multipliers (read from r3 + r30) to the next eight words loaded via r4.
 */
1437.Lsqr8x_tail:
1438	mullw	18,9,7
1439	addze	31,0
1440	mullw	19,10,7
1441	addi	30,30,4
1442	mullw	20,11,7
1443	andi.	30,30,4*8-1
1444	mullw	21,12,7
1445	addc	22,22,18
1446	mullw	18,14,7
1447	adde	23,23,19
1448	mullw	19,15,7
1449	adde	24,24,20
1450	mullw	20,16,7
1451	adde	25,25,21
1452	mullw	21,17,7
1453	adde	26,26,18
1454	mulhwu	18,9,7
1455	adde	27,27,19
1456	mulhwu	19,10,7
1457	adde	28,28,20
1458	mulhwu	20,11,7
1459	adde	29,29,21
1460	mulhwu	21,12,7
1461	addze	31,31
1462	stwu	22,4(5)
1463	addc	22,23,18
1464	mulhwu	18,14,7
1465	adde	23,24,19
1466	mulhwu	19,15,7
1467	adde	24,25,20
1468	mulhwu	20,16,7
1469	adde	25,26,21
1470	mulhwu	21,17,7
1471	lwzx	7,3,30
1472	adde	26,27,18
1473	adde	27,28,19
1474	adde	28,29,20
1475	adde	29,31,21
1476
1477	bne	.Lsqr8x_tail
1478
1479
/*
 * r31 = carry word parked at 4*10(r1); r20 = r6 - r8 (start of the words
 * read via r4).  0x7c062040 encodes "cmplw r6,r4".
 */
1480	lwz	9,4*1(5)
1481	lwz	31,4*10(1)
1482	.long	0x7c062040
1483	lwz	10,4*2(5)
1484	sub	20,6,8
1485	lwz	11,4*3(5)
1486	lwz	12,4*4(5)
1487	lwz	14,4*5(5)
1488	lwz	15,4*6(5)
1489	lwz	16,4*7(5)
1490	lwz	17,4*8(5)
1491	beq	.Lsqr8x_tail_break
1492
1493	addc	22,22,9
1494	lwz	9,4*1(4)
1495	adde	23,23,10
1496	lwz	10,4*2(4)
1497	adde	24,24,11
1498	lwz	11,4*3(4)
1499	adde	25,25,12
1500	lwz	12,4*4(4)
1501	adde	26,26,14
1502	lwz	14,4*5(4)
1503	adde	27,27,15
1504	lwz	15,4*6(4)
1505	adde	28,28,16
1506	lwz	16,4*7(4)
1507	adde	29,29,17
1508	lwzu	17,4*8(4)
1509
1510	b	.Lsqr8x_tail
1511
1512.align	5
/*
 * End of a reduction row.  "addic r31,r31,-1" moves the parked carry word
 * into XER[CA]; the top eight words are added and stored, the new carry is
 * parked at 4*10(r1), and the window slides up by eight words.
 * 0x7c1ea840 encodes "cmplw r30,r21", comparing the end of the current
 * window with the address parked at 4*9(r1); unequal means another row.
 */
1513.Lsqr8x_tail_break:
1514	lwz	7,4*8(1)
1515	lwz	21,4*9(1)
1516	addi	30,5,4*8
1517
1518	addic	31,31,-1
1519	adde	18,22,9
1520	lwz	22,4*8(3)
1521	lwz	9,4*1(20)
1522	adde	19,23,10
1523	lwz	23,4*9(3)
1524	lwz	10,4*2(20)
1525	adde	24,24,11
1526	lwz	11,4*3(20)
1527	adde	25,25,12
1528	lwz	12,4*4(20)
1529	adde	26,26,14
1530	lwz	14,4*5(20)
1531	adde	27,27,15
1532	lwz	15,4*6(20)
1533	adde	28,28,16
1534	lwz	16,4*7(20)
1535	adde	29,29,17
1536	lwz	17,4*8(20)
1537	addi	4,20,4*8
1538	addze	20,0
1539	mullw	31,7,22
1540	stw	18,4*1(5)
1541	.long	0x7c1ea840
1542	stw	19,4*2(5)
1543	li	30,8
1544	stw	24,4*3(5)
1545	lwz	24,4*10(3)
1546	stw	25,4*4(5)
1547	lwz	25,4*11(3)
1548	stw	26,4*5(5)
1549	lwz	26,4*12(3)
1550	stw	27,4*6(5)
1551	lwz	27,4*13(3)
1552	stw	28,4*7(5)
1553	lwz	28,4*14(3)
1554	stw	29,4*8(5)
1555	lwz	29,4*15(3)
1556	stw	20,4*10(1)
1557	addi	5,3,4*7
1558	mtctr	30
1559	bne	.Lsqr8x_reduction
1560
1561
1562
1563
1564
1565
/*
 * Final subtraction.  r3 = destination parked at 4*6(r1) (copy in r6),
 * r7 = start of the upper half of the temporary area, r31 = top carry word,
 * CTR = bytes/32 - 1.
 */
1566	lwz	3,4*6(1)
1567	srwi	30,8,5
1568	mr	7,5
1569	addi	5,5,4*8
1570	subi	30,30,1
1571	subfc	18,9,22
1572	subfe	19,10,23
1573	mr	31,20
1574	mr	6,3
1575
1576	mtctr	30
1577	b	.Lsqr8x_sub
1578
1579.align	5
/* destination[] = temporary[] - (words read via r4), eight words per pass. */
1580.Lsqr8x_sub:
1581	lwz	9,4*1(4)
1582	lwz	22,4*1(5)
1583	lwz	10,4*2(4)
1584	lwz	23,4*2(5)
1585	subfe	20,11,24
1586	lwz	11,4*3(4)
1587	lwz	24,4*3(5)
1588	subfe	21,12,25
1589	lwz	12,4*4(4)
1590	lwz	25,4*4(5)
1591	stw	18,4*1(3)
1592	subfe	18,14,26
1593	lwz	14,4*5(4)
1594	lwz	26,4*5(5)
1595	stw	19,4*2(3)
1596	subfe	19,15,27
1597	lwz	15,4*6(4)
1598	lwz	27,4*6(5)
1599	stw	20,4*3(3)
1600	subfe	20,16,28
1601	lwz	16,4*7(4)
1602	lwz	28,4*7(5)
1603	stw	21,4*4(3)
1604	subfe	21,17,29
1605	lwzu	17,4*8(4)
1606	lwzu	29,4*8(5)
1607	stw	18,4*5(3)
1608	subfe	18,9,22
1609	stw	19,4*6(3)
1610	subfe	19,10,23
1611	stw	20,4*7(3)
1612	stwu	21,4*8(3)
1613	bdnz	.Lsqr8x_sub
1614
/*
 * Finish the subtraction; "subfe r31,r0,r31" folds the final borrow and the
 * carry word into the select mask r31.  CTR = bytes/16 - 1 for the copy.
 */
1615	srwi	30,8,4
1616	lwz	9,4*1(6)
1617	lwz	22,4*1(7)
1618	subi	30,30,1
1619	lwz	10,4*2(6)
1620	lwz	23,4*2(7)
1621	subfe	20,11,24
1622	lwz	11,4*3(6)
1623	lwz	24,4*3(7)
1624	subfe	21,12,25
1625	lwz	12,4*4(6)
1626	lwzu	25,4*4(7)
1627	stw	18,4*1(3)
1628	subfe	18,14,26
1629	stw	19,4*2(3)
1630	subfe	19,15,27
1631	stw	20,4*3(3)
1632	subfe	20,16,28
1633	stw	21,4*4(3)
1634	subfe	21,17,29
1635	stw	18,4*5(3)
1636	subfe	31,0,31
1637	stw	19,4*6(3)
1638	stw	20,4*7(3)
1639	stw	21,4*8(3)
1640
1641	addi	5,1,4*11
1642	mtctr	30
1643
/*
 * Branch-free select, four words per iteration:
 *   destination = (difference & ~r31) | (temporary & r31)
 * Zero is stored over both halves of the temporary area as the loop advances.
 */
1644.Lsqr4x_cond_copy:
1645	andc	9,9,31
1646	stw	0,-4*3(7)
1647	and	22,22,31
1648	stw	0,-4*2(7)
1649	andc	10,10,31
1650	stw	0,-4*1(7)
1651	and	23,23,31
1652	stw	0,-4*0(7)
1653	andc	11,11,31
1654	stw	0,4*1(5)
1655	and	24,24,31
1656	stw	0,4*2(5)
1657	andc	12,12,31
1658	stw	0,4*3(5)
1659	and	25,25,31
1660	stwu	0,4*4(5)
1661	or	18,9,22
1662	lwz	9,4*5(6)
1663	lwz	22,4*1(7)
1664	or	19,10,23
1665	lwz	10,4*6(6)
1666	lwz	23,4*2(7)
1667	or	20,11,24
1668	lwz	11,4*7(6)
1669	lwz	24,4*3(7)
1670	or	21,12,25
1671	lwz	12,4*8(6)
1672	lwzu	25,4*4(7)
1673	stw	18,4*1(6)
1674	stw	19,4*2(6)
1675	stw	20,4*3(6)
1676	stwu	21,4*4(6)
1677	bdnz	.Lsqr4x_cond_copy
1678
/* Last group of four; r4 = saved stack pointer from 0(r1) for the epilogue. */
1679	lwz	4,0(1)
1680	andc	9,9,31
1681	and	22,22,31
1682	andc	10,10,31
1683	and	23,23,31
1684	andc	11,11,31
1685	and	24,24,31
1686	andc	12,12,31
1687	and	25,25,31
1688	or	18,9,22
1689	or	19,10,23
1690	or	20,11,24
1691	or	21,12,25
1692	stw	18,4*1(6)
1693	stw	19,4*2(6)
1694	stw	20,4*3(6)
1695	stw	21,4*4(6)
1696
1697	b	.Lsqr8x_done
1698
1699.align	5
/*
 * Eight-word case: the result is still in r22-r29.  Subtract r9..r17, turn
 * the borrow into a mask in r31, then add r9..r17 back under that mask.
 * The interleaved stores zero the frame words 4*12..4*27(r1).
 */
1700.Lsqr8x8_post_condition:
1701	lwz	3,4*6(1)
1702	lwz	4,0(1)
1703	addze	31,0
1704
1705
1706	subfc	22,9,22
1707	subfe	23,10,23
1708	stw	0,4*12(1)
1709	stw	0,4*13(1)
1710	subfe	24,11,24
1711	stw	0,4*14(1)
1712	stw	0,4*15(1)
1713	subfe	25,12,25
1714	stw	0,4*16(1)
1715	stw	0,4*17(1)
1716	subfe	26,14,26
1717	stw	0,4*18(1)
1718	stw	0,4*19(1)
1719	subfe	27,15,27
1720	stw	0,4*20(1)
1721	stw	0,4*21(1)
1722	subfe	28,16,28
1723	stw	0,4*22(1)
1724	stw	0,4*23(1)
1725	subfe	29,17,29
1726	stw	0,4*24(1)
1727	stw	0,4*25(1)
1728	subfe	31,0,31
1729	stw	0,4*26(1)
1730	stw	0,4*27(1)
1731
1732	and	9,9,31
1733	and	10,10,31
1734	addc	22,22,9
1735	and	11,11,31
1736	adde	23,23,10
1737	and	12,12,31
1738	adde	24,24,11
1739	and	14,14,31
1740	adde	25,25,12
1741	and	15,15,31
1742	adde	26,26,14
1743	and	16,16,31
1744	adde	27,27,15
1745	and	17,17,31
1746	adde	28,28,16
1747	adde	29,29,17
1748	stw	22,4*1(3)
1749	stw	23,4*2(3)
1750	stw	24,4*3(3)
1751	stw	25,4*4(3)
1752	stw	26,4*5(3)
1753	stw	27,4*6(3)
1754	stw	28,4*7(3)
1755	stw	29,4*8(3)
1756
/*
 * Zero the parked multiplier (4*8) and carry word (4*10), set the return
 * value to 1, restore r14-r31 relative to the saved stack pointer in r4,
 * and pop the frame.
 */
1757.Lsqr8x_done:
1758	stw	0,4*8(1)
1759	stw	0,4*10(1)
1760
1761	lwz	14,-4*18(4)
1762	li	3,1
1763	lwz	15,-4*17(4)
1764	lwz	16,-4*16(4)
1765	lwz	17,-4*15(4)
1766	lwz	18,-4*14(4)
1767	lwz	19,-4*13(4)
1768	lwz	20,-4*12(4)
1769	lwz	21,-4*11(4)
1770	lwz	22,-4*10(4)
1771	lwz	23,-4*9(4)
1772	lwz	24,-4*8(4)
1773	lwz	25,-4*7(4)
1774	lwz	26,-4*6(4)
1775	lwz	27,-4*5(4)
1776	lwz	28,-4*4(4)
1777	lwz	29,-4*3(4)
1778	lwz	30,-4*2(4)
1779	lwz	31,-4*1(4)
1780	mr	1,4
1781	blr
/* NOTE(review): the data words below look like a traceback table -- confirm. */
1782.long	0
1783.byte	0,12,4,0x20,0x80,18,6,0
1784.long	0
1785.size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
1786.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1787.align	2
1788