1/*********************************************************************/
2/*                                                                   */
3/*             Optimized BLAS libraries                              */
4/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
5/*                                                                   */
6/* Copyright (c) The University of Texas, 2009. All rights reserved. */
7/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
8/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
9/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
10/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
11/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
12/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
13/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
14/* Under no circumstances shall University be liable for incidental, */
15/* special, indirect, direct or consequential damages or loss of     */
16/* profits, interruption of business, or related expenses which may  */
17/* arise from use of Software or Documentation, including but not    */
18/* limited to those resulting from defects in Software and/or        */
19/* Documentation, or loss or inaccuracy of data of any kind.         */
20/*********************************************************************/
21
22#define ASSEMBLER
23#include "common.h"
24
25#define	M	r3	/* number of elements copied from each column of A */
26#define	N	r4	/* number of columns of A to copy */
27#define	A	r5	/* source pointer; advanced past each column group as it is consumed */
28#define	LDA	r6	/* distance between consecutive columns; scaled by BASE_SHIFT on entry */
29#define B	r7	/* destination pointer; written sequentially by update-form stores */
30
31#define AO1	r8	/* read cursor, first column of the current group */
32#define AO2	r9	/* read cursor, second column (AO1 + LDA) */
33#define AO3	r10	/* read cursor, third column (AO2 + LDA) */
34#define AO4	r11	/* read cursor, fourth column (AO3 + LDA) */
35
36#define J	r12	/* column-group counter; also receives the N & 2 / N & 1 test results */
37
38#define INC	r30	/* byte step of 1 * SIZE; r30 is saved and restored by this routine */
39#define INC2	r31	/* byte step of 2 * SIZE; r31 is saved and restored by this routine */
40
/* c01..c16: floating-point data registers used by the loads, selects and stores. */
/* c15/c16 alias f14/f15, which the prologue spills before they are used.         */
41#define c01	f0
42#define c02	f1
43#define c03	f2
44#define c04	f3
45#define c05	f4
46#define c06	f5
47#define c07	f6
48#define c08	f7
49#define c09	f8
50#define c10	f9
51#define c11	f10
52#define c12	f11
53#define c13	f12
54#define c14	f13
55#define c15	f14
56#define c16	f15
57
/* Selector operands for fpsel, loaded from constants the prologue pushes on the stack. */
58#define	sel_p	f16	/* loaded as the pair (+1.0, -1.0) on the aligned path */
59#define	sel_s	f17	/* loaded as the pair (-1.0, +1.0) on the aligned path */
60
/* Extra destinations for fpsel results in the aligned loops. */
61#define c17	f18
62#define c18	f19
63
64
65	PROLOGUE
66	PROFCODE
	/*
	 * Copy/pack routine: reads N columns of M elements each from A (columns
	 * LDA elements apart) and writes them to the contiguous buffer B.  Columns
	 * are taken in groups of 4, then 2, then 1, and the columns of a group are
	 * interleaved element by element in B.
	 *
	 * Arguments arrive in r3..r7 as M, N, A, LDA, B.
	 *
	 * Stack frame built below (offsets relative to SP at entry):
	 *    -16 ..  -96   f14, f15, f16, f17, f18, f19  (16 bytes each)
	 *   -100           r31
	 *   -104           r30
	 *   -108           0x3f800000  (single-precision +1.0)
	 *   -112           0xbf800000  (single-precision -1.0)
	 *   -116           0xbf800000  (single-precision -1.0)
	 *   -120           0x3f800000  (single-precision +1.0)
	 * After the last push SP sits at -120.  The aligned path later moves SP to
	 * -112 while loading the selector constants, so there are two epilogues:
	 * .L99 unwinds from -112 and .L999 unwinds from -120.
	 */
67
68	li	r0, -16	/* step for the update-form paired stores */
69
70	stfpdux	f14, SP, r0
71	stfpdux	f15, SP, r0
72
73	stfpdux	f16, SP, r0
74	stfpdux	f17, SP, r0
75	stfpdux	f18, SP, r0
76	stfpdux	f19, SP, r0
77
78	stwu	r31,  -4(SP)
79	stwu	r30,  -4(SP)
80
	/* r9/r10 are free as scratch here; they only become AO2/AO3 later. */
81	lis	r9,   0x3f80
82	lis	r10,  0xbf80
83
	/* Push +1, -1, -1, +1 so that two 8-byte loads yield (+1,-1) and (-1,+1). */
84	stwu	r9,    -4(SP)
85	stwu	r10,   -4(SP)
86	stwu	r10,   -4(SP)
87	stwu	r9,    -4(SP)
88
89	slwi	LDA, LDA, BASE_SHIFT	/* element stride -> byte stride */
90
	/*
	 * Nothing to copy.  SP is still at the 120-byte depth here, which is the
	 * depth .L999 unwinds.  .L99 assumes the 112-byte depth left by the
	 * aligned-path selector loads, so leaving through it from this point would
	 * reload r30/r31 and f14..f19 from the wrong slots and return with SP
	 * 8 bytes too low.
	 */
91	cmpwi	cr0, M, 0
92	ble-	.L999
93	cmpwi	cr0, N, 0
94	ble-	.L999
95
	/*
	 * The paired-load path needs both the base address and the byte stride to
	 * be multiples of 2 * SIZE; otherwise use the scalar-load path at .L100.
	 */
96	andi.	r0, A,   2 * SIZE - 1
97	bne	.L100
98	andi.	r0, LDA, 2 * SIZE - 1
99	bne	.L100
100
101	li	r0, 8	/* byte step for the two paired-single loads below */
102	addi	SP, SP, -8	/* pre-bias: the update-form loads add r0 before accessing memory */
103
	/*
	 * Load the four single-precision constants pushed by the prologue as two
	 * register pairs: sel_p = (+1.0, -1.0) and sel_s = (-1.0, +1.0).  They are
	 * the selector operands of every fpsel on this path.
	 * Side effect: SP ends 8 bytes above its value before this block, i.e. at
	 * the 112-byte frame depth that the .L99 epilogue unwinds.
	 */
104	lfpsux	sel_p, SP, r0
105	lfpsux	sel_s, SP, r0
106
107	li	INC,  1 * SIZE
108	li	INC2, 2 * SIZE
109
	/*
	 * All loads and stores below are update forms stepping by INC2, so both
	 * pointers start one step (2 * SIZE) before the first element.
	 */
110	subi	A, A, 2 * SIZE
111	subi	B, B, 2 * SIZE
112
	/* J = N / 4 column groups; go straight to the remainder code if there are none. */
113	srawi.	J,  N,  2
114	ble	.L20
115	.align 4
116.L11:
	/*
	 * Aligned path, one group of four columns.
	 * AO1..AO4 are the four column cursors; A is moved on by 4 * LDA so the
	 * next pass (or the remainder code) starts at the following group.
	 */
117	mr	AO1, A
118	add	AO2, A,   LDA
119	add	AO3, AO2, LDA
120	add	AO4, AO3, LDA
121	add	A,   AO4, LDA
122
	/* CTR = M / 8: each .L12 pass consumes 8 elements from each of the 4 columns. */
123	srawi.	r0,  M,  3
124	mtspr	CTR, r0
125	ble	.L15
126	.align 4
127
128.L12:
	/*
	 * Load phase: four 2-element steps per column (16 register pairs).
	 * LFPDUX is used for AO1/AO3 and LFXDUX for AO2/AO4.  Both are macros
	 * defined outside this file; presumably the straight and the half-swapped
	 * paired load respectively (confirm in common.h).
	 */
129	LFPDUX	c01,   AO1, INC2
130	LFXDUX	c05,   AO2, INC2
131	LFPDUX	c09,   AO3, INC2
132	LFXDUX	c13,   AO4, INC2
133
134	LFPDUX	c02,   AO1, INC2
135	LFXDUX	c06,   AO2, INC2
136	LFPDUX	c10,   AO3, INC2
137	LFXDUX	c14,   AO4, INC2
138
139	LFPDUX	c03,   AO1, INC2
140	LFXDUX	c07,   AO2, INC2
141	LFPDUX	c11,   AO3, INC2
142	LFXDUX	c15,   AO4, INC2
143
144	LFPDUX	c04,   AO1, INC2
145	LFXDUX	c08,   AO2, INC2
146	LFPDUX	c12,   AO3, INC2
147	LFXDUX	c16,   AO4, INC2
148
	/*
	 * Merge phase: for every pair of adjacent columns, fpsel with sel_p and
	 * with sel_s builds two registers that each mix one half from either
	 * column.  The sel_p results go to fresh registers (c17, c18, then
	 * recycled load registers); the sel_s results overwrite their own inputs,
	 * so each sel_s select must stay after the sel_p select reading the same
	 * sources.
	 * Intended result, assuming the swapped-half semantics above: B receives,
	 * element by element, the values of AO1, AO2, AO3, AO4 in that order.
	 * sel_p results are stored with STFPDUX, sel_s results with STFXDUX.
	 * Stores are interleaved with later selects (hand scheduling).
	 */
149	fpsel	c17, sel_p, c01, c05
150	fpsel	c18, sel_p, c09, c13
151	fpsel	c01, sel_s, c01, c05
152	fpsel	c05, sel_s, c09, c13
153
154	fpsel	c09, sel_p, c02, c06
155	fpsel	c13, sel_p, c10, c14
156	STFPDUX	c17,   B, INC2
157	fpsel	c02, sel_s, c02, c06
158 	STFPDUX	c18,   B, INC2
159	fpsel	c06, sel_s, c10, c14
160	STFXDUX	c01,   B, INC2
161
162	fpsel	c10, sel_p, c03, c07
163	STFXDUX	c05,   B, INC2
164	fpsel	c14, sel_p, c11, c15
165	STFPDUX	c09,   B, INC2
166	fpsel	c03, sel_s, c03, c07
167	STFPDUX	c13,   B, INC2
168	fpsel	c07, sel_s, c11, c15
169	STFXDUX	c02,   B, INC2
170
171	fpsel	c11, sel_p, c04, c08
172	STFXDUX	c06,   B, INC2
173	fpsel	c15, sel_p, c12, c16
174	STFPDUX	c10,   B, INC2
175	fpsel	c04, sel_s, c04, c08
176	STFPDUX	c14,   B, INC2
177	fpsel	c08, sel_s, c12, c16
178	STFXDUX	c03,   B, INC2
179
	/* Drain the remaining results: 16 pair stores in total per pass. */
180	STFXDUX	c07,   B, INC2
181	STFPDUX	c11,   B, INC2
182	STFPDUX	c15,   B, INC2
183	STFXDUX	c04,   B, INC2
184	STFXDUX	c08,   B, INC2
185	bdnz	.L12
186	.align 4
187
188.L15:
	/*
	 * Aligned path, four columns: leftover M & 7 elements per column, handled
	 * as 4, then 2, then 1.  andi. never yields a negative value, so the ble
	 * below is taken exactly when the remainder is zero.
	 */
189	andi.	r0,  M,  7
190	ble	.L19
191
192	andi.	r0,  M,  4
193	beq	.L16
194
	/* Four elements per column: same load/select/store pattern as .L12, half the size. */
195	LFPDUX	c01,   AO1, INC2
196	LFXDUX	c05,   AO2, INC2
197	LFPDUX	c09,   AO3, INC2
198	LFXDUX	c13,   AO4, INC2
199
200	LFPDUX	c02,   AO1, INC2
201	LFXDUX	c06,   AO2, INC2
202	LFPDUX	c10,   AO3, INC2
203	LFXDUX	c14,   AO4, INC2
204
205	fpsel	c17, sel_p, c01, c05
206	fpsel	c18, sel_p, c09, c13
207	fpsel	c01, sel_s, c01, c05
208	fpsel	c05, sel_s, c09, c13
209
210	fpsel	c09, sel_p, c02, c06
211	fpsel	c13, sel_p, c10, c14
212	STFPDUX	c17,   B, INC2
213	fpsel	c02, sel_s, c02, c06
214	STFPDUX	c18,   B, INC2
215	fpsel	c06, sel_s, c10, c14
216	STFXDUX	c01,   B, INC2
217	STFXDUX	c05,   B, INC2
218	STFPDUX	c09,   B, INC2
219	STFPDUX	c13,   B, INC2
220	STFXDUX	c02,   B, INC2
221	STFXDUX	c06,   B, INC2
222	.align 4
223
224.L16:
	/* Two elements per column: one paired load per column, four pair stores. */
225	andi.	r0,  M,  2
226	beq	.L17
227
228	LFPDUX	c01,   AO1, INC2
229	LFXDUX	c05,   AO2, INC2
230	LFPDUX	c09,   AO3, INC2
231	LFXDUX	c13,   AO4, INC2
232
233	fpsel	c17, sel_p, c01, c05
234	fpsel	c18, sel_p, c09, c13
235	fpsel	c01, sel_s, c01, c05
236	fpsel	c05, sel_s, c09, c13
237
238	STFPDUX	c17,   B, INC2
239	STFPDUX	c18,   B, INC2
240	STFXDUX	c01,   B, INC2
241	STFXDUX	c05,   B, INC2
242	.align 4
243
244.L17:
	/*
	 * Last single element of each column.  The cursors still trail the next
	 * unread element by 2 * SIZE, so these single-element loads step by INC2.
	 */
245	andi.	r0,  M,  1
246	beq	.L19
247
248	LFDUX	c01,   AO1, INC2
249	LFDUX	c02,   AO2, INC2
250	LFDUX	c03,   AO3, INC2
251	LFDUX	c04,   AO4, INC2
252
	/* fsmfp: presumably copies the second operand into the other half of the first, */
	/* so that c01 and c03 each carry two columns for the pair stores (confirm).     */
253	fsmfp	c01, c02
254	fsmfp	c03, c04
255
256	STFPDUX	c01,   B, INC2
257	STFPDUX	c03,   B, INC2
258	.align 4
259
260.L19:
	/* Next group of four columns, if any remain. */
261	addic.	J, J, -1
262	bgt	.L11
263	.align 4
264
265.L20:
	/*
	 * Aligned path, remaining pair of columns (bit 1 of N).
	 * ble after andi. is taken exactly when the tested bit is clear.
	 */
266	andi.	J,  N,  2
267	ble	.L30
268
	/* Two column cursors; A moves on by 2 * LDA for the single-column remainder. */
269	mr	AO1, A
270	add	AO2, A,   LDA
271	add	A,   AO2, LDA
272
	/* CTR = M / 8: each .L22 pass consumes 8 elements from each of the 2 columns. */
273	srawi.	r0,  M,  3
274	mtspr	CTR, r0
275	ble	.L25
276	.align 4
277
278.L22:
	/* Four 2-element loads per column; AO2 uses the LFXDUX form as in the 4-column loop. */
279	LFPDUX	c01,   AO1, INC2
280	LFXDUX	c05,   AO2, INC2
281	LFPDUX	c02,   AO1, INC2
282	LFXDUX	c06,   AO2, INC2
283
284	LFPDUX	c03,   AO1, INC2
285	LFXDUX	c07,   AO2, INC2
286	LFPDUX	c04,   AO1, INC2
287	LFXDUX	c08,   AO2, INC2
288
	/*
	 * For each loaded pair of registers: the sel_p select goes to a fresh
	 * register (stored with STFPDUX), then the sel_s select overwrites the
	 * AO1 register (stored with STFXDUX).  Eight pair stores per pass.
	 */
289	fpsel	c17, sel_p, c01, c05
290	fpsel	c01, sel_s, c01, c05
291	fpsel	c09, sel_p, c02, c06
292	fpsel	c02, sel_s, c02, c06
293
294	fpsel	c10, sel_p, c03, c07
295	fpsel	c03, sel_s, c03, c07
296	STFPDUX	c17,   B, INC2
297	fpsel	c11, sel_p, c04, c08
298	STFXDUX	c01,   B, INC2
299	fpsel	c04, sel_s, c04, c08
300	STFPDUX	c09,   B, INC2
301
302	STFXDUX	c02,   B, INC2
303	STFPDUX	c10,   B, INC2
304	STFXDUX	c03,   B, INC2
305	STFPDUX	c11,   B, INC2
306	STFXDUX	c04,   B, INC2
307	bdnz	.L22
308	.align 4
309
310.L25:
	/* Aligned path, two columns: leftover M & 7 elements per column (4, then 2, then 1). */
311	andi.	r0,  M,  7
312	ble	.L30
313
314	andi.	r0,  M,  4
315	beq	.L26
316
	/* Four elements per column. */
317	LFPDUX	c01,   AO1, INC2
318	LFXDUX	c05,   AO2, INC2
319	LFPDUX	c02,   AO1, INC2
320	LFXDUX	c06,   AO2, INC2
321
322	fpsel	c17, sel_p, c01, c05
323	fpsel	c01, sel_s, c01, c05
324	fpsel	c09, sel_p, c02, c06
325	fpsel	c02, sel_s, c02, c06
326
327	STFPDUX	c17,   B, INC2
328	STFXDUX	c01,   B, INC2
329	STFPDUX	c09,   B, INC2
330	STFXDUX	c02,   B, INC2
331	.align 4
332
333.L26:
	/* Two elements per column. */
334	andi.	r0,  M,  2
335	beq	.L27
336
337	LFPDUX	c01,   AO1, INC2
338	LFXDUX	c05,   AO2, INC2
339
340	fpsel	c17, sel_p, c01, c05
341	fpsel	c01, sel_s, c01, c05
342
343	STFPDUX	c17,   B, INC2
344	STFXDUX	c01,   B, INC2
345	.align 4
346
347.L27:
	/* Last single element of each column, combined into one pair store. */
348	andi.	r0,  M,  1
349	beq	.L30
350
351	LFDUX	c01,   AO1, INC2
352	LFDUX	c02,   AO2, INC2
353
354	fsmfp	c01, c02
355	STFPDUX	c01,   B, INC2
356	.align 4
357
358.L30:
	/*
	 * Aligned path, last single column (bit 0 of N).  With one column there
	 * is nothing to interleave, so this is a straight paired copy.
	 * No further work remains after this column; exits go to .L99.
	 */
359	andi.	J,  N,  1
360	ble	.L99
361
362	mr	AO1, A
363
	/* CTR = M / 8: each .L32 pass copies 8 elements as four pairs. */
364	srawi.	r0,  M,  3
365	mtspr	CTR, r0
366	ble	.L35
367	.align 4
368
369.L32:
370	LFPDUX	c01,   AO1, INC2
371	LFPDUX	c02,   AO1, INC2
372	LFPDUX	c03,   AO1, INC2
373	LFPDUX	c04,   AO1, INC2
374
375	STFPDUX	c01,   B, INC2
376	STFPDUX	c02,   B, INC2
377	STFPDUX	c03,   B, INC2
378	STFPDUX	c04,   B, INC2
379	bdnz	.L32
380	.align 4
381
382.L35:
	/* Aligned path, single column: leftover M & 7 elements (4, then 2, then 1). */
383	andi.	r0,  M,  7
384	ble	.L99
385
386	andi.	r0,  M,  4
387	beq	.L36
388
	/* Four elements as two pairs. */
389	LFPDUX	c01,   AO1, INC2
390	LFPDUX	c02,   AO1, INC2
391
392	STFPDUX	c01,   B, INC2
393	STFPDUX	c02,   B, INC2
394	.align 4
395
396.L36:
	/* Two elements as one pair. */
397	andi.	r0,  M,  2
398	beq	.L37
399
400	LFPDUX	c01,   AO1, INC2
401
402	STFPDUX	c01,   B, INC2
403	.align 4
404
405.L37:
	/*
	 * Final single element.  Non-update forms are used because neither
	 * pointer is read again; the INC2 offset accounts for the 2 * SIZE bias
	 * both pointers still carry.
	 */
406	andi.	r0,  M,  1
407	beq	.L99
408
409	LFDX	c01,   AO1, INC2
410	STFDX	c01,   B,  INC2
411	.align 4
412
413.L99:
	/*
	 * Epilogue for the aligned path.  Expects SP at the 112-byte frame depth,
	 * i.e. the position left by the two lfpsux selector loads.  From there the
	 * saved r30 is at SP + 8 and the saved r31 at SP + 12.
	 * Code that reaches an exit with SP still at the 120-byte depth must use
	 * .L999 instead; unwinding through here from that depth restores from the
	 * wrong slots.
	 */
414	addi	SP, SP, 4	/* so that the update-form load below lands on SP + 8 */
415
416	lwzu	r30,   4(SP)
417	lwzu	r31,   4(SP)
418
	/* Step back so that the first +16 update-form load reads the f19 slot. */
419	subi	SP, SP, 12
420	li	r0, 16
421
	/* Restore in reverse order of the prologue pushes. */
422	lfpdux	f19, SP, r0
423	lfpdux	f18, SP, r0
424	lfpdux	f17, SP, r0
425	lfpdux	f16, SP, r0
426
427	lfpdux	f15, SP, r0
428	lfpdux	f14, SP, r0
429	addi	SP, SP, 16	/* drop the f14 slot: SP is back at its entry value */
430	blr
431	.align 4
432
433.L100:
	/*
	 * Path taken when A or the byte stride is not a multiple of 2 * SIZE.
	 * The source is read one element at a time (step INC), so A is biased by a
	 * single element; B is still written in pairs and keeps the 2 * SIZE bias.
	 * sel_p/sel_s are not loaded here and SP stays at the 120-byte frame
	 * depth, which is why this path exits through .L999 rather than .L99.
	 */
434	li	INC,  1 * SIZE
435	li	INC2, 2 * SIZE
436
437	subi	A, A, 1 * SIZE
438	subi	B, B, 2 * SIZE
439
	/* J = N / 4 column groups; go straight to the remainder code if there are none. */
440	srawi.	J,  N,  2
441	ble	.L120
442	.align 4
443.L111:
	/*
	 * Unaligned path, one group of four columns.  Same cursor setup as the
	 * aligned loop: AO1..AO4 one LDA apart, A moved on by 4 * LDA.
	 */
444	mr	AO1, A
445	add	AO2, A,   LDA
446	add	AO3, AO2, LDA
447	add	AO4, AO3, LDA
448	add	A,   AO4, LDA
449
	/* CTR = M / 8: each .L112 pass consumes 8 elements from each of the 4 columns. */
450	srawi.	r0,  M,  3
451	mtspr	CTR, r0
452	ble	.L115
453	.align 4
454
455.L112:
	/*
	 * Pairs are assembled directly by the loads: LFDUX fills a register from
	 * one column and LFSDUX then fills the same register from the neighbouring
	 * column (presumably primary half and secondary half respectively; both
	 * are macros defined outside this file).  c01..c04 and c09..c12 combine
	 * AO1 with AO2; c05..c08 and c13..c16 combine AO3 with AO4.
	 */
456	LFDUX	c01,   AO1, INC
457	LFDUX	c02,   AO1, INC
458	LFDUX	c03,   AO1, INC
459	LFDUX	c04,   AO1, INC
460
461	LFDUX	c09,   AO1, INC
462	LFDUX	c10,   AO1, INC
463	LFDUX	c11,   AO1, INC
464	LFDUX	c12,   AO1, INC
465
466	LFSDUX	c01,   AO2, INC
467	LFSDUX	c02,   AO2, INC
468	LFSDUX	c03,   AO2, INC
469	LFSDUX	c04,   AO2, INC
470
471	LFSDUX	c09,   AO2, INC
472	LFSDUX	c10,   AO2, INC
473	LFSDUX	c11,   AO2, INC
474	LFSDUX	c12,   AO2, INC
475
476	LFDUX	c05,   AO3, INC
477	LFDUX	c06,   AO3, INC
478	LFDUX	c07,   AO3, INC
479	LFDUX	c08,   AO3, INC
480
481	LFDUX	c13,   AO3, INC
482	LFDUX	c14,   AO3, INC
483	LFDUX	c15,   AO3, INC
484	LFDUX	c16,   AO3, INC
485
486	LFSDUX	c05,   AO4, INC
487	LFSDUX	c06,   AO4, INC
488	LFSDUX	c07,   AO4, INC
489	LFSDUX	c08,   AO4, INC
490
491	LFSDUX	c13,   AO4, INC
492	LFSDUX	c14,   AO4, INC
493	LFSDUX	c15,   AO4, INC
494	LFSDUX	c16,   AO4, INC
495
	/*
	 * Store order alternates the AO1/AO2 register with the AO3/AO4 register
	 * of the same element, so B gets the four columns of each element back to
	 * back.  First the registers of elements 0..3, then those of 4..7.
	 */
496	STFPDUX	c01,   B, INC2
497	STFPDUX	c05,   B, INC2
498	STFPDUX	c02,   B, INC2
499	STFPDUX	c06,   B, INC2
500	STFPDUX	c03,   B, INC2
501	STFPDUX	c07,   B, INC2
502	STFPDUX	c04,   B, INC2
503	STFPDUX	c08,   B, INC2
504
505	STFPDUX	c09,   B, INC2
506	STFPDUX	c13,   B, INC2
507	STFPDUX	c10,   B, INC2
508	STFPDUX	c14,   B, INC2
509	STFPDUX	c11,   B, INC2
510	STFPDUX	c15,   B, INC2
511	STFPDUX	c12,   B, INC2
512	STFPDUX	c16,   B, INC2
513	bdnz	.L112
514	.align 4
515
516.L115:
	/*
	 * Unaligned path, four columns: leftover M & 7 elements per column
	 * (4, then 2, then 1).  ble after andi. is taken exactly on zero.
	 */
517	andi.	r0,  M,  7
518	ble	.L119
519
520	andi.	r0,  M,  4
521	beq	.L116
522
	/* Four elements per column: same load/store pattern as .L112, half the size. */
523	LFDUX	c01,   AO1, INC
524	LFDUX	c02,   AO1, INC
525	LFDUX	c03,   AO1, INC
526	LFDUX	c04,   AO1, INC
527
528	LFSDUX	c01,   AO2, INC
529	LFSDUX	c02,   AO2, INC
530	LFSDUX	c03,   AO2, INC
531	LFSDUX	c04,   AO2, INC
532
533	LFDUX	c05,   AO3, INC
534	LFDUX	c06,   AO3, INC
535	LFDUX	c07,   AO3, INC
536	LFDUX	c08,   AO3, INC
537
538	LFSDUX	c05,   AO4, INC
539	LFSDUX	c06,   AO4, INC
540	LFSDUX	c07,   AO4, INC
541	LFSDUX	c08,   AO4, INC
542
543	STFPDUX	c01,   B, INC2
544	STFPDUX	c05,   B, INC2
545	STFPDUX	c02,   B, INC2
546	STFPDUX	c06,   B, INC2
547	STFPDUX	c03,   B, INC2
548	STFPDUX	c07,   B, INC2
549	STFPDUX	c04,   B, INC2
550	STFPDUX	c08,   B, INC2
551	.align 4
552
553.L116:
	/* Two elements per column. */
554	andi.	r0,  M,  2
555	beq	.L117
556
557	LFDUX	c01,   AO1, INC
558	LFDUX	c02,   AO1, INC
559
560	LFSDUX	c01,   AO2, INC
561	LFSDUX	c02,   AO2, INC
562
563	LFDUX	c05,   AO3, INC
564	LFDUX	c06,   AO3, INC
565
566	LFSDUX	c05,   AO4, INC
567	LFSDUX	c06,   AO4, INC
568
569	STFPDUX	c01,   B, INC2
570	STFPDUX	c05,   B, INC2
571	STFPDUX	c02,   B, INC2
572	STFPDUX	c06,   B, INC2
573	.align 4
574
575.L117:
	/* Last single element of each column: two registers, two pair stores. */
576	andi.	r0,  M,  1
577	beq	.L119
578
579	LFDUX	c01,   AO1, INC
580	LFDUX	c05,   AO3, INC
581
	/* NOTE(review): the purpose of these two nops is not evident from this file; */
	/* presumably padding between the two loads that target the same registers.  */
582	nop
583	nop
584
585	LFSDUX	c01,   AO2, INC
586	LFSDUX	c05,   AO4, INC
587
588	STFPDUX	c01,   B, INC2
589	STFPDUX	c05,   B, INC2
590	.align 4
591
592.L119:
	/* Next group of four columns, if any remain. */
593	addic.	J, J, -1
594	bgt	.L111
595	.align 4
596
597.L120:
	/* Unaligned path, remaining pair of columns (bit 1 of N). */
598	andi.	J,  N,  2
599	ble	.L130
600
	/* Two column cursors; A moves on by 2 * LDA for the single-column remainder. */
601	mr	AO1, A
602	add	AO2, A,   LDA
603	add	A,   AO2, LDA
604
	/* CTR = M / 8: each .L122 pass consumes 8 elements from each of the 2 columns. */
605	srawi.	r0,  M,  3
606	mtspr	CTR, r0
607	ble	.L125
608	.align 4
609
610.L122:
	/* Each register is filled from AO1 by LFDUX, then completed from AO2 by LFSDUX. */
611	LFDUX	c01,   AO1, INC
612	LFDUX	c02,   AO1, INC
613	LFDUX	c03,   AO1, INC
614	LFDUX	c04,   AO1, INC
615
616	LFDUX	c09,   AO1, INC
617	LFDUX	c10,   AO1, INC
618	LFDUX	c11,   AO1, INC
619	LFDUX	c12,   AO1, INC
620
621	LFSDUX	c01,   AO2, INC
622	LFSDUX	c02,   AO2, INC
623	LFSDUX	c03,   AO2, INC
624	LFSDUX	c04,   AO2, INC
625
626	LFSDUX	c09,   AO2, INC
627	LFSDUX	c10,   AO2, INC
628	LFSDUX	c11,   AO2, INC
629	LFSDUX	c12,   AO2, INC
630
	/* Eight pair stores, in element order. */
631	STFPDUX	c01,   B, INC2
632	STFPDUX	c02,   B, INC2
633	STFPDUX	c03,   B, INC2
634	STFPDUX	c04,   B, INC2
635
636	STFPDUX	c09,   B, INC2
637	STFPDUX	c10,   B, INC2
638	STFPDUX	c11,   B, INC2
639	STFPDUX	c12,   B, INC2
640	bdnz	.L122
641	.align 4
642
643.L125:
	/* Unaligned path, two columns: leftover M & 7 elements per column (4, then 2, then 1). */
644	andi.	r0,  M,  7
645	ble	.L130
646
647	andi.	r0,  M,  4
648	beq	.L126
649
	/* Four elements per column. */
650	LFDUX	c01,   AO1, INC
651	LFDUX	c02,   AO1, INC
652	LFDUX	c03,   AO1, INC
653	LFDUX	c04,   AO1, INC
654
655	LFSDUX	c01,   AO2, INC
656	LFSDUX	c02,   AO2, INC
657	LFSDUX	c03,   AO2, INC
658	LFSDUX	c04,   AO2, INC
659
660	STFPDUX	c01,   B, INC2
661	STFPDUX	c02,   B, INC2
662	STFPDUX	c03,   B, INC2
663	STFPDUX	c04,   B, INC2
664	.align 4
665
666.L126:
	/* Two elements per column. */
667	andi.	r0,  M,  2
668	beq	.L127
669
670	LFDUX	c01,   AO1, INC
671	LFDUX	c02,   AO1, INC
672
673	LFSDUX	c01,   AO2, INC
674	LFSDUX	c02,   AO2, INC
675
676	STFPDUX	c01,   B, INC2
677	STFPDUX	c02,   B, INC2
678	.align 4
679
680.L127:
	/*
	 * Last single element of each column.  Here the two values are loaded
	 * into separate registers and merged with fsmfp instead of using LFSDUX.
	 */
681	andi.	r0,  M,  1
682	beq	.L130
683
684	LFDUX	c01,   AO1, INC
685	LFDUX	c02,   AO2, INC
686
687	fsmfp	c01, c02
688	STFPDUX	c01,   B, INC2
689	.align 4
690
691.L130:
	/*
	 * Unaligned path, last single column (bit 0 of N).
	 * No further work remains after this column; exits go to .L999.
	 */
692	andi.	J,  N,  1
693	ble	.L999
694
695	mr	AO1, A
696
	/* CTR = M / 8: each .L132 pass copies 8 elements. */
697	srawi.	r0,  M,  3
698	mtspr	CTR, r0
699	ble	.L135
700	.align 4
701
702.L132:
	/* Eight single-element loads from the one column. */
703	LFDUX	c01,   AO1, INC
704	LFDUX	c02,   AO1, INC
705	LFDUX	c03,   AO1, INC
706	LFDUX	c04,   AO1, INC
707
708	LFDUX	c05,   AO1, INC
709	LFDUX	c06,   AO1, INC
710	LFDUX	c07,   AO1, INC
711	LFDUX	c08,   AO1, INC
712
	/* Merge consecutive elements two by two so they can go out as pair stores. */
713	fsmfp	c01, c02
714	fsmfp	c03, c04
715	fsmfp	c05, c06
716	fsmfp	c07, c08
717
718	STFPDUX	c01,   B, INC2
719	STFPDUX	c03,   B, INC2
720	STFPDUX	c05,   B, INC2
721	STFPDUX	c07,   B, INC2
722	bdnz	.L132
723	.align 4
724
725.L135:
	/* Unaligned path, single column: leftover M & 7 elements (4, then 2, then 1). */
726	andi.	r0,  M,  7
727	ble	.L999
728
729	andi.	r0,  M,  4
730	beq	.L136
731
	/* Four elements, merged into two pairs. */
732	LFDUX	c01,   AO1, INC
733	LFDUX	c02,   AO1, INC
734	LFDUX	c03,   AO1, INC
735	LFDUX	c04,   AO1, INC
736
737	fsmfp	c01, c02
738	fsmfp	c03, c04
739
740	STFPDUX	c01,   B, INC2
741	STFPDUX	c03,   B, INC2
742	.align 4
743
744.L136:
	/* Two elements, merged into one pair. */
745	andi.	r0,  M,  2
746	beq	.L137
747
748	LFDUX	c01,   AO1, INC
749	LFDUX	c02,   AO1, INC
750
751	fsmfp	c01, c02
752	STFPDUX	c01,   B, INC2
753	.align 4
754
755.L137:
	/*
	 * Final single element.  Non-update forms are used because neither
	 * pointer is read again.  The source offset is INC (A carries a 1 * SIZE
	 * bias on this path), the destination offset INC2 (B carries 2 * SIZE).
	 */
756	andi.	r0,  M,  1
757	beq	.L999
758
759	LFDX	c01,   AO1, INC
760	STFDX	c01,   B,  INC2
761	.align 4
762
763.L999:
	/*
	 * Epilogue for code that exits with SP at the 120-byte frame depth, i.e.
	 * exactly where the prologue pushes left it (the unaligned path never
	 * moves SP).  From there the saved r30 is at SP + 16 and the saved r31 at
	 * SP + 20, hence the 12-byte adjustment where .L99 uses 4.
	 */
764	addi	SP, SP, 12	/* so that the update-form load below lands on SP + 16 */
765
766	lwzu	r30,   4(SP)
767	lwzu	r31,   4(SP)
768
	/* Step back so that the first +16 update-form load reads the f19 slot. */
769	subi	SP, SP, 12
770	li	r0, 16
771
	/* Restore in reverse order of the prologue pushes. */
772	lfpdux	f19, SP, r0
773	lfpdux	f18, SP, r0
774	lfpdux	f17, SP, r0
775	lfpdux	f16, SP, r0
776
777	lfpdux	f15, SP, r0
778	lfpdux	f14, SP, r0
779	addi	SP, SP, 16	/* drop the f14 slot: SP is back at its entry value */
780	blr
781	EPILOGUE
782