// ****************************************************************************
// *
// *  XVID MPEG-4 VIDEO CODEC
// *  - IA64 inverse discrete cosine transform -
// *
// *  Copyright(C) 2002 Christian Schwarz, Haiko Gaisser, Sebastian Hack
// *
// *  This program is free software; you can redistribute it and/or modify it
// *  under the terms of the GNU General Public License as published by
// *  the Free Software Foundation; either version 2 of the License, or
// *  (at your option) any later version.
// *
// *  This program is distributed in the hope that it will be useful,
// *  but WITHOUT ANY WARRANTY; without even the implied warranty of
// *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// *  GNU General Public License for more details.
// *
// *  You should have received a copy of the GNU General Public License
// *  along with this program; if not, write to the Free Software
// *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
// *
// * $Id: idct_ia64_ecc.s,v 1.2 2009-02-19 17:07:29 Isibaar Exp $
// *
// ***************************************************************************/
//
// ****************************************************************************
// *
// *  idct_ia64_ecc.s, IA-64 optimized inverse DCT
// *
// *  This version was implemented during an IA-64 practical training at
// *  the University of Karlsruhe (http://i44w3.info.uni-karlsruhe.de/)
// *
// ****************************************************************************
//

// ---------------------------------------------------------------------------
// Register aliases used by the routines in this file.
//
//   addreg1, addreg2   general registers r14 / r15, used as address pointers
//                      (constant table first, coefficient block afterwards)
//   c0 .. c15          floating-point registers f32 .. f47; idct_ia64 fills
//                      each one from the .data_cN entry of the same index
//
// The stray decimal prefix that was fused onto every line (extraction
// residue) has been removed; with it the names were not valid symbols.
// ---------------------------------------------------------------------------
addreg1 = r14
addreg2 = r15

c0  = f32
c1  = f33
c2  = f34
c3  = f35
c4  = f36
c5  = f37
c6  = f38
c7  = f39
c8  = f40
c9  = f41
c10 = f42
c11 = f43
c12 = f44
c13 = f45
c14 = f46
c15 = f47
// ---------------------------------------------------------------------------
// Constant table for idct_ia64 (small data section, addressed gp-relative).
//
// Every entry stores the same single-precision value twice, so one entry is
// 8 bytes.  Two entries follow each 16-byte alignment point, and idct_ia64
// fetches two neighbouring entries per ldfp8.
//
// Layout constraint: idct_ia64 takes the addresses of .data_c0 and .data_c2
// only and reaches every later pair by adding fixed offsets (+32, +64, +96).
// Do not reorder entries, change their size, or drop an .align directive.
//
// NOTE(review): .data_c11 carries exactly the same literal as .data_c4 --
// confirm against the generator / reference IDCT that this is intended.
//
// The stray decimal prefix that was fused onto every line (extraction
// residue) has been removed; values, labels and order are unchanged.
// ---------------------------------------------------------------------------
.sdata

.align 16
.data_c0:
	real4 0.353553390593273730857504233427, 0.353553390593273730857504233427
.data_c1:
	real4 -2.414213562373094923430016933708, -2.414213562373094923430016933708

.align 16
.data_c2:
	real4 -0.414213562373095034452319396223, -0.414213562373095034452319396223
.data_c3:
	real4 0.198912367379658006072418174881, 0.198912367379658006072418174881

.align 16
.data_c4:
	real4 5.027339492125848074977056967327, 5.027339492125848074977056967327
.data_c5:
	real4 0.668178637919298878955487452913, 0.668178637919298878955487452913

.align 16
.data_c6:
	real4 1.496605762665489169904731170391, 1.496605762665489169904731170391
.data_c7:
	real4 0.461939766255643369241568052530, 0.461939766255643369241568052530

.align 16
.data_c8:
	real4 0.191341716182544890889616340246, 0.191341716182544890889616340246
.data_c9:
	real4 0.847759065022573476966272210120, 0.847759065022573476966272210120

.align 16
.data_c10:
	real4 2.847759065022573476966272210120, 2.847759065022573476966272210120
.data_c11:
	real4 5.027339492125848074977056967327, 5.027339492125848074977056967327

.align 16
.data_c12:
	real4 0.490392640201615215289621119155, 0.490392640201615215289621119155
.data_c13:
	real4 0.068974844820735750627882509889, 0.068974844820735750627882509889

.align 16
.data_c14:
	real4 0.097545161008064124041894160655, 0.097545161008064124041894160655
.data_c15:
	real4 1.000000000000000000000000000000, 1.000000000000000000000000000000

.text
.global idct_ia64
.global idct_ia64_init

// ---------------------------------------------------------------------------
// idct_ia64_init
//
// Initialisation entry point with an empty body: the only instruction is the
// return branch through b0, so no register or memory is read or written.
//
// The stray decimal prefix that was fused onto every line (extraction
// residue) has been removed; the instruction itself is unchanged.
// ---------------------------------------------------------------------------
.align 16
.proc idct_ia64_init
idct_ia64_init:
	br.ret.sptk.few b0		// return to the caller immediately
.endp
104.align 16
105.proc idct_ia64
106idct_ia64:
107
108	addreg3 = r20
109	addreg4 = r21
110	addreg5 = r22
111	addreg6 = r23
112
113	one = f30
114	alloc   r16 = ar.pfs, 1, 71, 0, 0
115	addl	addreg1 = @gprel(.data_c0#), gp
116	addl	addreg2 = @gprel(.data_c2#), gp
117	;;
118	add	addreg3 = 32, addreg1
119	add	addreg4 = 32, addreg2
120	add	addreg5 = 64, addreg1
121	add	addreg6 = 64, addreg2
122	;;
123	ldfp8	c0, c1 = [addreg1]
124	ldfp8	c2, c3 = [addreg2]
125	;;
126	ldfp8	c4, c5 = [addreg3], 16
127	ldfp8	c6, c7 = [addreg4], 16
128	add	addreg1 = 96, addreg1
129	add	addreg2 = 96, addreg2
130	;;
131	ldfp8	c8, c9 = [addreg5], 16
132	ldfp8	c10, c11 = [addreg6], 16
133	;;
134	ldfp8	c12, c13 = [addreg1]
135	ldfp8	c14, c15 = [addreg2]
136	;;
137	mov	addreg1 = in0
138	fpack	one = f1, f1
139	add	addreg2 = 2, in0
140	;;
141
142	ld2  r33 = [addreg1], 4
143	ld2  r34 = [addreg2], 4
144	;;
145	ld2  r35 = [addreg1], 4
146	ld2  r36 = [addreg2], 4
147	;;
148	ld2  r37 = [addreg1], 4
149	ld2  r38 = [addreg2], 4
150	;;
151	ld2  r39 = [addreg1], 4
152	ld2  r40 = [addreg2], 4
153	;;
154	ld2  r41 = [addreg1], 4
155	ld2  r42 = [addreg2], 4
156	;;
157	ld2  r43 = [addreg1], 4
158	ld2  r44 = [addreg2], 4
159	;;
160	ld2  r45 = [addreg1], 4
161	ld2  r46 = [addreg2], 4
162	;;
163	ld2  r47 = [addreg1], 4
164	ld2  r48 = [addreg2], 4
165	;;
166	ld2  r49 = [addreg1], 4
167	ld2  r50 = [addreg2], 4
168	;;
169	ld2  r51 = [addreg1], 4
170	ld2  r52 = [addreg2], 4
171	;;
172	ld2  r53 = [addreg1], 4
173	ld2  r54 = [addreg2], 4
174	;;
175	ld2  r55 = [addreg1], 4
176	ld2  r56 = [addreg2], 4
177	;;
178	ld2  r57 = [addreg1], 4
179	ld2  r58 = [addreg2], 4
180	;;
181	ld2  r59 = [addreg1], 4
182	ld2  r60 = [addreg2], 4
183	;;
184	ld2  r61 = [addreg1], 4
185	ld2  r62 = [addreg2], 4
186	;;
187	ld2  r63 = [addreg1], 4
188	ld2  r64 = [addreg2], 4
189	;;
190	ld2  r65 = [addreg1], 4
191	ld2  r66 = [addreg2], 4
192	;;
193	ld2  r67 = [addreg1], 4
194	ld2  r68 = [addreg2], 4
195	;;
196	ld2  r69 = [addreg1], 4
197	ld2  r70 = [addreg2], 4
198	;;
199	ld2  r71 = [addreg1], 4
200	ld2  r72 = [addreg2], 4
201	;;
202	ld2  r73 = [addreg1], 4
203	ld2  r74 = [addreg2], 4
204	;;
205	ld2  r75 = [addreg1], 4
206	ld2  r76 = [addreg2], 4
207	;;
208	ld2  r77 = [addreg1], 4
209	ld2  r78 = [addreg2], 4
210	;;
211	ld2  r79 = [addreg1], 4
212	ld2  r80 = [addreg2], 4
213	;;
214	ld2  r81 = [addreg1], 4
215	ld2  r82 = [addreg2], 4
216	;;
217	ld2  r83 = [addreg1], 4
218	ld2  r84 = [addreg2], 4
219	;;
220	ld2  r85 = [addreg1], 4
221	ld2  r86 = [addreg2], 4
222	;;
223	ld2  r87 = [addreg1], 4
224	ld2  r88 = [addreg2], 4
225	;;
226	ld2  r89 = [addreg1], 4
227	ld2  r90 = [addreg2], 4
228	;;
229	ld2  r91 = [addreg1], 4
230	ld2  r92 = [addreg2], 4
231	;;
232	ld2  r93 = [addreg1], 4
233	ld2  r94 = [addreg2], 4
234	;;
235	ld2  r95 = [addreg1], 4
236	ld2  r96 = [addreg2], 4
237	;;
238	sxt2  r33 = r33
239	sxt2  r34 = r34
240	sxt2  r35 = r35
241	sxt2  r36 = r36
242	sxt2  r37 = r37
243	sxt2  r38 = r38
244	sxt2  r39 = r39
245	sxt2  r40 = r40
246	sxt2  r41 = r41
247	sxt2  r42 = r42
248	sxt2  r43 = r43
249	sxt2  r44 = r44
250	sxt2  r45 = r45
251	sxt2  r46 = r46
252	sxt2  r47 = r47
253	sxt2  r48 = r48
254	sxt2  r49 = r49
255	sxt2  r50 = r50
256	sxt2  r51 = r51
257	sxt2  r52 = r52
258	sxt2  r53 = r53
259	sxt2  r54 = r54
260	sxt2  r55 = r55
261	sxt2  r56 = r56
262	sxt2  r57 = r57
263	sxt2  r58 = r58
264	sxt2  r59 = r59
265	sxt2  r60 = r60
266	sxt2  r61 = r61
267	sxt2  r62 = r62
268	sxt2  r63 = r63
269	sxt2  r64 = r64
270	sxt2  r65 = r65
271	sxt2  r66 = r66
272	sxt2  r67 = r67
273	sxt2  r68 = r68
274	sxt2  r69 = r69
275	sxt2  r70 = r70
276	sxt2  r71 = r71
277	sxt2  r72 = r72
278	sxt2  r73 = r73
279	sxt2  r74 = r74
280	sxt2  r75 = r75
281	sxt2  r76 = r76
282	sxt2  r77 = r77
283	sxt2  r78 = r78
284	sxt2  r79 = r79
285	sxt2  r80 = r80
286	sxt2  r81 = r81
287	sxt2  r82 = r82
288	sxt2  r83 = r83
289	sxt2  r84 = r84
290	sxt2  r85 = r85
291	sxt2  r86 = r86
292	sxt2  r87 = r87
293	sxt2  r88 = r88
294	sxt2  r89 = r89
295	sxt2  r90 = r90
296	sxt2  r91 = r91
297	sxt2  r92 = r92
298	sxt2  r93 = r93
299	sxt2  r94 = r94
300	sxt2  r95 = r95
301	sxt2  r96 = r96
302	;;
303	setf.sig  f48 = r33
304	setf.sig  f49 = r34
305	setf.sig  f50 = r35
306	setf.sig  f51 = r36
307	setf.sig  f52 = r37
308	setf.sig  f53 = r38
309	setf.sig  f54 = r39
310	setf.sig  f55 = r40
311	setf.sig  f56 = r41
312	setf.sig  f57 = r42
313	setf.sig  f58 = r43
314	setf.sig  f59 = r44
315	setf.sig  f60 = r45
316	setf.sig  f61 = r46
317	setf.sig  f62 = r47
318	setf.sig  f63 = r48
319	setf.sig  f64 = r49
320	setf.sig  f65 = r50
321	setf.sig  f66 = r51
322	setf.sig  f67 = r52
323	setf.sig  f68 = r53
324	setf.sig  f69 = r54
325	setf.sig  f70 = r55
326	setf.sig  f71 = r56
327	setf.sig  f72 = r57
328	setf.sig  f73 = r58
329	setf.sig  f74 = r59
330	setf.sig  f75 = r60
331	setf.sig  f76 = r61
332	setf.sig  f77 = r62
333	setf.sig  f78 = r63
334	setf.sig  f79 = r64
335	setf.sig  f80 = r65
336	setf.sig  f81 = r66
337	setf.sig  f82 = r67
338	setf.sig  f83 = r68
339	setf.sig  f84 = r69
340	setf.sig  f85 = r70
341	setf.sig  f86 = r71
342	setf.sig  f87 = r72
343	setf.sig  f88 = r73
344	setf.sig  f89 = r74
345	setf.sig  f90 = r75
346	setf.sig  f91 = r76
347	setf.sig  f92 = r77
348	setf.sig  f93 = r78
349	setf.sig  f94 = r79
350	setf.sig  f95 = r80
351	setf.sig  f96 = r81
352	setf.sig  f97 = r82
353	setf.sig  f98 = r83
354	setf.sig  f99 = r84
355	setf.sig  f100 = r85
356	setf.sig  f101 = r86
357	setf.sig  f102 = r87
358	setf.sig  f103 = r88
359	setf.sig  f104 = r89
360	setf.sig  f105 = r90
361	setf.sig  f106 = r91
362	setf.sig  f107 = r92
363	setf.sig  f108 = r93
364	setf.sig  f109 = r94
365	setf.sig  f110 = r95
366	setf.sig  f111 = r96
367	;;
368	fcvt.xf  f48 = f48
369	fcvt.xf  f49 = f49
370	fcvt.xf  f50 = f50
371	fcvt.xf  f51 = f51
372	fcvt.xf  f52 = f52
373	fcvt.xf  f53 = f53
374	fcvt.xf  f54 = f54
375	fcvt.xf  f55 = f55
376	fcvt.xf  f56 = f56
377	fcvt.xf  f57 = f57
378	fcvt.xf  f58 = f58
379	fcvt.xf  f59 = f59
380	fcvt.xf  f60 = f60
381	fcvt.xf  f61 = f61
382	fcvt.xf  f62 = f62
383	fcvt.xf  f63 = f63
384	fcvt.xf  f64 = f64
385	fcvt.xf  f65 = f65
386	fcvt.xf  f66 = f66
387	fcvt.xf  f67 = f67
388	fcvt.xf  f68 = f68
389	fcvt.xf  f69 = f69
390	fcvt.xf  f70 = f70
391	fcvt.xf  f71 = f71
392	fcvt.xf  f72 = f72
393	fcvt.xf  f73 = f73
394	fcvt.xf  f74 = f74
395	fcvt.xf  f75 = f75
396	fcvt.xf  f76 = f76
397	fcvt.xf  f77 = f77
398	fcvt.xf  f78 = f78
399	fcvt.xf  f79 = f79
400	fcvt.xf  f80 = f80
401	fcvt.xf  f81 = f81
402	fcvt.xf  f82 = f82
403	fcvt.xf  f83 = f83
404	fcvt.xf  f84 = f84
405	fcvt.xf  f85 = f85
406	fcvt.xf  f86 = f86
407	fcvt.xf  f87 = f87
408	fcvt.xf  f88 = f88
409	fcvt.xf  f89 = f89
410	fcvt.xf  f90 = f90
411	fcvt.xf  f91 = f91
412	fcvt.xf  f92 = f92
413	fcvt.xf  f93 = f93
414	fcvt.xf  f94 = f94
415	fcvt.xf  f95 = f95
416	fcvt.xf  f96 = f96
417	fcvt.xf  f97 = f97
418	fcvt.xf  f98 = f98
419	fcvt.xf  f99 = f99
420	fcvt.xf  f100 = f100
421	fcvt.xf  f101 = f101
422	fcvt.xf  f102 = f102
423	fcvt.xf  f103 = f103
424	fcvt.xf  f104 = f104
425	fcvt.xf  f105 = f105
426	fcvt.xf  f106 = f106
427	fcvt.xf  f107 = f107
428	fcvt.xf  f108 = f108
429	fcvt.xf  f109 = f109
430	fcvt.xf  f110 = f110
431	fcvt.xf  f111 = f111
432	;;
433	fpack    f48 = f48, f49
434	;;
435	fpack    f49 = f50, f51
436	;;
437	fpack    f50 = f52, f53
438	;;
439	fpack    f51 = f54, f55
440	;;
441	fpack    f52 = f56, f57
442	;;
443	fpack    f53 = f58, f59
444	;;
445	fpack    f54 = f60, f61
446	;;
447	fpack    f55 = f62, f63
448	;;
449	fpack    f56 = f64, f65
450	;;
451	fpack    f57 = f66, f67
452	;;
453	fpack    f58 = f68, f69
454	;;
455	fpack    f59 = f70, f71
456	;;
457	fpack    f60 = f72, f73
458	;;
459	fpack    f61 = f74, f75
460	;;
461	fpack    f62 = f76, f77
462	;;
463	fpack    f63 = f78, f79
464	;;
465	fpack    f64 = f80, f81
466	;;
467	fpack    f65 = f82, f83
468	;;
469	fpack    f66 = f84, f85
470	;;
471	fpack    f67 = f86, f87
472	;;
473	fpack    f68 = f88, f89
474	;;
475	fpack    f69 = f90, f91
476	;;
477	fpack    f70 = f92, f93
478	;;
479	fpack    f71 = f94, f95
480	;;
481	fpack    f72 = f96, f97
482	;;
483	fpack    f73 = f98, f99
484	;;
485	fpack    f74 = f100, f101
486	;;
487	fpack    f75 = f102, f103
488	;;
489	fpack    f76 = f104, f105
490	;;
491	fpack    f77 = f106, f107
492	;;
493	fpack    f78 = f108, f109
494	;;
495	fpack    f79 = f110, f111
496	;;
497	fpma    f48 = f48, c0, f0
498	fpma    f49 = f49, c0, f0
499	fpma    f50 = f50, c0, f0
500	fpma    f51 = f51, c0, f0
501	;;
502
503	// before pre shuffle
504	//  48 49 50 51
505	//  52 53 54 55
506	//  56 57 58 59
507	//  60 61 62 63
508	//  64 65 66 67
509	//  68 69 70 71
510	//  72 73 74 75
511	//  76 77 78 79
512
513	// after pre shuffle
514	//  48 49 50 51
515	//  64 53 54 55
516	//  56 57 58 59
517	//  72 61 62 63
518	//  52 65 66 67
519	//  76 69 70 71
520	//  60 73 74 75
521	//  68 77 78 79
522	// (f80, f64) = (f48, f64) $ (c0, c0), (line 0, 1)
523	fpma    f80 = f64, c0, f48
524	fpnma   f64 = f64, c0, f48
525	;;
526	// (f48, f72) = (f56, f72) $ (c1, c2), (line 2, 3)
527	fpma    f48 = f72, c1, f56
528	fpnma   f72 = f72, c2, f56
529	;;
530	// (f56, f76) = (f52, f76) $ (c3, c4), (line 4, 5)
531	fpma    f56 = f76, c3, f52
532	fpnma   f76 = f76, c4, f52
533	;;
534	// (f52, f68) = (f60, f68) $ (c5, c6), (line 6, 7)
535	fpma    f52 = f68, c5, f60
536	fpnma   f68 = f68, c6, f60
537	;;
538	;;
539	// (f60, f72) = (f80, f72) $ (c7, c7), (line 0, 3)
540	fpma    f60 = f72, c7, f80
541	fpnma   f72 = f72, c7, f80
542	;;
543	// (f80, f48) = (f64, f48) $ (c8, c8), (line 1, 2)
544	fpma    f80 = f48, c8, f64
545	fpnma   f48 = f48, c8, f64
546	;;
547	// (f64, f52) = (f56, f52) $ (c9, c9), (line 4, 6)
548	fpma    f64 = f52, c9, f56
549	fpnma   f52 = f52, c9, f56
550	;;
551	// (f56, f68) = (f76, f68) $ (c10, c10), (line 5, 7)
552	fpma    f56 = f68, c10, f76
553	fpnma   f68 = f68, c10, f76
554	;;
555	;;
556	// (f76, f52) = (f56, f52) $ (c11, c11), (line 5, 6)
557	fpma    f76 = f52, c11, f56
558	fpnma   f52 = f52, c11, f56
559	;;
560	// (f56, f64) = (f60, f64) $ (c12, c12), (line 0, 4)
561	fpma    f56 = f64, c12, f60
562	fpnma   f64 = f64, c12, f60
563	;;
564	// (f60, f68) = (f72, f68) $ (c14, c14), (line 3, 7)
565	fpma    f60 = f68, c14, f72
566	fpnma   f68 = f68, c14, f72
567	;;
568	;;
569	// (f72, f76) = (f80, f76) $ (c13, c13), (line 1, 5)
570	fpma    f72 = f76, c13, f80
571	fpnma   f76 = f76, c13, f80
572	;;
573	// (f80, f52) = (f48, f52) $ (c13, c13), (line 2, 6)
574	fpma    f80 = f52, c13, f48
575	fpnma   f52 = f52, c13, f48
576	;;
577
578	// before post shuffle
579	//  56 49 50 51
580	//  72 53 54 55
581	//  80 57 58 59
582	//  60 61 62 63
583	//  64 65 66 67
584	//  76 69 70 71
585	//  52 73 74 75
586	//  68 77 78 79
587
588	// after post shuffle
589	//  56 49 50 51
590	//  72 53 54 55
591	//  52 57 58 59
592	//  60 61 62 63
593	//  68 65 66 67
594	//  80 69 70 71
595	//  76 73 74 75
596	//  64 77 78 79
597
598	// before pre shuffle
599	//  56 49 50 51
600	//  72 53 54 55
601	//  52 57 58 59
602	//  60 61 62 63
603	//  68 65 66 67
604	//  80 69 70 71
605	//  76 73 74 75
606	//  64 77 78 79
607
608	// after pre shuffle
609	//  56 49 50 51
610	//  72 65 54 55
611	//  52 57 58 59
612	//  60 73 62 63
613	//  68 53 66 67
614	//  80 77 70 71
615	//  76 61 74 75
616	//  64 69 78 79
617	// (f48, f65) = (f49, f65) $ (c0, c0), (line 0, 1)
618	fpma    f48 = f65, c0, f49
619	fpnma   f65 = f65, c0, f49
620	;;
621	// (f49, f73) = (f57, f73) $ (c1, c2), (line 2, 3)
622	fpma    f49 = f73, c1, f57
623	fpnma   f73 = f73, c2, f57
624	;;
625	// (f57, f77) = (f53, f77) $ (c3, c4), (line 4, 5)
626	fpma    f57 = f77, c3, f53
627	fpnma   f77 = f77, c4, f53
628	;;
629	// (f53, f69) = (f61, f69) $ (c5, c6), (line 6, 7)
630	fpma    f53 = f69, c5, f61
631	fpnma   f69 = f69, c6, f61
632	;;
633	;;
634	// (f61, f73) = (f48, f73) $ (c7, c7), (line 0, 3)
635	fpma    f61 = f73, c7, f48
636	fpnma   f73 = f73, c7, f48
637	;;
638	// (f48, f49) = (f65, f49) $ (c8, c8), (line 1, 2)
639	fpma    f48 = f49, c8, f65
640	fpnma   f49 = f49, c8, f65
641	;;
642	// (f65, f53) = (f57, f53) $ (c9, c9), (line 4, 6)
643	fpma    f65 = f53, c9, f57
644	fpnma   f53 = f53, c9, f57
645	;;
646	// (f57, f69) = (f77, f69) $ (c10, c10), (line 5, 7)
647	fpma    f57 = f69, c10, f77
648	fpnma   f69 = f69, c10, f77
649	;;
650	;;
651	// (f77, f53) = (f57, f53) $ (c11, c11), (line 5, 6)
652	fpma    f77 = f53, c11, f57
653	fpnma   f53 = f53, c11, f57
654	;;
655	// (f57, f65) = (f61, f65) $ (c12, c12), (line 0, 4)
656	fpma    f57 = f65, c12, f61
657	fpnma   f65 = f65, c12, f61
658	;;
659	// (f61, f69) = (f73, f69) $ (c14, c14), (line 3, 7)
660	fpma    f61 = f69, c14, f73
661	fpnma   f69 = f69, c14, f73
662	;;
663	;;
664	// (f73, f77) = (f48, f77) $ (c13, c13), (line 1, 5)
665	fpma    f73 = f77, c13, f48
666	fpnma   f77 = f77, c13, f48
667	;;
668	// (f48, f53) = (f49, f53) $ (c13, c13), (line 2, 6)
669	fpma    f48 = f53, c13, f49
670	fpnma   f53 = f53, c13, f49
671	;;
672
673	// before post shuffle
674	//  56 57 50 51
675	//  72 73 54 55
676	//  52 48 58 59
677	//  60 61 62 63
678	//  68 65 66 67
679	//  80 77 70 71
680	//  76 53 74 75
681	//  64 69 78 79
682
683	// after post shuffle
684	//  56 57 50 51
685	//  72 73 54 55
686	//  52 53 58 59
687	//  60 61 62 63
688	//  68 69 66 67
689	//  80 48 70 71
690	//  76 77 74 75
691	//  64 65 78 79
692
693	// before pre shuffle
694	//  56 57 50 51
695	//  72 73 54 55
696	//  52 53 58 59
697	//  60 61 62 63
698	//  68 69 66 67
699	//  80 48 70 71
700	//  76 77 74 75
701	//  64 65 78 79
702
703	// after pre shuffle
704	//  56 57 50 51
705	//  72 73 66 55
706	//  52 53 58 59
707	//  60 61 74 63
708	//  68 69 54 67
709	//  80 48 78 71
710	//  76 77 62 75
711	//  64 65 70 79
712	// (f49, f66) = (f50, f66) $ (c0, c0), (line 0, 1)
713	fpma    f49 = f66, c0, f50
714	fpnma   f66 = f66, c0, f50
715	;;
716	// (f50, f74) = (f58, f74) $ (c1, c2), (line 2, 3)
717	fpma    f50 = f74, c1, f58
718	fpnma   f74 = f74, c2, f58
719	;;
720	// (f58, f78) = (f54, f78) $ (c3, c4), (line 4, 5)
721	fpma    f58 = f78, c3, f54
722	fpnma   f78 = f78, c4, f54
723	;;
724	// (f54, f70) = (f62, f70) $ (c5, c6), (line 6, 7)
725	fpma    f54 = f70, c5, f62
726	fpnma   f70 = f70, c6, f62
727	;;
728	;;
729	// (f62, f74) = (f49, f74) $ (c7, c7), (line 0, 3)
730	fpma    f62 = f74, c7, f49
731	fpnma   f74 = f74, c7, f49
732	;;
733	// (f49, f50) = (f66, f50) $ (c8, c8), (line 1, 2)
734	fpma    f49 = f50, c8, f66
735	fpnma   f50 = f50, c8, f66
736	;;
737	// (f66, f54) = (f58, f54) $ (c9, c9), (line 4, 6)
738	fpma    f66 = f54, c9, f58
739	fpnma   f54 = f54, c9, f58
740	;;
741	// (f58, f70) = (f78, f70) $ (c10, c10), (line 5, 7)
742	fpma    f58 = f70, c10, f78
743	fpnma   f70 = f70, c10, f78
744	;;
745	;;
746	// (f78, f54) = (f58, f54) $ (c11, c11), (line 5, 6)
747	fpma    f78 = f54, c11, f58
748	fpnma   f54 = f54, c11, f58
749	;;
750	// (f58, f66) = (f62, f66) $ (c12, c12), (line 0, 4)
751	fpma    f58 = f66, c12, f62
752	fpnma   f66 = f66, c12, f62
753	;;
754	// (f62, f70) = (f74, f70) $ (c14, c14), (line 3, 7)
755	fpma    f62 = f70, c14, f74
756	fpnma   f70 = f70, c14, f74
757	;;
758	;;
759	// (f74, f78) = (f49, f78) $ (c13, c13), (line 1, 5)
760	fpma    f74 = f78, c13, f49
761	fpnma   f78 = f78, c13, f49
762	;;
763	// (f49, f54) = (f50, f54) $ (c13, c13), (line 2, 6)
764	fpma    f49 = f54, c13, f50
765	fpnma   f54 = f54, c13, f50
766	;;
767
768	// before post shuffle
769	//  56 57 58 51
770	//  72 73 74 55
771	//  52 53 49 59
772	//  60 61 62 63
773	//  68 69 66 67
774	//  80 48 78 71
775	//  76 77 54 75
776	//  64 65 70 79
777
778	// after post shuffle
779	//  56 57 58 51
780	//  72 73 74 55
781	//  52 53 54 59
782	//  60 61 62 63
783	//  68 69 70 67
784	//  80 48 49 71
785	//  76 77 78 75
786	//  64 65 66 79
787
788	// before pre shuffle
789	//  56 57 58 51
790	//  72 73 74 55
791	//  52 53 54 59
792	//  60 61 62 63
793	//  68 69 70 67
794	//  80 48 49 71
795	//  76 77 78 75
796	//  64 65 66 79
797
798	// after pre shuffle
799	//  56 57 58 51
800	//  72 73 74 67
801	//  52 53 54 59
802	//  60 61 62 75
803	//  68 69 70 55
804	//  80 48 49 79
805	//  76 77 78 63
806	//  64 65 66 71
807	// (f50, f67) = (f51, f67) $ (c0, c0), (line 0, 1)
808	fpma    f50 = f67, c0, f51
809	fpnma   f67 = f67, c0, f51
810	;;
811	// (f51, f75) = (f59, f75) $ (c1, c2), (line 2, 3)
812	fpma    f51 = f75, c1, f59
813	fpnma   f75 = f75, c2, f59
814	;;
815	// (f59, f79) = (f55, f79) $ (c3, c4), (line 4, 5)
816	fpma    f59 = f79, c3, f55
817	fpnma   f79 = f79, c4, f55
818	;;
819	// (f55, f71) = (f63, f71) $ (c5, c6), (line 6, 7)
820	fpma    f55 = f71, c5, f63
821	fpnma   f71 = f71, c6, f63
822	;;
823	;;
824	// (f63, f75) = (f50, f75) $ (c7, c7), (line 0, 3)
825	fpma    f63 = f75, c7, f50
826	fpnma   f75 = f75, c7, f50
827	;;
828	// (f50, f51) = (f67, f51) $ (c8, c8), (line 1, 2)
829	fpma    f50 = f51, c8, f67
830	fpnma   f51 = f51, c8, f67
831	;;
832	// (f67, f55) = (f59, f55) $ (c9, c9), (line 4, 6)
833	fpma    f67 = f55, c9, f59
834	fpnma   f55 = f55, c9, f59
835	;;
836	// (f59, f71) = (f79, f71) $ (c10, c10), (line 5, 7)
837	fpma    f59 = f71, c10, f79
838	fpnma   f71 = f71, c10, f79
839	;;
840	;;
841	// (f79, f55) = (f59, f55) $ (c11, c11), (line 5, 6)
842	fpma    f79 = f55, c11, f59
843	fpnma   f55 = f55, c11, f59
844	;;
845	// (f59, f67) = (f63, f67) $ (c12, c12), (line 0, 4)
846	fpma    f59 = f67, c12, f63
847	fpnma   f67 = f67, c12, f63
848	;;
849	// (f63, f71) = (f75, f71) $ (c14, c14), (line 3, 7)
850	fpma    f63 = f71, c14, f75
851	fpnma   f71 = f71, c14, f75
852	;;
853	;;
854	// (f75, f79) = (f50, f79) $ (c13, c13), (line 1, 5)
855	fpma    f75 = f79, c13, f50
856	fpnma   f79 = f79, c13, f50
857	;;
858	// (f50, f55) = (f51, f55) $ (c13, c13), (line 2, 6)
859	fpma    f50 = f55, c13, f51
860	fpnma   f55 = f55, c13, f51
861	;;
862
863	// before post shuffle
864	//  56 57 58 59
865	//  72 73 74 75
866	//  52 53 54 50
867	//  60 61 62 63
868	//  68 69 70 67
869	//  80 48 49 79
870	//  76 77 78 55
871	//  64 65 66 71
872
873	// after post shuffle
874	//  56 57 58 59
875	//  72 73 74 75
876	//  52 53 54 55
877	//  60 61 62 63
878	//  68 69 70 71
879	//  80 48 49 50
880	//  76 77 78 79
881	//  64 65 66 67
882	;;
883	fmix.r  f51 = f56, f72
884	fmix.r  f81 = f57, f73
885	fmix.r  f82 = f58, f74
886	fmix.r  f83 = f59, f75
887	fmix.r  f84 = f52, f60
888	fmix.r  f85 = f53, f61
889	fmix.r  f86 = f54, f62
890	fmix.r  f87 = f55, f63
891	fmix.r  f88 = f68, f80
892	fmix.r  f89 = f69, f48
893	fmix.r  f90 = f70, f49
894	fmix.r  f91 = f71, f50
895	fmix.r  f92 = f76, f64
896	fmix.r  f93 = f77, f65
897	fmix.r  f94 = f78, f66
898	fmix.r  f95 = f79, f67
899	;;
900	fmix.l  f56 = f56, f72
901	fmix.l  f57 = f57, f73
902	fmix.l  f58 = f58, f74
903	fmix.l  f59 = f59, f75
904	fmix.l  f52 = f52, f60
905	fmix.l  f53 = f53, f61
906	fmix.l  f54 = f54, f62
907	fmix.l  f55 = f55, f63
908	fmix.l  f68 = f68, f80
909	fmix.l  f69 = f69, f48
910	fmix.l  f70 = f70, f49
911	fmix.l  f71 = f71, f50
912	fmix.l  f76 = f76, f64
913	fmix.l  f77 = f77, f65
914	fmix.l  f78 = f78, f66
915	fmix.l  f79 = f79, f67
916	;;
917	fpma    f56 = f56, c0, f0
918	fpma    f52 = f52, c0, f0
919	fpma    f68 = f68, c0, f0
920	fpma    f76 = f76, c0, f0
921	;;
922
923	// before pre shuffle
924	//  56 52 68 76
925	//  51 84 88 92
926	//  57 53 69 77
927	//  81 85 89 93
928	//  58 54 70 78
929	//  82 86 90 94
930	//  59 55 71 79
931	//  83 87 91 95
932
933	// after pre shuffle
934	//  56 52 68 76
935	//  58 84 88 92
936	//  57 53 69 77
937	//  59 85 89 93
938	//  51 54 70 78
939	//  83 86 90 94
940	//  81 55 71 79
941	//  82 87 91 95
942	// (f48, f58) = (f56, f58) $ (c0, c0), (line 0, 1)
943	fpma    f48 = f58, c0, f56
944	fpnma   f58 = f58, c0, f56
945	;;
946	// (f49, f59) = (f57, f59) $ (c1, c2), (line 2, 3)
947	fpma    f49 = f59, c1, f57
948	fpnma   f59 = f59, c2, f57
949	;;
950	// (f50, f83) = (f51, f83) $ (c3, c4), (line 4, 5)
951	fpma    f50 = f83, c3, f51
952	fpnma   f83 = f83, c4, f51
953	;;
954	// (f51, f82) = (f81, f82) $ (c5, c6), (line 6, 7)
955	fpma    f51 = f82, c5, f81
956	fpnma   f82 = f82, c6, f81
957	;;
958	;;
959	// (f56, f59) = (f48, f59) $ (c7, c7), (line 0, 3)
960	fpma    f56 = f59, c7, f48
961	fpnma   f59 = f59, c7, f48
962	;;
963	// (f48, f49) = (f58, f49) $ (c8, c8), (line 1, 2)
964	fpma    f48 = f49, c8, f58
965	fpnma   f49 = f49, c8, f58
966	;;
967	// (f57, f51) = (f50, f51) $ (c9, c9), (line 4, 6)
968	fpma    f57 = f51, c9, f50
969	fpnma   f51 = f51, c9, f50
970	;;
971	// (f50, f82) = (f83, f82) $ (c10, c10), (line 5, 7)
972	fpma    f50 = f82, c10, f83
973	fpnma   f82 = f82, c10, f83
974	;;
975	;;
976	// (f58, f51) = (f50, f51) $ (c11, c11), (line 5, 6)
977	fpma    f58 = f51, c11, f50
978	fpnma   f51 = f51, c11, f50
979	;;
980	// (f50, f57) = (f56, f57) $ (c12, c12), (line 0, 4)
981	fpma    f50 = f57, c12, f56
982	fpnma   f57 = f57, c12, f56
983	;;
984	// (f56, f82) = (f59, f82) $ (c14, c14), (line 3, 7)
985	fpma    f56 = f82, c14, f59
986	fpnma   f82 = f82, c14, f59
987	;;
988	;;
989	// (f59, f58) = (f48, f58) $ (c13, c13), (line 1, 5)
990	fpma    f59 = f58, c13, f48
991	fpnma   f58 = f58, c13, f48
992	;;
993	// (f48, f51) = (f49, f51) $ (c13, c13), (line 2, 6)
994	fpma    f48 = f51, c13, f49
995	fpnma   f51 = f51, c13, f49
996	;;
997
998	// before post shuffle
999	//  50 52 68 76
1000	//  59 84 88 92
1001	//  48 53 69 77
1002	//  56 85 89 93
1003	//  57 54 70 78
1004	//  58 86 90 94
1005	//  51 55 71 79
1006	//  82 87 91 95
1007
1008	// after post shuffle
1009	//  50 52 68 76
1010	//  59 84 88 92
1011	//  51 53 69 77
1012	//  56 85 89 93
1013	//  82 54 70 78
1014	//  48 86 90 94
1015	//  58 55 71 79
1016	//  57 87 91 95
1017
1018	// before pre shuffle
1019	//  50 52 68 76
1020	//  59 84 88 92
1021	//  51 53 69 77
1022	//  56 85 89 93
1023	//  82 54 70 78
1024	//  48 86 90 94
1025	//  58 55 71 79
1026	//  57 87 91 95
1027
1028	// after pre shuffle
1029	//  50 52 68 76
1030	//  59 54 88 92
1031	//  51 53 69 77
1032	//  56 55 89 93
1033	//  82 84 70 78
1034	//  48 87 90 94
1035	//  58 85 71 79
1036	//  57 86 91 95
1037	// (f49, f54) = (f52, f54) $ (c0, c0), (line 0, 1)
1038	fpma    f49 = f54, c0, f52
1039	fpnma   f54 = f54, c0, f52
1040	;;
1041	// (f52, f55) = (f53, f55) $ (c1, c2), (line 2, 3)
1042	fpma    f52 = f55, c1, f53
1043	fpnma   f55 = f55, c2, f53
1044	;;
1045	// (f53, f87) = (f84, f87) $ (c3, c4), (line 4, 5)
1046	fpma    f53 = f87, c3, f84
1047	fpnma   f87 = f87, c4, f84
1048	;;
1049	// (f60, f86) = (f85, f86) $ (c5, c6), (line 6, 7)
1050	fpma    f60 = f86, c5, f85
1051	fpnma   f86 = f86, c6, f85
1052	;;
1053	;;
1054	// (f61, f55) = (f49, f55) $ (c7, c7), (line 0, 3)
1055	fpma    f61 = f55, c7, f49
1056	fpnma   f55 = f55, c7, f49
1057	;;
1058	// (f49, f52) = (f54, f52) $ (c8, c8), (line 1, 2)
1059	fpma    f49 = f52, c8, f54
1060	fpnma   f52 = f52, c8, f54
1061	;;
1062	// (f54, f60) = (f53, f60) $ (c9, c9), (line 4, 6)
1063	fpma    f54 = f60, c9, f53
1064	fpnma   f60 = f60, c9, f53
1065	;;
1066	// (f53, f86) = (f87, f86) $ (c10, c10), (line 5, 7)
1067	fpma    f53 = f86, c10, f87
1068	fpnma   f86 = f86, c10, f87
1069	;;
1070	;;
1071	// (f62, f60) = (f53, f60) $ (c11, c11), (line 5, 6)
1072	fpma    f62 = f60, c11, f53
1073	fpnma   f60 = f60, c11, f53
1074	;;
1075	// (f53, f54) = (f61, f54) $ (c12, c12), (line 0, 4)
1076	fpma    f53 = f54, c12, f61
1077	fpnma   f54 = f54, c12, f61
1078	;;
1079	// (f61, f86) = (f55, f86) $ (c14, c14), (line 3, 7)
1080	fpma    f61 = f86, c14, f55
1081	fpnma   f86 = f86, c14, f55
1082	;;
1083	;;
1084	// (f55, f62) = (f49, f62) $ (c13, c13), (line 1, 5)
1085	fpma    f55 = f62, c13, f49
1086	fpnma   f62 = f62, c13, f49
1087	;;
1088	// (f49, f60) = (f52, f60) $ (c13, c13), (line 2, 6)
1089	fpma    f49 = f60, c13, f52
1090	fpnma   f60 = f60, c13, f52
1091	;;
1092
1093	// before post shuffle
1094	//  50 53 68 76
1095	//  59 55 88 92
1096	//  51 49 69 77
1097	//  56 61 89 93
1098	//  82 54 70 78
1099	//  48 62 90 94
1100	//  58 60 71 79
1101	//  57 86 91 95
1102
1103	// after post shuffle
1104	//  50 53 68 76
1105	//  59 55 88 92
1106	//  51 60 69 77
1107	//  56 61 89 93
1108	//  82 86 70 78
1109	//  48 49 90 94
1110	//  58 62 71 79
1111	//  57 54 91 95
1112
1113	// before pre shuffle
1114	//  50 53 68 76
1115	//  59 55 88 92
1116	//  51 60 69 77
1117	//  56 61 89 93
1118	//  82 86 70 78
1119	//  48 49 90 94
1120	//  58 62 71 79
1121	//  57 54 91 95
1122
1123	// after pre shuffle
1124	//  50 53 68 76
1125	//  59 55 70 92
1126	//  51 60 69 77
1127	//  56 61 71 93
1128	//  82 86 88 78
1129	//  48 49 91 94
1130	//  58 62 89 79
1131	//  57 54 90 95
1132	// (f52, f70) = (f68, f70) $ (c0, c0), (line 0, 1)
1133	fpma    f52 = f70, c0, f68
1134	fpnma   f70 = f70, c0, f68
1135	;;
1136	// (f63, f71) = (f69, f71) $ (c1, c2), (line 2, 3)
1137	fpma    f63 = f71, c1, f69
1138	fpnma   f71 = f71, c2, f69
1139	;;
1140	// (f64, f91) = (f88, f91) $ (c3, c4), (line 4, 5)
1141	fpma    f64 = f91, c3, f88
1142	fpnma   f91 = f91, c4, f88
1143	;;
1144	// (f65, f90) = (f89, f90) $ (c5, c6), (line 6, 7)
1145	fpma    f65 = f90, c5, f89
1146	fpnma   f90 = f90, c6, f89
1147	;;
1148	;;
1149	// (f66, f71) = (f52, f71) $ (c7, c7), (line 0, 3)
1150	fpma    f66 = f71, c7, f52
1151	fpnma   f71 = f71, c7, f52
1152	;;
1153	// (f52, f63) = (f70, f63) $ (c8, c8), (line 1, 2)
1154	fpma    f52 = f63, c8, f70
1155	fpnma   f63 = f63, c8, f70
1156	;;
1157	// (f67, f65) = (f64, f65) $ (c9, c9), (line 4, 6)
1158	fpma    f67 = f65, c9, f64
1159	fpnma   f65 = f65, c9, f64
1160	;;
1161	// (f64, f90) = (f91, f90) $ (c10, c10), (line 5, 7)
1162	fpma    f64 = f90, c10, f91
1163	fpnma   f90 = f90, c10, f91
1164	;;
1165	;;
1166	// (f68, f65) = (f64, f65) $ (c11, c11), (line 5, 6)
1167	fpma    f68 = f65, c11, f64
1168	fpnma   f65 = f65, c11, f64
1169	;;
1170	// (f64, f67) = (f66, f67) $ (c12, c12), (line 0, 4)
1171	fpma    f64 = f67, c12, f66
1172	fpnma   f67 = f67, c12, f66
1173	;;
1174	// (f66, f90) = (f71, f90) $ (c14, c14), (line 3, 7)
1175	fpma    f66 = f90, c14, f71
1176	fpnma   f90 = f90, c14, f71
1177	;;
1178	;;
1179	// (f69, f68) = (f52, f68) $ (c13, c13), (line 1, 5)
1180	fpma    f69 = f68, c13, f52
1181	fpnma   f68 = f68, c13, f52
1182	;;
1183	// (f52, f65) = (f63, f65) $ (c13, c13), (line 2, 6)
1184	fpma    f52 = f65, c13, f63
1185	fpnma   f65 = f65, c13, f63
1186	;;
1187
1188	// before post shuffle
1189	//  50 53 64 76
1190	//  59 55 69 92
1191	//  51 60 52 77
1192	//  56 61 66 93
1193	//  82 86 67 78
1194	//  48 49 68 94
1195	//  58 62 65 79
1196	//  57 54 90 95
1197
1198	// after post shuffle
1199	//  50 53 64 76
1200	//  59 55 69 92
1201	//  51 60 65 77
1202	//  56 61 66 93
1203	//  82 86 90 78
1204	//  48 49 52 94
1205	//  58 62 68 79
1206	//  57 54 67 95
1207
1208	// before pre shuffle
1209	//  50 53 64 76
1210	//  59 55 69 92
1211	//  51 60 65 77
1212	//  56 61 66 93
1213	//  82 86 90 78
1214	//  48 49 52 94
1215	//  58 62 68 79
1216	//  57 54 67 95
1217
1218	// after pre shuffle
1219	//  50 53 64 76
1220	//  59 55 69 78
1221	//  51 60 65 77
1222	//  56 61 66 79
1223	//  82 86 90 92
1224	//  48 49 52 95
1225	//  58 62 68 93
1226	//  57 54 67 94
1227	// (f63, f78) = (f76, f78) $ (c0, c0), (line 0, 1)
1228	fpma    f63 = f78, c0, f76
1229	fpnma   f78 = f78, c0, f76
1230	;;
1231	// (f70, f79) = (f77, f79) $ (c1, c2), (line 2, 3)
1232	fpma    f70 = f79, c1, f77
1233	fpnma   f79 = f79, c2, f77
1234	;;
1235	// (f71, f95) = (f92, f95) $ (c3, c4), (line 4, 5)
1236	fpma    f71 = f95, c3, f92
1237	fpnma   f95 = f95, c4, f92
1238	;;
1239	// (f72, f94) = (f93, f94) $ (c5, c6), (line 6, 7)
1240	fpma    f72 = f94, c5, f93
1241	fpnma   f94 = f94, c6, f93
1242	;;
1243	;;
1244	// (f73, f79) = (f63, f79) $ (c7, c7), (line 0, 3)
1245	fpma    f73 = f79, c7, f63
1246	fpnma   f79 = f79, c7, f63
1247	;;
1248	// (f63, f70) = (f78, f70) $ (c8, c8), (line 1, 2)
1249	fpma    f63 = f70, c8, f78
1250	fpnma   f70 = f70, c8, f78
1251	;;
1252	// (f74, f72) = (f71, f72) $ (c9, c9), (line 4, 6)
1253	fpma    f74 = f72, c9, f71
1254	fpnma   f72 = f72, c9, f71
1255	;;
1256	// (f71, f94) = (f95, f94) $ (c10, c10), (line 5, 7)
1257	fpma    f71 = f94, c10, f95
1258	fpnma   f94 = f94, c10, f95
1259	;;
1260	;;
1261	// (f75, f72) = (f71, f72) $ (c11, c11), (line 5, 6)
1262	fpma    f75 = f72, c11, f71
1263	fpnma   f72 = f72, c11, f71
1264	;;
1265	// (f71, f74) = (f73, f74) $ (c12, c12), (line 0, 4)
1266	fpma    f71 = f74, c12, f73
1267	fpnma   f74 = f74, c12, f73
1268	;;
1269	// (f73, f94) = (f79, f94) $ (c14, c14), (line 3, 7)
1270	fpma    f73 = f94, c14, f79
1271	fpnma   f94 = f94, c14, f79
1272	;;
1273	;;
1274	// (f76, f75) = (f63, f75) $ (c13, c13), (line 1, 5)
1275	fpma    f76 = f75, c13, f63
1276	fpnma   f75 = f75, c13, f63
1277	;;
1278	// (f63, f72) = (f70, f72) $ (c13, c13), (line 2, 6)
1279	fpma    f63 = f72, c13, f70
1280	fpnma   f72 = f72, c13, f70
1281	;;
1282
1283	// before post shuffle
1284	//  50 53 64 71
1285	//  59 55 69 76
1286	//  51 60 65 63
1287	//  56 61 66 73
1288	//  82 86 90 74
1289	//  48 49 52 75
1290	//  58 62 68 72
1291	//  57 54 67 94
1292
1293	// after post shuffle
1294	//  50 53 64 71
1295	//  59 55 69 76
1296	//  51 60 65 72
1297	//  56 61 66 73
1298	//  82 86 90 94
1299	//  48 49 52 63
1300	//  58 62 68 75
1301	//  57 54 67 74
1302	;;
1303	fmix.r  f70 = f50, f59
1304	fmix.r  f77 = f53, f55
1305	fmix.r  f78 = f64, f69
1306	fmix.r  f79 = f71, f76
1307	fmix.r  f80 = f51, f56
1308	fmix.r  f81 = f60, f61
1309	fmix.r  f83 = f65, f66
1310	fmix.r  f84 = f72, f73
1311	fmix.r  f85 = f82, f48
1312	fmix.r  f87 = f86, f49
1313	fmix.r  f88 = f90, f52
1314	fmix.r  f89 = f94, f63
1315	fmix.r  f91 = f58, f57
1316	fmix.r  f92 = f62, f54
1317	fmix.r  f93 = f68, f67
1318	fmix.r  f95 = f75, f74
1319	;;
1320	fmix.l  f50 = f50, f59
1321	fmix.l  f53 = f53, f55
1322	fmix.l  f64 = f64, f69
1323	fmix.l  f71 = f71, f76
1324	fmix.l  f51 = f51, f56
1325	fmix.l  f60 = f60, f61
1326	fmix.l  f65 = f65, f66
1327	fmix.l  f72 = f72, f73
1328	fmix.l  f82 = f82, f48
1329	fmix.l  f86 = f86, f49
1330	fmix.l  f90 = f90, f52
1331	fmix.l  f94 = f94, f63
1332	fmix.l  f58 = f58, f57
1333	fmix.l  f62 = f62, f54
1334	fmix.l  f68 = f68, f67
1335	fmix.l  f75 = f75, f74
1336	;;
1337	//  50 51 82 58
1338	//  70 80 85 91
1339	//  53 60 86 62
1340	//  77 81 87 92
1341	//  64 65 90 68
1342	//  78 83 88 93
1343	//  71 72 94 75
1344	//  79 84 89 95
1345	mov   addreg1 = in0
1346	add   addreg2 = 4, in0
1347	;;
1348	fpcvt.fx f50 = f50
1349	fpcvt.fx f51 = f51
1350	fpcvt.fx f82 = f82
1351	fpcvt.fx f58 = f58
1352	fpcvt.fx f70 = f70
1353	fpcvt.fx f80 = f80
1354	fpcvt.fx f85 = f85
1355	fpcvt.fx f91 = f91
1356	fpcvt.fx f53 = f53
1357	fpcvt.fx f60 = f60
1358	fpcvt.fx f86 = f86
1359	fpcvt.fx f62 = f62
1360	fpcvt.fx f77 = f77
1361	fpcvt.fx f81 = f81
1362	fpcvt.fx f87 = f87
1363	fpcvt.fx f92 = f92
1364	fpcvt.fx f64 = f64
1365	fpcvt.fx f65 = f65
1366	fpcvt.fx f90 = f90
1367	fpcvt.fx f68 = f68
1368	fpcvt.fx f78 = f78
1369	fpcvt.fx f83 = f83
1370	fpcvt.fx f88 = f88
1371	fpcvt.fx f93 = f93
1372	fpcvt.fx f71 = f71
1373	fpcvt.fx f72 = f72
1374	fpcvt.fx f94 = f94
1375	fpcvt.fx f75 = f75
1376	fpcvt.fx f79 = f79
1377	fpcvt.fx f84 = f84
1378	fpcvt.fx f89 = f89
1379	fpcvt.fx f95 = f95
1380	;;
1381	getf.sig r33 = f50
1382	getf.sig r34 = f51
1383	getf.sig r35 = f82
1384	getf.sig r36 = f58
1385	getf.sig r37 = f70
1386	getf.sig r38 = f80
1387	getf.sig r39 = f85
1388	getf.sig r40 = f91
1389	getf.sig r41 = f53
1390	getf.sig r42 = f60
1391	getf.sig r43 = f86
1392	getf.sig r44 = f62
1393	getf.sig r45 = f77
1394	getf.sig r46 = f81
1395	getf.sig r47 = f87
1396	getf.sig r48 = f92
1397	getf.sig r49 = f64
1398	getf.sig r50 = f65
1399	getf.sig r51 = f90
1400	getf.sig r52 = f68
1401	getf.sig r53 = f78
1402	getf.sig r54 = f83
1403	getf.sig r55 = f88
1404	getf.sig r56 = f93
1405	getf.sig r57 = f71
1406	getf.sig r58 = f72
1407	getf.sig r59 = f94
1408	getf.sig r60 = f75
1409	getf.sig r61 = f79
1410	getf.sig r62 = f84
1411	getf.sig r63 = f89
1412	getf.sig r64 = f95
1413	;;
1414	shl      r33 = r33, 7
1415	shl      r34 = r34, 7
1416	shl      r35 = r35, 7
1417	shl      r36 = r36, 7
1418	shl      r37 = r37, 7
1419	shl      r38 = r38, 7
1420	shl      r39 = r39, 7
1421	shl      r40 = r40, 7
1422	shl      r41 = r41, 7
1423	shl      r42 = r42, 7
1424	shl      r43 = r43, 7
1425	shl      r44 = r44, 7
1426	shl      r45 = r45, 7
1427	shl      r46 = r46, 7
1428	shl      r47 = r47, 7
1429	shl      r48 = r48, 7
1430	shl      r49 = r49, 7
1431	shl      r50 = r50, 7
1432	shl      r51 = r51, 7
1433	shl      r52 = r52, 7
1434	shl      r53 = r53, 7
1435	shl      r54 = r54, 7
1436	shl      r55 = r55, 7
1437	shl      r56 = r56, 7
1438	shl      r57 = r57, 7
1439	shl      r58 = r58, 7
1440	shl      r59 = r59, 7
1441	shl      r60 = r60, 7
1442	shl      r61 = r61, 7
1443	shl      r62 = r62, 7
1444	shl      r63 = r63, 7
1445	shl      r64 = r64, 7
1446	;;
1447	pack4.sss r33 = r33, r0
1448	pack4.sss r34 = r34, r0
1449	pack4.sss r35 = r35, r0
1450	pack4.sss r36 = r36, r0
1451	pack4.sss r37 = r37, r0
1452	pack4.sss r38 = r38, r0
1453	pack4.sss r39 = r39, r0
1454	pack4.sss r40 = r40, r0
1455	pack4.sss r41 = r41, r0
1456	pack4.sss r42 = r42, r0
1457	pack4.sss r43 = r43, r0
1458	pack4.sss r44 = r44, r0
1459	pack4.sss r45 = r45, r0
1460	pack4.sss r46 = r46, r0
1461	pack4.sss r47 = r47, r0
1462	pack4.sss r48 = r48, r0
1463	pack4.sss r49 = r49, r0
1464	pack4.sss r50 = r50, r0
1465	pack4.sss r51 = r51, r0
1466	pack4.sss r52 = r52, r0
1467	pack4.sss r53 = r53, r0
1468	pack4.sss r54 = r54, r0
1469	pack4.sss r55 = r55, r0
1470	pack4.sss r56 = r56, r0
1471	pack4.sss r57 = r57, r0
1472	pack4.sss r58 = r58, r0
1473	pack4.sss r59 = r59, r0
1474	pack4.sss r60 = r60, r0
1475	pack4.sss r61 = r61, r0
1476	pack4.sss r62 = r62, r0
1477	pack4.sss r63 = r63, r0
1478	pack4.sss r64 = r64, r0
1479	;;
1480	pshr2    r33 = r33, 7
1481	pshr2    r34 = r34, 7
1482	pshr2    r35 = r35, 7
1483	pshr2    r36 = r36, 7
1484	pshr2    r37 = r37, 7
1485	pshr2    r38 = r38, 7
1486	pshr2    r39 = r39, 7
1487	pshr2    r40 = r40, 7
1488	pshr2    r41 = r41, 7
1489	pshr2    r42 = r42, 7
1490	pshr2    r43 = r43, 7
1491	pshr2    r44 = r44, 7
1492	pshr2    r45 = r45, 7
1493	pshr2    r46 = r46, 7
1494	pshr2    r47 = r47, 7
1495	pshr2    r48 = r48, 7
1496	pshr2    r49 = r49, 7
1497	pshr2    r50 = r50, 7
1498	pshr2    r51 = r51, 7
1499	pshr2    r52 = r52, 7
1500	pshr2    r53 = r53, 7
1501	pshr2    r54 = r54, 7
1502	pshr2    r55 = r55, 7
1503	pshr2    r56 = r56, 7
1504	pshr2    r57 = r57, 7
1505	pshr2    r58 = r58, 7
1506	pshr2    r59 = r59, 7
1507	pshr2    r60 = r60, 7
1508	pshr2    r61 = r61, 7
1509	pshr2    r62 = r62, 7
1510	pshr2    r63 = r63, 7
1511	pshr2    r64 = r64, 7
1512	;;
1513	mux2     r33 = r33, 0xe1
1514	mux2     r34 = r34, 0xe1
1515	mux2     r35 = r35, 0xe1
1516	mux2     r36 = r36, 0xe1
1517	mux2     r37 = r37, 0xe1
1518	mux2     r38 = r38, 0xe1
1519	mux2     r39 = r39, 0xe1
1520	mux2     r40 = r40, 0xe1
1521	mux2     r41 = r41, 0xe1
1522	mux2     r42 = r42, 0xe1
1523	mux2     r43 = r43, 0xe1
1524	mux2     r44 = r44, 0xe1
1525	mux2     r45 = r45, 0xe1
1526	mux2     r46 = r46, 0xe1
1527	mux2     r47 = r47, 0xe1
1528	mux2     r48 = r48, 0xe1
1529	mux2     r49 = r49, 0xe1
1530	mux2     r50 = r50, 0xe1
1531	mux2     r51 = r51, 0xe1
1532	mux2     r52 = r52, 0xe1
1533	mux2     r53 = r53, 0xe1
1534	mux2     r54 = r54, 0xe1
1535	mux2     r55 = r55, 0xe1
1536	mux2     r56 = r56, 0xe1
1537	mux2     r57 = r57, 0xe1
1538	mux2     r58 = r58, 0xe1
1539	mux2     r59 = r59, 0xe1
1540	mux2     r60 = r60, 0xe1
1541	mux2     r61 = r61, 0xe1
1542	mux2     r62 = r62, 0xe1
1543	mux2     r63 = r63, 0xe1
1544	mux2     r64 = r64, 0xe1
1545	;;
1546	st4   [addreg1] = r33, 8
1547	st4   [addreg2] = r34, 8
1548	;;
1549	st4   [addreg1] = r35, 8
1550	st4   [addreg2] = r36, 8
1551	;;
1552	st4   [addreg1] = r37, 8
1553	st4   [addreg2] = r38, 8
1554	;;
1555	st4   [addreg1] = r39, 8
1556	st4   [addreg2] = r40, 8
1557	;;
1558	st4   [addreg1] = r41, 8
1559	st4   [addreg2] = r42, 8
1560	;;
1561	st4   [addreg1] = r43, 8
1562	st4   [addreg2] = r44, 8
1563	;;
1564	st4   [addreg1] = r45, 8
1565	st4   [addreg2] = r46, 8
1566	;;
1567	st4   [addreg1] = r47, 8
1568	st4   [addreg2] = r48, 8
1569	;;
1570	st4   [addreg1] = r49, 8
1571	st4   [addreg2] = r50, 8
1572	;;
1573	st4   [addreg1] = r51, 8
1574	st4   [addreg2] = r52, 8
1575	;;
1576	st4   [addreg1] = r53, 8
1577	st4   [addreg2] = r54, 8
1578	;;
1579	st4   [addreg1] = r55, 8
1580	st4   [addreg2] = r56, 8
1581	;;
1582	st4   [addreg1] = r57, 8
1583	st4   [addreg2] = r58, 8
1584	;;
1585	st4   [addreg1] = r59, 8
1586	st4   [addreg2] = r60, 8
1587	;;
1588	st4   [addreg1] = r61, 8
1589	st4   [addreg2] = r62, 8
1590	;;
1591	st4   [addreg1] = r63, 8
1592	st4   [addreg2] = r64, 8
1593	;;
1594
1595	mov	ar.pfs = r16
1596	br.ret.sptk.few b0
1597
1598.endp
1599