// ****************************************************************************
// *
// *  XVID MPEG-4 VIDEO CODEC
// *  - IA64 halfpel refinement -
// *
// *  Copyright(C) 2002 Johannes Singler, Daniel Winkler
// *
// *  This program is free software; you can redistribute it and/or modify it
// *  under the terms of the GNU General Public License as published by
// *  the Free Software Foundation; either version 2 of the License, or
// *  (at your option) any later version.
// *
// *  This program is distributed in the hope that it will be useful,
// *  but WITHOUT ANY WARRANTY; without even the implied warranty of
// *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// *  GNU General Public License for more details.
// *
// *  You should have received a copy of the GNU General Public License
// *  along with this program; if not, write to the Free Software
// *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
// *
// * $Id: calc_delta_2.s,v 1.2 2009-02-19 17:07:29 Isibaar Exp $
// *
// ****************************************************************************
//
// ****************************************************************************
// *
// *  calc_delta_2.s, IA-64 halfpel refinement
// *
// *  This version was implemented during an IA-64 practical training at
// *  the University of Karlsruhe (http://i44w3.info.uni-karlsruhe.de/)
// *
// ****************************************************************************

// ----------------------------------------------------------------------------
// Straight-line, predicated fragment (no label, no branch).  None of the
// register/predicate names used below (sc[], mpr[], component[], tabaddress[],
// ret0, iSAD, iFcode, dx, dy, non0_*, neg_*, cg32_*) is declared in this
// fragment.
//
// Two independent computations are interleaved, one instruction group at a
// time (";;" ends a group):
//
//   1. Bit count for the pair guarded by non0_2 / non0_3:
//          c[k]  = (component[k] + (1 << iFcode) - 1) >> iFcode
//          c[k]  = 32 if c[k] > 32
//          ret0  = 2 + [non0_2](mvtab[c[0]] + iFcode)
//                    + [non0_3](mvtab[c[1]] + iFcode)
//      mvtab is indexed with 4-byte entries (shladd by 2, ld4).
//
//   2. Tree reduction of mpr[0..7] into iSAD.
//
// While the table loads are in flight, component[0..1] are reloaded from
// dx / dy and the predicates non0_0/1 and neg_0/1 are derived from them.
//
// NOTE(review): assumes component[0..1] already hold non-negative magnitudes
// on entry and that iFcode is the shift count expected here - neither is
// established by this fragment; confirm against the including file.
//
// The order of instructions and the position of every stop are hand
// scheduled; do not reorder.
// ----------------------------------------------------------------------------

(non0_2)	mov sc[0] = 1
(non0_3)	mov sc[1] = 1
	;;
	add mpr[0] = mpr[0], mpr[1]		// reduction level 1: four pairwise sums
(non0_2)	shl sc[0] = sc[0], iFcode		// sc = 1 << iFcode
	add mpr[2] = mpr[2], mpr[3]
(non0_3)	shl sc[1] = sc[1], iFcode
	add mpr[4] = mpr[4], mpr[5]
	add mpr[6] = mpr[6], mpr[7]
	;;
(non0_2)	add sc[0] = -1, sc[0]			// sc = (1 << iFcode) - 1, the round-up bias
(non0_3)	add sc[1] = -1, sc[1]
	mov ret0 = 2				// base value; table terms are added below
	;;
(non0_2)	add component[0] = component[0], sc[0]
(non0_3)	add component[1] = component[1], sc[1]
	;;
(non0_2)	shr component[0] = component[0], iFcode	// signed shift right
(non0_3)	shr component[1] = component[1], iFcode
	add mpr[0] = mpr[0], mpr[2]		// reduction level 2
	add mpr[4] = mpr[4], mpr[6]
	;;
// cg32_k = (32 < component[k]), signed compare.
// NOTE(review): with a false qualifying predicate a normal compare leaves its
// targets unwritten, so cg32_k may be stale when non0_2/non0_3 is clear.  The
// clamp below may then fire, but component[k] is only consumed under
// non0_2/non0_3 and is overwritten from dx/dy before its next use.
(non0_2)	cmp.lt cg32_0, p0 = 32, component[0]
(non0_3)	cmp.lt cg32_1, p0 = 32, component[1]
	;;
(cg32_0)	mov component[0] = 32			// clamp table index to 32
(cg32_1)	mov component[1] = 32
	;;
(non0_2)	addl tabaddress[0] = @gprel(mvtab#), gp	// gp-relative address of mvtab
(non0_3)	addl tabaddress[1] = @gprel(mvtab#), gp
	;;
(non0_2)	shladd tabaddress[0] = component[0], 2, tabaddress[0]	// &mvtab[c], 4 bytes per entry
(non0_3)	shladd tabaddress[1] = component[1], 2, tabaddress[1]
	;;
(non0_2)	ld4 sc[0] = [tabaddress[0]]		// sc = mvtab[c]
(non0_3)	ld4 sc[1] = [tabaddress[1]]
	mov component[0] = dx			// latch the next pair (same group as the loads)
	mov component[1] = dy
	cmp.ne non0_0, p0 = 0, dx		// non0_0 = (dx != 0)
	cmp.gt neg_0, p0 = 0, dx		// neg_0  = (dx <  0)
// NOTE(review): as computed above neg_0 implies non0_0, so the two predicates
// are not actually exclusive; the annotation only informs the assembler's
// dependency checker.  Same for the dy pair below.  Confirm intent.
	.pred.rel "mutex", p30, p34	//non0_0, neg_0

	cmp.ne non0_1, p0 = 0, dy		// non0_1 = (dy != 0)
	cmp.gt neg_1, p0 = 0, dy		// neg_1  = (dy <  0)
	;;
	.pred.rel "mutex", p31, p35	//non0_1, neg_1

(non0_2)	add sc[0] = iFcode, sc[0]		// sc = mvtab[c] + iFcode
(non0_3)	add sc[1] = iFcode, sc[1]
	;;
(non0_2)	add ret0 = ret0, sc[0]
(neg_0)	sub component[0] = 0, component[0]	//abs
(neg_1)	sub component[1] = 0, component[1]	//abs
	;;
(non0_3)	add ret0 = ret0, sc[1]			// separate group: second write to ret0
	add iSAD = mpr[0], mpr[4]		// reduction level 3: sum of mpr[0..7]
	;;

// ----------------------------------------------------------------------------
// Hand-bundled section (.explicit: bundles and stops are given by the
// programmer, not chosen by the assembler).
//
//   bundle 1 (.mii): copy the integer in ret0 into FP register fmv and, for
//                    the pair guarded by non0_0 / non0_1, start sc = 1.
//   bundle 2 (.mfb): fmv = low 64 bits of the integer product fmv * fQuant.
//                    Only the F slot is written here.
//   bundle 3 (.mii): sc = 1 << iFcode for the non0_0 / non0_1 pair; it shares
//                    an instruction group with bundle 2 (no stop in between).
//
// NOTE(review): fQuant is presumably the quantiser held in an FP register;
// its definition is outside this fragment - confirm.
// ----------------------------------------------------------------------------
.explicit
{.mii
	setf.sig fmv = ret0		// GR -> FR significand, needed for xmpy
(non0_0)	mov sc[0] = 1
(non0_1)	mov sc[1] = 1
	;;
}
{.mfb
	xmpy.l fmv = fmv, fQuant	// integer multiply in the FP unit, low half kept
}
{.mii
(non0_0)	shl sc[0] = sc[0], iFcode
(non0_1)	shl sc[1] = sc[1], iFcode
	;;
}

.default				// back to assembler-managed bundling

// ----------------------------------------------------------------------------
// Table lookup for the pair guarded by non0_0 / non0_1.  On entry sc[k] is
// expected to hold 1 << iFcode (set by the instructions that precede this
// point in the file).  Steps, per k in {0, 1}:
//
//     sc[k]        = sc[k] - 1
//     component[k] = (component[k] + sc[k]) >> iFcode
//     component[k] = 32 if component[k] > 32
//     sc[k]        = mvtab[component[k]]          (4-byte entries)
//
// Interleaved with that, the product held in fmv is moved back to the integer
// register ret0 and accumulated into mpr[8].
//
// The fragment ends right after the loads; whatever consumes sc[0..1] is not
// part of the visible code.
//
// Instruction order and stop placement are hand scheduled; do not reorder.
// ----------------------------------------------------------------------------
(non0_0)	add sc[0] = -1, sc[0]			// round-up bias: (1 << iFcode) - 1
(non0_1)	add sc[1] = -1, sc[1]
	;;
(non0_0)	add component[0] = component[0], sc[0]
(non0_1)	add component[1] = component[1], sc[1]
	;;
(non0_0)	shr component[0] = component[0], iFcode	// signed shift right
(non0_1)	shr component[1] = component[1], iFcode
	;;
// cg32_k = (32 < component[k]), signed compare.
// NOTE(review): when non0_0/non0_1 is clear the compare does not write
// cg32_k, so the clamp below may act on a stale predicate; within this
// fragment component[k] is afterwards read only under non0_0/non0_1.
(non0_0)	cmp.lt cg32_0, p0 = 32, component[0]
(non0_1)	cmp.lt cg32_1, p0 = 32, component[1]
	;;
(cg32_0)	mov component[0] = 32			// clamp table index to 32
(cg32_1)	mov component[1] = 32
	;;
(non0_0)	addl tabaddress[0] = @gprel(mvtab#), gp	// gp-relative address of mvtab
(non0_1)	addl tabaddress[1] = @gprel(mvtab#), gp
	;;
(non0_0)	shladd tabaddress[0] = component[0], 2, tabaddress[0]	// &mvtab[component], 4 bytes per entry
(non0_1)	shladd tabaddress[1] = component[1], 2, tabaddress[1]
	getf.sig ret0 = fmv			// FR significand -> GR
	;;
(non0_0)	ld4 sc[0] = [tabaddress[0]]		// sc = mvtab[component]
(non0_1)	ld4 sc[1] = [tabaddress[1]]
	add mpr[8] = mpr[8], ret0		// accumulate the value read back from fmv
	;;
