// -*- c++ -*-

		// First, get and save our possible Bob values.
		// Assume our pixels are laid out as follows, with x the calculated bob
		// value and all other pixels taken from the current field:
		//
		//        j a b c k		current field
		//            x			calculated line
		//        m d e f n		current field
		//
		// We calculate the bob value as:
		//		x2 = one of avg(a,f), avg(c,d), avg(b,e), avg(j,n), or avg(k,m)
		//
		// choosing the pair with the smallest absolute difference among
		// abs(a,f), abs(c,d), abs(b,e), abs(j,n) and abs(k,m).

16#ifndef IS_C
17		// a,f
18		"movq    -2(%%"XBX"), %%mm0\n\t"		// value a from top left
19		"movq    2(%%"XBX", %%"XCX"), %%mm1\n\t"	// value f from bottom right
20		"movq	%%mm0, %%mm6\n\t"
21//		pavgb	%%mm6, %%mm1					// avg(a,f), also best so far
22		V_PAVGB ("%%mm6", "%%mm1", "%%mm7", _ShiftMask)	// avg(a,f), also best so far
23        "movq	%%mm0, %%mm7\n\t"
24		"psubusb	 %%mm1, %%mm7\n\t"
25		"psubusb %%mm0, %%mm1\n\t"
26		"por		%%mm1, %%mm7\n\t"					// abs diff, also best so far
27
28		// c,d
29		"movq    2(%%"XBX"), %%mm0\n\t"		// value a from top left
30		"movq    -2(%%"XBX", %%"XCX"), %%mm1\n\t"	// value f from bottom right
31		"movq	%%mm0, %%mm2\n\t"
32//		pavgb	%%mm2, %%mm1					// avg(c,d)
33		V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)	// avg(c,d)
34        "movq	%%mm0, %%mm3\n\t"
35		"psubusb	%%mm1, %%mm3\n\t"
36		"psubusb %%mm0, %%mm1\n\t"
37		"por		%%mm1, %%mm3\n\t"					// abs(c,d)
38		"movq	%%mm3, %%mm1\n\t"					// keep copy
39
40		"psubusb %%mm7, %%mm3\n\t"			// nonzero where new weights bigger, else 0
41		"pxor	%%mm4, %%mm4\n\t"
42		"pcmpeqb %%mm4, %%mm3\n\t"			// now ff where new better, else 00
43		"pcmpeqb	%%mm3, %%mm4\n\t"			// here ff where old better, else 00
44
45		"pand	%%mm3, %%mm1\n\t"			// keep only better new avg and abs
46		"pand	%%mm3, %%mm2\n\t"
47
48		"pand	%%mm4, %%mm6\n\t"
49		"pand    %%mm4, %%mm7\n\t"
50
51		"por		%%mm2, %%mm6\n\t"			// and merge new & old vals keeping best
52		"por		%%mm1, %%mm7\n\t"
53		"por		"_UVMask", %%mm7\n\t"			// but we know chroma is worthless so far
54		"pand	"_YMask", %%mm5\n\t"			// mask out chroma from here also
55
56		// j,n
57		"movq    -4(%%"XBX"), %%mm0\n\t"		// value j from top left
58		"movq    4(%%"XBX", %%"XCX"), %%mm1\n\t"	// value n from bottom right
59		"movq	%%mm0, %%mm2\n\t"
60//		pavgb	%%mm2, %%mm1					// avg(j,n)
61		V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)	// avg(j,n)
62        "movq	%%mm0, %%mm3\n\t"
63		"psubusb	%%mm1, %%mm3\n\t"
64		"psubusb %%mm0, %%mm1\n\t"
65		"por		%%mm1, %%mm3\n\t"					// abs(j-n)
66		"movq	%%mm3, %%mm1\n\t"					// keep copy
67
68		"psubusb %%mm7, %%mm3\n\t"			// nonzero where new weights bigger, else 0
69		"pxor	%%mm4, %%mm4\n\t"
70		"pcmpeqb %%mm4, %%mm3\n\t"			// now ff where new better, else 00
71		"pcmpeqb	%%mm3, %%mm4\n\t"			// here ff where old better, else 00
72
73		"pand	%%mm3, %%mm1\n\t"			// keep only better new avg and abs
74		"pand	%%mm2, %%mm3\n\t"
75
76		"pand	%%mm4, %%mm6\n\t"
77		"pand    %%mm4, %%mm7\n\t"
78
79		"por		%%mm3, %%mm6\n\t"			// and merge new & old vals keeping best
80		"por		%%mm1, %%mm7\n\t"			// "
81
82		// k, m
83		"movq    4(%%"XBX"), %%mm0\n\t"		// value k from top right
84		"movq    -4(%%"XBX", %%"XCX"), %%mm1\n\t"	// value n from bottom left
85		"movq	%%mm0, %%mm4\n\t"
86//		pavgb	%%mm4, %%mm1					// avg(k,m)
87		V_PAVGB ("%%mm4", "%%mm1", "%%mm3", _ShiftMask)	// avg(k,m)
88
89        "movq	%%mm0, %%mm3\n\t"
90		"psubusb	%%mm1, %%mm3\n\t"
91		"psubusb %%mm0, %%mm1\n\t"
92		"por		%%mm1, %%mm3\n\t"					// abs(k,m)
93		"movq	%%mm3, %%mm1\n\t"					// keep copy
94
95		"movq	%%mm4, %%mm2\n\t"			// avg(k,m)
96
97		"psubusb %%mm7, %%mm3\n\t"			// nonzero where new weights bigger, else 0
98		"pxor	%%mm4, %%mm4\n\t"
99		"pcmpeqb %%mm4, %%mm3\n\t"			// now ff where new better, else 00
100		"pcmpeqb	%%mm3, %%mm4\n\t"			// here ff where old better, else 00
101
102		"pand	%%mm3, %%mm1\n\t"			// keep only better new avg and abs
103		"pand	%%mm2, %%mm3\n\t"
104
105		"pand	%%mm4, %%mm6\n\t"
106		"pand    %%mm4, %%mm7\n\t"
107
108		"por		%%mm3, %%mm6\n\t"			// and merge new & old vals keeping best
109		"por		%%mm1, %%mm7\n\t"			// "
110
111		// b,e
112		"movq    (%%"XBX"), %%mm0\n\t"		// value b from top
113		"movq    (%%"XBX", %%"XCX"), %%mm1\n\t"	// value e from bottom
114
115// We will also calc here the max/min values to later limit comb
116// so the max excursion will not exceed the Max_Comb constant
117
118#ifdef SKIP_SEARCH
119		"movq	%%mm0, %%mm2\n\t"
120//		pminub	%%mm2, %%mm1
121		V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
122
123//		pmaxub	%%mm6, %%mm2			// clip our current results so far to be above this
124		V_PMAXUB ("%%mm6", "%%mm2")
125		"movq	%%mm0, %%mm2\n\t"
126		V_PMAXUB ("%%mm2", "%%mm1")
127//		pminub	%%mm6, %%mm2			// clip our current results so far to be below this
128		V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
129
130#else
131        "movq	%%mm0, %%mm2\n\t"
132		"movq	(%%"XAX"), %%mm4\n\t"
133		"psubusb %%mm4, %%mm2\n\t"
134		"psubusb %%mm0, %%mm4\n\t"
135		"por		%%mm2, %%mm4\n\t"			// abs diff
136
137		"movq	%%mm1, %%mm2\n\t"
138		"movq	(%%"XAX", %%"XCX"), %%mm3\n\t"
139		"psubusb %%mm3, %%mm2\n\t"
140		"psubusb %%mm1, %%mm3\n\t"
141		"por		%%mm2, %%mm3\n\t"			// abs diff
142//		pmaxub  %%mm3, %%mm4			// top or bottom pixel moved most
143		V_PMAXUB ("%%mm3", "%%mm4")			// top or bottom pixel moved most
144        "psubusb "_Max_Mov", %%mm3\n\t"		// moved more than allowed? or goes to 0?
145		"pxor	%%mm4, %%mm4\n\t"
146		"pcmpeqb %%mm4, %%mm3\n\t"			// now ff where low motion, else high motion
147
148		"movq	%%mm0, %%mm2\n\t"
149//		pminub	%%mm2, %%mm1
150		V_PMINUB ("%%mm2", "%%mm1", "%%mm4")
151
152//		pmaxub	%%mm6, %%mm2			// clip our current results so far to be above this
153		V_PMAXUB ("%%mm6", "%%mm2")
154
155		"psubusb %%mm3, %%mm2\n\t"			// maybe decrease it to 0000.. if no surround motion
156		"movq	%%mm2, "_Min_Vals"\n\t"
157
158		"movq	%%mm0, %%mm2\n\t"
159		V_PMAXUB ("%%mm2", "%%mm1")
160//		pminub	%%mm6, %%mm2			// clip our current results so far to be below this
161		V_PMINUB ("%%mm6", "%%mm2", "%%mm4")
162        "paddusb %%mm3, %%mm2\n\t"			// maybe increase it to ffffff if no surround motion
163		"movq	%%mm2, "_Max_Vals"\n\t"
164#endif
165
166		"movq	%%mm0, %%mm2\n\t"
167//		pavgb	%%mm2, %%mm1					// avg(b,e)
168		V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask)	// avg(b,e)
169
170        "movq	%%mm0, %%mm3\n\t"
171		"psubusb	%%mm1, %%mm3\n\t"
172		"psubusb %%mm0, %%mm1\n\t"
173		"por		%%mm1, %%mm3\n\t"			// abs(c,d)
174		"movq	%%mm3, %%mm1\n\t"			// keep copy of diffs
175
176		"pxor	%%mm4, %%mm4\n\t"
177		"psubusb %%mm7, %%mm3\n\t"			// nonzero where new weights bigger, else 0
178		"pcmpeqb %%mm4, %%mm3\n\t"			// now ff where new better, else 00
179
180		"pcmpeqb	%%mm3, %%mm4\n\t"			// here ff where old better, else 00
181
182		"pand	%%mm3, %%mm1\n\t"
183		"pand	%%mm3, %%mm2\n\t"
184
185		"pand    %%mm4, %%mm6\n\t"
186		"pand    %%mm4, %%mm7\n\t"
187
188		"por		%%mm2, %%mm6\n\t"			// our x2 value
189		"por		%%mm1, %%mm7\n\t"			// our x2 diffs
190		"movq	%%mm7, %%mm4\n\t"			// save as bob uncertainty indicator
191
192#else
193
194        // a,f
195        best[0] = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
196	diff[0] = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
197        best[1] = (pBob[-1] + pBob[src_pitch2 + 3]) / 2;
198	diff[1] = ABS (pBob[-1] - pBob[src_pitch2 + 3]);
199
200        // c,d
201	if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff[0]) {
202          best[0] = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
203	  diff[0] = ABS (pBob[2] - pBob[src_pitch2 - 2]);
204	}
205
206	if (ABS (pBob[3] - pBob[src_pitch2 - 1]) < diff[1]) {
207          best[1] = (pBob[3] + pBob[src_pitch2 - 1]) / 2;
208	  diff[1] = ABS (pBob[3] - pBob[src_pitch2 - 1]);
209	}
210
211	// j,n
212	if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff[0]) {
213          best[0] = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
214	  diff[0] = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
215	}
216
217	if (ABS (pBob[-3] - pBob[src_pitch2 + 5]) < diff[1]) {
218          best[1] = (pBob[-3] + pBob[src_pitch2 + 5]) / 2;
219	  diff[1] = ABS (pBob[-3] - pBob[src_pitch2 + 5]);
220	}
221
222	// k,m
223	if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff[0]) {
224          best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
225	  diff[0] = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
226	}
227
228	if (ABS (pBob[5] - pBob[src_pitch2 - 3]) < diff[1]) {
229          best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
230	  diff[1] = ABS (pBob[-3] - pBob[src_pitch2 - 3]);
231	}
232	// k,m
233	if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff[0]) {
234          best[0] = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
235	  diff[0] = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
236	}
237
238	if (ABS (pBob[5] - pBob[src_pitch2 - 3]) < diff[1]) {
239          best[1] = (pBob[5] + pBob[src_pitch2 - 3]) / 2;
240	  diff[1] = ABS (pBob[-3] - pBob[src_pitch2 - 3]);
241	}
242
243// We will also calc here the max/min values to later limit comb
244// so the max excursion will not exceed the Max_Comb constant
245
246#ifdef SKIP_SEARCH
247		best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
248		best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
249#else
250		mov[0] = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
251		mov[1] = MAX (ABS (pBob[1] - pBobP[1]), ABS (pBob[src_pitch2 + 1] - pBobP[src_pitch2 + 1]));
252
253		MinVals[0] = 0;
254		MinVals[1] = 0;
255		MaxVals[0] = 255;
256		MaxVals[1] = 255;
257
258		if (mov[0] > Max_Mov[0]) {
259		  MinVals[0] = MAX (MIN (pBob[0], pBob[src_pitch2]), best[0]);
260		  MaxVals[0] = MIN (MAX (pBob[0], pBob[src_pitch2]), best[0]);
261		}
262
263		if (mov[1] > Max_Mov[1]) {
264		  MinVals[1] = MAX (MIN (pBob[1], pBob[src_pitch2 + 1]), best[1]);
265		  MaxVals[1] = MIN (MAX (pBob[1], pBob[src_pitch2 + 1]), best[1]);
266		}
267
268		best[0] = CLAMP (best[0], MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
269		best[1] = CLAMP (best[1], MIN (pBob[src_pitch2 + 1], pBob[1]), MAX (pBob[src_pitch2 + 1], pBob[1]));
270#endif
271
272		avg[0] = (pBob[src_pitch2] + pBob[0]) / 2;
273		avg[1] = (pBob[src_pitch2 + 1] + pBob[1]) / 2;
274		diff2[0] = ABS (pBob[src_pitch2] - pBob[0]);
275		diff2[1] = ABS (pBob[src_pitch2 + 1] - pBob[1]);
276
277		if (diff2[0] < diff[0]) {
278		  best[0] = avg[0];
279		  diff[0] = diff2[0];
280		}
281
282		if (diff2[1] < diff[1]) {
283		  best[1] = avg[1];
284		  diff[1] = diff2[1];
285		}
286#endif
287