1 /*****************************************************************************
2  *
3  *  XVID MPEG-4 VIDEO CODEC
4  *  - Rate-Distortion Based Motion Estimation for B- VOPs  -
5  *
6  *  Copyright(C) 2004 Radoslaw Czyz <xvid@syskin.cjb.net>
7  *  Copyright(C) 2010 Michael Militzer <michael@xvid.org>
8  *
9  *  This program is free software ; you can redistribute it and/or modify
10  *  it under the terms of the GNU General Public License as published by
11  *  the Free Software Foundation ; either version 2 of the License, or
12  *  (at your option) any later version.
13  *
14  *  This program is distributed in the hope that it will be useful,
15  *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *  GNU General Public License for more details.
18  *
19  *  You should have received a copy of the GNU General Public License
20  *  along with this program ; if not, write to the Free Software
21  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
22  *
23  * $Id: estimation_rd_based_bvop.c 1985 2011-05-18 09:02:35Z Isibaar $
24  *
25  ****************************************************************************/
26 
27 #include <assert.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>	/* memcpy */
31 
32 #include "../encoder.h"
33 #include "../bitstream/mbcoding.h"
34 #include "../prediction/mbprediction.h"
35 #include "../global.h"
36 #include "../image/interpolate8x8.h"
37 #include "estimation.h"
38 #include "motion.h"
39 #include "sad.h"
40 #include "../bitstream/zigzag.h"
41 #include "../quant/quant.h"
42 #include "../bitstream/vlc_codes.h"
43 #include "../dct/fdct.h"
44 #include "motion_inlines.h"
45 
/*
 * Cost model used by the checkers in this file:
 *   rd = BITS_MULT*bits + LAMBDA*distortion
 * With a factor of 1.0, LAMBDA has the same value as BITS_MULT.
 */
#define LAMBDA		((int)(BITS_MULT * 1.0))
48 
49 static __inline unsigned int
Block_CalcBits_BVOP(int16_t * const coeff,int16_t * const data,int16_t * const dqcoeff,const uint32_t quant,const int quant_type,uint32_t * cbp,const int block,const uint16_t * scan_table,const unsigned int lambda,const uint16_t * mpeg_quant_matrices,const unsigned int quant_sq,int * const cbpcost,const unsigned int rel_var8,const unsigned int metric)50 Block_CalcBits_BVOP(int16_t * const coeff,
51 				int16_t * const data,
52 				int16_t * const dqcoeff,
53 				const uint32_t quant, const int quant_type,
54 				uint32_t * cbp,
55 				const int block,
56 				const uint16_t * scan_table,
57 				const unsigned int lambda,
58 				const uint16_t * mpeg_quant_matrices,
59 				const unsigned int quant_sq,
60 				int * const cbpcost,
61 				const unsigned int rel_var8,
62 				const unsigned int metric)
63 {
64 	int sum;
65 	int bits;
66 	int distortion = 0;
67 
68 	fdct((short * const)data);
69 
70 	if (quant_type) sum = quant_h263_inter(coeff, data, quant, mpeg_quant_matrices);
71 	else sum = quant_mpeg_inter(coeff, data, quant, mpeg_quant_matrices);
72 
73 	if ((sum >= 3) || (coeff[1] != 0) || (coeff[8] != 0) || (coeff[0] != 0)) {
74 		*cbp |= 1 << (5 - block);
75 		bits = BITS_MULT * CodeCoeffInter_CalcBits(coeff, scan_table);
76 		bits += *cbpcost;
77 		*cbpcost = 0; /* don't add cbp cost second time */
78 
79 		if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
80 		else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
81 
82 		if (metric) distortion = masked_sseh8_16bit(data, dqcoeff, rel_var8);
83 		else distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t));
84 
85 	} else {
86 		const static int16_t zero_block[64] =
87 			{
88 				0, 0, 0, 0, 0, 0, 0, 0,
89 				0, 0, 0, 0, 0, 0, 0, 0,
90 				0, 0, 0, 0, 0, 0, 0, 0,
91 				0, 0, 0, 0, 0, 0, 0, 0,
92 				0, 0, 0, 0, 0, 0, 0, 0,
93 				0, 0, 0, 0, 0, 0, 0, 0,
94 				0, 0, 0, 0, 0, 0, 0, 0,
95 				0, 0, 0, 0, 0, 0, 0, 0,
96 			};
97 		bits = 0;
98 
99 		if (metric) distortion = masked_sseh8_16bit(data, (int16_t * const) zero_block, rel_var8);
100 		else distortion = sse8_16bit(data, (int16_t * const) zero_block, 8*sizeof(int16_t));
101 
102 	}
103 
104 	return bits + (lambda*distortion)/quant_sq;
105 }
106 
107 
108 static __inline unsigned int
Block_CalcBits_BVOP_direct(int16_t * const coeff,int16_t * const data,int16_t * const dqcoeff,const uint32_t quant,const int quant_type,uint32_t * cbp,const int block,const uint16_t * scan_table,const unsigned int lambda,const uint16_t * mpeg_quant_matrices,const unsigned int quant_sq,int * const cbpcost,const unsigned int rel_var8,const unsigned int metric)109 Block_CalcBits_BVOP_direct(int16_t * const coeff,
110 				int16_t * const data,
111 				int16_t * const dqcoeff,
112 				const uint32_t quant, const int quant_type,
113 				uint32_t * cbp,
114 				const int block,
115 				const uint16_t * scan_table,
116 				const unsigned int lambda,
117 				const uint16_t * mpeg_quant_matrices,
118 				const unsigned int quant_sq,
119 				int * const cbpcost,
120 				const unsigned int rel_var8,
121 				const unsigned int metric)
122 {
123 	int sum;
124 	int bits;
125 	int distortion = 0;
126 
127 	fdct((short * const)data);
128 
129 	if (quant_type) sum = quant_h263_inter(coeff, data, quant, mpeg_quant_matrices);
130 	else sum = quant_mpeg_inter(coeff, data, quant, mpeg_quant_matrices);
131 
132 	if ((sum >= 3) || (coeff[1] != 0) || (coeff[8] != 0) || (coeff[0] > 0) || (coeff[0] < -1)) {
133 		*cbp |= 1 << (5 - block);
134 		bits = BITS_MULT * CodeCoeffInter_CalcBits(coeff, scan_table);
135 		bits += *cbpcost;
136 		*cbpcost = 0;
137 
138 		if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
139 		else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
140 
141 		if (metric) distortion = masked_sseh8_16bit(data, dqcoeff, rel_var8);
142 		else distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t));
143 
144 	} else {
145 		const static int16_t zero_block[64] =
146 			{
147 				0, 0, 0, 0, 0, 0, 0, 0,
148 				0, 0, 0, 0, 0, 0, 0, 0,
149 				0, 0, 0, 0, 0, 0, 0, 0,
150 				0, 0, 0, 0, 0, 0, 0, 0,
151 				0, 0, 0, 0, 0, 0, 0, 0,
152 				0, 0, 0, 0, 0, 0, 0, 0,
153 				0, 0, 0, 0, 0, 0, 0, 0,
154 				0, 0, 0, 0, 0, 0, 0, 0,
155 			};
156 		bits = 0;
157 
158 		if (metric) distortion = masked_sseh8_16bit(data, (int16_t * const) zero_block, rel_var8);
159 		else distortion = sse8_16bit(data, (int16_t * const) zero_block, 8*sizeof(int16_t));
160 
161 	}
162 
163 	return bits + (lambda*distortion)/quant_sq;
164 }
165 
166 static void
CheckCandidateRDBF(const int x,const int y,SearchData * const data,const unsigned int Direction)167 CheckCandidateRDBF(const int x, const int y, SearchData * const data, const unsigned int Direction)
168 {
169 
170 	int16_t *in = data->dctSpace, *coeff = data->dctSpace + 64;
171 	int32_t rd = (3+2)*BITS_MULT; /* 3 bits for mode + 2 for vector (minimum) */
172 	VECTOR * current;
173 	const uint8_t * ptr;
174 	int i, xc, yc;
175 	unsigned cbp = 0;
176 	int cbpcost = 7*BITS_MULT; /* how much to add if cbp turns out to be non-zero */
177 
178 	if ( (x > data->max_dx) || (x < data->min_dx)
179 		|| (y > data->max_dy) || (y < data->min_dy) ) return;
180 
181 	if (!data->qpel_precision) {
182 		ptr = GetReference(x, y, data);
183 		current = data->currentMV;
184 		xc = x; yc = y;
185 	} else { /* x and y are in 1/4 precision */
186 		ptr = xvid_me_interpolate16x16qpel(x, y, 0, data);
187 		current = data->currentQMV;
188 		xc = x/2; yc = y/2;
189 	}
190 
191 	rd += BITS_MULT*(d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision)-2);
192 
193 	for(i = 0; i < 4; i++) {
194 		int s = 8*((i&1) + (i>>1)*data->iEdgedWidth);
195 		transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth);
196 		rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
197 								&cbp, i, data->scan_table, data->lambda[i], data->mpeg_quant_matrices,
198 								data->quant_sq, &cbpcost, data->rel_var8[i], data->metric);
199 		if (rd >= data->iMinSAD[0]) return;
200 	}
201 
202 	/* chroma */
203 	xc = (xc >> 1) + roundtab_79[xc & 0x3];
204 	yc = (yc >> 1) + roundtab_79[yc & 0x3];
205 
206 	/* chroma U */
207 	ptr = interpolate8x8_switch2(data->RefQ, data->RefP[4], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding);
208 	transfer_8to16subro(in, data->CurU, ptr, data->iEdgedWidth/2);
209 	rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
210 								&cbp, 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices,
211 								data->quant_sq, &cbpcost, data->rel_var8[4], data->metric);
212 	if (rd >= data->iMinSAD[0]) return;
213 
214 	/* chroma V */
215 	ptr = interpolate8x8_switch2(data->RefQ, data->RefP[5], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding);
216 	transfer_8to16subro(in, data->CurV, ptr, data->iEdgedWidth/2);
217 	rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
218 								&cbp, 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices,
219 								data->quant_sq, &cbpcost, data->rel_var8[5], data->metric);
220 
221 	if (rd < data->iMinSAD[0]) {
222 		data->iMinSAD[0] = rd;
223 		current[0].x = x; current[0].y = y;
224 		data->dir = Direction;
225 		*data->cbp = cbp;
226 	}
227 }
228 
229 static void
CheckCandidateRDDirect(const int x,const int y,SearchData * const data,const unsigned int Direction)230 CheckCandidateRDDirect(const int x, const int y, SearchData * const data, const unsigned int Direction)
231 {
232 	int32_t xcf = 0, ycf = 0, xcb = 0, ycb = 0;
233 	int32_t rd = 1*BITS_MULT;
234 	int16_t *in = data->dctSpace, *coeff = data->dctSpace + 64;
235 	unsigned int cbp = 0;
236 	unsigned int k;
237 	VECTOR mvs, b_mvs;
238 	int cbpcost = 6*BITS_MULT; /* how much to add if cbp turns out to be non-zero */
239 
240 	const uint8_t *ReferenceF, *ReferenceB;
241 
242 	if (( x > 31) || ( x < -32) || ( y > 31) || (y < -32)) return;
243 
244 	for (k = 0; k < 4; k++) {
245 		int s = 8*((k&1) + (k>>1)*data->iEdgedWidth);
246 
247 		mvs.x = data->directmvF[k].x + x;
248 		b_mvs.x = ((x == 0) ?
249 			data->directmvB[k].x
250 			: mvs.x - data->referencemv[k].x);
251 
252 		mvs.y = data->directmvF[k].y + y;
253 		b_mvs.y = ((y == 0) ?
254 			data->directmvB[k].y
255 			: mvs.y - data->referencemv[k].y);
256 
257 		if ((mvs.x > data->max_dx)   || (mvs.x < data->min_dx)   ||
258 			(mvs.y > data->max_dy)   || (mvs.y < data->min_dy)   ||
259 			(b_mvs.x > data->max_dx) || (b_mvs.x < data->min_dx) ||
260 			(b_mvs.y > data->max_dy) || (b_mvs.y < data->min_dy) )
261 			return;
262 
263 		if (data->qpel) {
264 			xcf += mvs.x/2; ycf += mvs.y/2;
265 			xcb += b_mvs.x/2; ycb += b_mvs.y/2;
266 			ReferenceF = xvid_me_interpolate8x8qpel(mvs.x, mvs.y, k, 0, data);
267 			ReferenceB = xvid_me_interpolate8x8qpel(b_mvs.x, b_mvs.y, k, 1, data);
268 		} else {
269 			xcf += mvs.x; ycf += mvs.y;
270 			xcb += b_mvs.x; ycb += b_mvs.y;
271 			ReferenceF = GetReference(mvs.x, mvs.y, data) + s;
272 			ReferenceB = GetReferenceB(b_mvs.x, b_mvs.y, 1, data) + s;
273 		}
274 
275 		transfer_8to16sub2ro(in, data->Cur + s, ReferenceF, ReferenceB, data->iEdgedWidth);
276 		rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
277 										&cbp, k, data->scan_table, data->lambda[k], data->mpeg_quant_matrices,
278 										data->quant_sq, &cbpcost, data->rel_var8[k], data->metric);
279 		if (rd > *(data->iMinSAD)) return;
280 	}
281 
282 	/* chroma */
283 	xcf = (xcf >> 3) + roundtab_76[xcf & 0xf];
284 	ycf = (ycf >> 3) + roundtab_76[ycf & 0xf];
285 	xcb = (xcb >> 3) + roundtab_76[xcb & 0xf];
286 	ycb = (ycb >> 3) + roundtab_76[ycb & 0xf];
287 
288 	/* chroma U */
289 	ReferenceF = interpolate8x8_switch2(data->RefQ, data->RefP[4], 0, 0, xcf, ycf, data->iEdgedWidth/2, data->rounding);
290 	ReferenceB = interpolate8x8_switch2(data->RefQ + 16, data->b_RefP[4], 0, 0, xcb, ycb, data->iEdgedWidth/2, data->rounding);
291 	transfer_8to16sub2ro(in, data->CurU, ReferenceF, ReferenceB, data->iEdgedWidth/2);
292 	rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
293 									&cbp, 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices,
294 									data->quant_sq, &cbpcost, data->rel_var8[4], data->metric);
295 	if (rd >= data->iMinSAD[0]) return;
296 
297 	/* chroma V */
298 	ReferenceF = interpolate8x8_switch2(data->RefQ, data->RefP[5], 0, 0, xcf, ycf, data->iEdgedWidth/2, data->rounding);
299 	ReferenceB = interpolate8x8_switch2(data->RefQ + 16, data->b_RefP[5], 0, 0, xcb, ycb, data->iEdgedWidth/2, data->rounding);
300 	transfer_8to16sub2ro(in, data->CurV, ReferenceF, ReferenceB, data->iEdgedWidth/2);
301 	rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
302 									&cbp, 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices,
303 									data->quant_sq, &cbpcost, data->rel_var8[5], data->metric);
304 
305 	if (cbp || x != 0 || y != 0)
306 		rd += BITS_MULT * d_mv_bits(x, y, zeroMV, 1, 0);
307 
308 	if (rd < *(data->iMinSAD)) {
309 		*data->iMinSAD = rd;
310 		data->currentMV->x = x; data->currentMV->y = y;
311 		data->dir = Direction;
312 		*data->cbp = cbp;
313 	}
314 }
315 
316 static void
CheckCandidateRDInt(const int x,const int y,SearchData * const data,const unsigned int Direction)317 CheckCandidateRDInt(const int x, const int y, SearchData * const data, const unsigned int Direction)
318 {
319 	int32_t xf, yf, xb, yb, xcf, ycf, xcb, ycb;
320 	int32_t rd = 2*BITS_MULT;
321 	int16_t *in = data->dctSpace, *coeff = data->dctSpace + 64;
322 	unsigned int cbp = 0;
323 	unsigned int i;
324 	int cbpcost = 7*BITS_MULT; /* how much to add if cbp turns out to be non-zero */
325 
326 	const uint8_t *ReferenceF, *ReferenceB;
327 	VECTOR *current;
328 
329 	if ((x > data->max_dx) || (x < data->min_dx) ||
330 		(y > data->max_dy) || (y < data->min_dy))
331 		return;
332 
333 	if (Direction == 1) { /* x and y mean forward vector */
334 		VECTOR backward = data->qpel_precision ? data->currentQMV[1] : data->currentMV[1];
335 		xb = backward.x;
336 		yb = backward.y;
337 		xf = x; yf = y;
338 	} else { /* x and y mean backward vector */
339 		VECTOR forward = data->qpel_precision ? data->currentQMV[0] : data->currentMV[0];
340 		xf = forward.x;
341 		yf = forward.y;
342 		xb = x; yb = y;
343 	}
344 
345 	if (!data->qpel_precision) {
346 		ReferenceF = GetReference(xf, yf, data);
347 		ReferenceB = GetReferenceB(xb, yb, 1, data);
348 		current = data->currentMV + Direction - 1;
349 		xcf = xf; ycf = yf;
350 		xcb = xb; ycb = yb;
351 	} else {
352 		ReferenceF = xvid_me_interpolate16x16qpel(xf, yf, 0, data);
353 		current = data->currentQMV + Direction - 1;
354 		ReferenceB = xvid_me_interpolate16x16qpel(xb, yb, 1, data);
355 		xcf = xf/2; ycf = yf/2;
356 		xcb = xb/2; ycb = yb/2;
357 	}
358 
359 	rd += BITS_MULT * (d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision)
360 					+ d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision));
361 
362 	for(i = 0; i < 4; i++) {
363 		int s = 8*((i&1) + (i>>1)*data->iEdgedWidth);
364 		if (rd >= *data->iMinSAD) return;
365 		transfer_8to16sub2ro(in, data->Cur + s, ReferenceF + s, ReferenceB + s, data->iEdgedWidth);
366 		rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp,
367 								i, data->scan_table, data->lambda[i], data->mpeg_quant_matrices,
368 								data->quant_sq, &cbpcost, data->rel_var8[i], data->metric);
369 	}
370 
371 	/* chroma */
372 	xcf = (xcf >> 1) + roundtab_79[xcf & 0x3];
373 	ycf = (ycf >> 1) + roundtab_79[ycf & 0x3];
374 	xcb = (xcb >> 1) + roundtab_79[xcb & 0x3];
375 	ycb = (ycb >> 1) + roundtab_79[ycb & 0x3];
376 
377 	/* chroma U */
378 	ReferenceF = interpolate8x8_switch2(data->RefQ, data->RefP[4], 0, 0, xcf, ycf, data->iEdgedWidth/2, data->rounding);
379 	ReferenceB = interpolate8x8_switch2(data->RefQ + 16, data->b_RefP[4], 0, 0, xcb, ycb, data->iEdgedWidth/2, data->rounding);
380 	transfer_8to16sub2ro(in, data->CurU, ReferenceF, ReferenceB, data->iEdgedWidth/2);
381 	rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp,
382 								4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices,
383 								data->quant_sq, &cbpcost, data->rel_var8[4], data->metric);
384 	if (rd >= data->iMinSAD[0]) return;
385 
386 
387 	/* chroma V */
388 	ReferenceF = interpolate8x8_switch2(data->RefQ, data->RefP[5], 0, 0, xcf, ycf, data->iEdgedWidth/2, data->rounding);
389 	ReferenceB = interpolate8x8_switch2(data->RefQ + 16, data->b_RefP[5], 0, 0, xcb, ycb, data->iEdgedWidth/2, data->rounding);
390 	transfer_8to16sub2ro(in, data->CurV, ReferenceF, ReferenceB, data->iEdgedWidth/2);
391 	rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp,
392 								5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices,
393 								data->quant_sq, &cbpcost, data->rel_var8[5], data->metric);
394 
395 	if (rd < *(data->iMinSAD)) {
396 		*data->iMinSAD = rd;
397 		current->x = x; current->y = y;
398 		data->dir = Direction;
399 		*data->cbp = cbp;
400 	}
401 }
402 
403 static int
SearchInterpolate_RD(const int x,const int y,const uint32_t MotionFlags,const MBParam * const pParam,int32_t * const best_sad,SearchData * const Data)404 SearchInterpolate_RD(const int x, const int y,
405 					 const uint32_t MotionFlags,
406 					 const MBParam * const pParam,
407 					 int32_t * const best_sad,
408 					 SearchData * const Data)
409 {
410 	int i, j;
411 
412 	Data->iMinSAD[0] = *best_sad;
413 
414 	get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy,
415 		x, y, 4, pParam->width, pParam->height, Data->iFcode, 1 + Data->qpel);
416 
417 	Data->qpel_precision = Data->qpel;
418 
419 	if (Data->qpel) {
420 		i = Data->currentQMV[0].x; j = Data->currentQMV[0].y;
421 	} else {
422 		i = Data->currentMV[0].x; j = Data->currentMV[0].y;
423 	}
424 
425 	CheckCandidateRDInt(i, j, Data, 1);
426 
427 	return Data->iMinSAD[0];
428 }
429 
430 static int
SearchDirect_RD(const int x,const int y,const uint32_t MotionFlags,const MBParam * const pParam,int32_t * const best_sad,SearchData * const Data)431 SearchDirect_RD(const int x, const int y,
432 					 const uint32_t MotionFlags,
433 					 const MBParam * const pParam,
434 					 int32_t * const best_sad,
435 					 SearchData * const Data)
436 {
437 	Data->iMinSAD[0] = *best_sad;
438 
439 	Data->qpel_precision = Data->qpel;
440 
441 	CheckCandidateRDDirect(Data->currentMV->x, Data->currentMV->y, Data, 255);
442 
443 	return Data->iMinSAD[0];
444 }
445 
446 static int
SearchBF_RD(const int x,const int y,const uint32_t MotionFlags,const MBParam * const pParam,int32_t * const best_sad,SearchData * const Data)447 SearchBF_RD(const int x, const int y,
448 					 const uint32_t MotionFlags,
449 					 const MBParam * const pParam,
450 					 int32_t * const best_sad,
451 					 SearchData * const Data)
452 {
453 	int i, j;
454 
455 	Data->iMinSAD[0] = *best_sad;
456 
457 	get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy,
458 		x, y, 4, pParam->width, pParam->height, Data->iFcode, 1 + Data->qpel);
459 
460 	Data->qpel_precision = Data->qpel;
461 
462 	if (Data->qpel) {
463 		i = Data->currentQMV[0].x; j = Data->currentQMV[0].y;
464 	} else {
465 		i = Data->currentMV[0].x; j = Data->currentMV[0].y;
466 	}
467 
468 	CheckCandidateRDBF(i, j, Data, 1);
469 
470 	return Data->iMinSAD[0];
471 }
472 
473 static int
get_sad_for_mode(int mode,SearchData * const Data_d,SearchData * const Data_b,SearchData * const Data_f,SearchData * const Data_i)474 get_sad_for_mode(int mode,
475 				 SearchData * const Data_d,
476 				 SearchData * const Data_b,
477 				 SearchData * const Data_f,
478 				 SearchData * const Data_i)
479 {
480 	switch(mode) {
481 		case MODE_DIRECT: return Data_d->iMinSAD[0];
482 		case MODE_FORWARD: return Data_f->iMinSAD[0];
483 		case MODE_BACKWARD: return Data_b->iMinSAD[0];
484 		default:
485 		case MODE_INTERPOLATE: return Data_i->iMinSAD[0];
486 	}
487 }
488 
489 void
ModeDecision_BVOP_RD(SearchData * const Data_d,SearchData * const Data_b,SearchData * const Data_f,SearchData * const Data_i,MACROBLOCK * const pMB,const MACROBLOCK * const b_mb,VECTOR * f_predMV,VECTOR * b_predMV,const uint32_t MotionFlags,const uint32_t VopFlags,const MBParam * const pParam,int x,int y,int best_sad,int force_direct)490 ModeDecision_BVOP_RD(SearchData * const Data_d,
491 					 SearchData * const Data_b,
492 					 SearchData * const Data_f,
493 					 SearchData * const Data_i,
494 					 MACROBLOCK * const pMB,
495 					 const MACROBLOCK * const b_mb,
496 					 VECTOR * f_predMV,
497 					 VECTOR * b_predMV,
498 					 const uint32_t MotionFlags,
499 					 const uint32_t VopFlags,
500 					 const MBParam * const pParam,
501 					 int x, int y,
502 					 int best_sad,
503 					 int force_direct)
504 {
505 	int mode = MODE_DIRECT, k;
506 	int f_rd, b_rd, i_rd, d_rd, best_rd;
507 	const int qpel = Data_d->qpel;
508 	const uint32_t iQuant = Data_d->iQuant;
509 	int i;
510 	int ref_quant = b_mb->quant;
511 	int no_of_checks = 0;
512 
513 	int order[4] = {MODE_DIRECT, MODE_FORWARD, MODE_BACKWARD, MODE_INTERPOLATE};
514 
515 	Data_d->metric = Data_b->metric = Data_f->metric = Data_i->metric = !!(VopFlags & XVID_VOP_RD_PSNRHVSM);
516 
517 	Data_d->scan_table = Data_b->scan_table = Data_f->scan_table = Data_i->scan_table
518 		= /*VopFlags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : */scan_tables[0];
519 	*Data_f->cbp = *Data_b->cbp = *Data_i->cbp = *Data_d->cbp = 63;
520 
521 	f_rd = b_rd = i_rd = d_rd = best_rd = 256*4096;
522 
523 	for (i = 0; i < 6; i++) {
524 		/* re-calculate as if it was p-frame's quant +.5 */
525 		int lam = (pMB->lambda[i]*LAMBDA*iQuant*iQuant)/(ref_quant*(ref_quant+1));
526 		lam >>= LAMBDA_EXP;
527 		Data_d->lambda[i] = lam;
528 		Data_b->lambda[i] = lam;
529 		Data_f->lambda[i] = lam;
530 		Data_i->lambda[i] = lam;
531 
532 		Data_d->rel_var8[i] = pMB->rel_var8[i];
533 		Data_b->rel_var8[i] = pMB->rel_var8[i];
534 		Data_f->rel_var8[i] = pMB->rel_var8[i];
535 		Data_i->rel_var8[i] = pMB->rel_var8[i];
536 	}
537 
538 	if (force_direct) {
539 		best_rd = 0;
540 		goto set_mode; /* bypass checks for non-direct modes */
541 	}
542 
543 	/* find the best order of evaluation - smallest SAD comes first, because *if* it means smaller RD,
544 	   early-stops will activate sooner */
545 
546 	for (i = 3; i >= 0; i--) {
547 		int j;
548 		for (j = 0; j < i; j++) {
549 			int sad1 = get_sad_for_mode(order[j], Data_d, Data_b, Data_f, Data_i);
550 			int sad2 = get_sad_for_mode(order[j+1], Data_d, Data_b, Data_f, Data_i);
551 			if (sad1 > sad2) {
552 				int t = order[j];
553 				order[j] = order[j+1];
554 				order[j+1] = t;
555 			}
556 		}
557 	}
558 
559 	for(i = 0; i < 4; i++)
560 		if (get_sad_for_mode(order[i], Data_d, Data_b, Data_f, Data_i) < 2*best_sad)
561 			no_of_checks++;
562 
563 	if (no_of_checks > 1) {
564 		/* evaluate cost of all modes */
565 		for (i = 0; i < no_of_checks; i++) {
566 			int rd;
567 			if (2*best_sad < get_sad_for_mode(order[i], Data_d, Data_b, Data_f, Data_i))
568 				break; /* further SADs are too big */
569 
570 			switch (order[i]) {
571 			case MODE_DIRECT:
572 				rd = d_rd = SearchDirect_RD(x, y, MotionFlags, pParam, &best_rd, Data_d);
573 				break;
574 			case MODE_FORWARD:
575 				rd = f_rd = SearchBF_RD(x, y, MotionFlags, pParam, &best_rd, Data_f) + 1*BITS_MULT; /* extra one bit for FORWARD vs BACKWARD */
576 				break;
577 			case MODE_BACKWARD:
578 				rd = b_rd = SearchBF_RD(x, y, MotionFlags, pParam, &best_rd, Data_b);
579 				break;
580 			default:
581 			case MODE_INTERPOLATE:
582 				rd = i_rd = SearchInterpolate_RD(x, y, MotionFlags, pParam, &best_rd, Data_i);
583 				break;
584 			}
585 			if (rd < best_rd) {
586 				mode = order[i];
587 				best_rd = rd;
588 			}
589 		}
590 	} else {
591 		/* only 1 mode is below the threshold */
592 		mode = order[0];
593 		best_rd = 0;
594 	}
595 
596 
597 set_mode:
598 	pMB->sad16 = best_rd;
599 	pMB->mode = mode;
600 
601 	switch (mode) {
602 
603 	case MODE_DIRECT:
604 		if (!qpel && b_mb->mode != MODE_INTER4V) pMB->mode = MODE_DIRECT_NO4V; /* for faster compensation */
605 
606 		pMB->pmvs[3] = Data_d->currentMV[0];
607 
608 		pMB->cbp = *Data_d->cbp;
609 
610 		for (k = 0; k < 4; k++) {
611 			pMB->mvs[k].x = Data_d->directmvF[k].x + Data_d->currentMV->x;
612 			pMB->b_mvs[k].x = (	(Data_d->currentMV->x == 0)
613 								? Data_d->directmvB[k].x
614 								:pMB->mvs[k].x - Data_d->referencemv[k].x);
615 			pMB->mvs[k].y = (Data_d->directmvF[k].y + Data_d->currentMV->y);
616 			pMB->b_mvs[k].y = ((Data_d->currentMV->y == 0)
617 								? Data_d->directmvB[k].y
618 								: pMB->mvs[k].y - Data_d->referencemv[k].y);
619 			if (qpel) {
620 				pMB->qmvs[k].x = pMB->mvs[k].x; pMB->mvs[k].x /= 2;
621 				pMB->b_qmvs[k].x = pMB->b_mvs[k].x; pMB->b_mvs[k].x /= 2;
622 				pMB->qmvs[k].y = pMB->mvs[k].y; pMB->mvs[k].y /= 2;
623 				pMB->b_qmvs[k].y = pMB->b_mvs[k].y; pMB->b_mvs[k].y /= 2;
624 			}
625 
626 			if (b_mb->mode != MODE_INTER4V) {
627 				pMB->mvs[3] = pMB->mvs[2] = pMB->mvs[1] = pMB->mvs[0];
628 				pMB->b_mvs[3] = pMB->b_mvs[2] = pMB->b_mvs[1] = pMB->b_mvs[0];
629 				pMB->qmvs[3] = pMB->qmvs[2] = pMB->qmvs[1] = pMB->qmvs[0];
630 				pMB->b_qmvs[3] = pMB->b_qmvs[2] = pMB->b_qmvs[1] = pMB->b_qmvs[0];
631 				break;
632 			}
633 		}
634 		break;
635 
636 	case MODE_FORWARD:
637 		if (qpel) {
638 			pMB->pmvs[0].x = Data_f->currentQMV->x - f_predMV->x;
639 			pMB->pmvs[0].y = Data_f->currentQMV->y - f_predMV->y;
640 			pMB->qmvs[0] = *Data_f->currentQMV;
641 			*f_predMV = Data_f->currentQMV[0];
642 		} else {
643 			pMB->pmvs[0].x = Data_f->currentMV->x - f_predMV->x;
644 			pMB->pmvs[0].y = Data_f->currentMV->y - f_predMV->y;
645 			*f_predMV = Data_f->currentMV[0];
646 		}
647 		pMB->mvs[0] = *Data_f->currentMV;
648 		pMB->cbp = *Data_f->cbp;
649 		pMB->b_mvs[0] = *Data_b->currentMV; /* hint for future searches */
650 		break;
651 
652 	case MODE_BACKWARD:
653 		if (qpel) {
654 			pMB->pmvs[0].x = Data_b->currentQMV->x - b_predMV->x;
655 			pMB->pmvs[0].y = Data_b->currentQMV->y - b_predMV->y;
656 			pMB->b_qmvs[0] = *Data_b->currentQMV;
657 			*b_predMV = Data_b->currentQMV[0];
658 		} else {
659 			pMB->pmvs[0].x = Data_b->currentMV->x - b_predMV->x;
660 			pMB->pmvs[0].y = Data_b->currentMV->y - b_predMV->y;
661 			*b_predMV = Data_b->currentMV[0];
662 		}
663 		pMB->b_mvs[0] = *Data_b->currentMV;
664 		pMB->cbp = *Data_b->cbp;
665 		pMB->mvs[0] = *Data_f->currentMV; /* hint for future searches */
666 		break;
667 
668 
669 	case MODE_INTERPOLATE:
670 		pMB->mvs[0] = Data_i->currentMV[0];
671 		pMB->b_mvs[0] = Data_i->currentMV[1];
672 		if (qpel) {
673 			pMB->qmvs[0] = Data_i->currentQMV[0];
674 			pMB->b_qmvs[0] = Data_i->currentQMV[1];
675 			pMB->pmvs[1].x = pMB->qmvs[0].x - f_predMV->x;
676 			pMB->pmvs[1].y = pMB->qmvs[0].y - f_predMV->y;
677 			pMB->pmvs[0].x = pMB->b_qmvs[0].x - b_predMV->x;
678 			pMB->pmvs[0].y = pMB->b_qmvs[0].y - b_predMV->y;
679 			*f_predMV = Data_i->currentQMV[0];
680 			*b_predMV = Data_i->currentQMV[1];
681 		} else {
682 			pMB->pmvs[1].x = pMB->mvs[0].x - f_predMV->x;
683 			pMB->pmvs[1].y = pMB->mvs[0].y - f_predMV->y;
684 			pMB->pmvs[0].x = pMB->b_mvs[0].x - b_predMV->x;
685 			pMB->pmvs[0].y = pMB->b_mvs[0].y - b_predMV->y;
686 			*f_predMV = Data_i->currentMV[0];
687 			*b_predMV = Data_i->currentMV[1];
688 		}
689 		pMB->cbp = *Data_i->cbp;
690 		break;
691 	}
692 }
693