1 /*****************************************************************************
2 *
3 * XVID MPEG-4 VIDEO CODEC
4 * - Rate-Distortion Based Motion Estimation for B- VOPs -
5 *
6 * Copyright(C) 2004 Radoslaw Czyz <xvid@syskin.cjb.net>
7 * Copyright(C) 2010 Michael Militzer <michael@xvid.org>
8 *
9 * This program is free software ; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation ; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program ; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 * $Id: estimation_rd_based_bvop.c 1985 2011-05-18 09:02:35Z Isibaar $
24 *
25 ****************************************************************************/
26
27 #include <assert.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h> /* memcpy */
31
32 #include "../encoder.h"
33 #include "../bitstream/mbcoding.h"
34 #include "../prediction/mbprediction.h"
35 #include "../global.h"
36 #include "../image/interpolate8x8.h"
37 #include "estimation.h"
38 #include "motion.h"
39 #include "sad.h"
40 #include "../bitstream/zigzag.h"
41 #include "../quant/quant.h"
42 #include "../bitstream/vlc_codes.h"
43 #include "../dct/fdct.h"
44 #include "motion_inlines.h"
45
/*
 * Cost model used by every candidate check in this file:
 *
 *     rd = BITS_MULT*bits + LAMBDA*distortion
 *
 * LAMBDA is BITS_MULT multiplied by 1.0 and converted to int.
 */
#define LAMBDA ((int)(BITS_MULT * 1.0))
48
49 static __inline unsigned int
Block_CalcBits_BVOP(int16_t * const coeff,int16_t * const data,int16_t * const dqcoeff,const uint32_t quant,const int quant_type,uint32_t * cbp,const int block,const uint16_t * scan_table,const unsigned int lambda,const uint16_t * mpeg_quant_matrices,const unsigned int quant_sq,int * const cbpcost,const unsigned int rel_var8,const unsigned int metric)50 Block_CalcBits_BVOP(int16_t * const coeff,
51 int16_t * const data,
52 int16_t * const dqcoeff,
53 const uint32_t quant, const int quant_type,
54 uint32_t * cbp,
55 const int block,
56 const uint16_t * scan_table,
57 const unsigned int lambda,
58 const uint16_t * mpeg_quant_matrices,
59 const unsigned int quant_sq,
60 int * const cbpcost,
61 const unsigned int rel_var8,
62 const unsigned int metric)
63 {
64 int sum;
65 int bits;
66 int distortion = 0;
67
68 fdct((short * const)data);
69
70 if (quant_type) sum = quant_h263_inter(coeff, data, quant, mpeg_quant_matrices);
71 else sum = quant_mpeg_inter(coeff, data, quant, mpeg_quant_matrices);
72
73 if ((sum >= 3) || (coeff[1] != 0) || (coeff[8] != 0) || (coeff[0] != 0)) {
74 *cbp |= 1 << (5 - block);
75 bits = BITS_MULT * CodeCoeffInter_CalcBits(coeff, scan_table);
76 bits += *cbpcost;
77 *cbpcost = 0; /* don't add cbp cost second time */
78
79 if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
80 else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
81
82 if (metric) distortion = masked_sseh8_16bit(data, dqcoeff, rel_var8);
83 else distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t));
84
85 } else {
86 const static int16_t zero_block[64] =
87 {
88 0, 0, 0, 0, 0, 0, 0, 0,
89 0, 0, 0, 0, 0, 0, 0, 0,
90 0, 0, 0, 0, 0, 0, 0, 0,
91 0, 0, 0, 0, 0, 0, 0, 0,
92 0, 0, 0, 0, 0, 0, 0, 0,
93 0, 0, 0, 0, 0, 0, 0, 0,
94 0, 0, 0, 0, 0, 0, 0, 0,
95 0, 0, 0, 0, 0, 0, 0, 0,
96 };
97 bits = 0;
98
99 if (metric) distortion = masked_sseh8_16bit(data, (int16_t * const) zero_block, rel_var8);
100 else distortion = sse8_16bit(data, (int16_t * const) zero_block, 8*sizeof(int16_t));
101
102 }
103
104 return bits + (lambda*distortion)/quant_sq;
105 }
106
107
108 static __inline unsigned int
Block_CalcBits_BVOP_direct(int16_t * const coeff,int16_t * const data,int16_t * const dqcoeff,const uint32_t quant,const int quant_type,uint32_t * cbp,const int block,const uint16_t * scan_table,const unsigned int lambda,const uint16_t * mpeg_quant_matrices,const unsigned int quant_sq,int * const cbpcost,const unsigned int rel_var8,const unsigned int metric)109 Block_CalcBits_BVOP_direct(int16_t * const coeff,
110 int16_t * const data,
111 int16_t * const dqcoeff,
112 const uint32_t quant, const int quant_type,
113 uint32_t * cbp,
114 const int block,
115 const uint16_t * scan_table,
116 const unsigned int lambda,
117 const uint16_t * mpeg_quant_matrices,
118 const unsigned int quant_sq,
119 int * const cbpcost,
120 const unsigned int rel_var8,
121 const unsigned int metric)
122 {
123 int sum;
124 int bits;
125 int distortion = 0;
126
127 fdct((short * const)data);
128
129 if (quant_type) sum = quant_h263_inter(coeff, data, quant, mpeg_quant_matrices);
130 else sum = quant_mpeg_inter(coeff, data, quant, mpeg_quant_matrices);
131
132 if ((sum >= 3) || (coeff[1] != 0) || (coeff[8] != 0) || (coeff[0] > 0) || (coeff[0] < -1)) {
133 *cbp |= 1 << (5 - block);
134 bits = BITS_MULT * CodeCoeffInter_CalcBits(coeff, scan_table);
135 bits += *cbpcost;
136 *cbpcost = 0;
137
138 if (quant_type) dequant_h263_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
139 else dequant_mpeg_inter(dqcoeff, coeff, quant, mpeg_quant_matrices);
140
141 if (metric) distortion = masked_sseh8_16bit(data, dqcoeff, rel_var8);
142 else distortion = sse8_16bit(data, dqcoeff, 8*sizeof(int16_t));
143
144 } else {
145 const static int16_t zero_block[64] =
146 {
147 0, 0, 0, 0, 0, 0, 0, 0,
148 0, 0, 0, 0, 0, 0, 0, 0,
149 0, 0, 0, 0, 0, 0, 0, 0,
150 0, 0, 0, 0, 0, 0, 0, 0,
151 0, 0, 0, 0, 0, 0, 0, 0,
152 0, 0, 0, 0, 0, 0, 0, 0,
153 0, 0, 0, 0, 0, 0, 0, 0,
154 0, 0, 0, 0, 0, 0, 0, 0,
155 };
156 bits = 0;
157
158 if (metric) distortion = masked_sseh8_16bit(data, (int16_t * const) zero_block, rel_var8);
159 else distortion = sse8_16bit(data, (int16_t * const) zero_block, 8*sizeof(int16_t));
160
161 }
162
163 return bits + (lambda*distortion)/quant_sq;
164 }
165
166 static void
CheckCandidateRDBF(const int x,const int y,SearchData * const data,const unsigned int Direction)167 CheckCandidateRDBF(const int x, const int y, SearchData * const data, const unsigned int Direction)
168 {
169
170 int16_t *in = data->dctSpace, *coeff = data->dctSpace + 64;
171 int32_t rd = (3+2)*BITS_MULT; /* 3 bits for mode + 2 for vector (minimum) */
172 VECTOR * current;
173 const uint8_t * ptr;
174 int i, xc, yc;
175 unsigned cbp = 0;
176 int cbpcost = 7*BITS_MULT; /* how much to add if cbp turns out to be non-zero */
177
178 if ( (x > data->max_dx) || (x < data->min_dx)
179 || (y > data->max_dy) || (y < data->min_dy) ) return;
180
181 if (!data->qpel_precision) {
182 ptr = GetReference(x, y, data);
183 current = data->currentMV;
184 xc = x; yc = y;
185 } else { /* x and y are in 1/4 precision */
186 ptr = xvid_me_interpolate16x16qpel(x, y, 0, data);
187 current = data->currentQMV;
188 xc = x/2; yc = y/2;
189 }
190
191 rd += BITS_MULT*(d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision)-2);
192
193 for(i = 0; i < 4; i++) {
194 int s = 8*((i&1) + (i>>1)*data->iEdgedWidth);
195 transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth);
196 rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
197 &cbp, i, data->scan_table, data->lambda[i], data->mpeg_quant_matrices,
198 data->quant_sq, &cbpcost, data->rel_var8[i], data->metric);
199 if (rd >= data->iMinSAD[0]) return;
200 }
201
202 /* chroma */
203 xc = (xc >> 1) + roundtab_79[xc & 0x3];
204 yc = (yc >> 1) + roundtab_79[yc & 0x3];
205
206 /* chroma U */
207 ptr = interpolate8x8_switch2(data->RefQ, data->RefP[4], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding);
208 transfer_8to16subro(in, data->CurU, ptr, data->iEdgedWidth/2);
209 rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
210 &cbp, 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices,
211 data->quant_sq, &cbpcost, data->rel_var8[4], data->metric);
212 if (rd >= data->iMinSAD[0]) return;
213
214 /* chroma V */
215 ptr = interpolate8x8_switch2(data->RefQ, data->RefP[5], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding);
216 transfer_8to16subro(in, data->CurV, ptr, data->iEdgedWidth/2);
217 rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
218 &cbp, 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices,
219 data->quant_sq, &cbpcost, data->rel_var8[5], data->metric);
220
221 if (rd < data->iMinSAD[0]) {
222 data->iMinSAD[0] = rd;
223 current[0].x = x; current[0].y = y;
224 data->dir = Direction;
225 *data->cbp = cbp;
226 }
227 }
228
229 static void
CheckCandidateRDDirect(const int x,const int y,SearchData * const data,const unsigned int Direction)230 CheckCandidateRDDirect(const int x, const int y, SearchData * const data, const unsigned int Direction)
231 {
232 int32_t xcf = 0, ycf = 0, xcb = 0, ycb = 0;
233 int32_t rd = 1*BITS_MULT;
234 int16_t *in = data->dctSpace, *coeff = data->dctSpace + 64;
235 unsigned int cbp = 0;
236 unsigned int k;
237 VECTOR mvs, b_mvs;
238 int cbpcost = 6*BITS_MULT; /* how much to add if cbp turns out to be non-zero */
239
240 const uint8_t *ReferenceF, *ReferenceB;
241
242 if (( x > 31) || ( x < -32) || ( y > 31) || (y < -32)) return;
243
244 for (k = 0; k < 4; k++) {
245 int s = 8*((k&1) + (k>>1)*data->iEdgedWidth);
246
247 mvs.x = data->directmvF[k].x + x;
248 b_mvs.x = ((x == 0) ?
249 data->directmvB[k].x
250 : mvs.x - data->referencemv[k].x);
251
252 mvs.y = data->directmvF[k].y + y;
253 b_mvs.y = ((y == 0) ?
254 data->directmvB[k].y
255 : mvs.y - data->referencemv[k].y);
256
257 if ((mvs.x > data->max_dx) || (mvs.x < data->min_dx) ||
258 (mvs.y > data->max_dy) || (mvs.y < data->min_dy) ||
259 (b_mvs.x > data->max_dx) || (b_mvs.x < data->min_dx) ||
260 (b_mvs.y > data->max_dy) || (b_mvs.y < data->min_dy) )
261 return;
262
263 if (data->qpel) {
264 xcf += mvs.x/2; ycf += mvs.y/2;
265 xcb += b_mvs.x/2; ycb += b_mvs.y/2;
266 ReferenceF = xvid_me_interpolate8x8qpel(mvs.x, mvs.y, k, 0, data);
267 ReferenceB = xvid_me_interpolate8x8qpel(b_mvs.x, b_mvs.y, k, 1, data);
268 } else {
269 xcf += mvs.x; ycf += mvs.y;
270 xcb += b_mvs.x; ycb += b_mvs.y;
271 ReferenceF = GetReference(mvs.x, mvs.y, data) + s;
272 ReferenceB = GetReferenceB(b_mvs.x, b_mvs.y, 1, data) + s;
273 }
274
275 transfer_8to16sub2ro(in, data->Cur + s, ReferenceF, ReferenceB, data->iEdgedWidth);
276 rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
277 &cbp, k, data->scan_table, data->lambda[k], data->mpeg_quant_matrices,
278 data->quant_sq, &cbpcost, data->rel_var8[k], data->metric);
279 if (rd > *(data->iMinSAD)) return;
280 }
281
282 /* chroma */
283 xcf = (xcf >> 3) + roundtab_76[xcf & 0xf];
284 ycf = (ycf >> 3) + roundtab_76[ycf & 0xf];
285 xcb = (xcb >> 3) + roundtab_76[xcb & 0xf];
286 ycb = (ycb >> 3) + roundtab_76[ycb & 0xf];
287
288 /* chroma U */
289 ReferenceF = interpolate8x8_switch2(data->RefQ, data->RefP[4], 0, 0, xcf, ycf, data->iEdgedWidth/2, data->rounding);
290 ReferenceB = interpolate8x8_switch2(data->RefQ + 16, data->b_RefP[4], 0, 0, xcb, ycb, data->iEdgedWidth/2, data->rounding);
291 transfer_8to16sub2ro(in, data->CurU, ReferenceF, ReferenceB, data->iEdgedWidth/2);
292 rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
293 &cbp, 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices,
294 data->quant_sq, &cbpcost, data->rel_var8[4], data->metric);
295 if (rd >= data->iMinSAD[0]) return;
296
297 /* chroma V */
298 ReferenceF = interpolate8x8_switch2(data->RefQ, data->RefP[5], 0, 0, xcf, ycf, data->iEdgedWidth/2, data->rounding);
299 ReferenceB = interpolate8x8_switch2(data->RefQ + 16, data->b_RefP[5], 0, 0, xcb, ycb, data->iEdgedWidth/2, data->rounding);
300 transfer_8to16sub2ro(in, data->CurV, ReferenceF, ReferenceB, data->iEdgedWidth/2);
301 rd += Block_CalcBits_BVOP_direct(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type,
302 &cbp, 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices,
303 data->quant_sq, &cbpcost, data->rel_var8[5], data->metric);
304
305 if (cbp || x != 0 || y != 0)
306 rd += BITS_MULT * d_mv_bits(x, y, zeroMV, 1, 0);
307
308 if (rd < *(data->iMinSAD)) {
309 *data->iMinSAD = rd;
310 data->currentMV->x = x; data->currentMV->y = y;
311 data->dir = Direction;
312 *data->cbp = cbp;
313 }
314 }
315
316 static void
CheckCandidateRDInt(const int x,const int y,SearchData * const data,const unsigned int Direction)317 CheckCandidateRDInt(const int x, const int y, SearchData * const data, const unsigned int Direction)
318 {
319 int32_t xf, yf, xb, yb, xcf, ycf, xcb, ycb;
320 int32_t rd = 2*BITS_MULT;
321 int16_t *in = data->dctSpace, *coeff = data->dctSpace + 64;
322 unsigned int cbp = 0;
323 unsigned int i;
324 int cbpcost = 7*BITS_MULT; /* how much to add if cbp turns out to be non-zero */
325
326 const uint8_t *ReferenceF, *ReferenceB;
327 VECTOR *current;
328
329 if ((x > data->max_dx) || (x < data->min_dx) ||
330 (y > data->max_dy) || (y < data->min_dy))
331 return;
332
333 if (Direction == 1) { /* x and y mean forward vector */
334 VECTOR backward = data->qpel_precision ? data->currentQMV[1] : data->currentMV[1];
335 xb = backward.x;
336 yb = backward.y;
337 xf = x; yf = y;
338 } else { /* x and y mean backward vector */
339 VECTOR forward = data->qpel_precision ? data->currentQMV[0] : data->currentMV[0];
340 xf = forward.x;
341 yf = forward.y;
342 xb = x; yb = y;
343 }
344
345 if (!data->qpel_precision) {
346 ReferenceF = GetReference(xf, yf, data);
347 ReferenceB = GetReferenceB(xb, yb, 1, data);
348 current = data->currentMV + Direction - 1;
349 xcf = xf; ycf = yf;
350 xcb = xb; ycb = yb;
351 } else {
352 ReferenceF = xvid_me_interpolate16x16qpel(xf, yf, 0, data);
353 current = data->currentQMV + Direction - 1;
354 ReferenceB = xvid_me_interpolate16x16qpel(xb, yb, 1, data);
355 xcf = xf/2; ycf = yf/2;
356 xcb = xb/2; ycb = yb/2;
357 }
358
359 rd += BITS_MULT * (d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision)
360 + d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision));
361
362 for(i = 0; i < 4; i++) {
363 int s = 8*((i&1) + (i>>1)*data->iEdgedWidth);
364 if (rd >= *data->iMinSAD) return;
365 transfer_8to16sub2ro(in, data->Cur + s, ReferenceF + s, ReferenceB + s, data->iEdgedWidth);
366 rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp,
367 i, data->scan_table, data->lambda[i], data->mpeg_quant_matrices,
368 data->quant_sq, &cbpcost, data->rel_var8[i], data->metric);
369 }
370
371 /* chroma */
372 xcf = (xcf >> 1) + roundtab_79[xcf & 0x3];
373 ycf = (ycf >> 1) + roundtab_79[ycf & 0x3];
374 xcb = (xcb >> 1) + roundtab_79[xcb & 0x3];
375 ycb = (ycb >> 1) + roundtab_79[ycb & 0x3];
376
377 /* chroma U */
378 ReferenceF = interpolate8x8_switch2(data->RefQ, data->RefP[4], 0, 0, xcf, ycf, data->iEdgedWidth/2, data->rounding);
379 ReferenceB = interpolate8x8_switch2(data->RefQ + 16, data->b_RefP[4], 0, 0, xcb, ycb, data->iEdgedWidth/2, data->rounding);
380 transfer_8to16sub2ro(in, data->CurU, ReferenceF, ReferenceB, data->iEdgedWidth/2);
381 rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp,
382 4, data->scan_table, data->lambda[4], data->mpeg_quant_matrices,
383 data->quant_sq, &cbpcost, data->rel_var8[4], data->metric);
384 if (rd >= data->iMinSAD[0]) return;
385
386
387 /* chroma V */
388 ReferenceF = interpolate8x8_switch2(data->RefQ, data->RefP[5], 0, 0, xcf, ycf, data->iEdgedWidth/2, data->rounding);
389 ReferenceB = interpolate8x8_switch2(data->RefQ + 16, data->b_RefP[5], 0, 0, xcb, ycb, data->iEdgedWidth/2, data->rounding);
390 transfer_8to16sub2ro(in, data->CurV, ReferenceF, ReferenceB, data->iEdgedWidth/2);
391 rd += Block_CalcBits_BVOP(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp,
392 5, data->scan_table, data->lambda[5], data->mpeg_quant_matrices,
393 data->quant_sq, &cbpcost, data->rel_var8[5], data->metric);
394
395 if (rd < *(data->iMinSAD)) {
396 *data->iMinSAD = rd;
397 current->x = x; current->y = y;
398 data->dir = Direction;
399 *data->cbp = cbp;
400 }
401 }
402
403 static int
SearchInterpolate_RD(const int x,const int y,const uint32_t MotionFlags,const MBParam * const pParam,int32_t * const best_sad,SearchData * const Data)404 SearchInterpolate_RD(const int x, const int y,
405 const uint32_t MotionFlags,
406 const MBParam * const pParam,
407 int32_t * const best_sad,
408 SearchData * const Data)
409 {
410 int i, j;
411
412 Data->iMinSAD[0] = *best_sad;
413
414 get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy,
415 x, y, 4, pParam->width, pParam->height, Data->iFcode, 1 + Data->qpel);
416
417 Data->qpel_precision = Data->qpel;
418
419 if (Data->qpel) {
420 i = Data->currentQMV[0].x; j = Data->currentQMV[0].y;
421 } else {
422 i = Data->currentMV[0].x; j = Data->currentMV[0].y;
423 }
424
425 CheckCandidateRDInt(i, j, Data, 1);
426
427 return Data->iMinSAD[0];
428 }
429
430 static int
SearchDirect_RD(const int x,const int y,const uint32_t MotionFlags,const MBParam * const pParam,int32_t * const best_sad,SearchData * const Data)431 SearchDirect_RD(const int x, const int y,
432 const uint32_t MotionFlags,
433 const MBParam * const pParam,
434 int32_t * const best_sad,
435 SearchData * const Data)
436 {
437 Data->iMinSAD[0] = *best_sad;
438
439 Data->qpel_precision = Data->qpel;
440
441 CheckCandidateRDDirect(Data->currentMV->x, Data->currentMV->y, Data, 255);
442
443 return Data->iMinSAD[0];
444 }
445
446 static int
SearchBF_RD(const int x,const int y,const uint32_t MotionFlags,const MBParam * const pParam,int32_t * const best_sad,SearchData * const Data)447 SearchBF_RD(const int x, const int y,
448 const uint32_t MotionFlags,
449 const MBParam * const pParam,
450 int32_t * const best_sad,
451 SearchData * const Data)
452 {
453 int i, j;
454
455 Data->iMinSAD[0] = *best_sad;
456
457 get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy,
458 x, y, 4, pParam->width, pParam->height, Data->iFcode, 1 + Data->qpel);
459
460 Data->qpel_precision = Data->qpel;
461
462 if (Data->qpel) {
463 i = Data->currentQMV[0].x; j = Data->currentQMV[0].y;
464 } else {
465 i = Data->currentMV[0].x; j = Data->currentMV[0].y;
466 }
467
468 CheckCandidateRDBF(i, j, Data, 1);
469
470 return Data->iMinSAD[0];
471 }
472
473 static int
get_sad_for_mode(int mode,SearchData * const Data_d,SearchData * const Data_b,SearchData * const Data_f,SearchData * const Data_i)474 get_sad_for_mode(int mode,
475 SearchData * const Data_d,
476 SearchData * const Data_b,
477 SearchData * const Data_f,
478 SearchData * const Data_i)
479 {
480 switch(mode) {
481 case MODE_DIRECT: return Data_d->iMinSAD[0];
482 case MODE_FORWARD: return Data_f->iMinSAD[0];
483 case MODE_BACKWARD: return Data_b->iMinSAD[0];
484 default:
485 case MODE_INTERPOLATE: return Data_i->iMinSAD[0];
486 }
487 }
488
489 void
ModeDecision_BVOP_RD(SearchData * const Data_d,SearchData * const Data_b,SearchData * const Data_f,SearchData * const Data_i,MACROBLOCK * const pMB,const MACROBLOCK * const b_mb,VECTOR * f_predMV,VECTOR * b_predMV,const uint32_t MotionFlags,const uint32_t VopFlags,const MBParam * const pParam,int x,int y,int best_sad,int force_direct)490 ModeDecision_BVOP_RD(SearchData * const Data_d,
491 SearchData * const Data_b,
492 SearchData * const Data_f,
493 SearchData * const Data_i,
494 MACROBLOCK * const pMB,
495 const MACROBLOCK * const b_mb,
496 VECTOR * f_predMV,
497 VECTOR * b_predMV,
498 const uint32_t MotionFlags,
499 const uint32_t VopFlags,
500 const MBParam * const pParam,
501 int x, int y,
502 int best_sad,
503 int force_direct)
504 {
505 int mode = MODE_DIRECT, k;
506 int f_rd, b_rd, i_rd, d_rd, best_rd;
507 const int qpel = Data_d->qpel;
508 const uint32_t iQuant = Data_d->iQuant;
509 int i;
510 int ref_quant = b_mb->quant;
511 int no_of_checks = 0;
512
513 int order[4] = {MODE_DIRECT, MODE_FORWARD, MODE_BACKWARD, MODE_INTERPOLATE};
514
515 Data_d->metric = Data_b->metric = Data_f->metric = Data_i->metric = !!(VopFlags & XVID_VOP_RD_PSNRHVSM);
516
517 Data_d->scan_table = Data_b->scan_table = Data_f->scan_table = Data_i->scan_table
518 = /*VopFlags & XVID_VOP_ALTERNATESCAN ? scan_tables[2] : */scan_tables[0];
519 *Data_f->cbp = *Data_b->cbp = *Data_i->cbp = *Data_d->cbp = 63;
520
521 f_rd = b_rd = i_rd = d_rd = best_rd = 256*4096;
522
523 for (i = 0; i < 6; i++) {
524 /* re-calculate as if it was p-frame's quant +.5 */
525 int lam = (pMB->lambda[i]*LAMBDA*iQuant*iQuant)/(ref_quant*(ref_quant+1));
526 lam >>= LAMBDA_EXP;
527 Data_d->lambda[i] = lam;
528 Data_b->lambda[i] = lam;
529 Data_f->lambda[i] = lam;
530 Data_i->lambda[i] = lam;
531
532 Data_d->rel_var8[i] = pMB->rel_var8[i];
533 Data_b->rel_var8[i] = pMB->rel_var8[i];
534 Data_f->rel_var8[i] = pMB->rel_var8[i];
535 Data_i->rel_var8[i] = pMB->rel_var8[i];
536 }
537
538 if (force_direct) {
539 best_rd = 0;
540 goto set_mode; /* bypass checks for non-direct modes */
541 }
542
543 /* find the best order of evaluation - smallest SAD comes first, because *if* it means smaller RD,
544 early-stops will activate sooner */
545
546 for (i = 3; i >= 0; i--) {
547 int j;
548 for (j = 0; j < i; j++) {
549 int sad1 = get_sad_for_mode(order[j], Data_d, Data_b, Data_f, Data_i);
550 int sad2 = get_sad_for_mode(order[j+1], Data_d, Data_b, Data_f, Data_i);
551 if (sad1 > sad2) {
552 int t = order[j];
553 order[j] = order[j+1];
554 order[j+1] = t;
555 }
556 }
557 }
558
559 for(i = 0; i < 4; i++)
560 if (get_sad_for_mode(order[i], Data_d, Data_b, Data_f, Data_i) < 2*best_sad)
561 no_of_checks++;
562
563 if (no_of_checks > 1) {
564 /* evaluate cost of all modes */
565 for (i = 0; i < no_of_checks; i++) {
566 int rd;
567 if (2*best_sad < get_sad_for_mode(order[i], Data_d, Data_b, Data_f, Data_i))
568 break; /* further SADs are too big */
569
570 switch (order[i]) {
571 case MODE_DIRECT:
572 rd = d_rd = SearchDirect_RD(x, y, MotionFlags, pParam, &best_rd, Data_d);
573 break;
574 case MODE_FORWARD:
575 rd = f_rd = SearchBF_RD(x, y, MotionFlags, pParam, &best_rd, Data_f) + 1*BITS_MULT; /* extra one bit for FORWARD vs BACKWARD */
576 break;
577 case MODE_BACKWARD:
578 rd = b_rd = SearchBF_RD(x, y, MotionFlags, pParam, &best_rd, Data_b);
579 break;
580 default:
581 case MODE_INTERPOLATE:
582 rd = i_rd = SearchInterpolate_RD(x, y, MotionFlags, pParam, &best_rd, Data_i);
583 break;
584 }
585 if (rd < best_rd) {
586 mode = order[i];
587 best_rd = rd;
588 }
589 }
590 } else {
591 /* only 1 mode is below the threshold */
592 mode = order[0];
593 best_rd = 0;
594 }
595
596
597 set_mode:
598 pMB->sad16 = best_rd;
599 pMB->mode = mode;
600
601 switch (mode) {
602
603 case MODE_DIRECT:
604 if (!qpel && b_mb->mode != MODE_INTER4V) pMB->mode = MODE_DIRECT_NO4V; /* for faster compensation */
605
606 pMB->pmvs[3] = Data_d->currentMV[0];
607
608 pMB->cbp = *Data_d->cbp;
609
610 for (k = 0; k < 4; k++) {
611 pMB->mvs[k].x = Data_d->directmvF[k].x + Data_d->currentMV->x;
612 pMB->b_mvs[k].x = ( (Data_d->currentMV->x == 0)
613 ? Data_d->directmvB[k].x
614 :pMB->mvs[k].x - Data_d->referencemv[k].x);
615 pMB->mvs[k].y = (Data_d->directmvF[k].y + Data_d->currentMV->y);
616 pMB->b_mvs[k].y = ((Data_d->currentMV->y == 0)
617 ? Data_d->directmvB[k].y
618 : pMB->mvs[k].y - Data_d->referencemv[k].y);
619 if (qpel) {
620 pMB->qmvs[k].x = pMB->mvs[k].x; pMB->mvs[k].x /= 2;
621 pMB->b_qmvs[k].x = pMB->b_mvs[k].x; pMB->b_mvs[k].x /= 2;
622 pMB->qmvs[k].y = pMB->mvs[k].y; pMB->mvs[k].y /= 2;
623 pMB->b_qmvs[k].y = pMB->b_mvs[k].y; pMB->b_mvs[k].y /= 2;
624 }
625
626 if (b_mb->mode != MODE_INTER4V) {
627 pMB->mvs[3] = pMB->mvs[2] = pMB->mvs[1] = pMB->mvs[0];
628 pMB->b_mvs[3] = pMB->b_mvs[2] = pMB->b_mvs[1] = pMB->b_mvs[0];
629 pMB->qmvs[3] = pMB->qmvs[2] = pMB->qmvs[1] = pMB->qmvs[0];
630 pMB->b_qmvs[3] = pMB->b_qmvs[2] = pMB->b_qmvs[1] = pMB->b_qmvs[0];
631 break;
632 }
633 }
634 break;
635
636 case MODE_FORWARD:
637 if (qpel) {
638 pMB->pmvs[0].x = Data_f->currentQMV->x - f_predMV->x;
639 pMB->pmvs[0].y = Data_f->currentQMV->y - f_predMV->y;
640 pMB->qmvs[0] = *Data_f->currentQMV;
641 *f_predMV = Data_f->currentQMV[0];
642 } else {
643 pMB->pmvs[0].x = Data_f->currentMV->x - f_predMV->x;
644 pMB->pmvs[0].y = Data_f->currentMV->y - f_predMV->y;
645 *f_predMV = Data_f->currentMV[0];
646 }
647 pMB->mvs[0] = *Data_f->currentMV;
648 pMB->cbp = *Data_f->cbp;
649 pMB->b_mvs[0] = *Data_b->currentMV; /* hint for future searches */
650 break;
651
652 case MODE_BACKWARD:
653 if (qpel) {
654 pMB->pmvs[0].x = Data_b->currentQMV->x - b_predMV->x;
655 pMB->pmvs[0].y = Data_b->currentQMV->y - b_predMV->y;
656 pMB->b_qmvs[0] = *Data_b->currentQMV;
657 *b_predMV = Data_b->currentQMV[0];
658 } else {
659 pMB->pmvs[0].x = Data_b->currentMV->x - b_predMV->x;
660 pMB->pmvs[0].y = Data_b->currentMV->y - b_predMV->y;
661 *b_predMV = Data_b->currentMV[0];
662 }
663 pMB->b_mvs[0] = *Data_b->currentMV;
664 pMB->cbp = *Data_b->cbp;
665 pMB->mvs[0] = *Data_f->currentMV; /* hint for future searches */
666 break;
667
668
669 case MODE_INTERPOLATE:
670 pMB->mvs[0] = Data_i->currentMV[0];
671 pMB->b_mvs[0] = Data_i->currentMV[1];
672 if (qpel) {
673 pMB->qmvs[0] = Data_i->currentQMV[0];
674 pMB->b_qmvs[0] = Data_i->currentQMV[1];
675 pMB->pmvs[1].x = pMB->qmvs[0].x - f_predMV->x;
676 pMB->pmvs[1].y = pMB->qmvs[0].y - f_predMV->y;
677 pMB->pmvs[0].x = pMB->b_qmvs[0].x - b_predMV->x;
678 pMB->pmvs[0].y = pMB->b_qmvs[0].y - b_predMV->y;
679 *f_predMV = Data_i->currentQMV[0];
680 *b_predMV = Data_i->currentQMV[1];
681 } else {
682 pMB->pmvs[1].x = pMB->mvs[0].x - f_predMV->x;
683 pMB->pmvs[1].y = pMB->mvs[0].y - f_predMV->y;
684 pMB->pmvs[0].x = pMB->b_mvs[0].x - b_predMV->x;
685 pMB->pmvs[0].y = pMB->b_mvs[0].y - b_predMV->y;
686 *f_predMV = Data_i->currentMV[0];
687 *b_predMV = Data_i->currentMV[1];
688 }
689 pMB->cbp = *Data_i->cbp;
690 break;
691 }
692 }
693