1 /*
2 * VC-1 and WMV3 decoder
3 * Copyright (c) 2011 Mashiat Sarker Shakkhar
4 * Copyright (c) 2006-2007 Konstantin Shishkov
5 * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 /**
25 * @file
26 * VC-1 and WMV3 loopfilter
27 */
28
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "vc1.h"
32 #include "vc1dsp.h"
33
vc1_h_overlap_filter(VC1Context * v,int16_t (* left_block)[64],int16_t (* right_block)[64],int left_fieldtx,int right_fieldtx,int block_num)34 static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64],
35 int16_t (*right_block)[64], int left_fieldtx,
36 int right_fieldtx, int block_num)
37 {
38 switch (block_num) {
39 case 0:
40 v->vc1dsp.vc1_h_s_overlap(left_block[2],
41 right_block[0],
42 left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8,
43 left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8,
44 left_fieldtx || right_fieldtx ? 0 : 1);
45 break;
46
47 case 1:
48 v->vc1dsp.vc1_h_s_overlap(right_block[0],
49 right_block[2],
50 8,
51 8,
52 right_fieldtx ? 0 : 1);
53 break;
54
55 case 2:
56 v->vc1dsp.vc1_h_s_overlap(!left_fieldtx && right_fieldtx ? left_block[2] + 8 : left_block[3],
57 left_fieldtx && !right_fieldtx ? right_block[0] + 8 : right_block[1],
58 left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8,
59 left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8,
60 left_fieldtx || right_fieldtx ? 2 : 1);
61 break;
62
63 case 3:
64 v->vc1dsp.vc1_h_s_overlap(right_block[1],
65 right_block[3],
66 8,
67 8,
68 right_fieldtx ? 2 : 1);
69 break;
70
71 case 4:
72 case 5:
73 v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num], 8, 8, 1);
74 break;
75 }
76 }
77
vc1_v_overlap_filter(VC1Context * v,int16_t (* top_block)[64],int16_t (* bottom_block)[64],int block_num)78 static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64],
79 int16_t (*bottom_block)[64], int block_num)
80 {
81 switch (block_num) {
82 case 0:
83 v->vc1dsp.vc1_v_s_overlap(top_block[1], bottom_block[0]);
84 break;
85
86 case 1:
87 v->vc1dsp.vc1_v_s_overlap(top_block[3], bottom_block[2]);
88 break;
89
90 case 2:
91 v->vc1dsp.vc1_v_s_overlap(bottom_block[0], bottom_block[1]);
92 break;
93
94 case 3:
95 v->vc1dsp.vc1_v_s_overlap(bottom_block[2], bottom_block[3]);
96 break;
97
98 case 4:
99 case 5:
100 v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]);
101 break;
102 }
103 }
104
/**
 * Overlap smoothing step of the "i" decoding path, run once per decoded MB.
 *
 * Filtering is enabled when v->pq >= 9, or, for PROFILE_ADVANCED, when
 * v->condover is CONDOVER_ALL or the over_flags_plane entries of both MBs
 * sharing the edge are set (only the current MB's entry matters for edges
 * internal to the MB).
 *
 * @param v decoder context; s->mb_x / s->mb_y select the current MB
 */
void ff_vc1_i_overlap_filter(VC1Context *v)
{
    MpegEncContext *s = &v->s;
    int16_t (*topleft_blk)[64] = v->block[v->topleft_blk_idx];
    int16_t (*top_blk)[64]     = v->block[v->top_blk_idx];
    int16_t (*left_blk)[64]    = v->block[v->left_blk_idx];
    int16_t (*cur_blk)[64]     = v->block[v->cur_blk_idx];
    const int num_blocks  = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
    const int mb_pos      = s->mb_x + s->mb_y * s->mb_stride;
    const int frame_ilace = v->fcm == ILACE_FRAME;
    const int advanced    = v->profile == PROFILE_ADVANCED;
    /* Overlap is applied regardless of the per-MB flags. */
    const int always      = v->pq >= 9 || (advanced && v->condover == CONDOVER_ALL);
    int n;

    /* Inside one MB the H overlap has to be done before the V overlap.
     * So here the H filter handles the left border and the internal vertical
     * border of the MB that was just decoded; the H filter of its right
     * border only happens on the next call. The V filter (top border and
     * internal horizontal border) is therefore applied to the MB one column
     * to the left, making H trail by one MB column and V by one MB row.
     * The put_pixels loop is delayed accordingly by one row and one column. */
    for (n = 0; n < num_blocks; n++) {
        const int inner_edge = (n & 5) == 1;

        /* In the first column only the MB-internal edge exists. */
        if (!inner_edge && s->mb_x == 0)
            continue;

        if (always ||
            (advanced && v->over_flags_plane[mb_pos] &&
             (inner_edge || v->over_flags_plane[mb_pos - 1])))
            vc1_h_overlap_filter(v,
                                 s->mb_x ? left_blk : cur_blk, cur_blk,
                                 frame_ilace && s->mb_x && v->fieldtx_plane[mb_pos - 1],
                                 frame_ilace && v->fieldtx_plane[mb_pos],
                                 n);
    }

    /* No V overlap is run here for ILACE_FRAME pictures. */
    if (frame_ilace)
        return;

    for (n = 0; n < num_blocks; n++) {
        /* In the first slice line only blocks with bit 1 set are processed. */
        if (s->first_slice_line && !(n & 2))
            continue;

        /* MB to the left of the current one. */
        if (s->mb_x &&
            (always ||
             (advanced && v->over_flags_plane[mb_pos - 1] &&
              ((n & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride]))))
            vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, n);

        /* In the last column nothing will follow, so do the current MB too. */
        if (s->mb_x == s->mb_width - 1 &&
            (always ||
             (advanced && v->over_flags_plane[mb_pos] &&
              ((n & 2) || v->over_flags_plane[mb_pos - s->mb_stride]))))
            vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, n);
    }
}
160
/**
 * Overlap smoothing step of the "p" decoding path, run once per decoded MB.
 *
 * An edge is filtered only when the v->mb_type[0] entries of both blocks
 * sharing it are non-zero. The H pass works on the current MB; the V pass
 * works on the MB to the left and, in the last column, on the current MB
 * as well. No V pass is run for ILACE_FRAME pictures.
 *
 * @param v decoder context; s->mb_x / s->mb_y select the current MB
 */
void ff_vc1_p_overlap_filter(VC1Context *v)
{
    MpegEncContext *s = &v->s;
    int16_t (*topleft_blk)[64] = v->block[v->topleft_blk_idx];
    int16_t (*top_blk)[64]     = v->block[v->top_blk_idx];
    int16_t (*left_blk)[64]    = v->block[v->left_blk_idx];
    int16_t (*cur_blk)[64]     = v->block[v->cur_blk_idx];
    const int num_blocks  = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
    const int mb_pos      = s->mb_x + s->mb_y * s->mb_stride;
    const int frame_ilace = v->fcm == ILACE_FRAME;
    int n;

    for (n = 0; n < num_blocks; n++) {
        int blk;

        /* In the first column only the MB-internal edge exists. */
        if (s->mb_x == 0 && (n & 5) != 1)
            continue;

        blk = s->block_index[n];
        if (v->mb_type[0][blk] && v->mb_type[0][blk - 1])
            vc1_h_overlap_filter(v,
                                 s->mb_x ? left_blk : cur_blk, cur_blk,
                                 frame_ilace && s->mb_x && v->fieldtx_plane[mb_pos - 1],
                                 frame_ilace && v->fieldtx_plane[mb_pos],
                                 n);
    }

    if (frame_ilace)
        return;

    for (n = 0; n < num_blocks; n++) {
        int blk, above, left_step;

        /* In the first slice line only blocks with bit 1 set are processed. */
        if (s->first_slice_line && !(n & 2))
            continue;

        blk       = s->block_index[n];
        above     = blk - s->block_wrap[n];
        /* Distance in mb_type entries to the same block of the left MB:
         * 2 for block numbers up to 3, 1 for block numbers 4 and 5. */
        left_step = 2 - (n > 3);

        if (s->mb_x &&
            v->mb_type[0][blk - left_step] &&
            v->mb_type[0][above - left_step])
            vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, n);

        if (s->mb_x == s->mb_width - 1 &&
            v->mb_type[0][blk] &&
            v->mb_type[0][above])
            vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, n);
    }
}
200
/* Bit flags OR-ed into the "flags" argument of the loop filter helpers.
 * The callers in this file set them for MBs in the first/last column and
 * in the first/last row of the range being filtered. */
#define LEFT_EDGE   0x1
#define RIGHT_EDGE  0x2
#define TOP_EDGE    0x4
#define BOTTOM_EDGE 0x8
205
vc1_i_h_loop_filter(VC1Context * v,uint8_t * dest,uint32_t flags,int block_num)206 static av_always_inline void vc1_i_h_loop_filter(VC1Context *v, uint8_t *dest,
207 uint32_t flags, int block_num)
208 {
209 MpegEncContext *s = &v->s;
210 int pq = v->pq;
211 uint8_t *dst;
212
213 if (block_num & 2)
214 return;
215
216 if (!(flags & LEFT_EDGE) || (block_num & 5) == 1) {
217 if (block_num > 3)
218 dst = dest;
219 else
220 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
221
222 if (v->fcm == ILACE_FRAME)
223 if (block_num > 3) {
224 v->vc1dsp.vc1_h_loop_filter4(dst, 2 * s->uvlinesize, pq);
225 v->vc1dsp.vc1_h_loop_filter4(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
226 } else {
227 v->vc1dsp.vc1_h_loop_filter8(dst, 2 * s->linesize, pq);
228 v->vc1dsp.vc1_h_loop_filter8(dst + s->linesize, 2 * s->linesize, pq);
229 }
230 else
231 if (block_num > 3)
232 v->vc1dsp.vc1_h_loop_filter8(dst, s->uvlinesize, pq);
233 else
234 v->vc1dsp.vc1_h_loop_filter16(dst, s->linesize, pq);
235 }
236 }
237
vc1_i_v_loop_filter(VC1Context * v,uint8_t * dest,uint32_t flags,uint8_t fieldtx,int block_num)238 static av_always_inline void vc1_i_v_loop_filter(VC1Context *v, uint8_t *dest,
239 uint32_t flags, uint8_t fieldtx,
240 int block_num)
241 {
242 MpegEncContext *s = &v->s;
243 int pq = v->pq;
244 uint8_t *dst;
245
246 if ((block_num & 5) == 1)
247 return;
248
249 if (!(flags & TOP_EDGE) || block_num & 2) {
250 if (block_num > 3)
251 dst = dest;
252 else
253 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
254
255 if (v->fcm == ILACE_FRAME) {
256 if (block_num > 3) {
257 v->vc1dsp.vc1_v_loop_filter8(dst, 2 * s->uvlinesize, pq);
258 v->vc1dsp.vc1_v_loop_filter8(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
259 } else if (block_num < 2 || !fieldtx) {
260 v->vc1dsp.vc1_v_loop_filter16(dst, 2 * s->linesize, pq);
261 v->vc1dsp.vc1_v_loop_filter16(dst + s->linesize, 2 * s->linesize, pq);
262 }
263 } else
264 if (block_num > 3)
265 v->vc1dsp.vc1_v_loop_filter8(dst, s->uvlinesize, pq);
266 else
267 v->vc1dsp.vc1_v_loop_filter16(dst, s->linesize, pq);
268 }
269 }
270
ff_vc1_i_loop_filter(VC1Context * v)271 void ff_vc1_i_loop_filter(VC1Context *v)
272 {
273 MpegEncContext *s = &v->s;
274 int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
275 int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
276 uint8_t *dest, fieldtx;
277 uint32_t flags = 0;
278 int i;
279
280 /* Within a MB, the vertical loop filter always runs before the horizontal.
281 * To accomplish that, we run the V loop filter on top and internal
282 * horizontal borders of the last overlap filtered MB. Then, we wait for
283 * the loop filter iteration on the next row to do V loop filter on the
284 * bottom edge of this MB, before moving over and running the H loop
285 * filter on the left and internal vertical borders. Therefore, the loop
286 * filter trails by one row and one column relative to the overlap filter
287 * and two rows and two columns relative to the decoding loop. */
288 if (!s->first_slice_line) {
289 dest = s->dest[0] - 16 * s->linesize - 16;
290 flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
291 if (s->mb_x) {
292 fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
293 for (i = 0; i < block_count; i++)
294 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, fieldtx, i);
295 }
296 if (s->mb_x == v->end_mb_x - 1) {
297 dest += 16;
298 fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
299 for (i = 0; i < block_count; i++)
300 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, fieldtx, i);
301 }
302 }
303 if (s->mb_y == s->end_mb_y - 1) {
304 dest = s->dest[0] - 16;
305 flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
306 if (s->mb_x) {
307 fieldtx = v->fieldtx_plane[mb_pos - 1];
308 for (i = 0; i < block_count; i++)
309 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, fieldtx, i);
310 }
311 if (s->mb_x == v->end_mb_x - 1) {
312 dest += 16;
313 fieldtx = v->fieldtx_plane[mb_pos];
314 for (i = 0; i < block_count; i++)
315 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, fieldtx, i);
316 }
317 }
318
319 if (s->mb_y >= s->start_mb_y + 2) {
320 dest = s->dest[0] - 32 * s->linesize - 16;
321 if (s->mb_x) {
322 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
323 for (i = 0; i < block_count; i++)
324 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest, flags, i);
325 }
326 if (s->mb_x == v->end_mb_x - 1) {
327 dest += 16;
328 flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
329 for (i = 0; i < block_count; i++)
330 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest, flags, i);
331 }
332 }
333 if (s->mb_y == s->end_mb_y - 1) {
334 if (s->mb_y >= s->start_mb_y + 1) {
335 dest = s->dest[0] - 16 * s->linesize - 16;
336 if (s->mb_x) {
337 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
338 for (i = 0; i < block_count; i++)
339 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, i);
340 }
341 if (s->mb_x == v->end_mb_x - 1) {
342 flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
343 dest += 16;
344 for (i = 0; i < block_count; i++)
345 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, i);
346 }
347 }
348 dest = s->dest[0] - 16;
349 if (s->mb_x) {
350 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
351 for (i = 0; i < block_count; i++)
352 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, i);
353 }
354 if (s->mb_x == v->end_mb_x - 1) {
355 dest += 16;
356 flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
357 for (i = 0; i < block_count; i++)
358 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, i);
359 }
360 }
361 }
362
vc1_p_h_loop_filter(VC1Context * v,uint8_t * dest,uint32_t * cbp,uint8_t * is_intra,int16_t (* mv)[2],uint8_t * mv_f,int * ttblk,uint32_t flags,int block_num)363 static av_always_inline void vc1_p_h_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
364 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
365 int *ttblk, uint32_t flags, int block_num)
366 {
367 MpegEncContext *s = &v->s;
368 int pq = v->pq;
369 uint32_t left_cbp = cbp[0] >> (block_num * 4), right_cbp;
370 uint8_t left_is_intra, right_is_intra;
371 int tt;
372 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
373 uint8_t *dst;
374
375 if (block_num > 3)
376 dst = dest;
377 else
378 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
379
380 if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
381 left_is_intra = is_intra[0] & (1 << block_num);
382
383 if (block_num > 3) {
384 right_is_intra = is_intra[1] & (1 << block_num);
385 right_cbp = cbp[1] >> (block_num * 4);
386 } else if (block_num & 1) {
387 right_is_intra = is_intra[1] & (1 << block_num - 1);
388 right_cbp = cbp[1] >> ((block_num - 1) * 4);
389 } else {
390 right_is_intra = is_intra[0] & (1 << block_num + 1);
391 right_cbp = cbp[0] >> ((block_num + 1) * 4);
392 }
393
394 if (left_is_intra || right_is_intra ||
395 mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1] ||
396 (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[1]))
397 v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
398 else {
399 idx = (left_cbp | (right_cbp >> 1)) & 5;
400 if (idx & 1)
401 v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 8, linesize, pq);
402 if (idx & 4)
403 v->vc1dsp.vc1_h_loop_filter4(dst + 8, linesize, pq);
404 }
405 }
406
407 tt = ttblk[0] >> (block_num * 4) & 0xf;
408 if (tt == TT_4X4 || tt == TT_4X8) {
409 if (left_cbp & 3)
410 v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
411 if (left_cbp & 12)
412 v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
413 }
414 }
415
vc1_p_v_loop_filter(VC1Context * v,uint8_t * dest,uint32_t * cbp,uint8_t * is_intra,int16_t (* mv)[2],uint8_t * mv_f,int * ttblk,uint32_t flags,int block_num)416 static av_always_inline void vc1_p_v_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
417 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
418 int *ttblk, uint32_t flags, int block_num)
419 {
420 MpegEncContext *s = &v->s;
421 int pq = v->pq;
422 uint32_t top_cbp = cbp[0] >> (block_num * 4), bottom_cbp;
423 uint8_t top_is_intra, bottom_is_intra;
424 int tt;
425 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
426 uint8_t *dst;
427
428 if (block_num > 3)
429 dst = dest;
430 else
431 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
432
433 if(!(flags & BOTTOM_EDGE) || block_num < 2) {
434 top_is_intra = is_intra[0] & (1 << block_num);
435
436 if (block_num > 3) {
437 bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num);
438 bottom_cbp = cbp[s->mb_stride] >> (block_num * 4);
439 } else if (block_num < 2) {
440 bottom_is_intra = is_intra[0] & (1 << block_num + 2);
441 bottom_cbp = cbp[0] >> ((block_num + 2) * 4);
442 } else {
443 bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num - 2);
444 bottom_cbp = cbp[s->mb_stride] >> ((block_num - 2) * 4);
445 }
446
447 if (top_is_intra || bottom_is_intra ||
448 mv[0][0] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][0] ||
449 mv[0][1] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][1] ||
450 (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[block_num > 3 ? s->mb_stride : s->b8_stride]))
451 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
452 else {
453 idx = (top_cbp | (bottom_cbp >> 2)) & 3;
454 if (idx & 1)
455 v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize + 4, linesize, pq);
456 if (idx & 2)
457 v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize, linesize, pq);
458 }
459 }
460
461 tt = ttblk[0] >> (block_num * 4) & 0xf;
462 if (tt == TT_4X4 || tt == TT_8X4) {
463 if (top_cbp & 5)
464 v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
465 if (top_cbp & 10)
466 v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
467 }
468 }
469
ff_vc1_p_loop_filter(VC1Context * v)470 void ff_vc1_p_loop_filter(VC1Context *v)
471 {
472 MpegEncContext *s = &v->s;
473 int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
474 uint8_t *dest;
475 uint32_t *cbp;
476 uint8_t *is_intra;
477 int16_t (*uvmv)[2];
478 int *ttblk;
479 uint32_t flags;
480 int i;
481
482 /* Within a MB, the vertical loop filter always runs before the horizontal.
483 * To accomplish that, we run the V loop filter on all applicable
484 * horizontal borders of the MB above the last overlap filtered MB. Then,
485 * we wait for the next loop filter iteration to do H loop filter on all
486 * applicable vertical borders of this MB. Therefore, the loop filter
487 * trails by one row and one column relative to the overlap filter and two
488 * rows and two columns relative to the decoding loop. */
489 if (s->mb_y >= s->start_mb_y + 2) {
490 if (s->mb_x) {
491 dest = s->dest[0] - 32 * s->linesize - 16;
492 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
493 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
494 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
495 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
496 flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
497 for (i = 0; i < block_count; i++)
498 vc1_p_v_loop_filter(v,
499 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
500 cbp,
501 is_intra,
502 i > 3 ? uvmv :
503 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
504 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
505 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
506 ttblk,
507 flags,
508 i);
509 }
510 if (s->mb_x == s->mb_width - 1) {
511 dest = s->dest[0] - 32 * s->linesize;
512 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
513 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
514 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
515 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
516 flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
517 for (i = 0; i < block_count; i++)
518 vc1_p_v_loop_filter(v,
519 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
520 cbp,
521 is_intra,
522 i > 3 ? uvmv :
523 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
524 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
525 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
526 ttblk,
527 flags,
528 i);
529 }
530 }
531 if (s->mb_y == s->end_mb_y - 1) {
532 if (s->mb_x) {
533 if (s->mb_y >= s->start_mb_y + 1) {
534 dest = s->dest[0] - 16 * s->linesize - 16;
535 cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
536 is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
537 uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
538 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
539 flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
540 for (i = 0; i < block_count; i++)
541 vc1_p_v_loop_filter(v,
542 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
543 cbp,
544 is_intra,
545 i > 3 ? uvmv :
546 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
547 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
548 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
549 ttblk,
550 flags,
551 i);
552 }
553 dest = s->dest[0] - 16;
554 cbp = &v->cbp[s->mb_x - 1];
555 is_intra = &v->is_intra[s->mb_x - 1];
556 uvmv = &v->luma_mv[s->mb_x - 1];
557 ttblk = &v->ttblk[s->mb_x - 1];
558 flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
559 for (i = 0; i < block_count; i++)
560 vc1_p_v_loop_filter(v,
561 i > 3 ? s->dest[i - 3] - 8 : dest,
562 cbp,
563 is_intra,
564 i > 3 ? uvmv :
565 &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
566 i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
567 &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
568 ttblk,
569 flags,
570 i);
571 }
572 if (s->mb_x == s->mb_width - 1) {
573 if (s->mb_y >= s->start_mb_y + 1) {
574 dest = s->dest[0] - 16 * s->linesize;
575 cbp = &v->cbp[s->mb_x - s->mb_stride];
576 is_intra = &v->is_intra[s->mb_x - s->mb_stride];
577 uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
578 ttblk = &v->ttblk[s->mb_x - s->mb_stride];
579 flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
580 for (i = 0; i < block_count; i++)
581 vc1_p_v_loop_filter(v,
582 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
583 cbp,
584 is_intra,
585 i > 3 ? uvmv :
586 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
587 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
588 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
589 ttblk,
590 flags,
591 i);
592 }
593 dest = s->dest[0];
594 cbp = &v->cbp[s->mb_x];
595 is_intra = &v->is_intra[s->mb_x];
596 uvmv = &v->luma_mv[s->mb_x];
597 ttblk = &v->ttblk[s->mb_x];
598 flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
599 for (i = 0; i < block_count; i++)
600 vc1_p_v_loop_filter(v,
601 i > 3 ? s->dest[i - 3] : dest,
602 cbp,
603 is_intra,
604 i > 3 ? uvmv :
605 &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
606 i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
607 &v->mv_f[0][s->block_index[i] + v->blocks_off],
608 ttblk,
609 flags,
610 i);
611 }
612 }
613
614 if (s->mb_y >= s->start_mb_y + 2) {
615 if (s->mb_x >= 2) {
616 dest = s->dest[0] - 32 * s->linesize - 32;
617 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 2];
618 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 2];
619 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 2];
620 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
621 flags = s->mb_x == 2 ? LEFT_EDGE : 0;
622 for (i = 0; i < block_count; i++)
623 vc1_p_h_loop_filter(v,
624 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
625 cbp,
626 is_intra,
627 i > 3 ? uvmv :
628 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
629 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 2 + v->mb_off] :
630 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
631 ttblk,
632 flags,
633 i);
634 }
635 if (s->mb_x == s->mb_width - 1) {
636 if (s->mb_x >= 1) {
637 dest = s->dest[0] - 32 * s->linesize - 16;
638 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
639 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
640 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
641 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
642 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
643 for (i = 0; i < block_count; i++)
644 vc1_p_h_loop_filter(v,
645 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
646 cbp,
647 is_intra,
648 i > 3 ? uvmv :
649 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
650 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
651 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
652 ttblk,
653 flags,
654 i);
655 }
656 dest = s->dest[0] - 32 * s->linesize;
657 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
658 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
659 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
660 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
661 flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
662 for (i = 0; i < block_count; i++)
663 vc1_p_h_loop_filter(v,
664 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
665 cbp,
666 is_intra,
667 i > 3 ? uvmv :
668 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
669 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
670 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
671 ttblk,
672 flags,
673 i);
674 }
675 }
676 if (s->mb_y == s->end_mb_y - 1) {
677 if (s->mb_y >= s->start_mb_y + 1) {
678 if (s->mb_x >= 2) {
679 dest = s->dest[0] - 16 * s->linesize - 32;
680 cbp = &v->cbp[s->mb_x - s->mb_stride - 2];
681 is_intra = &v->is_intra[s->mb_x - s->mb_stride - 2];
682 uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 2];
683 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
684 flags = s->mb_x == 2 ? LEFT_EDGE : 0;
685 for (i = 0; i < block_count; i++)
686 vc1_p_h_loop_filter(v,
687 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
688 cbp,
689 is_intra,
690 i > 3 ? uvmv :
691 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
692 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 2 + v->mb_off] :
693 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
694 ttblk,
695 flags,
696 i);
697 }
698 if (s->mb_x == s->mb_width - 1) {
699 if (s->mb_x >= 1) {
700 dest = s->dest[0] - 16 * s->linesize - 16;
701 cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
702 is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
703 uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
704 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
705 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
706 for (i = 0; i < block_count; i++)
707 vc1_p_h_loop_filter(v,
708 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
709 cbp,
710 is_intra,
711 i > 3 ? uvmv :
712 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
713 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
714 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
715 ttblk,
716 flags,
717 i);
718 }
719 dest = s->dest[0] - 16 * s->linesize;
720 cbp = &v->cbp[s->mb_x - s->mb_stride];
721 is_intra = &v->is_intra[s->mb_x - s->mb_stride];
722 uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
723 ttblk = &v->ttblk[s->mb_x - s->mb_stride];
724 flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
725 for (i = 0; i < block_count; i++)
726 vc1_p_h_loop_filter(v,
727 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
728 cbp,
729 is_intra,
730 i > 3 ? uvmv :
731 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
732 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
733 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
734 ttblk,
735 flags,
736 i);
737 }
738 }
739 if (s->mb_x >= 2) {
740 dest = s->dest[0] - 32;
741 cbp = &v->cbp[s->mb_x - 2];
742 is_intra = &v->is_intra[s->mb_x - 2];
743 uvmv = &v->luma_mv[s->mb_x - 2];
744 ttblk = &v->ttblk[s->mb_x - 2];
745 flags = s->mb_x == 2 ? LEFT_EDGE : 0;
746 for (i = 0; i < block_count; i++)
747 vc1_p_h_loop_filter(v,
748 i > 3 ? s->dest[i - 3] - 16 : dest,
749 cbp,
750 is_intra,
751 i > 3 ? uvmv :
752 &s->current_picture.motion_val[0][s->block_index[i] - 4 + v->blocks_off],
753 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 + v->mb_off] :
754 &v->mv_f[0][s->block_index[i] - 4 + v->blocks_off],
755 ttblk,
756 flags,
757 i);
758 }
759 if (s->mb_x == s->mb_width - 1) {
760 if (s->mb_x >= 1) {
761 dest = s->dest[0] - 16;
762 cbp = &v->cbp[s->mb_x - 1];
763 is_intra = &v->is_intra[s->mb_x - 1];
764 uvmv = &v->luma_mv[s->mb_x - 1];
765 ttblk = &v->ttblk[s->mb_x - 1];
766 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
767 for (i = 0; i < block_count; i++)
768 vc1_p_h_loop_filter(v,
769 i > 3 ? s->dest[i - 3] - 8 : dest,
770 cbp,
771 is_intra,
772 i > 3 ? uvmv :
773 &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
774 i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
775 &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
776 ttblk,
777 flags,
778 i);
779 }
780 dest = s->dest[0];
781 cbp = &v->cbp[s->mb_x];
782 is_intra = &v->is_intra[s->mb_x];
783 uvmv = &v->luma_mv[s->mb_x];
784 ttblk = &v->ttblk[s->mb_x];
785 flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
786 for (i = 0; i < block_count; i++)
787 vc1_p_h_loop_filter(v,
788 i > 3 ? s->dest[i - 3] : dest,
789 cbp,
790 is_intra,
791 i > 3 ? uvmv :
792 &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
793 i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
794 &v->mv_f[0][s->block_index[i] + v->blocks_off],
795 ttblk,
796 flags,
797 i);
798 }
799 }
800 }
801
vc1_p_h_intfr_loop_filter(VC1Context * v,uint8_t * dest,int * ttblk,uint32_t flags,uint8_t fieldtx,int block_num)802 static av_always_inline void vc1_p_h_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
803 uint32_t flags, uint8_t fieldtx, int block_num)
804 {
805 MpegEncContext *s = &v->s;
806 int pq = v->pq;
807 int tt;
808 int linesize = block_num > 3 ? s->uvlinesize : s->linesize;
809 uint8_t *dst;
810
811 if (block_num > 3)
812 dst = dest;
813 else
814 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
815
816 tt = ttblk[0] >> (block_num * 4) & 0xf;
817 if (block_num < 4) {
818 if (fieldtx) {
819 if (block_num < 2) {
820 if (tt == TT_4X4 || tt == TT_4X8)
821 v->vc1dsp.vc1_h_loop_filter8(dst + 4, 2 * linesize, pq);
822 if (!(flags & RIGHT_EDGE) || block_num == 0)
823 v->vc1dsp.vc1_h_loop_filter8(dst + 8, 2 * linesize, pq);
824 } else {
825 if (tt == TT_4X4 || tt == TT_4X8)
826 v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 4, 2 * linesize, pq);
827 if (!(flags & RIGHT_EDGE) || block_num == 2)
828 v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 8, 2 * linesize, pq);
829 }
830 } else {
831 if(tt == TT_4X4 || tt == TT_4X8) {
832 v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
833 v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
834 }
835 if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
836 v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
837 v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
838 }
839 }
840 } else {
841 if (tt == TT_4X4 || tt == TT_4X8) {
842 v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
843 v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
844 }
845 if (!(flags & RIGHT_EDGE)) {
846 v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
847 v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
848 }
849 }
850 }
851
vc1_p_v_intfr_loop_filter(VC1Context * v,uint8_t * dest,int * ttblk,uint32_t flags,uint8_t fieldtx,int block_num)852 static av_always_inline void vc1_p_v_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
853 uint32_t flags, uint8_t fieldtx, int block_num)
854 {
855 MpegEncContext *s = &v->s;
856 int pq = v->pq;
857 int tt;
858 int linesize = block_num > 3 ? s->uvlinesize : s->linesize;
859 uint8_t *dst;
860
861 if (block_num > 3)
862 dst = dest;
863 else
864 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
865
866 tt = ttblk[0] >> (block_num * 4) & 0xf;
867 if (block_num < 4) {
868 if (fieldtx) {
869 if (block_num < 2) {
870 if (tt == TT_4X4 || tt == TT_8X4)
871 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
872 if (!(flags & BOTTOM_EDGE))
873 v->vc1dsp.vc1_v_loop_filter8(dst + 16 * linesize, 2 * linesize, pq);
874 } else {
875 if (tt == TT_4X4 || tt == TT_8X4)
876 v->vc1dsp.vc1_v_loop_filter8(dst + linesize, 2 * linesize, pq);
877 if (!(flags & BOTTOM_EDGE))
878 v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
879 }
880 } else {
881 if (block_num < 2) {
882 if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
883 v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
884 v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
885 }
886 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
887 v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
888 } else if (!(flags & BOTTOM_EDGE)) {
889 if (tt == TT_4X4 || tt == TT_8X4) {
890 v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
891 v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
892 }
893 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
894 v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
895 }
896 }
897 } else {
898 if (!(flags & BOTTOM_EDGE)) {
899 if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
900 v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
901 v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
902 }
903 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
904 v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
905 }
906 }
907 }
908
/**
 * Loop filter driver for the P "intfr" case, called at the current
 * macroblock position (s->mb_x, s->mb_y).
 *
 * Two passes are made, each addressing macroblocks that lie `up` rows above
 * and `left` columns before the current one:
 *  - V pass (vc1_p_v_intfr_loop_filter): offsets 1..0 in both directions;
 *  - H pass (vc1_p_h_intfr_loop_filter): offsets 2..0 in both directions.
 *
 * The largest offset is processed whenever that macroblock exists; smaller
 * offsets are only processed on the last column (mb_x == mb_width - 1) or the
 * last row (mb_y == end_mb_y - 1), so the trailing macroblocks are caught up.
 * Macroblocks are visited from the largest offset to the smallest, rows outer,
 * columns inner.
 *
 * @param v decoder context
 */
void ff_vc1_p_intfr_loop_filter(VC1Context *v)
{
    MpegEncContext *s   = &v->s;
    /* Chroma blocks (4 and 5) are skipped when gray-only decoding is active. */
    const int nb_blocks = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
    const int mb_pos    = s->mb_x + s->mb_y * s->mb_stride;
    const int last_col  = s->mb_x == s->mb_width - 1;
    const int last_row  = s->mb_y == s->end_mb_y - 1;
    uint8_t *luma;
    int *tt;
    uint32_t edges;
    uint8_t ftx;
    int up, left, mb_off, ready, blk;

    /* Within a MB, the vertical loop filter always runs before the
     * horizontal one. The V pass therefore trails the current position by
     * one row and one column, and the H pass trails it by two rows and two
     * columns, so that the V pass has already been applied around a MB by the
     * time its vertical borders are filtered. */

    /* ---- V pass: horizontal borders ---- */
    for (up = 1; up >= 0; up--) {
        ready = up ? s->mb_y >= s->start_mb_y + 1 : last_row;
        if (!ready)
            continue;

        for (left = 1; left >= 0; left--) {
            ready = left ? s->mb_x != 0 : last_col;
            if (!ready)
                continue;

            /* TOP_EDGE when the addressed MB sits on row start_mb_y;
             * BOTTOM_EDGE when it is on the current (last) row. */
            edges = s->mb_y == s->start_mb_y + up ? TOP_EDGE : 0;
            if (!up)
                edges |= BOTTOM_EDGE;

            mb_off = up * s->mb_stride + left;
            luma   = s->dest[0] - 16 * up * s->linesize - 16 * left;
            tt     = &v->ttblk[s->mb_x - mb_off];
            ftx    = v->fieldtx_plane[mb_pos - mb_off];

            for (blk = 0; blk < nb_blocks; blk++)
                vc1_p_v_intfr_loop_filter(v,
                                          blk > 3 ? s->dest[blk - 3] - 8 * up * s->uvlinesize - 8 * left
                                                  : luma,
                                          tt,
                                          edges,
                                          ftx,
                                          blk);
        }
    }

    /* ---- H pass: vertical borders ---- */
    for (up = 2; up >= 0; up--) {
        if (up == 2)
            ready = s->mb_y >= s->start_mb_y + 2;
        else if (up == 1)
            ready = last_row && s->mb_y >= s->start_mb_y + 1;
        else
            ready = last_row;
        if (!ready)
            continue;

        for (left = 2; left >= 0; left--) {
            if (left == 2)
                ready = s->mb_x >= 2;
            else if (left == 1)
                ready = last_col && s->mb_x >= 1;
            else
                ready = last_col;
            if (!ready)
                continue;

            /* LEFT_EDGE when the addressed MB is in column 0;
             * RIGHT_EDGE when it is in the current (last) column. */
            edges = s->mb_x == left ? LEFT_EDGE : 0;
            if (!left)
                edges |= RIGHT_EDGE;

            mb_off = up * s->mb_stride + left;
            luma   = s->dest[0] - 16 * up * s->linesize - 16 * left;
            tt     = &v->ttblk[s->mb_x - mb_off];
            ftx    = v->fieldtx_plane[mb_pos - mb_off];

            for (blk = 0; blk < nb_blocks; blk++)
                vc1_p_h_intfr_loop_filter(v,
                                          blk > 3 ? s->dest[blk - 3] - 8 * up * s->uvlinesize - 8 * left
                                                  : luma,
                                          tt,
                                          edges,
                                          ftx,
                                          blk);
        }
    }
}
1111
vc1_b_h_intfi_loop_filter(VC1Context * v,uint8_t * dest,uint32_t * cbp,int * ttblk,uint32_t flags,int block_num)1112 static av_always_inline void vc1_b_h_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
1113 int *ttblk, uint32_t flags, int block_num)
1114 {
1115 MpegEncContext *s = &v->s;
1116 int pq = v->pq;
1117 uint8_t *dst;
1118 uint32_t block_cbp = cbp[0] >> (block_num * 4);
1119 int tt;
1120 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
1121
1122 if (block_num > 3)
1123 dst = dest;
1124 else
1125 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
1126
1127 if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
1128 if (block_num > 3)
1129 v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
1130 else
1131 v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
1132 }
1133
1134 tt = ttblk[0] >> (block_num * 4) & 0xf;
1135 if (tt == TT_4X4 || tt == TT_4X8) {
1136 idx = (block_cbp | (block_cbp >> 1)) & 5;
1137 if (idx & 1)
1138 v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
1139 if (idx & 4)
1140 v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
1141 }
1142 }
1143
vc1_b_v_intfi_loop_filter(VC1Context * v,uint8_t * dest,uint32_t * cbp,int * ttblk,uint32_t flags,int block_num)1144 static av_always_inline void vc1_b_v_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
1145 int *ttblk, uint32_t flags, int block_num)
1146 {
1147 MpegEncContext *s = &v->s;
1148 int pq = v->pq;
1149 uint8_t *dst;
1150 uint32_t block_cbp = cbp[0] >> (block_num * 4);
1151 int tt;
1152 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
1153
1154 if (block_num > 3)
1155 dst = dest;
1156 else
1157 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
1158
1159 if(!(flags & BOTTOM_EDGE) || block_num < 2)
1160 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
1161
1162 tt = ttblk[0] >> (block_num * 4) & 0xf;
1163 if (tt == TT_4X4 || tt == TT_8X4) {
1164 idx = (block_cbp | (block_cbp >> 2)) & 3;
1165 if (idx & 1)
1166 v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
1167 if (idx & 2)
1168 v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
1169 }
1170 }
1171
/**
 * Loop filter driver for the B "intfi" case, called at the current
 * macroblock position (s->mb_x, s->mb_y).
 *
 * The V pass (vc1_b_v_intfi_loop_filter) handles the macroblock directly
 * above the current one and, on the last row (mb_y == end_mb_y - 1), the
 * current one as well. The H pass (vc1_b_h_intfi_loop_filter) then handles,
 * for each of those rows, the macroblock one column to the left and, on the
 * last column (mb_x == mb_width - 1), the one in the current column.
 *
 * @param v decoder context
 */
void ff_vc1_b_intfi_loop_filter(VC1Context *v)
{
    MpegEncContext *s   = &v->s;
    /* Chroma blocks (4 and 5) are skipped when gray-only decoding is active. */
    const int nb_blocks = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
    const int last_col  = s->mb_x == s->mb_width - 1;
    const int last_row  = s->mb_y == s->end_mb_y - 1;
    uint8_t *luma;
    uint32_t *mb_cbp;
    int *mb_tt;
    uint32_t edges;
    int up, left, mb_off, blk;

    /* Within a MB, the vertical loop filter always runs before the
     * horizontal one. The V pass therefore covers the row above the current
     * MB, and the H pass trails it by one more column, i.e. the loop filter
     * as a whole trails the current position by one row and one column. */

    /* ---- V pass: horizontal borders ---- */
    for (up = 1; up >= 0; up--) {
        if (up ? s->first_slice_line : !last_row)
            continue;

        if (up)
            edges = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
        else
            edges = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;

        mb_off = up * s->mb_stride;
        luma   = s->dest[0] - 16 * up * s->linesize;
        mb_cbp = &v->cbp[s->mb_x - mb_off];
        mb_tt  = &v->ttblk[s->mb_x - mb_off];

        for (blk = 0; blk < nb_blocks; blk++)
            vc1_b_v_intfi_loop_filter(v,
                                      blk > 3 ? s->dest[blk - 3] - 8 * up * s->uvlinesize : luma,
                                      mb_cbp, mb_tt, edges, blk);
    }

    /* ---- H pass: vertical borders ---- */
    for (up = 1; up >= 0; up--) {
        if (up ? s->first_slice_line : !last_row)
            continue;

        for (left = 1; left >= 0; left--) {
            if (left ? !s->mb_x : !last_col)
                continue;

            if (left)
                edges = s->mb_x == 1 ? LEFT_EDGE : 0;
            else
                edges = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;

            mb_off = up * s->mb_stride + left;
            luma   = s->dest[0] - 16 * up * s->linesize - 16 * left;
            mb_cbp = &v->cbp[s->mb_x - mb_off];
            mb_tt  = &v->ttblk[s->mb_x - mb_off];

            for (blk = 0; blk < nb_blocks; blk++)
                vc1_b_h_intfi_loop_filter(v,
                                          blk > 3 ? s->dest[blk - 3] - 8 * up * s->uvlinesize - 8 * left
                                                  : luma,
                                          mb_cbp, mb_tt, edges, blk);
        }
    }
}
1242