/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 
/*
 * Component-wise rounded average of two (MVx2) or four (MVx4) motion
 * vectors, produced as a VP56mv compound literal.
 *
 * Every argument is parenthesized so that non-primary expressions such as
 * *ptr can be passed. Each argument is expanded twice (once for .x, once
 * for .y), so arguments must be free of side effects.
 *
 * ROUNDED_DIV() and VP56mv are not defined here; they must be in scope at
 * the point of expansion.
 */
#define ROUNDED_DIV_MVx2(a, b) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x, 2), \
               .y = ROUNDED_DIV((a).y + (b).y, 2) }
#define ROUNDED_DIV_MVx4(a, b, c, d) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x + (c).x + (d).x, 4), \
               .y = ROUNDED_DIV((a).y + (b).y + (c).y + (d).y, 4) }
29 
FN(inter_pred)30 static void FN(inter_pred)(VP9TileData *td)
31 {
32     static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
33         { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
34         { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
35     };
36     VP9Context *s = td->s;
37     VP9Block *b = td->b;
38     int row = td->row, col = td->col;
39     ThreadFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2;
40     AVFrame *ref1 = tref1->f, *ref2;
41     int w1 = ref1->width, h1 = ref1->height, w2, h2;
42     ptrdiff_t ls_y = td->y_stride, ls_uv = td->uv_stride;
43     int bytesperpixel = BYTES_PER_PIXEL;
44 
45     if (b->comp) {
46         tref2 = &s->s.refs[s->s.h.refidx[b->ref[1]]];
47         ref2 = tref2->f;
48         w2 = ref2->width;
49         h2 = ref2->height;
50     }
51 
52     // y inter pred
53     if (b->bs > BS_8x8) {
54         VP56mv uvmv;
55 
56 #if SCALED == 0
57         if (b->bs == BS_8x4) {
58             mc_luma_dir(td, mc[3][b->filter][0], td->dst[0], ls_y,
59                         ref1->data[0], ref1->linesize[0], tref1,
60                         row << 3, col << 3, &b->mv[0][0],,,,, 8, 4, w1, h1, 0);
61             mc_luma_dir(td, mc[3][b->filter][0],
62                         td->dst[0] + 4 * ls_y, ls_y,
63                         ref1->data[0], ref1->linesize[0], tref1,
64                         (row << 3) + 4, col << 3, &b->mv[2][0],,,,, 8, 4, w1, h1, 0);
65             w1 = (w1 + s->ss_h) >> s->ss_h;
66             if (s->ss_v) {
67                 h1 = (h1 + 1) >> 1;
68                 uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
69                 mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0],
70                               td->dst[1], td->dst[2], ls_uv,
71                               ref1->data[1], ref1->linesize[1],
72                               ref1->data[2], ref1->linesize[2], tref1,
73                               row << 2, col << (3 - s->ss_h),
74                               &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
75             } else {
76                 mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0],
77                               td->dst[1], td->dst[2], ls_uv,
78                               ref1->data[1], ref1->linesize[1],
79                               ref1->data[2], ref1->linesize[2], tref1,
80                               row << 3, col << (3 - s->ss_h),
81                               &b->mv[0][0],,,,, 8 >> s->ss_h, 4, w1, h1, 0);
82                 // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
83                 // to get the motion vector for the bottom 4x4 block
84                 // https://code.google.com/p/webm/issues/detail?id=993
85                 if (s->ss_h == 0) {
86                     uvmv = b->mv[2][0];
87                 } else {
88                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
89                 }
90                 mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][0],
91                               td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
92                               ref1->data[1], ref1->linesize[1],
93                               ref1->data[2], ref1->linesize[2], tref1,
94                               (row << 3) + 4, col << (3 - s->ss_h),
95                               &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
96             }
97 
98             if (b->comp) {
99                 mc_luma_dir(td, mc[3][b->filter][1], td->dst[0], ls_y,
100                             ref2->data[0], ref2->linesize[0], tref2,
101                             row << 3, col << 3, &b->mv[0][1],,,,, 8, 4, w2, h2, 1);
102                 mc_luma_dir(td, mc[3][b->filter][1],
103                             td->dst[0] + 4 * ls_y, ls_y,
104                             ref2->data[0], ref2->linesize[0], tref2,
105                             (row << 3) + 4, col << 3, &b->mv[2][1],,,,, 8, 4, w2, h2, 1);
106                 w2 = (w2 + s->ss_h) >> s->ss_h;
107                 if (s->ss_v) {
108                     h2 = (h2 + 1) >> 1;
109                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
110                     mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1],
111                                   td->dst[1], td->dst[2], ls_uv,
112                                   ref2->data[1], ref2->linesize[1],
113                                   ref2->data[2], ref2->linesize[2], tref2,
114                                   row << 2, col << (3 - s->ss_h),
115                                   &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
116                 } else {
117                     mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1],
118                                   td->dst[1], td->dst[2], ls_uv,
119                                   ref2->data[1], ref2->linesize[1],
120                                   ref2->data[2], ref2->linesize[2], tref2,
121                                   row << 3, col << (3 - s->ss_h),
122                                   &b->mv[0][1],,,,, 8 >> s->ss_h, 4, w2, h2, 1);
123                     // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
124                     // to get the motion vector for the bottom 4x4 block
125                     // https://code.google.com/p/webm/issues/detail?id=993
126                     if (s->ss_h == 0) {
127                         uvmv = b->mv[2][1];
128                     } else {
129                         uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
130                     }
131                     mc_chroma_dir(td, mc[3 + s->ss_h][b->filter][1],
132                                   td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
133                                   ref2->data[1], ref2->linesize[1],
134                                   ref2->data[2], ref2->linesize[2], tref2,
135                                   (row << 3) + 4, col << (3 - s->ss_h),
136                                   &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
137                 }
138             }
139         } else if (b->bs == BS_4x8) {
140             mc_luma_dir(td, mc[4][b->filter][0], td->dst[0], ls_y,
141                         ref1->data[0], ref1->linesize[0], tref1,
142                         row << 3, col << 3, &b->mv[0][0],,,,, 4, 8, w1, h1, 0);
143             mc_luma_dir(td, mc[4][b->filter][0], td->dst[0] + 4 * bytesperpixel, ls_y,
144                         ref1->data[0], ref1->linesize[0], tref1,
145                         row << 3, (col << 3) + 4, &b->mv[1][0],,,,, 4, 8, w1, h1, 0);
146             h1 = (h1 + s->ss_v) >> s->ss_v;
147             if (s->ss_h) {
148                 w1 = (w1 + 1) >> 1;
149                 uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
150                 mc_chroma_dir(td, mc[4][b->filter][0],
151                               td->dst[1], td->dst[2], ls_uv,
152                               ref1->data[1], ref1->linesize[1],
153                               ref1->data[2], ref1->linesize[2], tref1,
154                               row << (3 - s->ss_v), col << 2,
155                               &uvmv,,,,, 4, 8 >> s->ss_v, w1, h1, 0);
156             } else {
157                 mc_chroma_dir(td, mc[4][b->filter][0],
158                               td->dst[1], td->dst[2], ls_uv,
159                               ref1->data[1], ref1->linesize[1],
160                               ref1->data[2], ref1->linesize[2], tref1,
161                               row << (3 - s->ss_v), col << 3,
162                               &b->mv[0][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
163                 mc_chroma_dir(td, mc[4][b->filter][0],
164                               td->dst[1] + 4 * bytesperpixel,
165                               td->dst[2] + 4 * bytesperpixel, ls_uv,
166                               ref1->data[1], ref1->linesize[1],
167                               ref1->data[2], ref1->linesize[2], tref1,
168                               row << (3 - s->ss_v), (col << 3) + 4,
169                               &b->mv[1][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
170             }
171 
172             if (b->comp) {
173                 mc_luma_dir(td, mc[4][b->filter][1], td->dst[0], ls_y,
174                             ref2->data[0], ref2->linesize[0], tref2,
175                             row << 3, col << 3, &b->mv[0][1],,,,, 4, 8, w2, h2, 1);
176                 mc_luma_dir(td, mc[4][b->filter][1], td->dst[0] + 4 * bytesperpixel, ls_y,
177                             ref2->data[0], ref2->linesize[0], tref2,
178                             row << 3, (col << 3) + 4, &b->mv[1][1],,,,, 4, 8, w2, h2, 1);
179                 h2 = (h2 + s->ss_v) >> s->ss_v;
180                 if (s->ss_h) {
181                     w2 = (w2 + 1) >> 1;
182                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
183                     mc_chroma_dir(td, mc[4][b->filter][1],
184                                   td->dst[1], td->dst[2], ls_uv,
185                                   ref2->data[1], ref2->linesize[1],
186                                   ref2->data[2], ref2->linesize[2], tref2,
187                                   row << (3 - s->ss_v), col << 2,
188                                   &uvmv,,,,, 4, 8 >> s->ss_v, w2, h2, 1);
189                 } else {
190                     mc_chroma_dir(td, mc[4][b->filter][1],
191                                   td->dst[1], td->dst[2], ls_uv,
192                                   ref2->data[1], ref2->linesize[1],
193                                   ref2->data[2], ref2->linesize[2], tref2,
194                                   row << (3 - s->ss_v), col << 3,
195                                   &b->mv[0][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
196                     mc_chroma_dir(td, mc[4][b->filter][1],
197                                   td->dst[1] + 4 * bytesperpixel,
198                                   td->dst[2] + 4 * bytesperpixel, ls_uv,
199                                   ref2->data[1], ref2->linesize[1],
200                                   ref2->data[2], ref2->linesize[2], tref2,
201                                   row << (3 - s->ss_v), (col << 3) + 4,
202                                   &b->mv[1][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
203                 }
204             }
205         } else
206 #endif
207         {
208 #if SCALED == 0
209             av_assert2(b->bs == BS_4x4);
210 #endif
211 
212             // FIXME if two horizontally adjacent blocks have the same MV,
213             // do a w8 instead of a w4 call
214             mc_luma_dir(td, mc[4][b->filter][0], td->dst[0], ls_y,
215                         ref1->data[0], ref1->linesize[0], tref1,
216                         row << 3, col << 3, &b->mv[0][0],
217                         0, 0, 8, 8, 4, 4, w1, h1, 0);
218             mc_luma_dir(td, mc[4][b->filter][0], td->dst[0] + 4 * bytesperpixel, ls_y,
219                         ref1->data[0], ref1->linesize[0], tref1,
220                         row << 3, (col << 3) + 4, &b->mv[1][0],
221                         4, 0, 8, 8, 4, 4, w1, h1, 0);
222             mc_luma_dir(td, mc[4][b->filter][0],
223                         td->dst[0] + 4 * ls_y, ls_y,
224                         ref1->data[0], ref1->linesize[0], tref1,
225                         (row << 3) + 4, col << 3, &b->mv[2][0],
226                         0, 4, 8, 8, 4, 4, w1, h1, 0);
227             mc_luma_dir(td, mc[4][b->filter][0],
228                         td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
229                         ref1->data[0], ref1->linesize[0], tref1,
230                         (row << 3) + 4, (col << 3) + 4, &b->mv[3][0],
231                         4, 4, 8, 8, 4, 4, w1, h1, 0);
232             if (s->ss_v) {
233                 h1 = (h1 + 1) >> 1;
234                 if (s->ss_h) {
235                     w1 = (w1 + 1) >> 1;
236                     uvmv = ROUNDED_DIV_MVx4(b->mv[0][0], b->mv[1][0],
237                                             b->mv[2][0], b->mv[3][0]);
238                     mc_chroma_dir(td, mc[4][b->filter][0],
239                                   td->dst[1], td->dst[2], ls_uv,
240                                   ref1->data[1], ref1->linesize[1],
241                                   ref1->data[2], ref1->linesize[2], tref1,
242                                   row << 2, col << 2,
243                                   &uvmv, 0, 0, 4, 4, 4, 4, w1, h1, 0);
244                 } else {
245                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
246                     mc_chroma_dir(td, mc[4][b->filter][0],
247                                   td->dst[1], td->dst[2], ls_uv,
248                                   ref1->data[1], ref1->linesize[1],
249                                   ref1->data[2], ref1->linesize[2], tref1,
250                                   row << 2, col << 3,
251                                   &uvmv, 0, 0, 8, 4, 4, 4, w1, h1, 0);
252                     uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
253                     mc_chroma_dir(td, mc[4][b->filter][0],
254                                   td->dst[1] + 4 * bytesperpixel,
255                                   td->dst[2] + 4 * bytesperpixel, ls_uv,
256                                   ref1->data[1], ref1->linesize[1],
257                                   ref1->data[2], ref1->linesize[2], tref1,
258                                   row << 2, (col << 3) + 4,
259                                   &uvmv, 4, 0, 8, 4, 4, 4, w1, h1, 0);
260                 }
261             } else {
262                 if (s->ss_h) {
263                     w1 = (w1 + 1) >> 1;
264                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
265                     mc_chroma_dir(td, mc[4][b->filter][0],
266                                   td->dst[1], td->dst[2], ls_uv,
267                                   ref1->data[1], ref1->linesize[1],
268                                   ref1->data[2], ref1->linesize[2], tref1,
269                                   row << 3, col << 2,
270                                   &uvmv, 0, 0, 4, 8, 4, 4, w1, h1, 0);
271                     // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
272                     // bottom block
273                     // https://code.google.com/p/webm/issues/detail?id=993
274                     uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[2][0]);
275                     mc_chroma_dir(td, mc[4][b->filter][0],
276                                   td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
277                                   ref1->data[1], ref1->linesize[1],
278                                   ref1->data[2], ref1->linesize[2], tref1,
279                                   (row << 3) + 4, col << 2,
280                                   &uvmv, 0, 4, 4, 8, 4, 4, w1, h1, 0);
281                 } else {
282                     mc_chroma_dir(td, mc[4][b->filter][0],
283                                   td->dst[1], td->dst[2], ls_uv,
284                                   ref1->data[1], ref1->linesize[1],
285                                   ref1->data[2], ref1->linesize[2], tref1,
286                                   row << 3, col << 3,
287                                   &b->mv[0][0], 0, 0, 8, 8, 4, 4, w1, h1, 0);
288                     mc_chroma_dir(td, mc[4][b->filter][0],
289                                   td->dst[1] + 4 * bytesperpixel,
290                                   td->dst[2] + 4 * bytesperpixel, ls_uv,
291                                   ref1->data[1], ref1->linesize[1],
292                                   ref1->data[2], ref1->linesize[2], tref1,
293                                   row << 3, (col << 3) + 4,
294                                   &b->mv[1][0], 4, 0, 8, 8, 4, 4, w1, h1, 0);
295                     mc_chroma_dir(td, mc[4][b->filter][0],
296                                   td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
297                                   ref1->data[1], ref1->linesize[1],
298                                   ref1->data[2], ref1->linesize[2], tref1,
299                                   (row << 3) + 4, col << 3,
300                                   &b->mv[2][0], 0, 4, 8, 8, 4, 4, w1, h1, 0);
301                     mc_chroma_dir(td, mc[4][b->filter][0],
302                                   td->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
303                                   td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
304                                   ref1->data[1], ref1->linesize[1],
305                                   ref1->data[2], ref1->linesize[2], tref1,
306                                   (row << 3) + 4, (col << 3) + 4,
307                                   &b->mv[3][0], 4, 4, 8, 8, 4, 4, w1, h1, 0);
308                 }
309             }
310 
311             if (b->comp) {
312                 mc_luma_dir(td, mc[4][b->filter][1], td->dst[0], ls_y,
313                             ref2->data[0], ref2->linesize[0], tref2,
314                             row << 3, col << 3, &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
315                 mc_luma_dir(td, mc[4][b->filter][1], td->dst[0] + 4 * bytesperpixel, ls_y,
316                             ref2->data[0], ref2->linesize[0], tref2,
317                             row << 3, (col << 3) + 4, &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
318                 mc_luma_dir(td, mc[4][b->filter][1],
319                             td->dst[0] + 4 * ls_y, ls_y,
320                             ref2->data[0], ref2->linesize[0], tref2,
321                             (row << 3) + 4, col << 3, &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
322                 mc_luma_dir(td, mc[4][b->filter][1],
323                             td->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
324                             ref2->data[0], ref2->linesize[0], tref2,
325                             (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
326                 if (s->ss_v) {
327                     h2 = (h2 + 1) >> 1;
328                     if (s->ss_h) {
329                         w2 = (w2 + 1) >> 1;
330                         uvmv = ROUNDED_DIV_MVx4(b->mv[0][1], b->mv[1][1],
331                                                 b->mv[2][1], b->mv[3][1]);
332                         mc_chroma_dir(td, mc[4][b->filter][1],
333                                       td->dst[1], td->dst[2], ls_uv,
334                                       ref2->data[1], ref2->linesize[1],
335                                       ref2->data[2], ref2->linesize[2], tref2,
336                                       row << 2, col << 2,
337                                       &uvmv, 0, 0, 4, 4, 4, 4, w2, h2, 1);
338                     } else {
339                         uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
340                         mc_chroma_dir(td, mc[4][b->filter][1],
341                                       td->dst[1], td->dst[2], ls_uv,
342                                       ref2->data[1], ref2->linesize[1],
343                                       ref2->data[2], ref2->linesize[2], tref2,
344                                       row << 2, col << 3,
345                                       &uvmv, 0, 0, 8, 4, 4, 4, w2, h2, 1);
346                         uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
347                         mc_chroma_dir(td, mc[4][b->filter][1],
348                                       td->dst[1] + 4 * bytesperpixel,
349                                       td->dst[2] + 4 * bytesperpixel, ls_uv,
350                                       ref2->data[1], ref2->linesize[1],
351                                       ref2->data[2], ref2->linesize[2], tref2,
352                                       row << 2, (col << 3) + 4,
353                                       &uvmv, 4, 0, 8, 4, 4, 4, w2, h2, 1);
354                     }
355                 } else {
356                     if (s->ss_h) {
357                         w2 = (w2 + 1) >> 1;
358                         uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
359                         mc_chroma_dir(td, mc[4][b->filter][1],
360                                       td->dst[1], td->dst[2], ls_uv,
361                                       ref2->data[1], ref2->linesize[1],
362                                       ref2->data[2], ref2->linesize[2], tref2,
363                                       row << 3, col << 2,
364                                       &uvmv, 0, 0, 4, 8, 4, 4, w2, h2, 1);
365                         // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
366                         // bottom block
367                         // https://code.google.com/p/webm/issues/detail?id=993
368                         uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[2][1]);
369                         mc_chroma_dir(td, mc[4][b->filter][1],
370                                       td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
371                                       ref2->data[1], ref2->linesize[1],
372                                       ref2->data[2], ref2->linesize[2], tref2,
373                                       (row << 3) + 4, col << 2,
374                                       &uvmv, 0, 4, 4, 8, 4, 4, w2, h2, 1);
375                     } else {
376                         mc_chroma_dir(td, mc[4][b->filter][1],
377                                       td->dst[1], td->dst[2], ls_uv,
378                                       ref2->data[1], ref2->linesize[1],
379                                       ref2->data[2], ref2->linesize[2], tref2,
380                                       row << 3, col << 3,
381                                       &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
382                         mc_chroma_dir(td, mc[4][b->filter][1],
383                                       td->dst[1] + 4 * bytesperpixel,
384                                       td->dst[2] + 4 * bytesperpixel, ls_uv,
385                                       ref2->data[1], ref2->linesize[1],
386                                       ref2->data[2], ref2->linesize[2], tref2,
387                                       row << 3, (col << 3) + 4,
388                                       &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
389                         mc_chroma_dir(td, mc[4][b->filter][1],
390                                       td->dst[1] + 4 * ls_uv, td->dst[2] + 4 * ls_uv, ls_uv,
391                                       ref2->data[1], ref2->linesize[1],
392                                       ref2->data[2], ref2->linesize[2], tref2,
393                                       (row << 3) + 4, col << 3,
394                                       &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
395                         mc_chroma_dir(td, mc[4][b->filter][1],
396                                       td->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
397                                       td->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
398                                       ref2->data[1], ref2->linesize[1],
399                                       ref2->data[2], ref2->linesize[2], tref2,
400                                       (row << 3) + 4, (col << 3) + 4,
401                                       &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
402                     }
403                 }
404             }
405         }
406     } else {
407         int bwl = bwlog_tab[0][b->bs];
408         int bw = ff_vp9_bwh_tab[0][b->bs][0] * 4;
409         int bh = ff_vp9_bwh_tab[0][b->bs][1] * 4;
410         int uvbw = ff_vp9_bwh_tab[s->ss_h][b->bs][0] * 4;
411         int uvbh = ff_vp9_bwh_tab[s->ss_v][b->bs][1] * 4;
412 
413         mc_luma_dir(td, mc[bwl][b->filter][0], td->dst[0], ls_y,
414                     ref1->data[0], ref1->linesize[0], tref1,
415                     row << 3, col << 3, &b->mv[0][0], 0, 0, bw, bh, bw, bh, w1, h1, 0);
416         w1 = (w1 + s->ss_h) >> s->ss_h;
417         h1 = (h1 + s->ss_v) >> s->ss_v;
418         mc_chroma_dir(td, mc[bwl + s->ss_h][b->filter][0],
419                       td->dst[1], td->dst[2], ls_uv,
420                       ref1->data[1], ref1->linesize[1],
421                       ref1->data[2], ref1->linesize[2], tref1,
422                       row << (3 - s->ss_v), col << (3 - s->ss_h),
423                       &b->mv[0][0], 0, 0, uvbw, uvbh, uvbw, uvbh, w1, h1, 0);
424 
425         if (b->comp) {
426             mc_luma_dir(td, mc[bwl][b->filter][1], td->dst[0], ls_y,
427                         ref2->data[0], ref2->linesize[0], tref2,
428                         row << 3, col << 3, &b->mv[0][1], 0, 0, bw, bh, bw, bh, w2, h2, 1);
429             w2 = (w2 + s->ss_h) >> s->ss_h;
430             h2 = (h2 + s->ss_v) >> s->ss_v;
431             mc_chroma_dir(td, mc[bwl + s->ss_h][b->filter][1],
432                           td->dst[1], td->dst[2], ls_uv,
433                           ref2->data[1], ref2->linesize[1],
434                           ref2->data[2], ref2->linesize[2], tref2,
435                           row << (3 - s->ss_v), col << (3 - s->ss_h),
436                           &b->mv[0][1], 0, 0, uvbw, uvbh, uvbw, uvbh, w2, h2, 1);
437         }
438     }
439 }
440