1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "./aom_config.h"
15 #include "./aom_dsp_rtcd.h"
16 #include "aom_dsp/aom_dsp_common.h"
17 #include "aom_mem/aom_mem.h"
18 #include "aom_ports/mem.h"
19 #include "av1/common/av1_loopfilter.h"
20 #include "av1/common/onyxc_int.h"
21 #include "av1/common/reconinter.h"
22 #include "av1/common/seg_common.h"
23
24 #if CONFIG_LOOPFILTER_LEVEL
25 static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
26 { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
27 { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
28 { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
29 };
30
31 #if CONFIG_EXT_DELTA_Q
32 static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
33 { 0, 1 }, { 2, 2 }, { 3, 3 }
34 };
35 #endif // CONFIG_EXT_DELTA_Q
36 #endif // CONFIG_LOOPFILTER_LEVEL
37
38 #if CONFIG_LPF_DIRECT
pick_filter_pixel_left(uint8_t * const src,uint8_t * const line,int * const orig_pos,int length,int row,int col,int width,int height,int pitch,int pivot,int direct)39 static void pick_filter_pixel_left(uint8_t *const src, uint8_t *const line,
40 int *const orig_pos, int length, int row,
41 int col, int width, int height, int pitch,
42 int pivot, int direct) {
43 int i;
44 int pos = row * pitch + col;
45
46 for (i = 0; i < length; ++i) {
47 int dy = 0;
48 switch (direct) {
49 case VERT_HORZ: dy = 0; break;
50 case DEGREE_45: dy = 1; break;
51 case DEGREE_135: dy = -1; break;
52 }
53 col -= 1;
54 row += dy;
55 if (col >= 0 && col < width && row >= 0 && row < height) {
56 pos = row * pitch + col;
57 line[pivot - 1 - i] = src[pos];
58 orig_pos[pivot - 1 - i] = pos;
59 }
60 }
61 }
62
pick_filter_pixel_right(uint8_t * const src,uint8_t * const line,int * const orig_pos,int length,int row,int col,int width,int height,int pitch,int pivot,int direct)63 static void pick_filter_pixel_right(uint8_t *const src, uint8_t *const line,
64 int *const orig_pos, int length, int row,
65 int col, int width, int height, int pitch,
66 int pivot, int direct) {
67 int i;
68 int pos = row * pitch + col;
69
70 line[pivot] = src[pos];
71 orig_pos[pivot] = pos;
72
73 for (i = 1; i < length; ++i) {
74 int dy = 0;
75 switch (direct) {
76 case VERT_HORZ: dy = 0; break;
77 case DEGREE_45: dy = -1; break;
78 case DEGREE_135: dy = 1; break;
79 }
80 col += 1;
81 row += dy;
82 if (col >= 0 && col < width && row >= 0 && row < height) {
83 pos = row * pitch + col;
84 line[pivot + i] = src[pos];
85 orig_pos[pivot + i] = pos;
86 }
87 }
88 }
89
pick_filter_pixel_above(uint8_t * const src,uint8_t * const line,int * const orig_pos,int length,int row,int col,int width,int height,int pitch,int pivot,int direct)90 static void pick_filter_pixel_above(uint8_t *const src, uint8_t *const line,
91 int *const orig_pos, int length, int row,
92 int col, int width, int height, int pitch,
93 int pivot, int direct) {
94 int i;
95 int pos = row * pitch + col;
96
97 for (i = 0; i < length; ++i) {
98 int dx = 0;
99 switch (direct) {
100 case VERT_HORZ: dx = 0; break;
101 case DEGREE_45: dx = 1; break;
102 case DEGREE_135: dx = -1; break;
103 }
104 col += dx;
105 row -= 1;
106 if (col >= 0 && col < width && row >= 0 && row < height) {
107 pos = row * pitch + col;
108 line[pivot - 1 - i] = src[pos];
109 orig_pos[pivot - 1 - i] = pos;
110 }
111 }
112 }
113
pick_filter_pixel_bot(uint8_t * const src,uint8_t * const line,int * const orig_pos,int length,int row,int col,int width,int height,int pitch,int pivot,int direct)114 static void pick_filter_pixel_bot(uint8_t *const src, uint8_t *const line,
115 int *const orig_pos, int length, int row,
116 int col, int width, int height, int pitch,
117 int pivot, int direct) {
118 int i;
119 int pos = row * pitch + col;
120
121 line[pivot] = src[pos];
122 orig_pos[pivot] = pos;
123
124 for (i = 1; i < length; ++i) {
125 int dx = 0;
126 switch (direct) {
127 case VERT_HORZ: dx = 0; break;
128 case DEGREE_45: dx = -1; break;
129 case DEGREE_135: dx = 1; break;
130 }
131 col += dx;
132 row += 1;
133 if (col >= 0 && col < width && row >= 0 && row < height) {
134 pos = row * pitch + col;
135 line[pivot + i] = src[pos];
136 orig_pos[pivot + i] = pos;
137 }
138 }
139 }
140
// Collects the pixels around a vertical edge: for each of the 8 * unit rows
// starting at `row`, one line of `block` (stride line_length) is filled with
// the pixels left and right of column `col` along direction `direct`.
static void pick_filter_block_vert(uint8_t *const src, uint8_t *const block,
                                   int *const orig_pos, int length, int row,
                                   int col, int width, int height, int pitch,
                                   int pivot, int line_length, int unit,
                                   int direct) {
  const int num_rows = 8 * unit;

  for (int r = 0; r < num_rows; ++r) {
    const int line_start = r * line_length;
    uint8_t *const line = block + line_start;
    int *const line_pos = orig_pos + line_start;

    pick_filter_pixel_left(src, line, line_pos, length, row + r, col, width,
                           height, pitch, pivot, direct);
    pick_filter_pixel_right(src, line, line_pos, length, row + r, col, width,
                            height, pitch, pivot, direct);
  }
}
156
// Collects the pixels around a horizontal edge: for each of the 8 * unit
// columns starting at `col`, one line of `block` (stride line_length) is
// filled with the pixels above and below row `row` along direction `direct`.
// The result is then transposed. All 256 entries of `block` and `orig_pos`
// are overwritten; entries not produced by the transpose become 0 and -1.
static void pick_filter_block_horz(uint8_t *const src, uint8_t *const block,
                                   int *const orig_pos, int length, int row,
                                   int col, int width, int height, int pitch,
                                   int pivot, int line_length, int unit,
                                   int direct) {
  const int num = 8 * unit;

  for (int c = 0; c < num; ++c) {
    const int line_start = c * line_length;
    uint8_t *const line = block + line_start;
    int *const line_pos = orig_pos + line_start;

    pick_filter_pixel_above(src, line, line_pos, length, row, col + c, width,
                            height, pitch, pivot, direct);
    pick_filter_pixel_bot(src, line, line_pos, length, row, col + c, width,
                          height, pitch, pivot, direct);
  }

  // rearrange block
  // TODO(chengchen): make it in-place or a stand alone function
  uint8_t tmp_block[256];
  int tmp_pos[256];
  memset(tmp_block, 0, sizeof(tmp_block));
  for (int k = 0; k < 256; ++k) tmp_pos[k] = -1;

  // Transpose: element j of gathered line c lands at position c of line j.
  for (int c = 0; c < num; ++c) {
    for (int j = 0; j < line_length; ++j) {
      const int from = c * line_length + j;
      const int to = j * line_length + c;
      tmp_block[to] = block[from];
      tmp_pos[to] = orig_pos[from];
    }
  }

  memcpy(block, tmp_block, sizeof(tmp_block));
  memcpy(orig_pos, tmp_pos, sizeof(tmp_pos));
}
192
compute_block_grad(uint8_t * const src,int length,int row,int col,int width,int height,int pitch,int unit,int vert_or_horz,int direct)193 static int compute_block_grad(uint8_t *const src, int length, int row, int col,
194 int width, int height, int pitch, int unit,
195 int vert_or_horz, int direct) {
196 int i, j;
197 int r0, c0, pos0, r1 = 0, c1 = 0, pos1;
198 int sum_grad = 0;
199 for (i = 0; i < 8 * unit; ++i) {
200 // vert_or_horz: 0 vertical edge, 1 horizontal edge
201 r0 = vert_or_horz ? row : row + i;
202 c0 = vert_or_horz ? col + i : col;
203 pos0 = r0 * pitch + c0;
204
205 for (j = 0; j < length; ++j) {
206 if (vert_or_horz == 0) {
207 switch (direct) {
208 case VERT_HORZ: r1 = r0; break;
209 case DEGREE_45: r1 = r0 + 1; break;
210 case DEGREE_135: r1 = r0 - 1; break;
211 }
212 c1 = c0 - 1;
213 } else {
214 r1 = r0 - 1;
215 switch (direct) {
216 case VERT_HORZ: c1 = c0; break;
217 case DEGREE_45: c1 = c0 + 1; break;
218 case DEGREE_135: c1 = c0 - 1; break;
219 }
220 }
221 pos1 = r1 * pitch + c1;
222
223 if (r0 >= 0 && r0 < height && c0 >= 0 && c0 < width && r1 >= 0 &&
224 r1 < height && c1 >= 0 && c1 < width) {
225 sum_grad += abs(src[pos1] - src[pos0]);
226 } else {
227 sum_grad += 255; // penalize unreachable boundary
228 }
229 r0 = r1;
230 c0 = c1;
231 pos0 = pos1;
232 }
233
234 r0 = vert_or_horz ? row : row + i;
235 c0 = vert_or_horz ? col + i : col;
236 pos0 = r0 * pitch + c0;
237
238 for (j = 0; j < length - 1; ++j) {
239 if (vert_or_horz == 0) {
240 switch (direct) {
241 case VERT_HORZ: r1 = r0; break;
242 case DEGREE_45: r1 = r0 - 1; break;
243 case DEGREE_135: r1 = r0 + 1; break;
244 }
245 c1 = c0 + 1;
246 } else {
247 r1 = r0 + 1;
248 switch (direct) {
249 case VERT_HORZ: c1 = c0; break;
250 case DEGREE_45: c1 = c0 - 1; break;
251 case DEGREE_135: c1 = c0 + 1; break;
252 }
253 }
254 pos1 = r1 * pitch + c1;
255
256 if (r0 >= 0 && r0 < height && c0 >= 0 && c0 < width && r1 >= 0 &&
257 r1 < height && c1 >= 0 && c1 < width) {
258 sum_grad += abs(src[pos1] - src[pos0]);
259 } else {
260 sum_grad += 255; // penalize unreachable boundary
261 }
262 r0 = r1;
263 c0 = c1;
264 pos0 = pos1;
265 }
266 }
267
268 return sum_grad;
269 }
270
pick_min_grad_direct(uint8_t * const src,int length,int row,int col,int width,int height,int pitch,int unit,int vert_or_horz)271 static int pick_min_grad_direct(uint8_t *const src, int length, int row,
272 int col, int width, int height, int pitch,
273 int unit, int vert_or_horz) {
274 int direct = VERT_HORZ;
275 int min_grad = INT_MAX, sum_grad = 0;
276
277 int degree;
278 for (degree = 0; degree < FILTER_DEGREES; ++degree) {
279 // compute abs gradient along each line for the filter block
280 sum_grad = compute_block_grad(src, length, row, col, width, height, pitch,
281 unit, vert_or_horz, degree);
282 if (sum_grad < min_grad) {
283 min_grad = sum_grad;
284 direct = degree;
285 }
286 }
287
288 return direct;
289 }
290 #endif // CONFIG_LPF_DIRECT
291
// Compile-time switches for the parallel deblocking filter.
#define PARALLEL_DEBLOCKING_15TAPLUMAONLY 1
#define PARALLEL_DEBLOCKING_DISABLE_15TAP 0
// PARALLEL_DEBLOCKING_5_TAP_CHROMA follows CONFIG_DEBLOCK_13TAP.
#if CONFIG_DEBLOCK_13TAP
#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 1
#else
#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 0
#endif

#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
// Prototypes of the 6-tap C filter functions, declared here only when
// PARALLEL_DEBLOCKING_5_TAP_CHROMA is enabled.
void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh);

void aom_lpf_horizontal_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
                            const uint8_t *limit, const uint8_t *thresh);

void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int pitch,
                                   const uint8_t *blimit,
                                   const uint8_t *limit,
                                   const uint8_t *thresh, int bd);

void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch,
                                 const uint8_t *blimit, const uint8_t *limit,
                                 const uint8_t *thresh, int bd);
#endif
317
318 // 64 bit masks for left transform size. Each 1 represents a position where
319 // we should apply a loop filter across the left border of an 8x8 block
320 // boundary.
321 //
322 // In the case of TX_16X16-> ( in low order byte first we end up with
323 // a mask that looks like this
324 //
325 // 10101010
326 // 10101010
327 // 10101010
328 // 10101010
329 // 10101010
330 // 10101010
331 // 10101010
332 // 10101010
333 //
334 // A loopfilter should be applied to every other 8x8 horizontally.
335 static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
336 #if CONFIG_CHROMA_2X2
337 0xffffffffffffffffULL, // TX_2X2
338 #endif
339 0xffffffffffffffffULL, // TX_4X4
340 0xffffffffffffffffULL, // TX_8x8
341 0x5555555555555555ULL, // TX_16x16
342 0x1111111111111111ULL, // TX_32x32
343 #if CONFIG_TX64X64
344 0x0101010101010101ULL, // TX_64x64
345 #endif // CONFIG_TX64X64
346 };
347
348 // 64 bit masks for above transform size. Each 1 represents a position where
349 // we should apply a loop filter across the top border of an 8x8 block
350 // boundary.
351 //
352 // In the case of TX_32x32 -> ( in low order byte first we end up with
353 // a mask that looks like this
354 //
355 // 11111111
356 // 00000000
357 // 00000000
358 // 00000000
359 // 11111111
360 // 00000000
361 // 00000000
362 // 00000000
363 //
364 // A loopfilter should be applied to every other 4 the row vertically.
365 static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
366 #if CONFIG_CHROMA_2X2
367 0xffffffffffffffffULL, // TX_4X4
368 #endif
369 0xffffffffffffffffULL, // TX_4X4
370 0xffffffffffffffffULL, // TX_8x8
371 0x00ff00ff00ff00ffULL, // TX_16x16
372 0x000000ff000000ffULL, // TX_32x32
373 #if CONFIG_TX64X64
374 0x00000000000000ffULL, // TX_64x64
375 #endif // CONFIG_TX64X64
376 };
377
378 // 64 bit masks for prediction sizes (left). Each 1 represents a position
379 // where left border of an 8x8 block. These are aligned to the right most
380 // appropriate bit, and then shifted into place.
381 //
382 // In the case of TX_16x32 -> ( low order byte first ) we end up with
383 // a mask that looks like this :
384 //
385 // 10000000
386 // 10000000
387 // 10000000
388 // 10000000
389 // 00000000
390 // 00000000
391 // 00000000
392 // 00000000
393 static const uint64_t left_prediction_mask[BLOCK_SIZES_ALL] = {
394 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
395 0x0000000000000001ULL, // BLOCK_2X2,
396 0x0000000000000001ULL, // BLOCK_2X4,
397 0x0000000000000001ULL, // BLOCK_4X2,
398 #endif
399 0x0000000000000001ULL, // BLOCK_4X4,
400 0x0000000000000001ULL, // BLOCK_4X8,
401 0x0000000000000001ULL, // BLOCK_8X4,
402 0x0000000000000001ULL, // BLOCK_8X8,
403 0x0000000000000101ULL, // BLOCK_8X16,
404 0x0000000000000001ULL, // BLOCK_16X8,
405 0x0000000000000101ULL, // BLOCK_16X16,
406 0x0000000001010101ULL, // BLOCK_16X32,
407 0x0000000000000101ULL, // BLOCK_32X16,
408 0x0000000001010101ULL, // BLOCK_32X32,
409 0x0101010101010101ULL, // BLOCK_32X64,
410 0x0000000001010101ULL, // BLOCK_64X32,
411 0x0101010101010101ULL, // BLOCK_64X64,
412 0x0000000000000101ULL, // BLOCK_4X16,
413 0x0000000000000001ULL, // BLOCK_16X4,
414 0x0000000001010101ULL, // BLOCK_8X32,
415 0x0000000000000001ULL, // BLOCK_32X8,
416 0x0101010101010101ULL, // BLOCK_16X64,
417 0x0000000000000101ULL, // BLOCK_64X16
418 };
419
420 // 64 bit mask to shift and set for each prediction size.
421 static const uint64_t above_prediction_mask[BLOCK_SIZES_ALL] = {
422 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
423 0x0000000000000001ULL, // BLOCK_2X2
424 0x0000000000000001ULL, // BLOCK_2X4
425 0x0000000000000001ULL, // BLOCK_4X2
426 #endif
427 0x0000000000000001ULL, // BLOCK_4X4
428 0x0000000000000001ULL, // BLOCK_4X8
429 0x0000000000000001ULL, // BLOCK_8X4
430 0x0000000000000001ULL, // BLOCK_8X8
431 0x0000000000000001ULL, // BLOCK_8X16,
432 0x0000000000000003ULL, // BLOCK_16X8
433 0x0000000000000003ULL, // BLOCK_16X16
434 0x0000000000000003ULL, // BLOCK_16X32,
435 0x000000000000000fULL, // BLOCK_32X16,
436 0x000000000000000fULL, // BLOCK_32X32,
437 0x000000000000000fULL, // BLOCK_32X64,
438 0x00000000000000ffULL, // BLOCK_64X32,
439 0x00000000000000ffULL, // BLOCK_64X64,
440 0x0000000000000001ULL, // BLOCK_4X16,
441 0x0000000000000003ULL, // BLOCK_16X4,
442 0x0000000000000001ULL, // BLOCK_8X32,
443 0x000000000000000fULL, // BLOCK_32X8,
444 0x0000000000000003ULL, // BLOCK_16X64,
445 0x00000000000000ffULL, // BLOCK_64X16
446 };
447 // 64 bit mask to shift and set for each prediction size. A bit is set for
448 // each 8x8 block that would be in the top left most block of the given block
449 // size in the 64x64 block.
450 static const uint64_t size_mask[BLOCK_SIZES_ALL] = {
451 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
452 0x0000000000000001ULL, // BLOCK_2X2
453 0x0000000000000001ULL, // BLOCK_2X4
454 0x0000000000000001ULL, // BLOCK_4X2
455 #endif
456 0x0000000000000001ULL, // BLOCK_4X4
457 0x0000000000000001ULL, // BLOCK_4X8
458 0x0000000000000001ULL, // BLOCK_8X4
459 0x0000000000000001ULL, // BLOCK_8X8
460 0x0000000000000101ULL, // BLOCK_8X16,
461 0x0000000000000003ULL, // BLOCK_16X8
462 0x0000000000000303ULL, // BLOCK_16X16
463 0x0000000003030303ULL, // BLOCK_16X32,
464 0x0000000000000f0fULL, // BLOCK_32X16,
465 0x000000000f0f0f0fULL, // BLOCK_32X32,
466 0x0f0f0f0f0f0f0f0fULL, // BLOCK_32X64,
467 0x00000000ffffffffULL, // BLOCK_64X32,
468 0xffffffffffffffffULL, // BLOCK_64X64,
469 0x0000000000000101ULL, // BLOCK_4X16,
470 0x0000000000000003ULL, // BLOCK_16X4,
471 0x0000000001010101ULL, // BLOCK_8X32,
472 0x000000000000000fULL, // BLOCK_32X8,
473 0x0303030303030303ULL, // BLOCK_16X64,
474 0x000000000000ffffULL, // BLOCK_64X16
475 };
476
// Masks selecting the left and above 32x32 borders within the 64 bit
// (8x8 grid) masks.
static const uint64_t left_border = 0x1111111111111111ULL;
static const uint64_t above_border = 0x000000ff000000ffULL;
480
481 // 16 bit masks for uv transform sizes.
482 static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
483 #if CONFIG_CHROMA_2X2
484 0xffff, // TX_2X2
485 #endif
486 0xffff, // TX_4X4
487 0xffff, // TX_8x8
488 0x5555, // TX_16x16
489 0x1111, // TX_32x32
490 #if CONFIG_TX64X64
491 0x0101, // TX_64x64, never used
492 #endif // CONFIG_TX64X64
493 };
494
495 static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
496 #if CONFIG_CHROMA_2X2
497 0xffff, // TX_2X2
498 #endif
499 0xffff, // TX_4X4
500 0xffff, // TX_8x8
501 0x0f0f, // TX_16x16
502 0x000f, // TX_32x32
503 #if CONFIG_TX64X64
504 0x0003, // TX_64x64, never used
505 #endif // CONFIG_TX64X64
506 };
507
508 // 16 bit left mask to shift and set for each uv prediction size.
509 static const uint16_t left_prediction_mask_uv[BLOCK_SIZES_ALL] = {
510 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
511 0x0001, // BLOCK_2X2,
512 0x0001, // BLOCK_2X4,
513 0x0001, // BLOCK_4X2,
514 #endif
515 0x0001, // BLOCK_4X4,
516 0x0001, // BLOCK_4X8,
517 0x0001, // BLOCK_8X4,
518 0x0001, // BLOCK_8X8,
519 0x0001, // BLOCK_8X16,
520 0x0001, // BLOCK_16X8,
521 0x0001, // BLOCK_16X16,
522 0x0011, // BLOCK_16X32,
523 0x0001, // BLOCK_32X16,
524 0x0011, // BLOCK_32X32,
525 0x1111, // BLOCK_32X64
526 0x0011, // BLOCK_64X32,
527 0x1111, // BLOCK_64X64,
528 0x0001, // BLOCK_4X16,
529 0x0001, // BLOCK_16X4,
530 0x0011, // BLOCK_8X32,
531 0x0001, // BLOCK_32X8,
532 0x1111, // BLOCK_16X64,
533 0x0001, // BLOCK_64X16,
534 };
535
536 // 16 bit above mask to shift and set for uv each prediction size.
537 static const uint16_t above_prediction_mask_uv[BLOCK_SIZES_ALL] = {
538 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
539 0x0001, // BLOCK_2X2
540 0x0001, // BLOCK_2X4
541 0x0001, // BLOCK_4X2
542 #endif
543 0x0001, // BLOCK_4X4
544 0x0001, // BLOCK_4X8
545 0x0001, // BLOCK_8X4
546 0x0001, // BLOCK_8X8
547 0x0001, // BLOCK_8X16,
548 0x0001, // BLOCK_16X8
549 0x0001, // BLOCK_16X16
550 0x0001, // BLOCK_16X32,
551 0x0003, // BLOCK_32X16,
552 0x0003, // BLOCK_32X32,
553 0x0003, // BLOCK_32X64,
554 0x000f, // BLOCK_64X32,
555 0x000f, // BLOCK_64X64,
556 0x0001, // BLOCK_4X16,
557 0x0001, // BLOCK_16X4,
558 0x0001, // BLOCK_8X32,
559 0x0003, // BLOCK_32X8,
560 0x0001, // BLOCK_16X64,
561 0x000f, // BLOCK_64X16
562 };
563
564 // 64 bit mask to shift and set for each uv prediction size
565 static const uint16_t size_mask_uv[BLOCK_SIZES_ALL] = {
566 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
567 0x0001, // BLOCK_2X2
568 0x0001, // BLOCK_2X4
569 0x0001, // BLOCK_4X2
570 #endif
571 0x0001, // BLOCK_4X4
572 0x0001, // BLOCK_4X8
573 0x0001, // BLOCK_8X4
574 0x0001, // BLOCK_8X8
575 0x0001, // BLOCK_8X16,
576 0x0001, // BLOCK_16X8
577 0x0001, // BLOCK_16X16
578 0x0011, // BLOCK_16X32,
579 0x0003, // BLOCK_32X16,
580 0x0033, // BLOCK_32X32,
581 0x3333, // BLOCK_32X64,
582 0x00ff, // BLOCK_64X32,
583 0xffff, // BLOCK_64X64,
584 0x0001, // BLOCK_4X16,
585 0x0001, // BLOCK_16X4,
586 0x0011, // BLOCK_8X32,
587 0x0003, // BLOCK_32X8,
588 0x1111, // BLOCK_16X64,
589 0x000f, // BLOCK_64X16
590 };
// 16 bit uv counterparts of left_border and above_border.
static const uint16_t left_border_uv = 0x1111;
static const uint16_t above_border_uv = 0x000f;
593
// Maps a prediction mode to the index of the loop filter mode delta that
// applies to it. The table is expected to have one entry per mode
// (MB_MODE_COUNT).
static const int mode_lf_lut[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
  0,
#if CONFIG_SMOOTH_HV
  0, 0,
#endif  // CONFIG_SMOOTH_HV
  1, 1, 0, 1,  // INTER_MODES (ZEROMV == 0)
#if CONFIG_COMPOUND_SINGLEREF
  // NOTE(zoeliu): Remove SR_NEAREST_NEWMV
  1, 1, 1, 1,  // INTER_SINGLEREF_COMP_MODES
#endif  // CONFIG_COMPOUND_SINGLEREF
  1, 1, 1, 1, 1, 1, 0, 1  // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0)
};
608
// Fills the `lim` and `mblim` threshold vectors for every filter level in
// [0, MAX_LOOP_FILTER] according to the given sharpness level.
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  // The level is shifted right by 0, 1 or 2 bits as sharpness crosses 0 and 4.
  const int shift = (sharpness_lvl > 0) + (sharpness_lvl > 4);
  // Upper bound on the inside limit; only applied when sharpness is non-zero.
  const int sharp_cap = 9 - sharpness_lvl;

  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    int inside_limit = level >> shift;

    if (sharpness_lvl > 0 && inside_limit > sharp_cap) inside_limit = sharp_cap;
    if (inside_limit < 1) inside_limit = 1;

    loop_filter_thresh *const thr = &lfi->lfthr[level];
    memset(thr->lim, inside_limit, SIMD_WIDTH);
    memset(thr->mblim, 2 * (level + 2) + inside_limit, SIMD_WIDTH);
  }
}
629 #if CONFIG_EXT_DELTA_Q
get_filter_level(const AV1_COMMON * cm,const loop_filter_info_n * lfi_n,const int dir_idx,int plane,int mi_row,int mi_col,const MB_MODE_INFO * mbmi)630 static uint8_t get_filter_level(const AV1_COMMON *cm,
631 const loop_filter_info_n *lfi_n,
632 #if CONFIG_LOOPFILTER_LEVEL
633 const int dir_idx, int plane,
634 #endif
635 #if CONFIG_LPF_SB
636 int mi_row, int mi_col,
637 #endif
638 const MB_MODE_INFO *mbmi) {
639 #if CONFIG_LPF_SB
640 return cm->mi[mi_row * cm->mi_stride + mi_col].mbmi.filt_lvl;
641 #endif
642
643 #if CONFIG_SUPERTX
644 const int segment_id = AOMMIN(mbmi->segment_id, mbmi->segment_id_supertx);
645 assert(
646 IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS));
647 assert(IMPLIES(supertx_enabled(mbmi),
648 mbmi->segment_id_supertx <= mbmi->segment_id));
649 #else
650 const int segment_id = mbmi->segment_id;
651 #endif // CONFIG_SUPERTX
652 if (cm->delta_lf_present_flag) {
653 #if CONFIG_LOOPFILTER_LEVEL
654 int delta_lf;
655 if (cm->delta_lf_multi) {
656 const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
657 delta_lf = mbmi->curr_delta_lf[delta_lf_idx];
658 } else {
659 delta_lf = mbmi->current_delta_lf_from_base;
660 }
661 int lvl_seg =
662 clamp(delta_lf + cm->lf.filter_level[dir_idx], 0, MAX_LOOP_FILTER);
663 #else
664 int lvl_seg = clamp(mbmi->current_delta_lf_from_base + cm->lf.filter_level,
665 0, MAX_LOOP_FILTER);
666 #endif
667 const int scale = 1 << (lvl_seg >> 5);
668 #if CONFIG_LOOPFILTER_LEVEL
669 assert(plane >= 0 && plane <= 2);
670 const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
671 if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
672 const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
673 lvl_seg =
674 clamp(cm->seg.abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
675 MAX_LOOP_FILTER);
676 }
677 #else
678 if (segfeature_active(&cm->seg, segment_id, SEG_LVL_ALT_LF)) {
679 const int data = get_segdata(&cm->seg, segment_id, SEG_LVL_ALT_LF);
680 lvl_seg =
681 clamp(cm->seg.abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
682 MAX_LOOP_FILTER);
683 }
684 #endif // CONFIG_LOOPFILTER_LEVEL
685
686 if (cm->lf.mode_ref_delta_enabled) {
687 lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
688 if (mbmi->ref_frame[0] > INTRA_FRAME)
689 lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale;
690 lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER);
691 }
692 return lvl_seg;
693 } else {
694 #if CONFIG_LOOPFILTER_LEVEL
695 return lfi_n
696 ->lvl[segment_id][dir_idx][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
697 #else
698 return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
699 #endif
700 }
701 }
702 #else
get_filter_level(const loop_filter_info_n * lfi_n,const MB_MODE_INFO * mbmi)703 static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
704 const MB_MODE_INFO *mbmi) {
705 #if CONFIG_SUPERTX
706 const int segment_id = AOMMIN(mbmi->segment_id, mbmi->segment_id_supertx);
707 assert(
708 IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS));
709 assert(IMPLIES(supertx_enabled(mbmi),
710 mbmi->segment_id_supertx <= mbmi->segment_id));
711 #else
712 const int segment_id = mbmi->segment_id;
713 #endif // CONFIG_SUPERTX
714 return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
715 }
716 #endif
717
// One-time loop filter setup: builds the sharpness-dependent limits and the
// high-edge-variance thresholds for every filter level.
void av1_loop_filter_init(AV1_COMMON *cm) {
  // mode_lf_lut must provide one entry per prediction mode.
  assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut));
  struct loopfilter *const lf = &cm->lf;
  loop_filter_info_n *const lfi = &cm->lf_info;

  // limits for the current sharpness; remember it to detect later changes
  update_sharpness(lfi, lf->sharpness_level);
  lf->last_sharpness_level = lf->sharpness_level;

  // hev threshold const vectors
  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    memset(lfi->lfthr[level].hev_thr, level >> 4, SIMD_WIDTH);
  }
}
732
733 #if CONFIG_LPF_SB
// Stores filter level `lvl` in every mode info unit of the MAX_MIB_SIZE
// square window that starts FILT_BOUNDARY_MI_OFFSET units above and to the
// left of (mi_row, mi_col), clipped to the frame.
void av1_loop_filter_sb_level_init(AV1_COMMON *cm, int mi_row, int mi_col,
                                   int lvl) {
  const int row_origin = mi_row - FILT_BOUNDARY_MI_OFFSET;
  const int col_origin = mi_col - FILT_BOUNDARY_MI_OFFSET;
  const int row_begin = AOMMAX(0, row_origin);
  const int col_begin = AOMMAX(0, col_origin);
  const int row_limit = row_origin + MAX_MIB_SIZE;
  const int col_limit = col_origin + MAX_MIB_SIZE;
  const int row_end = AOMMIN(row_limit, cm->mi_rows);
  const int col_end = AOMMIN(col_limit, cm->mi_cols);

  for (int r = row_begin; r < row_end; ++r) {
    const int row_base = r * cm->mi_stride;
    for (int c = col_begin; c < col_end; ++c) {
      // Note: can't use cm->mi_grid_visible. Because for each partition,
      // all visible pointers will point to the first of the partition.
      cm->mi[row_base + c].mbmi.filt_lvl = lvl;
    }
  }
}
752 #endif // CONFIG_LPF_SB
753
// Per-frame loop filter setup. Refreshes the sharpness limits if the
// sharpness level changed, then fills cm->lf_info.lvl with the filter level
// for every segment (and, with CONFIG_LOOPFILTER_LEVEL, every direction),
// reference frame and mode delta index.
//
// default_filt_lvl is the base level for direction 0 and default_filt_lvl_r
// the base level for direction 1. With CONFIG_LOOPFILTER_LEVEL, `plane`
// (0..2) selects which segment feature supplies the per-segment adjustment.
void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl,
                                int default_filt_lvl_r
#if CONFIG_LOOPFILTER_LEVEL
                                ,
                                int plane
#endif
                                ) {
  int seg_id;
  // scale is the multiplier for the ref/mode deltas:
  // 1 when the filter level is between 0 and 31, 2 when between 32 and 63.
  int scale = 1 << (default_filt_lvl >> 5);
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  const struct segmentation *const seg = &cm->seg;

  // update limits if sharpness has changed
  if (lf->last_sharpness_level != lf->sharpness_level) {
    update_sharpness(lfi, lf->sharpness_level);
    lf->last_sharpness_level = lf->sharpness_level;
  }

  for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
    for (int dir = 0; dir < 2; ++dir) {
      int lvl_seg = (dir == 0) ? default_filt_lvl : default_filt_lvl_r;
#if CONFIG_LOOPFILTER_LEVEL
      assert(plane >= 0 && plane <= 2);
      const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
      if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
        const int data = get_segdata(seg, seg_id, seg_lf_feature_id);
        // A relative segment value is applied to this direction's own base
        // level (lvl_seg), matching get_filter_level().
        lvl_seg =
            clamp(seg->abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
                  MAX_LOOP_FILTER);
      }
#else
      if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
        const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
        lvl_seg = clamp(
            seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data,
            0, MAX_LOOP_FILTER);
      }
#endif  // CONFIG_LOOPFILTER_LEVEL

      if (!lf->mode_ref_delta_enabled) {
// we could get rid of this if we assume that deltas are set to
// zero when not in use; encoder always uses deltas
#if CONFIG_LOOPFILTER_LEVEL
        memset(lfi->lvl[seg_id][dir], lvl_seg, sizeof(lfi->lvl[seg_id][dir]));
#else
        memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
#endif  // CONFIG_LOOPFILTER_LEVEL
      } else {
        int ref, mode;
#if CONFIG_LOOPFILTER_LEVEL
        // Here the multiplier follows the segment-adjusted level.
        scale = 1 << (lvl_seg >> 5);

        const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
        lfi->lvl[seg_id][dir][INTRA_FRAME][0] =
            clamp(intra_lvl, 0, MAX_LOOP_FILTER);

        for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
          for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
            const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
                                  lf->mode_deltas[mode] * scale;
            lfi->lvl[seg_id][dir][ref][mode] =
                clamp(inter_lvl, 0, MAX_LOOP_FILTER);
          }
        }
#else
        (void)default_filt_lvl_r;
        const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
        lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);

        for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
          for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
            const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
                                  lf->mode_deltas[mode] * scale;
            lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
          }
        }
#endif
      }
    }
  }
}
839
filter_selectively_vert_row2(int subsampling_factor,uint8_t * s,int pitch,unsigned int mask_16x16_l,unsigned int mask_8x8_l,unsigned int mask_4x4_l,unsigned int mask_4x4_int_l,const loop_filter_info_n * lfi_n,const uint8_t * lfl)840 static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s,
841 int pitch, unsigned int mask_16x16_l,
842 unsigned int mask_8x8_l,
843 unsigned int mask_4x4_l,
844 unsigned int mask_4x4_int_l,
845 const loop_filter_info_n *lfi_n,
846 const uint8_t *lfl) {
847 const int mask_shift = subsampling_factor ? 4 : 8;
848 const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
849 const int lfl_forward = subsampling_factor ? 4 : 8;
850
851 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
852 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
853 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
854 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
855 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
856 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
857 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
858 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
859 unsigned int mask;
860
861 for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
862 mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
863 mask; mask >>= 1) {
864 const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
865 const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
866
867 if (mask & 1) {
868 if ((mask_16x16_0 | mask_16x16_1) & 1) {
869 if ((mask_16x16_0 & mask_16x16_1) & 1) {
870 aom_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
871 lfi0->hev_thr);
872 } else if (mask_16x16_0 & 1) {
873 aom_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
874 } else {
875 aom_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
876 lfi1->hev_thr);
877 }
878 }
879
880 if ((mask_8x8_0 | mask_8x8_1) & 1) {
881 if ((mask_8x8_0 & mask_8x8_1) & 1) {
882 aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
883 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
884 lfi1->hev_thr);
885 } else if (mask_8x8_0 & 1) {
886 aom_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
887 } else {
888 aom_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
889 lfi1->hev_thr);
890 }
891 }
892
893 if ((mask_4x4_0 | mask_4x4_1) & 1) {
894 if ((mask_4x4_0 & mask_4x4_1) & 1) {
895 aom_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
896 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
897 lfi1->hev_thr);
898 } else if (mask_4x4_0 & 1) {
899 aom_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
900 } else {
901 aom_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
902 lfi1->hev_thr);
903 }
904 }
905
906 if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
907 if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
908 aom_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
909 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
910 lfi1->hev_thr);
911 } else if (mask_4x4_int_0 & 1) {
912 aom_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
913 lfi0->hev_thr);
914 } else {
915 aom_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
916 lfi1->hev_thr);
917 }
918 }
919 }
920
921 s += 8;
922 lfl += 1;
923 mask_16x16_0 >>= 1;
924 mask_8x8_0 >>= 1;
925 mask_4x4_0 >>= 1;
926 mask_4x4_int_0 >>= 1;
927 mask_16x16_1 >>= 1;
928 mask_8x8_1 >>= 1;
929 mask_4x4_1 >>= 1;
930 mask_4x4_int_1 >>= 1;
931 }
932 }
933
934 #if CONFIG_HIGHBITDEPTH
highbd_filter_selectively_vert_row2(int subsampling_factor,uint16_t * s,int pitch,unsigned int mask_16x16_l,unsigned int mask_8x8_l,unsigned int mask_4x4_l,unsigned int mask_4x4_int_l,const loop_filter_info_n * lfi_n,const uint8_t * lfl,int bd)935 static void highbd_filter_selectively_vert_row2(
936 int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16_l,
937 unsigned int mask_8x8_l, unsigned int mask_4x4_l,
938 unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n,
939 const uint8_t *lfl, int bd) {
940 const int mask_shift = subsampling_factor ? 4 : 8;
941 const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
942 const int lfl_forward = subsampling_factor ? 4 : 8;
943
944 unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
945 unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
946 unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
947 unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
948 unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
949 unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
950 unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
951 unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
952 unsigned int mask;
953
954 for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
955 mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
956 mask; mask >>= 1) {
957 const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
958 const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
959
960 if (mask & 1) {
961 if ((mask_16x16_0 | mask_16x16_1) & 1) {
962 if ((mask_16x16_0 & mask_16x16_1) & 1) {
963 aom_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
964 lfi0->hev_thr, bd);
965 } else if (mask_16x16_0 & 1) {
966 aom_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
967 lfi0->hev_thr, bd);
968 } else {
969 aom_highbd_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
970 lfi1->lim, lfi1->hev_thr, bd);
971 }
972 }
973
974 if ((mask_8x8_0 | mask_8x8_1) & 1) {
975 if ((mask_8x8_0 & mask_8x8_1) & 1) {
976 aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
977 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
978 lfi1->hev_thr, bd);
979 } else if (mask_8x8_0 & 1) {
980 aom_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
981 lfi0->hev_thr, bd);
982 } else {
983 aom_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
984 lfi1->lim, lfi1->hev_thr, bd);
985 }
986 }
987
988 if ((mask_4x4_0 | mask_4x4_1) & 1) {
989 if ((mask_4x4_0 & mask_4x4_1) & 1) {
990 aom_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
991 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
992 lfi1->hev_thr, bd);
993 } else if (mask_4x4_0 & 1) {
994 aom_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
995 lfi0->hev_thr, bd);
996 } else {
997 aom_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
998 lfi1->lim, lfi1->hev_thr, bd);
999 }
1000 }
1001
1002 if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
1003 if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
1004 aom_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
1005 lfi0->hev_thr, lfi1->mblim, lfi1->lim,
1006 lfi1->hev_thr, bd);
1007 } else if (mask_4x4_int_0 & 1) {
1008 aom_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
1009 lfi0->hev_thr, bd);
1010 } else {
1011 aom_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
1012 lfi1->lim, lfi1->hev_thr, bd);
1013 }
1014 }
1015 }
1016
1017 s += 8;
1018 lfl += 1;
1019 mask_16x16_0 >>= 1;
1020 mask_8x8_0 >>= 1;
1021 mask_4x4_0 >>= 1;
1022 mask_4x4_int_0 >>= 1;
1023 mask_16x16_1 >>= 1;
1024 mask_8x8_1 >>= 1;
1025 mask_4x4_1 >>= 1;
1026 mask_4x4_int_1 >>= 1;
1027 }
1028 }
1029 #endif // CONFIG_HIGHBITDEPTH
1030
// Applies horizontal-edge loop filters along one row of 8-pixel-wide columns.
//
// Each of the four masks carries one flag per column, bit 0 being the column
// at s. The loop consumes the masks from the low bit upwards, advancing s by
// 8 pixels and lfl by one entry per consumed bit:
//   mask_16x16   -> aom_lpf_horizontal_edge_{8,16}
//   mask_8x8     -> aom_lpf_horizontal_8 (or the _dual variant)
//   mask_4x4     -> aom_lpf_horizontal_4 (or the _dual variant)
//   mask_4x4_int -> aom_lpf_horizontal_4 applied 4 rows further down
// The first three are mutually exclusive per column (else-if chain, in that
// priority order). When two neighbouring columns carry the same flag, a
// paired kernel handles both and two bits are consumed (count == 2).
//
// lfi_n->lfthr is indexed by the per-column filter level read from lfl.
//
// With CONFIG_LPF_DIRECT the filters do not run on the frame buffer in
// place: pixels are gathered into a local 16-wide scratch block, filtered
// there, and copied back to src at the positions recorded in orig_pos.
// NOTE(review): pick_min_grad_direct() and pick_filter_block_horz() are not
// visible in this part of the file; the comments below describe only how
// their arguments and results are used here.
static void filter_selectively_horiz(
    uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_info_n *lfi_n, const uint8_t *lfl
#if CONFIG_LPF_DIRECT
    ,
    uint8_t *const src, int mi_row, int mi_col, int idx_r, int col_step,
    int width, int height, int ss_x, int ss_y
#endif
    ) {
  unsigned int mask;
  int count;
#if CONFIG_LPF_DIRECT
  // Scale the frame dimensions by the plane's subsampling shifts (u, v
  // planes).
  width >>= ss_x;
  height >>= ss_y;
  // Column offset (in mi units) of the current position relative to mi_col.
  int idx_c = 0;
#endif

  // `mask` is the union of all four masks; it is shifted in lock-step with
  // them, so the loop ends once no column to the right needs filtering.
  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= count) {
    // Thresholds for the current column.
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    // Number of columns consumed by this iteration (2 for paired kernels).
    count = 1;
    if (mask & 1) {
#if CONFIG_LPF_DIRECT
      int i;
      // Row stride of the scratch block handed to the filter kernels.
      const int line_length = 16;
      // Scratch row on which the kernels are centred (the edge position).
      const int pivot = 8;
      // Number of rows gathered on each side of the edge: 8 for the wide
      // (16x16) filter, 4 otherwise. Both sides currently use the same value.
      const int above_filt_len = mask_16x16 & 1 ? 8 : 4;
      const int bot_filt_len = mask_16x16 & 1 ? 8 : 4;
      uint8_t block[256];  // line_length * size_of(BLOCK_8X8) * two_blocks
      // Index into src for each scratch entry; -1 marks entries that must not
      // be written back.
      int orig_pos[256];
      int direct;

      assert(above_filt_len == bot_filt_len);
      (void)bot_filt_len;
      // Start from an empty scratch block with no write-back positions.
      for (i = 0; i < 256; ++i) {
        block[i] = 0;
        orig_pos[i] = -1;
      }

      // Pixel position of the current column's edge within the plane.
      const int row = (mi_row + idx_r) * MI_SIZE >> ss_y;
      const int col = (mi_col + idx_c) * MI_SIZE >> ss_x;

      // Next block's thresholds.
      const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          // Two adjacent 16x16 flags: filter both columns with the 16-wide
          // edge kernel.
          // Could use asymmetric length in the future
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 2, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 2,
                                 direct);

          aom_lpf_horizontal_edge_16(block + pivot * line_length, line_length,
                                     lfi->mblim, lfi->lim, lfi->hev_thr);
          count = 2;
        } else {
          // Single 16x16 flag: 8-wide edge kernel on this column only.
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 1, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 1,
                                 direct);

          aom_lpf_horizontal_edge_8(block + pivot * line_length, line_length,
                                    lfi->mblim, lfi->lim, lfi->hev_thr);
        }

        // Copy the filtered scratch entries back to their recorded positions.
        for (i = 0; i < 256; ++i)
          if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Two adjacent 8x8 flags: dual kernel with this column's and the
          // next column's thresholds.
          count = 2;
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 2, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 2,
                                 direct);

          aom_lpf_horizontal_8_dual(block + pivot * line_length, line_length,
                                    lfi->mblim, lfi->lim, lfi->hev_thr,
                                    lfin->mblim, lfin->lim, lfin->hev_thr);

          for (i = 0; i < 256; ++i)
            if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];

          // Internal 4x4 edge(s) of the same two columns, 4 rows below. The
          // scratch buffers are reset first because src has just been updated.
          if ((mask_4x4_int & 3) == 3) {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }

            direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                          pitch, 2, 1);

            pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                                   height, pitch, pivot, line_length, 2,
                                   direct);

            aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length,
                                      lfi->mblim, lfi->lim, lfi->hev_thr,
                                      lfin->mblim, lfin->lim, lfin->hev_thr);

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          } else {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }

            // Only one of the two columns (if any) has an internal edge.
            if (mask_4x4_int & 1) {
              direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                            pitch, 1, 1);

              pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col,
                                     width, height, pitch, pivot, line_length,
                                     1, direct);

              aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                   lfi->mblim, lfi->lim, lfi->hev_thr);
            } else if (mask_4x4_int & 2) {
              // Internal edge belongs to the second column: gather 8 pixels to
              // the right and use the next column's thresholds.
              direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                            pitch, 1, 1);

              pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col + 8,
                                     width, height, pitch, pivot, line_length,
                                     1, direct);

              aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                   lfin->mblim, lfin->lim, lfin->hev_thr);
            }

            // No-op when neither branch ran: every orig_pos entry is still -1.
            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          }
        } else {
          // Single 8x8 flag.
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 1, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 1,
                                 direct);

          aom_lpf_horizontal_8(block + pivot * line_length, line_length,
                               lfi->mblim, lfi->lim, lfi->hev_thr);

          for (i = 0; i < 256; ++i)
            if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];

          // Optional internal 4x4 edge of this column.
          if (mask_4x4_int & 1) {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }
            direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                          pitch, 1, 1);

            pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                                   height, pitch, pivot, line_length, 1,
                                   direct);

            aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                 lfi->mblim, lfi->lim, lfi->hev_thr);

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          }
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Two adjacent 4x4 flags: dual kernel, then the internal edges as in
          // the 8x8 case above.
          count = 2;
          direct = pick_min_grad_direct(src, 4, row, col, width, height, pitch,
                                        2, 1);

          pick_filter_block_horz(src, block, orig_pos, 4, row, col, width,
                                 height, pitch, pivot, line_length, 2, direct);

          aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length,
                                    lfi->mblim, lfi->lim, lfi->hev_thr,
                                    lfin->mblim, lfin->lim, lfin->hev_thr);

          for (i = 0; i < 256; ++i)
            if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];

          if ((mask_4x4_int & 3) == 3) {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }

            direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                          pitch, 2, 1);

            pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                                   height, pitch, pivot, line_length, 2,
                                   direct);

            aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length,
                                      lfi->mblim, lfi->lim, lfi->hev_thr,
                                      lfin->mblim, lfin->lim, lfin->hev_thr);

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          } else {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }

            if (mask_4x4_int & 1) {
              direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                            pitch, 1, 1);

              pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col,
                                     width, height, pitch, pivot, line_length,
                                     1, direct);

              aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                   lfi->mblim, lfi->lim, lfi->hev_thr);
            } else if (mask_4x4_int & 2) {
              // Second column only: offset by 8 pixels, next thresholds.
              direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                            pitch, 1, 1);

              pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col + 8,
                                     width, height, pitch, pivot, line_length,
                                     1, direct);

              aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                   lfin->mblim, lfin->lim, lfin->hev_thr);
            }

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          }
        } else {
          // Single 4x4 flag. above_filt_len is 4 here because the 16x16 flag
          // is not set on this path.
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 1, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 1,
                                 direct);

          aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                               lfi->mblim, lfi->lim, lfi->hev_thr);

          for (i = 0; i < 256; ++i)
            if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];

          // Optional internal 4x4 edge of this column.
          if (mask_4x4_int & 1) {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }
            direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                          height, pitch, 1, 1);

            pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                                   height, pitch, pivot, line_length, 1,
                                   direct);

            aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                 lfi->mblim, lfi->lim, lfi->hev_thr);

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          }
        }
      } else if (mask_4x4_int & 1) {
        // Only the internal 4x4 edge of this column needs filtering.
        direct =
            pick_min_grad_direct(src, 4, row, col, width, height, pitch, 1, 1);

        pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                               height, pitch, pivot, line_length, 1, direct);

        aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                             lfi->mblim, lfi->lim, lfi->hev_thr);

        for (i = 0; i < 256; ++i)
          if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
      }
#else   // CONFIG_LPF_DIRECT
      // In-place variant: the kernels operate directly on s.
      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          // Two adjacent 16x16 flags: one 16-wide call covers both columns.
          aom_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr);
          count = 2;
        } else {
          aom_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          aom_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);

          // Internal 4x4 edges of the two columns, 4 rows below the edge.
          if ((mask_4x4_int & 3) == 3) {
            aom_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              // Second column only: 8 pixels to the right, next thresholds.
              aom_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          aom_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);

          // Internal 4x4 edges, handled exactly as in the 8x8 case.
          if ((mask_4x4_int & 3) == 3) {
            aom_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              aom_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else if (mask_4x4_int & 1) {
        // Only the internal 4x4 edge of this column needs filtering.
        aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                             lfi->hev_thr);
      }
#endif  // CONFIG_LPF_DIRECT
    }
#if CONFIG_LPF_DIRECT
    idx_c += col_step * count;
#endif
    // Step past the column(s) just handled and drop their mask bits.
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}
1404
1405 #if CONFIG_HIGHBITDEPTH
highbd_filter_selectively_horiz(uint16_t * s,int pitch,unsigned int mask_16x16,unsigned int mask_8x8,unsigned int mask_4x4,unsigned int mask_4x4_int,const loop_filter_info_n * lfi_n,const uint8_t * lfl,int bd)1406 static void highbd_filter_selectively_horiz(
1407 uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
1408 unsigned int mask_4x4, unsigned int mask_4x4_int,
1409 const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
1410 unsigned int mask;
1411 int count;
1412
1413 for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
1414 mask >>= count) {
1415 const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
1416
1417 count = 1;
1418 if (mask & 1) {
1419 if (mask_16x16 & 1) {
1420 if ((mask_16x16 & 3) == 3) {
1421 aom_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
1422 lfi->hev_thr, bd);
1423 count = 2;
1424 } else {
1425 aom_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
1426 lfi->hev_thr, bd);
1427 }
1428 } else if (mask_8x8 & 1) {
1429 if ((mask_8x8 & 3) == 3) {
1430 // Next block's thresholds.
1431 const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
1432
1433 aom_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
1434 lfi->hev_thr, lfin->mblim, lfin->lim,
1435 lfin->hev_thr, bd);
1436
1437 if ((mask_4x4_int & 3) == 3) {
1438 aom_highbd_lpf_horizontal_4_dual(
1439 s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
1440 lfin->mblim, lfin->lim, lfin->hev_thr, bd);
1441 } else {
1442 if (mask_4x4_int & 1) {
1443 aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
1444 lfi->lim, lfi->hev_thr, bd);
1445 } else if (mask_4x4_int & 2) {
1446 aom_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
1447 lfin->lim, lfin->hev_thr, bd);
1448 }
1449 }
1450 count = 2;
1451 } else {
1452 aom_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
1453 lfi->hev_thr, bd);
1454
1455 if (mask_4x4_int & 1) {
1456 aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
1457 lfi->lim, lfi->hev_thr, bd);
1458 }
1459 }
1460 } else if (mask_4x4 & 1) {
1461 if ((mask_4x4 & 3) == 3) {
1462 // Next block's thresholds.
1463 const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
1464
1465 aom_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
1466 lfi->hev_thr, lfin->mblim, lfin->lim,
1467 lfin->hev_thr, bd);
1468 if ((mask_4x4_int & 3) == 3) {
1469 aom_highbd_lpf_horizontal_4_dual(
1470 s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
1471 lfin->mblim, lfin->lim, lfin->hev_thr, bd);
1472 } else {
1473 if (mask_4x4_int & 1) {
1474 aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
1475 lfi->lim, lfi->hev_thr, bd);
1476 } else if (mask_4x4_int & 2) {
1477 aom_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
1478 lfin->lim, lfin->hev_thr, bd);
1479 }
1480 }
1481 count = 2;
1482 } else {
1483 aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
1484 lfi->hev_thr, bd);
1485
1486 if (mask_4x4_int & 1) {
1487 aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
1488 lfi->lim, lfi->hev_thr, bd);
1489 }
1490 }
1491 } else if (mask_4x4_int & 1) {
1492 aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
1493 lfi->hev_thr, bd);
1494 }
1495 }
1496 s += 8 * count;
1497 lfl += count;
1498 mask_16x16 >>= count;
1499 mask_8x8 >>= count;
1500 mask_4x4 >>= count;
1501 mask_4x4_int >>= count;
1502 }
1503 }
1504 #endif // CONFIG_HIGHBITDEPTH
1505
1506 // This function ors into the current lfm structure, where to do loop
1507 // filters for the specific mi we are looking at. It uses information
1508 // including the block_size_type (32x16, 32x32, etc.), the transform size,
1509 // whether there were any coefficients encoded, and the loop filter strength
1510 // block we are currently looking at. Shift is used to position the
1511 // 1's we produce.
1512 // TODO(JBB) Need another function for different resolution color..
build_masks(AV1_COMMON * const cm,const loop_filter_info_n * const lfi_n,const MODE_INFO * mi,const int shift_y,const int shift_uv,LOOP_FILTER_MASK * lfm)1513 static void build_masks(AV1_COMMON *const cm,
1514 const loop_filter_info_n *const lfi_n,
1515 const MODE_INFO *mi, const int shift_y,
1516 const int shift_uv, LOOP_FILTER_MASK *lfm) {
1517 const MB_MODE_INFO *mbmi = &mi->mbmi;
1518 const BLOCK_SIZE block_size = mbmi->sb_type;
1519 // TODO(debargha): Check if masks can be setup correctly when
1520 // rectangular transfroms are used with the EXT_TX expt.
1521 const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
1522 const TX_SIZE tx_size_y_left = txsize_horz_map[mbmi->tx_size];
1523 const TX_SIZE tx_size_y_above = txsize_vert_map[mbmi->tx_size];
1524 const TX_SIZE tx_size_uv =
1525 txsize_sqr_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
1526 const TX_SIZE tx_size_uv_left =
1527 txsize_horz_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
1528 const TX_SIZE tx_size_uv_above =
1529 txsize_vert_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
1530 #if CONFIG_EXT_DELTA_Q
1531 #if CONFIG_LOOPFILTER_LEVEL
1532 const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
1533 #else
1534 #if CONFIG_LPF_SB
1535 const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
1536 #else
1537 const int filter_level = get_filter_level(cm, lfi_n, mbmi);
1538 #endif // CONFIG_LPF_SB
1539 #endif
1540 #else
1541 const int filter_level = get_filter_level(lfi_n, mbmi);
1542 (void)cm;
1543 #endif
1544 uint64_t *const left_y = &lfm->left_y[tx_size_y_left];
1545 uint64_t *const above_y = &lfm->above_y[tx_size_y_above];
1546 uint64_t *const int_4x4_y = &lfm->int_4x4_y;
1547 uint16_t *const left_uv = &lfm->left_uv[tx_size_uv_left];
1548 uint16_t *const above_uv = &lfm->above_uv[tx_size_uv_above];
1549 uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv;
1550 int i;
1551
1552 // If filter level is 0 we don't loop filter.
1553 if (!filter_level) {
1554 return;
1555 } else {
1556 const int w = num_8x8_blocks_wide_lookup[block_size];
1557 const int h = num_8x8_blocks_high_lookup[block_size];
1558 const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
1559 const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
1560
1561 for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
1562 }
1563
1564 // These set 1 in the current block size for the block size edges.
1565 // For instance if the block size is 32x16, we'll set:
1566 // above = 1111
1567 // 0000
1568 // and
1569 // left = 1000
1570 // = 1000
1571 // NOTE : In this example the low bit is left most ( 1000 ) is stored as
1572 // 1, not 8...
1573 //
1574 // U and V set things on a 16 bit scale.
1575 //
1576 *above_y |= above_prediction_mask[block_size] << shift_y;
1577 *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
1578 *left_y |= left_prediction_mask[block_size] << shift_y;
1579 *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
1580
1581 // If the block has no coefficients and is not intra we skip applying
1582 // the loop filter on block edges.
1583 if (mbmi->skip && is_inter_block(mbmi)) return;
1584
1585 // Here we are adding a mask for the transform size. The transform
1586 // size mask is set to be correct for a 64x64 prediction block size. We
1587 // mask to match the size of the block we are working on and then shift it
1588 // into place..
1589 *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y_above])
1590 << shift_y;
1591 *above_uv |=
1592 (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv_above])
1593 << shift_uv;
1594
1595 *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y_left])
1596 << shift_y;
1597 *left_uv |=
1598 (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv_left])
1599 << shift_uv;
1600
1601 // Here we are trying to determine what to do with the internal 4x4 block
1602 // boundaries. These differ from the 4x4 boundaries on the outside edge of
1603 // an 8x8 in that the internal ones can be skipped and don't depend on
1604 // the prediction block size.
1605 if (tx_size_y == TX_4X4)
1606 *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
1607
1608 if (tx_size_uv == TX_4X4)
1609 *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
1610 }
1611
1612 // This function does the same thing as the one above with the exception that
1613 // it only affects the y masks. It exists because for blocks < 16x16 in size,
1614 // we only update u and v masks on the first block.
build_y_mask(AV1_COMMON * const cm,const loop_filter_info_n * const lfi_n,const MODE_INFO * mi,const int shift_y,int supertx_enabled,LOOP_FILTER_MASK * lfm)1615 static void build_y_mask(AV1_COMMON *const cm,
1616 const loop_filter_info_n *const lfi_n,
1617 const MODE_INFO *mi, const int shift_y,
1618 #if CONFIG_SUPERTX
1619 int supertx_enabled,
1620 #endif // CONFIG_SUPERTX
1621 LOOP_FILTER_MASK *lfm) {
1622 const MB_MODE_INFO *mbmi = &mi->mbmi;
1623 const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
1624 const TX_SIZE tx_size_y_left = txsize_horz_map[mbmi->tx_size];
1625 const TX_SIZE tx_size_y_above = txsize_vert_map[mbmi->tx_size];
1626 #if CONFIG_SUPERTX
1627 const BLOCK_SIZE block_size =
1628 supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type;
1629 #else
1630 const BLOCK_SIZE block_size = mbmi->sb_type;
1631 #endif
1632 #if CONFIG_EXT_DELTA_Q
1633 #if CONFIG_LOOPFILTER_LEVEL
1634 const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
1635 #else
1636 #if CONFIG_LPF_SB
1637 const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
1638 #else
1639 const int filter_level = get_filter_level(cm, lfi_n, mbmi);
1640 #endif // CONFIG_LPF_SB
1641 #endif
1642 #else
1643 const int filter_level = get_filter_level(lfi_n, mbmi);
1644 (void)cm;
1645 #endif
1646 uint64_t *const left_y = &lfm->left_y[tx_size_y_left];
1647 uint64_t *const above_y = &lfm->above_y[tx_size_y_above];
1648 uint64_t *const int_4x4_y = &lfm->int_4x4_y;
1649 int i;
1650
1651 if (!filter_level) {
1652 return;
1653 } else {
1654 const int w = num_8x8_blocks_wide_lookup[block_size];
1655 const int h = num_8x8_blocks_high_lookup[block_size];
1656 const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
1657 const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
1658
1659 for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
1660 }
1661
1662 *above_y |= above_prediction_mask[block_size] << shift_y;
1663 *left_y |= left_prediction_mask[block_size] << shift_y;
1664
1665 if (mbmi->skip && is_inter_block(mbmi)) return;
1666
1667 *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y_above])
1668 << shift_y;
1669
1670 *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y_left])
1671 << shift_y;
1672
1673 if (tx_size_y == TX_4X4)
1674 *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
1675 }
1676
1677 #if CONFIG_LOOPFILTERING_ACROSS_TILES
1678 // This function update the bit masks for the entire 64x64 region represented
1679 // by mi_row, mi_col. In case one of the edge is a tile boundary, loop filtering
1680 // for that edge is disabled. This function only check the tile boundary info
1681 // for the top left corner mi to determine the boundary information for the
1682 // top and left edge of the whole super block
update_tile_boundary_filter_mask(AV1_COMMON * const cm,const int mi_row,const int mi_col,LOOP_FILTER_MASK * lfm)1683 static void update_tile_boundary_filter_mask(AV1_COMMON *const cm,
1684 const int mi_row, const int mi_col,
1685 LOOP_FILTER_MASK *lfm) {
1686 int i;
1687 MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride + mi_col;
1688
1689 if (mi->mbmi.boundary_info & TILE_LEFT_BOUNDARY) {
1690 for (i = 0; i <= TX_32X32; i++) {
1691 lfm->left_y[i] &= 0xfefefefefefefefeULL;
1692 lfm->left_uv[i] &= 0xeeee;
1693 }
1694 }
1695
1696 if (mi->mbmi.boundary_info & TILE_ABOVE_BOUNDARY) {
1697 for (i = 0; i <= TX_32X32; i++) {
1698 lfm->above_y[i] &= 0xffffffffffffff00ULL;
1699 lfm->above_uv[i] &= 0xfff0;
1700 }
1701 }
1702 }
1703 #endif // CONFIG_LOOPFILTERING_ACROSS_TILES
1704
1705 // This function sets up the bit masks for the entire 64x64 region represented
1706 // by mi_row, mi_col.
1707 // TODO(JBB): This function only works for yv12.
av1_setup_mask(AV1_COMMON * const cm,const int mi_row,const int mi_col,MODE_INFO ** mi,const int mode_info_stride,LOOP_FILTER_MASK * lfm)1708 void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
1709 MODE_INFO **mi, const int mode_info_stride,
1710 LOOP_FILTER_MASK *lfm) {
1711 #if CONFIG_EXT_PARTITION
1712 assert(0 && "Not yet updated");
1713 #endif // CONFIG_EXT_PARTITION
1714 int idx_32, idx_16, idx_8;
1715 const loop_filter_info_n *const lfi_n = &cm->lf_info;
1716 MODE_INFO **mip = mi;
1717 MODE_INFO **mip2 = mi;
1718
1719 // These are offsets to the next mi in the 64x64 block. It is what gets
1720 // added to the mi ptr as we go through each loop. It helps us to avoid
1721 // setting up special row and column counters for each index. The last step
1722 // brings us out back to the starting position.
1723 const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4,
1724 -(mode_info_stride << 2) - 4 };
1725 const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2,
1726 -(mode_info_stride << 1) - 2 };
1727 const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 };
1728
1729 // Following variables represent shifts to position the current block
1730 // mask over the appropriate block. A shift of 36 to the left will move
1731 // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left
1732 // 4 rows to the appropriate spot.
1733 const int shift_32_y[] = { 0, 4, 32, 36 };
1734 const int shift_16_y[] = { 0, 2, 16, 18 };
1735 const int shift_8_y[] = { 0, 1, 8, 9 };
1736 const int shift_32_uv[] = { 0, 2, 8, 10 };
1737 const int shift_16_uv[] = { 0, 1, 4, 5 };
1738 int i;
1739 const int max_rows = AOMMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE);
1740 const int max_cols = AOMMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE);
1741
1742 av1_zero(*lfm);
1743 assert(mip[0] != NULL);
1744
1745 // TODO(jimbankoski): Try moving most of the following code into decode
1746 // loop and storing lfm in the mbmi structure so that we don't have to go
1747 // through the recursive loop structure multiple times.
1748 switch (mip[0]->mbmi.sb_type) {
1749 case BLOCK_64X64: build_masks(cm, lfi_n, mip[0], 0, 0, lfm); break;
1750 case BLOCK_64X32: build_masks(cm, lfi_n, mip[0], 0, 0, lfm);
1751 #if CONFIG_SUPERTX && CONFIG_TX64X64
1752 if (supertx_enabled(&mip[0]->mbmi)) break;
1753 #endif // CONFIG_SUPERTX && CONFIG_TX64X64
1754 mip2 = mip + mode_info_stride * 4;
1755 if (4 >= max_rows) break;
1756 build_masks(cm, lfi_n, mip2[0], 32, 8, lfm);
1757 break;
1758 case BLOCK_32X64: build_masks(cm, lfi_n, mip[0], 0, 0, lfm);
1759 #if CONFIG_SUPERTX && CONFIG_TX64X64
1760 if (supertx_enabled(&mip[0]->mbmi)) break;
1761 #endif // CONFIG_SUPERTX && CONFIG_TX64X64
1762 mip2 = mip + 4;
1763 if (4 >= max_cols) break;
1764 build_masks(cm, lfi_n, mip2[0], 4, 2, lfm);
1765 break;
1766 default:
1767 #if CONFIG_SUPERTX && CONFIG_TX64X64
1768 if (mip[0]->mbmi.tx_size == TX_64X64) {
1769 build_masks(cm, lfi_n, mip[0], 0, 0, lfm);
1770 } else {
1771 #endif // CONFIG_SUPERTX && CONFIG_TX64X64
1772 for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
1773 const int shift_y_32 = shift_32_y[idx_32];
1774 const int shift_uv_32 = shift_32_uv[idx_32];
1775 const int mi_32_col_offset = ((idx_32 & 1) << 2);
1776 const int mi_32_row_offset = ((idx_32 >> 1) << 2);
1777 if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
1778 continue;
1779 switch (mip[0]->mbmi.sb_type) {
1780 case BLOCK_32X32:
1781 build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
1782 break;
1783 case BLOCK_32X16:
1784 build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
1785 #if CONFIG_SUPERTX
1786 if (supertx_enabled(&mip[0]->mbmi)) break;
1787 #endif
1788 if (mi_32_row_offset + 2 >= max_rows) continue;
1789 mip2 = mip + mode_info_stride * 2;
1790 build_masks(cm, lfi_n, mip2[0], shift_y_32 + 16, shift_uv_32 + 4,
1791 lfm);
1792 break;
1793 case BLOCK_16X32:
1794 build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
1795 #if CONFIG_SUPERTX
1796 if (supertx_enabled(&mip[0]->mbmi)) break;
1797 #endif
1798 if (mi_32_col_offset + 2 >= max_cols) continue;
1799 mip2 = mip + 2;
1800 build_masks(cm, lfi_n, mip2[0], shift_y_32 + 2, shift_uv_32 + 1,
1801 lfm);
1802 break;
1803 default:
1804 #if CONFIG_SUPERTX
1805 if (mip[0]->mbmi.tx_size == TX_32X32) {
1806 build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
1807 break;
1808 }
1809 #endif
1810 for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
1811 const int shift_y_32_16 = shift_y_32 + shift_16_y[idx_16];
1812 const int shift_uv_32_16 = shift_uv_32 + shift_16_uv[idx_16];
1813 const int mi_16_col_offset =
1814 mi_32_col_offset + ((idx_16 & 1) << 1);
1815 const int mi_16_row_offset =
1816 mi_32_row_offset + ((idx_16 >> 1) << 1);
1817
1818 if (mi_16_col_offset >= max_cols ||
1819 mi_16_row_offset >= max_rows)
1820 continue;
1821
1822 switch (mip[0]->mbmi.sb_type) {
1823 case BLOCK_16X16:
1824 build_masks(cm, lfi_n, mip[0], shift_y_32_16,
1825 shift_uv_32_16, lfm);
1826 break;
1827 case BLOCK_16X8:
1828 #if CONFIG_SUPERTX
1829 if (supertx_enabled(&mip[0]->mbmi)) break;
1830 #endif
1831 build_masks(cm, lfi_n, mip[0], shift_y_32_16,
1832 shift_uv_32_16, lfm);
1833 if (mi_16_row_offset + 1 >= max_rows) continue;
1834 mip2 = mip + mode_info_stride;
1835 build_y_mask(cm, lfi_n, mip2[0], shift_y_32_16 + 8,
1836 #if CONFIG_SUPERTX
1837 0,
1838 #endif
1839 lfm);
1840 break;
1841 case BLOCK_8X16:
1842 #if CONFIG_SUPERTX
1843 if (supertx_enabled(&mip[0]->mbmi)) break;
1844 #endif
1845 build_masks(cm, lfi_n, mip[0], shift_y_32_16,
1846 shift_uv_32_16, lfm);
1847 if (mi_16_col_offset + 1 >= max_cols) continue;
1848 mip2 = mip + 1;
1849 build_y_mask(cm, lfi_n, mip2[0], shift_y_32_16 + 1,
1850 #if CONFIG_SUPERTX
1851 0,
1852 #endif
1853 lfm);
1854 break;
1855 default: {
1856 const int shift_y_32_16_8_zero =
1857 shift_y_32_16 + shift_8_y[0];
1858 #if CONFIG_SUPERTX
1859 if (mip[0]->mbmi.tx_size == TX_16X16) {
1860 build_masks(cm, lfi_n, mip[0], shift_y_32_16_8_zero,
1861 shift_uv_32_16, lfm);
1862 break;
1863 }
1864 #endif
1865 build_masks(cm, lfi_n, mip[0], shift_y_32_16_8_zero,
1866 shift_uv_32_16, lfm);
1867 mip += offset[0];
1868 for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
1869 const int shift_y_32_16_8 =
1870 shift_y_32_16 + shift_8_y[idx_8];
1871 const int mi_8_col_offset =
1872 mi_16_col_offset + ((idx_8 & 1));
1873 const int mi_8_row_offset =
1874 mi_16_row_offset + ((idx_8 >> 1));
1875
1876 if (mi_8_col_offset >= max_cols ||
1877 mi_8_row_offset >= max_rows)
1878 continue;
1879 build_y_mask(cm, lfi_n, mip[0], shift_y_32_16_8,
1880 #if CONFIG_SUPERTX
1881 supertx_enabled(&mip[0]->mbmi),
1882 #endif
1883 lfm);
1884 }
1885 break;
1886 }
1887 }
1888 }
1889 break;
1890 }
1891 }
1892 #if CONFIG_SUPERTX && CONFIG_TX64X64
1893 }
1894 #endif // CONFIG_SUPERTX && CONFIG_TX64X64
1895 break;
1896 }
1897 // The largest loopfilter we have is 16x16 so we use the 16x16 mask
1898 // for 32x32 transforms also.
1899 lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
1900 lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
1901 lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
1902 lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];
1903
1904 // We do at least 8 tap filter on every 32x32 even if the transform size
1905 // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
1906 // remove it from the 4x4.
1907 lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
1908 lfm->left_y[TX_4X4] &= ~left_border;
1909 lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
1910 lfm->above_y[TX_4X4] &= ~above_border;
1911 lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
1912 lfm->left_uv[TX_4X4] &= ~left_border_uv;
1913 lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
1914 lfm->above_uv[TX_4X4] &= ~above_border_uv;
1915
1916 // We do some special edge handling.
1917 if (mi_row + MAX_MIB_SIZE > cm->mi_rows) {
1918 const uint64_t rows = cm->mi_rows - mi_row;
1919
1920 // Each pixel inside the border gets a 1,
1921 const uint64_t mask_y = (((uint64_t)1 << (rows << MAX_MIB_SIZE_LOG2)) - 1);
1922 const uint16_t mask_uv =
1923 (((uint16_t)1 << (((rows + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1);
1924
1925 // Remove values completely outside our border.
1926 for (i = 0; i < TX_32X32; i++) {
1927 lfm->left_y[i] &= mask_y;
1928 lfm->above_y[i] &= mask_y;
1929 lfm->left_uv[i] &= mask_uv;
1930 lfm->above_uv[i] &= mask_uv;
1931 }
1932 lfm->int_4x4_y &= mask_y;
1933 lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv;
1934
1935 // We don't apply a wide loop filter on the last uv block row. If set
1936 // apply the shorter one instead.
1937 if (rows == 1) {
1938 lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
1939 lfm->above_uv[TX_16X16] = 0;
1940 }
1941 if (rows == 5) {
1942 lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
1943 lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
1944 }
1945 } else {
1946 lfm->above_int_4x4_uv = lfm->left_int_4x4_uv;
1947 }
1948
1949 if (mi_col + MAX_MIB_SIZE > cm->mi_cols) {
1950 const uint64_t columns = cm->mi_cols - mi_col;
1951
1952 // Each pixel inside the border gets a 1, the multiply copies the border
1953 // to where we need it.
1954 const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
1955 const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;
1956
1957 // Internal edges are not applied on the last column of the image so
1958 // we mask 1 more for the internal edges
1959 const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;
1960
1961 // Remove the bits outside the image edge.
1962 for (i = 0; i < TX_32X32; i++) {
1963 lfm->left_y[i] &= mask_y;
1964 lfm->above_y[i] &= mask_y;
1965 lfm->left_uv[i] &= mask_uv;
1966 lfm->above_uv[i] &= mask_uv;
1967 }
1968 lfm->int_4x4_y &= mask_y;
1969 lfm->left_int_4x4_uv &= mask_uv_int;
1970
1971 // We don't apply a wide loop filter on the last uv column. If set
1972 // apply the shorter one instead.
1973 if (columns == 1) {
1974 lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
1975 lfm->left_uv[TX_16X16] = 0;
1976 }
1977 if (columns == 5) {
1978 lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
1979 lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
1980 }
1981 }
1982 // We don't apply a loop filter on the first column in the image, mask that
1983 // out.
1984 if (mi_col == 0) {
1985 for (i = 0; i < TX_32X32; i++) {
1986 lfm->left_y[i] &= 0xfefefefefefefefeULL;
1987 lfm->left_uv[i] &= 0xeeee;
1988 }
1989 }
1990
1991 #if CONFIG_LOOPFILTERING_ACROSS_TILES
1992 if (av1_disable_loopfilter_on_tile_boundary(cm)) {
1993 update_tile_boundary_filter_mask(cm, mi_row, mi_col, lfm);
1994 }
1995 #endif // CONFIG_LOOPFILTERING_ACROSS_TILES
1996
1997 // Assert if we try to apply 2 different loop filters at the same position.
1998 assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
1999 assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
2000 assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
2001 assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
2002 assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
2003 assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
2004 assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
2005 assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16]));
2006 assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
2007 assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
2008 assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
2009 assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
2010 assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
2011 assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
2012 assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
2013 assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16]));
2014 }
2015
filter_selectively_vert(uint8_t * s,int pitch,unsigned int mask_16x16,unsigned int mask_8x8,unsigned int mask_4x4,unsigned int mask_4x4_int,const loop_filter_info_n * lfi_n,const uint8_t * lfl,uint8_t * const src,int mi_row,int mi_col,int idx_r,int col_step,int width,int height,int ss_x,int ss_y)2016 static void filter_selectively_vert(
2017 uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
2018 unsigned int mask_4x4, unsigned int mask_4x4_int,
2019 const loop_filter_info_n *lfi_n, const uint8_t *lfl
2020 #if CONFIG_LPF_DIRECT
2021 ,
2022 uint8_t *const src, int mi_row, int mi_col, int idx_r, int col_step,
2023 int width, int height, int ss_x, int ss_y
2024 #endif
2025 ) {
2026 unsigned int mask;
2027 #if CONFIG_LPF_DIRECT
2028 // scale for u, v plane
2029 width >>= ss_x;
2030 height >>= ss_y;
2031 int idx_c = 0;
2032 #endif
2033
2034 for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
2035 mask >>= 1) {
2036 const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
2037
2038 #if CONFIG_LPF_DIRECT
2039 int i;
2040 const int pivot = 8;
2041 const int left_filt_len = mask_16x16 & 1 ? 8 : 4;
2042 const int right_filt_len = mask_16x16 & 1 ? 8 : 4;
2043 const int line_length = 16;
2044 uint8_t block[128];
2045 int orig_pos[128];
2046
2047 // actual position for current pixel
2048 const int row = (mi_row + idx_r) * MI_SIZE >> ss_y;
2049 const int col = (mi_col + idx_c) * MI_SIZE >> ss_x;
2050
2051 // Could use asymmetric length in the future
2052 assert(left_filt_len == right_filt_len);
2053 (void)right_filt_len;
2054
2055 if ((mask_16x16 & 1) || (mask_8x8 & 1) || (mask_4x4 & 1)) {
2056 for (i = 0; i < 128; ++i) {
2057 block[i] = 0;
2058 orig_pos[i] = -1;
2059 }
2060
2061 const int direct = pick_min_grad_direct(src, left_filt_len, row, col,
2062 width, height, pitch, 1, 0);
2063
2064 pick_filter_block_vert(src, block, orig_pos, left_filt_len, row, col,
2065 width, height, pitch, pivot, line_length, 1,
2066 direct);
2067
2068 // apply filtering
2069 if (mask_16x16 & 1) {
2070 aom_lpf_vertical_16(block + pivot, line_length, lfi->mblim, lfi->lim,
2071 lfi->hev_thr);
2072 } else if (mask_8x8 & 1) {
2073 aom_lpf_vertical_8(block + pivot, line_length, lfi->mblim, lfi->lim,
2074 lfi->hev_thr);
2075 } else if (mask_4x4 & 1) {
2076 aom_lpf_vertical_4(block + pivot, line_length, lfi->mblim, lfi->lim,
2077 lfi->hev_thr);
2078 }
2079
2080 for (i = 0; i < 128; ++i)
2081 if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
2082 }
2083
2084 // filter inner 4x4
2085 if (mask_4x4_int & 1) {
2086 for (i = 0; i < 128; ++i) {
2087 block[i] = 0;
2088 orig_pos[i] = -1;
2089 }
2090
2091 const int direct = pick_min_grad_direct(src, 4, row, col + 4, width,
2092 height, pitch, 1, 0);
2093
2094 pick_filter_block_vert(src, block, orig_pos, 4, row, col + 4, width,
2095 height, pitch, pivot, line_length, 1, direct);
2096
2097 aom_lpf_vertical_4(block + pivot, line_length, lfi->mblim, lfi->lim,
2098 lfi->hev_thr);
2099
2100 for (i = 0; i < 128; ++i)
2101 if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
2102 }
2103 #else
2104 if (mask & 1) {
2105 if (mask_16x16 & 1) {
2106 aom_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
2107 } else if (mask_8x8 & 1) {
2108 aom_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
2109 } else if (mask_4x4 & 1) {
2110 aom_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
2111 }
2112 }
2113 if (mask_4x4_int & 1)
2114 aom_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
2115 #endif // CONFIG_LPF_DIRECT
2116 #if CONFIG_LPF_DIRECT
2117 idx_c += col_step;
2118 #endif
2119 s += 8;
2120 lfl += 1;
2121 mask_16x16 >>= 1;
2122 mask_8x8 >>= 1;
2123 mask_4x4 >>= 1;
2124 mask_4x4_int >>= 1;
2125 }
2126 }
2127
2128 #if CONFIG_HIGHBITDEPTH
highbd_filter_selectively_vert(uint16_t * s,int pitch,unsigned int mask_16x16,unsigned int mask_8x8,unsigned int mask_4x4,unsigned int mask_4x4_int,const loop_filter_info_n * lfi_n,const uint8_t * lfl,int bd)2129 static void highbd_filter_selectively_vert(
2130 uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
2131 unsigned int mask_4x4, unsigned int mask_4x4_int,
2132 const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
2133 unsigned int mask;
2134
2135 for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
2136 mask >>= 1) {
2137 const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
2138
2139 if (mask & 1) {
2140 if (mask_16x16 & 1) {
2141 aom_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
2142 bd);
2143 } else if (mask_8x8 & 1) {
2144 aom_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
2145 bd);
2146 } else if (mask_4x4 & 1) {
2147 aom_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
2148 bd);
2149 }
2150 }
2151 if (mask_4x4_int & 1)
2152 aom_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
2153 lfi->hev_thr, bd);
2154 s += 8;
2155 lfl += 1;
2156 mask_16x16 >>= 1;
2157 mask_8x8 >>= 1;
2158 mask_4x4 >>= 1;
2159 mask_4x4_int >>= 1;
2160 }
2161 }
2162 #endif // CONFIG_HIGHBITDEPTH
2163
// Per-row edge masks produced by get_filter_level_and_masks_non420().
// Each member is a bit mask with one bit per unit of the row; a set bit
// selects the corresponding filter size for that unit's edge.
typedef struct {
  unsigned int m16x16;  // units whose edge gets the 16-wide filter
  unsigned int m8x8;    // units whose edge gets the 8-wide filter
  unsigned int m4x4;    // units whose edge gets the 4-wide filter
} FilterMasks;
2169
// Get filter level and masks for the given row index 'idx_r'. (Only used for
// the non420 case).
// Note: 'row_masks_ptr' and/or 'col_masks_ptr' can be passed NULL.
//
// Walks the mode-info units of row 'idx_r' of the superblock at
// (mi_row, mi_col), stepping 'col_step' mi columns at a time and stopping at
// cm->mib_size or the right frame edge (cm->mi_cols), whichever comes first.
// For every visited unit it:
//   - stores the unit's filter level in lfl_r[c_step] ('lfl_r' is written
//     unconditionally and must not be NULL);
//   - sets bit c_step in the column masks (vertical edges) and row masks
//     (horizontal edges) according to the transform size at that edge;
//   - sets bit c_step in the internal 4x4 masks when the transform is
//     narrower / shorter than 8.
// Units whose filter level is 0 contribute no mask bits.
// The four results are copied to the output pointers that are non-NULL
// ('mask_4x4_int_r_ptr' and 'mask_4x4_int_c_ptr' may be NULL as well).
// With CONFIG_VAR_TX the function also updates cm->top_txfm_context[pl] and
// cm->left_txfm_context[pl] as a side effect.
static void get_filter_level_and_masks_non420(
    AV1_COMMON *const cm, const struct macroblockd_plane *const plane, int pl,
    MODE_INFO **mib, int mi_row, int mi_col, int idx_r, uint8_t *const lfl_r,
    unsigned int *const mask_4x4_int_r_ptr,
    unsigned int *const mask_4x4_int_c_ptr, FilterMasks *const row_masks_ptr,
    FilterMasks *const col_masks_ptr) {
  const int ss_x = plane->subsampling_x;
  const int ss_y = plane->subsampling_y;
  // Horizontal stride in mi units; doubled when the plane is subsampled in x.
  const int col_step = mi_size_wide[BLOCK_8X8] << ss_x;
  FilterMasks row_masks, col_masks;
  memset(&row_masks, 0, sizeof(row_masks));
  memset(&col_masks, 0, sizeof(col_masks));
  unsigned int mask_4x4_int_r = 0, mask_4x4_int_c = 0;
  // Row index expressed in 8x8 units.
  const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];

  // Determine the vertical edges that need filtering
  int idx_c;
  for (idx_c = 0; idx_c < cm->mib_size && mi_col + idx_c < cm->mi_cols;
       idx_c += col_step) {
    const MODE_INFO *mi = mib[idx_r * cm->mi_stride + idx_c];
    const MB_MODE_INFO *mbmi = &mi[0].mbmi;
    const BLOCK_SIZE sb_type = mbmi->sb_type;
    // Skipped inter blocks only get their outer block edges filtered.
    const int skip_this = mbmi->skip && is_inter_block(mbmi);
    // Map index to 8x8 unit
    const int c = idx_c >> mi_width_log2_lookup[BLOCK_8X8];

    // Position of this unit inside its block, in 8x8 units.
    const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
    const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);

    // left edge of current unit is block/partition edge -> no skip
    const int block_edge_left =
        (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !blk_col : 1;
    const int skip_this_c = skip_this && !block_edge_left;
    // top edge of current unit is block/partition edge -> no skip
    const int block_edge_above =
        (num_4x4_blocks_high_lookup[sb_type] > 1) ? !blk_row : 1;
    const int skip_this_r = skip_this && !block_edge_above;

    TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
                          ? av1_get_uv_tx_size(mbmi, plane)
                          : mbmi->tx_size;

    // For subsampled planes, flags the last 8x8 column / row of the frame.
    // When set, a 16-wide edge is downgraded to 8-wide and the internal 4x4
    // edge is not filtered (see the mask building below).
    const int skip_border_4x4_c =
        ss_x && mi_col + idx_c >= cm->mi_cols - mi_size_wide[BLOCK_8X8];
    const int skip_border_4x4_r =
        ss_y && mi_row + idx_r >= cm->mi_rows - mi_size_high[BLOCK_8X8];

    int tx_size_mask = 0;
    // Unit coordinates at this plane's resolution.
    const int c_step = (c >> ss_x);
    const int r_step = (r >> ss_y);
    // Bit representing this unit in every output mask (row masks included).
    const int col_mask = 1 << c_step;

#if CONFIG_VAR_TX
    // Non-skipped inter blocks carry a per-position transform size; use the
    // one recorded for this unit instead of the block-level size.
    if (is_inter_block(mbmi) && !mbmi->skip) {
      const int tx_row_idx =
          (blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1;
      const int tx_col_idx =
          (blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1;
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
      const BLOCK_SIZE bsize =
          AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, plane));
#else
      const BLOCK_SIZE bsize = get_plane_block_size(mbmi->sb_type, plane);
#endif
      const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx];
      tx_size = (plane->plane_type == PLANE_TYPE_UV)
                    ? uv_txsize_lookup[bsize][mb_tx_size][0][0]
                    : mb_tx_size;
    }
#endif

// Filter level can vary per MI
// The level is stored first and the unit is skipped when it is 0.
// NOTE(review): this 'continue' also skips the CONFIG_VAR_TX context updates
// further down for the unit - confirm that is intended.
#if CONFIG_EXT_DELTA_Q
#if CONFIG_LOOPFILTER_LEVEL
    if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, 0, 0, mbmi)))
      continue;
#else
#if CONFIG_LPF_SB
    if (!(lfl_r[c_step] =
              get_filter_level(cm, &cm->lf_info, mi_row, mi_col, mbmi)))
      continue;
#else
    if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, mbmi))) continue;
#endif  // CONFIG_LPF_SB
#endif
#else
    if (!(lfl_r[c_step] = get_filter_level(&cm->lf_info, mbmi))) continue;
#endif

#if CONFIG_VAR_TX
    TX_SIZE tx_size_horz_edge, tx_size_vert_edge;

    // filt_len_vert_edge is the length of deblocking filter for a vertical edge
    // The filter direction of a vertical edge is horizontal.
    // Thus, filt_len_vert_edge is determined as the minimum width of the two
    // transform block sizes on the left and right (current block) side of edge
    const int filt_len_vert_edge = AOMMIN(
        tx_size_wide[tx_size],
        tx_size_wide[cm->left_txfm_context[pl][((mi_row + idx_r) & MAX_MIB_MASK)
                                               << TX_UNIT_HIGH_LOG2]]);

    // filt_len_horz_edge is the len of deblocking filter for a horizontal edge
    // The filter direction of a horizontal edge is vertical.
    // Thus, filt_len_horz_edge is determined as the minimum height of the two
    // transform block sizes on the top and bottom (current block) side of edge
    const int filt_len_horz_edge =
        AOMMIN(tx_size_high[tx_size],
               tx_size_high[cm->top_txfm_context[pl][(mi_col + idx_c)
                                                     << TX_UNIT_WIDE_LOG2]]);

    // transform width/height of current block
    const int tx_wide_cur = tx_size_wide[tx_size];
    const int tx_high_cur = tx_size_high[tx_size];

    // tx_size_vert_edge is square transform size for a vertical deblocking edge
    // It determines the type of filter applied to the vertical edge
    // Similarly, tx_size_horz_edge is for a horizontal deblocking edge
    tx_size_vert_edge = get_sqr_tx_size(filt_len_vert_edge);
    tx_size_horz_edge = get_sqr_tx_size(filt_len_horz_edge);

    // Record this unit's transform size so the next unit to the right / below
    // reads it as its left / top neighbour.
    memset(cm->top_txfm_context[pl] + ((mi_col + idx_c) << TX_UNIT_WIDE_LOG2),
           tx_size, mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2);
    memset(cm->left_txfm_context[pl] +
               (((mi_row + idx_r) & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2),
           tx_size, mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2);
#else
    // The length (or equally the square tx size) of deblocking filter is only
    // determined by
    // a) current block's width for a vertical deblocking edge
    // b) current block's height for a horizontal deblocking edge
    TX_SIZE tx_size_vert_edge = txsize_horz_map[tx_size];
    TX_SIZE tx_size_horz_edge = txsize_vert_map[tx_size];
    (void)pl;
#endif  // CONFIG_VAR_TX

    // Alignment mask in plane-resolution units: an edge is only marked when
    // the unit index is a multiple of (tx_size_mask + 1).
    if (tx_size_vert_edge == TX_32X32)
      tx_size_mask = 3;
    else if (tx_size_vert_edge == TX_16X16)
      tx_size_mask = 1;
    else
      tx_size_mask = 0;

    // Build masks based on the transform size of each block
    // handle vertical mask
    // The TX_32X32 and TX_16X16 branches below are identical apart from the
    // value of tx_size_mask computed above.
    if (tx_size_vert_edge == TX_32X32) {
      if (!skip_this_c && (c_step & tx_size_mask) == 0) {
        if (!skip_border_4x4_c)
          col_masks.m16x16 |= col_mask;
        else
          col_masks.m8x8 |= col_mask;
      }
    } else if (tx_size_vert_edge == TX_16X16) {
      if (!skip_this_c && (c_step & tx_size_mask) == 0) {
        if (!skip_border_4x4_c)
          col_masks.m16x16 |= col_mask;
        else
          col_masks.m8x8 |= col_mask;
      }
    } else {
      // force 8x8 filtering on 32x32 boundaries
      // (tx_size_mask is 0 in this branch, so the alignment test always
      // passes.)
      if (!skip_this_c && (c_step & tx_size_mask) == 0) {
        if (tx_size_vert_edge == TX_8X8 || (c_step & 3) == 0)
          col_masks.m8x8 |= col_mask;
        else
          col_masks.m4x4 |= col_mask;
      }

      // Internal 4x4 edge: uses skip_this rather than skip_this_c, so it is
      // dropped for every skipped inter unit, block edge or not.
#if CONFIG_VAR_TX
      if (!skip_this && tx_wide_cur < 8 && !skip_border_4x4_c &&
          (c_step & tx_size_mask) == 0)
#else
      if (!skip_this && tx_size_vert_edge < TX_8X8 && !skip_border_4x4_c &&
          (c_step & tx_size_mask) == 0)
#endif  // CONFIG_VAR_TX
        mask_4x4_int_c |= col_mask;
    }

    // Same alignment mask, now for the horizontal edge.
    if (tx_size_horz_edge == TX_32X32)
      tx_size_mask = 3;
    else if (tx_size_horz_edge == TX_16X16)
      tx_size_mask = 1;
    else
      tx_size_mask = 0;

    // set horizontal mask
    // Alignment is tested on the row (r_step) but the bit that gets set is
    // still this unit's column bit (col_mask).
    if (tx_size_horz_edge == TX_32X32) {
      if (!skip_this_r && (r_step & tx_size_mask) == 0) {
        if (!skip_border_4x4_r)
          row_masks.m16x16 |= col_mask;
        else
          row_masks.m8x8 |= col_mask;
      }
    } else if (tx_size_horz_edge == TX_16X16) {
      if (!skip_this_r && (r_step & tx_size_mask) == 0) {
        if (!skip_border_4x4_r)
          row_masks.m16x16 |= col_mask;
        else
          row_masks.m8x8 |= col_mask;
      }
    } else {
      // force 8x8 filtering on 32x32 boundaries
      if (!skip_this_r && (r_step & tx_size_mask) == 0) {
        if (tx_size_horz_edge == TX_8X8 || (r_step & 3) == 0)
          row_masks.m8x8 |= col_mask;
        else
          row_masks.m4x4 |= col_mask;
      }

#if CONFIG_VAR_TX
      if (!skip_this && tx_high_cur < 8 && !skip_border_4x4_r &&
          (r_step & tx_size_mask) == 0)
#else
      if (!skip_this && tx_size_horz_edge < TX_8X8 && !skip_border_4x4_r &&
          (r_step & tx_size_mask) == 0)
#endif  // CONFIG_VAR_TX
        mask_4x4_int_r |= col_mask;
    }
  }

  // Hand back only the results the caller asked for.
  if (row_masks_ptr) *row_masks_ptr = row_masks;
  if (col_masks_ptr) *col_masks_ptr = col_masks;
  if (mask_4x4_int_c_ptr) *mask_4x4_int_c_ptr = mask_4x4_int_c;
  if (mask_4x4_int_r_ptr) *mask_4x4_int_r_ptr = mask_4x4_int_r;
}
2397
// Vertical-edge deblocking pass for one superblock of 'plane', computing the
// edge masks on the fly with get_filter_level_and_masks_non420() instead of
// using a prebuilt LOOP_FILTER_MASK.
//
// Rows are visited in steps of one 8x8 unit at plane resolution. For each row
// the column masks are computed, the bit of the first unit is cleared when
// that unit sits on the left frame edge (or, with
// CONFIG_LOOPFILTERING_ACROSS_TILES, on a tile's left boundary that must not
// be filtered), and the row is filtered. plane->dst.buf is advanced while
// iterating and restored before returning.
void av1_filter_block_plane_non420_ver(AV1_COMMON *const cm,
                                       struct macroblockd_plane *plane,
                                       MODE_INFO **mib, int mi_row, int mi_col,
                                       int pl) {
  struct buf_2d *const out = &plane->dst;
  uint8_t *const saved_buf = out->buf;
  const int ss_y = plane->subsampling_y;
  const int mi_rows_per_step = mi_size_high[BLOCK_8X8] << ss_y;
#if CONFIG_LPF_DIRECT
  const int ss_x = plane->subsampling_x;
  const int col_step = mi_size_wide[BLOCK_8X8] << ss_x;
#endif
  uint8_t levels[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } };

  for (int idx_r = 0; idx_r < cm->mib_size && mi_row + idx_r < cm->mi_rows;
       idx_r += mi_rows_per_step) {
    const int unit_row = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
    uint8_t *const level_row = levels[unit_row];
    unsigned int inner_4x4;
    FilterMasks edges;

    get_filter_level_and_masks_non420(cm, plane, pl, mib, mi_row, mi_col, idx_r,
                                      level_row, NULL, &inner_4x4, NULL,
                                      &edges);

    // Disable filtering on the leftmost column or tile boundary: clear bit 0
    // of the edge masks (the internal 4x4 mask is left untouched).
    unsigned int keep_bits = (mi_col == 0) ? ~1u : ~0u;
#if CONFIG_LOOPFILTERING_ACROSS_TILES
    MODE_INFO *const first_mi =
        cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col;
    if (av1_disable_loopfilter_on_tile_boundary(cm) &&
        (first_mi->mbmi.boundary_info & TILE_LEFT_BOUNDARY) != 0) {
      keep_bits = 0xfffffffe;
    }
#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES

    const unsigned int edge_16x16 = edges.m16x16 & keep_bits;
    const unsigned int edge_8x8 = edges.m8x8 & keep_bits;
    const unsigned int edge_4x4 = edges.m4x4 & keep_bits;

#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(out->buf), out->stride,
                                     edge_16x16, edge_8x8, edge_4x4, inner_4x4,
                                     &cm->lf_info, level_row,
                                     (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
      filter_selectively_vert(out->buf, out->stride, edge_16x16, edge_8x8,
                              edge_4x4, inner_4x4, &cm->lf_info, level_row
#if CONFIG_LPF_DIRECT
                              ,
                              out->buf0, mi_row, mi_col, idx_r, col_step,
                              cm->width, cm->height, ss_x, ss_y
#endif  // CONFIG_LPF_DIRECT
                              );
    }

    // Next row of 8x8 units.
    out->buf += 8 * out->stride;
  }

  // Restore the buffer pointer for whichever pass runs next.
  out->buf = saved_buf;
}
2457
// Horizontal-edge deblocking pass for one superblock of 'plane', computing the
// edge masks on the fly with get_filter_level_and_masks_non420() instead of
// using a prebuilt LOOP_FILTER_MASK.
//
// Rows are visited in steps of one 8x8 unit at plane resolution. The row masks
// are zeroed for the first row of the frame (and, with
// CONFIG_LOOPFILTERING_ACROSS_TILES, for a row on a tile's top boundary that
// must not be filtered); the internal 4x4 mask is passed through unchanged.
// plane->dst.buf is advanced while iterating and restored before returning.
void av1_filter_block_plane_non420_hor(AV1_COMMON *const cm,
                                       struct macroblockd_plane *plane,
                                       MODE_INFO **mib, int mi_row, int mi_col,
                                       int pl) {
  struct buf_2d *const out = &plane->dst;
  uint8_t *const saved_buf = out->buf;
  const int ss_y = plane->subsampling_y;
  const int mi_rows_per_step = mi_size_high[BLOCK_8X8] << ss_y;
#if CONFIG_LPF_DIRECT
  const int ss_x = plane->subsampling_x;
  const int col_step = mi_size_wide[BLOCK_8X8] << ss_x;
#endif
  uint8_t levels[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } };

  for (int idx_r = 0; idx_r < cm->mib_size && mi_row + idx_r < cm->mi_rows;
       idx_r += mi_rows_per_step) {
    const int unit_row = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
    uint8_t *const level_row = levels[unit_row];
    unsigned int inner_4x4;
    FilterMasks edges;

    get_filter_level_and_masks_non420(cm, plane, pl, mib, mi_row, mi_col, idx_r,
                                      level_row, &inner_4x4, NULL, &edges,
                                      NULL);

    // Disable filtering on the abovemost row or tile boundary.
    int on_blocked_tile_edge = 0;
#if CONFIG_LOOPFILTERING_ACROSS_TILES
    const MODE_INFO *const first_mi =
        cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col;
    on_blocked_tile_edge =
        av1_disable_loopfilter_on_tile_boundary(cm) &&
        (first_mi->mbmi.boundary_info & TILE_ABOVE_BOUNDARY) != 0;
#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
    if (on_blocked_tile_edge || mi_row + idx_r == 0) {
      memset(&edges, 0, sizeof(edges));
    }

#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(out->buf),
                                      out->stride, edges.m16x16, edges.m8x8,
                                      edges.m4x4, inner_4x4, &cm->lf_info,
                                      level_row, (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
      filter_selectively_horiz(out->buf, out->stride, edges.m16x16, edges.m8x8,
                               edges.m4x4, inner_4x4, &cm->lf_info, level_row
#if CONFIG_LPF_DIRECT
                               ,
                               out->buf0, mi_row, mi_col, idx_r, col_step,
                               cm->width, cm->height, ss_x, ss_y
#endif  // CONFIG_LPF_DIRECT
                               );
    }

    // Next row of 8x8 units.
    out->buf += 8 * out->stride;
  }

  // Restore the buffer pointer for any later pass.
  out->buf = saved_buf;
}
2514
// Vertical-edge deblocking pass for a non-subsampled plane, driven by the
// luma masks of a prebuilt LOOP_FILTER_MASK (left_y[] and int_4x4_y).
//
// Two mask rows are filtered per iteration: the low 16 bits of each 64-bit
// mask are handed to the *_row2 helper together with the filter levels of the
// first of the two rows, then the masks are shifted down by 2 * MI_SIZE bits.
// plane->dst.buf is advanced while iterating and restored before returning.
void av1_filter_block_plane_ss00_ver(AV1_COMMON *const cm,
                                     struct macroblockd_plane *const plane,
                                     int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const out = &plane->dst;
  uint8_t *const saved_buf = out->buf;
  uint64_t bits_16x16 = lfm->left_y[TX_16X16];
  uint64_t bits_8x8 = lfm->left_y[TX_8X8];
  uint64_t bits_4x4 = lfm->left_y[TX_4X4];
  uint64_t bits_4x4_int = lfm->int_4x4_y;

  assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);

  // Vertical pass: do 2 rows at one time
  for (int row = 0; row < cm->mib_size && mi_row + row < cm->mi_rows;
       row += 2) {
    // Mask bits belonging to the current pair of rows.
    const unsigned int pair_16x16 = (unsigned int)(bits_16x16 & 0xffff);
    const unsigned int pair_8x8 = (unsigned int)(bits_8x8 & 0xffff);
    const unsigned int pair_4x4 = (unsigned int)(bits_4x4 & 0xffff);
    const unsigned int pair_4x4_int = (unsigned int)(bits_4x4_int & 0xffff);

#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_vert_row2(
          plane->subsampling_x, CONVERT_TO_SHORTPTR(out->buf), out->stride,
          pair_16x16, pair_8x8, pair_4x4, pair_4x4_int, &cm->lf_info,
          &lfm->lfl_y[row][0], (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
      filter_selectively_vert_row2(plane->subsampling_x, out->buf, out->stride,
                                   pair_16x16, pair_8x8, pair_4x4,
                                   pair_4x4_int, &cm->lf_info,
                                   &lfm->lfl_y[row][0]);
    }

    // Move on to the next pair of rows.
    out->buf += 2 * MI_SIZE * out->stride;
    bits_16x16 >>= 2 * MI_SIZE;
    bits_8x8 >>= 2 * MI_SIZE;
    bits_4x4 >>= 2 * MI_SIZE;
    bits_4x4_int >>= 2 * MI_SIZE;
  }

  // Restore the buffer pointer for the horizontal pass.
  out->buf = saved_buf;
}
2558
// Horizontal-edge deblocking pass for a non-subsampled plane, driven by the
// luma masks of a prebuilt LOOP_FILTER_MASK (above_y[] and int_4x4_y).
//
// One mask row (the low 8 bits of each 64-bit mask) is filtered per iteration
// using the filter levels in lfm->lfl_y[r]; the masks are then shifted down by
// MI_SIZE bits. On the first row of the frame the three edge masks are forced
// to 0 while the internal 4x4 mask is still applied. plane->dst.buf is
// advanced while iterating and restored before returning.
//
// With CONFIG_LPF_DIRECT the 8-bit filter call is compiled out here (in that
// configuration filter_selectively_horiz() takes additional arguments that
// this function does not have).
void av1_filter_block_plane_ss00_hor(AV1_COMMON *const cm,
                                     struct macroblockd_plane *const plane,
                                     int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  int r;
  uint64_t mask_16x16 = lfm->above_y[TX_16X16];
  uint64_t mask_8x8 = lfm->above_y[TX_8X8];
  uint64_t mask_4x4 = lfm->above_y[TX_4X4];
  uint64_t mask_4x4_int = lfm->int_4x4_y;

  assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);

  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) {
    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      // There is nothing above the first row of the frame to filter against.
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16 & 0xff;
      mask_8x8_r = mask_8x8 & 0xff;
      mask_4x4_r = mask_4x4 & 0xff;
    }

    // Both arms are braced on purpose. Previously the 'else' was unbraced, so
    // when CONFIG_HIGHBITDEPTH and CONFIG_LPF_DIRECT were both enabled (8-bit
    // call compiled out) it captured the 'dst->buf += ...' statement below and
    // the high-bit-depth path never advanced to the next row.
#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
          mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r][0],
          (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
#if !CONFIG_LPF_DIRECT
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
                               &lfm->lfl_y[r][0]);
#else
      // Nothing to do for 8-bit content in this configuration; keep the
      // per-row masks referenced so they do not trigger unused warnings.
      (void)mask_16x16_r;
      (void)mask_8x8_r;
      (void)mask_4x4_r;
#endif  // !CONFIG_LPF_DIRECT
    }

    // Advance to the next mask row in every configuration.
    dst->buf += MI_SIZE * dst->stride;
    mask_16x16 >>= MI_SIZE;
    mask_8x8 >>= MI_SIZE;
    mask_4x4 >>= MI_SIZE;
    mask_4x4_int >>= MI_SIZE;
  }
  // restore the buf pointer in case there is additional filter pass.
  dst->buf = dst0;
}
2610
// Vertical-edge deblocking pass for a plane subsampled by 2 in both
// directions, driven by the chroma masks of a prebuilt LOOP_FILTER_MASK
// (left_uv[] and left_int_4x4_uv).
//
// lfm->lfl_uv is cleared and then filled from lfm->lfl_y by taking every
// second luma row and column. Each iteration covers 4 luma mi rows, i.e. two
// chroma mask rows: the low 8 bits of each 16-bit mask are handed to the
// *_row2 helper, then the masks are shifted down by MI_SIZE bits.
// plane->dst.buf is advanced while iterating and restored before returning.
void av1_filter_block_plane_ss11_ver(AV1_COMMON *const cm,
                                     struct macroblockd_plane *const plane,
                                     int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const out = &plane->dst;
  uint8_t *const saved_buf = out->buf;
  uint16_t bits_16x16 = lfm->left_uv[TX_16X16];
  uint16_t bits_8x8 = lfm->left_uv[TX_8X8];
  uint16_t bits_4x4 = lfm->left_uv[TX_4X4];
  uint16_t bits_4x4_int = lfm->left_int_4x4_uv;

  assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
  assert(plane->plane_type == PLANE_TYPE_UV);
  memset(lfm->lfl_uv, 0, sizeof(lfm->lfl_uv));

  // Vertical pass: do 2 rows at one time
  for (int luma_r = 0; luma_r < cm->mib_size && mi_row + luma_r < cm->mi_rows;
       luma_r += 4) {
    // luma_r is a multiple of 4, so the two chroma rows are uv_r and uv_r + 1.
    const int uv_r = luma_r >> 1;
    const int uv_cols = cm->mib_size >> 1;

    // Derive the chroma filter levels from the co-located luma entries.
    for (int uv_c = 0; uv_c < uv_cols; uv_c++) {
      const int luma_c = uv_c << 1;
      lfm->lfl_uv[uv_r][uv_c] = lfm->lfl_y[luma_r][luma_c];
      lfm->lfl_uv[uv_r + 1][uv_c] = lfm->lfl_y[luma_r + 2][luma_c];
    }

    // Mask bits belonging to the current pair of chroma rows.
    const unsigned int pair_16x16 = bits_16x16 & 0xff;
    const unsigned int pair_8x8 = bits_8x8 & 0xff;
    const unsigned int pair_4x4 = bits_4x4 & 0xff;
    const unsigned int pair_4x4_int = bits_4x4_int & 0xff;

#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_vert_row2(
          plane->subsampling_x, CONVERT_TO_SHORTPTR(out->buf), out->stride,
          pair_16x16, pair_8x8, pair_4x4, pair_4x4_int, &cm->lf_info,
          &lfm->lfl_uv[uv_r][0], (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
      filter_selectively_vert_row2(plane->subsampling_x, out->buf, out->stride,
                                   pair_16x16, pair_8x8, pair_4x4,
                                   pair_4x4_int, &cm->lf_info,
                                   &lfm->lfl_uv[uv_r][0]);
    }

    // Move on to the next pair of chroma rows.
    out->buf += 2 * MI_SIZE * out->stride;
    bits_16x16 >>= MI_SIZE;
    bits_8x8 >>= MI_SIZE;
    bits_4x4 >>= MI_SIZE;
    bits_4x4_int >>= MI_SIZE;
  }

  // Restore the buffer pointer for the horizontal pass.
  out->buf = saved_buf;
}
2665
// Runs the horizontal-edge pass of the mask-based loop filter on a chroma
// plane that is subsampled by two in both directions.
//
// lfm->lfl_uv is rebuilt from lfm->lfl_y first, using the same sampling as
// av1_filter_block_plane_ss11_ver. The above-edge masks are then consumed
// four bits per iteration. Outer edges are not filtered when
// mi_row + r == 0, and the internal 4x4 edge is not filtered when
// mi_row + r is the last mi row of the frame. plane->dst.buf is moved
// forward while filtering and put back to its entry value before returning.
void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm,
                                     struct macroblockd_plane *const plane,
                                     int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  int r, c;
  uint64_t mask_16x16 = lfm->above_uv[TX_16X16];
  uint64_t mask_8x8 = lfm->above_uv[TX_8X8];
  uint64_t mask_4x4 = lfm->above_uv[TX_4X4];
  uint64_t mask_4x4_int = lfm->above_int_4x4_uv;

  assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
  memset(lfm->lfl_uv, 0, sizeof(lfm->lfl_uv));

  // Re-populate the filter levels for uv, same as the code for the vertical
  // filter in av1_filter_block_plane_ss11_ver.
  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
    for (c = 0; c < (cm->mib_size >> 1); c++) {
      lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
      lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
    }
  }

  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
    // No internal 4x4 edge on the last mi row of the frame.
    const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r =
        skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      // Top of the frame: there is no block above to filter against.
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16 & 0xf;
      mask_8x8_r = mask_8x8 & 0xf;
      mask_4x4_r = mask_4x4 & 0xf;
    }

    // Both branches are braced on purpose: when CONFIG_LPF_DIRECT removes the
    // low-bitdepth call, an unbraced `else` would capture the pointer
    // advance below and skip it for high-bitdepth frames.
#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
          mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r >> 1][0],
          (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
#if !CONFIG_LPF_DIRECT
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                               &lfm->lfl_uv[r >> 1][0]);
#endif  // !CONFIG_LPF_DIRECT
    }

    // Step to the next row and drop the mask bits just used.
    dst->buf += MI_SIZE * dst->stride;
    mask_16x16 >>= MI_SIZE / 2;
    mask_8x8 >>= MI_SIZE / 2;
    mask_4x4 >>= MI_SIZE / 2;
    mask_4x4_int >>= MI_SIZE / 2;
  }
  // restore the buf pointer in case there is additional filter pass.
  dst->buf = dst0;
}
2730
2731 #if CONFIG_PARALLEL_DEBLOCKING
// Orientation of the block edge being deblocked. The value is also used as
// the first index into av1_prediction_masks and av1_transform_masks, and
// NUM_EDGE_DIRS sizes that dimension.
typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR;
// Per-direction, per-block-size alignment masks for prediction-block edges.
//
// Each entry is (extent - 1), where the extent is the block width named in
// the trailing comment for vertical edges and the block height for
// horizontal edges. set_lpf_parameters ANDs the edge coordinate with the
// entry; a zero result means the coordinate lies on a prediction-block
// boundary.
//
// The initializers are positional, so their order must match the BLOCK_SIZE
// enum (declared elsewhere) under the same CONFIG_* settings.
static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES_ALL] = {
  // mask for vertical edges filtering
  {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
      2 - 1,  // BLOCK_2X2
      2 - 1,  // BLOCK_2X4
      4 - 1,  // BLOCK_4X2
#endif        // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
      4 - 1,   // BLOCK_4X4
      4 - 1,   // BLOCK_4X8
      8 - 1,   // BLOCK_8X4
      8 - 1,   // BLOCK_8X8
      8 - 1,   // BLOCK_8X16
      16 - 1,  // BLOCK_16X8
      16 - 1,  // BLOCK_16X16
      16 - 1,  // BLOCK_16X32
      32 - 1,  // BLOCK_32X16
      32 - 1,  // BLOCK_32X32
      32 - 1,  // BLOCK_32X64
      64 - 1,  // BLOCK_64X32
      64 - 1,  // BLOCK_64X64
#if CONFIG_EXT_PARTITION
      64 - 1,   // BLOCK_64X128
      128 - 1,  // BLOCK_128X64
      128 - 1,  // BLOCK_128X128
#endif          // CONFIG_EXT_PARTITION
      4 - 1,   // BLOCK_4X16,
      16 - 1,  // BLOCK_16X4,
      8 - 1,   // BLOCK_8X32,
      32 - 1,  // BLOCK_32X8,
      16 - 1,  // BLOCK_16X64,
      64 - 1,  // BLOCK_64X16
#if CONFIG_EXT_PARTITION
      32 - 1,   // BLOCK_32X128
      128 - 1,  // BLOCK_128X32
#endif          // CONFIG_EXT_PARTITION
  },
  // mask for horizontal edges filtering
  {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
      2 - 1,  // BLOCK_2X2
      4 - 1,  // BLOCK_2X4
      2 - 1,  // BLOCK_4X2
#endif        // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
      4 - 1,   // BLOCK_4X4
      8 - 1,   // BLOCK_4X8
      4 - 1,   // BLOCK_8X4
      8 - 1,   // BLOCK_8X8
      16 - 1,  // BLOCK_8X16
      8 - 1,   // BLOCK_16X8
      16 - 1,  // BLOCK_16X16
      32 - 1,  // BLOCK_16X32
      16 - 1,  // BLOCK_32X16
      32 - 1,  // BLOCK_32X32
      64 - 1,  // BLOCK_32X64
      32 - 1,  // BLOCK_64X32
      64 - 1,  // BLOCK_64X64
#if CONFIG_EXT_PARTITION
      128 - 1,  // BLOCK_64X128
      64 - 1,   // BLOCK_128X64
      128 - 1,  // BLOCK_128X128
#endif          // CONFIG_EXT_PARTITION
      16 - 1,  // BLOCK_4X16,
      4 - 1,   // BLOCK_16X4,
      32 - 1,  // BLOCK_8X32,
      8 - 1,   // BLOCK_32X8,
      64 - 1,  // BLOCK_16X64,
      16 - 1,  // BLOCK_64X16
#if CONFIG_EXT_PARTITION
      128 - 1,  // BLOCK_32X128
      32 - 1,   // BLOCK_128X32
#endif          // CONFIG_EXT_PARTITION
  },
};
2807
// Per-direction, per-transform-size alignment masks for transform-unit edges.
//
// Each entry is (extent - 1), where the extent is the transform width named
// in the trailing comment for the first (VERT_EDGE) row and the transform
// height for the second (HORZ_EDGE) row. set_lpf_parameters ANDs the edge
// coordinate with the entry; a zero result means the coordinate lies on a
// transform-unit boundary.
//
// The initializers are positional, so their order must match the TX_SIZE
// enum (declared elsewhere) under the same CONFIG_* settings.
static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
  // mask for vertical edges filtering
  {
#if CONFIG_CHROMA_2X2
      2 - 1,  // TX_2X2
#endif        // CONFIG_CHROMA_2X2
      4 - 1,   // TX_4X4
      8 - 1,   // TX_8X8
      16 - 1,  // TX_16X16
      32 - 1,  // TX_32X32
#if CONFIG_TX64X64
      64 - 1,  // TX_64X64
#endif         // CONFIG_TX64X64
      4 - 1,   // TX_4X8
      8 - 1,   // TX_8X4
      8 - 1,   // TX_8X16
      16 - 1,  // TX_16X8
      16 - 1,  // TX_16X32
      32 - 1,  // TX_32X16
#if CONFIG_TX64X64
      32 - 1,  // TX_32X64
      64 - 1,  // TX_64X32
#endif         // CONFIG_TX64X64
      4 - 1,   // TX_4X16
      16 - 1,  // TX_16X4
      8 - 1,   // TX_8X32
      32 - 1   // TX_32X8
  },
  // mask for horizontal edges filtering
  {
#if CONFIG_CHROMA_2X2
      2 - 1,  // TX_2X2
#endif        // CONFIG_CHROMA_2X2
      4 - 1,   // TX_4X4
      8 - 1,   // TX_8X8
      16 - 1,  // TX_16X16
      32 - 1,  // TX_32X32
#if CONFIG_TX64X64
      64 - 1,  // TX_64X64
#endif         // CONFIG_TX64X64
      8 - 1,   // TX_4X8
      4 - 1,   // TX_8X4
      16 - 1,  // TX_8X16
      8 - 1,   // TX_16X8
      32 - 1,  // TX_16X32
      16 - 1,  // TX_32X16
#if CONFIG_TX64X64
      64 - 1,  // TX_32X64
      32 - 1,  // TX_64X32
#endif         // CONFIG_TX64X64
      16 - 1,  // TX_4X16
      4 - 1,   // TX_16X4
      32 - 1,  // TX_8X32
      8 - 1    // TX_32X8
  }
};
2862
av1_get_transform_size(const MODE_INFO * const mi,const EDGE_DIR edge_dir,const int mi_row,const int mi_col,const int plane,const struct macroblockd_plane * plane_ptr,const uint32_t scale_horz,const uint32_t scale_vert)2863 static TX_SIZE av1_get_transform_size(const MODE_INFO *const mi,
2864 const EDGE_DIR edge_dir, const int mi_row,
2865 const int mi_col, const int plane,
2866 const struct macroblockd_plane *plane_ptr,
2867 const uint32_t scale_horz,
2868 const uint32_t scale_vert) {
2869 const MB_MODE_INFO *mbmi = &mi->mbmi;
2870 TX_SIZE tx_size = (plane == AOM_PLANE_Y)
2871 ? mbmi->tx_size
2872 : av1_get_uv_tx_size(mbmi, plane_ptr);
2873 assert(tx_size < TX_SIZES_ALL);
2874
2875 #if CONFIG_VAR_TX
2876 // mi_row and mi_col is the absolute position of the MI block.
2877 // idx_c and idx_r is the relative offset of the MI within the super block
2878 // c and r is the relative offset of the 8x8 block within the supert block
2879 // blk_row and block_col is the relative offset of the current 8x8 block
2880 // within the current partition.
2881 const int idx_c = mi_col & MAX_MIB_MASK;
2882 const int idx_r = mi_row & MAX_MIB_MASK;
2883 const int c = idx_c >> mi_width_log2_lookup[BLOCK_8X8];
2884 const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
2885 const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
2886 const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
2887 const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
2888
2889 if (is_inter_block(mbmi) && !mbmi->skip) {
2890 const int tx_row_idx =
2891 (blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1;
2892 const int tx_col_idx =
2893 (blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1;
2894
2895 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
2896 const BLOCK_SIZE bsize =
2897 AOMMAX(BLOCK_4X4, ss_size_lookup[sb_type][scale_horz][scale_vert]);
2898 #else
2899 const BLOCK_SIZE bsize = ss_size_lookup[sb_type][scale_horz][scale_vert];
2900 #endif
2901 const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx];
2902
2903 assert(mb_tx_size < TX_SIZES_ALL);
2904
2905 tx_size = (plane == AOM_PLANE_Y)
2906 ? mb_tx_size
2907 : uv_txsize_lookup[bsize][mb_tx_size][0][0];
2908 assert(tx_size < TX_SIZES_ALL);
2909 }
2910 #else
2911 (void)mi_row;
2912 (void)mi_col;
2913 (void)scale_horz;
2914 (void)scale_vert;
2915 #endif // CONFIG_VAR_TX
2916
2917 // since in case of chrominance or non-square transorm need to convert
2918 // transform size into transform size in particular direction.
2919 // for vertical edge, filter direction is horizontal, for horizontal
2920 // edge, filter direction is vertical.
2921 tx_size = (VERT_EDGE == edge_dir) ? txsize_horz_map[tx_size]
2922 : txsize_vert_map[tx_size];
2923 return tx_size;
2924 }
2925
// Deblocking decision for one edge position, filled in by
// set_lpf_parameters and consumed by av1_filter_block_plane_vert/_horz.
// A length of 0 means "do not filter". The three limit pointers are only
// assigned when at least one of the two lengths is nonzero.
typedef struct AV1_DEBLOCKING_PARAMETERS {
  // length of the filter applied to the outer edge
  uint32_t filter_length;
  // length of the filter applied to the inner edge
  uint32_t filter_length_internal;
  // deblocking limits (taken from one cm->lf_info.lfthr entry)
  const uint8_t *lim;
  const uint8_t *mblim;
  const uint8_t *hev_thr;
} AV1_DEBLOCKING_PARAMETERS;
2936
// Decides whether and how to deblock the edge at plane position (x, y).
//
// params:    output; filter_length and filter_length_internal are always
//            reset to 0 first, the limit pointers are written only when one
//            of the lengths ends up nonzero.
// mode_step: distance, in entries of cm->mi_grid_visible, from the current
//            block to the previous block across the edge (read as
//            *(mi - mode_step)).
// edge_dir:  VERT_EDGE uses x as the edge coordinate, HORZ_EDGE uses y.
// x, y:      position inside the plane; positions at or beyond
//            plane_ptr->dst.width / dst.height are left unfiltered.
// plane:     plane index, forwarded to the transform-size and filter-level
//            lookups; nonzero planes can get a shorter filter.
// plane_ptr: supplies the subsampling shifts and the plane dimensions.
static void set_lpf_parameters(
    AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
    const AV1_COMMON *const cm, const EDGE_DIR edge_dir, const uint32_t x,
    const uint32_t y, const int plane,
    const struct macroblockd_plane *const plane_ptr) {
  // reset to initial values
  params->filter_length = 0;
  params->filter_length_internal = 0;

  // no deblocking is required outside of the plane
  const uint32_t width = plane_ptr->dst.width;
  const uint32_t height = plane_ptr->dst.height;
  if ((width <= x) || (height <= y)) {
    return;
  }

  // Convert the plane position back to luma scale, then to MI units, to find
  // the mode info entry covering it.
  const uint32_t scale_horz = plane_ptr->subsampling_x;
  const uint32_t scale_vert = plane_ptr->subsampling_y;
  const int mi_row = (y << scale_vert) >> MI_SIZE_LOG2;
  const int mi_col = (x << scale_horz) >> MI_SIZE_LOG2;
  MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
  const MB_MODE_INFO *mbmi = &mi[0]->mbmi;

  {
    const TX_SIZE ts =
        av1_get_transform_size(mi[0], edge_dir, mi_row, mi_col, plane,
                               plane_ptr, scale_horz, scale_vert);

    // get_filter_level has a different signature per configuration.
#if CONFIG_EXT_DELTA_Q
#if CONFIG_LOOPFILTER_LEVEL
    const uint32_t curr_level =
        get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
#else
#if CONFIG_LPF_SB
    const uint32_t curr_level =
        get_filter_level(cm, &cm->lf_info, mi_row, mi_col, mbmi);
#else
    const uint32_t curr_level = get_filter_level(cm, &cm->lf_info, mbmi);
#endif  // CONFIG_LPF_SB
#endif  // CONFIG_LOOPFILTER_LEVEL
#else
    const uint32_t curr_level = get_filter_level(&cm->lf_info, mbmi);
#endif  // CONFIG_EXT_DELTA_Q

    const int curr_skipped = mbmi->skip && is_inter_block(mbmi);
    const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
    uint32_t level = curr_level;
    // prepare outer edge parameters. deblock the edge if it's an edge of a TU
    // Nothing below runs when coord == 0 (there is no previous block).
    if (coord) {
#if CONFIG_LOOPFILTERING_ACROSS_TILES
      // The outer edge is considered unless filtering across tile boundaries
      // is disabled and this block is flagged as lying on the tile's left
      // (vertical edge) or above (horizontal edge) boundary.
      MODE_INFO *const mi_bound = cm->mi + mi_row * cm->mi_stride + mi_col;
      if (!av1_disable_loopfilter_on_tile_boundary(cm) ||
          ((VERT_EDGE == edge_dir) &&
           (0 == (mi_bound->mbmi.boundary_info & TILE_LEFT_BOUNDARY))) ||
          ((HORZ_EDGE == edge_dir) &&
           (0 == (mi_bound->mbmi.boundary_info & TILE_ABOVE_BOUNDARY))))
#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
      {
        // coord is on a transform-unit boundary when none of the mask bits
        // are set.
        const int32_t tu_edge =
            (coord & av1_transform_masks[edge_dir][ts]) ? (0) : (1);
        if (tu_edge) {
          // Look up the block on the other side of the edge.
          const MODE_INFO *const mi_prev = *(mi - mode_step);
          const int pv_row =
              (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
          const int pv_col =
              (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
          const TX_SIZE pv_ts =
              av1_get_transform_size(mi_prev, edge_dir, pv_row, pv_col, plane,
                                     plane_ptr, scale_horz, scale_vert);

#if CONFIG_EXT_DELTA_Q
#if CONFIG_LOOPFILTER_LEVEL
          const uint32_t pv_lvl = get_filter_level(cm, &cm->lf_info, edge_dir,
                                                   plane, &mi_prev->mbmi);
#else
#if CONFIG_LPF_SB
          const uint32_t pv_lvl = get_filter_level(cm, &cm->lf_info, pv_row,
                                                   pv_col, &mi_prev->mbmi);
#else
          const uint32_t pv_lvl =
              get_filter_level(cm, &cm->lf_info, &mi_prev->mbmi);
#endif  // CONFIG_LPF_SB
#endif  // CONFIG_LOOPFILTER_LEVEL
#else
          const uint32_t pv_lvl =
              get_filter_level(&cm->lf_info, &mi_prev->mbmi);
#endif  // CONFIG_EXT_DELTA_Q

          const int pv_skip =
              mi_prev->mbmi.skip && is_inter_block(&mi_prev->mbmi);
          // coord is on a prediction-block boundary when none of the mask
          // bits for the (subsampled) block size are set.
          const int32_t pu_edge =
              (coord &
               av1_prediction_masks[edge_dir]
                                   [ss_size_lookup[mbmi->sb_type][scale_horz]
                                                  [scale_vert]])
                  ? (0)
                  : (1);
          // if the current and the previous blocks are skipped,
          // deblock the edge if the edge belongs to a PU's edge only.
          if ((curr_level || pv_lvl) &&
              (!pv_skip || !curr_skipped || pu_edge)) {
            // The smaller of the two transform sizes picks the filter length.
            const TX_SIZE min_ts = AOMMIN(ts, pv_ts);
            if (TX_4X4 >= min_ts) {
              params->filter_length = 4;
            } else if (TX_8X8 == min_ts) {
              params->filter_length = 8;
            } else {
              params->filter_length = 16;
#if PARALLEL_DEBLOCKING_15TAPLUMAONLY
              // No wide filtering for chroma plane
              if (plane != 0) {
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
                params->filter_length = 6;
#else
                params->filter_length = 8;
#endif  // PARALLEL_DEBLOCKING_5_TAP_CHROMA
              }
#endif  // PARALLEL_DEBLOCKING_15TAPLUMAONLY
            }

#if PARALLEL_DEBLOCKING_DISABLE_15TAP
            // Overrides the choice above: never longer than 8.
            params->filter_length = (TX_4X4 >= AOMMIN(ts, pv_ts)) ? (4) : (8);
#endif  // PARALLEL_DEBLOCKING_DISABLE_15TAP

            // use the previous block's level when the current block's
            // level is zero
            level = (curr_level) ? (curr_level) : (pv_lvl);
          }
        }
      }

#if !CONFIG_CB4X4
      // prepare internal edge parameters
      if (curr_level && !curr_skipped) {
        params->filter_length_internal = (TX_4X4 >= ts) ? (4) : (0);
      }
#endif  // !CONFIG_CB4X4

      // prepare common parameters
      if (params->filter_length || params->filter_length_internal) {
        const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
        params->lim = limits->lim;
        params->mblim = limits->mblim;
        params->hev_thr = limits->hev_thr;
      }
    }
  }
}
3085
av1_filter_block_plane_vert(const AV1_COMMON * const cm,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)3086 static void av1_filter_block_plane_vert(
3087 const AV1_COMMON *const cm, const int plane,
3088 const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
3089 const uint32_t mi_col) {
3090 const int col_step = MI_SIZE >> MI_SIZE_LOG2;
3091 const int row_step = MI_SIZE >> MI_SIZE_LOG2;
3092 const uint32_t scale_horz = plane_ptr->subsampling_x;
3093 const uint32_t scale_vert = plane_ptr->subsampling_y;
3094 uint8_t *const dst_ptr = plane_ptr->dst.buf;
3095 const int dst_stride = plane_ptr->dst.stride;
3096 #if CONFIG_LPF_SB
3097 int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
3098 y_range = AOMMIN(y_range, cm->mi_rows);
3099 y_range >>= scale_vert;
3100
3101 int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
3102 x_range = AOMMIN(x_range, cm->mi_cols);
3103 x_range >>= scale_horz;
3104 #else
3105 const int y_range = (MAX_MIB_SIZE >> scale_vert);
3106 const int x_range = (MAX_MIB_SIZE >> scale_horz);
3107 #endif // CONFIG_LPF_SB
3108 for (int y = 0; y < y_range; y += row_step) {
3109 uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
3110 for (int x = 0; x < x_range; x += col_step) {
3111 // inner loop always filter vertical edges in a MI block. If MI size
3112 // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
3113 // If 4x4 trasnform is used, it will then filter the internal edge
3114 // aligned with a 4x4 block
3115 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
3116 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
3117 AV1_DEBLOCKING_PARAMETERS params;
3118 memset(¶ms, 0, sizeof(params));
3119
3120 set_lpf_parameters(¶ms, ((ptrdiff_t)1 << scale_horz), cm, VERT_EDGE,
3121 curr_x, curr_y, plane, plane_ptr);
3122
3123 #if CONFIG_LPF_DIRECT
3124 uint8_t *const src = plane_ptr->dst.buf0;
3125 const int width = cm->width >> scale_horz;
3126 const int height = cm->height >> scale_vert;
3127 const int pivot = 8;
3128 const int line_length = 16;
3129 uint8_t block[128];
3130 int orig_pos[128];
3131 const int vert_or_horz = 0; // 0: vertical
3132 const int unit = 1;
3133 int i;
3134 for (i = 0; i < 128; ++i) {
3135 block[i] = 0;
3136 orig_pos[i] = -1;
3137 }
3138
3139 if (params.filter_length) {
3140 const int filt_len = params.filter_length == 16 ? 8 : 4;
3141 const int direct =
3142 pick_min_grad_direct(src, filt_len, curr_y, curr_x, width, height,
3143 dst_stride, unit, vert_or_horz);
3144
3145 pick_filter_block_vert(src, block, orig_pos, filt_len, curr_y, curr_x,
3146 width, height, dst_stride, pivot, line_length,
3147 unit, direct);
3148 uint8_t *const filt_start = block + pivot;
3149 switch (params.filter_length) {
3150 // apply 4-tap filtering
3151 case 4:
3152 #if CONFIG_HIGHBITDEPTH
3153 if (cm->use_highbitdepth)
3154 aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(filt_start),
3155 line_length, params.mblim, params.lim,
3156 params.hev_thr, cm->bit_depth);
3157 else
3158 #endif // CONFIG_HIGHBITDEPTH
3159 aom_lpf_vertical_4(filt_start, line_length, params.mblim,
3160 params.lim, params.hev_thr);
3161 break;
3162 // apply 8-tap filtering
3163 case 8:
3164 #if CONFIG_HIGHBITDEPTH
3165 if (cm->use_highbitdepth)
3166 aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(filt_start),
3167 line_length, params.mblim, params.lim,
3168 params.hev_thr, cm->bit_depth);
3169 else
3170 #endif // CONFIG_HIGHBITDEPTH
3171 aom_lpf_vertical_8(filt_start, line_length, params.mblim,
3172 params.lim, params.hev_thr);
3173 break;
3174 // apply 16-tap filtering
3175 case 16:
3176 #if CONFIG_HIGHBITDEPTH
3177 if (cm->use_highbitdepth)
3178 aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(filt_start),
3179 line_length, params.mblim, params.lim,
3180 params.hev_thr, cm->bit_depth);
3181 else
3182 #endif // CONFIG_HIGHBITDEPTH
3183 aom_lpf_vertical_16(filt_start, line_length, params.mblim,
3184 params.lim, params.hev_thr);
3185 break;
3186 // no filtering
3187 default: break;
3188 }
3189
3190 for (i = 0; i < 128; ++i) {
3191 if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
3192 }
3193 }
3194
3195 if (params.filter_length_internal) {
3196 for (i = 0; i < 128; ++i) {
3197 block[i] = 0;
3198 orig_pos[i] = -1;
3199 }
3200
3201 const int direct =
3202 pick_min_grad_direct(src, 4, curr_y, curr_x + 4, width, height,
3203 dst_stride, unit, vert_or_horz);
3204
3205 pick_filter_block_vert(src, block, orig_pos, 4, curr_y, curr_x + 4,
3206 width, height, dst_stride, pivot, line_length,
3207 unit, direct);
3208
3209 uint8_t *const filt_start = block + pivot;
3210 #if CONFIG_HIGHBITDEPTH
3211 if (cm->use_highbitdepth)
3212 aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(filt_start),
3213 line_length, params.mblim, params.lim,
3214 params.hev_thr, cm->bit_depth);
3215 else
3216 #endif // CONFIG_HIGHBITDEPTH
3217 aom_lpf_vertical_4(filt_start, line_length, params.mblim, params.lim,
3218 params.hev_thr);
3219
3220 for (i = 0; i < 128; ++i) {
3221 if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
3222 }
3223 }
3224 #else // !CONFIG_LPF_DIRECT
3225 switch (params.filter_length) {
3226 // apply 4-tap filtering
3227 case 4:
3228 #if CONFIG_HIGHBITDEPTH
3229 if (cm->use_highbitdepth)
3230 aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride,
3231 params.mblim, params.lim, params.hev_thr,
3232 cm->bit_depth);
3233 else
3234 #endif // CONFIG_HIGHBITDEPTH
3235 aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
3236 params.hev_thr);
3237 break;
3238 #if PARALLEL_DEBLOCKING_5_TAP_CHROMA
3239 case 6: // apply 6-tap filter for chroma plane only
3240 assert(plane != 0);
3241 #if CONFIG_HIGHBITDEPTH
3242 if (cm->use_highbitdepth)
3243 aom_highbd_lpf_vertical_6_c(CONVERT_TO_SHORTPTR(p), dst_stride,
3244 params.mblim, params.lim,
3245 params.hev_thr, cm->bit_depth);
3246 else
3247 #endif // CONFIG_HIGHBITDEPTH
3248 aom_lpf_vertical_6_c(p, dst_stride, params.mblim, params.lim,
3249 params.hev_thr);
3250 break;
3251 #endif
3252 // apply 8-tap filtering
3253 case 8:
3254 #if CONFIG_HIGHBITDEPTH
3255 if (cm->use_highbitdepth)
3256 aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride,
3257 params.mblim, params.lim, params.hev_thr,
3258 cm->bit_depth);
3259 else
3260 #endif // CONFIG_HIGHBITDEPTH
3261 aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
3262 params.hev_thr);
3263 break;
3264 // apply 16-tap filtering
3265 case 16:
3266 #if CONFIG_HIGHBITDEPTH
3267 if (cm->use_highbitdepth)
3268 #if CONFIG_DEBLOCK_13TAP
3269 // TODO(olah): Remove _c once SIMD for 13-tap is available
3270 aom_highbd_lpf_vertical_16_c(CONVERT_TO_SHORTPTR(p), dst_stride,
3271 params.mblim, params.lim,
3272 params.hev_thr, cm->bit_depth);
3273 #else
3274 aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(p), dst_stride,
3275 params.mblim, params.lim, params.hev_thr,
3276 cm->bit_depth);
3277 #endif
3278 else
3279 #endif // CONFIG_HIGHBITDEPTH
3280 #if CONFIG_DEBLOCK_13TAP
3281 aom_lpf_vertical_16_c(p, dst_stride, params.mblim, params.lim,
3282 params.hev_thr);
3283 #else
3284 aom_lpf_vertical_16(p, dst_stride, params.mblim, params.lim,
3285 params.hev_thr);
3286 #endif
3287 break;
3288 // no filtering
3289 default: break;
3290 }
3291 // process the internal edge
3292 if (params.filter_length_internal) {
3293 #if CONFIG_HIGHBITDEPTH
3294 if (cm->use_highbitdepth)
3295 aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p + 4), dst_stride,
3296 params.mblim, params.lim, params.hev_thr,
3297 cm->bit_depth);
3298 else
3299 #endif // CONFIG_HIGHBITDEPTH
3300 aom_lpf_vertical_4(p + 4, dst_stride, params.mblim, params.lim,
3301 params.hev_thr);
3302 }
3303 #endif // CONFIG_LPF_DIRECT
3304 // advance the destination pointer
3305 p += MI_SIZE;
3306 }
3307 }
3308 }
3309
av1_filter_block_plane_horz(const AV1_COMMON * const cm,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)3310 static void av1_filter_block_plane_horz(
3311 const AV1_COMMON *const cm, const int plane,
3312 const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
3313 const uint32_t mi_col) {
3314 const int col_step = MI_SIZE >> MI_SIZE_LOG2;
3315 const int row_step = MI_SIZE >> MI_SIZE_LOG2;
3316 const uint32_t scale_horz = plane_ptr->subsampling_x;
3317 const uint32_t scale_vert = plane_ptr->subsampling_y;
3318 uint8_t *const dst_ptr = plane_ptr->dst.buf;
3319 const int dst_stride = plane_ptr->dst.stride;
3320 #if CONFIG_LPF_SB
3321 int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
3322 y_range = AOMMIN(y_range, cm->mi_rows);
3323 y_range >>= scale_vert;
3324
3325 int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
3326 x_range = AOMMIN(x_range, cm->mi_cols);
3327 x_range >>= scale_horz;
3328 #else
3329 const int y_range = (MAX_MIB_SIZE >> scale_vert);
3330 const int x_range = (MAX_MIB_SIZE >> scale_horz);
3331 #endif // CONFIG_LPF_SB
3332 for (int y = 0; y < y_range; y += row_step) {
3333 uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
3334 for (int x = 0; x < x_range; x += col_step) {
3335 // inner loop always filter vertical edges in a MI block. If MI size
3336 // is 8x8, it will first filter the vertical edge aligned with a 8x8
3337 // block. If 4x4 trasnform is used, it will then filter the internal
3338 // edge aligned with a 4x4 block
3339 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
3340 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
3341 AV1_DEBLOCKING_PARAMETERS params;
3342 memset(¶ms, 0, sizeof(params));
3343
3344 set_lpf_parameters(¶ms, (cm->mi_stride << scale_vert), cm, HORZ_EDGE,
3345 curr_x, curr_y, plane, plane_ptr);
3346
3347 #if CONFIG_LPF_DIRECT
3348 uint8_t *const src = plane_ptr->dst.buf0;
3349 const int width = cm->width >> scale_horz;
3350 const int height = cm->height >> scale_vert;
3351 const int pivot = 8;
3352 const int line_length = 16;
3353 uint8_t block[256];
3354 int orig_pos[256];
3355 const int vert_or_horz = 1; // 1: horizontal
3356 const int unit = 1;
3357 int i;
3358 for (i = 0; i < 256; ++i) {
3359 block[i] = 0;
3360 orig_pos[i] = -1;
3361 }
3362
3363 if (params.filter_length) {
3364 const int filt_len = params.filter_length == 16 ? 8 : 4;
3365 const int direct =
3366 pick_min_grad_direct(src, filt_len, curr_y, curr_x, width, height,
3367 dst_stride, unit, vert_or_horz);
3368
3369 pick_filter_block_horz(src, block, orig_pos, filt_len, curr_y, curr_x,
3370 width, height, dst_stride, pivot, line_length,
3371 unit, direct);
3372 uint8_t *const filt_start = block + pivot * line_length;
3373 switch (params.filter_length) {
3374 // apply 4-tap filtering
3375 case 4:
3376 #if CONFIG_HIGHBITDEPTH
3377 if (cm->use_highbitdepth)
3378 aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(filt_start),
3379 line_length, params.mblim, params.lim,
3380 params.hev_thr, cm->bit_depth);
3381 else
3382 #endif // CONFIG_HIGHBITDEPTH
3383 aom_lpf_horizontal_4(filt_start, line_length, params.mblim,
3384 params.lim, params.hev_thr);
3385 break;
3386 // apply 8-tap filtering
3387 case 8:
3388 #if CONFIG_HIGHBITDEPTH
3389 if (cm->use_highbitdepth)
3390 aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(filt_start),
3391 line_length, params.mblim, params.lim,
3392 params.hev_thr, cm->bit_depth);
3393 else
3394 #endif // CONFIG_HIGHBITDEPTH
3395 aom_lpf_horizontal_8(filt_start, line_length, params.mblim,
3396 params.lim, params.hev_thr);
3397 break;
3398 // apply 16-tap filtering
3399 case 16:
3400 #if CONFIG_HIGHBITDEPTH
3401 if (cm->use_highbitdepth)
3402 aom_highbd_lpf_horizontal_edge_16(
3403 CONVERT_TO_SHORTPTR(filt_start), line_length, params.mblim,
3404 params.lim, params.hev_thr, cm->bit_depth);
3405 else
3406 #endif // CONFIG_HIGHBITDEPTH
3407 aom_lpf_horizontal_edge_16(filt_start, line_length, params.mblim,
3408 params.lim, params.hev_thr);
3409 break;
3410 // no filtering
3411 default: break;
3412 }
3413
3414 for (i = 0; i < 256; ++i) {
3415 if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
3416 }
3417 }
3418 if (params.filter_length_internal) {
3419 for (i = 0; i < 256; ++i) {
3420 block[i] = 0;
3421 orig_pos[i] = -1;
3422 }
3423
3424 const int direct =
3425 pick_min_grad_direct(src, 4, curr_y + 4, curr_x, width, height,
3426 dst_stride, unit, vert_or_horz);
3427
3428 pick_filter_block_horz(src, block, orig_pos, 4, curr_y + 4, curr_x,
3429 width, height, dst_stride, pivot, line_length,
3430 unit, direct);
3431
3432 uint8_t *const filt_start = block + pivot * line_length;
3433 #if CONFIG_HIGHBITDEPTH
3434 if (cm->use_highbitdepth)
3435 aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(filt_start),
3436 line_length, params.mblim, params.lim,
3437 params.hev_thr, cm->bit_depth);
3438 else
3439 #endif // CONFIG_HIGHBITDEPTH
3440 aom_lpf_horizontal_4(filt_start, line_length, params.mblim,
3441 params.lim, params.hev_thr);
3442
3443 for (i = 0; i < 256; ++i) {
3444 if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
3445 }
3446 }
3447 #else // !CONFIG_LPF_DIRECT
3448 switch (params.filter_length) {
3449 // apply 4-tap filtering
3450 case 4:
3451 #if CONFIG_HIGHBITDEPTH
3452 if (cm->use_highbitdepth)
3453 aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride,
3454 params.mblim, params.lim,
3455 params.hev_thr, cm->bit_depth);
3456 else
3457 #endif // CONFIG_HIGHBITDEPTH
3458 aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
3459 params.hev_thr);
3460 break;
3461 #if PARALLEL_DEBLOCKING_5_TAP_CHROMA
3462 // apply 6-tap filtering
3463 case 6: assert(plane != 0);
3464 #if CONFIG_HIGHBITDEPTH
3465 if (cm->use_highbitdepth)
3466 aom_highbd_lpf_horizontal_6_c(CONVERT_TO_SHORTPTR(p), dst_stride,
3467 params.mblim, params.lim,
3468 params.hev_thr, cm->bit_depth);
3469 else
3470 #endif // CONFIG_HIGHBITDEPTH
3471 aom_lpf_horizontal_6_c(p, dst_stride, params.mblim, params.lim,
3472 params.hev_thr);
3473 break;
3474 #endif
3475 // apply 8-tap filtering
3476 case 8:
3477 #if CONFIG_HIGHBITDEPTH
3478 if (cm->use_highbitdepth)
3479 aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride,
3480 params.mblim, params.lim,
3481 params.hev_thr, cm->bit_depth);
3482 else
3483 #endif // CONFIG_HIGHBITDEPTH
3484 aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
3485 params.hev_thr);
3486 break;
3487 // apply 16-tap filtering
3488 case 16:
3489 #if CONFIG_HIGHBITDEPTH
3490 if (cm->use_highbitdepth)
3491 #if CONFIG_DEBLOCK_13TAP
3492 // TODO(olah): Remove _c once SIMD for 13-tap is available
3493 aom_highbd_lpf_horizontal_edge_16_c(
3494 CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim,
3495 params.hev_thr, cm->bit_depth);
3496 #else
3497 aom_highbd_lpf_horizontal_edge_16(
3498 CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim,
3499 params.hev_thr, cm->bit_depth);
3500 #endif
3501 else
3502 #endif // CONFIG_HIGHBITDEPTH
3503 #if CONFIG_DEBLOCK_13TAP
3504 aom_lpf_horizontal_edge_16_c(p, dst_stride, params.mblim,
3505 params.lim, params.hev_thr);
3506 #else
3507 aom_lpf_horizontal_edge_16(p, dst_stride, params.mblim, params.lim,
3508 params.hev_thr);
3509 #endif
3510 break;
3511 // no filtering
3512 default: break;
3513 }
3514 // process the internal edge
3515 if (params.filter_length_internal) {
3516 #if CONFIG_HIGHBITDEPTH
3517 if (cm->use_highbitdepth)
3518 aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p + 4 * dst_stride),
3519 dst_stride, params.mblim, params.lim,
3520 params.hev_thr, cm->bit_depth);
3521 else
3522 #endif // CONFIG_HIGHBITDEPTH
3523 aom_lpf_horizontal_4(p + 4 * dst_stride, dst_stride, params.mblim,
3524 params.lim, params.hev_thr);
3525 }
3526 #endif // CONFIG_LPF_DIRECT
3527 // advance the destination pointer
3528 p += MI_SIZE;
3529 }
3530 }
3531 }
3532 #endif // CONFIG_PARALLEL_DEBLOCKING
3533
// Runs the loop filter over mode-info rows [start, stop) of |frame_buffer|.
//
// The per-block routines that get called are selected at build time by the
// CONFIG_* experiment flags; exactly one of the four code paths below is
// compiled in.
//
//   frame_buffer  frame whose planes are handed to av1_setup_dst_planes().
//   cm            common codec state (mi grid, mi_cols, sb_size, ...).
//   planes        per-plane descriptors; indexed by plane number.
//   start, stop   half-open range of mode-info rows to visit.
//   col_start,
//   col_end       half-open range of mode-info columns. These are parameters
//                 only under CONFIG_LPF_SB; otherwise they are locals fixed to
//                 [0, cm->mi_cols).
//   y_only        without CONFIG_LOOPFILTER_LEVEL: non-zero restricts the
//                 work to plane 0. With CONFIG_LOOPFILTER_LEVEL: the index of
//                 the single plane to filter.
//
// NOTE(review): only the CONFIG_PARALLEL_DEBLOCKING path of the first branch
// reads col_start/col_end; every other path walks columns 0..cm->mi_cols.
// NOTE(review): num_planes is declared only when CONFIG_LOOPFILTER_LEVEL is
// off, yet the legacy mask-based path at the bottom uses it.
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
                          struct macroblockd_plane *planes, int start, int stop,
#if CONFIG_LPF_SB
                          int col_start, int col_end,
#endif
                          int y_only) {
#if CONFIG_LOOPFILTER_LEVEL
  // y_only is repurposed in this experiment: it names the one plane to
  // filter (0, 1 or 2 while searching a per-plane filter level).
  const int plane_start = y_only;
  const int plane_end = y_only + 1;
#else
  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
  const int plane_start = 0;
  const int plane_end = num_planes;
#endif  // CONFIG_LOOPFILTER_LEVEL
#if !CONFIG_LPF_SB
  const int col_start = 0;
  const int col_end = cm->mi_cols;
#endif  // !CONFIG_LPF_SB

#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES || \
    CONFIG_CB4X4

#if CONFIG_PARALLEL_DEBLOCKING
  // Pass 1: vertical edges of every block in the requested region, stepping
  // MAX_MIB_SIZE mode-info units at a time.
  for (int row = start; row < stop; row += MAX_MIB_SIZE) {
    for (int col = col_start; col < col_end; col += MAX_MIB_SIZE) {
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, row, col);
      for (int pl = plane_start; pl < plane_end; ++pl) {
        av1_filter_block_plane_vert(cm, pl, &planes[pl], row, col);
      }
    }
  }

  // Pass 2: horizontal edges, only after all vertical edges are done.
  for (int row = start; row < stop; row += MAX_MIB_SIZE) {
    for (int col = col_start; col < col_end; col += MAX_MIB_SIZE) {
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, row, col);
      for (int pl = plane_start; pl < plane_end; ++pl) {
        av1_filter_block_plane_horz(cm, pl, &planes[pl], row, col);
      }
    }
  }
#else  // !CONFIG_PARALLEL_DEBLOCKING
#if CONFIG_VAR_TX
  // Reset the above transform-size context of every plane before filtering.
  for (int pl = 0; pl < MAX_MB_PLANE; ++pl) {
    memset(cm->top_txfm_context[pl], TX_32X32,
           cm->mi_cols << TX_UNIT_WIDE_LOG2);
  }
#endif  // CONFIG_VAR_TX
  for (int row = start; row < stop; row += cm->mib_size) {
    MODE_INFO **const row_mi = cm->mi_grid_visible + row * cm->mi_stride;
#if CONFIG_VAR_TX
    // The left transform-size context is reset at the start of each row of
    // blocks.
    for (int pl = 0; pl < MAX_MB_PLANE; ++pl) {
      memset(cm->left_txfm_context[pl], TX_32X32,
             MAX_MIB_SIZE << TX_UNIT_HIGH_LOG2);
    }
#endif  // CONFIG_VAR_TX
    for (int col = 0; col < cm->mi_cols; col += cm->mib_size) {
      MODE_INFO **const block_mi = row_mi + col;
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, row, col);

      // Vertical then horizontal edges, one block and one plane at a time.
      for (int pl = plane_start; pl < plane_end; ++pl) {
        av1_filter_block_plane_non420_ver(cm, &planes[pl], block_mi, row, col,
                                          pl);
        av1_filter_block_plane_non420_hor(cm, &planes[pl], block_mi, row, col,
                                          pl);
      }
    }
  }
#endif  // CONFIG_PARALLEL_DEBLOCKING

#else  // !(CONFIG_VAR_TX || CONFIG_EXT_PARTITION ||
       //   CONFIG_EXT_PARTITION_TYPES || CONFIG_CB4X4)

#if !CONFIG_PARALLEL_DEBLOCKING
  LOOP_FILTER_MASK lfm;

  // Choose the chroma filtering routine from plane 1's subsampling. When
  // y_only is set the chroma loop below never runs (num_planes == 1), and the
  // path is LF_PATH_444 without looking at planes[1].
  enum lf_path path = LF_PATH_SLOW;
  if (y_only ||
      (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)) {
    path = LF_PATH_444;
  } else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) {
    path = LF_PATH_420;
  }

  for (int row = start; row < stop; row += MAX_MIB_SIZE) {
    MODE_INFO **const row_mi = cm->mi_grid_visible + row * cm->mi_stride;
    for (int col = 0; col < cm->mi_cols; col += MAX_MIB_SIZE) {
      MODE_INFO **const block_mi = row_mi + col;
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, row, col);

      // TODO(JBB): Make setup_mask work for non 420.
      av1_setup_mask(cm, row, col, block_mi, cm->mi_stride, &lfm);

      // Plane 0 always goes through the ss00 routines.
      av1_filter_block_plane_ss00_ver(cm, &planes[0], row, &lfm);
      av1_filter_block_plane_ss00_hor(cm, &planes[0], row, &lfm);

      for (int pl = 1; pl < num_planes; ++pl) {
        if (path == LF_PATH_420) {
          av1_filter_block_plane_ss11_ver(cm, &planes[pl], row, &lfm);
          av1_filter_block_plane_ss11_hor(cm, &planes[pl], row, &lfm);
        } else if (path == LF_PATH_444) {
          av1_filter_block_plane_ss00_ver(cm, &planes[pl], row, &lfm);
          av1_filter_block_plane_ss00_hor(cm, &planes[pl], row, &lfm);
        } else if (path == LF_PATH_SLOW) {
          av1_filter_block_plane_non420_ver(cm, &planes[pl], block_mi, row,
                                            col, pl);
          av1_filter_block_plane_non420_hor(cm, &planes[pl], block_mi, row,
                                            col, pl);
        }
      }
    }
  }
#else  // CONFIG_PARALLEL_DEBLOCKING
  // Pass 1: vertical edges across the full width of the requested rows.
  for (int row = start; row < stop; row += MAX_MIB_SIZE) {
    for (int col = 0; col < cm->mi_cols; col += MAX_MIB_SIZE) {
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, row, col);
      for (int pl = plane_start; pl < plane_end; ++pl) {
        av1_filter_block_plane_vert(cm, pl, &planes[pl], row, col);
      }
    }
  }
  // Pass 2: horizontal edges across the full width of the requested rows.
  for (int row = start; row < stop; row += MAX_MIB_SIZE) {
    for (int col = 0; col < cm->mi_cols; col += MAX_MIB_SIZE) {
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, row, col);
      for (int pl = plane_start; pl < plane_end; ++pl) {
        av1_filter_block_plane_horz(cm, pl, &planes[pl], row, col);
      }
    }
  }
#endif  // !CONFIG_PARALLEL_DEBLOCKING
#endif  // CONFIG_VAR_TX || CONFIG_EXT_PARTITION ||
        // CONFIG_EXT_PARTITION_TYPES || CONFIG_CB4X4
}
3676
// Loop-filters |frame| (or part of it) at the requested filter level(s).
//
//   frame                 buffer passed through to av1_loop_filter_rows().
//   cm                    common codec state; cm->lf.filter_level is
//                         temporarily overwritten under CONFIG_EXT_DELTA_Q.
//   xd                    supplies the plane descriptors (xd->plane).
//   frame_filter_level    filter level to apply; under CONFIG_LPF_SB with
//                         partial_frame set it is the level for one
//                         superblock rather than the frame.
//   frame_filter_level_r  second filter level (CONFIG_LOOPFILTER_LEVEL only).
//   y_only                forwarded to av1_loop_filter_rows(); under
//                         CONFIG_LOOPFILTER_LEVEL it is also forwarded to
//                         av1_loop_filter_frame_init().
//   partial_frame         without CONFIG_LPF_SB: filter only a band of rows
//                         starting near the middle of the frame. With
//                         CONFIG_LPF_SB: filter only the region around
//                         (mi_row, mi_col).
//   mi_row, mi_col        superblock position (CONFIG_LPF_SB only).
//
// Returns early, without touching anything, when the requested level(s) are
// zero (under CONFIG_LPF_SB only when partial_frame is also set).
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                           MACROBLOCKD *xd, int frame_filter_level,
#if CONFIG_LOOPFILTER_LEVEL
                           int frame_filter_level_r,
#endif
                           int y_only, int partial_frame
#if CONFIG_LPF_SB
                           ,
                           int mi_row, int mi_col
#endif
                           ) {
  int start_mi_row, end_mi_row;
#if CONFIG_EXT_DELTA_Q
  // Remember the level(s) currently stored in cm so they can be put back
  // once the rows have been filtered.
#if CONFIG_LOOPFILTER_LEVEL
  int saved_level[2] = { cm->lf.filter_level[0], cm->lf.filter_level[1] };
#else
  int saved_level = cm->lf.filter_level;
#endif
#endif  // CONFIG_EXT_DELTA_Q

#if CONFIG_LPF_SB
  if (partial_frame && !frame_filter_level) return;

  int start_mi_col;
  int end_mi_col;

  // Per-superblock deblocking experiment:
  //  - partial_frame == 0: the real filtering stage, covering the whole frame.
  //  - partial_frame == 1: a search for the best level of the current
  //    superblock; frame_filter_level then holds that superblock's level.
  if (!partial_frame) {
    start_mi_row = 0;
    end_mi_row = cm->mi_rows;
    start_mi_col = 0;
    end_mi_col = cm->mi_cols;
  } else {
    // Window of MAX_MIB_SIZE units shifted up/left by
    // FILT_BOUNDARY_MI_OFFSET and clamped to the frame.
    const int first_row = mi_row - FILT_BOUNDARY_MI_OFFSET;
    const int first_col = mi_col - FILT_BOUNDARY_MI_OFFSET;
    start_mi_row = AOMMAX(0, first_row);
    start_mi_col = AOMMAX(0, first_col);
    end_mi_row = AOMMIN(first_row + MAX_MIB_SIZE, cm->mi_rows);
    end_mi_col = AOMMIN(first_col + MAX_MIB_SIZE, cm->mi_cols);

    av1_loop_filter_sb_level_init(cm, mi_row, mi_col, frame_filter_level);
  }
#else  // !CONFIG_LPF_SB
#if CONFIG_LOOPFILTER_LEVEL
  if (!frame_filter_level && !frame_filter_level_r) return;
#else
  if (!frame_filter_level) return;
#endif

  int mi_rows_to_filter = cm->mi_rows;
  start_mi_row = 0;
  if (partial_frame && cm->mi_rows > 8) {
    // Start at the middle row with the low three bits cleared, and cover
    // one eighth of the rows but never fewer than 8.
    start_mi_row = (cm->mi_rows >> 1) & 0xfffffff8;
    mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
  }
  end_mi_row = start_mi_row + mi_rows_to_filter;

#if CONFIG_LOOPFILTER_LEVEL
  // TODO(chengchen): refactor the code such that y_only has its matching
  // meaning. Now it means the plane to be filtered in this experiment.
  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r,
                             y_only);
#else
  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
#endif
#endif  // CONFIG_LPF_SB

#if CONFIG_EXT_DELTA_Q
  // Publish the requested level(s) in cm for the duration of the filtering.
#if CONFIG_LOOPFILTER_LEVEL
  cm->lf.filter_level[0] = frame_filter_level;
  cm->lf.filter_level[1] = frame_filter_level_r;
#else
  cm->lf.filter_level = frame_filter_level;
#endif
#endif  // CONFIG_EXT_DELTA_Q

#if CONFIG_LPF_SB
  av1_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row,
                       start_mi_col, end_mi_col, y_only);
#else
  av1_loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
#endif  // CONFIG_LPF_SB

#if CONFIG_EXT_DELTA_Q
  // Put back the level(s) that were in cm on entry.
#if CONFIG_LOOPFILTER_LEVEL
  cm->lf.filter_level[0] = saved_level[0];
  cm->lf.filter_level[1] = saved_level[1];
#else
  cm->lf.filter_level = saved_level;
#endif
#endif  // CONFIG_EXT_DELTA_Q
}
3775
// Re-initialises a loop-filter worker payload.
//
// Stores |frame_buffer| and |cm| in |lf_data|, zeroes its start/stop/y_only
// fields, and copies the plane descriptors from |planes| into
// lf_data->planes.
void av1_loop_filter_data_reset(LFWorkerData *lf_data,
                                YV12_BUFFER_CONFIG *frame_buffer,
                                struct AV1Common *cm,
                                const struct macroblockd_plane *planes) {
  // Row range and plane selector are cleared here; nothing in this function
  // sets them to a real value.
  lf_data->y_only = 0;
  lf_data->stop = 0;
  lf_data->start = 0;

  lf_data->cm = cm;
  lf_data->frame_buffer = frame_buffer;

  // The copy length is the size of the destination member, so |planes| is
  // assumed to reference at least that many bytes -- TODO confirm at callers.
  memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
}
3787
// Worker entry point: filters the rows described by |lf_data|.
//
// When CONFIG_LPF_SB is enabled this is a no-op. |unused| is ignored.
// Always returns 1.
int av1_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
  (void)unused;
#if CONFIG_LPF_SB
  (void)lf_data;
#else
  av1_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                       lf_data->start, lf_data->stop, lf_data->y_only);
#endif  // CONFIG_LPF_SB
  return 1;
}
3798