1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 
14 #include "./aom_config.h"
15 #include "./aom_dsp_rtcd.h"
16 #include "aom_dsp/aom_dsp_common.h"
17 #include "aom_mem/aom_mem.h"
18 #include "aom_ports/mem.h"
19 #include "av1/common/av1_loopfilter.h"
20 #include "av1/common/onyxc_int.h"
21 #include "av1/common/reconinter.h"
22 #include "av1/common/seg_common.h"
23 
24 #if CONFIG_LOOPFILTER_LEVEL
25 static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
26   { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
27   { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
28   { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
29 };
30 
31 #if CONFIG_EXT_DELTA_Q
32 static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
33   { 0, 1 }, { 2, 2 }, { 3, 3 }
34 };
35 #endif  // CONFIG_EXT_DELTA_Q
36 #endif  // CONFIG_LOOPFILTER_LEVEL
37 
38 #if CONFIG_LPF_DIRECT
pick_filter_pixel_left(uint8_t * const src,uint8_t * const line,int * const orig_pos,int length,int row,int col,int width,int height,int pitch,int pivot,int direct)39 static void pick_filter_pixel_left(uint8_t *const src, uint8_t *const line,
40                                    int *const orig_pos, int length, int row,
41                                    int col, int width, int height, int pitch,
42                                    int pivot, int direct) {
43   int i;
44   int pos = row * pitch + col;
45 
46   for (i = 0; i < length; ++i) {
47     int dy = 0;
48     switch (direct) {
49       case VERT_HORZ: dy = 0; break;
50       case DEGREE_45: dy = 1; break;
51       case DEGREE_135: dy = -1; break;
52     }
53     col -= 1;
54     row += dy;
55     if (col >= 0 && col < width && row >= 0 && row < height) {
56       pos = row * pitch + col;
57       line[pivot - 1 - i] = src[pos];
58       orig_pos[pivot - 1 - i] = pos;
59     }
60   }
61 }
62 
pick_filter_pixel_right(uint8_t * const src,uint8_t * const line,int * const orig_pos,int length,int row,int col,int width,int height,int pitch,int pivot,int direct)63 static void pick_filter_pixel_right(uint8_t *const src, uint8_t *const line,
64                                     int *const orig_pos, int length, int row,
65                                     int col, int width, int height, int pitch,
66                                     int pivot, int direct) {
67   int i;
68   int pos = row * pitch + col;
69 
70   line[pivot] = src[pos];
71   orig_pos[pivot] = pos;
72 
73   for (i = 1; i < length; ++i) {
74     int dy = 0;
75     switch (direct) {
76       case VERT_HORZ: dy = 0; break;
77       case DEGREE_45: dy = -1; break;
78       case DEGREE_135: dy = 1; break;
79     }
80     col += 1;
81     row += dy;
82     if (col >= 0 && col < width && row >= 0 && row < height) {
83       pos = row * pitch + col;
84       line[pivot + i] = src[pos];
85       orig_pos[pivot + i] = pos;
86     }
87   }
88 }
89 
pick_filter_pixel_above(uint8_t * const src,uint8_t * const line,int * const orig_pos,int length,int row,int col,int width,int height,int pitch,int pivot,int direct)90 static void pick_filter_pixel_above(uint8_t *const src, uint8_t *const line,
91                                     int *const orig_pos, int length, int row,
92                                     int col, int width, int height, int pitch,
93                                     int pivot, int direct) {
94   int i;
95   int pos = row * pitch + col;
96 
97   for (i = 0; i < length; ++i) {
98     int dx = 0;
99     switch (direct) {
100       case VERT_HORZ: dx = 0; break;
101       case DEGREE_45: dx = 1; break;
102       case DEGREE_135: dx = -1; break;
103     }
104     col += dx;
105     row -= 1;
106     if (col >= 0 && col < width && row >= 0 && row < height) {
107       pos = row * pitch + col;
108       line[pivot - 1 - i] = src[pos];
109       orig_pos[pivot - 1 - i] = pos;
110     }
111   }
112 }
113 
pick_filter_pixel_bot(uint8_t * const src,uint8_t * const line,int * const orig_pos,int length,int row,int col,int width,int height,int pitch,int pivot,int direct)114 static void pick_filter_pixel_bot(uint8_t *const src, uint8_t *const line,
115                                   int *const orig_pos, int length, int row,
116                                   int col, int width, int height, int pitch,
117                                   int pivot, int direct) {
118   int i;
119   int pos = row * pitch + col;
120 
121   line[pivot] = src[pos];
122   orig_pos[pivot] = pos;
123 
124   for (i = 1; i < length; ++i) {
125     int dx = 0;
126     switch (direct) {
127       case VERT_HORZ: dx = 0; break;
128       case DEGREE_45: dx = -1; break;
129       case DEGREE_135: dx = 1; break;
130     }
131     col += dx;
132     row += 1;
133     if (col >= 0 && col < width && row >= 0 && row < height) {
134       pos = row * pitch + col;
135       line[pivot + i] = src[pos];
136       orig_pos[pivot + i] = pos;
137     }
138   }
139 }
140 
// Fills `block` / `orig_pos` for a vertical edge: for each of the 8 * unit
// rows starting at `row`, gathers the samples left of and right of column
// `col` into one output line of `line_length` entries.
static void pick_filter_block_vert(uint8_t *const src, uint8_t *const block,
                                   int *const orig_pos, int length, int row,
                                   int col, int width, int height, int pitch,
                                   int pivot, int line_length, int unit,
                                   int direct) {
  const int num_lines = 8 * unit;
  int n;

  for (n = 0; n < num_lines; ++n) {
    uint8_t *const out_line = block + n * line_length;
    int *const out_pos = orig_pos + n * line_length;

    pick_filter_pixel_left(src, out_line, out_pos, length, row + n, col, width,
                           height, pitch, pivot, direct);
    pick_filter_pixel_right(src, out_line, out_pos, length, row + n, col,
                            width, height, pitch, pivot, direct);
  }
}
156 
// Fills `block` / `orig_pos` for a horizontal edge: for each of the 8 * unit
// columns starting at `col`, gathers the samples above and below row `row`
// into one output line, then transposes the result so that lines and entries
// swap roles. All 256 entries of `block` and `orig_pos` are rewritten; slots
// the transpose does not touch end up as 0 and -1 respectively.
static void pick_filter_block_horz(uint8_t *const src, uint8_t *const block,
                                   int *const orig_pos, int length, int row,
                                   int col, int width, int height, int pitch,
                                   int pivot, int line_length, int unit,
                                   int direct) {
  const int num_lines = 8 * unit;
  uint8_t transposed[256];
  int transposed_pos[256];
  int n, k;

  for (n = 0; n < num_lines; ++n) {
    uint8_t *const out_line = block + n * line_length;
    int *const out_pos = orig_pos + n * line_length;

    pick_filter_pixel_above(src, out_line, out_pos, length, row, col + n,
                            width, height, pitch, pivot, direct);
    pick_filter_pixel_bot(src, out_line, out_pos, length, row, col + n, width,
                          height, pitch, pivot, direct);
  }

  // rearrange block
  // TODO(chengchen): make it in-place or a stand alone function
  memset(transposed, 0, sizeof(transposed));
  for (k = 0; k < 256; ++k) transposed_pos[k] = -1;

  for (n = 0; n < num_lines; ++n) {
    for (k = 0; k < line_length; ++k) {
      const int from = n * line_length + k;
      const int to = k * line_length + n;
      transposed[to] = block[from];
      transposed_pos[to] = orig_pos[from];
    }
  }

  memcpy(block, transposed, sizeof(transposed));
  memcpy(orig_pos, transposed_pos, sizeof(transposed_pos));
}
192 
compute_block_grad(uint8_t * const src,int length,int row,int col,int width,int height,int pitch,int unit,int vert_or_horz,int direct)193 static int compute_block_grad(uint8_t *const src, int length, int row, int col,
194                               int width, int height, int pitch, int unit,
195                               int vert_or_horz, int direct) {
196   int i, j;
197   int r0, c0, pos0, r1 = 0, c1 = 0, pos1;
198   int sum_grad = 0;
199   for (i = 0; i < 8 * unit; ++i) {
200     // vert_or_horz: 0 vertical edge, 1 horizontal edge
201     r0 = vert_or_horz ? row : row + i;
202     c0 = vert_or_horz ? col + i : col;
203     pos0 = r0 * pitch + c0;
204 
205     for (j = 0; j < length; ++j) {
206       if (vert_or_horz == 0) {
207         switch (direct) {
208           case VERT_HORZ: r1 = r0; break;
209           case DEGREE_45: r1 = r0 + 1; break;
210           case DEGREE_135: r1 = r0 - 1; break;
211         }
212         c1 = c0 - 1;
213       } else {
214         r1 = r0 - 1;
215         switch (direct) {
216           case VERT_HORZ: c1 = c0; break;
217           case DEGREE_45: c1 = c0 + 1; break;
218           case DEGREE_135: c1 = c0 - 1; break;
219         }
220       }
221       pos1 = r1 * pitch + c1;
222 
223       if (r0 >= 0 && r0 < height && c0 >= 0 && c0 < width && r1 >= 0 &&
224           r1 < height && c1 >= 0 && c1 < width) {
225         sum_grad += abs(src[pos1] - src[pos0]);
226       } else {
227         sum_grad += 255;  // penalize unreachable boundary
228       }
229       r0 = r1;
230       c0 = c1;
231       pos0 = pos1;
232     }
233 
234     r0 = vert_or_horz ? row : row + i;
235     c0 = vert_or_horz ? col + i : col;
236     pos0 = r0 * pitch + c0;
237 
238     for (j = 0; j < length - 1; ++j) {
239       if (vert_or_horz == 0) {
240         switch (direct) {
241           case VERT_HORZ: r1 = r0; break;
242           case DEGREE_45: r1 = r0 - 1; break;
243           case DEGREE_135: r1 = r0 + 1; break;
244         }
245         c1 = c0 + 1;
246       } else {
247         r1 = r0 + 1;
248         switch (direct) {
249           case VERT_HORZ: c1 = c0; break;
250           case DEGREE_45: c1 = c0 - 1; break;
251           case DEGREE_135: c1 = c0 + 1; break;
252         }
253       }
254       pos1 = r1 * pitch + c1;
255 
256       if (r0 >= 0 && r0 < height && c0 >= 0 && c0 < width && r1 >= 0 &&
257           r1 < height && c1 >= 0 && c1 < width) {
258         sum_grad += abs(src[pos1] - src[pos0]);
259       } else {
260         sum_grad += 255;  // penalize unreachable boundary
261       }
262       r0 = r1;
263       c0 = c1;
264       pos0 = pos1;
265     }
266   }
267 
268   return sum_grad;
269 }
270 
pick_min_grad_direct(uint8_t * const src,int length,int row,int col,int width,int height,int pitch,int unit,int vert_or_horz)271 static int pick_min_grad_direct(uint8_t *const src, int length, int row,
272                                 int col, int width, int height, int pitch,
273                                 int unit, int vert_or_horz) {
274   int direct = VERT_HORZ;
275   int min_grad = INT_MAX, sum_grad = 0;
276 
277   int degree;
278   for (degree = 0; degree < FILTER_DEGREES; ++degree) {
279     // compute abs gradient along each line for the filter block
280     sum_grad = compute_block_grad(src, length, row, col, width, height, pitch,
281                                   unit, vert_or_horz, degree);
282     if (sum_grad < min_grad) {
283       min_grad = sum_grad;
284       direct = degree;
285     }
286   }
287 
288   return direct;
289 }
290 #endif  // CONFIG_LPF_DIRECT
291 
// Build-time switches for the parallel deblocking code paths.
#define PARALLEL_DEBLOCKING_15TAPLUMAONLY 1
#define PARALLEL_DEBLOCKING_DISABLE_15TAP 0
// PARALLEL_DEBLOCKING_5_TAP_CHROMA follows CONFIG_DEBLOCK_13TAP: 1 when that
// option is enabled, 0 otherwise.
#if !CONFIG_DEBLOCK_13TAP
#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 0
#else
#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 1
#endif
299 
300 #if PARALLEL_DEBLOCKING_5_TAP_CHROMA
// Prototypes for the *_lpf_*_6_c routines, which are defined outside this
// file. The high-bitdepth variants operate on 16-bit samples and take the bit
// depth as an extra argument.
void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh);

void aom_lpf_horizontal_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
                            const uint8_t *limit, const uint8_t *thresh);

void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch, const uint8_t *blimit,
                                 const uint8_t *limit, const uint8_t *thresh,
                                 int bd);

void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int pitch,
                                   const uint8_t *blimit, const uint8_t *limit,
                                   const uint8_t *thresh, int bd);
316 #endif
317 
318 // 64 bit masks for left transform size. Each 1 represents a position where
319 // we should apply a loop filter across the left border of an 8x8 block
320 // boundary.
321 //
322 // In the case of TX_16X16->  ( in low order byte first we end up with
323 // a mask that looks like this
324 //
325 //    10101010
326 //    10101010
327 //    10101010
328 //    10101010
329 //    10101010
330 //    10101010
331 //    10101010
332 //    10101010
333 //
334 // A loopfilter should be applied to every other 8x8 horizontally.
335 static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
336 #if CONFIG_CHROMA_2X2
337   0xffffffffffffffffULL,  // TX_2X2
338 #endif
339   0xffffffffffffffffULL,  // TX_4X4
340   0xffffffffffffffffULL,  // TX_8x8
341   0x5555555555555555ULL,  // TX_16x16
342   0x1111111111111111ULL,  // TX_32x32
343 #if CONFIG_TX64X64
344   0x0101010101010101ULL,  // TX_64x64
345 #endif                    // CONFIG_TX64X64
346 };
347 
348 // 64 bit masks for above transform size. Each 1 represents a position where
349 // we should apply a loop filter across the top border of an 8x8 block
350 // boundary.
351 //
352 // In the case of TX_32x32 ->  ( in low order byte first we end up with
353 // a mask that looks like this
354 //
355 //    11111111
356 //    00000000
357 //    00000000
358 //    00000000
359 //    11111111
360 //    00000000
361 //    00000000
362 //    00000000
363 //
364 // A loopfilter should be applied to every other 4 the row vertically.
365 static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
366 #if CONFIG_CHROMA_2X2
367   0xffffffffffffffffULL,  // TX_4X4
368 #endif
369   0xffffffffffffffffULL,  // TX_4X4
370   0xffffffffffffffffULL,  // TX_8x8
371   0x00ff00ff00ff00ffULL,  // TX_16x16
372   0x000000ff000000ffULL,  // TX_32x32
373 #if CONFIG_TX64X64
374   0x00000000000000ffULL,  // TX_64x64
375 #endif                    // CONFIG_TX64X64
376 };
377 
378 // 64 bit masks for prediction sizes (left). Each 1 represents a position
379 // where left border of an 8x8 block. These are aligned to the right most
380 // appropriate bit, and then shifted into place.
381 //
382 // In the case of TX_16x32 ->  ( low order byte first ) we end up with
383 // a mask that looks like this :
384 //
385 //  10000000
386 //  10000000
387 //  10000000
388 //  10000000
389 //  00000000
390 //  00000000
391 //  00000000
392 //  00000000
393 static const uint64_t left_prediction_mask[BLOCK_SIZES_ALL] = {
394 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
395   0x0000000000000001ULL,  // BLOCK_2X2,
396   0x0000000000000001ULL,  // BLOCK_2X4,
397   0x0000000000000001ULL,  // BLOCK_4X2,
398 #endif
399   0x0000000000000001ULL,  // BLOCK_4X4,
400   0x0000000000000001ULL,  // BLOCK_4X8,
401   0x0000000000000001ULL,  // BLOCK_8X4,
402   0x0000000000000001ULL,  // BLOCK_8X8,
403   0x0000000000000101ULL,  // BLOCK_8X16,
404   0x0000000000000001ULL,  // BLOCK_16X8,
405   0x0000000000000101ULL,  // BLOCK_16X16,
406   0x0000000001010101ULL,  // BLOCK_16X32,
407   0x0000000000000101ULL,  // BLOCK_32X16,
408   0x0000000001010101ULL,  // BLOCK_32X32,
409   0x0101010101010101ULL,  // BLOCK_32X64,
410   0x0000000001010101ULL,  // BLOCK_64X32,
411   0x0101010101010101ULL,  // BLOCK_64X64,
412   0x0000000000000101ULL,  // BLOCK_4X16,
413   0x0000000000000001ULL,  // BLOCK_16X4,
414   0x0000000001010101ULL,  // BLOCK_8X32,
415   0x0000000000000001ULL,  // BLOCK_32X8,
416   0x0101010101010101ULL,  // BLOCK_16X64,
417   0x0000000000000101ULL,  // BLOCK_64X16
418 };
419 
420 // 64 bit mask to shift and set for each prediction size.
421 static const uint64_t above_prediction_mask[BLOCK_SIZES_ALL] = {
422 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
423   0x0000000000000001ULL,  // BLOCK_2X2
424   0x0000000000000001ULL,  // BLOCK_2X4
425   0x0000000000000001ULL,  // BLOCK_4X2
426 #endif
427   0x0000000000000001ULL,  // BLOCK_4X4
428   0x0000000000000001ULL,  // BLOCK_4X8
429   0x0000000000000001ULL,  // BLOCK_8X4
430   0x0000000000000001ULL,  // BLOCK_8X8
431   0x0000000000000001ULL,  // BLOCK_8X16,
432   0x0000000000000003ULL,  // BLOCK_16X8
433   0x0000000000000003ULL,  // BLOCK_16X16
434   0x0000000000000003ULL,  // BLOCK_16X32,
435   0x000000000000000fULL,  // BLOCK_32X16,
436   0x000000000000000fULL,  // BLOCK_32X32,
437   0x000000000000000fULL,  // BLOCK_32X64,
438   0x00000000000000ffULL,  // BLOCK_64X32,
439   0x00000000000000ffULL,  // BLOCK_64X64,
440   0x0000000000000001ULL,  // BLOCK_4X16,
441   0x0000000000000003ULL,  // BLOCK_16X4,
442   0x0000000000000001ULL,  // BLOCK_8X32,
443   0x000000000000000fULL,  // BLOCK_32X8,
444   0x0000000000000003ULL,  // BLOCK_16X64,
445   0x00000000000000ffULL,  // BLOCK_64X16
446 };
447 // 64 bit mask to shift and set for each prediction size. A bit is set for
448 // each 8x8 block that would be in the top left most block of the given block
449 // size in the 64x64 block.
450 static const uint64_t size_mask[BLOCK_SIZES_ALL] = {
451 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
452   0x0000000000000001ULL,  // BLOCK_2X2
453   0x0000000000000001ULL,  // BLOCK_2X4
454   0x0000000000000001ULL,  // BLOCK_4X2
455 #endif
456   0x0000000000000001ULL,  // BLOCK_4X4
457   0x0000000000000001ULL,  // BLOCK_4X8
458   0x0000000000000001ULL,  // BLOCK_8X4
459   0x0000000000000001ULL,  // BLOCK_8X8
460   0x0000000000000101ULL,  // BLOCK_8X16,
461   0x0000000000000003ULL,  // BLOCK_16X8
462   0x0000000000000303ULL,  // BLOCK_16X16
463   0x0000000003030303ULL,  // BLOCK_16X32,
464   0x0000000000000f0fULL,  // BLOCK_32X16,
465   0x000000000f0f0f0fULL,  // BLOCK_32X32,
466   0x0f0f0f0f0f0f0f0fULL,  // BLOCK_32X64,
467   0x00000000ffffffffULL,  // BLOCK_64X32,
468   0xffffffffffffffffULL,  // BLOCK_64X64,
469   0x0000000000000101ULL,  // BLOCK_4X16,
470   0x0000000000000003ULL,  // BLOCK_16X4,
471   0x0000000001010101ULL,  // BLOCK_8X32,
472   0x000000000000000fULL,  // BLOCK_32X8,
473   0x0303030303030303ULL,  // BLOCK_16X64,
474   0x000000000000ffffULL,  // BLOCK_64X16
475 };
476 
// Masks for the left and above 32x32 borders: left_border selects 8x8 columns
// 0 and 4 in every row, above_border selects every column of 8x8 rows 0 and 4.
static const uint64_t left_border = 0x1111111111111111ULL;
static const uint64_t above_border = 0x000000ff000000ffULL;
480 
481 // 16 bit masks for uv transform sizes.
482 static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
483 #if CONFIG_CHROMA_2X2
484   0xffff,  // TX_2X2
485 #endif
486   0xffff,  // TX_4X4
487   0xffff,  // TX_8x8
488   0x5555,  // TX_16x16
489   0x1111,  // TX_32x32
490 #if CONFIG_TX64X64
491   0x0101,  // TX_64x64, never used
492 #endif     // CONFIG_TX64X64
493 };
494 
495 static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
496 #if CONFIG_CHROMA_2X2
497   0xffff,  // TX_2X2
498 #endif
499   0xffff,  // TX_4X4
500   0xffff,  // TX_8x8
501   0x0f0f,  // TX_16x16
502   0x000f,  // TX_32x32
503 #if CONFIG_TX64X64
504   0x0003,  // TX_64x64, never used
505 #endif     // CONFIG_TX64X64
506 };
507 
508 // 16 bit left mask to shift and set for each uv prediction size.
509 static const uint16_t left_prediction_mask_uv[BLOCK_SIZES_ALL] = {
510 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
511   0x0001,  // BLOCK_2X2,
512   0x0001,  // BLOCK_2X4,
513   0x0001,  // BLOCK_4X2,
514 #endif
515   0x0001,  // BLOCK_4X4,
516   0x0001,  // BLOCK_4X8,
517   0x0001,  // BLOCK_8X4,
518   0x0001,  // BLOCK_8X8,
519   0x0001,  // BLOCK_8X16,
520   0x0001,  // BLOCK_16X8,
521   0x0001,  // BLOCK_16X16,
522   0x0011,  // BLOCK_16X32,
523   0x0001,  // BLOCK_32X16,
524   0x0011,  // BLOCK_32X32,
525   0x1111,  // BLOCK_32X64
526   0x0011,  // BLOCK_64X32,
527   0x1111,  // BLOCK_64X64,
528   0x0001,  // BLOCK_4X16,
529   0x0001,  // BLOCK_16X4,
530   0x0011,  // BLOCK_8X32,
531   0x0001,  // BLOCK_32X8,
532   0x1111,  // BLOCK_16X64,
533   0x0001,  // BLOCK_64X16,
534 };
535 
536 // 16 bit above mask to shift and set for uv each prediction size.
537 static const uint16_t above_prediction_mask_uv[BLOCK_SIZES_ALL] = {
538 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
539   0x0001,  // BLOCK_2X2
540   0x0001,  // BLOCK_2X4
541   0x0001,  // BLOCK_4X2
542 #endif
543   0x0001,  // BLOCK_4X4
544   0x0001,  // BLOCK_4X8
545   0x0001,  // BLOCK_8X4
546   0x0001,  // BLOCK_8X8
547   0x0001,  // BLOCK_8X16,
548   0x0001,  // BLOCK_16X8
549   0x0001,  // BLOCK_16X16
550   0x0001,  // BLOCK_16X32,
551   0x0003,  // BLOCK_32X16,
552   0x0003,  // BLOCK_32X32,
553   0x0003,  // BLOCK_32X64,
554   0x000f,  // BLOCK_64X32,
555   0x000f,  // BLOCK_64X64,
556   0x0001,  // BLOCK_4X16,
557   0x0001,  // BLOCK_16X4,
558   0x0001,  // BLOCK_8X32,
559   0x0003,  // BLOCK_32X8,
560   0x0001,  // BLOCK_16X64,
561   0x000f,  // BLOCK_64X16
562 };
563 
564 // 64 bit mask to shift and set for each uv prediction size
565 static const uint16_t size_mask_uv[BLOCK_SIZES_ALL] = {
566 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
567   0x0001,  // BLOCK_2X2
568   0x0001,  // BLOCK_2X4
569   0x0001,  // BLOCK_4X2
570 #endif
571   0x0001,  // BLOCK_4X4
572   0x0001,  // BLOCK_4X8
573   0x0001,  // BLOCK_8X4
574   0x0001,  // BLOCK_8X8
575   0x0001,  // BLOCK_8X16,
576   0x0001,  // BLOCK_16X8
577   0x0001,  // BLOCK_16X16
578   0x0011,  // BLOCK_16X32,
579   0x0003,  // BLOCK_32X16,
580   0x0033,  // BLOCK_32X32,
581   0x3333,  // BLOCK_32X64,
582   0x00ff,  // BLOCK_64X32,
583   0xffff,  // BLOCK_64X64,
584   0x0001,  // BLOCK_4X16,
585   0x0001,  // BLOCK_16X4,
586   0x0011,  // BLOCK_8X32,
587   0x0003,  // BLOCK_32X8,
588   0x1111,  // BLOCK_16X64,
589   0x000f,  // BLOCK_64X16
590 };
// uv counterparts of the border masks, on the 4x4 grid used by the uv masks:
// left_border_uv selects the first column of every row, above_border_uv
// selects the whole first row.
static const uint16_t left_border_uv = 0x1111;
static const uint16_t above_border_uv = 0x000f;
593 
// Maps a prediction mode (used as the array index) to the index of the
// loop-filter mode delta that applies to it. Zero-motion modes and all intra
// modes map to 0, every other inter mode maps to 1.
static const int mode_lf_lut[] = {
  // INTRA_MODES
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#if CONFIG_SMOOTH_HV
  0, 0,
#endif  // CONFIG_SMOOTH_HV
  // INTER_MODES (ZEROMV == 0)
  1, 1, 0, 1,
#if CONFIG_COMPOUND_SINGLEREF
  // INTER_SINGLEREF_COMP_MODES
  // NOTE(zoeliu): Remove SR_NEAREST_NEWMV
  1, 1, 1, 1,
#endif  // CONFIG_COMPOUND_SINGLEREF
  // INTER_COMPOUND_MODES (ZERO_ZEROMV == 0)
  1, 1, 1, 1, 1, 1, 0, 1
};
608 
// Recomputes the `lim` and `mblim` threshold vectors in lfi->lfthr[] for
// every filter level in [0, MAX_LOOP_FILTER], given the sharpness setting.
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  // The level is shifted right by 0 bits for sharpness 0, by 1 bit for
  // sharpness 1..4 and by 2 bits above that.
  const int shift = (sharpness_lvl > 0) + (sharpness_lvl > 4);

  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    int inside_limit = level >> shift;

    // A non-zero sharpness also caps the limit at 9 - sharpness_lvl.
    if (sharpness_lvl > 0) {
      const int cap = 9 - sharpness_lvl;
      if (inside_limit > cap) inside_limit = cap;
    }

    // The limit never goes below 1.
    if (inside_limit < 1) inside_limit = 1;

    memset(lfi->lfthr[level].lim, inside_limit, SIMD_WIDTH);
    memset(lfi->lfthr[level].mblim, 2 * (level + 2) + inside_limit,
           SIMD_WIDTH);
  }
}
629 #if CONFIG_EXT_DELTA_Q
get_filter_level(const AV1_COMMON * cm,const loop_filter_info_n * lfi_n,const int dir_idx,int plane,int mi_row,int mi_col,const MB_MODE_INFO * mbmi)630 static uint8_t get_filter_level(const AV1_COMMON *cm,
631                                 const loop_filter_info_n *lfi_n,
632 #if CONFIG_LOOPFILTER_LEVEL
633                                 const int dir_idx, int plane,
634 #endif
635 #if CONFIG_LPF_SB
636                                 int mi_row, int mi_col,
637 #endif
638                                 const MB_MODE_INFO *mbmi) {
639 #if CONFIG_LPF_SB
640   return cm->mi[mi_row * cm->mi_stride + mi_col].mbmi.filt_lvl;
641 #endif
642 
643 #if CONFIG_SUPERTX
644   const int segment_id = AOMMIN(mbmi->segment_id, mbmi->segment_id_supertx);
645   assert(
646       IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS));
647   assert(IMPLIES(supertx_enabled(mbmi),
648                  mbmi->segment_id_supertx <= mbmi->segment_id));
649 #else
650   const int segment_id = mbmi->segment_id;
651 #endif  // CONFIG_SUPERTX
652   if (cm->delta_lf_present_flag) {
653 #if CONFIG_LOOPFILTER_LEVEL
654     int delta_lf;
655     if (cm->delta_lf_multi) {
656       const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
657       delta_lf = mbmi->curr_delta_lf[delta_lf_idx];
658     } else {
659       delta_lf = mbmi->current_delta_lf_from_base;
660     }
661     int lvl_seg =
662         clamp(delta_lf + cm->lf.filter_level[dir_idx], 0, MAX_LOOP_FILTER);
663 #else
664     int lvl_seg = clamp(mbmi->current_delta_lf_from_base + cm->lf.filter_level,
665                         0, MAX_LOOP_FILTER);
666 #endif
667     const int scale = 1 << (lvl_seg >> 5);
668 #if CONFIG_LOOPFILTER_LEVEL
669     assert(plane >= 0 && plane <= 2);
670     const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
671     if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
672       const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
673       lvl_seg =
674           clamp(cm->seg.abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
675                 MAX_LOOP_FILTER);
676     }
677 #else
678     if (segfeature_active(&cm->seg, segment_id, SEG_LVL_ALT_LF)) {
679       const int data = get_segdata(&cm->seg, segment_id, SEG_LVL_ALT_LF);
680       lvl_seg =
681           clamp(cm->seg.abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
682                 MAX_LOOP_FILTER);
683     }
684 #endif  // CONFIG_LOOPFILTER_LEVEL
685 
686     if (cm->lf.mode_ref_delta_enabled) {
687       lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
688       if (mbmi->ref_frame[0] > INTRA_FRAME)
689         lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale;
690       lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER);
691     }
692     return lvl_seg;
693   } else {
694 #if CONFIG_LOOPFILTER_LEVEL
695     return lfi_n
696         ->lvl[segment_id][dir_idx][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
697 #else
698     return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
699 #endif
700   }
701 }
702 #else
get_filter_level(const loop_filter_info_n * lfi_n,const MB_MODE_INFO * mbmi)703 static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
704                                 const MB_MODE_INFO *mbmi) {
705 #if CONFIG_SUPERTX
706   const int segment_id = AOMMIN(mbmi->segment_id, mbmi->segment_id_supertx);
707   assert(
708       IMPLIES(supertx_enabled(mbmi), mbmi->segment_id_supertx != MAX_SEGMENTS));
709   assert(IMPLIES(supertx_enabled(mbmi),
710                  mbmi->segment_id_supertx <= mbmi->segment_id));
711 #else
712   const int segment_id = mbmi->segment_id;
713 #endif  // CONFIG_SUPERTX
714   return lfi_n->lvl[segment_id][mbmi->ref_frame[0]][mode_lf_lut[mbmi->mode]];
715 }
716 #endif
717 
// One-time loop-filter setup: fills the sharpness-dependent limits for the
// current sharpness level, remembers that level, and fills the hev_thr
// threshold vector for every filter level.
void av1_loop_filter_init(AV1_COMMON *cm) {
  // mode_lf_lut must have one entry per prediction mode.
  assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut));
  struct loopfilter *const lf = &cm->lf;
  loop_filter_info_n *const info = &cm->lf_info;

  // Limits for the given sharpness.
  update_sharpness(info, lf->sharpness_level);
  lf->last_sharpness_level = lf->sharpness_level;

  // hev threshold const vectors: level / 16.
  for (int level = 0; level <= MAX_LOOP_FILTER; ++level) {
    memset(info->lfthr[level].hev_thr, level >> 4, SIMD_WIDTH);
  }
}
732 
733 #if CONFIG_LPF_SB
// Stores `lvl` as the filter level of every mode-info unit in a window of
// MAX_MIB_SIZE units per side. The window starts FILT_BOUNDARY_MI_OFFSET
// units above and to the left of (mi_row, mi_col) and is clipped to the frame.
void av1_loop_filter_sb_level_init(AV1_COMMON *cm, int mi_row, int mi_col,
                                   int lvl) {
  const int top = mi_row - FILT_BOUNDARY_MI_OFFSET;
  const int left = mi_col - FILT_BOUNDARY_MI_OFFSET;
  const int first_row = AOMMAX(0, top);
  const int first_col = AOMMAX(0, left);
  const int row_limit = AOMMIN(top + MAX_MIB_SIZE, cm->mi_rows);
  const int col_limit = AOMMIN(left + MAX_MIB_SIZE, cm->mi_cols);

  for (int r = first_row; r < row_limit; ++r) {
    const int row_base = r * cm->mi_stride;
    for (int c = first_col; c < col_limit; ++c) {
      // cm->mi_grid_visible cannot be used here: within a partition all of
      // its pointers refer to the partition's first entry.
      cm->mi[row_base + c].mbmi.filt_lvl = lvl;
    }
  }
}
752 #endif  // CONFIG_LPF_SB
753 
// Per-frame loop-filter setup: refreshes the sharpness-dependent limits if
// the sharpness changed, then fills cm->lf_info.lvl[] with the filter level
// for every segment (and, with CONFIG_LOOPFILTER_LEVEL, every direction),
// reference frame and mode-delta index.
//
//   default_filt_lvl   - base level for direction 0.
//   default_filt_lvl_r - base level for direction 1.
//   plane              - (CONFIG_LOOPFILTER_LEVEL) plane index in [0, 2];
//                        selects the segment feature via seg_lvl_lf_lut.
//
// Without CONFIG_LOOPFILTER_LEVEL the table has no direction dimension, so
// the values written for direction 1 are the ones that remain.
void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl,
                                int default_filt_lvl_r
#if CONFIG_LOOPFILTER_LEVEL
                                ,
                                int plane
#endif
                                ) {
  int seg_id;
  // scale is the multiplier for the ref/mode deltas:
  // 1 when the filter level is between 0 and 31;
  // 2 when the filter level is between 32 and 63
  int scale = 1 << (default_filt_lvl >> 5);
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  const struct segmentation *const seg = &cm->seg;

  // update limits if sharpness has changed
  if (lf->last_sharpness_level != lf->sharpness_level) {
    update_sharpness(lfi, lf->sharpness_level);
    lf->last_sharpness_level = lf->sharpness_level;
  }

  for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
    for (int dir = 0; dir < 2; ++dir) {
      int lvl_seg = (dir == 0) ? default_filt_lvl : default_filt_lvl_r;
#if CONFIG_LOOPFILTER_LEVEL
      assert(plane >= 0 && plane <= 2);
      const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir];
      if (segfeature_active(seg, seg_id, seg_lf_feature_id)) {
        const int data = get_segdata(&cm->seg, seg_id, seg_lf_feature_id);
        // A relative segment delta is applied to the base level of the
        // direction being processed, not always to the direction-0 level.
        lvl_seg =
            clamp(seg->abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
                  MAX_LOOP_FILTER);
      }
#else
      if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
        const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
        // A relative segment delta is applied to the base level of the
        // direction being processed, not always to the direction-0 level.
        lvl_seg =
            clamp(seg->abs_delta == SEGMENT_ABSDATA ? data : lvl_seg + data, 0,
                  MAX_LOOP_FILTER);
      }
#endif  // CONFIG_LOOPFILTER_LEVEL

      if (!lf->mode_ref_delta_enabled) {
// we could get rid of this if we assume that deltas are set to
// zero when not in use; encoder always uses deltas
#if CONFIG_LOOPFILTER_LEVEL
        memset(lfi->lvl[seg_id][dir], lvl_seg, sizeof(lfi->lvl[seg_id][dir]));
#else
        memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
#endif  // CONFIG_LOOPFILTER_LEVEL
      } else {
        int ref, mode;
#if CONFIG_LOOPFILTER_LEVEL
        // Here the multiplier follows the segment-adjusted level.
        scale = 1 << (lvl_seg >> 5);

        const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
        lfi->lvl[seg_id][dir][INTRA_FRAME][0] =
            clamp(intra_lvl, 0, MAX_LOOP_FILTER);

        for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
          for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
            const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
                                  lf->mode_deltas[mode] * scale;
            lfi->lvl[seg_id][dir][ref][mode] =
                clamp(inter_lvl, 0, MAX_LOOP_FILTER);
          }
        }
#else
        // Here the multiplier stays the one derived from default_filt_lvl.
        const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
        lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);

        for (ref = LAST_FRAME; ref < TOTAL_REFS_PER_FRAME; ++ref) {
          for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
            const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
                                  lf->mode_deltas[mode] * scale;
            lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
          }
        }
#endif
      }
    }
  }
}
839 
filter_selectively_vert_row2(int subsampling_factor,uint8_t * s,int pitch,unsigned int mask_16x16_l,unsigned int mask_8x8_l,unsigned int mask_4x4_l,unsigned int mask_4x4_int_l,const loop_filter_info_n * lfi_n,const uint8_t * lfl)840 static void filter_selectively_vert_row2(int subsampling_factor, uint8_t *s,
841                                          int pitch, unsigned int mask_16x16_l,
842                                          unsigned int mask_8x8_l,
843                                          unsigned int mask_4x4_l,
844                                          unsigned int mask_4x4_int_l,
845                                          const loop_filter_info_n *lfi_n,
846                                          const uint8_t *lfl) {
847   const int mask_shift = subsampling_factor ? 4 : 8;
848   const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
849   const int lfl_forward = subsampling_factor ? 4 : 8;
850 
851   unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
852   unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
853   unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
854   unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
855   unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
856   unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
857   unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
858   unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
859   unsigned int mask;
860 
861   for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
862               mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
863        mask; mask >>= 1) {
864     const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
865     const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
866 
867     if (mask & 1) {
868       if ((mask_16x16_0 | mask_16x16_1) & 1) {
869         if ((mask_16x16_0 & mask_16x16_1) & 1) {
870           aom_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
871                                    lfi0->hev_thr);
872         } else if (mask_16x16_0 & 1) {
873           aom_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
874         } else {
875           aom_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
876                               lfi1->hev_thr);
877         }
878       }
879 
880       if ((mask_8x8_0 | mask_8x8_1) & 1) {
881         if ((mask_8x8_0 & mask_8x8_1) & 1) {
882           aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
883                                   lfi0->hev_thr, lfi1->mblim, lfi1->lim,
884                                   lfi1->hev_thr);
885         } else if (mask_8x8_0 & 1) {
886           aom_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
887         } else {
888           aom_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
889                              lfi1->hev_thr);
890         }
891       }
892 
893       if ((mask_4x4_0 | mask_4x4_1) & 1) {
894         if ((mask_4x4_0 & mask_4x4_1) & 1) {
895           aom_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
896                                   lfi0->hev_thr, lfi1->mblim, lfi1->lim,
897                                   lfi1->hev_thr);
898         } else if (mask_4x4_0 & 1) {
899           aom_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
900         } else {
901           aom_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
902                              lfi1->hev_thr);
903         }
904       }
905 
906       if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
907         if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
908           aom_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
909                                   lfi0->hev_thr, lfi1->mblim, lfi1->lim,
910                                   lfi1->hev_thr);
911         } else if (mask_4x4_int_0 & 1) {
912           aom_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
913                              lfi0->hev_thr);
914         } else {
915           aom_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
916                              lfi1->hev_thr);
917         }
918       }
919     }
920 
921     s += 8;
922     lfl += 1;
923     mask_16x16_0 >>= 1;
924     mask_8x8_0 >>= 1;
925     mask_4x4_0 >>= 1;
926     mask_4x4_int_0 >>= 1;
927     mask_16x16_1 >>= 1;
928     mask_8x8_1 >>= 1;
929     mask_4x4_1 >>= 1;
930     mask_4x4_int_1 >>= 1;
931   }
932 }
933 
934 #if CONFIG_HIGHBITDEPTH
highbd_filter_selectively_vert_row2(int subsampling_factor,uint16_t * s,int pitch,unsigned int mask_16x16_l,unsigned int mask_8x8_l,unsigned int mask_4x4_l,unsigned int mask_4x4_int_l,const loop_filter_info_n * lfi_n,const uint8_t * lfl,int bd)935 static void highbd_filter_selectively_vert_row2(
936     int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16_l,
937     unsigned int mask_8x8_l, unsigned int mask_4x4_l,
938     unsigned int mask_4x4_int_l, const loop_filter_info_n *lfi_n,
939     const uint8_t *lfl, int bd) {
940   const int mask_shift = subsampling_factor ? 4 : 8;
941   const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
942   const int lfl_forward = subsampling_factor ? 4 : 8;
943 
944   unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
945   unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
946   unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
947   unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
948   unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
949   unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
950   unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
951   unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
952   unsigned int mask;
953 
954   for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
955               mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
956        mask; mask >>= 1) {
957     const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
958     const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
959 
960     if (mask & 1) {
961       if ((mask_16x16_0 | mask_16x16_1) & 1) {
962         if ((mask_16x16_0 & mask_16x16_1) & 1) {
963           aom_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
964                                           lfi0->hev_thr, bd);
965         } else if (mask_16x16_0 & 1) {
966           aom_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
967                                      lfi0->hev_thr, bd);
968         } else {
969           aom_highbd_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
970                                      lfi1->lim, lfi1->hev_thr, bd);
971         }
972       }
973 
974       if ((mask_8x8_0 | mask_8x8_1) & 1) {
975         if ((mask_8x8_0 & mask_8x8_1) & 1) {
976           aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
977                                          lfi0->hev_thr, lfi1->mblim, lfi1->lim,
978                                          lfi1->hev_thr, bd);
979         } else if (mask_8x8_0 & 1) {
980           aom_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
981                                     lfi0->hev_thr, bd);
982         } else {
983           aom_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
984                                     lfi1->lim, lfi1->hev_thr, bd);
985         }
986       }
987 
988       if ((mask_4x4_0 | mask_4x4_1) & 1) {
989         if ((mask_4x4_0 & mask_4x4_1) & 1) {
990           aom_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
991                                          lfi0->hev_thr, lfi1->mblim, lfi1->lim,
992                                          lfi1->hev_thr, bd);
993         } else if (mask_4x4_0 & 1) {
994           aom_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
995                                     lfi0->hev_thr, bd);
996         } else {
997           aom_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
998                                     lfi1->lim, lfi1->hev_thr, bd);
999         }
1000       }
1001 
1002       if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
1003         if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
1004           aom_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
1005                                          lfi0->hev_thr, lfi1->mblim, lfi1->lim,
1006                                          lfi1->hev_thr, bd);
1007         } else if (mask_4x4_int_0 & 1) {
1008           aom_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
1009                                     lfi0->hev_thr, bd);
1010         } else {
1011           aom_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
1012                                     lfi1->lim, lfi1->hev_thr, bd);
1013         }
1014       }
1015     }
1016 
1017     s += 8;
1018     lfl += 1;
1019     mask_16x16_0 >>= 1;
1020     mask_8x8_0 >>= 1;
1021     mask_4x4_0 >>= 1;
1022     mask_4x4_int_0 >>= 1;
1023     mask_16x16_1 >>= 1;
1024     mask_8x8_1 >>= 1;
1025     mask_4x4_1 >>= 1;
1026     mask_4x4_int_1 >>= 1;
1027   }
1028 }
1029 #endif  // CONFIG_HIGHBITDEPTH
1030 
// Applies the horizontal-edge loop filters along one row of columns.
//
// The four masks hold one flag per column, least significant bit first. The
// loop walks the union of the masks; for every set bit it picks the filter in
// priority order 16x16, 8x8, 4x4, interior 4x4 and calls the matching
// aom_lpf_horizontal_* kernel. Each column is 8 pixels wide (s advances by
// 8 * count) and owns one entry of lfl[], which indexes lfi_n->lfthr to get
// the thresholds. When two neighbouring columns request the same filter they
// are handled together and `count` is set to 2 so both are skipped.
//
// Without CONFIG_LPF_DIRECT the kernels run directly on s; interior 4x4
// edges are filtered 4 rows below s (s + 4 * pitch).
//
// With CONFIG_LPF_DIRECT the extra parameters are used instead of s: pixels
// are gathered from src into the scratch array `block` by
// pick_filter_block_horz(), filtered there, and copied back to the src
// offsets recorded in orig_pos[]. width and height are shifted right by
// ss_x / ss_y before use; (mi_row + idx_r, mi_col + idx_c) scaled by MI_SIZE
// gives the pixel position, with idx_c advancing by col_step per column.
static void filter_selectively_horiz(
    uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_info_n *lfi_n, const uint8_t *lfl
#if CONFIG_LPF_DIRECT
    ,
    uint8_t *const src, int mi_row, int mi_col, int idx_r, int col_step,
    int width, int height, int ss_x, int ss_y
#endif
    ) {
  unsigned int mask;
  int count;
#if CONFIG_LPF_DIRECT
  // scale for u, v plane
  width >>= ss_x;
  height >>= ss_y;
  // Column offset added to mi_col for the column currently being processed.
  int idx_c = 0;
#endif

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
       mask >>= count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    // One column is consumed by default; the paired branches raise this to 2.
    count = 1;
    if (mask & 1) {
#if CONFIG_LPF_DIRECT
      int i;
      // Row stride of the scratch block handed to the filter kernels.
      const int line_length = 16;
      // Row of the scratch block at which the filter kernels are pointed.
      const int pivot = 8;
      // Both lengths come from the same expression: 8 for a 16x16 edge,
      // otherwise 4. Only above_filt_len is used below.
      const int above_filt_len = mask_16x16 & 1 ? 8 : 4;
      const int bot_filt_len = mask_16x16 & 1 ? 8 : 4;
      uint8_t block[256];  // line_length * size_of(BLOCK_8X8) * two_blocks
      // orig_pos[i] is the offset in src that block[i] is written back to;
      // negative entries are skipped by the write-back loops.
      int orig_pos[256];
      int direct;

      assert(above_filt_len == bot_filt_len);
      (void)bot_filt_len;
      // Clear the scratch block and mark every slot as unused.
      for (i = 0; i < 256; ++i) {
        block[i] = 0;
        orig_pos[i] = -1;
      }

      // actual position for current pixel
      const int row = (mi_row + idx_r) * MI_SIZE >> ss_y;
      const int col = (mi_col + idx_c) * MI_SIZE >> ss_x;

      // Next block's thresholds.
      const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          // Two adjacent 16x16 columns: filter both with one call.
          // Could use asymmetric length in the future
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 2, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 2,
                                 direct);

          aom_lpf_horizontal_edge_16(block + pivot * line_length, line_length,
                                     lfi->mblim, lfi->lim, lfi->hev_thr);
          count = 2;
        } else {
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 1, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 1,
                                 direct);

          aom_lpf_horizontal_edge_8(block + pivot * line_length, line_length,
                                    lfi->mblim, lfi->lim, lfi->hev_thr);
        }

        // Copy the filtered scratch pixels back to their source positions.
        for (i = 0; i < 256; ++i)
          if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Two adjacent 8x8 columns: dual filter, each with its own
          // thresholds (lfi for this column, lfin for the next).
          count = 2;
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 2, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 2,
                                 direct);

          aom_lpf_horizontal_8_dual(block + pivot * line_length, line_length,
                                    lfi->mblim, lfi->lim, lfi->hev_thr,
                                    lfin->mblim, lfin->lim, lfin->hev_thr);

          for (i = 0; i < 256; ++i)
            if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];

          // Interior 4x4 edges of the two columns, gathered at row + 4.
          if ((mask_4x4_int & 3) == 3) {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }

            // NOTE(review): the direction is picked at (row, col) while the
            // pixels are gathered at (row + 4, col) - confirm this is
            // intended.
            direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                          pitch, 2, 1);

            pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                                   height, pitch, pivot, line_length, 2,
                                   direct);

            aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length,
                                      lfi->mblim, lfi->lim, lfi->hev_thr,
                                      lfin->mblim, lfin->lim, lfin->hev_thr);

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          } else {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }

            if (mask_4x4_int & 1) {
              // Only this column has an interior edge.
              direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                            pitch, 1, 1);

              pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col,
                                     width, height, pitch, pivot, line_length,
                                     1, direct);

              aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                   lfi->mblim, lfi->lim, lfi->hev_thr);
            } else if (mask_4x4_int & 2) {
              // Only the next column (col + 8) has an interior edge.
              // NOTE(review): the direction is still picked at (row, col),
              // not at (row + 4, col + 8) - confirm this is intended.
              direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                            pitch, 1, 1);

              pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col + 8,
                                     width, height, pitch, pivot, line_length,
                                     1, direct);

              aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                   lfin->mblim, lfin->lim, lfin->hev_thr);
            }

            // No-op when neither interior bit was set: every orig_pos entry
            // is still -1.
            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          }
        } else {
          // Single 8x8 column.
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 1, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 1,
                                 direct);

          aom_lpf_horizontal_8(block + pivot * line_length, line_length,
                               lfi->mblim, lfi->lim, lfi->hev_thr);

          for (i = 0; i < 256; ++i)
            if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];

          if (mask_4x4_int & 1) {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }
            direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                          pitch, 1, 1);

            pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                                   height, pitch, pivot, line_length, 1,
                                   direct);

            aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                 lfi->mblim, lfi->lim, lfi->hev_thr);

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          }
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Two adjacent 4x4 columns: dual filter, then interior edges.
          count = 2;
          direct = pick_min_grad_direct(src, 4, row, col, width, height, pitch,
                                        2, 1);

          pick_filter_block_horz(src, block, orig_pos, 4, row, col, width,
                                 height, pitch, pivot, line_length, 2, direct);

          aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length,
                                    lfi->mblim, lfi->lim, lfi->hev_thr,
                                    lfin->mblim, lfin->lim, lfin->hev_thr);

          for (i = 0; i < 256; ++i)
            if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];

          if ((mask_4x4_int & 3) == 3) {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }

            direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                          pitch, 2, 1);

            pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                                   height, pitch, pivot, line_length, 2,
                                   direct);

            aom_lpf_horizontal_4_dual(block + pivot * line_length, line_length,
                                      lfi->mblim, lfi->lim, lfi->hev_thr,
                                      lfin->mblim, lfin->lim, lfin->hev_thr);

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          } else {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }

            if (mask_4x4_int & 1) {
              direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                            pitch, 1, 1);

              pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col,
                                     width, height, pitch, pivot, line_length,
                                     1, direct);

              aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                   lfi->mblim, lfi->lim, lfi->hev_thr);
            } else if (mask_4x4_int & 2) {
              direct = pick_min_grad_direct(src, 4, row, col, width, height,
                                            pitch, 1, 1);

              pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col + 8,
                                     width, height, pitch, pivot, line_length,
                                     1, direct);

              aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                   lfin->mblim, lfin->lim, lfin->hev_thr);
            }

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          }
        } else {
          // Single 4x4 column. above_filt_len is 4 here because the 16x16
          // bit is clear in this branch.
          direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                        height, pitch, 1, 1);

          pick_filter_block_horz(src, block, orig_pos, above_filt_len, row, col,
                                 width, height, pitch, pivot, line_length, 1,
                                 direct);

          aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                               lfi->mblim, lfi->lim, lfi->hev_thr);

          for (i = 0; i < 256; ++i)
            if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];

          if (mask_4x4_int & 1) {
            for (i = 0; i < 256; ++i) {
              block[i] = 0;
              orig_pos[i] = -1;
            }
            direct = pick_min_grad_direct(src, above_filt_len, row, col, width,
                                          height, pitch, 1, 1);

            pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                                   height, pitch, pivot, line_length, 1,
                                   direct);

            aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                                 lfi->mblim, lfi->lim, lfi->hev_thr);

            for (i = 0; i < 256; ++i)
              if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
          }
        }
      } else if (mask_4x4_int & 1) {
        // Only an interior 4x4 edge is flagged for this column.
        direct =
            pick_min_grad_direct(src, 4, row, col, width, height, pitch, 1, 1);

        pick_filter_block_horz(src, block, orig_pos, 4, row + 4, col, width,
                               height, pitch, pivot, line_length, 1, direct);

        aom_lpf_horizontal_4(block + pivot * line_length, line_length,
                             lfi->mblim, lfi->lim, lfi->hev_thr);

        for (i = 0; i < 256; ++i)
          if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
      }
#else   // CONFIG_LPF_DIRECT
      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          // Two adjacent 16x16 columns: one 16-pixel-wide call covers both.
          aom_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr);
          count = 2;
        } else {
          aom_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          aom_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);

          // Interior 4x4 edges, 4 rows below s: both columns, only this
          // column, or only the next column (s + 8).
          if ((mask_4x4_int & 3) == 3) {
            aom_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              aom_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          aom_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);

          if ((mask_4x4_int & 3) == 3) {
            aom_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              aom_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else if (mask_4x4_int & 1) {
        // Only an interior 4x4 edge is flagged for this column.
        aom_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                             lfi->hev_thr);
      }
#endif  // CONFIG_LPF_DIRECT
    }
    // Advance past the column(s) just handled; every mask is shifted by the
    // same amount so their low bits stay aligned with s and lfl.
#if CONFIG_LPF_DIRECT
    idx_c += col_step * count;
#endif
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}
1404 
1405 #if CONFIG_HIGHBITDEPTH
highbd_filter_selectively_horiz(uint16_t * s,int pitch,unsigned int mask_16x16,unsigned int mask_8x8,unsigned int mask_4x4,unsigned int mask_4x4_int,const loop_filter_info_n * lfi_n,const uint8_t * lfl,int bd)1406 static void highbd_filter_selectively_horiz(
1407     uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
1408     unsigned int mask_4x4, unsigned int mask_4x4_int,
1409     const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
1410   unsigned int mask;
1411   int count;
1412 
1413   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
1414        mask >>= count) {
1415     const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
1416 
1417     count = 1;
1418     if (mask & 1) {
1419       if (mask_16x16 & 1) {
1420         if ((mask_16x16 & 3) == 3) {
1421           aom_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
1422                                             lfi->hev_thr, bd);
1423           count = 2;
1424         } else {
1425           aom_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
1426                                            lfi->hev_thr, bd);
1427         }
1428       } else if (mask_8x8 & 1) {
1429         if ((mask_8x8 & 3) == 3) {
1430           // Next block's thresholds.
1431           const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
1432 
1433           aom_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
1434                                            lfi->hev_thr, lfin->mblim, lfin->lim,
1435                                            lfin->hev_thr, bd);
1436 
1437           if ((mask_4x4_int & 3) == 3) {
1438             aom_highbd_lpf_horizontal_4_dual(
1439                 s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
1440                 lfin->mblim, lfin->lim, lfin->hev_thr, bd);
1441           } else {
1442             if (mask_4x4_int & 1) {
1443               aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
1444                                           lfi->lim, lfi->hev_thr, bd);
1445             } else if (mask_4x4_int & 2) {
1446               aom_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
1447                                           lfin->lim, lfin->hev_thr, bd);
1448             }
1449           }
1450           count = 2;
1451         } else {
1452           aom_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
1453                                       lfi->hev_thr, bd);
1454 
1455           if (mask_4x4_int & 1) {
1456             aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
1457                                         lfi->lim, lfi->hev_thr, bd);
1458           }
1459         }
1460       } else if (mask_4x4 & 1) {
1461         if ((mask_4x4 & 3) == 3) {
1462           // Next block's thresholds.
1463           const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
1464 
1465           aom_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
1466                                            lfi->hev_thr, lfin->mblim, lfin->lim,
1467                                            lfin->hev_thr, bd);
1468           if ((mask_4x4_int & 3) == 3) {
1469             aom_highbd_lpf_horizontal_4_dual(
1470                 s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
1471                 lfin->mblim, lfin->lim, lfin->hev_thr, bd);
1472           } else {
1473             if (mask_4x4_int & 1) {
1474               aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
1475                                           lfi->lim, lfi->hev_thr, bd);
1476             } else if (mask_4x4_int & 2) {
1477               aom_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
1478                                           lfin->lim, lfin->hev_thr, bd);
1479             }
1480           }
1481           count = 2;
1482         } else {
1483           aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
1484                                       lfi->hev_thr, bd);
1485 
1486           if (mask_4x4_int & 1) {
1487             aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
1488                                         lfi->lim, lfi->hev_thr, bd);
1489           }
1490         }
1491       } else if (mask_4x4_int & 1) {
1492         aom_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
1493                                     lfi->hev_thr, bd);
1494       }
1495     }
1496     s += 8 * count;
1497     lfl += count;
1498     mask_16x16 >>= count;
1499     mask_8x8 >>= count;
1500     mask_4x4 >>= count;
1501     mask_4x4_int >>= count;
1502   }
1503 }
1504 #endif  // CONFIG_HIGHBITDEPTH
1505 
1506 // This function ors into the current lfm structure, where to do loop
1507 // filters for the specific mi we are looking at. It uses information
1508 // including the block_size_type (32x16, 32x32, etc.), the transform size,
1509 // whether there were any coefficients encoded, and the loop filter strength
1510 // block we are currently looking at. Shift is used to position the
1511 // 1's we produce.
1512 // TODO(JBB) Need another function for different resolution color..
build_masks(AV1_COMMON * const cm,const loop_filter_info_n * const lfi_n,const MODE_INFO * mi,const int shift_y,const int shift_uv,LOOP_FILTER_MASK * lfm)1513 static void build_masks(AV1_COMMON *const cm,
1514                         const loop_filter_info_n *const lfi_n,
1515                         const MODE_INFO *mi, const int shift_y,
1516                         const int shift_uv, LOOP_FILTER_MASK *lfm) {
1517   const MB_MODE_INFO *mbmi = &mi->mbmi;
1518   const BLOCK_SIZE block_size = mbmi->sb_type;
1519   // TODO(debargha): Check if masks can be setup correctly when
1520   // rectangular transfroms are used with the EXT_TX expt.
1521   const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
1522   const TX_SIZE tx_size_y_left = txsize_horz_map[mbmi->tx_size];
1523   const TX_SIZE tx_size_y_above = txsize_vert_map[mbmi->tx_size];
1524   const TX_SIZE tx_size_uv =
1525       txsize_sqr_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
1526   const TX_SIZE tx_size_uv_left =
1527       txsize_horz_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
1528   const TX_SIZE tx_size_uv_above =
1529       txsize_vert_map[uv_txsize_lookup[block_size][mbmi->tx_size][1][1]];
1530 #if CONFIG_EXT_DELTA_Q
1531 #if CONFIG_LOOPFILTER_LEVEL
1532   const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
1533 #else
1534 #if CONFIG_LPF_SB
1535   const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
1536 #else
1537   const int filter_level = get_filter_level(cm, lfi_n, mbmi);
1538 #endif  // CONFIG_LPF_SB
1539 #endif
1540 #else
1541   const int filter_level = get_filter_level(lfi_n, mbmi);
1542   (void)cm;
1543 #endif
1544   uint64_t *const left_y = &lfm->left_y[tx_size_y_left];
1545   uint64_t *const above_y = &lfm->above_y[tx_size_y_above];
1546   uint64_t *const int_4x4_y = &lfm->int_4x4_y;
1547   uint16_t *const left_uv = &lfm->left_uv[tx_size_uv_left];
1548   uint16_t *const above_uv = &lfm->above_uv[tx_size_uv_above];
1549   uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv;
1550   int i;
1551 
1552   // If filter level is 0 we don't loop filter.
1553   if (!filter_level) {
1554     return;
1555   } else {
1556     const int w = num_8x8_blocks_wide_lookup[block_size];
1557     const int h = num_8x8_blocks_high_lookup[block_size];
1558     const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
1559     const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
1560 
1561     for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
1562   }
1563 
1564   // These set 1 in the current block size for the block size edges.
1565   // For instance if the block size is 32x16, we'll set:
1566   //    above =   1111
1567   //              0000
1568   //    and
1569   //    left  =   1000
1570   //          =   1000
1571   // NOTE : In this example the low bit is left most ( 1000 ) is stored as
1572   //        1,  not 8...
1573   //
1574   // U and V set things on a 16 bit scale.
1575   //
1576   *above_y |= above_prediction_mask[block_size] << shift_y;
1577   *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
1578   *left_y |= left_prediction_mask[block_size] << shift_y;
1579   *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
1580 
1581   // If the block has no coefficients and is not intra we skip applying
1582   // the loop filter on block edges.
1583   if (mbmi->skip && is_inter_block(mbmi)) return;
1584 
1585   // Here we are adding a mask for the transform size. The transform
1586   // size mask is set to be correct for a 64x64 prediction block size. We
1587   // mask to match the size of the block we are working on and then shift it
1588   // into place..
1589   *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y_above])
1590               << shift_y;
1591   *above_uv |=
1592       (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv_above])
1593       << shift_uv;
1594 
1595   *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y_left])
1596              << shift_y;
1597   *left_uv |=
1598       (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv_left])
1599       << shift_uv;
1600 
1601   // Here we are trying to determine what to do with the internal 4x4 block
1602   // boundaries.  These differ from the 4x4 boundaries on the outside edge of
1603   // an 8x8 in that the internal ones can be skipped and don't depend on
1604   // the prediction block size.
1605   if (tx_size_y == TX_4X4)
1606     *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
1607 
1608   if (tx_size_uv == TX_4X4)
1609     *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
1610 }
1611 
1612 // This function does the same thing as the one above with the exception that
1613 // it only affects the y masks. It exists because for blocks < 16x16 in size,
1614 // we only update u and v masks on the first block.
build_y_mask(AV1_COMMON * const cm,const loop_filter_info_n * const lfi_n,const MODE_INFO * mi,const int shift_y,int supertx_enabled,LOOP_FILTER_MASK * lfm)1615 static void build_y_mask(AV1_COMMON *const cm,
1616                          const loop_filter_info_n *const lfi_n,
1617                          const MODE_INFO *mi, const int shift_y,
1618 #if CONFIG_SUPERTX
1619                          int supertx_enabled,
1620 #endif  // CONFIG_SUPERTX
1621                          LOOP_FILTER_MASK *lfm) {
1622   const MB_MODE_INFO *mbmi = &mi->mbmi;
1623   const TX_SIZE tx_size_y = txsize_sqr_map[mbmi->tx_size];
1624   const TX_SIZE tx_size_y_left = txsize_horz_map[mbmi->tx_size];
1625   const TX_SIZE tx_size_y_above = txsize_vert_map[mbmi->tx_size];
1626 #if CONFIG_SUPERTX
1627   const BLOCK_SIZE block_size =
1628       supertx_enabled ? (BLOCK_SIZE)(3 * tx_size_y) : mbmi->sb_type;
1629 #else
1630   const BLOCK_SIZE block_size = mbmi->sb_type;
1631 #endif
1632 #if CONFIG_EXT_DELTA_Q
1633 #if CONFIG_LOOPFILTER_LEVEL
1634   const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
1635 #else
1636 #if CONFIG_LPF_SB
1637   const int filter_level = get_filter_level(cm, lfi_n, 0, 0, mbmi);
1638 #else
1639   const int filter_level = get_filter_level(cm, lfi_n, mbmi);
1640 #endif  // CONFIG_LPF_SB
1641 #endif
1642 #else
1643   const int filter_level = get_filter_level(lfi_n, mbmi);
1644   (void)cm;
1645 #endif
1646   uint64_t *const left_y = &lfm->left_y[tx_size_y_left];
1647   uint64_t *const above_y = &lfm->above_y[tx_size_y_above];
1648   uint64_t *const int_4x4_y = &lfm->int_4x4_y;
1649   int i;
1650 
1651   if (!filter_level) {
1652     return;
1653   } else {
1654     const int w = num_8x8_blocks_wide_lookup[block_size];
1655     const int h = num_8x8_blocks_high_lookup[block_size];
1656     const int row = (shift_y >> MAX_MIB_SIZE_LOG2);
1657     const int col = shift_y - (row << MAX_MIB_SIZE_LOG2);
1658 
1659     for (i = 0; i < h; i++) memset(&lfm->lfl_y[row + i][col], filter_level, w);
1660   }
1661 
1662   *above_y |= above_prediction_mask[block_size] << shift_y;
1663   *left_y |= left_prediction_mask[block_size] << shift_y;
1664 
1665   if (mbmi->skip && is_inter_block(mbmi)) return;
1666 
1667   *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y_above])
1668               << shift_y;
1669 
1670   *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y_left])
1671              << shift_y;
1672 
1673   if (tx_size_y == TX_4X4)
1674     *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
1675 }
1676 
#if CONFIG_LOOPFILTERING_ACROSS_TILES
// Updates the bit masks of the whole 64x64 region located at (mi_row, mi_col)
// so that an edge lying on a tile boundary is not loop filtered. Only the
// boundary info of the top-left mi is inspected; it decides the treatment of
// the top and left edges of the entire super block.
static void update_tile_boundary_filter_mask(AV1_COMMON *const cm,
                                             const int mi_row, const int mi_col,
                                             LOOP_FILTER_MASK *lfm) {
  const MODE_INFO *const top_left = cm->mi + mi_row * cm->mi_stride + mi_col;
  const int on_left_tile_edge =
      (top_left->mbmi.boundary_info & TILE_LEFT_BOUNDARY) != 0;
  const int on_above_tile_edge =
      (top_left->mbmi.boundary_info & TILE_ABOVE_BOUNDARY) != 0;
  int tx;

  for (tx = 0; tx <= TX_32X32; ++tx) {
    if (on_left_tile_edge) {
      // Clear bit 0 of every byte (luma) / every nibble (chroma).
      lfm->left_y[tx] &= 0xfefefefefefefefeULL;
      lfm->left_uv[tx] &= 0xeeee;
    }
    if (on_above_tile_edge) {
      // Clear the lowest byte (luma) / lowest nibble (chroma).
      lfm->above_y[tx] &= 0xffffffffffffff00ULL;
      lfm->above_uv[tx] &= 0xfff0;
    }
  }
}
#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
1704 
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
//
//   cm               - common state; supplies lf_info, mi_rows and mi_cols.
//   mi_row, mi_col   - position of the region, in mode-info units.
//   mi               - points at the mode-info pointer of the region's
//                      top-left block; other blocks are reached by adding
//                      column offsets and multiples of mode_info_stride.
//   mode_info_stride - distance between vertically adjacent entries of 'mi'.
//   lfm              - output; zeroed on entry and then filled in.
//
// Steps, in order:
//   1. Walk the partition (64 -> 32 -> 16 -> 8) and let build_masks() /
//      build_y_mask() accumulate the per-block masks.
//   2. Fold the TX_32X32 masks into the TX_16X16 masks.
//   3. Move 4x4 edges that lie on a border position to the 8x8 masks.
//   4. Clip the masks at the bottom and right frame edges and drop the
//      left-most column when mi_col == 0.
//   5. Optionally clear tile-boundary edges.
//   6. Assert that no position is claimed by two different filter sizes.
// TODO(JBB): This function only works for yv12.
void av1_setup_mask(AV1_COMMON *const cm, const int mi_row, const int mi_col,
                    MODE_INFO **mi, const int mode_info_stride,
                    LOOP_FILTER_MASK *lfm) {
#if CONFIG_EXT_PARTITION
  assert(0 && "Not yet updated");
#endif  // CONFIG_EXT_PARTITION
  int idx_32, idx_16, idx_8;
  const loop_filter_info_n *const lfi_n = &cm->lf_info;
  // 'mip' walks the partition tree; 'mip2' addresses the second half of a
  // two-part (rectangular) partition.
  MODE_INFO **mip = mi;
  MODE_INFO **mip2 = mi;

  // These are offsets to the next mi in the 64x64 block. It is what gets
  // added to the mi ptr as we go through each loop. It helps us to avoid
  // setting up special row and column counters for each index. The last step
  // brings us out back to the starting position.
  const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4,
                            -(mode_info_stride << 2) - 4 };
  const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2,
                            -(mode_info_stride << 1) - 2 };
  const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 };

  // Following variables represent shifts to position the current block
  // mask over the appropriate block. A shift of 36 to the left will move
  // the bits for the final 32 by 32 block in the 64x64 by 4 rows (32 bits)
  // and 4 columns (4 bits) to the appropriate spot.
  const int shift_32_y[] = { 0, 4, 32, 36 };
  const int shift_16_y[] = { 0, 2, 16, 18 };
  const int shift_8_y[] = { 0, 1, 8, 9 };
  const int shift_32_uv[] = { 0, 2, 8, 10 };
  const int shift_16_uv[] = { 0, 1, 4, 5 };
  int i;
  // Number of mi rows / columns of this region that lie inside the frame.
  const int max_rows = AOMMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE);
  const int max_cols = AOMMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE);

  av1_zero(*lfm);
  assert(mip[0] != NULL);

  // TODO(jimbankoski): Try moving most of the following code into decode
  // loop and storing lfm in the mbmi structure so that we don't have to go
  // through the recursive loop structure multiple times.
  switch (mip[0]->mbmi.sb_type) {
    case BLOCK_64X64: build_masks(cm, lfi_n, mip[0], 0, 0, lfm); break;
    case BLOCK_64X32: build_masks(cm, lfi_n, mip[0], 0, 0, lfm);
#if CONFIG_SUPERTX && CONFIG_TX64X64
      if (supertx_enabled(&mip[0]->mbmi)) break;
#endif  // CONFIG_SUPERTX && CONFIG_TX64X64
      // Second (lower) half, 4 mi rows down; skipped when outside the frame.
      mip2 = mip + mode_info_stride * 4;
      if (4 >= max_rows) break;
      build_masks(cm, lfi_n, mip2[0], 32, 8, lfm);
      break;
    case BLOCK_32X64: build_masks(cm, lfi_n, mip[0], 0, 0, lfm);
#if CONFIG_SUPERTX && CONFIG_TX64X64
      if (supertx_enabled(&mip[0]->mbmi)) break;
#endif  // CONFIG_SUPERTX && CONFIG_TX64X64
      // Second (right) half, 4 mi columns over; skipped when outside the
      // frame.
      mip2 = mip + 4;
      if (4 >= max_cols) break;
      build_masks(cm, lfi_n, mip2[0], 4, 2, lfm);
      break;
    default:
#if CONFIG_SUPERTX && CONFIG_TX64X64
      if (mip[0]->mbmi.tx_size == TX_64X64) {
        build_masks(cm, lfi_n, mip[0], 0, 0, lfm);
      } else {
#endif  // CONFIG_SUPERTX && CONFIG_TX64X64
        // Visit the four 32x32 quadrants. 'mip' is advanced by the loop
        // increment, so a 'continue' below still moves on to the next
        // quadrant.
        for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
          const int shift_y_32 = shift_32_y[idx_32];
          const int shift_uv_32 = shift_32_uv[idx_32];
          const int mi_32_col_offset = ((idx_32 & 1) << 2);
          const int mi_32_row_offset = ((idx_32 >> 1) << 2);
          // Quadrant lies entirely outside the frame.
          if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
            continue;
          switch (mip[0]->mbmi.sb_type) {
            case BLOCK_32X32:
              build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
              break;
            case BLOCK_32X16:
              build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
#if CONFIG_SUPERTX
              if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
              if (mi_32_row_offset + 2 >= max_rows) continue;
              mip2 = mip + mode_info_stride * 2;
              build_masks(cm, lfi_n, mip2[0], shift_y_32 + 16, shift_uv_32 + 4,
                          lfm);
              break;
            case BLOCK_16X32:
              build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
#if CONFIG_SUPERTX
              if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
              if (mi_32_col_offset + 2 >= max_cols) continue;
              mip2 = mip + 2;
              build_masks(cm, lfi_n, mip2[0], shift_y_32 + 2, shift_uv_32 + 1,
                          lfm);
              break;
            default:
#if CONFIG_SUPERTX
              if (mip[0]->mbmi.tx_size == TX_32X32) {
                build_masks(cm, lfi_n, mip[0], shift_y_32, shift_uv_32, lfm);
                break;
              }
#endif
              // Visit the four 16x16 sub-blocks of this 32x32 quadrant.
              for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
                const int shift_y_32_16 = shift_y_32 + shift_16_y[idx_16];
                const int shift_uv_32_16 = shift_uv_32 + shift_16_uv[idx_16];
                const int mi_16_col_offset =
                    mi_32_col_offset + ((idx_16 & 1) << 1);
                const int mi_16_row_offset =
                    mi_32_row_offset + ((idx_16 >> 1) << 1);

                // Sub-block lies entirely outside the frame.
                if (mi_16_col_offset >= max_cols ||
                    mi_16_row_offset >= max_rows)
                  continue;

                switch (mip[0]->mbmi.sb_type) {
                  case BLOCK_16X16:
                    build_masks(cm, lfi_n, mip[0], shift_y_32_16,
                                shift_uv_32_16, lfm);
                    break;
                  case BLOCK_16X8:
                  // NOTE(review): unlike BLOCK_32X16 above, the supertx check
                  // here precedes build_masks(), so no mask is built for this
                  // block when supertx is enabled -- confirm this is
                  // intended.
#if CONFIG_SUPERTX
                    if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
                    build_masks(cm, lfi_n, mip[0], shift_y_32_16,
                                shift_uv_32_16, lfm);
                    if (mi_16_row_offset + 1 >= max_rows) continue;
                    // The second half only contributes luma masks.
                    mip2 = mip + mode_info_stride;
                    build_y_mask(cm, lfi_n, mip2[0], shift_y_32_16 + 8,
#if CONFIG_SUPERTX
                                 0,
#endif
                                 lfm);
                    break;
                  case BLOCK_8X16:
                  // NOTE(review): same ordering of the supertx check as in
                  // BLOCK_16X8 -- confirm.
#if CONFIG_SUPERTX
                    if (supertx_enabled(&mip[0]->mbmi)) break;
#endif
                    build_masks(cm, lfi_n, mip[0], shift_y_32_16,
                                shift_uv_32_16, lfm);
                    if (mi_16_col_offset + 1 >= max_cols) continue;
                    // The second half only contributes luma masks.
                    mip2 = mip + 1;
                    build_y_mask(cm, lfi_n, mip2[0], shift_y_32_16 + 1,
#if CONFIG_SUPERTX
                                 0,
#endif
                                 lfm);
                    break;
                  default: {
                    const int shift_y_32_16_8_zero =
                        shift_y_32_16 + shift_8_y[0];
#if CONFIG_SUPERTX
                    if (mip[0]->mbmi.tx_size == TX_16X16) {
                      build_masks(cm, lfi_n, mip[0], shift_y_32_16_8_zero,
                                  shift_uv_32_16, lfm);
                      break;
                    }
#endif
                    // The first 8x8 block sets luma and chroma masks; the
                    // remaining three only set luma masks.
                    build_masks(cm, lfi_n, mip[0], shift_y_32_16_8_zero,
                                shift_uv_32_16, lfm);
                    mip += offset[0];
                    for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
                      const int shift_y_32_16_8 =
                          shift_y_32_16 + shift_8_y[idx_8];
                      const int mi_8_col_offset =
                          mi_16_col_offset + ((idx_8 & 1));
                      const int mi_8_row_offset =
                          mi_16_row_offset + ((idx_8 >> 1));

                      // 8x8 block lies outside the frame.
                      if (mi_8_col_offset >= max_cols ||
                          mi_8_row_offset >= max_rows)
                        continue;
                      build_y_mask(cm, lfi_n, mip[0], shift_y_32_16_8,
#if CONFIG_SUPERTX
                                   supertx_enabled(&mip[0]->mbmi),
#endif
                                   lfm);
                    }
                    break;
                  }
                }
              }
              break;
          }
        }
#if CONFIG_SUPERTX && CONFIG_TX64X64
      }
#endif  // CONFIG_SUPERTX && CONFIG_TX64X64
      break;
  }
  // The largest loopfilter we have is 16x16 so we use the 16x16 mask
  // for 32x32 transforms also.
  lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
  lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
  lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
  lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];

  // We do at least 8 tap filter on every 32x32 even if the transform size
  // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
  // remove it from the 4x4.
  lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
  lfm->left_y[TX_4X4] &= ~left_border;
  lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
  lfm->above_y[TX_4X4] &= ~above_border;
  lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
  lfm->left_uv[TX_4X4] &= ~left_border_uv;
  lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
  lfm->above_uv[TX_4X4] &= ~above_border_uv;

  // We do some special edge handling.
  // Bottom frame edge: the region extends below the last mi row.
  if (mi_row + MAX_MIB_SIZE > cm->mi_rows) {
    const uint64_t rows = cm->mi_rows - mi_row;

    // Each pixel inside the border gets a 1,
    const uint64_t mask_y = (((uint64_t)1 << (rows << MAX_MIB_SIZE_LOG2)) - 1);
    const uint16_t mask_uv =
        (((uint16_t)1 << (((rows + 1) >> 1) << (MAX_MIB_SIZE_LOG2 - 1))) - 1);

    // Remove values completely outside our border. The TX_32X32 entries are
    // not touched here; they were already folded into TX_16X16 above.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    // The "above" internal uv mask is a row-clipped copy of the "left" one.
    lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv;

    // We don't apply a wide loop filter on the last uv block row. If set
    // apply the shorter one instead.
    if (rows == 1) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
      lfm->above_uv[TX_16X16] = 0;
    }
    if (rows == 5) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
      lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
    }
  } else {
    lfm->above_int_4x4_uv = lfm->left_int_4x4_uv;
  }

  // Right frame edge: the region extends past the last mi column.
  if (mi_col + MAX_MIB_SIZE > cm->mi_cols) {
    const uint64_t columns = cm->mi_cols - mi_col;

    // Each pixel inside the border gets a 1, the multiply copies the border
    // to where we need it.
    const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
    const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;

    // Internal edges are not applied on the last column of the image so
    // we mask 1 more for the internal edges
    const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;

    // Remove the bits outside the image edge.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->left_int_4x4_uv &= mask_uv_int;

    // We don't apply a wide loop filter on the last uv column. If set
    // apply the shorter one instead.
    if (columns == 1) {
      lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
      lfm->left_uv[TX_16X16] = 0;
    }
    if (columns == 5) {
      lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
      lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
    }
  }
  // We don't apply a loop filter on the first column in the image, mask that
  // out.
  if (mi_col == 0) {
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= 0xfefefefefefefefeULL;
      lfm->left_uv[i] &= 0xeeee;
    }
  }

#if CONFIG_LOOPFILTERING_ACROSS_TILES
  // Clear the top / left edges of the region when they lie on a tile boundary
  // and filtering across tile boundaries is disabled.
  if (av1_disable_loopfilter_on_tile_boundary(cm)) {
    update_tile_boundary_filter_mask(cm, mi_row, mi_col, lfm);
  }
#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES

  // Assert if we try to apply 2 different loop filters at the same position.
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
  assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
  assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16]));
}
2015 
filter_selectively_vert(uint8_t * s,int pitch,unsigned int mask_16x16,unsigned int mask_8x8,unsigned int mask_4x4,unsigned int mask_4x4_int,const loop_filter_info_n * lfi_n,const uint8_t * lfl,uint8_t * const src,int mi_row,int mi_col,int idx_r,int col_step,int width,int height,int ss_x,int ss_y)2016 static void filter_selectively_vert(
2017     uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
2018     unsigned int mask_4x4, unsigned int mask_4x4_int,
2019     const loop_filter_info_n *lfi_n, const uint8_t *lfl
2020 #if CONFIG_LPF_DIRECT
2021     ,
2022     uint8_t *const src, int mi_row, int mi_col, int idx_r, int col_step,
2023     int width, int height, int ss_x, int ss_y
2024 #endif
2025     ) {
2026   unsigned int mask;
2027 #if CONFIG_LPF_DIRECT
2028   // scale for u, v plane
2029   width >>= ss_x;
2030   height >>= ss_y;
2031   int idx_c = 0;
2032 #endif
2033 
2034   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
2035        mask >>= 1) {
2036     const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
2037 
2038 #if CONFIG_LPF_DIRECT
2039     int i;
2040     const int pivot = 8;
2041     const int left_filt_len = mask_16x16 & 1 ? 8 : 4;
2042     const int right_filt_len = mask_16x16 & 1 ? 8 : 4;
2043     const int line_length = 16;
2044     uint8_t block[128];
2045     int orig_pos[128];
2046 
2047     // actual position for current pixel
2048     const int row = (mi_row + idx_r) * MI_SIZE >> ss_y;
2049     const int col = (mi_col + idx_c) * MI_SIZE >> ss_x;
2050 
2051     // Could use asymmetric length in the future
2052     assert(left_filt_len == right_filt_len);
2053     (void)right_filt_len;
2054 
2055     if ((mask_16x16 & 1) || (mask_8x8 & 1) || (mask_4x4 & 1)) {
2056       for (i = 0; i < 128; ++i) {
2057         block[i] = 0;
2058         orig_pos[i] = -1;
2059       }
2060 
2061       const int direct = pick_min_grad_direct(src, left_filt_len, row, col,
2062                                               width, height, pitch, 1, 0);
2063 
2064       pick_filter_block_vert(src, block, orig_pos, left_filt_len, row, col,
2065                              width, height, pitch, pivot, line_length, 1,
2066                              direct);
2067 
2068       // apply filtering
2069       if (mask_16x16 & 1) {
2070         aom_lpf_vertical_16(block + pivot, line_length, lfi->mblim, lfi->lim,
2071                             lfi->hev_thr);
2072       } else if (mask_8x8 & 1) {
2073         aom_lpf_vertical_8(block + pivot, line_length, lfi->mblim, lfi->lim,
2074                            lfi->hev_thr);
2075       } else if (mask_4x4 & 1) {
2076         aom_lpf_vertical_4(block + pivot, line_length, lfi->mblim, lfi->lim,
2077                            lfi->hev_thr);
2078       }
2079 
2080       for (i = 0; i < 128; ++i)
2081         if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
2082     }
2083 
2084     // filter inner 4x4
2085     if (mask_4x4_int & 1) {
2086       for (i = 0; i < 128; ++i) {
2087         block[i] = 0;
2088         orig_pos[i] = -1;
2089       }
2090 
2091       const int direct = pick_min_grad_direct(src, 4, row, col + 4, width,
2092                                               height, pitch, 1, 0);
2093 
2094       pick_filter_block_vert(src, block, orig_pos, 4, row, col + 4, width,
2095                              height, pitch, pivot, line_length, 1, direct);
2096 
2097       aom_lpf_vertical_4(block + pivot, line_length, lfi->mblim, lfi->lim,
2098                          lfi->hev_thr);
2099 
2100       for (i = 0; i < 128; ++i)
2101         if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
2102     }
2103 #else
2104     if (mask & 1) {
2105       if (mask_16x16 & 1) {
2106         aom_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
2107       } else if (mask_8x8 & 1) {
2108         aom_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
2109       } else if (mask_4x4 & 1) {
2110         aom_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
2111       }
2112     }
2113     if (mask_4x4_int & 1)
2114       aom_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
2115 #endif  // CONFIG_LPF_DIRECT
2116 #if CONFIG_LPF_DIRECT
2117     idx_c += col_step;
2118 #endif
2119     s += 8;
2120     lfl += 1;
2121     mask_16x16 >>= 1;
2122     mask_8x8 >>= 1;
2123     mask_4x4 >>= 1;
2124     mask_4x4_int >>= 1;
2125   }
2126 }
2127 
#if CONFIG_HIGHBITDEPTH
// High bit depth variant of filter_selectively_vert(): applies vertical-edge
// loop filters to one row of 8-pixel-wide units of 16-bit samples. Bit 0 of
// each mask refers to the unit at 's'; each following bit refers to the unit
// 8 samples further right. On a unit's left edge at most one of the 16/8/4
// filters is applied (16 takes precedence over 8, 8 over 4); 'mask_4x4_int'
// additionally requests a 4-wide filter 4 samples into the unit. 'lfl'
// supplies one filter level per unit and 'bd' is forwarded to the filters.
static void highbd_filter_selectively_vert(
    uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    unsigned int mask_4x4, unsigned int mask_4x4_int,
    const loop_filter_info_n *lfi_n, const uint8_t *lfl, int bd) {
  // Union of all masks; the walk stops once none of them has a bit left.
  unsigned int remaining = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;

  while (remaining) {
    const loop_filter_thresh *const thresh = lfi_n->lfthr + *lfl;

    // Left edge of the unit: widest requested filter wins.
    if (mask_16x16 & 1) {
      aom_highbd_lpf_vertical_16(s, pitch, thresh->mblim, thresh->lim,
                                 thresh->hev_thr, bd);
    } else if (mask_8x8 & 1) {
      aom_highbd_lpf_vertical_8(s, pitch, thresh->mblim, thresh->lim,
                                thresh->hev_thr, bd);
    } else if (mask_4x4 & 1) {
      aom_highbd_lpf_vertical_4(s, pitch, thresh->mblim, thresh->lim,
                                thresh->hev_thr, bd);
    }

    // Edge in the middle of the unit.
    if (mask_4x4_int & 1) {
      aom_highbd_lpf_vertical_4(s + 4, pitch, thresh->mblim, thresh->lim,
                                thresh->hev_thr, bd);
    }

    // Advance to the next unit.
    s += 8;
    ++lfl;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
    remaining >>= 1;
  }
}
#endif  // CONFIG_HIGHBITDEPTH
2163 
// Bundle of three edge bit masks, one per filter size. Used for the row and
// column masks produced by get_filter_level_and_masks_non420().
// NOTE(review): presumably one bit per unit along the row, matching the
// mask arguments of the filter_selectively_* functions -- confirm.
typedef struct {
  unsigned int m16x16;  // positions using the 16x16 filter
  unsigned int m8x8;    // positions using the 8x8 filter
  unsigned int m4x4;    // positions using the 4x4 filter
} FilterMasks;
2169 
2170 // Get filter level and masks for the given row index 'idx_r'. (Only used for
2171 // the non420 case).
2172 // Note: 'row_masks_ptr' and/or 'col_masks_ptr' can be passed NULL.
get_filter_level_and_masks_non420(AV1_COMMON * const cm,const struct macroblockd_plane * const plane,int pl,MODE_INFO ** mib,int mi_row,int mi_col,int idx_r,uint8_t * const lfl_r,unsigned int * const mask_4x4_int_r_ptr,unsigned int * const mask_4x4_int_c_ptr,FilterMasks * const row_masks_ptr,FilterMasks * const col_masks_ptr)2173 static void get_filter_level_and_masks_non420(
2174     AV1_COMMON *const cm, const struct macroblockd_plane *const plane, int pl,
2175     MODE_INFO **mib, int mi_row, int mi_col, int idx_r, uint8_t *const lfl_r,
2176     unsigned int *const mask_4x4_int_r_ptr,
2177     unsigned int *const mask_4x4_int_c_ptr, FilterMasks *const row_masks_ptr,
2178     FilterMasks *const col_masks_ptr) {
2179   const int ss_x = plane->subsampling_x;
2180   const int ss_y = plane->subsampling_y;
2181   const int col_step = mi_size_wide[BLOCK_8X8] << ss_x;
2182   FilterMasks row_masks, col_masks;
2183   memset(&row_masks, 0, sizeof(row_masks));
2184   memset(&col_masks, 0, sizeof(col_masks));
2185   unsigned int mask_4x4_int_r = 0, mask_4x4_int_c = 0;
2186   const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
2187 
2188   // Determine the vertical edges that need filtering
2189   int idx_c;
2190   for (idx_c = 0; idx_c < cm->mib_size && mi_col + idx_c < cm->mi_cols;
2191        idx_c += col_step) {
2192     const MODE_INFO *mi = mib[idx_r * cm->mi_stride + idx_c];
2193     const MB_MODE_INFO *mbmi = &mi[0].mbmi;
2194     const BLOCK_SIZE sb_type = mbmi->sb_type;
2195     const int skip_this = mbmi->skip && is_inter_block(mbmi);
2196     // Map index to 8x8 unit
2197     const int c = idx_c >> mi_width_log2_lookup[BLOCK_8X8];
2198 
2199     const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
2200     const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
2201 
2202     // left edge of current unit is block/partition edge -> no skip
2203     const int block_edge_left =
2204         (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !blk_col : 1;
2205     const int skip_this_c = skip_this && !block_edge_left;
2206     // top edge of current unit is block/partition edge -> no skip
2207     const int block_edge_above =
2208         (num_4x4_blocks_high_lookup[sb_type] > 1) ? !blk_row : 1;
2209     const int skip_this_r = skip_this && !block_edge_above;
2210 
2211     TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
2212                           ? av1_get_uv_tx_size(mbmi, plane)
2213                           : mbmi->tx_size;
2214 
2215     const int skip_border_4x4_c =
2216         ss_x && mi_col + idx_c >= cm->mi_cols - mi_size_wide[BLOCK_8X8];
2217     const int skip_border_4x4_r =
2218         ss_y && mi_row + idx_r >= cm->mi_rows - mi_size_high[BLOCK_8X8];
2219 
2220     int tx_size_mask = 0;
2221     const int c_step = (c >> ss_x);
2222     const int r_step = (r >> ss_y);
2223     const int col_mask = 1 << c_step;
2224 
2225 #if CONFIG_VAR_TX
2226     if (is_inter_block(mbmi) && !mbmi->skip) {
2227       const int tx_row_idx =
2228           (blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1;
2229       const int tx_col_idx =
2230           (blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1;
2231 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
2232       const BLOCK_SIZE bsize =
2233           AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, plane));
2234 #else
2235       const BLOCK_SIZE bsize = get_plane_block_size(mbmi->sb_type, plane);
2236 #endif
2237       const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx];
2238       tx_size = (plane->plane_type == PLANE_TYPE_UV)
2239                     ? uv_txsize_lookup[bsize][mb_tx_size][0][0]
2240                     : mb_tx_size;
2241     }
2242 #endif
2243 
2244 // Filter level can vary per MI
2245 #if CONFIG_EXT_DELTA_Q
2246 #if CONFIG_LOOPFILTER_LEVEL
2247     if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, 0, 0, mbmi)))
2248       continue;
2249 #else
2250 #if CONFIG_LPF_SB
2251     if (!(lfl_r[c_step] =
2252               get_filter_level(cm, &cm->lf_info, mi_row, mi_col, mbmi)))
2253       continue;
2254 #else
2255     if (!(lfl_r[c_step] = get_filter_level(cm, &cm->lf_info, mbmi))) continue;
2256 #endif  // CONFIG_LPF_SB
2257 #endif
2258 #else
2259     if (!(lfl_r[c_step] = get_filter_level(&cm->lf_info, mbmi))) continue;
2260 #endif
2261 
2262 #if CONFIG_VAR_TX
2263     TX_SIZE tx_size_horz_edge, tx_size_vert_edge;
2264 
2265     // filt_len_vert_edge is the length of deblocking filter for a vertical edge
2266     // The filter direction of a vertical edge is horizontal.
2267     // Thus, filt_len_vert_edge is determined as the minimum width of the two
2268     // transform block sizes on the left and right (current block) side of edge
2269     const int filt_len_vert_edge = AOMMIN(
2270         tx_size_wide[tx_size],
2271         tx_size_wide[cm->left_txfm_context[pl][((mi_row + idx_r) & MAX_MIB_MASK)
2272                                                << TX_UNIT_HIGH_LOG2]]);
2273 
2274     // filt_len_horz_edge is the len of deblocking filter for a horizontal edge
2275     // The filter direction of a horizontal edge is vertical.
2276     // Thus, filt_len_horz_edge is determined as the minimum height of the two
2277     // transform block sizes on the top and bottom (current block) side of edge
2278     const int filt_len_horz_edge =
2279         AOMMIN(tx_size_high[tx_size],
2280                tx_size_high[cm->top_txfm_context[pl][(mi_col + idx_c)
2281                                                      << TX_UNIT_WIDE_LOG2]]);
2282 
2283     // transform width/height of current block
2284     const int tx_wide_cur = tx_size_wide[tx_size];
2285     const int tx_high_cur = tx_size_high[tx_size];
2286 
2287     // tx_size_vert_edge is square transform size for a vertical deblocking edge
2288     // It determines the type of filter applied to the vertical edge
2289     // Similarly, tx_size_horz_edge is for a horizontal deblocking edge
2290     tx_size_vert_edge = get_sqr_tx_size(filt_len_vert_edge);
2291     tx_size_horz_edge = get_sqr_tx_size(filt_len_horz_edge);
2292 
2293     memset(cm->top_txfm_context[pl] + ((mi_col + idx_c) << TX_UNIT_WIDE_LOG2),
2294            tx_size, mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2);
2295     memset(cm->left_txfm_context[pl] +
2296                (((mi_row + idx_r) & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2),
2297            tx_size, mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2);
2298 #else
2299     // The length (or equally the square tx size) of deblocking filter is only
2300     // determined by
2301     // a) current block's width for a vertical deblocking edge
2302     // b) current block's height for a horizontal deblocking edge
2303     TX_SIZE tx_size_vert_edge = txsize_horz_map[tx_size];
2304     TX_SIZE tx_size_horz_edge = txsize_vert_map[tx_size];
2305     (void)pl;
2306 #endif  // CONFIG_VAR_TX
2307 
2308     if (tx_size_vert_edge == TX_32X32)
2309       tx_size_mask = 3;
2310     else if (tx_size_vert_edge == TX_16X16)
2311       tx_size_mask = 1;
2312     else
2313       tx_size_mask = 0;
2314 
2315     // Build masks based on the transform size of each block
2316     // handle vertical mask
2317     if (tx_size_vert_edge == TX_32X32) {
2318       if (!skip_this_c && (c_step & tx_size_mask) == 0) {
2319         if (!skip_border_4x4_c)
2320           col_masks.m16x16 |= col_mask;
2321         else
2322           col_masks.m8x8 |= col_mask;
2323       }
2324     } else if (tx_size_vert_edge == TX_16X16) {
2325       if (!skip_this_c && (c_step & tx_size_mask) == 0) {
2326         if (!skip_border_4x4_c)
2327           col_masks.m16x16 |= col_mask;
2328         else
2329           col_masks.m8x8 |= col_mask;
2330       }
2331     } else {
2332       // force 8x8 filtering on 32x32 boundaries
2333       if (!skip_this_c && (c_step & tx_size_mask) == 0) {
2334         if (tx_size_vert_edge == TX_8X8 || (c_step & 3) == 0)
2335           col_masks.m8x8 |= col_mask;
2336         else
2337           col_masks.m4x4 |= col_mask;
2338       }
2339 
2340 #if CONFIG_VAR_TX
2341       if (!skip_this && tx_wide_cur < 8 && !skip_border_4x4_c &&
2342           (c_step & tx_size_mask) == 0)
2343 #else
2344       if (!skip_this && tx_size_vert_edge < TX_8X8 && !skip_border_4x4_c &&
2345           (c_step & tx_size_mask) == 0)
2346 #endif  // CONFIG_VAR_TX
2347         mask_4x4_int_c |= col_mask;
2348     }
2349 
2350     if (tx_size_horz_edge == TX_32X32)
2351       tx_size_mask = 3;
2352     else if (tx_size_horz_edge == TX_16X16)
2353       tx_size_mask = 1;
2354     else
2355       tx_size_mask = 0;
2356 
2357     // set horizontal mask
2358     if (tx_size_horz_edge == TX_32X32) {
2359       if (!skip_this_r && (r_step & tx_size_mask) == 0) {
2360         if (!skip_border_4x4_r)
2361           row_masks.m16x16 |= col_mask;
2362         else
2363           row_masks.m8x8 |= col_mask;
2364       }
2365     } else if (tx_size_horz_edge == TX_16X16) {
2366       if (!skip_this_r && (r_step & tx_size_mask) == 0) {
2367         if (!skip_border_4x4_r)
2368           row_masks.m16x16 |= col_mask;
2369         else
2370           row_masks.m8x8 |= col_mask;
2371       }
2372     } else {
2373       // force 8x8 filtering on 32x32 boundaries
2374       if (!skip_this_r && (r_step & tx_size_mask) == 0) {
2375         if (tx_size_horz_edge == TX_8X8 || (r_step & 3) == 0)
2376           row_masks.m8x8 |= col_mask;
2377         else
2378           row_masks.m4x4 |= col_mask;
2379       }
2380 
2381 #if CONFIG_VAR_TX
2382       if (!skip_this && tx_high_cur < 8 && !skip_border_4x4_r &&
2383           (r_step & tx_size_mask) == 0)
2384 #else
2385       if (!skip_this && tx_size_horz_edge < TX_8X8 && !skip_border_4x4_r &&
2386           (r_step & tx_size_mask) == 0)
2387 #endif  // CONFIG_VAR_TX
2388         mask_4x4_int_r |= col_mask;
2389     }
2390   }
2391 
2392   if (row_masks_ptr) *row_masks_ptr = row_masks;
2393   if (col_masks_ptr) *col_masks_ptr = col_masks;
2394   if (mask_4x4_int_c_ptr) *mask_4x4_int_c_ptr = mask_4x4_int_c;
2395   if (mask_4x4_int_r_ptr) *mask_4x4_int_r_ptr = mask_4x4_int_r;
2396 }
2397 
// Filters the vertical edges of one plane of a superblock (non-4:2:0 path).
// For each row of units the column masks and filter levels are rebuilt with
// get_filter_level_and_masks_non420() and handed to the selective vertical
// filter. plane->dst.buf is advanced while iterating and restored to its
// entry value before returning.
void av1_filter_block_plane_non420_ver(AV1_COMMON *const cm,
                                       struct macroblockd_plane *plane,
                                       MODE_INFO **mib, int mi_row, int mi_col,
                                       int pl) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const saved_buf = dst->buf;
  const int ss_y = plane->subsampling_y;
  const int row_step = mi_size_high[BLOCK_8X8] << ss_y;
#if CONFIG_LPF_DIRECT
  const int ss_x = plane->subsampling_x;
  const int col_step = mi_size_wide[BLOCK_8X8] << ss_x;
#endif
  uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } };
  int idx_r;

  for (idx_r = 0; idx_r < cm->mib_size && mi_row + idx_r < cm->mi_rows;
       idx_r += row_step) {
    const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
    uint8_t *const lfl_row = &lfl[r][0];
    FilterMasks col_masks;
    unsigned int mask_4x4_int;

    get_filter_level_and_masks_non420(cm, plane, pl, mib, mi_row, mi_col, idx_r,
                                      lfl_row, NULL, &mask_4x4_int, NULL,
                                      &col_masks);

    // Bit 0 is the left edge of the first unit: clear it on the leftmost
    // frame column (and, when configured, on a left tile boundary).
    unsigned int border_mask = (mi_col == 0) ? ~1u : ~0u;
#if CONFIG_LOOPFILTERING_ACROSS_TILES
    {
      MODE_INFO *const mi = cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col;
      if (av1_disable_loopfilter_on_tile_boundary(cm) &&
          (mi->mbmi.boundary_info & TILE_LEFT_BOUNDARY) != 0) {
        border_mask = 0xfffffffe;
      }
    }
#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES

    // The inner 4x4 mask is intentionally not affected by the border mask.
    const unsigned int edges_16x16 = col_masks.m16x16 & border_mask;
    const unsigned int edges_8x8 = col_masks.m8x8 & border_mask;
    const unsigned int edges_4x4 = col_masks.m4x4 & border_mask;

#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
                                     edges_16x16, edges_8x8, edges_4x4,
                                     mask_4x4_int, &cm->lf_info, lfl_row,
                                     (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
      filter_selectively_vert(dst->buf, dst->stride, edges_16x16, edges_8x8,
                              edges_4x4, mask_4x4_int, &cm->lf_info, lfl_row
#if CONFIG_LPF_DIRECT
                              ,
                              dst->buf0, mi_row, mi_col, idx_r, col_step,
                              cm->width, cm->height, ss_x, ss_y
#endif  // CONFIG_LPF_DIRECT
                              );
    }

    // Move down to the next row of 8-sample-high units.
    dst->buf += 8 * dst->stride;
  }

  // Restore the buffer pointer for any subsequent filter pass.
  dst->buf = saved_buf;
}
2457 
// Filters the horizontal edges of one plane of a superblock (non-4:2:0 path).
// For each row of units the row masks and filter levels are rebuilt with
// get_filter_level_and_masks_non420() and handed to the selective horizontal
// filter. plane->dst.buf is advanced while iterating and restored to its
// entry value before returning.
void av1_filter_block_plane_non420_hor(AV1_COMMON *const cm,
                                       struct macroblockd_plane *plane,
                                       MODE_INFO **mib, int mi_row, int mi_col,
                                       int pl) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const saved_buf = dst->buf;
  const int ss_y = plane->subsampling_y;
  const int row_step = mi_size_high[BLOCK_8X8] << ss_y;
#if CONFIG_LPF_DIRECT
  const int ss_x = plane->subsampling_x;
  const int col_step = mi_size_wide[BLOCK_8X8] << ss_x;
#endif
  uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } };
  int idx_r;

  for (idx_r = 0; idx_r < cm->mib_size && mi_row + idx_r < cm->mi_rows;
       idx_r += row_step) {
    const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
    uint8_t *const lfl_row = &lfl[r][0];
    FilterMasks row_masks;
    unsigned int mask_4x4_int;

    get_filter_level_and_masks_non420(cm, plane, pl, mib, mi_row, mi_col, idx_r,
                                      lfl_row, &mask_4x4_int, NULL, &row_masks,
                                      NULL);

    // The top edge of this row is not filtered on the first frame row or,
    // when configured, on an above tile boundary.
#if CONFIG_LOOPFILTERING_ACROSS_TILES
    const MODE_INFO *const mi =
        cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col;
    const int at_tile_top =
        av1_disable_loopfilter_on_tile_boundary(cm) &&
        (mi->mbmi.boundary_info & TILE_ABOVE_BOUNDARY) != 0;
#else
    const int at_tile_top = 0;
#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
    if (at_tile_top || mi_row + idx_r == 0) {
      // Only the outer-edge masks are cleared; the inner 4x4 mask is kept.
      row_masks.m16x16 = 0;
      row_masks.m8x8 = 0;
      row_masks.m4x4 = 0;
    }

#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, row_masks.m16x16,
          row_masks.m8x8, row_masks.m4x4, mask_4x4_int, &cm->lf_info, lfl_row,
          (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
      filter_selectively_horiz(dst->buf, dst->stride, row_masks.m16x16,
                               row_masks.m8x8, row_masks.m4x4, mask_4x4_int,
                               &cm->lf_info, lfl_row
#if CONFIG_LPF_DIRECT
                               ,
                               dst->buf0, mi_row, mi_col, idx_r, col_step,
                               cm->width, cm->height, ss_x, ss_y
#endif  // CONFIG_LPF_DIRECT
                               );
    }

    // Move down to the next row of 8-sample-high units.
    dst->buf += 8 * dst->stride;
  }

  // Restore the buffer pointer for any subsequent filter pass.
  dst->buf = saved_buf;
}
2514 
// Filters the vertical edges of a non-subsampled plane of a superblock using
// the precomputed luma masks in 'lfm' (left_y[] and int_4x4_y).
// Two MI rows are handled per iteration: the low 16 bits of every mask are
// consumed and the masks are then shifted right by 2 * MI_SIZE.
// plane->dst.buf is advanced while iterating and restored before returning.
void av1_filter_block_plane_ss00_ver(AV1_COMMON *const cm,
                                     struct macroblockd_plane *const plane,
                                     int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const saved_buf = dst->buf;
  const int rows_per_pass = 2 * MI_SIZE;
  uint64_t edges_16x16 = lfm->left_y[TX_16X16];
  uint64_t edges_8x8 = lfm->left_y[TX_8X8];
  uint64_t edges_4x4 = lfm->left_y[TX_4X4];
  uint64_t edges_4x4_int = lfm->int_4x4_y;
  int r;

  assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);

  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
    // Masks covering the two rows filtered in this pass.
    const unsigned int cur_16x16 = edges_16x16 & 0xffff;
    const unsigned int cur_8x8 = edges_8x8 & 0xffff;
    const unsigned int cur_4x4 = edges_4x4 & 0xffff;
    const unsigned int cur_4x4_int = edges_4x4_int & 0xffff;

#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_vert_row2(
          plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
          cur_16x16, cur_8x8, cur_4x4, cur_4x4_int, &cm->lf_info,
          &lfm->lfl_y[r][0], (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
      filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
                                   cur_16x16, cur_8x8, cur_4x4, cur_4x4_int,
                                   &cm->lf_info, &lfm->lfl_y[r][0]);
    }

    dst->buf += rows_per_pass * dst->stride;
    edges_16x16 >>= rows_per_pass;
    edges_8x8 >>= rows_per_pass;
    edges_4x4 >>= rows_per_pass;
    edges_4x4_int >>= rows_per_pass;
  }

  // Restore the buffer pointer for any subsequent filter pass.
  dst->buf = saved_buf;
}
2558 
// Filters the horizontal edges of a non-subsampled plane of a superblock
// using the precomputed luma masks in 'lfm' (above_y[] and int_4x4_y).
// One MI row is handled per iteration: the low 8 bits of every mask are
// consumed and the masks are then shifted right by MI_SIZE. The outer-edge
// masks are forced to 0 on the first frame row (mi_row + r == 0); the inner
// 4x4 mask is still applied there.
// plane->dst.buf is advanced while iterating and restored before returning.
void av1_filter_block_plane_ss00_hor(AV1_COMMON *const cm,
                                     struct macroblockd_plane *const plane,
                                     int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  int r;
  uint64_t mask_16x16 = lfm->above_y[TX_16X16];
  uint64_t mask_8x8 = lfm->above_y[TX_8X8];
  uint64_t mask_4x4 = lfm->above_y[TX_4X4];
  uint64_t mask_4x4_int = lfm->int_4x4_y;

  assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);

  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) {
    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16 & 0xff;
      mask_8x8_r = mask_8x8 & 0xff;
      mask_4x4_r = mask_4x4 & 0xff;
    }

    // Both branches are braced on purpose: when CONFIG_LPF_DIRECT removes the
    // low-bit-depth call, an unbraced 'else' would bind to the pointer
    // advance below and the high-bit-depth path would never move to the next
    // row.
#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
          mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r][0],
          (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
#if !CONFIG_LPF_DIRECT
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
                               &lfm->lfl_y[r][0]);
#endif  // !CONFIG_LPF_DIRECT
    }

    dst->buf += MI_SIZE * dst->stride;
    mask_16x16 >>= MI_SIZE;
    mask_8x8 >>= MI_SIZE;
    mask_4x4 >>= MI_SIZE;
    mask_4x4_int >>= MI_SIZE;
  }
  // restore the buf pointer in case there is additional filter pass.
  dst->buf = dst0;
}
2610 
// Filters the vertical edges of a 4:2:0 chroma plane of a superblock using
// the precomputed chroma masks in 'lfm' (left_uv[] and left_int_4x4_uv).
// lfm->lfl_uv is cleared and then refilled from lfm->lfl_y: each pass covers
// four luma MI rows (r and r + 2 are sampled, every second column) and
// filters two chroma rows with the low 8 bits of every mask.
// plane->dst.buf is advanced while iterating and restored before returning.
void av1_filter_block_plane_ss11_ver(AV1_COMMON *const cm,
                                     struct macroblockd_plane *const plane,
                                     int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const saved_buf = dst->buf;
  uint16_t edges_16x16 = lfm->left_uv[TX_16X16];
  uint16_t edges_8x8 = lfm->left_uv[TX_8X8];
  uint16_t edges_4x4 = lfm->left_uv[TX_4X4];
  uint16_t edges_4x4_int = lfm->left_int_4x4_uv;
  int r;

  assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
  assert(plane->plane_type == PLANE_TYPE_UV);
  memset(lfm->lfl_uv, 0, sizeof(lfm->lfl_uv));

  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
    const int uv_row = r >> 1;
    int uv_col;

    // Derive the chroma filter levels of the two rows from the luma levels.
    for (uv_col = 0; uv_col < (cm->mib_size >> 1); ++uv_col) {
      lfm->lfl_uv[uv_row][uv_col] = lfm->lfl_y[r][uv_col << 1];
      lfm->lfl_uv[(r + 2) >> 1][uv_col] = lfm->lfl_y[r + 2][uv_col << 1];
    }

    // Masks covering the two chroma rows filtered in this pass.
    const unsigned int cur_16x16 = edges_16x16 & 0xff;
    const unsigned int cur_8x8 = edges_8x8 & 0xff;
    const unsigned int cur_4x4 = edges_4x4 & 0xff;
    const unsigned int cur_4x4_int = edges_4x4_int & 0xff;

#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_vert_row2(
          plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
          cur_16x16, cur_8x8, cur_4x4, cur_4x4_int, &cm->lf_info,
          &lfm->lfl_uv[uv_row][0], (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
      filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
                                   cur_16x16, cur_8x8, cur_4x4, cur_4x4_int,
                                   &cm->lf_info, &lfm->lfl_uv[uv_row][0]);
    }

    dst->buf += 2 * MI_SIZE * dst->stride;
    edges_16x16 >>= MI_SIZE;
    edges_8x8 >>= MI_SIZE;
    edges_4x4 >>= MI_SIZE;
    edges_4x4_int >>= MI_SIZE;
  }

  // Restore the buffer pointer for any subsequent filter pass.
  dst->buf = saved_buf;
}
2665 
// Filters the horizontal edges of a 4:2:0 chroma plane of a superblock using
// the precomputed chroma masks in 'lfm' (above_uv[] and above_int_4x4_uv).
// lfm->lfl_uv is cleared and refilled from lfm->lfl_y first. The filter loop
// then steps two luma MI rows at a time, consuming the low 4 bits of every
// mask and shifting the masks right by MI_SIZE / 2. The outer-edge masks are
// forced to 0 on the first frame row, and the inner 4x4 mask is dropped on
// the last MI row of the frame.
// plane->dst.buf is advanced while iterating and restored before returning.
void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm,
                                     struct macroblockd_plane *const plane,
                                     int mi_row, LOOP_FILTER_MASK *lfm) {
  struct buf_2d *const dst = &plane->dst;
  uint8_t *const dst0 = dst->buf;
  int r, c;
  uint64_t mask_16x16 = lfm->above_uv[TX_16X16];
  uint64_t mask_8x8 = lfm->above_uv[TX_8X8];
  uint64_t mask_4x4 = lfm->above_uv[TX_4X4];
  uint64_t mask_4x4_int = lfm->above_int_4x4_uv;

  assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
  memset(lfm->lfl_uv, 0, sizeof(lfm->lfl_uv));

  // re-populate the filter level for uv, same as the code for vertical
  // filter in av1_filter_block_plane_ss11_ver
  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
    for (c = 0; c < (cm->mib_size >> 1); c++) {
      lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
      lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
    }
  }

  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
    const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
    const unsigned int mask_4x4_int_r =
        skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
    unsigned int mask_16x16_r;
    unsigned int mask_8x8_r;
    unsigned int mask_4x4_r;

    if (mi_row + r == 0) {
      mask_16x16_r = 0;
      mask_8x8_r = 0;
      mask_4x4_r = 0;
    } else {
      mask_16x16_r = mask_16x16 & 0xf;
      mask_8x8_r = mask_8x8 & 0xf;
      mask_4x4_r = mask_4x4 & 0xf;
    }

    // Both branches are braced on purpose: when CONFIG_LPF_DIRECT removes the
    // low-bit-depth call, an unbraced 'else' would bind to the pointer
    // advance below and the high-bit-depth path would never move to the next
    // row.
#if CONFIG_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
          mask_4x4_r, mask_4x4_int_r, &cm->lf_info, &lfm->lfl_uv[r >> 1][0],
          (int)cm->bit_depth);
    } else
#endif  // CONFIG_HIGHBITDEPTH
    {
#if !CONFIG_LPF_DIRECT
      filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
                               mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
                               &lfm->lfl_uv[r >> 1][0]);
#endif  // !CONFIG_LPF_DIRECT
    }

    dst->buf += MI_SIZE * dst->stride;
    mask_16x16 >>= MI_SIZE / 2;
    mask_8x8 >>= MI_SIZE / 2;
    mask_4x4 >>= MI_SIZE / 2;
    mask_4x4_int >>= MI_SIZE / 2;
  }
  // restore the buf pointer in case there is additional filter pass.
  dst->buf = dst0;
}
2730 
2731 #if CONFIG_PARALLEL_DEBLOCKING
// Direction of the edge being deblocked. The values index the first dimension
// of the [NUM_EDGE_DIRS] mask tables below. A vertical edge is filtered
// horizontally and a horizontal edge is filtered vertically.
typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR;
// Per-block-size masks, indexed by [EDGE_DIR][BLOCK_SIZE]. Every entry is
// (block dimension - 1): the block width for vertical edges and the block
// height for horizontal edges, as named in the per-entry comments.
// NOTE(review): presumably ANDed with a sample coordinate to test whether it
// lies on a prediction block boundary; the use site is outside this view.
static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES_ALL] = {
  // mask for vertical edges filtering (block width - 1)
  {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
      2 - 1,   // BLOCK_2X2
      2 - 1,   // BLOCK_2X4
      4 - 1,   // BLOCK_4X2
#endif         // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
      4 - 1,   // BLOCK_4X4
      4 - 1,   // BLOCK_4X8
      8 - 1,   // BLOCK_8X4
      8 - 1,   // BLOCK_8X8
      8 - 1,   // BLOCK_8X16
      16 - 1,  // BLOCK_16X8
      16 - 1,  // BLOCK_16X16
      16 - 1,  // BLOCK_16X32
      32 - 1,  // BLOCK_32X16
      32 - 1,  // BLOCK_32X32
      32 - 1,  // BLOCK_32X64
      64 - 1,  // BLOCK_64X32
      64 - 1,  // BLOCK_64X64
#if CONFIG_EXT_PARTITION
      64 - 1,   // BLOCK_64X128
      128 - 1,  // BLOCK_128X64
      128 - 1,  // BLOCK_128X128
#endif          // CONFIG_EXT_PARTITION
      4 - 1,    // BLOCK_4X16,
      16 - 1,   // BLOCK_16X4,
      8 - 1,    // BLOCK_8X32,
      32 - 1,   // BLOCK_32X8,
      16 - 1,   // BLOCK_16X64,
      64 - 1,   // BLOCK_64X16
#if CONFIG_EXT_PARTITION
      32 - 1,   // BLOCK_32X128
      128 - 1,  // BLOCK_128X32
#endif          // CONFIG_EXT_PARTITION
  },
  // mask for horizontal edges filtering (block height - 1)
  {
#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
      2 - 1,   // BLOCK_2X2
      4 - 1,   // BLOCK_2X4
      2 - 1,   // BLOCK_4X2
#endif         // CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
      4 - 1,   // BLOCK_4X4
      8 - 1,   // BLOCK_4X8
      4 - 1,   // BLOCK_8X4
      8 - 1,   // BLOCK_8X8
      16 - 1,  // BLOCK_8X16
      8 - 1,   // BLOCK_16X8
      16 - 1,  // BLOCK_16X16
      32 - 1,  // BLOCK_16X32
      16 - 1,  // BLOCK_32X16
      32 - 1,  // BLOCK_32X32
      64 - 1,  // BLOCK_32X64
      32 - 1,  // BLOCK_64X32
      64 - 1,  // BLOCK_64X64
#if CONFIG_EXT_PARTITION
      128 - 1,  // BLOCK_64X128
      64 - 1,   // BLOCK_128X64
      128 - 1,  // BLOCK_128X128
#endif          // CONFIG_EXT_PARTITION
      16 - 1,   // BLOCK_4X16,
      4 - 1,    // BLOCK_16X4,
      32 - 1,   // BLOCK_8X32,
      8 - 1,    // BLOCK_32X8,
      64 - 1,   // BLOCK_16X64,
      16 - 1,   // BLOCK_64X16
#if CONFIG_EXT_PARTITION
      128 - 1,  // BLOCK_32X128
      32 - 1,   // BLOCK_128X32
#endif          // CONFIG_EXT_PARTITION
  },
};
2807 
// Per-transform-size masks, indexed by [EDGE_DIR][TX_SIZE]. Every entry is
// (transform dimension - 1): the transform width for vertical edges and the
// transform height for horizontal edges, as named in the per-entry comments.
// NOTE(review): presumably ANDed with a sample coordinate to test whether it
// lies on a transform block boundary; the use site is outside this view.
static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
  // mask for vertical edges filtering (transform width - 1)
  {
#if CONFIG_CHROMA_2X2
      2 - 1,  // TX_2X2
#endif
      4 - 1,   // TX_4X4
      8 - 1,   // TX_8X8
      16 - 1,  // TX_16X16
      32 - 1,  // TX_32X32
#if CONFIG_TX64X64
      64 - 1,  // TX_64X64
#endif         // CONFIG_TX64X64
      4 - 1,   // TX_4X8
      8 - 1,   // TX_8X4
      8 - 1,   // TX_8X16
      16 - 1,  // TX_16X8
      16 - 1,  // TX_16X32
      32 - 1,  // TX_32X16
#if CONFIG_TX64X64
      32 - 1,  // TX_32X64
      64 - 1,  // TX_64X32
#endif         // CONFIG_TX64X64
      4 - 1,   // TX_4X16
      16 - 1,  // TX_16X4
      8 - 1,   // TX_8X32
      32 - 1   // TX_32X8
  },
  // mask for horizontal edges filtering (transform height - 1)
  {
#if CONFIG_CHROMA_2X2
      2 - 1,  // TX_2X2
#endif
      4 - 1,   // TX_4X4
      8 - 1,   // TX_8X8
      16 - 1,  // TX_16X16
      32 - 1,  // TX_32X32
#if CONFIG_TX64X64
      64 - 1,  // TX_64X64
#endif         // CONFIG_TX64X64
      8 - 1,   // TX_4X8
      4 - 1,   // TX_8X4
      16 - 1,  // TX_8X16
      8 - 1,   // TX_16X8
      32 - 1,  // TX_16X32
      16 - 1,  // TX_32X16
#if CONFIG_TX64X64
      64 - 1,  // TX_32X64
      32 - 1,  // TX_64X32
#endif         // CONFIG_TX64X64
      16 - 1,  // TX_4X16
      4 - 1,   // TX_16X4
      32 - 1,  // TX_8X32
      8 - 1    // TX_32X8
  }
};
2862 
av1_get_transform_size(const MODE_INFO * const mi,const EDGE_DIR edge_dir,const int mi_row,const int mi_col,const int plane,const struct macroblockd_plane * plane_ptr,const uint32_t scale_horz,const uint32_t scale_vert)2863 static TX_SIZE av1_get_transform_size(const MODE_INFO *const mi,
2864                                       const EDGE_DIR edge_dir, const int mi_row,
2865                                       const int mi_col, const int plane,
2866                                       const struct macroblockd_plane *plane_ptr,
2867                                       const uint32_t scale_horz,
2868                                       const uint32_t scale_vert) {
2869   const MB_MODE_INFO *mbmi = &mi->mbmi;
2870   TX_SIZE tx_size = (plane == AOM_PLANE_Y)
2871                         ? mbmi->tx_size
2872                         : av1_get_uv_tx_size(mbmi, plane_ptr);
2873   assert(tx_size < TX_SIZES_ALL);
2874 
2875 #if CONFIG_VAR_TX
2876   // mi_row and mi_col is the absolute position of the MI block.
2877   // idx_c and idx_r is the relative offset of the MI within the super block
2878   // c and r is the relative offset of the 8x8 block within the supert block
2879   // blk_row and block_col is the relative offset of the current 8x8 block
2880   // within the current partition.
2881   const int idx_c = mi_col & MAX_MIB_MASK;
2882   const int idx_r = mi_row & MAX_MIB_MASK;
2883   const int c = idx_c >> mi_width_log2_lookup[BLOCK_8X8];
2884   const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
2885   const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
2886   const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
2887   const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
2888 
2889   if (is_inter_block(mbmi) && !mbmi->skip) {
2890     const int tx_row_idx =
2891         (blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1;
2892     const int tx_col_idx =
2893         (blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1;
2894 
2895 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
2896     const BLOCK_SIZE bsize =
2897         AOMMAX(BLOCK_4X4, ss_size_lookup[sb_type][scale_horz][scale_vert]);
2898 #else
2899     const BLOCK_SIZE bsize = ss_size_lookup[sb_type][scale_horz][scale_vert];
2900 #endif
2901     const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx];
2902 
2903     assert(mb_tx_size < TX_SIZES_ALL);
2904 
2905     tx_size = (plane == AOM_PLANE_Y)
2906                   ? mb_tx_size
2907                   : uv_txsize_lookup[bsize][mb_tx_size][0][0];
2908     assert(tx_size < TX_SIZES_ALL);
2909   }
2910 #else
2911   (void)mi_row;
2912   (void)mi_col;
2913   (void)scale_horz;
2914   (void)scale_vert;
2915 #endif  // CONFIG_VAR_TX
2916 
2917   // since in case of chrominance or non-square transorm need to convert
2918   // transform size into transform size in particular direction.
2919   // for vertical edge, filter direction is horizontal, for horizontal
2920   // edge, filter direction is vertical.
2921   tx_size = (VERT_EDGE == edge_dir) ? txsize_horz_map[tx_size]
2922                                     : txsize_vert_map[tx_size];
2923   return tx_size;
2924 }
2925 
// Deblocking parameters for a single edge position, filled in by
// set_lpf_parameters(). Both filter lengths are reset to 0 at the start of
// set_lpf_parameters() and stay 0 when the position lies outside the plane.
typedef struct AV1_DEBLOCKING_PARAMETERS {
  // length of the filter applied to the outer edge
  uint32_t filter_length;
  // length of the filter applied to the inner edge
  uint32_t filter_length_internal;
  // deblocking limits
  // NOTE(review): presumably these point into the loop filter threshold
  // tables of cm->lf_info; the assignment is outside this view.
  const uint8_t *lim;
  const uint8_t *mblim;
  const uint8_t *hev_thr;
} AV1_DEBLOCKING_PARAMETERS;
2936 
// Decides how the edge at plane position (x, y) has to be deblocked.
//
//   params    - receives the decision; both filter lengths are always written,
//               the threshold pointers only when a length is non-zero.
//   mode_step - distance, in entries of the mode-info grid, from the current
//               block back to the block on the other side of the edge.
//   cm        - frame-level state (mode-info grid, loop filter tables).
//   edge_dir  - VERT_EDGE or HORZ_EDGE.
//   x, y      - sample position inside the plane.
//   plane     - plane index, forwarded to the transform-size / level lookups.
//   plane_ptr - plane descriptor providing dimensions and subsampling.
static void set_lpf_parameters(
    AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
    const AV1_COMMON *const cm, const EDGE_DIR edge_dir, const uint32_t x,
    const uint32_t y, const int plane,
    const struct macroblockd_plane *const plane_ptr) {
  // start from "nothing to filter" so that every early exit is a valid answer
  params->filter_length = 0;
  params->filter_length_internal = 0;

  // positions outside the plane are never deblocked
  const uint32_t plane_width = plane_ptr->dst.width;
  const uint32_t plane_height = plane_ptr->dst.height;
  if ((x >= plane_width) || (y >= plane_height)) return;

  // locate the mode info covering (x, y)
  const uint32_t scale_horz = plane_ptr->subsampling_x;
  const uint32_t scale_vert = plane_ptr->subsampling_y;
  const int mi_row = (y << scale_vert) >> MI_SIZE_LOG2;
  const int mi_col = (x << scale_horz) >> MI_SIZE_LOG2;
  MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
  const MB_MODE_INFO *mbmi = &mi[0]->mbmi;

  const TX_SIZE ts = av1_get_transform_size(mi[0], edge_dir, mi_row, mi_col,
                                            plane, plane_ptr, scale_horz,
                                            scale_vert);

  // the signature of get_filter_level() depends on the enabled experiments
#if CONFIG_EXT_DELTA_Q && CONFIG_LOOPFILTER_LEVEL
  const uint32_t curr_level =
      get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
#elif CONFIG_EXT_DELTA_Q && CONFIG_LPF_SB
  const uint32_t curr_level =
      get_filter_level(cm, &cm->lf_info, mi_row, mi_col, mbmi);
#elif CONFIG_EXT_DELTA_Q
  const uint32_t curr_level = get_filter_level(cm, &cm->lf_info, mbmi);
#else
  const uint32_t curr_level = get_filter_level(&cm->lf_info, mbmi);
#endif

  const int curr_skipped = mbmi->skip && is_inter_block(mbmi);

  // coordinate measured across the edge; at 0 nothing below applies
  const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
  if (!coord) return;

  uint32_t level = curr_level;

  // an edge on a tile boundary may have to be left alone
  int outer_edge_allowed = 1;
#if CONFIG_LOOPFILTERING_ACROSS_TILES
  {
    MODE_INFO *const mi_bound = cm->mi + mi_row * cm->mi_stride + mi_col;
    outer_edge_allowed =
        !av1_disable_loopfilter_on_tile_boundary(cm) ||
        ((VERT_EDGE == edge_dir) &&
         (0 == (mi_bound->mbmi.boundary_info & TILE_LEFT_BOUNDARY))) ||
        ((HORZ_EDGE == edge_dir) &&
         (0 == (mi_bound->mbmi.boundary_info & TILE_ABOVE_BOUNDARY)));
  }
#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES

  // outer edge: only considered when it coincides with a transform unit edge
  if (outer_edge_allowed && !(coord & av1_transform_masks[edge_dir][ts])) {
    // the block on the other side of the edge (left or above)
    const MODE_INFO *const mi_prev = *(mi - mode_step);
    int pv_row = mi_row;
    int pv_col = mi_col;
    if (VERT_EDGE == edge_dir) {
      pv_col -= (1 << scale_horz);
    } else {
      pv_row -= (1 << scale_vert);
    }
    const TX_SIZE pv_ts =
        av1_get_transform_size(mi_prev, edge_dir, pv_row, pv_col, plane,
                               plane_ptr, scale_horz, scale_vert);

#if CONFIG_EXT_DELTA_Q && CONFIG_LOOPFILTER_LEVEL
    const uint32_t pv_lvl =
        get_filter_level(cm, &cm->lf_info, edge_dir, plane, &mi_prev->mbmi);
#elif CONFIG_EXT_DELTA_Q && CONFIG_LPF_SB
    const uint32_t pv_lvl =
        get_filter_level(cm, &cm->lf_info, pv_row, pv_col, &mi_prev->mbmi);
#elif CONFIG_EXT_DELTA_Q
    const uint32_t pv_lvl = get_filter_level(cm, &cm->lf_info, &mi_prev->mbmi);
#else
    const uint32_t pv_lvl = get_filter_level(&cm->lf_info, &mi_prev->mbmi);
#endif

    const int pv_skip = mi_prev->mbmi.skip && is_inter_block(&mi_prev->mbmi);
    // non-zero when the edge is also an edge of the current prediction block
    const int32_t pu_edge =
        !(coord &
          av1_prediction_masks[edge_dir][ss_size_lookup[mbmi->sb_type]
                                                       [scale_horz]
                                                       [scale_vert]]);

    // When both neighbours are skipped the edge is only deblocked if it is a
    // prediction block edge; in every case one side needs a non-zero level.
    const int both_skipped_inside_pu = pv_skip && curr_skipped && !pu_edge;
    if ((curr_level || pv_lvl) && !both_skipped_inside_pu) {
      // the smaller of the two transform sizes bounds the filter length
      const TX_SIZE min_ts = AOMMIN(ts, pv_ts);
      uint32_t outer_length;
#if PARALLEL_DEBLOCKING_DISABLE_15TAP
      outer_length = (min_ts <= TX_4X4) ? (4) : (8);
#else
      if (min_ts <= TX_4X4) {
        outer_length = 4;
      } else if (min_ts == TX_8X8) {
        outer_length = 8;
      } else {
        outer_length = 16;
#if PARALLEL_DEBLOCKING_15TAPLUMAONLY
        // No wide filtering for chroma plane
        if (plane != 0) {
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
          outer_length = 6;
#else
          outer_length = 8;
#endif
        }
#endif
      }
#endif  // PARALLEL_DEBLOCKING_DISABLE_15TAP
      params->filter_length = outer_length;

      // fall back to the neighbour's level when the current one is zero
      level = (curr_level) ? (curr_level) : (pv_lvl);
    }
  }

#if !CONFIG_CB4X4
  // inner edge: present when the current block uses a 4x4 (or smaller)
  // transform, has a non-zero level and is not skipped
  if (curr_level && !curr_skipped && (ts <= TX_4X4)) {
    params->filter_length_internal = 4;
  }
#endif

  // thresholds are shared by both edges and looked up from the final level
  if (params->filter_length || params->filter_length_internal) {
    const loop_filter_thresh *const limits = &cm->lf_info.lfthr[level];
    params->lim = limits->lim;
    params->mblim = limits->mblim;
    params->hev_thr = limits->hev_thr;
  }
}
3085 
av1_filter_block_plane_vert(const AV1_COMMON * const cm,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)3086 static void av1_filter_block_plane_vert(
3087     const AV1_COMMON *const cm, const int plane,
3088     const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
3089     const uint32_t mi_col) {
3090   const int col_step = MI_SIZE >> MI_SIZE_LOG2;
3091   const int row_step = MI_SIZE >> MI_SIZE_LOG2;
3092   const uint32_t scale_horz = plane_ptr->subsampling_x;
3093   const uint32_t scale_vert = plane_ptr->subsampling_y;
3094   uint8_t *const dst_ptr = plane_ptr->dst.buf;
3095   const int dst_stride = plane_ptr->dst.stride;
3096 #if CONFIG_LPF_SB
3097   int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
3098   y_range = AOMMIN(y_range, cm->mi_rows);
3099   y_range >>= scale_vert;
3100 
3101   int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
3102   x_range = AOMMIN(x_range, cm->mi_cols);
3103   x_range >>= scale_horz;
3104 #else
3105   const int y_range = (MAX_MIB_SIZE >> scale_vert);
3106   const int x_range = (MAX_MIB_SIZE >> scale_horz);
3107 #endif  // CONFIG_LPF_SB
3108   for (int y = 0; y < y_range; y += row_step) {
3109     uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
3110     for (int x = 0; x < x_range; x += col_step) {
3111       // inner loop always filter vertical edges in a MI block. If MI size
3112       // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
3113       // If 4x4 trasnform is used, it will then filter the internal edge
3114       //  aligned with a 4x4 block
3115       const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
3116       const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
3117       AV1_DEBLOCKING_PARAMETERS params;
3118       memset(&params, 0, sizeof(params));
3119 
3120       set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, VERT_EDGE,
3121                          curr_x, curr_y, plane, plane_ptr);
3122 
3123 #if CONFIG_LPF_DIRECT
3124       uint8_t *const src = plane_ptr->dst.buf0;
3125       const int width = cm->width >> scale_horz;
3126       const int height = cm->height >> scale_vert;
3127       const int pivot = 8;
3128       const int line_length = 16;
3129       uint8_t block[128];
3130       int orig_pos[128];
3131       const int vert_or_horz = 0;  // 0: vertical
3132       const int unit = 1;
3133       int i;
3134       for (i = 0; i < 128; ++i) {
3135         block[i] = 0;
3136         orig_pos[i] = -1;
3137       }
3138 
3139       if (params.filter_length) {
3140         const int filt_len = params.filter_length == 16 ? 8 : 4;
3141         const int direct =
3142             pick_min_grad_direct(src, filt_len, curr_y, curr_x, width, height,
3143                                  dst_stride, unit, vert_or_horz);
3144 
3145         pick_filter_block_vert(src, block, orig_pos, filt_len, curr_y, curr_x,
3146                                width, height, dst_stride, pivot, line_length,
3147                                unit, direct);
3148         uint8_t *const filt_start = block + pivot;
3149         switch (params.filter_length) {
3150           // apply 4-tap filtering
3151           case 4:
3152 #if CONFIG_HIGHBITDEPTH
3153             if (cm->use_highbitdepth)
3154               aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(filt_start),
3155                                         line_length, params.mblim, params.lim,
3156                                         params.hev_thr, cm->bit_depth);
3157             else
3158 #endif  // CONFIG_HIGHBITDEPTH
3159               aom_lpf_vertical_4(filt_start, line_length, params.mblim,
3160                                  params.lim, params.hev_thr);
3161             break;
3162           // apply 8-tap filtering
3163           case 8:
3164 #if CONFIG_HIGHBITDEPTH
3165             if (cm->use_highbitdepth)
3166               aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(filt_start),
3167                                         line_length, params.mblim, params.lim,
3168                                         params.hev_thr, cm->bit_depth);
3169             else
3170 #endif  // CONFIG_HIGHBITDEPTH
3171               aom_lpf_vertical_8(filt_start, line_length, params.mblim,
3172                                  params.lim, params.hev_thr);
3173             break;
3174           // apply 16-tap filtering
3175           case 16:
3176 #if CONFIG_HIGHBITDEPTH
3177             if (cm->use_highbitdepth)
3178               aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(filt_start),
3179                                          line_length, params.mblim, params.lim,
3180                                          params.hev_thr, cm->bit_depth);
3181             else
3182 #endif  // CONFIG_HIGHBITDEPTH
3183               aom_lpf_vertical_16(filt_start, line_length, params.mblim,
3184                                   params.lim, params.hev_thr);
3185             break;
3186           // no filtering
3187           default: break;
3188         }
3189 
3190         for (i = 0; i < 128; ++i) {
3191           if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
3192         }
3193       }
3194 
3195       if (params.filter_length_internal) {
3196         for (i = 0; i < 128; ++i) {
3197           block[i] = 0;
3198           orig_pos[i] = -1;
3199         }
3200 
3201         const int direct =
3202             pick_min_grad_direct(src, 4, curr_y, curr_x + 4, width, height,
3203                                  dst_stride, unit, vert_or_horz);
3204 
3205         pick_filter_block_vert(src, block, orig_pos, 4, curr_y, curr_x + 4,
3206                                width, height, dst_stride, pivot, line_length,
3207                                unit, direct);
3208 
3209         uint8_t *const filt_start = block + pivot;
3210 #if CONFIG_HIGHBITDEPTH
3211         if (cm->use_highbitdepth)
3212           aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(filt_start),
3213                                     line_length, params.mblim, params.lim,
3214                                     params.hev_thr, cm->bit_depth);
3215         else
3216 #endif  // CONFIG_HIGHBITDEPTH
3217           aom_lpf_vertical_4(filt_start, line_length, params.mblim, params.lim,
3218                              params.hev_thr);
3219 
3220         for (i = 0; i < 128; ++i) {
3221           if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
3222         }
3223       }
3224 #else  // !CONFIG_LPF_DIRECT
3225       switch (params.filter_length) {
3226         // apply 4-tap filtering
3227         case 4:
3228 #if CONFIG_HIGHBITDEPTH
3229           if (cm->use_highbitdepth)
3230             aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride,
3231                                       params.mblim, params.lim, params.hev_thr,
3232                                       cm->bit_depth);
3233           else
3234 #endif  // CONFIG_HIGHBITDEPTH
3235             aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
3236                                params.hev_thr);
3237           break;
3238 #if PARALLEL_DEBLOCKING_5_TAP_CHROMA
3239         case 6:  // apply 6-tap filter for chroma plane only
3240           assert(plane != 0);
3241 #if CONFIG_HIGHBITDEPTH
3242           if (cm->use_highbitdepth)
3243             aom_highbd_lpf_vertical_6_c(CONVERT_TO_SHORTPTR(p), dst_stride,
3244                                         params.mblim, params.lim,
3245                                         params.hev_thr, cm->bit_depth);
3246           else
3247 #endif  // CONFIG_HIGHBITDEPTH
3248             aom_lpf_vertical_6_c(p, dst_stride, params.mblim, params.lim,
3249                                  params.hev_thr);
3250           break;
3251 #endif
3252         // apply 8-tap filtering
3253         case 8:
3254 #if CONFIG_HIGHBITDEPTH
3255           if (cm->use_highbitdepth)
3256             aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride,
3257                                       params.mblim, params.lim, params.hev_thr,
3258                                       cm->bit_depth);
3259           else
3260 #endif  // CONFIG_HIGHBITDEPTH
3261             aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
3262                                params.hev_thr);
3263           break;
3264         // apply 16-tap filtering
3265         case 16:
3266 #if CONFIG_HIGHBITDEPTH
3267           if (cm->use_highbitdepth)
3268 #if CONFIG_DEBLOCK_13TAP
3269             // TODO(olah): Remove _c once SIMD for 13-tap is available
3270             aom_highbd_lpf_vertical_16_c(CONVERT_TO_SHORTPTR(p), dst_stride,
3271                                          params.mblim, params.lim,
3272                                          params.hev_thr, cm->bit_depth);
3273 #else
3274             aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(p), dst_stride,
3275                                        params.mblim, params.lim, params.hev_thr,
3276                                        cm->bit_depth);
3277 #endif
3278           else
3279 #endif  // CONFIG_HIGHBITDEPTH
3280 #if CONFIG_DEBLOCK_13TAP
3281             aom_lpf_vertical_16_c(p, dst_stride, params.mblim, params.lim,
3282                                   params.hev_thr);
3283 #else
3284           aom_lpf_vertical_16(p, dst_stride, params.mblim, params.lim,
3285                               params.hev_thr);
3286 #endif
3287           break;
3288         // no filtering
3289         default: break;
3290       }
3291       // process the internal edge
3292       if (params.filter_length_internal) {
3293 #if CONFIG_HIGHBITDEPTH
3294         if (cm->use_highbitdepth)
3295           aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p + 4), dst_stride,
3296                                     params.mblim, params.lim, params.hev_thr,
3297                                     cm->bit_depth);
3298         else
3299 #endif  // CONFIG_HIGHBITDEPTH
3300           aom_lpf_vertical_4(p + 4, dst_stride, params.mblim, params.lim,
3301                              params.hev_thr);
3302       }
3303 #endif  // CONFIG_LPF_DIRECT
3304       // advance the destination pointer
3305       p += MI_SIZE;
3306     }
3307   }
3308 }
3309 
av1_filter_block_plane_horz(const AV1_COMMON * const cm,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)3310 static void av1_filter_block_plane_horz(
3311     const AV1_COMMON *const cm, const int plane,
3312     const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
3313     const uint32_t mi_col) {
3314   const int col_step = MI_SIZE >> MI_SIZE_LOG2;
3315   const int row_step = MI_SIZE >> MI_SIZE_LOG2;
3316   const uint32_t scale_horz = plane_ptr->subsampling_x;
3317   const uint32_t scale_vert = plane_ptr->subsampling_y;
3318   uint8_t *const dst_ptr = plane_ptr->dst.buf;
3319   const int dst_stride = plane_ptr->dst.stride;
3320 #if CONFIG_LPF_SB
3321   int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
3322   y_range = AOMMIN(y_range, cm->mi_rows);
3323   y_range >>= scale_vert;
3324 
3325   int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
3326   x_range = AOMMIN(x_range, cm->mi_cols);
3327   x_range >>= scale_horz;
3328 #else
3329   const int y_range = (MAX_MIB_SIZE >> scale_vert);
3330   const int x_range = (MAX_MIB_SIZE >> scale_horz);
3331 #endif  // CONFIG_LPF_SB
3332   for (int y = 0; y < y_range; y += row_step) {
3333     uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
3334     for (int x = 0; x < x_range; x += col_step) {
3335       // inner loop always filter vertical edges in a MI block. If MI size
3336       // is 8x8, it will first filter the vertical edge aligned with a 8x8
3337       // block. If 4x4 trasnform is used, it will then filter the internal
3338       // edge aligned with a 4x4 block
3339       const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
3340       const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
3341       AV1_DEBLOCKING_PARAMETERS params;
3342       memset(&params, 0, sizeof(params));
3343 
3344       set_lpf_parameters(&params, (cm->mi_stride << scale_vert), cm, HORZ_EDGE,
3345                          curr_x, curr_y, plane, plane_ptr);
3346 
3347 #if CONFIG_LPF_DIRECT
3348       uint8_t *const src = plane_ptr->dst.buf0;
3349       const int width = cm->width >> scale_horz;
3350       const int height = cm->height >> scale_vert;
3351       const int pivot = 8;
3352       const int line_length = 16;
3353       uint8_t block[256];
3354       int orig_pos[256];
3355       const int vert_or_horz = 1;  // 1: horizontal
3356       const int unit = 1;
3357       int i;
3358       for (i = 0; i < 256; ++i) {
3359         block[i] = 0;
3360         orig_pos[i] = -1;
3361       }
3362 
3363       if (params.filter_length) {
3364         const int filt_len = params.filter_length == 16 ? 8 : 4;
3365         const int direct =
3366             pick_min_grad_direct(src, filt_len, curr_y, curr_x, width, height,
3367                                  dst_stride, unit, vert_or_horz);
3368 
3369         pick_filter_block_horz(src, block, orig_pos, filt_len, curr_y, curr_x,
3370                                width, height, dst_stride, pivot, line_length,
3371                                unit, direct);
3372         uint8_t *const filt_start = block + pivot * line_length;
3373         switch (params.filter_length) {
3374           // apply 4-tap filtering
3375           case 4:
3376 #if CONFIG_HIGHBITDEPTH
3377             if (cm->use_highbitdepth)
3378               aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(filt_start),
3379                                           line_length, params.mblim, params.lim,
3380                                           params.hev_thr, cm->bit_depth);
3381             else
3382 #endif  // CONFIG_HIGHBITDEPTH
3383               aom_lpf_horizontal_4(filt_start, line_length, params.mblim,
3384                                    params.lim, params.hev_thr);
3385             break;
3386           // apply 8-tap filtering
3387           case 8:
3388 #if CONFIG_HIGHBITDEPTH
3389             if (cm->use_highbitdepth)
3390               aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(filt_start),
3391                                           line_length, params.mblim, params.lim,
3392                                           params.hev_thr, cm->bit_depth);
3393             else
3394 #endif  // CONFIG_HIGHBITDEPTH
3395               aom_lpf_horizontal_8(filt_start, line_length, params.mblim,
3396                                    params.lim, params.hev_thr);
3397             break;
3398           // apply 16-tap filtering
3399           case 16:
3400 #if CONFIG_HIGHBITDEPTH
3401             if (cm->use_highbitdepth)
3402               aom_highbd_lpf_horizontal_edge_16(
3403                   CONVERT_TO_SHORTPTR(filt_start), line_length, params.mblim,
3404                   params.lim, params.hev_thr, cm->bit_depth);
3405             else
3406 #endif  // CONFIG_HIGHBITDEPTH
3407               aom_lpf_horizontal_edge_16(filt_start, line_length, params.mblim,
3408                                          params.lim, params.hev_thr);
3409             break;
3410           // no filtering
3411           default: break;
3412         }
3413 
3414         for (i = 0; i < 256; ++i) {
3415           if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
3416         }
3417       }
3418       if (params.filter_length_internal) {
3419         for (i = 0; i < 256; ++i) {
3420           block[i] = 0;
3421           orig_pos[i] = -1;
3422         }
3423 
3424         const int direct =
3425             pick_min_grad_direct(src, 4, curr_y + 4, curr_x, width, height,
3426                                  dst_stride, unit, vert_or_horz);
3427 
3428         pick_filter_block_horz(src, block, orig_pos, 4, curr_y + 4, curr_x,
3429                                width, height, dst_stride, pivot, line_length,
3430                                unit, direct);
3431 
3432         uint8_t *const filt_start = block + pivot * line_length;
3433 #if CONFIG_HIGHBITDEPTH
3434         if (cm->use_highbitdepth)
3435           aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(filt_start),
3436                                       line_length, params.mblim, params.lim,
3437                                       params.hev_thr, cm->bit_depth);
3438         else
3439 #endif  // CONFIG_HIGHBITDEPTH
3440           aom_lpf_horizontal_4(filt_start, line_length, params.mblim,
3441                                params.lim, params.hev_thr);
3442 
3443         for (i = 0; i < 256; ++i) {
3444           if (orig_pos[i] >= 0) src[orig_pos[i]] = block[i];
3445         }
3446       }
3447 #else  // !CONFIG_LPF_DIRECT
3448       switch (params.filter_length) {
3449         // apply 4-tap filtering
3450         case 4:
3451 #if CONFIG_HIGHBITDEPTH
3452           if (cm->use_highbitdepth)
3453             aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride,
3454                                         params.mblim, params.lim,
3455                                         params.hev_thr, cm->bit_depth);
3456           else
3457 #endif  // CONFIG_HIGHBITDEPTH
3458             aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
3459                                  params.hev_thr);
3460           break;
3461 #if PARALLEL_DEBLOCKING_5_TAP_CHROMA
3462         // apply 6-tap filtering
3463         case 6: assert(plane != 0);
3464 #if CONFIG_HIGHBITDEPTH
3465           if (cm->use_highbitdepth)
3466             aom_highbd_lpf_horizontal_6_c(CONVERT_TO_SHORTPTR(p), dst_stride,
3467                                           params.mblim, params.lim,
3468                                           params.hev_thr, cm->bit_depth);
3469           else
3470 #endif  // CONFIG_HIGHBITDEPTH
3471             aom_lpf_horizontal_6_c(p, dst_stride, params.mblim, params.lim,
3472                                    params.hev_thr);
3473           break;
3474 #endif
3475         // apply 8-tap filtering
3476         case 8:
3477 #if CONFIG_HIGHBITDEPTH
3478           if (cm->use_highbitdepth)
3479             aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride,
3480                                         params.mblim, params.lim,
3481                                         params.hev_thr, cm->bit_depth);
3482           else
3483 #endif  // CONFIG_HIGHBITDEPTH
3484             aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
3485                                  params.hev_thr);
3486           break;
3487         // apply 16-tap filtering
3488         case 16:
3489 #if CONFIG_HIGHBITDEPTH
3490           if (cm->use_highbitdepth)
3491 #if CONFIG_DEBLOCK_13TAP
3492             // TODO(olah): Remove _c once SIMD for 13-tap is available
3493             aom_highbd_lpf_horizontal_edge_16_c(
3494                 CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim,
3495                 params.hev_thr, cm->bit_depth);
3496 #else
3497             aom_highbd_lpf_horizontal_edge_16(
3498                 CONVERT_TO_SHORTPTR(p), dst_stride, params.mblim, params.lim,
3499                 params.hev_thr, cm->bit_depth);
3500 #endif
3501           else
3502 #endif  // CONFIG_HIGHBITDEPTH
3503 #if CONFIG_DEBLOCK_13TAP
3504             aom_lpf_horizontal_edge_16_c(p, dst_stride, params.mblim,
3505                                          params.lim, params.hev_thr);
3506 #else
3507           aom_lpf_horizontal_edge_16(p, dst_stride, params.mblim, params.lim,
3508                                      params.hev_thr);
3509 #endif
3510           break;
3511         // no filtering
3512         default: break;
3513       }
3514       // process the internal edge
3515       if (params.filter_length_internal) {
3516 #if CONFIG_HIGHBITDEPTH
3517         if (cm->use_highbitdepth)
3518           aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p + 4 * dst_stride),
3519                                       dst_stride, params.mblim, params.lim,
3520                                       params.hev_thr, cm->bit_depth);
3521         else
3522 #endif  // CONFIG_HIGHBITDEPTH
3523           aom_lpf_horizontal_4(p + 4 * dst_stride, dst_stride, params.mblim,
3524                                params.lim, params.hev_thr);
3525       }
3526 #endif  // CONFIG_LPF_DIRECT
3527       // advance the destination pointer
3528       p += MI_SIZE;
3529     }
3530   }
3531 }
3532 #endif  // CONFIG_PARALLEL_DEBLOCKING
3533 
// Runs the deblocking filter over the mode-info rows [start, stop) of
// frame_buffer.
//
//   frame_buffer - frame whose planes are filtered in place.
//   cm           - frame-level state (mode info, filter tables, dimensions).
//   planes       - per-plane descriptors; re-pointed at every superblock via
//                  av1_setup_dst_planes().
//   start, stop  - first mode-info row and one past the last one.
//   col_start,
//   col_end      - (CONFIG_LPF_SB only) mode-info column range; without
//                  CONFIG_LPF_SB the whole width [0, cm->mi_cols) is used.
//   y_only       - without CONFIG_LOOPFILTER_LEVEL: non-zero restricts the
//                  filtering to the first plane. With CONFIG_LOOPFILTER_LEVEL
//                  it is the index of the single plane to filter.
//
// The column range is honoured by the CONFIG_PARALLEL_DEBLOCKING path only;
// the other two paths always walk complete rows.
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
                          struct macroblockd_plane *planes, int start, int stop,
#if CONFIG_LPF_SB
                          int col_start, int col_end,
#endif
                          int y_only) {
#if CONFIG_LOOPFILTER_LEVEL
  // y_only no longer has its original meaning.
  // Here it means which plane to filter
  // when y_only = {0, 1, 2}, it means we are searching for filter level for
  // Y/U/V plane individually.
  const int plane_start = y_only;
  const int plane_end = plane_start + 1;
#else
  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
  const int plane_start = 0;
  const int plane_end = num_planes;
#endif  // CONFIG_LOOPFILTER_LEVEL
#if !CONFIG_LPF_SB
  const int col_start = 0;
  const int col_end = cm->mi_cols;
#endif  // CONFIG_LPF_SB
  int mi_row, mi_col;
  int plane;

#if CONFIG_PARALLEL_DEBLOCKING
  // Two full passes: every vertical edge is filtered before any horizontal
  // edge is touched. This single path serves all partition / transform
  // configurations and always respects [col_start, col_end).

  // filter all vertical edges in every MAX_MIB_SIZE super block
  for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
    for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
      for (plane = plane_start; plane < plane_end; ++plane) {
        av1_filter_block_plane_vert(cm, plane, &planes[plane], mi_row, mi_col);
      }
    }
  }

  // filter all horizontal edges in every MAX_MIB_SIZE super block
  for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
    for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
      for (plane = plane_start; plane < plane_end; ++plane) {
        av1_filter_block_plane_horz(cm, plane, &planes[plane], mi_row, mi_col);
      }
    }
  }
#elif CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES || \
    CONFIG_CB4X4
  // this path always processes complete rows
  (void)col_start;
  (void)col_end;

#if CONFIG_VAR_TX
  for (int i = 0; i < MAX_MB_PLANE; ++i)
    memset(cm->top_txfm_context[i], TX_32X32, cm->mi_cols << TX_UNIT_WIDE_LOG2);
#endif  // CONFIG_VAR_TX
  for (mi_row = start; mi_row < stop; mi_row += cm->mib_size) {
    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
#if CONFIG_VAR_TX
    for (int i = 0; i < MAX_MB_PLANE; ++i)
      memset(cm->left_txfm_context[i], TX_32X32,
             MAX_MIB_SIZE << TX_UNIT_HIGH_LOG2);
#endif  // CONFIG_VAR_TX
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);

      for (plane = plane_start; plane < plane_end; ++plane) {
        av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
                                          mi_row, mi_col, plane);
        av1_filter_block_plane_non420_hor(cm, &planes[plane], mi + mi_col,
                                          mi_row, mi_col, plane);
      }
    }
  }
#else   // mask based path
  // NOTE(review): this path reads num_planes, which is only declared when
  // CONFIG_LOOPFILTER_LEVEL is off; it also always filters plane 0 and
  // always processes complete rows.
  enum lf_path path;
  LOOP_FILTER_MASK lfm;
  (void)plane_start;
  (void)plane_end;
  (void)col_start;
  (void)col_end;

  if (y_only)
    path = LF_PATH_444;
  else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
    path = LF_PATH_420;
  else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
    path = LF_PATH_444;
  else
    path = LF_PATH_SLOW;

  for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
      av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);

      // TODO(JBB): Make setup_mask work for non 420.
      av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);

      av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, &lfm);
      av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, &lfm);
      for (plane = 1; plane < num_planes; ++plane) {
        switch (path) {
          case LF_PATH_420:
            av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, &lfm);
            av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, &lfm);
            break;
          case LF_PATH_444:
            av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, &lfm);
            av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, &lfm);
            break;
          case LF_PATH_SLOW:
            av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
                                              mi_row, mi_col, plane);
            av1_filter_block_plane_non420_hor(cm, &planes[plane], mi + mi_col,
                                              mi_row, mi_col, plane);

            break;
        }
      }
    }
  }
#endif  // CONFIG_PARALLEL_DEBLOCKING
}
3676 
// Runs the loop filter over a frame, or over a sub-region of it.
//
// The mi-unit region to filter is chosen as follows:
//  * CONFIG_LPF_SB, partial_frame != 0: a MAX_MIB_SIZE window whose origin is
//    (mi_row, mi_col) shifted back by FILT_BOUNDARY_MI_OFFSET, clamped to the
//    frame. frame_filter_level is used as the level for that superblock.
//  * CONFIG_LPF_SB, partial_frame == 0: the whole frame.
//  * otherwise, partial_frame != 0 and more than 8 mi rows: a band that starts
//    at half the frame height rounded down to a multiple of 8 and spans
//    max(mi_rows / 8, 8) rows.
//  * otherwise: every mi row of the frame.
//
// When CONFIG_EXT_DELTA_Q is enabled, cm->lf.filter_level is overwritten with
// the requested level(s) for the duration of the filtering pass and put back
// to its previous value before returning.
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                           MACROBLOCKD *xd, int frame_filter_level,
#if CONFIG_LOOPFILTER_LEVEL
                           int frame_filter_level_r,
#endif
                           int y_only, int partial_frame
#if CONFIG_LPF_SB
                           ,
                           int mi_row, int mi_col
#endif
                           ) {
#if CONFIG_EXT_DELTA_Q
  // Remember the level(s) currently stored in cm so they can be restored once
  // the rows have been filtered.
#if CONFIG_LOOPFILTER_LEVEL
  const int saved_level0 = cm->lf.filter_level[0];
  const int saved_level1 = cm->lf.filter_level[1];
#else
  const int saved_level = cm->lf.filter_level;
#endif
#endif  // CONFIG_EXT_DELTA_Q

#if CONFIG_LPF_SB
  // Per-superblock deblocking experiment.
  // partial_frame == 1: we are searching for the best filter level of the
  // current superblock, and frame_filter_level carries that superblock level
  // rather than a frame-wide one. A zero level means nothing to do.
  // partial_frame == 0: this is the actual filtering stage for the frame.
  if (partial_frame && frame_filter_level == 0) return;

  int row_begin = 0;
  int col_begin = 0;
  int row_end = cm->mi_rows;
  int col_end = cm->mi_cols;
  if (partial_frame) {
    const int row_anchor = mi_row - FILT_BOUNDARY_MI_OFFSET;
    const int col_anchor = mi_col - FILT_BOUNDARY_MI_OFFSET;

    // Clamp the window to the frame on all four sides.
    row_begin = AOMMAX(0, row_anchor);
    col_begin = AOMMAX(0, col_anchor);
    row_end = AOMMIN(row_anchor + MAX_MIB_SIZE, cm->mi_rows);
    col_end = AOMMIN(col_anchor + MAX_MIB_SIZE, cm->mi_cols);

    av1_loop_filter_sb_level_init(cm, mi_row, mi_col, frame_filter_level);
  }
#else  // CONFIG_LPF_SB
  // Nothing to filter when every requested level is zero.
#if CONFIG_LOOPFILTER_LEVEL
  if (frame_filter_level == 0 && frame_filter_level_r == 0) return;
#else
  if (frame_filter_level == 0) return;
#endif

  int row_begin = 0;
  int row_count = cm->mi_rows;
  if (partial_frame && cm->mi_rows > 8) {
    // Half the frame height with the low three bits cleared.
    row_begin = (cm->mi_rows >> 1) & 0xfffffff8;
    row_count = AOMMAX(cm->mi_rows / 8, 8);
  }
  const int row_end = row_begin + row_count;

#if CONFIG_LOOPFILTER_LEVEL
  // TODO(chengchen): refactor the code such that y_only has its matching
  // meaning. Now it means the plane to be filtered in this experiment.
  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level_r,
                             y_only);
#else
  av1_loop_filter_frame_init(cm, frame_filter_level, frame_filter_level);
#endif
#endif  // CONFIG_LPF_SB

#if CONFIG_EXT_DELTA_Q
  // Install the requested level(s) for the filtering pass below.
#if CONFIG_LOOPFILTER_LEVEL
  cm->lf.filter_level[0] = frame_filter_level;
  cm->lf.filter_level[1] = frame_filter_level_r;
#else
  cm->lf.filter_level = frame_filter_level;
#endif
#endif  // CONFIG_EXT_DELTA_Q

#if CONFIG_LPF_SB
  av1_loop_filter_rows(frame, cm, xd->plane, row_begin, row_end, col_begin,
                       col_end, y_only);
#else
  av1_loop_filter_rows(frame, cm, xd->plane, row_begin, row_end, y_only);
#endif  // CONFIG_LPF_SB

#if CONFIG_EXT_DELTA_Q
  // Put back the level(s) that were stored in cm on entry.
#if CONFIG_LOOPFILTER_LEVEL
  cm->lf.filter_level[0] = saved_level0;
  cm->lf.filter_level[1] = saved_level1;
#else
  cm->lf.filter_level = saved_level;
#endif
#endif  // CONFIG_EXT_DELTA_Q
}
3775 
// Re-initialises a loop-filter worker data record.
//
// Stores the frame buffer and common-state pointers, copies the caller's
// plane descriptors into lf_data->planes, and zeroes the start/stop row range
// and the y_only flag.
void av1_loop_filter_data_reset(LFWorkerData *lf_data,
                                YV12_BUFFER_CONFIG *frame_buffer,
                                struct AV1Common *cm,
                                const struct macroblockd_plane *planes) {
  // The number of bytes copied is the full size of the destination member,
  // so `planes` must provide at least that much readable data.
  memcpy(lf_data->planes, planes, sizeof(lf_data->planes));

  lf_data->cm = cm;
  lf_data->frame_buffer = frame_buffer;

  // No rows assigned yet; the range is filled in by whoever schedules work.
  lf_data->y_only = 0;
  lf_data->stop = 0;
  lf_data->start = 0;
}
3787 
// Worker entry point for row-based loop filtering.
//
// Filters the mi rows [lf_data->start, lf_data->stop) described by lf_data.
// When CONFIG_LPF_SB is enabled the function performs no filtering at all.
// The second argument is ignored. Always returns 1.
int av1_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
#if CONFIG_LPF_SB
  // Nothing to do in this configuration; silence the unused-parameter warning.
  (void)lf_data;
#else
  av1_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
                       lf_data->start, lf_data->stop, lf_data->y_only);
#endif  // CONFIG_LPF_SB
  (void)unused;
  return 1;
}
3798