1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include "config/aom_config.h"
15 #include "config/aom_dsp_rtcd.h"
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_mem/aom_mem.h"
19 #include "aom_ports/mem.h"
20 #include "av1/common/av1_loopfilter.h"
21 #include "av1/common/onyxc_int.h"
22 #include "av1/common/reconinter.h"
23 #include "av1/common/seg_common.h"
24
// Maps (plane, edge direction) to the segment feature that carries the loop
// filter level delta: Y has separate vertical/horizontal features, while U
// and V each share a single feature for both directions.
static const SEG_LVL_FEATURES seg_lvl_lf_lut[MAX_MB_PLANE][2] = {
  { SEG_LVL_ALT_LF_Y_V, SEG_LVL_ALT_LF_Y_H },
  { SEG_LVL_ALT_LF_U, SEG_LVL_ALT_LF_U },
  { SEG_LVL_ALT_LF_V, SEG_LVL_ALT_LF_V }
};
30
// Maps (plane, edge direction) to the index into mbmi->delta_lf[] used when
// delta_lf_multi is enabled: Y uses ids 0 (vertical) and 1 (horizontal);
// U uses 2 and V uses 3 for both directions.
static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
  { 0, 1 }, { 2, 2 }, { 3, 3 }
};
34
// Direction of the block edge being filtered: VERT_EDGE is a vertical edge
// (filtered across columns), HORZ_EDGE a horizontal edge (across rows).
typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR;
36
// Per-prediction-mode index into lf.mode_deltas: 0 for intra and
// global-motion modes, 1 for the remaining inter modes.
static const int mode_lf_lut[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES
  1, 1, 0, 1, // INTER_MODES (GLOBALMV == 0)
  1, 1, 1, 1, 1, 1, 0, 1 // INTER_COMPOUND_MODES (GLOBAL_GLOBALMV == 0)
};
42
43 #if LOOP_FILTER_BITMASK
44 // 256 bit masks (64x64 / 4x4) for left transform size for Y plane.
45 // We use 4 uint64_t to represent the 256 bit.
46 // Each 1 represents a position where we should apply a loop filter
47 // across the left border of an 4x4 block boundary.
48 //
// In the case of TX_8X8 (with the low-order byte first), we end up with
// a mask that looks like this (-- and | are used for better view):
51 //
52 // 10101010|10101010
53 // 10101010|10101010
54 // 10101010|10101010
55 // 10101010|10101010
56 // 10101010|10101010
57 // 10101010|10101010
58 // 10101010|10101010
59 // 10101010|10101010
60 // -----------------
61 // 10101010|10101010
62 // 10101010|10101010
63 // 10101010|10101010
64 // 10101010|10101010
65 // 10101010|10101010
66 // 10101010|10101010
67 // 10101010|10101010
68 // 10101010|10101010
69 //
70 // A loopfilter should be applied to every other 4x4 horizontally.
71
72 // 256 bit masks (64x64 / 4x4) for above transform size for Y plane.
73 // We use 4 uint64_t to represent the 256 bit.
74 // Each 1 represents a position where we should apply a loop filter
75 // across the top border of an 4x4 block boundary.
76 //
// In the case of TX_8X8 (with the low-order byte first), we end up with
// a mask that looks like this:
79 //
80 // 11111111|11111111
81 // 00000000|00000000
82 // 11111111|11111111
83 // 00000000|00000000
84 // 11111111|11111111
85 // 00000000|00000000
86 // 11111111|11111111
87 // 00000000|00000000
88 // -----------------
89 // 11111111|11111111
90 // 00000000|00000000
91 // 11111111|11111111
92 // 00000000|00000000
93 // 11111111|11111111
94 // 00000000|00000000
95 // 11111111|11111111
96 // 00000000|00000000
97 //
// A loopfilter should be applied to every other row of 4x4 blocks vertically.
99
// Index into {left,above}_mask_univariant_reordered for each block size when
// the transform size is 4x4. -1 marks block sizes that cannot occur with
// this transform size.
const int mask_id_table_tx_4x4[BLOCK_SIZES_ALL] = {
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, -1, -1, -1, 13, 14, 15, 16, 17, 18
};
103
// Index into {left,above}_mask_univariant_reordered for each block size when
// the transform size is 8x8 (-1 = impossible combination).
const int mask_id_table_tx_8x8[BLOCK_SIZES_ALL] = {
  -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, 10, 11, 12, 13
};
107
// Index into {left,above}_mask_univariant_reordered for each block size when
// the transform size is 16x16 (-1 = impossible combination).
const int mask_id_table_tx_16x16[BLOCK_SIZES_ALL] = {
  -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, -1, -1, -1, -1, -1, -1, -1, 7, 8
};
111
// Index into {left,above}_mask_univariant_reordered for each block size when
// the transform size is 32x32 (-1 = impossible combination).
const int mask_id_table_tx_32x32[BLOCK_SIZES_ALL] = { -1, -1, -1, -1, -1, -1,
                                                      -1, -1, -1, 0,  1,  2,
                                                      3,  -1, -1, -1, -1, -1,
                                                      -1, -1, -1, -1 };
116
// Left-edge filter masks for every feasible (block size, transform size)
// pair, grouped by transform size; the mask_id_table_tx_* tables give each
// block size's index within its group. Each mask is 256 bits stored in four
// uint64_t words: bit k covers the 4x4 unit at row (k / 16), col (k % 16)
// of a 64x64 area. A set bit means "filter across the left edge of this
// 4x4 unit".
const FilterMask left_mask_univariant_reordered[67] = {
  // TX_4X4
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X4, TX_4X4
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X8, TX_4X4
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X4, TX_4X4
  { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X8, TX_4X4
  { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X16, TX_4X4
  { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X8, TX_4X4
  { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X16, TX_4X4
  { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X32, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X16, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X32, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
      0x00ff00ff00ff00ffULL } }, // block size 32X64, TX_4X4
  { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_4X4
  { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
      0xffffffffffffffffULL } }, // block size 64X64, TX_4X4
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X16, TX_4X4
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X4, TX_4X4
  { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X32, TX_4X4
  { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X8, TX_4X4
  { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
      0x000f000f000f000fULL } }, // block size 16X64, TX_4X4
  { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_4X4
  // TX_8X8
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X8, TX_8X8
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X16, TX_8X8
  { { 0x0000000000050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X8, TX_8X8
  { { 0x0005000500050005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X16, TX_8X8
  { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X32, TX_8X8
  { { 0x0055005500550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X16, TX_8X8
  { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X32, TX_8X8
  { { 0x0055005500550055ULL, 0x0055005500550055ULL, 0x0055005500550055ULL,
      0x0055005500550055ULL } }, // block size 32X64, TX_8X8
  { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_8X8
  { { 0x5555555555555555ULL, 0x5555555555555555ULL, 0x5555555555555555ULL,
      0x5555555555555555ULL } }, // block size 64X64, TX_8X8
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X32, TX_8X8
  { { 0x0000000000550055ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X8, TX_8X8
  { { 0x0005000500050005ULL, 0x0005000500050005ULL, 0x0005000500050005ULL,
      0x0005000500050005ULL } }, // block size 16X64, TX_8X8
  { { 0x5555555555555555ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_8X8
  // TX_16X16
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X16, TX_16X16
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X32, TX_16X16
  { { 0x0011001100110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X16, TX_16X16
  { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X32, TX_16X16
  { { 0x0011001100110011ULL, 0x0011001100110011ULL, 0x0011001100110011ULL,
      0x0011001100110011ULL } }, // block size 32X64, TX_16X16
  { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_16X16
  { { 0x1111111111111111ULL, 0x1111111111111111ULL, 0x1111111111111111ULL,
      0x1111111111111111ULL } }, // block size 64X64, TX_16X16
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } }, // block size 16X64, TX_16X16
  { { 0x1111111111111111ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_16X16
  // TX_32X32
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X32, TX_32X32
  { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
      0x0101010101010101ULL } }, // block size 32X64, TX_32X32
  { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_32X32
  { { 0x0101010101010101ULL, 0x0101010101010101ULL, 0x0101010101010101ULL,
      0x0101010101010101ULL } }, // block size 64X64, TX_32X32
  // TX_64X64
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } }, // block size 64X64, TX_64X64
  // 2:1, 1:2 transform sizes.
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X8, TX_4X8
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X16, TX_4X8
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X4, TX_8X4
  { { 0x0000000000000005ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X4, TX_8X4
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X16, TX_8X16
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X32, TX_8X16
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X8, TX_16X8
  { { 0x0000000000110011ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X8, TX_16X8
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X32, TX_16X32
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } }, // block size 16X64, TX_16X32
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X16, TX_32X16
  { { 0x0101010101010101ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_32X16
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } }, // block size 32X64, TX_32X64
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_64X32
  // 4:1, 1:4 transform sizes.
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X16, TX_4X16
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X4, TX_16X4
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X32, TX_8X32
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X8, TX_32X8
  { { 0x0001000100010001ULL, 0x0001000100010001ULL, 0x0001000100010001ULL,
      0x0001000100010001ULL } }, // block size 16X64, TX_16X64
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_64X16
};
260
// Top-edge filter masks, organized exactly like
// left_mask_univariant_reordered: grouped by transform size, indexed within
// each group via mask_id_table_tx_*. Each mask is 256 bits in four uint64_t
// words; bit k covers the 4x4 unit at row (k / 16), col (k % 16) of a 64x64
// area. A set bit means "filter across the top edge of this 4x4 unit".
const FilterMask above_mask_univariant_reordered[67] = {
  // TX_4X4
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X4, TX_4X4
  { { 0x0000000000010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X8, TX_4X4
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X4, TX_4X4
  { { 0x0000000000030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X8, TX_4X4
  { { 0x0003000300030003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X16, TX_4X4
  { { 0x00000000000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X8, TX_4X4
  { { 0x000f000f000f000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X16, TX_4X4
  { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X32, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X16, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X32, TX_4X4
  { { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
      0x00ff00ff00ff00ffULL } }, // block size 32X64, TX_4X4
  { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_4X4
  { { 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL,
      0xffffffffffffffffULL } }, // block size 64X64, TX_4X4
  { { 0x0001000100010001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X16, TX_4X4
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X4, TX_4X4
  { { 0x0003000300030003ULL, 0x0003000300030003ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X32, TX_4X4
  { { 0x0000000000ff00ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X8, TX_4X4
  { { 0x000f000f000f000fULL, 0x000f000f000f000fULL, 0x000f000f000f000fULL,
      0x000f000f000f000fULL } }, // block size 16X64, TX_4X4
  { { 0xffffffffffffffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_4X4
  // TX_8X8
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X8, TX_8X8
  { { 0x0000000300000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X16, TX_8X8
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X8, TX_8X8
  { { 0x0000000f0000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X16, TX_8X8
  { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X32, TX_8X8
  { { 0x000000ff000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X16, TX_8X8
  { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X32, TX_8X8
  { { 0x000000ff000000ffULL, 0x000000ff000000ffULL, 0x000000ff000000ffULL,
      0x000000ff000000ffULL } }, // block size 32X64, TX_8X8
  { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_8X8
  { { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
      0x0000ffff0000ffffULL } }, // block size 64X64, TX_8X8
  { { 0x0000000300000003ULL, 0x0000000300000003ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X32, TX_8X8
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X8, TX_8X8
  { { 0x0000000f0000000fULL, 0x0000000f0000000fULL, 0x0000000f0000000fULL,
      0x0000000f0000000fULL } }, // block size 16X64, TX_8X8
  { { 0x0000ffff0000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_8X8
  // TX_16X16
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X16, TX_16X16
  { { 0x000000000000000fULL, 0x000000000000000fULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X32, TX_16X16
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X16, TX_16X16
  { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X32, TX_16X16
  { { 0x00000000000000ffULL, 0x00000000000000ffULL, 0x00000000000000ffULL,
      0x00000000000000ffULL } }, // block size 32X64, TX_16X16
  { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_16X16
  { { 0x000000000000ffffULL, 0x000000000000ffffULL, 0x000000000000ffffULL,
      0x000000000000ffffULL } }, // block size 64X64, TX_16X16
  { { 0x000000000000000fULL, 0x000000000000000fULL, 0x000000000000000fULL,
      0x000000000000000fULL } }, // block size 16X64, TX_16X16
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_16X16
  // TX_32X32
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X32, TX_32X32
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x00000000000000ffULL,
      0x0000000000000000ULL } }, // block size 32X64, TX_32X32
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_32X32
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x000000000000ffffULL,
      0x0000000000000000ULL } }, // block size 64X64, TX_32X32
  // TX_64X64
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X64, TX_64X64
  // 2:1, 1:2 transform sizes.
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X8, TX_4X8
  { { 0x0000000100000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X16, TX_4X8
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X4, TX_8X4
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X4, TX_8X4
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X16, TX_8X16
  { { 0x0000000000000003ULL, 0x0000000000000003ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X32, TX_8X16
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X8, TX_16X8
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X8, TX_16X8
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X32, TX_16X32
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x000000000000000fULL,
      0x0000000000000000ULL } }, // block size 16X64, TX_16X32
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X16, TX_32X16
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_32X16
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X64, TX_32X64
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X32, TX_64X32
  // 4:1, 1:4 transform sizes.
  { { 0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 4X16, TX_4X16
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X4, TX_16X4
  { { 0x0000000000000003ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 8X32, TX_8X32
  { { 0x00000000000000ffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 32X8, TX_32X8
  { { 0x000000000000000fULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 16X64, TX_16X64
  { { 0x000000000000ffffULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
      0x0000000000000000ULL } }, // block size 64X16, TX_64X16
};
404
get_loop_filter_mask(const AV1_COMMON * const cm,int mi_row,int mi_col)405 LoopFilterMask *get_loop_filter_mask(const AV1_COMMON *const cm, int mi_row,
406 int mi_col) {
407 assert(cm->lf.lfm != NULL);
408 const int row = mi_row >> MIN_MIB_SIZE_LOG2; // 64x64
409 const int col = mi_col >> MIN_MIB_SIZE_LOG2;
410 return &cm->lf.lfm[row * cm->lf.lfm_stride + col];
411 }
412
// Loop filter kernel for a single edge (8-bit pixels).
typedef void (*LpfFunc)(uint8_t *s, int p, const uint8_t *blimit,
                        const uint8_t *limit, const uint8_t *thresh);

// Loop filter kernel for two adjacent edges with independent
// blimit/limit/thresh triples (8-bit pixels).
typedef void (*LpfDualFunc)(uint8_t *s, int p, const uint8_t *blimit0,
                            const uint8_t *limit0, const uint8_t *thresh0,
                            const uint8_t *blimit1, const uint8_t *limit1,
                            const uint8_t *thresh1);

// High-bit-depth variant of LpfFunc (16-bit pixels, extra `bd` parameter).
typedef void (*HbdLpfFunc)(uint16_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh, int bd);

// High-bit-depth variant of LpfDualFunc (16-bit pixels, extra `bd`
// parameter).
typedef void (*HbdLpfDualFunc)(uint16_t *s, int p, const uint8_t *blimit0,
                               const uint8_t *limit0, const uint8_t *thresh0,
                               const uint8_t *blimit1, const uint8_t *limit1,
                               const uint8_t *thresh1, int bd);
428 #endif // LOOP_FILTER_BITMASK
429
// Fills the lim/mblim threshold vectors of lfi->lfthr[] for every possible
// filter level, given the frame's sharpness setting. Higher sharpness
// shrinks the inside limit so the filter alters fewer pixels.
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  for (int lvl = 0; lvl <= MAX_LOOP_FILTER; ++lvl) {
    // Sharpness > 0 scales the level down (by 2x, and 4x above sharpness 4).
    int inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));

    // Sharpness also caps the limit at 9 - sharpness.
    if (sharpness_lvl > 0 && inside_limit > 9 - sharpness_lvl)
      inside_limit = 9 - sharpness_lvl;
    if (inside_limit < 1) inside_limit = 1;

    memset(lfi->lfthr[lvl].lim, inside_limit, SIMD_WIDTH);
    memset(lfi->lfthr[lvl].mblim, 2 * (lvl + 2) + inside_limit, SIMD_WIDTH);
  }
}
450
get_filter_level(const AV1_COMMON * cm,const loop_filter_info_n * lfi_n,const int dir_idx,int plane,const MB_MODE_INFO * mbmi)451 uint8_t get_filter_level(const AV1_COMMON *cm, const loop_filter_info_n *lfi_n,
452 const int dir_idx, int plane,
453 const MB_MODE_INFO *mbmi) {
454 const int segment_id = mbmi->segment_id;
455 if (cm->delta_lf_present_flag) {
456 int delta_lf;
457 if (cm->delta_lf_multi) {
458 const int delta_lf_idx = delta_lf_id_lut[plane][dir_idx];
459 delta_lf = mbmi->delta_lf[delta_lf_idx];
460 } else {
461 delta_lf = mbmi->delta_lf_from_base;
462 }
463 int base_level;
464 if (plane == 0)
465 base_level = cm->lf.filter_level[dir_idx];
466 else if (plane == 1)
467 base_level = cm->lf.filter_level_u;
468 else
469 base_level = cm->lf.filter_level_v;
470 int lvl_seg = clamp(delta_lf + base_level, 0, MAX_LOOP_FILTER);
471 assert(plane >= 0 && plane <= 2);
472 const int seg_lf_feature_id = seg_lvl_lf_lut[plane][dir_idx];
473 if (segfeature_active(&cm->seg, segment_id, seg_lf_feature_id)) {
474 const int data = get_segdata(&cm->seg, segment_id, seg_lf_feature_id);
475 lvl_seg = clamp(lvl_seg + data, 0, MAX_LOOP_FILTER);
476 }
477
478 if (cm->lf.mode_ref_delta_enabled) {
479 const int scale = 1 << (lvl_seg >> 5);
480 lvl_seg += cm->lf.ref_deltas[mbmi->ref_frame[0]] * scale;
481 if (mbmi->ref_frame[0] > INTRA_FRAME)
482 lvl_seg += cm->lf.mode_deltas[mode_lf_lut[mbmi->mode]] * scale;
483 lvl_seg = clamp(lvl_seg, 0, MAX_LOOP_FILTER);
484 }
485 return lvl_seg;
486 } else {
487 return lfi_n->lvl[plane][segment_id][dir_idx][mbmi->ref_frame[0]]
488 [mode_lf_lut[mbmi->mode]];
489 }
490 }
491
// One-time loop filter initialization: builds the sharpness-dependent limit
// tables and the per-level HEV (high edge variance) thresholds.
void av1_loop_filter_init(AV1_COMMON *cm) {
  assert(MB_MODE_COUNT == NELEMENTS(mode_lf_lut));
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;

  lf->combine_vert_horz_lf = 1;

  // lim/mblim vectors depend on the current sharpness setting.
  update_sharpness(lfi, lf->sharpness_level);

  // HEV threshold grows with the filter level (one step every 16 levels).
  for (int lvl = 0; lvl <= MAX_LOOP_FILTER; ++lvl)
    memset(lfi->lfthr[lvl].hev_thr, lvl >> 4, SIMD_WIDTH);
}
507
508 // Update the loop filter for the current frame.
509 // This should be called before loop_filter_rows(),
510 // av1_loop_filter_frame() calls this function directly.
// Per-frame loop filter setup: refreshes the sharpness limits and rebuilds
// the lvl[plane][segment][dir][ref][mode] table for the requested planes.
// Should be called before loop_filter_rows(); av1_loop_filter_frame()
// calls it directly.
void av1_loop_filter_frame_init(AV1_COMMON *cm, int plane_start,
                                int plane_end) {
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  const struct segmentation *const seg = &cm->seg;

  // Refresh the sharpness-dependent limit tables.
  update_sharpness(lfi, lf->sharpness_level);

  // Base level per (plane, dir). Only the Y plane distinguishes vertical
  // (dir 0) from horizontal (dir 1) edges; chroma reuses one level.
  const int base_lvl[MAX_MB_PLANE][2] = {
    { lf->filter_level[0], lf->filter_level[1] },
    { lf->filter_level_u, lf->filter_level_u },
    { lf->filter_level_v, lf->filter_level_v },
  };

  for (int plane = plane_start; plane < plane_end; ++plane) {
    if (plane == 0) {
      // Both luma levels zero: nothing to filter at all.
      if (!base_lvl[0][0] && !base_lvl[0][1]) break;
    } else if (!base_lvl[plane][0]) {
      continue;  // This chroma plane is not filtered.
    }

    for (int seg_id = 0; seg_id < MAX_SEGMENTS; ++seg_id) {
      for (int dir = 0; dir < 2; ++dir) {
        assert(plane >= 0 && plane <= 2);
        int lvl_seg = base_lvl[plane][dir];

        // Apply the segment-level delta, if active for this segment.
        const int feature = seg_lvl_lf_lut[plane][dir];
        if (segfeature_active(seg, seg_id, feature)) {
          lvl_seg = clamp(lvl_seg + get_segdata(seg, seg_id, feature), 0,
                          MAX_LOOP_FILTER);
        }

        if (!lf->mode_ref_delta_enabled) {
          // Deltas disabled: every (ref, mode) entry gets the same level.
          // (We could skip this if deltas were guaranteed zero when unused;
          // the encoder always uses deltas.)
          memset(lfi->lvl[plane][seg_id][dir], lvl_seg,
                 sizeof(lfi->lvl[plane][seg_id][dir]));
        } else {
          // Fold per-reference and per-mode deltas into the table. The
          // delta multiplier doubles once the level reaches 32.
          const int scale = 1 << (lvl_seg >> 5);
          lfi->lvl[plane][seg_id][dir][INTRA_FRAME][0] =
              clamp(lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale, 0,
                    MAX_LOOP_FILTER);
          for (int ref = LAST_FRAME; ref < REF_FRAMES; ++ref) {
            for (int mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
              const int inter_lvl =
                  lvl_seg + lf->ref_deltas[ref] * scale +
                  lf->mode_deltas[mode] * scale;
              lfi->lvl[plane][seg_id][dir][ref][mode] =
                  clamp(inter_lvl, 0, MAX_LOOP_FILTER);
            }
          }
        }
      }
    }
  }
}
577
578 #if LOOP_FILTER_BITMASK
579 // A 64x64 tx block requires 256 bits to represent each 4x4 tx block.
580 // Every 4 rows is represented by one uint64_t mask. Hence,
581 // there are 4 uint64_t bitmask[4] to represent the 64x64 block.
582 //
583 // Given a location by (mi_col, mi_row), This function returns the index
584 // 0, 1, 2, 3 to select which bitmask[] to use, and the shift value.
585 //
586 // For example, mi_row is the offset of pixels in mi size (4),
587 // (mi_row / 4) returns which uint64_t.
588 // After locating which uint64_t, mi_row % 4 is the
589 // row offset, and each row has 16 = 1 << stride_log2 4x4 units.
590 // Therefore, shift = (row << stride_log2) + mi_col;
// Splits a (mi_col, mi_row) position inside a 64x64 area into the bitmask
// word index (*index, one uint64_t per 4 rows of 4x4 units) and the bit
// shift inside that word (16 4x4 units per row, so
// shift = (mi_row % 4) * 16 + mi_col).
int get_index_shift(int mi_col, int mi_row, int *index) {
  // Word selector: every 4 mi rows share one uint64_t.
  *index = mi_row >> 2;
  // Bit position within the word: row-within-word times 16, plus column.
  return ((mi_row & 3) << 4) | mi_col;
}
599
check_mask(const FilterMask * lfm)600 static void check_mask(const FilterMask *lfm) {
601 #ifndef NDEBUG
602 for (int i = 0; i < 4; ++i) {
603 assert(!(lfm[TX_4X4].bits[i] & lfm[TX_8X8].bits[i]));
604 assert(!(lfm[TX_4X4].bits[i] & lfm[TX_16X16].bits[i]));
605 assert(!(lfm[TX_4X4].bits[i] & lfm[TX_32X32].bits[i]));
606 assert(!(lfm[TX_4X4].bits[i] & lfm[TX_64X64].bits[i]));
607 assert(!(lfm[TX_8X8].bits[i] & lfm[TX_16X16].bits[i]));
608 assert(!(lfm[TX_8X8].bits[i] & lfm[TX_32X32].bits[i]));
609 assert(!(lfm[TX_8X8].bits[i] & lfm[TX_64X64].bits[i]));
610 assert(!(lfm[TX_16X16].bits[i] & lfm[TX_32X32].bits[i]));
611 assert(!(lfm[TX_16X16].bits[i] & lfm[TX_64X64].bits[i]));
612 assert(!(lfm[TX_32X32].bits[i] & lfm[TX_64X64].bits[i]));
613 }
614 #else
615 (void)lfm;
616 #endif
617 }
618
check_loop_filter_masks(const LoopFilterMask * lfm,int plane)619 static void check_loop_filter_masks(const LoopFilterMask *lfm, int plane) {
620 if (plane == 0) {
621 // Assert if we try to apply 2 different loop filters at the same
622 // position.
623 check_mask(lfm->left_y);
624 check_mask(lfm->above_y);
625 } else if (plane == 1) {
626 check_mask(lfm->left_u);
627 check_mask(lfm->above_u);
628 } else {
629 check_mask(lfm->left_v);
630 check_mask(lfm->above_v);
631 }
632 }
633
// ORs the 256-bit `mask` into the stored bitmask selected by edge
// direction, plane, and square transform size.
static void update_masks(EDGE_DIR dir, int plane, uint64_t *mask,
                         TX_SIZE sqr_tx_size, LoopFilterMask *lfm) {
  // Out-of-range planes are a debug-time error and a release-time no-op,
  // matching the original switch's default case.
  if (plane != 0 && plane != 1 && plane != 2) {
    assert(plane <= 2);
    return;
  }
  FilterMask *dst;
  if (dir == VERT_EDGE) {
    dst = (plane == 0) ? lfm->left_y : (plane == 1) ? lfm->left_u : lfm->left_v;
  } else {
    dst = (plane == 0) ? lfm->above_y
                       : (plane == 1) ? lfm->above_u : lfm->above_v;
  }
  for (int i = 0; i < 4; ++i) dst[sqr_tx_size].bits[i] |= mask[i];
}
667
// Returns 1 if the edge at (mi_row, mi_col) lies on a frame boundary for
// the given plane/direction and therefore must not be filtered.
static int is_frame_boundary(AV1_COMMON *const cm, int plane, int mi_row,
                             int mi_col, int ssx, int ssy, EDGE_DIR dir) {
  const int y_px = mi_row << MI_SIZE_LOG2;
  const int x_px = mi_col << MI_SIZE_LOG2;

  // Bottom/right boundary test. Subsampled chroma mi positions are rounded
  // up to the bottom/right mi of the co-located luma 8x8 block, so in a
  // subsampled dimension a strict '>' is used: a position exactly at the
  // frame size may still correspond to a valid chroma sample when the frame
  // dimension is odd.
  if (plane && (ssx || ssy)) {
    if (ssx && ssy) {  // format 420
      if (y_px > cm->height || x_px > cm->width) return 1;
    } else if (ssx) {  // format 422
      if (y_px >= cm->height || x_px > cm->width) return 1;
    }
  } else {
    if (y_px >= cm->height || x_px >= cm->width) return 1;
  }

  // Top/left boundary test on the relevant coordinate.
  int row_or_col;
  if (plane == 0) {
    row_or_col = (dir == VERT_EDGE) ? mi_col : mi_row;
  } else if (dir == VERT_EDGE) {
    // Chroma sub8x8 blocks use the bottom/right mi of the co-located luma
    // 8x8 block, so mi_col == 1 is in fact the frame's left edge; clear the
    // low bit before comparing.
    row_or_col = ssx ? (mi_col & 0x0FFFFFFE) : mi_col;
  } else {
    row_or_col = ssy ? (mi_row & 0x0FFFFFFE) : mi_row;
  }
  return row_or_col == 0;
}
700
// For one transform block of the given plane, record the deblocking filter
// levels of every covered 4x4 position and set the left/above edge bitmask
// bits where filtering should happen (both edge directions are handled).
// mi_row/mi_col are in luma mi units; ssx/ssy are the plane's subsampling.
static void setup_masks(AV1_COMMON *const cm, int mi_row, int mi_col, int plane,
                        int ssx, int ssy, TX_SIZE tx_size) {
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  // Plane-pixel coordinates of this block's top-left corner (before the
  // sub8x8 rounding below), used for the coding-block-border test.
  const int x = (mi_col << (MI_SIZE_LOG2 - ssx));
  const int y = (mi_row << (MI_SIZE_LOG2 - ssy));
  // decide whether current vertical/horizontal edge needs loop filtering
  for (EDGE_DIR dir = VERT_EDGE; dir <= HORZ_EDGE; ++dir) {
    // chroma sub8x8 block uses bottom/right mi of co-located 8x8 luma block.
    // (The |= is idempotent, so the second loop iteration sees the same
    // adjusted coordinates.)
    mi_row |= ssy;
    mi_col |= ssx;

    MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
    const MB_MODE_INFO *const mbmi = mi[0];
    // Skipped inter blocks have no residual, so their interior edges may be
    // skipped (unless on a coding block border).
    const int curr_skip = mbmi->skip && is_inter_block(mbmi);
    const BLOCK_SIZE bsize = mbmi->sb_type;
    const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy);
    const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy];
    const uint8_t level = get_filter_level(cm, &cm->lf_info, dir, plane, mbmi);
    // Width/height of the prediction block minus one, used as an alignment
    // mask: x/y aligned to the block dimension means a coding block border.
    const int prediction_masks = dir == VERT_EDGE
                                     ? block_size_wide[plane_bsize] - 1
                                     : block_size_high[plane_bsize] - 1;
    const int is_coding_block_border =
        dir == VERT_EDGE ? !(x & prediction_masks) : !(y & prediction_masks);

    // TODO(chengchen): step can be optimized.
    const int row_step = mi_size_high[TX_4X4] << ssy;
    const int col_step = mi_size_wide[TX_4X4] << ssx;
    // For a vertical edge, walk down the block's left column; for a
    // horizontal edge, walk across the block's top row.
    const int mi_height =
        dir == VERT_EDGE ? tx_size_high_unit[tx_size] << ssy : row_step;
    const int mi_width =
        dir == VERT_EDGE ? col_step : tx_size_wide_unit[tx_size] << ssx;

    // assign filter levels
    for (int r = mi_row; r < mi_row + mi_height; r += row_step) {
      for (int c = mi_col; c < mi_col + mi_width; c += col_step) {
        // do not filter frame boundary
        // Note: when chroma planes' size are half of luma plane,
        // chroma plane mi corresponds to even position.
        // If frame size is not even, we still need to filter this chroma
        // position. Therefore the boundary condition check needs to be
        // separated to two cases.
        if (plane && (ssx || ssy)) {
          if (ssx && ssy) {  // format 420
            if ((r << MI_SIZE_LOG2) > cm->height ||
                (c << MI_SIZE_LOG2) > cm->width)
              continue;
          } else if (ssx) {  // format 422
            if ((r << MI_SIZE_LOG2) >= cm->height ||
                (c << MI_SIZE_LOG2) > cm->width)
              continue;
          }
        } else {
          if ((r << MI_SIZE_LOG2) >= cm->height ||
              (c << MI_SIZE_LOG2) >= cm->width)
            continue;
        }

        // Level tables are indexed per 64x64 unit.
        const int row = r % MI_SIZE_64X64;
        const int col = c % MI_SIZE_64X64;
        if (plane == 0) {
          if (dir == VERT_EDGE)
            lfm->lfl_y_ver[row][col] = level;
          else
            lfm->lfl_y_hor[row][col] = level;
        } else if (plane == 1) {
          lfm->lfl_u[row][col] = level;
        } else {
          lfm->lfl_v[row][col] = level;
        }
      }
    }

    // Set the edge bitmask: an edge is filtered when either side has a
    // nonzero level and it is not an interior edge of two skipped blocks.
    for (int r = mi_row; r < mi_row + mi_height; r += row_step) {
      for (int c = mi_col; c < mi_col + mi_width; c += col_step) {
        // do not filter frame boundary
        if (is_frame_boundary(cm, plane, r, c, ssx, ssy, dir)) continue;

        uint64_t mask[4] = { 0 };
        // Neighbor on the other side of the edge: left block for vertical
        // edges, above block for horizontal edges.
        const int prev_row = dir == VERT_EDGE ? r : r - (1 << ssy);
        const int prev_col = dir == VERT_EDGE ? c - (1 << ssx) : c;
        MB_MODE_INFO **mi_prev =
            cm->mi_grid_visible + prev_row * cm->mi_stride + prev_col;
        const MB_MODE_INFO *const mbmi_prev = mi_prev[0];
        const int prev_skip = mbmi_prev->skip && is_inter_block(mbmi_prev);
        const uint8_t level_prev =
            get_filter_level(cm, &cm->lf_info, dir, plane, mbmi_prev);
        const int is_edge =
            (level || level_prev) &&
            (!curr_skip || !prev_skip || is_coding_block_border);

        if (is_edge) {
          const TX_SIZE prev_tx_size =
              plane ? av1_get_max_uv_txsize(mbmi_prev->sb_type, ssx, ssy)
                    : mbmi_prev->tx_size;
          // Filter length is bounded by the smaller of the two transform
          // sizes along the filtering direction...
          TX_SIZE min_tx_size = (dir == VERT_EDGE)
                                    ? AOMMIN(txsize_horz_map[tx_size],
                                             txsize_horz_map[prev_tx_size])
                                    : AOMMIN(txsize_vert_map[tx_size],
                                             txsize_vert_map[prev_tx_size]);
          // ...and by the maximum filter length (16 pixels), so larger
          // transforms are stored in the TX_16X16 bucket.
          min_tx_size = AOMMIN(min_tx_size, TX_16X16);
          assert(min_tx_size < TX_SIZES);
          const int row = r % MI_SIZE_64X64;
          const int col = c % MI_SIZE_64X64;
          int index = 0;
          const int shift = get_index_shift(col, row, &index);
          assert(index < 4 && index >= 0);
          mask[index] |= ((uint64_t)1 << shift);
          // set mask on corresponding bit
          update_masks(dir, plane, mask, min_tx_size, lfm);
        }
      }
    }
  }
}
815
// Recursively descend the transform partition tree of a coding block and
// call setup_masks() for each leaf transform block. blk_row/blk_col are
// offsets within the block in plane mi units (scaled to luma units below).
static void setup_tx_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
                                int blk_row, int blk_col,
                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                                int plane, int ssx, int ssy) {
  // Convert plane-relative offsets to luma mi units.
  blk_row <<= ssy;
  blk_col <<= ssx;
  // Nothing to do outside the frame.
  if (((mi_row + blk_row) << MI_SIZE_LOG2) >= cm->height ||
      ((mi_col + blk_col) << MI_SIZE_LOG2) >= cm->width)
    return;

  // U/V plane, tx_size is always the largest size
  if (plane) {
    assert(tx_size_wide[tx_size] <= 32 && tx_size_high[tx_size] <= 32);
    setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy,
                tx_size);
    return;
  }

  MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
  const MB_MODE_INFO *const mbmi = mi[0];
  // For Y plane:
  // If intra block, tx size is univariant.
  // If inter block, tx size follows inter_tx_size.
  TX_SIZE plane_tx_size = tx_size;
  const int is_inter = is_inter_block(mbmi);

  if (plane == 0) {
    if (is_inter) {
      if (mbmi->skip) {
        // TODO(chengchen): change av1_get_transform_size() to be consistant.
        // plane_tx_size = get_max_rect_tx_size(plane_bsize);
        plane_tx_size = mbmi->tx_size;
      } else {
        // Non-skip inter: the per-transform-block size recorded at coding
        // time for this position.
        plane_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
            plane_bsize, blk_row, blk_col)];
      }
    } else {
      // Intra: read tx_size from the mi at this exact position.
      MB_MODE_INFO **mi_this = cm->mi_grid_visible +
                               (mi_row + blk_row) * cm->mi_stride + mi_col +
                               blk_col;
      const MB_MODE_INFO *const mbmi_this = mi_this[0];
      plane_tx_size = mbmi_this->tx_size;
    }
  }

  assert(txsize_to_bsize[plane_tx_size] <= plane_bsize);

  // NOTE(review): plane is always 0 here (the plane != 0 case returned
  // above), so the 'plane ||' part of this condition is dead but harmless.
  if (plane || plane_tx_size == tx_size) {
    // Leaf: the actual transform matches the current recursion level.
    setup_masks(cm, mi_row + blk_row, mi_col + blk_col, plane, ssx, ssy,
                tx_size);
  } else {
    // Split: recurse into the sub-transform blocks.
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        const int offsetr = blk_row + row;
        const int offsetc = blk_col + col;
        setup_tx_block_mask(cm, mi_row, mi_col, offsetr, offsetc, plane_bsize,
                            sub_txs, plane, ssx, ssy);
      }
    }
  }
}
880
// Build the loop filter bitmask for one coding block (a block that is not
// partitioned further): walk its transform blocks in raster order, clamped
// so that setup_tx_block_mask() never processes more than a 64x64 luma area
// at a time (the decoder's processing granularity).
static void setup_fix_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
                                 int plane, int ssx, int ssy) {
  // Chroma sub8x8 blocks use the bottom/right mi of the co-located luma 8x8
  // block, hence the (mi_row | ssy) / (mi_col | ssx) offsets.
  MB_MODE_INFO **mi =
      cm->mi_grid_visible + (mi_row | ssy) * cm->mi_stride + (mi_col | ssx);
  const MB_MODE_INFO *const mbmi = mi[0];

  const BLOCK_SIZE bsize = mbmi->sb_type;
  const BLOCK_SIZE bsizec = scale_chroma_bsize(bsize, ssx, ssy);
  const BLOCK_SIZE plane_bsize = ss_size_lookup[bsizec][ssx][ssy];

  const int block_width = mi_size_wide[plane_bsize];
  const int block_height = mi_size_high[plane_bsize];

  TX_SIZE max_txsize = max_txsize_rect_lookup[plane_bsize];
  // The decoder is designed so that it can process 64x64 luma pixels at a
  // time. If this is a chroma plane with subsampling and bsize corresponds to
  // a subsampled BLOCK_128X128 then the lookup above will give TX_64X64. That
  // mustn't be used for the subsampled plane (because it would be bigger than
  // a 64x64 luma block) so we round down to TX_32X32.
  if (plane && txsize_sqr_up_map[max_txsize] == TX_64X64) {
    if (max_txsize == TX_16X64)
      max_txsize = TX_16X32;
    else if (max_txsize == TX_64X16)
      max_txsize = TX_32X16;
    else
      max_txsize = TX_32X32;
  }

  // Transform block dimensions in 4x4 (mi-of-plane) units.
  const BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize];
  const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
  // Fix: use tx_size_high_log2 for the height (previously
  // tx_size_wide_log2[0], which has the same value since the minimum tx is
  // square, but was inconsistent with mu_blocks_high below).
  const int bh = block_size_high[txb_size] >> tx_size_high_log2[0];
  const BLOCK_SIZE max_unit_bsize = ss_size_lookup[BLOCK_64X64][ssx][ssy];
  int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0];
  int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0];

  mu_blocks_wide = AOMMIN(block_width, mu_blocks_wide);
  mu_blocks_high = AOMMIN(block_height, mu_blocks_high);

  // Y: Largest tx_size is 64x64, while superblock size can be 128x128.
  // Here we ensure that setup_tx_block_mask process at most a 64x64 block.
  // U/V: largest tx size is 32x32.
  for (int idy = 0; idy < block_height; idy += mu_blocks_high) {
    for (int idx = 0; idx < block_width; idx += mu_blocks_wide) {
      const int unit_height = AOMMIN(mu_blocks_high + idy, block_height);
      const int unit_width = AOMMIN(mu_blocks_wide + idx, block_width);
      for (int blk_row = idy; blk_row < unit_height; blk_row += bh) {
        for (int blk_col = idx; blk_col < unit_width; blk_col += bw) {
          setup_tx_block_mask(cm, mi_row, mi_col, blk_row, blk_col, plane_bsize,
                              max_txsize, plane, ssx, ssy);
        }
      }
    }
  }
}
935
// Recursively walk the partition tree rooted at (mi_row, mi_col, bsize) and
// call setup_fix_block_mask() for every coding block of the given plane.
static void setup_block_mask(AV1_COMMON *const cm, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, int plane, int ssx, int ssy) {
  // Skip blocks that start entirely outside the frame.
  if ((mi_row << MI_SIZE_LOG2) >= cm->height ||
      (mi_col << MI_SIZE_LOG2) >= cm->width)
    return;

  const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
  const int hbs = mi_size_wide[bsize] / 2;       // half-block step, mi units
  const int quarter_step = mi_size_wide[bsize] / 4;
  // With chroma subsampling, a split of BLOCK_8X8 yields sub-4x4 chroma
  // blocks which are not filtered separately.
  const int allow_sub8x8 = (ssx || ssy) ? bsize > BLOCK_8X8 : 1;
  const int has_next_row =
      (((mi_row + hbs) << MI_SIZE_LOG2) < cm->height) & allow_sub8x8;
  const int has_next_col =
      (((mi_col + hbs) << MI_SIZE_LOG2) < cm->width) & allow_sub8x8;
  int i;

  switch (partition) {
    case PARTITION_NONE:
      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
      break;
    case PARTITION_HORZ:
      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
      if (has_next_row)
        setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
      break;
    case PARTITION_VERT:
      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
      if (has_next_col)
        setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
      break;
    case PARTITION_SPLIT:
      // Four-way split recurses; all other partitions are leaves.
      setup_block_mask(cm, mi_row, mi_col, subsize, plane, ssx, ssy);
      if (has_next_col)
        setup_block_mask(cm, mi_row, mi_col + hbs, subsize, plane, ssx, ssy);
      if (has_next_row)
        setup_block_mask(cm, mi_row + hbs, mi_col, subsize, plane, ssx, ssy);
      if (has_next_col & has_next_row)
        setup_block_mask(cm, mi_row + hbs, mi_col + hbs, subsize, plane, ssx,
                         ssy);
      break;
    case PARTITION_HORZ_A:
      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
      if (has_next_col)
        setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
      if (has_next_row)
        setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
      break;
    case PARTITION_HORZ_B:
      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
      if (has_next_row)
        setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
      if (has_next_col & has_next_row)
        setup_fix_block_mask(cm, mi_row + hbs, mi_col + hbs, plane, ssx, ssy);
      break;
    case PARTITION_VERT_A:
      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
      if (has_next_row)
        setup_fix_block_mask(cm, mi_row + hbs, mi_col, plane, ssx, ssy);
      if (has_next_col)
        setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
      break;
    case PARTITION_VERT_B:
      setup_fix_block_mask(cm, mi_row, mi_col, plane, ssx, ssy);
      if (has_next_col)
        setup_fix_block_mask(cm, mi_row, mi_col + hbs, plane, ssx, ssy);
      if (has_next_row)
        setup_fix_block_mask(cm, mi_row + hbs, mi_col + hbs, plane, ssx, ssy);
      break;
    case PARTITION_HORZ_4:
      for (i = 0; i < 4; ++i) {
        int this_mi_row = mi_row + i * quarter_step;
        if (i > 0 && (this_mi_row << MI_SIZE_LOG2) >= cm->height) break;
        // chroma plane filter the odd location
        if (plane && bsize == BLOCK_16X16 && (i & 0x01)) continue;

        setup_fix_block_mask(cm, this_mi_row, mi_col, plane, ssx, ssy);
      }
      break;
    case PARTITION_VERT_4:
      for (i = 0; i < 4; ++i) {
        int this_mi_col = mi_col + i * quarter_step;
        // NOTE(review): this bound uses mi_cols while PARTITION_HORZ_4 above
        // compares against the pixel height -- presumably equivalent for
        // valid streams, but worth confirming the inconsistency is benign.
        if (i > 0 && this_mi_col >= cm->mi_cols) break;
        // chroma plane filter the odd location
        if (plane && bsize == BLOCK_16X16 && (i & 0x01)) continue;

        setup_fix_block_mask(cm, mi_row, this_mi_col, plane, ssx, ssy);
      }
      break;
    default: assert(0);
  }
}
1028
// TODO(chengchen): if lossless, do not need to setup mask. But when
// segments enabled, each segment has different lossless settings.
//
// Build the complete loop filter bitmask for one superblock of one plane.
// Three passes over the superblock's 64x64 units: (1) clear the plane's
// masks and level tables, (2) populate them by walking the partition tree,
// (3) fold tx sizes larger than 16x16 into the TX_16X16 bucket (the maximum
// filter length is 16 pixels) and validate the result.
void av1_setup_bitmask(AV1_COMMON *const cm, int mi_row, int mi_col, int plane,
                       int subsampling_x, int subsampling_y, int row_end,
                       int col_end) {
  // Number of 64x64 units per superblock dimension (1 or 2).
  const int num_64x64 = cm->seq_params.mib_size >> MIN_MIB_SIZE_LOG2;
  for (int y = 0; y < num_64x64; ++y) {
    for (int x = 0; x < num_64x64; ++x) {
      const int row = mi_row + y * MI_SIZE_64X64;
      const int col = mi_col + x * MI_SIZE_64X64;
      // Skip units beyond the caller's range or outside the frame.
      if (row >= row_end || col >= col_end) continue;
      if ((row << MI_SIZE_LOG2) >= cm->height ||
          (col << MI_SIZE_LOG2) >= cm->width)
        continue;

      LoopFilterMask *lfm = get_loop_filter_mask(cm, row, col);
      if (lfm == NULL) return;

      // init mask to zero
      if (plane == 0) {
        av1_zero(lfm->left_y);
        av1_zero(lfm->above_y);
        av1_zero(lfm->lfl_y_ver);
        av1_zero(lfm->lfl_y_hor);
      } else if (plane == 1) {
        av1_zero(lfm->left_u);
        av1_zero(lfm->above_u);
        av1_zero(lfm->lfl_u);
      } else {
        av1_zero(lfm->left_v);
        av1_zero(lfm->above_v);
        av1_zero(lfm->lfl_v);
      }
    }
  }

  // set up bitmask for each superblock
  setup_block_mask(cm, mi_row, mi_col, cm->seq_params.sb_size, plane,
                   subsampling_x, subsampling_y);

  for (int y = 0; y < num_64x64; ++y) {
    for (int x = 0; x < num_64x64; ++x) {
      const int row = mi_row + y * MI_SIZE_64X64;
      const int col = mi_col + x * MI_SIZE_64X64;
      if (row >= row_end || col >= col_end) continue;
      if ((row << MI_SIZE_LOG2) >= cm->height ||
          (col << MI_SIZE_LOG2) >= cm->width)
        continue;

      LoopFilterMask *lfm = get_loop_filter_mask(cm, row, col);
      if (lfm == NULL) return;

      // check if the mask is valid
      check_loop_filter_masks(lfm, plane);

      {
        // Let 16x16 hold 32x32 (Y/U/V) and 64x64(Y only).
        // Even tx size is greater, we only apply max length filter, which
        // is 16.
        if (plane == 0) {
          for (int j = 0; j < 4; ++j) {
            lfm->left_y[TX_16X16].bits[j] |= lfm->left_y[TX_32X32].bits[j];
            lfm->left_y[TX_16X16].bits[j] |= lfm->left_y[TX_64X64].bits[j];
            lfm->above_y[TX_16X16].bits[j] |= lfm->above_y[TX_32X32].bits[j];
            lfm->above_y[TX_16X16].bits[j] |= lfm->above_y[TX_64X64].bits[j];

            // set 32x32 and 64x64 to 0
            lfm->left_y[TX_32X32].bits[j] = 0;
            lfm->left_y[TX_64X64].bits[j] = 0;
            lfm->above_y[TX_32X32].bits[j] = 0;
            lfm->above_y[TX_64X64].bits[j] = 0;
          }
        } else if (plane == 1) {
          for (int j = 0; j < 4; ++j) {
            lfm->left_u[TX_16X16].bits[j] |= lfm->left_u[TX_32X32].bits[j];
            lfm->above_u[TX_16X16].bits[j] |= lfm->above_u[TX_32X32].bits[j];

            // set 32x32 to 0
            lfm->left_u[TX_32X32].bits[j] = 0;
            lfm->above_u[TX_32X32].bits[j] = 0;
          }
        } else {
          for (int j = 0; j < 4; ++j) {
            lfm->left_v[TX_16X16].bits[j] |= lfm->left_v[TX_32X32].bits[j];
            lfm->above_v[TX_16X16].bits[j] |= lfm->above_v[TX_32X32].bits[j];

            // set 32x32 to 0
            lfm->left_v[TX_32X32].bits[j] = 0;
            lfm->above_v[TX_32X32].bits[j] = 0;
          }
        }
      }

      // check if the mask is valid
      check_loop_filter_masks(lfm, plane);
    }
  }
}
1127
// Apply vertical-edge deblocking to two adjacent 4-pixel-high pixel rows at
// once. Bit i of each mask selects the 4x4 column i: the *_0 masks describe
// the upper row (starting at s), the *_1 masks the lower row (at
// s + 4 * pitch). lfl/lfl2 hold the per-4x4 filter levels of the two rows;
// step skips mask bits not belonging to this plane when subsampled.
static void filter_selectively_vert_row2(
    int subsampling_factor, uint8_t *s, int pitch, int plane,
    uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
    uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
    const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2) {
  uint64_t mask;
  const int step = 1 << subsampling_factor;

  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
              mask_8x8_1 | mask_4x4_1;
       mask; mask >>= step) {
    // Thresholds for the upper (lfi0) and lower (lfi1) row at this column.
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;

    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_14;

        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          // Both rows need the long filter: use the dual variant.
          if (plane) {
            aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                    lfi1->hev_thr);
          } else {
            aom_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                     lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                     lfi1->hev_thr);
          }
        } else if (mask_16x16_0 & 1) {
          // Upper row only.
          lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          // Lower row only.
          lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                       lfi1->hev_thr);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        LpfFunc lpf_vertical = plane ? aom_lpf_vertical_6 : aom_lpf_vertical_8;

        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          if (plane) {
            aom_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                    lfi1->hev_thr);
          } else {
            aom_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                    lfi1->hev_thr);
          }
        } else if (mask_8x8_0 & 1) {
          lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                       lfi1->hev_thr);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          aom_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_0 & 1) {
          aom_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          aom_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr);
        }
      }
    }

    // Advance one 4x4 column.
    s += 4;
    lfl += step;
    lfl2 += step;
    mask_16x16_0 >>= step;
    mask_8x8_0 >>= step;
    mask_4x4_0 >>= step;
    mask_16x16_1 >>= step;
    mask_8x8_1 >>= step;
    mask_4x4_1 >>= step;
  }
}
1214
// High-bit-depth counterpart of filter_selectively_vert_row2(): applies
// vertical-edge deblocking to two adjacent 4-pixel-high rows at once on
// uint16_t samples. See that function for the mask/lfl layout; bd is the
// bit depth.
static void highbd_filter_selectively_vert_row2(
    int subsampling_factor, uint16_t *s, int pitch, int plane,
    uint64_t mask_16x16_0, uint64_t mask_8x8_0, uint64_t mask_4x4_0,
    uint64_t mask_16x16_1, uint64_t mask_8x8_1, uint64_t mask_4x4_1,
    const loop_filter_info_n *lfi_n, uint8_t *lfl, uint8_t *lfl2, int bd) {
  uint64_t mask;
  const int step = 1 << subsampling_factor;

  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_16x16_1 |
              mask_8x8_1 | mask_4x4_1;
       mask; mask >>= step) {
    // Thresholds for the upper (lfi0) and lower (lfi1) row at this column.
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *lfl2;

    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        HbdLpfFunc highbd_lpf_vertical =
            plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_14;

        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          // Both rows need the long filter: use the dual variant.
          if (plane) {
            aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                           lfi0->hev_thr, lfi1->mblim,
                                           lfi1->lim, lfi1->hev_thr, bd);
          } else {
            aom_highbd_lpf_vertical_14_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                            lfi0->hev_thr, lfi1->mblim,
                                            lfi1->lim, lfi1->hev_thr, bd);
          }
        } else if (mask_16x16_0 & 1) {
          // Upper row only.
          highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
                              bd);
        } else {
          // Lower row only.
          highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                              lfi1->hev_thr, bd);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        // Chroma uses the 6-tap filter here as well.
        HbdLpfFunc highbd_lpf_vertical =
            plane ? aom_highbd_lpf_vertical_6 : aom_highbd_lpf_vertical_8;

        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          if (plane) {
            aom_highbd_lpf_vertical_6_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                           lfi0->hev_thr, lfi1->mblim,
                                           lfi1->lim, lfi1->hev_thr, bd);
          } else {
            aom_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                           lfi0->hev_thr, lfi1->mblim,
                                           lfi1->lim, lfi1->hev_thr, bd);
          }
        } else if (mask_8x8_0 & 1) {
          highbd_lpf_vertical(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
                              bd);
        } else {
          highbd_lpf_vertical(s + 4 * pitch, pitch, lfi1->mblim, lfi1->lim,
                              lfi1->hev_thr, bd);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          aom_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_4x4_0 & 1) {
          aom_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, bd);
        } else {
          aom_highbd_lpf_vertical_4(s + 4 * pitch, pitch, lfi1->mblim,
                                    lfi1->lim, lfi1->hev_thr, bd);
        }
      }
    }

    // Advance one 4x4 column.
    s += 4;
    lfl += step;
    lfl2 += step;
    mask_16x16_0 >>= step;
    mask_8x8_0 >>= step;
    mask_4x4_0 >>= step;
    mask_16x16_1 >>= step;
    mask_8x8_1 >>= step;
    mask_4x4_1 >>= step;
  }
}
1304
// Apply horizontal-edge deblocking along one 4-pixel-high row. Bit i of
// each mask selects the 4x4 column i; when two neighboring columns both
// request the same filter length (two_block_mask: adjacent bits, or bits
// two apart when subsampled), the dual variant filters both in one call.
static void filter_selectively_horiz(uint8_t *s, int pitch, int plane,
                                     int subsampling, uint64_t mask_16x16,
                                     uint64_t mask_8x8, uint64_t mask_4x4,
                                     const loop_filter_info_n *lfi_n,
                                     const uint8_t *lfl) {
  uint64_t mask;
  int count;  // number of 4x4 columns consumed this iteration (1 or 2)
  const int step = 1 << subsampling;
  const unsigned int two_block_mask = subsampling ? 5 : 3;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
    // Next block's thresholds.
    // NOTE(review): *(lfl + step) is read even when no dual filter is used
    // at the row's last column -- presumably still within the level array;
    // confirm against the callers' lfl buffer size.
    const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + step);

    count = 1;
    if (mask & 1) {
      if (mask_16x16 & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        LpfFunc lpf_horizontal =
            plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_14;

        if ((mask_16x16 & two_block_mask) == two_block_mask) {
          // This and the next column both take the long filter.
          if (plane) {
            aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, lfin->mblim, lfin->lim,
                                      lfin->hev_thr);
          } else {
            aom_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
                                       lfi->hev_thr, lfin->mblim, lfin->lim,
                                       lfin->hev_thr);
          }
          count = 2;
        } else {
          lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      } else if (mask_8x8 & 1) {
        // chroma plane filters less pixels introduced in deblock_13tap
        // experiment
        LpfFunc lpf_horizontal =
            plane ? aom_lpf_horizontal_6 : aom_lpf_horizontal_8;

        if ((mask_8x8 & two_block_mask) == two_block_mask) {
          if (plane) {
            aom_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, lfin->mblim, lfin->lim,
                                      lfin->hev_thr);
          } else {
            aom_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, lfin->mblim, lfin->lim,
                                      lfin->hev_thr);
          }
          count = 2;
        } else {
          lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & two_block_mask) == two_block_mask) {
          aom_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);
          count = 2;
        } else {
          aom_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
        }
      }
    }

    // Advance by the number of columns just filtered.
    s += 4 * count;
    lfl += step * count;
    mask_16x16 >>= step * count;
    mask_8x8 >>= step * count;
    mask_4x4 >>= step * count;
  }
}
1381
// High-bit-depth counterpart of filter_selectively_horiz(): horizontal-edge
// deblocking along one 4-pixel-high row of uint16_t samples; bd is the bit
// depth. See filter_selectively_horiz() for the mask layout and the
// two_block_mask dual-filter pairing.
static void highbd_filter_selectively_horiz(
    uint16_t *s, int pitch, int plane, int subsampling, uint64_t mask_16x16,
    uint64_t mask_8x8, uint64_t mask_4x4, const loop_filter_info_n *lfi_n,
    uint8_t *lfl, int bd) {
  uint64_t mask;
  int count;  // number of 4x4 columns consumed this iteration (1 or 2)
  const int step = 1 << subsampling;
  const unsigned int two_block_mask = subsampling ? 5 : 3;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4; mask; mask >>= step * count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
    // Next block's thresholds.
    const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + step);

    count = 1;
    if (mask & 1) {
      if (mask_16x16 & 1) {
        // Chroma uses the shorter 6-tap filter (deblock_13tap experiment).
        HbdLpfFunc highbd_lpf_horizontal =
            plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_14;

        if ((mask_16x16 & two_block_mask) == two_block_mask) {
          // This and the next column both take the long filter.
          if (plane) {
            aom_highbd_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
                                             lfi->hev_thr, lfin->mblim,
                                             lfin->lim, lfin->hev_thr, bd);
          } else {
            aom_highbd_lpf_horizontal_14_dual(s, pitch, lfi->mblim, lfi->lim,
                                              lfi->hev_thr, lfin->mblim,
                                              lfin->lim, lfin->hev_thr, bd);
          }
          count = 2;
        } else {
          highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                                bd);
        }
      } else if (mask_8x8 & 1) {
        HbdLpfFunc highbd_lpf_horizontal =
            plane ? aom_highbd_lpf_horizontal_6 : aom_highbd_lpf_horizontal_8;

        if ((mask_8x8 & two_block_mask) == two_block_mask) {
          if (plane) {
            aom_highbd_lpf_horizontal_6_dual(s, pitch, lfi->mblim, lfi->lim,
                                             lfi->hev_thr, lfin->mblim,
                                             lfin->lim, lfin->hev_thr, bd);
          } else {
            aom_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                             lfi->hev_thr, lfin->mblim,
                                             lfin->lim, lfin->hev_thr, bd);
          }
          count = 2;
        } else {
          highbd_lpf_horizontal(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
                                bd);
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & two_block_mask) == two_block_mask) {
          aom_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                           lfi->hev_thr, lfin->mblim, lfin->lim,
                                           lfin->hev_thr, bd);
          count = 2;
        } else {
          aom_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, bd);
        }
      }
    }

    // Advance by the number of columns just filtered.
    s += 4 * count;
    lfl += step * count;
    mask_16x16 >>= step * count;
    mask_8x8 >>= step * count;
    mask_4x4 >>= step * count;
  }
}
1456
// Build the left (vertical-edge) bitmasks for one plane by scanning each
// 4x4 row of the plane: every transform block is compared with its left
// neighbor (level/skip/tx size, read from the per-64x64 LoopFilterMask
// tables) to decide whether its left edge is filtered and at which filter
// length (capped at TX_16X16).
void av1_build_bitmask_vert_info(
    AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
    int plane) {
  const int subsampling_x = plane_ptr->subsampling_x;
  const int subsampling_y = plane_ptr->subsampling_y;
  const int row_step = (MI_SIZE >> MI_SIZE_LOG2);
  const int is_uv = plane > 0;
  TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16;
  uint8_t level, prev_level = 1;
  int skip, prev_skip = 0;
  int is_coding_block_border;

  // r/c iterate in plane mi units; mi_row/mi_col are the luma mi positions.
  for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height; r += row_step) {
    const int mi_row = r << subsampling_y;
    const int row = mi_row % MI_SIZE_64X64;
    int index = 0;
    const int shift = get_index_shift(0, row, &index);

    // Outer column loop walks 64x64 luma units (one LoopFilterMask each).
    for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width;
         c += (tx_size_wide_unit[TX_64X64] >> subsampling_x)) {
      const int mi_col = c << subsampling_x;
      LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);

      // Inner loop advances by one transform block at a time.
      for (int col_in_unit = 0;
           col_in_unit < (tx_size_wide_unit[TX_64X64] >> subsampling_x);) {
        const int x = (c + col_in_unit) << MI_SIZE_LOG2;
        if (x >= plane_ptr->dst.width) break;
        const int col = col_in_unit << subsampling_x;
        // NOTE(review): 'index' may have been overwritten by the
        // get_index_shift() call further below on a previous iteration --
        // presumably consistent for positions within the same 64x64 unit;
        // verify.
        const uint64_t mask = ((uint64_t)1 << (shift | col));
        skip = lfm->skip.bits[index] & mask;
        is_coding_block_border = lfm->is_vert_border.bits[index] & mask;
        switch (plane) {
          case 0: level = lfm->lfl_y_ver[row][col]; break;
          case 1: level = lfm->lfl_u[row][col]; break;
          case 2: level = lfm->lfl_v[row][col]; break;
          default: assert(plane >= 0 && plane <= 2); return;
        }
        // Find the transform size recorded for this position (smallest set
        // bit wins; chroma never uses TX_64X64).
        for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) {
          if (is_uv && ts == TX_64X64) continue;
          if (lfm->tx_size_ver[is_uv][ts].bits[index] & mask) {
            tx_size = ts;
            break;
          }
        }
        // Filter this left edge unless it is the frame's left column, both
        // sides are at level 0, or both sides are skipped interior blocks.
        if ((c + col_in_unit > 0) && (level || prev_level) &&
            (!prev_skip || !skip || is_coding_block_border)) {
          // Filter length is the smaller neighbor tx size, capped at 16.
          const TX_SIZE min_tx_size =
              AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
          const int tmp_row = (mi_row | subsampling_y) % MI_SIZE_64X64;
          const int tmp_col = (col | subsampling_x) % MI_SIZE_64X64;
          const int shift_1 = get_index_shift(tmp_col, tmp_row, &index);
          const uint64_t mask_1 = ((uint64_t)1 << shift_1);
          switch (plane) {
            case 0: lfm->left_y[min_tx_size].bits[index] |= mask_1; break;
            case 1: lfm->left_u[min_tx_size].bits[index] |= mask_1; break;
            case 2: lfm->left_v[min_tx_size].bits[index] |= mask_1; break;
            default: assert(plane >= 0 && plane <= 2); return;
          }
        }

        // update prev info
        prev_level = level;
        prev_skip = skip;
        prev_tx_size = tx_size;
        // advance
        col_in_unit += tx_size_wide_unit[tx_size];
      }
    }
  }
}
1527
// Builds the horizontal-edge (above) loop filter bitmasks for one plane.
// Mirror image of av1_build_bitmask_vert_info(): walks the plane in
// 4x4-unit columns, advancing row-wise by the current transform height
// inside each 64x64 loop-filter unit, and compares each transform unit with
// the one above it (carried in the prev_* locals). A bit is set in
// lfm->above_{y,u,v}[min_tx_size] when the edge between the two units must
// be filtered.
void av1_build_bitmask_horz_info(
    AV1_COMMON *const cm, const struct macroblockd_plane *const plane_ptr,
    int plane) {
  const int subsampling_x = plane_ptr->subsampling_x;
  const int subsampling_y = plane_ptr->subsampling_y;
  const int col_step = (MI_SIZE >> MI_SIZE_LOG2);
  const int is_uv = plane > 0;
  TX_SIZE tx_size = TX_16X16, prev_tx_size = TX_16X16;
  uint8_t level, prev_level = 1;
  int skip, prev_skip = 0;
  int is_coding_block_border;

  for (int c = 0; (c << MI_SIZE_LOG2) < plane_ptr->dst.width; c += col_step) {
    const int mi_col = c << subsampling_x;
    const int col = mi_col % MI_SIZE_64X64;

    for (int r = 0; (r << MI_SIZE_LOG2) < plane_ptr->dst.height;
         r += (tx_size_high_unit[TX_64X64] >> subsampling_y)) {
      const int mi_row = r << subsampling_y;
      LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);

      for (int r_in_unit = 0;
           r_in_unit < (tx_size_high_unit[TX_64X64] >> subsampling_y);) {
        const int y = (r + r_in_unit) << MI_SIZE_LOG2;
        if (y >= plane_ptr->dst.height) break;
        const int row = r_in_unit << subsampling_y;
        int index = 0;
        // Bit position of this 4x4 unit inside the 256-bit (4x uint64_t)
        // per-64x64 mask.
        const int shift = get_index_shift(col, row, &index);
        const uint64_t mask = ((uint64_t)1 << shift);
        skip = lfm->skip.bits[index] & mask;
        is_coding_block_border = lfm->is_horz_border.bits[index] & mask;
        switch (plane) {
          case 0: level = lfm->lfl_y_hor[row][col]; break;
          case 1: level = lfm->lfl_u[row][col]; break;
          case 2: level = lfm->lfl_v[row][col]; break;
          default: assert(plane >= 0 && plane <= 2); return;
        }
        // Look up the transform size recorded for this position. If no bit
        // matches, tx_size keeps the value from the previous iteration.
        for (TX_SIZE ts = TX_4X4; ts <= TX_64X64; ++ts) {
          if (is_uv && ts == TX_64X64) continue;
          if (lfm->tx_size_hor[is_uv][ts].bits[index] & mask) {
            tx_size = ts;
            break;
          }
        }
        // Not the top row of the plane: decide whether the edge between the
        // previous (above) and current transform unit gets filtered.
        if ((r + r_in_unit > 0) && (level || prev_level) &&
            (!prev_skip || !skip || is_coding_block_border)) {
          // Filter length is capped at TX_16X16 and limited by the smaller
          // of the two adjacent transform sizes.
          const TX_SIZE min_tx_size =
              AOMMIN(TX_16X16, AOMMIN(tx_size, prev_tx_size));
          const int tmp_row = (row | subsampling_y) % MI_SIZE_64X64;
          const int tmp_col = (mi_col | subsampling_x) % MI_SIZE_64X64;
          const int shift_1 = get_index_shift(tmp_col, tmp_row, &index);
          const uint64_t mask_1 = ((uint64_t)1 << shift_1);

          switch (plane) {
            case 0: lfm->above_y[min_tx_size].bits[index] |= mask_1; break;
            case 1: lfm->above_u[min_tx_size].bits[index] |= mask_1; break;
            case 2: lfm->above_v[min_tx_size].bits[index] |= mask_1; break;
            default: assert(plane >= 0 && plane <= 2); return;
          }
        }

        // update prev info
        prev_level = level;
        prev_skip = skip;
        prev_tx_size = tx_size;
        // advance by the height of the transform unit just processed
        r_in_unit += tx_size_high_unit[tx_size];
      }
    }
  }
}
1599
// Applies vertical-edge loop filtering to one 64x64 unit of a plane, using
// the precomputed bitmasks in the unit's LoopFilterMask (see
// av1_build_bitmask_vert_info()). Two pixel rows of 4x4 units are filtered
// per iteration via the *_vert_row2 helpers. dst->buf is advanced while
// filtering and restored before returning.
void av1_filter_block_plane_bitmask_vert(
    AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
    int mi_row, int mi_col) {
  struct buf_2d *const dst = &plane_ptr->dst;
  uint8_t *const buf0 = dst->buf;
  const int ssx = plane_ptr->subsampling_x;
  const int ssy = plane_ptr->subsampling_y;
  // Each *_row2 call consumes 16 mask bits (one row of 4x4 units).
  const int mask_cutoff = 0xffff;
  const int row_step = 1 << ssy;
  const int two_row_step = 2 << ssy;
  const int row_stride = dst->stride << MI_SIZE_LOG2;
  const int two_row_stride = row_stride << 1;
  uint64_t mask_16x16 = 0;
  uint64_t mask_8x8 = 0;
  uint64_t mask_4x4 = 0;
  uint8_t *lfl;
  uint8_t *lfl2;
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  assert(lfm);

  // 1. vertical filtering. filter two rows at a time
  for (int r = 0;
       ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
       r += two_row_step) {
    // For chroma, (r | ssy) selects the odd mi row/col that holds the
    // chroma info of the co-located luma block.
    const int row = r | ssy;
    const int row_next = row + row_step;
    const int col = ssx;
    int index = 0;
    const int shift = get_index_shift(col, row, &index);
    int index_next = 0;
    const int shift_next = get_index_shift(col, row_next, &index_next);
    switch (pl) {
      case 0:
        mask_16x16 = lfm->left_y[TX_16X16].bits[index];
        mask_8x8 = lfm->left_y[TX_8X8].bits[index];
        mask_4x4 = lfm->left_y[TX_4X4].bits[index];
        lfl = &lfm->lfl_y_ver[row][col];
        lfl2 = &lfm->lfl_y_ver[row_next][col];
        break;
      case 1:
        mask_16x16 = lfm->left_u[TX_16X16].bits[index];
        mask_8x8 = lfm->left_u[TX_8X8].bits[index];
        mask_4x4 = lfm->left_u[TX_4X4].bits[index];
        lfl = &lfm->lfl_u[row][col];
        lfl2 = &lfm->lfl_u[row_next][col];
        break;
      case 2:
        mask_16x16 = lfm->left_v[TX_16X16].bits[index];
        mask_8x8 = lfm->left_v[TX_8X8].bits[index];
        mask_4x4 = lfm->left_v[TX_4X4].bits[index];
        lfl = &lfm->lfl_v[row][col];
        lfl2 = &lfm->lfl_v[row_next][col];
        break;
      default: assert(pl >= 0 && pl <= 2); return;
    }
    // Extract the 16-bit row masks for the current and next pixel row.
    uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff;
    uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff;
    uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff;
    uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff;
    uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
    uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;

    if (cm->seq_params.use_highbitdepth)
      highbd_filter_selectively_vert_row2(
          ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
          mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
          &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
    else
      filter_selectively_vert_row2(
          ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
          mask_16x16_1, mask_8x8_1, mask_4x4_1, &cm->lf_info, lfl, lfl2);
    dst->buf += two_row_stride;
  }
  // reset buf pointer for horizontal filtering
  dst->buf = buf0;
}
1676
// Applies horizontal-edge loop filtering to one 64x64 unit of a plane,
// using the precomputed bitmasks in the unit's LoopFilterMask (see
// av1_build_bitmask_horz_info()). One pixel row of 4x4 units is filtered
// per iteration; the very first picture row is skipped since it has no
// neighbor above. dst->buf is advanced while filtering and restored before
// returning.
void av1_filter_block_plane_bitmask_horz(
    AV1_COMMON *const cm, struct macroblockd_plane *const plane_ptr, int pl,
    int mi_row, int mi_col) {
  struct buf_2d *const dst = &plane_ptr->dst;
  uint8_t *const buf0 = dst->buf;
  const int ssx = plane_ptr->subsampling_x;
  const int ssy = plane_ptr->subsampling_y;
  // Each filter call consumes 16 mask bits (one row of 4x4 units).
  const int mask_cutoff = 0xffff;
  const int row_step = 1 << ssy;
  const int row_stride = dst->stride << MI_SIZE_LOG2;
  uint64_t mask_16x16 = 0;
  uint64_t mask_8x8 = 0;
  uint64_t mask_4x4 = 0;
  uint8_t *lfl;
  LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col);
  assert(lfm);
  for (int r = 0;
       ((mi_row + r) << MI_SIZE_LOG2) < cm->height && r < MI_SIZE_64X64;
       r += row_step) {
    // Top picture edge: nothing above to filter against.
    if (mi_row + r == 0) {
      dst->buf += row_stride;
      continue;
    }
    // For chroma, (r | ssy) selects the odd mi row/col that holds the
    // chroma info of the co-located luma block.
    const int row = r | ssy;
    const int col = ssx;
    int index = 0;
    const int shift = get_index_shift(col, row, &index);
    switch (pl) {
      case 0:
        mask_16x16 = lfm->above_y[TX_16X16].bits[index];
        mask_8x8 = lfm->above_y[TX_8X8].bits[index];
        mask_4x4 = lfm->above_y[TX_4X4].bits[index];
        lfl = &lfm->lfl_y_hor[row][col];
        break;
      case 1:
        mask_16x16 = lfm->above_u[TX_16X16].bits[index];
        mask_8x8 = lfm->above_u[TX_8X8].bits[index];
        mask_4x4 = lfm->above_u[TX_4X4].bits[index];
        lfl = &lfm->lfl_u[row][col];
        break;
      case 2:
        mask_16x16 = lfm->above_v[TX_16X16].bits[index];
        mask_8x8 = lfm->above_v[TX_8X8].bits[index];
        mask_4x4 = lfm->above_v[TX_4X4].bits[index];
        lfl = &lfm->lfl_v[row][col];
        break;
      default: assert(pl >= 0 && pl <= 2); return;
    }
    // Extract the 16-bit row masks for the current pixel row.
    mask_16x16 = (mask_16x16 >> shift) & mask_cutoff;
    mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
    mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;

    if (cm->seq_params.use_highbitdepth)
      highbd_filter_selectively_horiz(
          CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16,
          mask_8x8, mask_4x4, &cm->lf_info, lfl, (int)cm->seq_params.bit_depth);
    else
      filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                               mask_8x8, mask_4x4, &cm->lf_info, lfl);
    dst->buf += row_stride;
  }
  // reset buf pointer for next block
  dst->buf = buf0;
}
1741
// Applies vertical-edge loop filtering to one superblock of a plane using
// the precomputed left_{y,u,v} bitmasks. Iterates over the superblock in
// 64x64 units (the LoopFilterMask granularity) and filters two pixel rows
// per iteration via the *_vert_row2 helpers. dst->buf is temporarily
// advanced per 64x64 column and restored after each call.
void av1_filter_block_plane_ver(AV1_COMMON *const cm,
                                struct macroblockd_plane *const plane_ptr,
                                int pl, int mi_row, int mi_col) {
  struct buf_2d *const dst = &plane_ptr->dst;
  int r, c;
  const int ssx = plane_ptr->subsampling_x;
  const int ssy = plane_ptr->subsampling_y;
  // Each *_row2 call consumes 16 mask bits (one row of 4x4 units).
  const int mask_cutoff = 0xffff;
  const int single_step = 1 << ssy;
  const int r_step = 2 << ssy;
  uint64_t mask_16x16 = 0;
  uint64_t mask_8x8 = 0;
  uint64_t mask_4x4 = 0;
  uint8_t *lfl;
  uint8_t *lfl2;

  // filter two rows at a time
  for (r = 0; r < cm->seq_params.mib_size &&
              ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
       r += r_step) {
    for (c = 0; c < cm->seq_params.mib_size &&
                ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
         c += MI_SIZE_64X64) {
      dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
      LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
      assert(lfm);
      // For chroma, (x | ss) selects the odd mi row/col holding the chroma
      // info of the co-located luma block.
      const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
      const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
      int index = 0;
      const int shift = get_index_shift(col, row, &index);
      // current and next row should belong to the same mask_idx and index
      // next row's shift
      const int row_next = row + single_step;
      int index_next = 0;
      const int shift_next = get_index_shift(col, row_next, &index_next);
      switch (pl) {
        case 0:
          mask_16x16 = lfm->left_y[TX_16X16].bits[index];
          mask_8x8 = lfm->left_y[TX_8X8].bits[index];
          mask_4x4 = lfm->left_y[TX_4X4].bits[index];
          lfl = &lfm->lfl_y_ver[row][col];
          lfl2 = &lfm->lfl_y_ver[row_next][col];
          break;
        case 1:
          mask_16x16 = lfm->left_u[TX_16X16].bits[index];
          mask_8x8 = lfm->left_u[TX_8X8].bits[index];
          mask_4x4 = lfm->left_u[TX_4X4].bits[index];
          lfl = &lfm->lfl_u[row][col];
          lfl2 = &lfm->lfl_u[row_next][col];
          break;
        case 2:
          mask_16x16 = lfm->left_v[TX_16X16].bits[index];
          mask_8x8 = lfm->left_v[TX_8X8].bits[index];
          mask_4x4 = lfm->left_v[TX_4X4].bits[index];
          lfl = &lfm->lfl_v[row][col];
          lfl2 = &lfm->lfl_v[row_next][col];
          break;
        default: assert(pl >= 0 && pl <= 2); return;
      }
      // Extract the 16-bit row masks for the current and next pixel row.
      uint64_t mask_16x16_0 = (mask_16x16 >> shift) & mask_cutoff;
      uint64_t mask_8x8_0 = (mask_8x8 >> shift) & mask_cutoff;
      uint64_t mask_4x4_0 = (mask_4x4 >> shift) & mask_cutoff;
      uint64_t mask_16x16_1 = (mask_16x16 >> shift_next) & mask_cutoff;
      uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
      uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;

      if (cm->seq_params.use_highbitdepth)
        highbd_filter_selectively_vert_row2(
            ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
            mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
            &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
      else
        filter_selectively_vert_row2(ssx, dst->buf, dst->stride, pl,
                                     mask_16x16_0, mask_8x8_0, mask_4x4_0,
                                     mask_16x16_1, mask_8x8_1, mask_4x4_1,
                                     &cm->lf_info, lfl, lfl2);
      // undo the per-64x64-column offset applied above
      dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
    }
    // advance two (luma) mi rows of pixels
    dst->buf += 2 * MI_SIZE * dst->stride;
  }
}
1823
// Applies horizontal-edge loop filtering to one superblock of a plane using
// the precomputed above_{y,u,v} bitmasks. Iterates over the superblock in
// 64x64 units (the LoopFilterMask granularity) and filters one pixel row of
// 4x4 units per iteration; the top picture row is skipped since it has no
// neighbor above. dst->buf is temporarily advanced per 64x64 column and
// restored after each call.
void av1_filter_block_plane_hor(AV1_COMMON *const cm,
                                struct macroblockd_plane *const plane_ptr,
                                int pl, int mi_row, int mi_col) {
  struct buf_2d *const dst = &plane_ptr->dst;
  int r, c;
  const int ssx = plane_ptr->subsampling_x;
  const int ssy = plane_ptr->subsampling_y;
  // Each filter call consumes 16 mask bits (one row of 4x4 units).
  const int mask_cutoff = 0xffff;
  const int r_step = 1 << ssy;
  uint64_t mask_16x16 = 0;
  uint64_t mask_8x8 = 0;
  uint64_t mask_4x4 = 0;
  uint8_t *lfl;

  for (r = 0; r < cm->seq_params.mib_size &&
              ((mi_row + r) << MI_SIZE_LOG2 < cm->height);
       r += r_step) {
    for (c = 0; c < cm->seq_params.mib_size &&
                ((mi_col + c) << MI_SIZE_LOG2 < cm->width);
         c += MI_SIZE_64X64) {
      // Top picture edge: nothing above to filter against.
      if (mi_row + r == 0) continue;

      dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
      LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row + r, mi_col + c);
      assert(lfm);
      // For chroma, (x | ss) selects the odd mi row/col holding the chroma
      // info of the co-located luma block.
      const int row = ((mi_row + r) | ssy) % MI_SIZE_64X64;
      const int col = ((mi_col + c) | ssx) % MI_SIZE_64X64;
      int index = 0;
      const int shift = get_index_shift(col, row, &index);
      switch (pl) {
        case 0:
          mask_16x16 = lfm->above_y[TX_16X16].bits[index];
          mask_8x8 = lfm->above_y[TX_8X8].bits[index];
          mask_4x4 = lfm->above_y[TX_4X4].bits[index];
          lfl = &lfm->lfl_y_hor[row][col];
          break;
        case 1:
          mask_16x16 = lfm->above_u[TX_16X16].bits[index];
          mask_8x8 = lfm->above_u[TX_8X8].bits[index];
          mask_4x4 = lfm->above_u[TX_4X4].bits[index];
          lfl = &lfm->lfl_u[row][col];
          break;
        case 2:
          mask_16x16 = lfm->above_v[TX_16X16].bits[index];
          mask_8x8 = lfm->above_v[TX_8X8].bits[index];
          mask_4x4 = lfm->above_v[TX_4X4].bits[index];
          lfl = &lfm->lfl_v[row][col];
          break;
        default: assert(pl >= 0 && pl <= 2); return;
      }
      // Extract the 16-bit row masks for the current pixel row.
      mask_16x16 = (mask_16x16 >> shift) & mask_cutoff;
      mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
      mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;

      if (cm->seq_params.use_highbitdepth)
        highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
                                        dst->stride, pl, ssx, mask_16x16,
                                        mask_8x8, mask_4x4, &cm->lf_info, lfl,
                                        (int)cm->seq_params.bit_depth);
      else
        filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
                                 mask_8x8, mask_4x4, &cm->lf_info, lfl);
      // undo the per-64x64-column offset applied above
      dst->buf -= ((c << MI_SIZE_LOG2) >> ssx);
    }
    // advance one (luma) mi row of pixels
    dst->buf += MI_SIZE * dst->stride;
  }
}
1891 #endif // LOOP_FILTER_BITMASK
1892
get_transform_size(const MACROBLOCKD * const xd,const MB_MODE_INFO * const mbmi,const EDGE_DIR edge_dir,const int mi_row,const int mi_col,const int plane,const struct macroblockd_plane * plane_ptr)1893 static TX_SIZE get_transform_size(const MACROBLOCKD *const xd,
1894 const MB_MODE_INFO *const mbmi,
1895 const EDGE_DIR edge_dir, const int mi_row,
1896 const int mi_col, const int plane,
1897 const struct macroblockd_plane *plane_ptr) {
1898 assert(mbmi != NULL);
1899 if (xd && xd->lossless[mbmi->segment_id]) return TX_4X4;
1900
1901 TX_SIZE tx_size =
1902 (plane == AOM_PLANE_Y)
1903 ? mbmi->tx_size
1904 : av1_get_max_uv_txsize(mbmi->sb_type, plane_ptr->subsampling_x,
1905 plane_ptr->subsampling_y);
1906 assert(tx_size < TX_SIZES_ALL);
1907 if ((plane == AOM_PLANE_Y) && is_inter_block(mbmi) && !mbmi->skip) {
1908 const BLOCK_SIZE sb_type = mbmi->sb_type;
1909 const int blk_row = mi_row & (mi_size_high[sb_type] - 1);
1910 const int blk_col = mi_col & (mi_size_wide[sb_type] - 1);
1911 const TX_SIZE mb_tx_size =
1912 mbmi->inter_tx_size[av1_get_txb_size_index(sb_type, blk_row, blk_col)];
1913 assert(mb_tx_size < TX_SIZES_ALL);
1914 tx_size = mb_tx_size;
1915 }
1916
1917 // since in case of chrominance or non-square transorm need to convert
1918 // transform size into transform size in particular direction.
1919 // for vertical edge, filter direction is horizontal, for horizontal
1920 // edge, filter direction is vertical.
1921 tx_size = (VERT_EDGE == edge_dir) ? txsize_horz_map[tx_size]
1922 : txsize_vert_map[tx_size];
1923 return tx_size;
1924 }
1925
// Parameters controlling the deblocking of a single edge, filled in by
// set_lpf_parameters().
typedef struct AV1_DEBLOCKING_PARAMETERS {
  // length of the filter applied to the outer edge
  // (0 = do not filter; otherwise 4, 6, 8 or 14 taps)
  uint32_t filter_length;
  // deblocking limits; point into the cm->lf_info.lfthr entry selected by
  // the edge's filter level
  const uint8_t *lim;
  const uint8_t *mblim;
  const uint8_t *hev_thr;
} AV1_DEBLOCKING_PARAMETERS;
1934
// Returns the TX_SIZE computed by get_transform_size(), so it is plane- and
// direction-aware.
static TX_SIZE set_lpf_parameters(
    AV1_DEBLOCKING_PARAMETERS *const params, const ptrdiff_t mode_step,
    const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
    const EDGE_DIR edge_dir, const uint32_t x, const uint32_t y,
    const int plane, const struct macroblockd_plane *const plane_ptr) {
  // reset to initial values: filter_length == 0 means "do not filter"
  params->filter_length = 0;

  // no deblocking is required
  const uint32_t width = plane_ptr->dst.width;
  const uint32_t height = plane_ptr->dst.height;
  if ((width <= x) || (height <= y)) {
    // just return the smallest transform unit size
    return TX_4X4;
  }

  const uint32_t scale_horz = plane_ptr->subsampling_x;
  const uint32_t scale_vert = plane_ptr->subsampling_y;
  // for sub8x8 block, chroma prediction mode is obtained from the bottom/right
  // mi structure of the co-located 8x8 luma block. so for chroma plane, mi_row
  // and mi_col should map to the bottom/right mi structure, i.e, both mi_row
  // and mi_col should be odd number for chroma plane.
  const int mi_row = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2);
  const int mi_col = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2);
  MB_MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
  const MB_MODE_INFO *mbmi = mi[0];
  // If current mbmi is not correctly setup, return an invalid value to stop
  // filtering. One example is that if this tile is not coded, then its mbmi
  // it not set up.
  if (mbmi == NULL) return TX_INVALID;

  const TX_SIZE ts =
      get_transform_size(xd, mi[0], edge_dir, mi_row, mi_col, plane, plane_ptr);

  {
    // coordinate along the filtering direction: x for vertical edges,
    // y for horizontal edges
    const uint32_t coord = (VERT_EDGE == edge_dir) ? (x) : (y);
    const uint32_t transform_masks =
        edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1 : tx_size_high[ts] - 1;
    const int32_t tu_edge = (coord & transform_masks) ? (0) : (1);

    // interior of a transform unit: nothing to filter here
    if (!tu_edge) return ts;

    // prepare outer edge parameters. deblock the edge if it's an edge of a TU
    {
      const uint32_t curr_level =
          get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
      const int curr_skipped = mbmi->skip && is_inter_block(mbmi);
      uint32_t level = curr_level;
      // coord == 0 is the picture border; there is no previous block
      if (coord) {
        {
          // neighbor in the direction opposite to filtering; mode_step is
          // supplied by the caller (1 mi for vertical, mi_stride for
          // horizontal, scaled for chroma)
          const MB_MODE_INFO *const mi_prev = *(mi - mode_step);
          if (mi_prev == NULL) return TX_INVALID;
          const int pv_row =
              (VERT_EDGE == edge_dir) ? (mi_row) : (mi_row - (1 << scale_vert));
          const int pv_col =
              (VERT_EDGE == edge_dir) ? (mi_col - (1 << scale_horz)) : (mi_col);
          const TX_SIZE pv_ts = get_transform_size(
              xd, mi_prev, edge_dir, pv_row, pv_col, plane, plane_ptr);

          const uint32_t pv_lvl =
              get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);

          const int pv_skip = mi_prev->skip && is_inter_block(mi_prev);
          const BLOCK_SIZE bsize =
              get_plane_block_size(mbmi->sb_type, plane_ptr->subsampling_x,
                                   plane_ptr->subsampling_y);
          const int prediction_masks = edge_dir == VERT_EDGE
                                           ? block_size_wide[bsize] - 1
                                           : block_size_high[bsize] - 1;
          const int32_t pu_edge = !(coord & prediction_masks);
          // if the current and the previous blocks are skipped,
          // deblock the edge if the edge belongs to a PU's edge only.
          if ((curr_level || pv_lvl) &&
              (!pv_skip || !curr_skipped || pu_edge)) {
            // filter taps depend on the smaller transform on either side
            const TX_SIZE min_ts = AOMMIN(ts, pv_ts);
            if (TX_4X4 >= min_ts) {
              params->filter_length = 4;
            } else if (TX_8X8 == min_ts) {
              if (plane != 0)
                params->filter_length = 6;
              else
                params->filter_length = 8;
            } else {
              params->filter_length = 14;
              // No wide filtering for chroma plane
              if (plane != 0) {
                params->filter_length = 6;
              }
            }

            // update the level if the current block is skipped,
            // but the previous one is not
            level = (curr_level) ? (curr_level) : (pv_lvl);
          }
        }
      }
      // prepare common parameters
      if (params->filter_length) {
        const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
        params->lim = limits->lim;
        params->mblim = limits->mblim;
        params->hev_thr = limits->hev_thr;
      }
    }
  }

  return ts;
}
2045
av1_filter_block_plane_vert(const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)2046 void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
2047 const MACROBLOCKD *const xd, const int plane,
2048 const MACROBLOCKD_PLANE *const plane_ptr,
2049 const uint32_t mi_row, const uint32_t mi_col) {
2050 const int row_step = MI_SIZE >> MI_SIZE_LOG2;
2051 const uint32_t scale_horz = plane_ptr->subsampling_x;
2052 const uint32_t scale_vert = plane_ptr->subsampling_y;
2053 uint8_t *const dst_ptr = plane_ptr->dst.buf;
2054 const int dst_stride = plane_ptr->dst.stride;
2055 const int y_range = (MAX_MIB_SIZE >> scale_vert);
2056 const int x_range = (MAX_MIB_SIZE >> scale_horz);
2057 const int use_highbitdepth = cm->seq_params.use_highbitdepth;
2058 const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
2059 for (int y = 0; y < y_range; y += row_step) {
2060 uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
2061 for (int x = 0; x < x_range;) {
2062 // inner loop always filter vertical edges in a MI block. If MI size
2063 // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
2064 // If 4x4 trasnform is used, it will then filter the internal edge
2065 // aligned with a 4x4 block
2066 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
2067 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
2068 uint32_t advance_units;
2069 TX_SIZE tx_size;
2070 AV1_DEBLOCKING_PARAMETERS params;
2071 memset(¶ms, 0, sizeof(params));
2072
2073 tx_size =
2074 set_lpf_parameters(¶ms, ((ptrdiff_t)1 << scale_horz), cm, xd,
2075 VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
2076 if (tx_size == TX_INVALID) {
2077 params.filter_length = 0;
2078 tx_size = TX_4X4;
2079 }
2080
2081 switch (params.filter_length) {
2082 // apply 4-tap filtering
2083 case 4:
2084 if (use_highbitdepth)
2085 aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride,
2086 params.mblim, params.lim, params.hev_thr,
2087 bit_depth);
2088 else
2089 aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
2090 params.hev_thr);
2091 break;
2092 case 6: // apply 6-tap filter for chroma plane only
2093 assert(plane != 0);
2094 if (use_highbitdepth)
2095 aom_highbd_lpf_vertical_6(CONVERT_TO_SHORTPTR(p), dst_stride,
2096 params.mblim, params.lim, params.hev_thr,
2097 bit_depth);
2098 else
2099 aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim,
2100 params.hev_thr);
2101 break;
2102 // apply 8-tap filtering
2103 case 8:
2104 if (use_highbitdepth)
2105 aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride,
2106 params.mblim, params.lim, params.hev_thr,
2107 bit_depth);
2108 else
2109 aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
2110 params.hev_thr);
2111 break;
2112 // apply 14-tap filtering
2113 case 14:
2114 if (use_highbitdepth)
2115 aom_highbd_lpf_vertical_14(CONVERT_TO_SHORTPTR(p), dst_stride,
2116 params.mblim, params.lim, params.hev_thr,
2117 bit_depth);
2118 else
2119 aom_lpf_vertical_14(p, dst_stride, params.mblim, params.lim,
2120 params.hev_thr);
2121 break;
2122 // no filtering
2123 default: break;
2124 }
2125 // advance the destination pointer
2126 advance_units = tx_size_wide_unit[tx_size];
2127 x += advance_units;
2128 p += advance_units * MI_SIZE;
2129 }
2130 }
2131 }
2132
av1_filter_block_plane_horz(const AV1_COMMON * const cm,const MACROBLOCKD * const xd,const int plane,const MACROBLOCKD_PLANE * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)2133 void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
2134 const MACROBLOCKD *const xd, const int plane,
2135 const MACROBLOCKD_PLANE *const plane_ptr,
2136 const uint32_t mi_row, const uint32_t mi_col) {
2137 const int col_step = MI_SIZE >> MI_SIZE_LOG2;
2138 const uint32_t scale_horz = plane_ptr->subsampling_x;
2139 const uint32_t scale_vert = plane_ptr->subsampling_y;
2140 uint8_t *const dst_ptr = plane_ptr->dst.buf;
2141 const int dst_stride = plane_ptr->dst.stride;
2142 const int y_range = (MAX_MIB_SIZE >> scale_vert);
2143 const int x_range = (MAX_MIB_SIZE >> scale_horz);
2144 const int use_highbitdepth = cm->seq_params.use_highbitdepth;
2145 const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
2146 for (int x = 0; x < x_range; x += col_step) {
2147 uint8_t *p = dst_ptr + x * MI_SIZE;
2148 for (int y = 0; y < y_range;) {
2149 // inner loop always filter vertical edges in a MI block. If MI size
2150 // is 8x8, it will first filter the vertical edge aligned with a 8x8
2151 // block. If 4x4 trasnform is used, it will then filter the internal
2152 // edge aligned with a 4x4 block
2153 const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
2154 const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
2155 uint32_t advance_units;
2156 TX_SIZE tx_size;
2157 AV1_DEBLOCKING_PARAMETERS params;
2158 memset(¶ms, 0, sizeof(params));
2159
2160 tx_size =
2161 set_lpf_parameters(¶ms, (cm->mi_stride << scale_vert), cm, xd,
2162 HORZ_EDGE, curr_x, curr_y, plane, plane_ptr);
2163 if (tx_size == TX_INVALID) {
2164 params.filter_length = 0;
2165 tx_size = TX_4X4;
2166 }
2167
2168 switch (params.filter_length) {
2169 // apply 4-tap filtering
2170 case 4:
2171 if (use_highbitdepth)
2172 aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride,
2173 params.mblim, params.lim,
2174 params.hev_thr, bit_depth);
2175 else
2176 aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
2177 params.hev_thr);
2178 break;
2179 // apply 6-tap filtering
2180 case 6:
2181 assert(plane != 0);
2182 if (use_highbitdepth)
2183 aom_highbd_lpf_horizontal_6(CONVERT_TO_SHORTPTR(p), dst_stride,
2184 params.mblim, params.lim,
2185 params.hev_thr, bit_depth);
2186 else
2187 aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim,
2188 params.hev_thr);
2189 break;
2190 // apply 8-tap filtering
2191 case 8:
2192 if (use_highbitdepth)
2193 aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride,
2194 params.mblim, params.lim,
2195 params.hev_thr, bit_depth);
2196 else
2197 aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
2198 params.hev_thr);
2199 break;
2200 // apply 14-tap filtering
2201 case 14:
2202 if (use_highbitdepth)
2203 aom_highbd_lpf_horizontal_14(CONVERT_TO_SHORTPTR(p), dst_stride,
2204 params.mblim, params.lim,
2205 params.hev_thr, bit_depth);
2206 else
2207 aom_lpf_horizontal_14(p, dst_stride, params.mblim, params.lim,
2208 params.hev_thr);
2209 break;
2210 // no filtering
2211 default: break;
2212 }
2213
2214 // advance the destination pointer
2215 advance_units = tx_size_high_unit[tx_size];
2216 y += advance_units;
2217 p += advance_units * dst_stride * MI_SIZE;
2218 }
2219 }
2220 }
2221
loop_filter_rows(YV12_BUFFER_CONFIG * frame_buffer,AV1_COMMON * cm,MACROBLOCKD * xd,int start,int stop,int is_decoding,int plane_start,int plane_end)2222 static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
2223 MACROBLOCKD *xd, int start, int stop,
2224 #if LOOP_FILTER_BITMASK
2225 int is_decoding,
2226 #endif
2227 int plane_start, int plane_end) {
2228 struct macroblockd_plane *pd = xd->plane;
2229 const int col_start = 0;
2230 const int col_end = cm->mi_cols;
2231 int mi_row, mi_col;
2232 int plane;
2233
2234 #if LOOP_FILTER_BITMASK
2235 if (is_decoding) {
2236 for (plane = plane_start; plane < plane_end; plane++) {
2237 if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
2238 break;
2239 else if (plane == 1 && !(cm->lf.filter_level_u))
2240 continue;
2241 else if (plane == 2 && !(cm->lf.filter_level_v))
2242 continue;
2243
2244 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, 0, 0,
2245 plane, plane + 1);
2246 av1_build_bitmask_vert_info(cm, &pd[plane], plane);
2247 av1_build_bitmask_horz_info(cm, &pd[plane], plane);
2248
2249 // apply loop filtering which only goes through buffer once
2250 for (mi_row = start; mi_row < stop; mi_row += MI_SIZE_64X64) {
2251 for (mi_col = col_start; mi_col < col_end; mi_col += MI_SIZE_64X64) {
2252 av1_setup_dst_planes(pd, MI_SIZE_64X64, frame_buffer, mi_row, mi_col,
2253 plane, plane + 1);
2254 av1_filter_block_plane_bitmask_vert(cm, &pd[plane], plane, mi_row,
2255 mi_col);
2256 if (mi_col - MI_SIZE_64X64 >= 0) {
2257 av1_setup_dst_planes(pd, MI_SIZE_64X64, frame_buffer, mi_row,
2258 mi_col - MI_SIZE_64X64, plane, plane + 1);
2259 av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row,
2260 mi_col - MI_SIZE_64X64);
2261 }
2262 }
2263 av1_setup_dst_planes(pd, MI_SIZE_64X64, frame_buffer, mi_row,
2264 mi_col - MI_SIZE_64X64, plane, plane + 1);
2265 av1_filter_block_plane_bitmask_horz(cm, &pd[plane], plane, mi_row,
2266 mi_col - MI_SIZE_64X64);
2267 }
2268 }
2269 return;
2270 }
2271 #endif
2272
2273 for (plane = plane_start; plane < plane_end; plane++) {
2274 if (plane == 0 && !(cm->lf.filter_level[0]) && !(cm->lf.filter_level[1]))
2275 break;
2276 else if (plane == 1 && !(cm->lf.filter_level_u))
2277 continue;
2278 else if (plane == 2 && !(cm->lf.filter_level_v))
2279 continue;
2280
2281 #if LOOP_FILTER_BITMASK
2282 // filter all vertical edges every superblock (could be 128x128 or 64x64)
2283 for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) {
2284 for (mi_col = col_start; mi_col < col_end;
2285 mi_col += cm->seq_params.mib_size) {
2286 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2287 mi_col, plane, plane + 1);
2288
2289 av1_setup_bitmask(cm, mi_row, mi_col, plane, pd[plane].subsampling_x,
2290 pd[plane].subsampling_y, stop, col_end);
2291 av1_filter_block_plane_ver(cm, &pd[plane], plane, mi_row, mi_col);
2292 }
2293 }
2294
2295 // filter all horizontal edges every superblock
2296 for (mi_row = start; mi_row < stop; mi_row += cm->seq_params.mib_size) {
2297 for (mi_col = col_start; mi_col < col_end;
2298 mi_col += cm->seq_params.mib_size) {
2299 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2300 mi_col, plane, plane + 1);
2301
2302 av1_filter_block_plane_hor(cm, &pd[plane], plane, mi_row, mi_col);
2303 }
2304 }
2305 #else
2306 if (cm->lf.combine_vert_horz_lf) {
2307 // filter all vertical and horizontal edges in every 128x128 super block
2308 for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
2309 for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
2310 // filter vertical edges
2311 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2312 mi_col, plane, plane + 1);
2313 av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
2314 mi_col);
2315 // filter horizontal edges
2316 if (mi_col - MAX_MIB_SIZE >= 0) {
2317 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer,
2318 mi_row, mi_col - MAX_MIB_SIZE, plane,
2319 plane + 1);
2320 av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
2321 mi_col - MAX_MIB_SIZE);
2322 }
2323 }
2324 // filter horizontal edges
2325 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2326 mi_col - MAX_MIB_SIZE, plane, plane + 1);
2327 av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
2328 mi_col - MAX_MIB_SIZE);
2329 }
2330 } else {
2331 // filter all vertical edges in every 128x128 super block
2332 for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
2333 for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
2334 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2335 mi_col, plane, plane + 1);
2336 av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
2337 mi_col);
2338 }
2339 }
2340
2341 // filter all horizontal edges in every 128x128 super block
2342 for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
2343 for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
2344 av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
2345 mi_col, plane, plane + 1);
2346 av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
2347 mi_col);
2348 }
2349 }
2350 }
2351 #endif // LOOP_FILTER_BITMASK
2352 }
2353 }
2354
// Apply the deblocking loop filter to |frame| for planes in
// [plane_start, plane_end). When |partial_frame| is set (encoder speed
// feature), only a band of rows around the vertical middle of the frame is
// filtered instead of the whole frame.
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                           MACROBLOCKD *xd,
#if LOOP_FILTER_BITMASK
                           int is_decoding,
#endif
                           int plane_start, int plane_end, int partial_frame) {
  // Default: filter every mi row in the frame.
  int start_mi_row = 0;
  int mi_rows_to_filter = cm->mi_rows;
  if (partial_frame && cm->mi_rows > 8) {
    // Partial pass: start at the frame's middle row, snapped down to an
    // 8-row boundary, and filter 1/8 of the rows (no fewer than 8).
    start_mi_row = (cm->mi_rows >> 1) & ~7;
    mi_rows_to_filter = AOMMAX(cm->mi_rows / 8, 8);
  }
  const int end_mi_row = start_mi_row + mi_rows_to_filter;

  // Set up per-plane filter levels/limits before running the row filter.
  av1_loop_filter_frame_init(cm, plane_start, plane_end);
  loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row,
#if LOOP_FILTER_BITMASK
                   is_decoding,
#endif
                   plane_start, plane_end);
}
2378