1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14
15 #include "./aom_dsp_rtcd.h"
16 #include "./av1_rtcd.h"
17
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_dsp/blend.h"
20 #include "aom_mem/aom_mem.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
23
24 #if CONFIG_CFL
25 #include "av1/common/cfl.h"
26 #endif
27 #include "av1/common/common.h"
28 #include "av1/common/common_data.h"
29 #include "av1/common/entropy.h"
30 #include "av1/common/entropymode.h"
31 #include "av1/common/idct.h"
32 #include "av1/common/mvref_common.h"
33 #include "av1/common/obmc.h"
34 #include "av1/common/pred_common.h"
35 #include "av1/common/quant_common.h"
36 #include "av1/common/reconinter.h"
37 #include "av1/common/reconintra.h"
38 #include "av1/common/scan.h"
39 #include "av1/common/seg_common.h"
40 #if CONFIG_LV_MAP
41 #include "av1/common/txb_common.h"
42 #endif
43 #if CONFIG_WARPED_MOTION
44 #include "av1/common/warped_motion.h"
45 #endif // CONFIG_WARPED_MOTION
46
47 #include "av1/encoder/aq_variance.h"
48 #include "av1/encoder/av1_quantize.h"
49 #include "av1/encoder/cost.h"
50 #include "av1/encoder/encodemb.h"
51 #include "av1/encoder/encodemv.h"
52 #include "av1/encoder/encoder.h"
53 #if CONFIG_LV_MAP
54 #include "av1/encoder/encodetxb.h"
55 #endif
56 #include "av1/encoder/hybrid_fwd_txfm.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/palette.h"
59 #include "av1/encoder/ratectrl.h"
60 #include "av1/encoder/rd.h"
61 #include "av1/encoder/rdopt.h"
62 #include "av1/encoder/tokenize.h"
63 #if CONFIG_PVQ
64 #include "av1/encoder/pvq_encoder.h"
65 #include "av1/common/pvq.h"
66 #endif // CONFIG_PVQ
#if CONFIG_DUAL_FILTER
// Total number of (vertical, horizontal) interpolation-filter combinations
// evaluated when dual filtering is enabled.
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
#if USE_EXTRA_FILTER
// Every { vertical filter index, horizontal filter index } pair for the
// 4-filter (extra filter) configuration.
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
};
#else  // USE_EXTRA_FILTER
// Every { vertical filter index, horizontal filter index } pair for the
// 3-filter configuration.
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
};
#endif  // USE_EXTRA_FILTER
#endif  // CONFIG_DUAL_FILTER
82
#if CONFIG_EXT_REFS

// For each reference frame X, X_FRAME_MODE_MASK sets one bit per reference
// frame *other than* X (including INTRA_FRAME). These masks are used to
// disable mode candidates that do not use the reference under consideration.
#define LAST_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                    \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define ALTREF2_FRAME_MODE_MASK                                  \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \
   (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                   \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \
   (1 << ALTREF2_FRAME))

#else  // !CONFIG_EXT_REFS

// Reduced masks for the 3-reference configuration; same convention as above.
#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))

#endif  // CONFIG_EXT_REFS

// Bit mask of the frames permitted as the *second* reference of a compound
// pair. Bit 0 (0x01) corresponds to NONE_FRAME, i.e. single reference stays
// allowed.
#if CONFIG_EXT_REFS
#if CONFIG_EXT_COMP_REFS
#define SECOND_REF_FRAME_MASK                                         \
  ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << LAST2_FRAME) | 0x01)
#else  // !CONFIG_EXT_COMP_REFS
#define SECOND_REF_FRAME_MASK \
  ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | 0x01)
#endif  // CONFIG_EXT_COMP_REFS
#else  // !CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
#endif  // CONFIG_EXT_REFS

// Earliest mode index at which early termination may kick in.
#define MIN_EARLY_TERM_INDEX 3
// Divisor used when discounting the cost of NEWMV candidates.
#define NEW_MV_DISCOUNT_FACTOR 8

#if CONFIG_EXT_INTRA
// Thresholds for the directional-intra (angle) and filter-intra fast search.
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif  // CONFIG_EXT_INTRA

// Setting this to 1 will disable trellis optimization within the
// transform search. Trellis optimization will still be applied
// in the final encode.
#ifndef DISABLE_TRELLISQ_SEARCH
#define DISABLE_TRELLISQ_SEARCH 0
#endif
152
// Hard-coded linear-SVM weights used when deciding whether to flip the ADST
// transform; first four weights apply to the vertical decision, last four to
// the horizontal one.
static const double ADST_FLIP_SVM[8] = {
  /* vertical */
  -6.6623, -2.8062, -3.2531, 3.1671,
  /* horizontal */
  -7.7051, -3.2234, -3.6193, 3.4533
};

// One entry of the mode search table: a prediction mode plus its reference
// frame pair (second entry is NONE_FRAME for single-reference modes).
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// A reference-frame pair by itself.
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;

// State threaded through the per-transform-block RD costing callbacks.
struct rdcost_block_args {
  const AV1_COMP *cpi;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];  // above entropy contexts
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];   // left entropy contexts
  RD_STATS rd_stats;
  int64_t this_rd;  // RD cost accumulated so far
  int64_t best_rd;  // best RD cost seen; used for early termination
  int exit_early;   // early-exit flag for the block walk
  int use_fast_coef_costing;
};
178
// Index of the last NEWMV entry in av1_mode_order below.
#define LAST_NEW_MV_INDEX 6
// Master ordering of all (mode, reference pair) candidates examined by the
// inter/intra RD mode search. Earlier entries are tried first.
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },

  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },

  { NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEWMV, { ALTREF2_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },

  { NEARMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEARMV, { ALTREF2_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },

  { ZEROMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { ZEROMV, { LAST2_FRAME, NONE_FRAME } },
  { ZEROMV, { LAST3_FRAME, NONE_FRAME } },
  { ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
  { ZEROMV, { ALTREF2_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
  { ZEROMV, { ALTREF_FRAME, NONE_FRAME } },

  // TODO(zoeliu): May need to reconsider the order on the modes to check

#if CONFIG_COMPOUND_SINGLEREF
  // Single ref comp mode
  { SR_NEAREST_NEARMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_NEAREST_NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEARMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_NEAREST_NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEARMV, { ALTREF_FRAME, NONE_FRAME } },

  /*
  { SR_NEAREST_NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_NEAREST_NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_NEAREST_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_NEAREST_NEWMV, { ALTREF_FRAME, NONE_FRAME } },*/

  { SR_NEAR_NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_NEAR_NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_NEAR_NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_NEAR_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_NEAR_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_NEAR_NEWMV, { ALTREF_FRAME, NONE_FRAME } },

  { SR_ZERO_NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_ZERO_NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_ZERO_NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_ZERO_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_ZERO_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_ZERO_NEWMV, { ALTREF_FRAME, NONE_FRAME } },

  { SR_NEW_NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
  { SR_NEW_NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { SR_NEW_NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { SR_NEW_NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_EXT_REFS
  { SR_NEW_NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
  { SR_NEW_NEWMV, { ALTREF_FRAME, NONE_FRAME } },
#endif  // CONFIG_COMPOUND_SINGLEREF

  // Compound NEAREST_NEARESTMV candidates over all reference pairs.
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },

#if CONFIG_EXT_COMP_REFS
  { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_COMP_REFS
#endif  // CONFIG_EXT_REFS

  { TM_PRED, { INTRA_FRAME, NONE_FRAME } },

  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
#if CONFIG_SMOOTH_HV
  { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
#endif  // CONFIG_SMOOTH_HV

  // Remaining compound modes, grouped per reference pair.
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },

#if CONFIG_EXT_REFS
  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_REFS

  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },

#if CONFIG_EXT_REFS
  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { ZERO_ZEROMV, { LAST2_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { ZERO_ZEROMV, { LAST3_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF2_FRAME } },

#if CONFIG_EXT_COMP_REFS
  { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, LAST2_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, LAST3_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { ZERO_ZEROMV, { LAST_FRAME, GOLDEN_FRAME } },

  { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { ZERO_ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_COMP_REFS
#endif  // CONFIG_EXT_REFS

  // Directional intra modes.
  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },

  // Inter-intra compound candidates (second reference is INTRA_FRAME).
  { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },

#if CONFIG_EXT_REFS
  { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
#endif  // CONFIG_EXT_REFS

  { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },

#if CONFIG_EXT_REFS
  { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },

  { ZEROMV, { ALTREF2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF2_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF2_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF2_FRAME, INTRA_FRAME } },
#endif  // CONFIG_EXT_REFS

  { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
};
488
// Luma intra modes in the order they are evaluated by the intra RD search.
static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
  DC_PRED,       H_PRED,        V_PRED,    SMOOTH_PRED, TM_PRED,
#if CONFIG_SMOOTH_HV
  SMOOTH_V_PRED, SMOOTH_H_PRED,
#endif  // CONFIG_SMOOTH_HV
  D135_PRED,     D207_PRED,     D153_PRED, D63_PRED,    D117_PRED, D45_PRED,
};

#if CONFIG_CFL
// Chroma intra search order; CfL is evaluated immediately after DC.
static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
  UV_DC_PRED,     UV_CFL_PRED,   UV_H_PRED,
  UV_V_PRED,      UV_SMOOTH_PRED, UV_TM_PRED,
#if CONFIG_SMOOTH_HV
  UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
#endif  // CONFIG_SMOOTH_HV
  UV_D135_PRED,   UV_D207_PRED,  UV_D153_PRED,
  UV_D63_PRED,    UV_D117_PRED,  UV_D45_PRED,
};
#else
// Without CfL, chroma uses the same search order as luma.
#define uv_rd_search_mode_order intra_rd_search_mode_order
#endif  // CONFIG_CFL
510
write_uniform_cost(int n,int v)511 static INLINE int write_uniform_cost(int n, int v) {
512 const int l = get_unsigned_bits(n);
513 const int m = (1 << l) - n;
514 if (l == 0) return 0;
515 if (v < m)
516 return (l - 1) * av1_cost_bit(128, 0);
517 else
518 return l * av1_cost_bit(128, 0);
519 }
520
521 // constants for prune 1 and prune 2 decision boundaries
522 #define FAST_EXT_TX_CORR_MID 0.0
523 #define FAST_EXT_TX_EDST_MID 0.1
524 #define FAST_EXT_TX_CORR_MARGIN 0.5
525 #define FAST_EXT_TX_EDST_MARGIN 0.3
526
// Returns the sum of squared errors between 'src' and 'dst', counting only
// the visible (inside-frame) region of a transform block. When the whole
// block is visible the per-block-size variance function pointer is used;
// otherwise an odd-size SSE helper is run over just the visible area.
static unsigned pixel_dist_visible_only(
    const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
    const int src_stride, const uint8_t *dst, const int dst_stride,
    const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows,
    int visible_cols) {
  unsigned sse;

  // Fast path: the entire transform block lies inside the visible frame.
  if (txb_rows == visible_rows && txb_cols == visible_cols
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
      // NOTE(review): extended block sizes appear to lack fn_ptr entries, so
      // they fall through to the odd-size path -- confirm.
      && tx_bsize < BLOCK_SIZES
#endif
      ) {
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
    return sse;
  }
#if CONFIG_HIGHBITDEPTH
  const MACROBLOCKD *xd = &x->e_mbd;

  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
                                             visible_cols, visible_rows);
    // Scale high-bitdepth SSE back to an 8-bit-equivalent range.
    return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2);
  }
#else
  (void)x;
#endif  // CONFIG_HIGHBITDEPTH
  sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols,
                         visible_rows);
  return sse;
}
557
558 #if CONFIG_DIST_8X8
cdef_dist_8x8_16bit(uint16_t * dst,int dstride,uint16_t * src,int sstride,int coeff_shift)559 static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
560 int sstride, int coeff_shift) {
561 uint64_t svar = 0;
562 uint64_t dvar = 0;
563 uint64_t sum_s = 0;
564 uint64_t sum_d = 0;
565 uint64_t sum_s2 = 0;
566 uint64_t sum_d2 = 0;
567 uint64_t sum_sd = 0;
568 uint64_t dist = 0;
569
570 int i, j;
571 for (i = 0; i < 8; i++) {
572 for (j = 0; j < 8; j++) {
573 sum_s += src[i * sstride + j];
574 sum_d += dst[i * dstride + j];
575 sum_s2 += src[i * sstride + j] * src[i * sstride + j];
576 sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
577 sum_sd += src[i * sstride + j] * dst[i * dstride + j];
578 }
579 }
580 /* Compute the variance -- the calculation cannot go negative. */
581 svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
582 dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
583
584 // Tuning of jm's original dering distortion metric used in CDEF tool,
585 // suggested by jm
586 const uint64_t a = 4;
587 const uint64_t b = 2;
588 const uint64_t c1 = (400 * a << 2 * coeff_shift);
589 const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift);
590
591 dist =
592 (uint64_t)floor(.5 +
593 (sum_d2 + sum_s2 - 2 * sum_sd) * .5 * (svar + dvar + c1) /
594 (sqrt(svar * (double)dvar + c2)));
595
596 // Calibrate dist to have similar rate for the same QP with MSE only
597 // distortion (as in master branch)
598 dist = (uint64_t)((float)dist * 0.75);
599
600 return dist;
601 }
602
// Returns the variance (scaled: (E[x^2] - E[x]^2) computed as
// (s2 - sum^2/16) / 16) of a 4x4 block of 16-bit pixels at 'x' with row
// stride 'stride'.
//
// Fix vs. original: 'sum' and 's2' were plain 'int'. For 16-bit samples,
// sum can reach 16 * 65535, so sum * sum (and t * t accumulation) can
// overflow signed int -- undefined behavior. Accumulate in int64_t instead;
// the final value still fits an int, so the interface is unchanged.
static int od_compute_var_4x4(uint16_t *x, int stride) {
  int64_t sum = 0;
  int64_t s2 = 0;
  int i;
  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < 4; j++) {
      const int64_t t = x[i * stride + j];
      sum += t;
      s2 += t * t;
    }
  }

  return (int)((s2 - (sum * sum >> 4)) >> 4);
}
622
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
   is applied both horizontally and vertically. For X=5, the filter is
   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)

// Perceptual distortion of one 8x8 block: the low-pass-filtered error
// energy in 'e_lp' plus a variance-mismatch term, scaled by an activity
// factor derived from local 4x4 variances of the source 'x' (and
// reconstruction 'y'). All three buffers share row stride 'stride'.
static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
                                  uint16_t *y, od_coeff *e_lp, int stride) {
  double sum;
  int min_var;
  double mean_var;
  double var_stat;
  double activity;
  double calibration;
  int i;
  int j;
  double vardist;

  vardist = 0;

#if 1
  min_var = INT_MAX;
  mean_var = 0;
  // 3x3 grid of 4x4 variance blocks spaced 2 pixels apart (overlapping).
  for (i = 0; i < 3; i++) {
    for (j = 0; j < 3; j++) {
      int varx;
      int vary;
      varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
      vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
      min_var = OD_MINI(min_var, varx);
      mean_var += 1. / (1 + varx);
      /* The cast to (double) is to avoid an overflow before the sqrt.*/
      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
    }
  }
  /* We use a different variance statistic depending on whether activity
     masking is used, since the harmonic mean appeared slightly worse with
     masking off. The calibration constant just ensures that we preserve the
     rate compared to activity=1. */
  if (use_activity_masking) {
    calibration = 1.95;
    var_stat = 9. / mean_var;
  } else {
    calibration = 1.62;
    var_stat = min_var;
  }
  /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
     activity masking constant. */
  activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
  activity = 1;
#endif  // 1
  // Energy of the low-pass filtered error.
  sum = 0;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++)
      sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
  }
  /* Normalize the filter to unit DC response. */
  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
               OD_DIST_LP_NORM);
  return activity * activity * (sum + vardist);
}
686
// Note : Inputs x and y are in a pixel domain
// Shared tail of od_compute_dist()/od_compute_dist_diff(): applies the
// vertical [1 mid 1] low-pass filter to the horizontally pre-filtered error
// in 'tmp' (writing 'e_lp'), sums od_compute_dist_8x8() over all 8x8
// sub-blocks, then rescales the total by a qindex-dependent factor fitted by
// linear regression against SSE.
static double od_compute_dist_common(int activity_masking, uint16_t *x,
                                     uint16_t *y, int bsize_w, int bsize_h,
                                     int qindex, od_coeff *tmp,
                                     od_coeff *e_lp) {
  int i, j;
  double sum = 0;
  const int mid = OD_DIST_LP_MID;

  // Vertical filtering: top and bottom rows use a reflected [mid 2] tap.
  for (j = 0; j < bsize_w; j++) {
    e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
    e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
                                        2 * tmp[(bsize_h - 2) * bsize_w + j];
  }
  for (i = 1; i < bsize_h - 1; i++) {
    for (j = 0; j < bsize_w; j++) {
      e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
                              tmp[(i - 1) * bsize_w + j] +
                              tmp[(i + 1) * bsize_w + j];
    }
  }
  // Accumulate the perceptual distortion over every 8x8 sub-block.
  for (i = 0; i < bsize_h; i += 8) {
    for (j = 0; j < bsize_w; j += 8) {
      sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
                                 &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
                                 bsize_w);
    }
  }
  /* Scale according to linear regression against SSE, for 8x8 blocks. */
  if (activity_masking) {
    sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
           (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
  } else {
    sum *= qindex >= 128
               ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
               : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
                              : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
  }

  return sum;
}
728
od_compute_dist(uint16_t * x,uint16_t * y,int bsize_w,int bsize_h,int qindex)729 static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
730 int bsize_h, int qindex) {
731 assert(bsize_w >= 8 && bsize_h >= 8);
732 #if CONFIG_PVQ
733 int activity_masking = 1;
734 #else
735 int activity_masking = 0;
736 #endif
737 int i, j;
738 DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
739 DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
740 DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
741 for (i = 0; i < bsize_h; i++) {
742 for (j = 0; j < bsize_w; j++) {
743 e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
744 }
745 }
746 int mid = OD_DIST_LP_MID;
747 for (i = 0; i < bsize_h; i++) {
748 tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
749 tmp[i * bsize_w + bsize_w - 1] =
750 mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
751 for (j = 1; j < bsize_w - 1; j++) {
752 tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
753 e[i * bsize_w + j + 1];
754 }
755 }
756 return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
757 qindex, tmp, e_lp);
758 }
759
od_compute_dist_diff(uint16_t * x,int16_t * e,int bsize_w,int bsize_h,int qindex)760 static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
761 int bsize_h, int qindex) {
762 assert(bsize_w >= 8 && bsize_h >= 8);
763 #if CONFIG_PVQ
764 int activity_masking = 1;
765 #else
766 int activity_masking = 0;
767 #endif
768 DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]);
769 DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
770 DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
771 int i, j;
772 for (i = 0; i < bsize_h; i++) {
773 for (j = 0; j < bsize_w; j++) {
774 y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
775 }
776 }
777 int mid = OD_DIST_LP_MID;
778 for (i = 0; i < bsize_h; i++) {
779 tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
780 tmp[i * bsize_w + bsize_w - 1] =
781 mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
782 for (j = 1; j < bsize_w - 1; j++) {
783 tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
784 e[i * bsize_w + j + 1];
785 }
786 }
787 return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
788 qindex, tmp, e_lp);
789 }
790
// Distortion of a bsw x bsh block (both multiples of 8, >= 8), dispatched on
// x->tune_metric: AOM_TUNE_DAALA_DIST uses od_compute_dist(),
// AOM_TUNE_CDEF_DIST sums cdef_dist_8x8_16bit() over 8x8 sub-blocks, and
// anything else falls back to plain SSE via pixel_dist_visible_only().
// Pixels outside the visible_w x visible_h region are padded with source
// pixels so they contribute zero error.
int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
                     const uint8_t *src, int src_stride, const uint8_t *dst,
                     int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
                     int bsh, int visible_w, int visible_h, int qindex) {
  int64_t d = 0;
  int i, j;
  const MACROBLOCKD *xd = &x->e_mbd;

  // 16-bit scratch copies of source and reconstruction, packed with row
  // stride bsw (only filled for the Daala/CDEF metrics).
  DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
  DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]);

  assert(bsw >= 8);
  assert(bsh >= 8);
  assert((bsw & 0x07) == 0);
  assert((bsh & 0x07) == 0);

  if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
      x->tune_metric == AOM_TUNE_DAALA_DIST) {
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      // High-bitdepth: copy via CONVERT_TO_SHORTPTR.
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++)
          orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];

      if ((bsw == visible_w) && (bsh == visible_h)) {
        for (j = 0; j < bsh; j++)
          for (i = 0; i < bsw; i++)
            rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
      } else {
        // Copy the visible part of the reconstruction, then pad the
        // right/bottom invisible region with source pixels (zero error).
        for (j = 0; j < visible_h; j++)
          for (i = 0; i < visible_w; i++)
            rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];

        if (visible_w < bsw) {
          for (j = 0; j < bsh; j++)
            for (i = visible_w; i < bsw; i++)
              rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
        }

        if (visible_h < bsh) {
          for (j = visible_h; j < bsh; j++)
            for (i = 0; i < bsw; i++)
              rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
        }
      }
    } else {
#endif
      // 8-bit path: same copy/pad scheme as above.
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];

      if ((bsw == visible_w) && (bsh == visible_h)) {
        for (j = 0; j < bsh; j++)
          for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
      } else {
        for (j = 0; j < visible_h; j++)
          for (i = 0; i < visible_w; i++)
            rec[j * bsw + i] = dst[j * dst_stride + i];

        if (visible_w < bsw) {
          for (j = 0; j < bsh; j++)
            for (i = visible_w; i < bsw; i++)
              rec[j * bsw + i] = src[j * src_stride + i];
        }

        if (visible_h < bsh) {
          for (j = visible_h; j < bsh; j++)
            for (i = 0; i < bsw; i++)
              rec[j * bsw + i] = src[j * src_stride + i];
        }
      }
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH
  }

  if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
    d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
  } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
    int coeff_shift = AOMMAX(xd->bd - 8, 0);

    for (i = 0; i < bsh; i += 8) {
      for (j = 0; j < bsw; j += 8) {
        d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j],
                                 bsw, coeff_shift);
      }
    }
#if CONFIG_HIGHBITDEPTH
    // Normalize high-bitdepth distortion back to the 8-bit scale.
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
      d = ((uint64_t)d) >> 2 * coeff_shift;
#endif
  } else {
    // Otherwise, MSE by default
    d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride,
                                tx_bsize, bsh, bsw, visible_h, visible_w);
  }

  return d;
}
889
// Same as av1_dist_8x8() but takes the prediction residual ('diff') rather
// than a reconstruction. The reconstruction needed by the CDEF metric is
// derived as orig - diff. Residual samples outside the visible region are
// zeroed so they contribute no distortion.
static int64_t av1_dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
                                 int src_stride, const int16_t *diff,
                                 int diff_stride, int bsw, int bsh,
                                 int visible_w, int visible_h, int qindex) {
  int64_t d = 0;
  int i, j;
  const MACROBLOCKD *xd = &x->e_mbd;

  // 16-bit staging copies used by the Daala and CDEF metrics.
  DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
  DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]);

  // Both special metrics require dimensions that are multiples of 8.
  assert(bsw >= 8);
  assert(bsh >= 8);
  assert((bsw & 0x07) == 0);
  assert((bsh & 0x07) == 0);

  if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
      x->tune_metric == AOM_TUNE_DAALA_DIST) {
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++)
          orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
    } else {
#endif
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
#if CONFIG_HIGHBITDEPTH
    }
#endif // CONFIG_HIGHBITDEPTH

    if ((bsw == visible_w) && (bsh == visible_h)) {
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++)
          diff16[j * bsw + i] = diff[j * diff_stride + i];
    } else {
      // Copy only the visible residual; zero the right and bottom margins
      // so the padding adds no distortion.
      for (j = 0; j < visible_h; j++)
        for (i = 0; i < visible_w; i++)
          diff16[j * bsw + i] = diff[j * diff_stride + i];

      if (visible_w < bsw) {
        for (j = 0; j < bsh; j++)
          for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
      }

      if (visible_h < bsh) {
        for (j = visible_h; j < bsh; j++)
          for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
      }
    }
  }

  if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
    d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
  } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
    int coeff_shift = AOMMAX(xd->bd - 8, 0);
    DECLARE_ALIGNED(16, uint16_t, dst16[MAX_TX_SQUARE]);

    // Rebuild the reconstruction from source and residual.
    for (i = 0; i < bsh; i++) {
      for (j = 0; j < bsw; j++) {
        dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j];
      }
    }

    for (i = 0; i < bsh; i += 8) {
      for (j = 0; j < bsw; j += 8) {
        d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j],
                                 bsw, coeff_shift);
      }
    }
    // Don't scale 'd' for HBD since it will be done by caller side for diff
    // input
  } else {
    // Otherwise, MSE by default
    d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h);
  }

  return d;
}
969 #endif // CONFIG_DIST_8X8
970
// Splits the block into a 4x4 grid of sub-blocks, measures the SSE between
// src and dst in each cell, and normalizes the per-column / per-row energy
// sums into hordist[0..2] / verdist[0..2]. Only three of the four marginal
// entries are emitted; the fourth is implied since each distribution sums
// to 1. When the total energy is zero, uniform 0.25 distributions are
// returned.
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                                         const uint8_t *src, int src_stride,
                                         const uint8_t *dst, int dst_stride,
                                         double *hordist, double *verdist) {
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };

  // Blocks below 16x16 have no variance function of the right sub-block
  // size, so accumulate the squared errors pixel by pixel.
  const int f_index = bsize - BLOCK_16X16;
  if (f_index < 0) {
    const int w_shift = bw == 8 ? 1 : 2;
    const int h_shift = bh == 8 ? 1 : 2;
#if CONFIG_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
      const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          // Map the pixel to its cell in the 4x4 grid.
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          esq[index] +=
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
              (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
        }
    } else {
#endif // CONFIG_HIGHBITDEPTH

      for (int i = 0; i < bh; ++i)
        for (int j = 0; j < bw; ++j) {
          const int index = (j >> w_shift) + ((i >> h_shift) << 2);
          esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
                        (src[j + i * src_stride] - dst[j + i * dst_stride]);
        }
#if CONFIG_HIGHBITDEPTH
    }
#endif // CONFIG_HIGHBITDEPTH
  } else {
    // 16x16 and larger: use the matching variance function on each of the
    // 16 (bw/4 x bh/4) cells, one grid row at a time.
    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[1]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[2]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[3]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[5]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[6]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[7]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[9]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[10]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[11]);
    src += bh / 4 * src_stride;
    dst += bh / 4 * dst_stride;

    cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
    cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
                            &esq[13]);
    cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
                            &esq[14]);
    cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
                            dst_stride, &esq[15]);
  }

  double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
                 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
                 esq[12] + esq[13] + esq[14] + esq[15];
  if (total > 0) {
    const double e_recip = 1.0 / total;
    hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
    hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
    hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
    verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
    verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
    verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
  } else {
    // No distortion at all: fall back to uniform distributions.
    hordist[0] = verdist[0] = 0.25;
    hordist[1] = verdist[1] = 0.25;
    hordist[2] = verdist[2] = 0.25;
  }
}
1063
// Decides whether ADST or FLIPADST can be pruned from the 1D transform
// search, using an SVM projection of the block's error-energy distribution.
// Returns a bitmask: bits 0-7 refer to vertical 1D transforms, bits 8-15 to
// horizontal ones; a set bit means "prune this transform type".
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                            const uint8_t *src, int src_stride,
                            const uint8_t *dst, int dst_stride) {
  double hdist[3] = { 0, 0, 0 };
  double vdist[3] = { 0, 0, 0 };
  int mask = 0;

  get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
                               hdist, vdist);

  // Project each marginal energy distribution onto the trained SVM.
  const double proj_v = vdist[0] * ADST_FLIP_SVM[0] +
                        vdist[1] * ADST_FLIP_SVM[1] +
                        vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
  const double proj_h = hdist[0] * ADST_FLIP_SVM[4] +
                        hdist[1] * ADST_FLIP_SVM[5] +
                        hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];

  // Prune only when the projection is confidently on one side of the
  // decision boundary (outside the +/- margin).
  if (proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    mask |= 1 << FLIPADST_1D;
  else if (proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    mask |= 1 << ADST_1D;

  if (proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
    mask |= 1 << (FLIPADST_1D + 8);
  else if (proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
    mask |= 1 << (ADST_1D + 8);

  return mask;
}
1089
1090 #if CONFIG_EXT_TX
get_horver_correlation(const int16_t * diff,int stride,int w,int h,double * hcorr,double * vcorr)1091 static void get_horver_correlation(const int16_t *diff, int stride, int w,
1092 int h, double *hcorr, double *vcorr) {
1093 // Returns hor/ver correlation coefficient
1094 const int num = (h - 1) * (w - 1);
1095 double num_r;
1096 int i, j;
1097 int64_t xy_sum = 0, xz_sum = 0;
1098 int64_t x_sum = 0, y_sum = 0, z_sum = 0;
1099 int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
1100 double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
1101 *hcorr = *vcorr = 1;
1102
1103 assert(num > 0);
1104 num_r = 1.0 / num;
1105 for (i = 1; i < h; ++i) {
1106 for (j = 1; j < w; ++j) {
1107 const int16_t x = diff[i * stride + j];
1108 const int16_t y = diff[i * stride + j - 1];
1109 const int16_t z = diff[(i - 1) * stride + j];
1110 xy_sum += x * y;
1111 xz_sum += x * z;
1112 x_sum += x;
1113 y_sum += y;
1114 z_sum += z;
1115 x2_sum += x * x;
1116 y2_sum += y * y;
1117 z2_sum += z * z;
1118 }
1119 }
1120 x_var_n = x2_sum - (x_sum * x_sum) * num_r;
1121 y_var_n = y2_sum - (y_sum * y_sum) * num_r;
1122 z_var_n = z2_sum - (z_sum * z_sum) * num_r;
1123 xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
1124 xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
1125 if (x_var_n > 0 && y_var_n > 0) {
1126 *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
1127 *hcorr = *hcorr < 0 ? 0 : *hcorr;
1128 }
1129 if (x_var_n > 0 && z_var_n > 0) {
1130 *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
1131 *vcorr = *vcorr < 0 ? 0 : *vcorr;
1132 }
1133 }
1134
dct_vs_idtx(const int16_t * diff,int stride,int w,int h)1135 int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
1136 double hcorr, vcorr;
1137 int prune_bitmask = 0;
1138 get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
1139
1140 if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
1141 prune_bitmask |= 1 << IDTX_1D;
1142 else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
1143 prune_bitmask |= 1 << DCT_1D;
1144
1145 if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
1146 prune_bitmask |= 1 << (IDTX_1D + 8);
1147 else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
1148 prune_bitmask |= 1 << (DCT_1D + 8);
1149 return prune_bitmask;
1150 }
1151
1152 // Performance drop: 0.5%, Speed improvement: 24%
// Builds a combined prune bitmask for the luma plane; adst_flipadst and
// dct_idtx select which of the two classifiers to run. Bits 0-7 of the
// result prune vertical 1D transforms, bits 8-15 horizontal ones.
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                             MACROBLOCK *x, const MACROBLOCKD *xd,
                             int adst_flipadst, int dct_idtx) {
  int prune = 0;

  if (adst_flipadst) {
    const struct macroblock_plane *const p = &x->plane[0];
    const struct macroblockd_plane *const pd = &xd->plane[0];
    prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
                              pd->dst.buf, pd->dst.stride);
  }
  if (dct_idtx) {
    // dct_vs_idtx() operates on the residual, so compute it first.
    av1_subtract_plane(x, bsize, 0);
    const struct macroblock_plane *const p = &x->plane[0];
    const int bw = 4 << (b_width_log2_lookup[bsize]);
    const int bh = 4 << (b_height_log2_lookup[bsize]);
    prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
  }

  return prune;
}
1174 #endif // CONFIG_EXT_TX
1175
1176 // Performance drop: 0.3%, Speed improvement: 5%
// Prunes only the ADST-vs-FLIPADST decision, using the luma source and
// its prediction. Returns the prune bitmask from adst_vs_flipadst().
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
                             const MACROBLOCK *x, const MACROBLOCKD *xd) {
  const struct macroblock_plane *const src_plane = &x->plane[0];
  const struct macroblockd_plane *const dst_plane = &xd->plane[0];
  return adst_vs_flipadst(cpi, bsize, src_plane->src.buf,
                          src_plane->src.stride, dst_plane->dst.buf,
                          dst_plane->dst.stride);
}
1184
1185 #if CONFIG_EXT_TX
1186 // 1D Transforms used in inter set, this needs to be changed if
1187 // ext_tx_used_inter is changed
// Row = inter ext-tx set, column = 1D transform type (DCT, ADST, FLIPADST,
// IDTX); 1 means the 1D type appears in that set.
static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
  { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 },
#if CONFIG_MRC_TX
  { 1, 0, 0, 1 },
#endif // CONFIG_MRC_TX
};
1194 #endif // CONFIG_EXT_TX
1195
// Computes the transform-type prune bitmask for the current block based on
// the speed feature prune_mode. tx_set is the inter ext-tx set index, or
// negative when no set restriction applies; pruning is skipped when the
// set would not let the classifier distinguish anything.
// Fix: removed the unreachable 'break' statements that followed 'return'
// in the switch cases (dead code).
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
                          const MACROBLOCKD *const xd, int tx_set) {
#if CONFIG_EXT_TX
  const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL;
#else
  const int tx_set_1D[TX_TYPES_1D] = { 0 };
#endif // CONFIG_EXT_TX

  switch (cpi->sf.tx_type_search.prune_mode) {
    case NO_PRUNE: return 0;
    case PRUNE_ONE:
      // ADST-vs-FLIPADST pruning only helps when both are in the set.
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
        return 0;
      return prune_one_for_sby(cpi, bsize, x, xd);
#if CONFIG_EXT_TX
    case PRUNE_TWO:
      if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
        // ADST/FLIPADST unusable: only the DCT-vs-IDTX classifier helps.
        if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
        return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
      }
      if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
        return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
      return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
#endif // CONFIG_EXT_TX
  }
  assert(0);
  return 0;
}
1226
// Returns 1 if tx_type survives the prune bitmask produced by
// prune_tx_types(), 0 if it should be skipped.
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  // Bit vtx_tab[t] prunes the vertical 1D component; bit htx_tab[t] + 8
  // the horizontal one. Reject the type if either bit is set.
  return !(((prune >> vtx_tab[tx_type]) & 1) |
           ((prune >> (htx_tab[tx_type] + 8)) & 1));
#else
  // temporary to avoid compiler warnings
  (void)vtx_tab;
  (void)htx_tab;
  (void)tx_type;
  (void)prune;
  return 1;
#endif // CONFIG_EXT_TX
}
1241
// Translates a plane's SSE into modeled (rate, distortion). The fast path
// uses a piecewise-linear fit on the effective quantizer; otherwise the
// Laplacian-PDF model is used. *dist is returned scaled by 16 (the final
// << 4).
static void model_rd_from_sse(const AV1_COMP *const cpi,
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  // Dequant values carry extra precision in high bitdepth; shift down to
  // get an effective quantizer step.
  const int dequant_shift =
#if CONFIG_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif // CONFIG_HIGHBITDEPTH
                                                    3;

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    int quantizer = (pd->dequant[1] >> dequant_shift);

    if (quantizer < 120)
      *rate = (int)((square_error * (280 - quantizer)) >>
                    (16 - AV1_PROB_COST_SHIFT));
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant[1] >> dequant_shift, rate, dist);
  }

  *dist <<= 4;
}
1271
// Models rate and distortion for planes [plane_from, plane_to] of a
// superblock from the SSE between source and current prediction. Also
// reports whether the whole transform can be skipped (*skip_txfm_sb) and
// the corresponding skip SSE (*skip_sse_sb). Luma SSE is cached in
// x->pred_sse for the block's first reference frame.
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
#if CONFIG_CHROMA_SUB8X8
    const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#else
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
#endif // CONFIG_CHROMA_SUB8X8

    unsigned int sse;
    int rate;
    int64_t dist;

#if CONFIG_CB4X4
    // Chroma RD may be skipped entirely for sub-8x8 blocks.
    if (x->skip_chroma_rd && plane) continue;
#endif // CONFIG_CB4X4

    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);

    if (plane == 0) x->pred_sse[ref] = sse;

    total_sse += sse;

    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *skip_txfm_sb = total_sse == 0;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum;
}
1326
// Returns the sum of squared differences between the original and
// dequantized coefficients, and writes the sum of squared original
// coefficients to *ssz.
// Fix: accumulate the difference and its square in 64 bits. The previous
// 'int diff' squared two 32-bit coefficient differences in int, which can
// overflow (undefined behavior) for large-magnitude coefficients; the
// high-bitdepth variant below already uses int64_t.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
1341
av1_block_error_fp_c(const int16_t * coeff,const int16_t * dqcoeff,int block_size)1342 int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
1343 int block_size) {
1344 int i;
1345 int64_t error = 0;
1346
1347 for (i = 0; i < block_size; i++) {
1348 const int diff = coeff[i] - dqcoeff[i];
1349 error += diff * diff;
1350 }
1351
1352 return error;
1353 }
1354
1355 #if CONFIG_HIGHBITDEPTH
av1_highbd_block_error_c(const tran_low_t * coeff,const tran_low_t * dqcoeff,intptr_t block_size,int64_t * ssz,int bd)1356 int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
1357 const tran_low_t *dqcoeff, intptr_t block_size,
1358 int64_t *ssz, int bd) {
1359 int i;
1360 int64_t error = 0, sqcoeff = 0;
1361 int shift = 2 * (bd - 8);
1362 int rounding = shift > 0 ? 1 << (shift - 1) : 0;
1363
1364 for (i = 0; i < block_size; i++) {
1365 const int64_t diff = coeff[i] - dqcoeff[i];
1366 error += diff * diff;
1367 sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
1368 }
1369 assert(error >= 0 && sqcoeff >= 0);
1370 error = (error + rounding) >> shift;
1371 sqcoeff = (sqcoeff + rounding) >> shift;
1372
1373 *ssz = sqcoeff;
1374 return error;
1375 }
1376 #endif // CONFIG_HIGHBITDEPTH
1377
1378 #if CONFIG_PVQ
1379 // Without PVQ, av1_block_error_c() return two kind of errors,
1380 // 1) reconstruction (i.e. decoded) error and
1381 // 2) Squared sum of transformed residue (i.e. 'coeff')
1382 // However, if PVQ is enabled, coeff does not keep the transformed residue
1383 // but instead a transformed original is kept.
1384 // Hence, new parameter ref vector (i.e. transformed predicted signal)
1385 // is required to derive the residue signal,
1386 // i.e. coeff - ref = residue (all transformed).
1387
1388 #if CONFIG_HIGHBITDEPTH
// PVQ high-bitdepth variant: with PVQ, 'coeff' holds the transformed
// original rather than the residue, so the prediction residue energy for
// *ssz is derived against 'ref' (the transformed prediction). Both outputs
// are rounded and downshifted by 2*(bd-8).
static int64_t av1_highbd_block_error2_c(const tran_low_t *coeff,
                                         const tran_low_t *dqcoeff,
                                         const tran_low_t *ref,
                                         intptr_t block_size, int64_t *ssz,
                                         int bd) {
  int64_t error;
  int64_t sqcoeff;
  int shift = 2 * (bd - 8);
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  // For high bit depth, throw away ssz until a 32-bit version of
  // av1_block_error_fp is written.
  int64_t ssz_trash;
  error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
  // prediction residue^2 = (orig - ref)^2
  sqcoeff = av1_block_error(coeff, ref, block_size, &ssz_trash);
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;
  *ssz = sqcoeff;
  return error;
}
1411 #else
1412 // TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
1413 // a separate function that does not do the extra computations for ssz.
// PVQ 8-bit variant: see the block comment above — 'coeff' holds the
// transformed original, so *ssz (residue energy) is computed against 'ref'.
// TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
// a separate function that does not do the extra computations for ssz.
static int64_t av1_block_error2_c(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
                                  const tran_low_t *ref, intptr_t block_size,
                                  int64_t *ssz) {
  int64_t error;
  int64_t ssz_trash;
  // Use the existing sse codes for calculating distortion of decoded signal:
  // i.e. (orig - decoded)^2
  error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
  // prediction residue^2 = (orig - ref)^2
  *ssz = av1_block_error(coeff, ref, block_size, &ssz_trash);
  return error;
}
1427 #endif // CONFIG_HIGHBITDEPTH
1428 #endif // CONFIG_PVQ
1429
1430 #if !CONFIG_PVQ || CONFIG_VAR_TX
1431 #if !CONFIG_LV_MAP
// Returns the bit cost of coding the quantized coefficients of one
// transform block with the legacy (non-LV_MAP) token scheme. 'a' and 'l'
// are the above/left entropy contexts. use_fast_coef_costing selects an
// approximate context model (band context only, no token_cache updates for
// neighbor contexts).
static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                       int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                       const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Band 0 holds only the DC coefficient; start from band 1 counts.
  const uint16_t *band_count = &band_count_table[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  const TX_SIZE tx_size_ctx = txsize_sqr_map[tx_size];
  uint8_t token_cache[MAX_TX_SQUARE];
  int pt = combine_entropy_contexts(*a, *l);
  int c, cost;
  const int16_t *scan = scan_order->scan;
  const int16_t *nb = scan_order->neighbors;
  const int ref = is_inter_block(mbmi);
  int(*head_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] =
      x->token_head_costs[tx_size_ctx][type][ref];
  int(*tail_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] =
      x->token_tail_costs[tx_size_ctx][type][ref];
  const int seg_eob = av1_get_tx_eob(&cm->seg, mbmi->segment_id, tx_size);
  int eob_val;

#if CONFIG_HIGHBITDEPTH
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
#else
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
#endif // CONFIG_HIGHBITDEPTH

#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
  // Check for consistency of tx_size with mode info
  assert(tx_size == av1_get_tx_size(plane, xd));
#endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX
  (void)cm;

  if (eob == 0) {
    // block zero
    cost = (*head_token_costs)[pt][0];
  } else {
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
      cost = av1_get_token_cost(v, &prev_t, cat6_bits);
      eob_val = (eob == 1) ? EARLY_EOB : NO_EOB;
      cost += av1_get_coeff_token_cost(
          prev_t, eob_val, 1, (*head_token_costs)[pt], (*tail_token_costs)[pt]);

      token_cache[0] = av1_pt_energy_class[prev_t];
      ++head_token_costs;
      ++tail_token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
        cost += av1_get_token_cost(v, &t, cat6_bits);
        eob_val =
            (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
        // Fast mode: context depends only on whether the previous token
        // was ZERO, not on the full neighbor context.
        cost += av1_get_coeff_token_cost(t, eob_val, 0,
                                         (*head_token_costs)[!prev_t],
                                         (*tail_token_costs)[!prev_t]);
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++head_token_costs;
          ++tail_token_costs;
        }
      }
    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
      cost = av1_get_token_cost(v, &tok, cat6_bits);
      eob_val = (eob == 1) ? EARLY_EOB : NO_EOB;
      cost += av1_get_coeff_token_cost(tok, eob_val, 1, (*head_token_costs)[pt],
                                       (*tail_token_costs)[pt]);

      token_cache[0] = av1_pt_energy_class[tok];
      ++head_token_costs;
      ++tail_token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
        cost += av1_get_token_cost(v, &tok, cat6_bits);
        // Full mode: derive the context from the scanned neighbors.
        pt = get_coef_context(nb, token_cache, c);
        eob_val =
            (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
        cost += av1_get_coeff_token_cost(
            tok, eob_val, 0, (*head_token_costs)[pt], (*tail_token_costs)[pt]);
        token_cache[rc] = av1_pt_energy_class[tok];
        if (!--band_left) {
          band_left = *band_count++;
          ++head_token_costs;
          ++tail_token_costs;
        }
      }
    }
  }

  return cost;
}
1546 #endif // !CONFIG_LV_MAP
1547
// Public entry point for coefficient-coding cost. Dispatches to the
// LV_MAP coder (av1_cost_coeffs_txb) or the legacy token coder
// (cost_coeffs), adding the MRC mask cost for MRC_DCT when that mask is
// signaled.
int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
                    int blk_row, int blk_col, int block, TX_SIZE tx_size,
                    const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a,
                    const ENTROPY_CONTEXT *l, int use_fast_coef_costing) {
  const AV1_COMMON *const cm = &cpi->common;
#if !CONFIG_LV_MAP
  (void)blk_row;
  (void)blk_col;
#if CONFIG_MRC_TX
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const TX_TYPE tx_type = av1_get_tx_type(xd->plane[plane].plane_type, xd,
                                          blk_row, blk_col, block, tx_size);
  const int is_inter = is_inter_block(mbmi);
  if (tx_type == MRC_DCT && ((is_inter && SIGNAL_MRC_MASK_INTER) ||
                             (!is_inter && SIGNAL_MRC_MASK_INTRA))) {
    // MRC_DCT also codes a mask; include its cost.
    const int mrc_mask_cost =
        av1_cost_color_map(x, plane, block, mbmi->sb_type, tx_size, MRC_MAP);
    return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
                       use_fast_coef_costing) +
           mrc_mask_cost;
  }
#endif
  return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
                     use_fast_coef_costing);
#else  // !CONFIG_LV_MAP
  (void)scan_order;
  (void)use_fast_coef_costing;
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const BLOCK_SIZE bsize = mbmi->sb_type;
#if CONFIG_CHROMA_SUB8X8
  const BLOCK_SIZE plane_bsize =
      AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
#elif CONFIG_CB4X4
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else  // CONFIG_CB4X4
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
#endif // CONFIG_CB4X4

  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
  return av1_cost_coeffs_txb(cm, x, plane, blk_row, blk_col, block, tx_size,
                             &txb_ctx);
#endif // !CONFIG_LV_MAP
}
1596 #endif // !CONFIG_PVQ || CONFIG_VAR_TX
1597
1598 // Get transform block visible dimensions cropped to the MI units.
// Reports the transform block's nominal size (*width/*height, optional)
// and its visible size (*visible_width/*visible_height), i.e. the part of
// the block that lies inside the frame, derived from the distance to the
// bottom/right frame edges in MI units.
static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
                               BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
                               BLOCK_SIZE tx_bsize, int *width, int *height,
                               int *visible_width, int *visible_height) {
#if !(CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX))
  assert(tx_bsize <= plane_bsize);
#endif
  int txb_height = block_size_high[tx_bsize];
  int txb_width = block_size_wide[tx_bsize];
  const int block_height = block_size_high[plane_bsize];
  const int block_width = block_size_wide[plane_bsize];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  // TODO(aconverse@google.com): Investigate using crop_width/height here rather
  // than the MI size
  // Negative mb_to_*_edge means the block extends past the frame boundary;
  // the shift converts from 1/8-pel to pixels in this plane's subsampling.
  const int block_rows =
      (xd->mb_to_bottom_edge >= 0)
          ? block_height
          : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
  const int block_cols =
      (xd->mb_to_right_edge >= 0)
          ? block_width
          : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
  const int tx_unit_size = tx_size_wide_log2[0];
  if (width) *width = txb_width;
  if (height) *height = txb_height;
  *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
  *visible_height =
      clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
}
1628
// Compute the pixel domain distortion from src and dst on all visible 4x4s
// in the transform block.
// Pixel-domain distortion between src and dst over the visible part of a
// transform block. Routes to av1_dist_8x8() when the 8x8 perceptual metric
// is active and the block is large enough; otherwise plain SSE over the
// visible region.
static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
                           int plane, const uint8_t *src, const int src_stride,
                           const uint8_t *dst, const int dst_stride,
                           int blk_row, int blk_col,
                           const BLOCK_SIZE plane_bsize,
                           const BLOCK_SIZE tx_bsize) {
  int txb_rows, txb_cols, visible_rows, visible_cols;
  const MACROBLOCKD *xd = &x->e_mbd;

  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
                     &txb_cols, &txb_rows, &visible_cols, &visible_rows);
  assert(visible_rows > 0);
  assert(visible_cols > 0);

#if CONFIG_DIST_8X8
  // The perceptual metric applies only to luma blocks of at least 8x8.
  if (x->using_dist_8x8 && plane == 0 && txb_cols >= 8 && txb_rows >= 8)
    return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
                                  tx_bsize, txb_cols, txb_rows, visible_cols,
                                  visible_rows, x->qindex);
#endif // CONFIG_DIST_8X8

  unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst,
                                         dst_stride, tx_bsize, txb_rows,
                                         txb_cols, visible_rows, visible_cols);

  return sse;
}
1659
1660 // Compute the pixel domain distortion from diff on all visible 4x4s in the
1661 // transform block.
// Pixel-domain distortion computed from the prediction residual ('diff')
// over the visible part of a transform block. Routes to
// av1_dist_8x8_diff() when the 8x8 perceptual metric is active; otherwise
// sums the squared residual over the visible region.
static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
                               const int16_t *diff, const int diff_stride,
                               int blk_row, int blk_col,
                               const BLOCK_SIZE plane_bsize,
                               const BLOCK_SIZE tx_bsize) {
  int visible_rows, visible_cols;
  const MACROBLOCKD *xd = &x->e_mbd;
#if CONFIG_DIST_8X8
  // The perceptual path also needs the source pixels for this block.
  int txb_height = block_size_high[tx_bsize];
  int txb_width = block_size_wide[tx_bsize];
  const int src_stride = x->plane[plane].src.stride;
  const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0];
  const uint8_t *src = &x->plane[plane].src.buf[src_idx];
#endif

  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
                     NULL, &visible_cols, &visible_rows);

#if CONFIG_DIST_8X8
  if (x->using_dist_8x8 && plane == 0 && txb_width >= 8 && txb_height >= 8)
    return av1_dist_8x8_diff(x, src, src_stride, diff, diff_stride, txb_width,
                             txb_height, visible_cols, visible_rows, x->qindex);
  else
#endif
    return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols,
                                  visible_rows);
}
1689
av1_count_colors(const uint8_t * src,int stride,int rows,int cols)1690 int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) {
1691 int val_count[256];
1692 memset(val_count, 0, sizeof(val_count));
1693 for (int r = 0; r < rows; ++r) {
1694 for (int c = 0; c < cols; ++c) {
1695 ++val_count[src[r * stride + c]];
1696 }
1697 }
1698 int n = 0;
1699 for (int i = 0; i < 256; ++i) {
1700 if (val_count[i]) ++n;
1701 }
1702 return n;
1703 }
1704
1705 #if CONFIG_HIGHBITDEPTH
av1_count_colors_highbd(const uint8_t * src8,int stride,int rows,int cols,int bit_depth)1706 int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
1707 int bit_depth) {
1708 assert(bit_depth <= 12);
1709 const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
1710 int val_count[1 << 12];
1711 memset(val_count, 0, (1 << 12) * sizeof(val_count[0]));
1712 for (int r = 0; r < rows; ++r) {
1713 for (int c = 0; c < cols; ++c) {
1714 ++val_count[src[r * stride + c]];
1715 }
1716 }
1717 int n = 0;
1718 for (int i = 0; i < (1 << bit_depth); ++i) {
1719 if (val_count[i]) ++n;
1720 }
1721 return n;
1722 }
1723 #endif // CONFIG_HIGHBITDEPTH
1724
av1_dist_block(const AV1_COMP * cpi,MACROBLOCK * x,int plane,BLOCK_SIZE plane_bsize,int block,int blk_row,int blk_col,TX_SIZE tx_size,int64_t * out_dist,int64_t * out_sse,OUTPUT_STATUS output_status)1725 void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
1726 BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
1727 TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
1728 OUTPUT_STATUS output_status) {
1729 MACROBLOCKD *const xd = &x->e_mbd;
1730 const struct macroblock_plane *const p = &x->plane[plane];
1731 #if CONFIG_DIST_8X8
1732 struct macroblockd_plane *const pd = &xd->plane[plane];
1733 #else // CONFIG_DIST_8X8
1734 const struct macroblockd_plane *const pd = &xd->plane[plane];
1735 #endif // CONFIG_DIST_8X8
1736
1737 if (cpi->sf.use_transform_domain_distortion
1738 #if CONFIG_DIST_8X8
1739 && !x->using_dist_8x8
1740 #endif
1741 ) {
1742 // Transform domain distortion computation is more efficient as it does
1743 // not involve an inverse transform, but it is less accurate.
1744 const int buffer_length = tx_size_2d[tx_size];
1745 int64_t this_sse;
1746 int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
1747 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
1748 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1749 #if CONFIG_PVQ
1750 tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
1751
1752 #if CONFIG_HIGHBITDEPTH
1753 const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
1754 *out_dist = av1_highbd_block_error2_c(coeff, dqcoeff, ref_coeff,
1755 buffer_length, &this_sse, bd);
1756 #else
1757 *out_dist =
1758 av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length, &this_sse);
1759 #endif // CONFIG_HIGHBITDEPTH
1760 #else // !CONFIG_PVQ
1761 #if CONFIG_HIGHBITDEPTH
1762 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1763 *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length,
1764 &this_sse, xd->bd);
1765 else
1766 #endif
1767 *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse);
1768 #endif // CONFIG_PVQ
1769 *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift);
1770 *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift);
1771 } else {
1772 const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
1773 #if !CONFIG_PVQ || CONFIG_DIST_8X8
1774 const int bsw = block_size_wide[tx_bsize];
1775 const int bsh = block_size_high[tx_bsize];
1776 #endif
1777 const int src_stride = x->plane[plane].src.stride;
1778 const int dst_stride = xd->plane[plane].dst.stride;
1779 // Scale the transform block index to pixel unit.
1780 const int src_idx = (blk_row * src_stride + blk_col)
1781 << tx_size_wide_log2[0];
1782 const int dst_idx = (blk_row * dst_stride + blk_col)
1783 << tx_size_wide_log2[0];
1784 const uint8_t *src = &x->plane[plane].src.buf[src_idx];
1785 const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
1786 const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1787 const uint16_t eob = p->eobs[block];
1788
1789 assert(cpi != NULL);
1790 assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
1791
1792 {
1793 const int diff_stride = block_size_wide[plane_bsize];
1794 const int diff_idx = (blk_row * diff_stride + blk_col)
1795 << tx_size_wide_log2[0];
1796 const int16_t *diff = &p->src_diff[diff_idx];
1797 *out_sse = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
1798 plane_bsize, tx_bsize);
1799 #if CONFIG_HIGHBITDEPTH
1800 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1801 *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
1802 #endif // CONFIG_HIGHBITDEPTH
1803 }
1804 *out_sse *= 16;
1805
1806 if (eob) {
1807 if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
1808 *out_dist = pixel_dist(cpi, x, plane, src, src_stride, dst, dst_stride,
1809 blk_row, blk_col, plane_bsize, tx_bsize);
1810 } else {
1811 #if CONFIG_HIGHBITDEPTH
1812 uint8_t *recon;
1813 DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
1814
1815 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1816 recon = CONVERT_TO_BYTEPTR(recon16);
1817 else
1818 recon = (uint8_t *)recon16;
1819 #else
1820 DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
1821 #endif // CONFIG_HIGHBITDEPTH
1822
1823 #if !CONFIG_PVQ
1824 #if CONFIG_HIGHBITDEPTH
1825 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1826 aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
1827 NULL, 0, bsw, bsh, xd->bd);
1828 } else {
1829 #endif // CONFIG_HIGHBITDEPTH
1830 aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL,
1831 0, bsw, bsh);
1832 #if CONFIG_HIGHBITDEPTH
1833 }
1834 #endif // CONFIG_HIGHBITDEPTH
1835 #else
1836 (void)dst;
1837 #endif // !CONFIG_PVQ
1838
1839 #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
1840 uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
1841 #endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
1842 const PLANE_TYPE plane_type = get_plane_type(plane);
1843 TX_TYPE tx_type =
1844 av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
1845 av1_inverse_transform_block(xd, dqcoeff,
1846 #if CONFIG_LGT_FROM_PRED
1847 xd->mi[0]->mbmi.mode,
1848 #endif
1849 #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
1850 mrc_mask,
1851 #endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
1852 tx_type, tx_size, recon, MAX_TX_SIZE, eob);
1853
1854 #if CONFIG_DIST_8X8
1855 if (x->using_dist_8x8 && plane == 0 && (bsw < 8 || bsh < 8)) {
1856 // Save decoded pixels for inter block in pd->pred to avoid
1857 // block_8x8_rd_txfm_daala_dist() need to produce them
1858 // by calling av1_inverse_transform_block() again.
1859 const int pred_stride = block_size_wide[plane_bsize];
1860 const int pred_idx = (blk_row * pred_stride + blk_col)
1861 << tx_size_wide_log2[0];
1862 int16_t *pred = &pd->pred[pred_idx];
1863 int i, j;
1864
1865 #if CONFIG_HIGHBITDEPTH
1866 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1867 for (j = 0; j < bsh; j++)
1868 for (i = 0; i < bsw; i++)
1869 pred[j * pred_stride + i] =
1870 CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i];
1871 } else {
1872 #endif
1873 for (j = 0; j < bsh; j++)
1874 for (i = 0; i < bsw; i++)
1875 pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
1876 #if CONFIG_HIGHBITDEPTH
1877 }
1878 #endif // CONFIG_HIGHBITDEPTH
1879 }
1880 #endif // CONFIG_DIST_8X8
1881 *out_dist =
1882 pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
1883 blk_row, blk_col, plane_bsize, tx_bsize);
1884 }
1885 *out_dist *= 16;
1886 } else {
1887 *out_dist = *out_sse;
1888 }
1889 }
1890 }
1891
block_rd_txfm(int plane,int block,int blk_row,int blk_col,BLOCK_SIZE plane_bsize,TX_SIZE tx_size,void * arg)1892 static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
1893 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
1894 struct rdcost_block_args *args = arg;
1895 MACROBLOCK *const x = args->x;
1896 MACROBLOCKD *const xd = &x->e_mbd;
1897 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1898 const AV1_COMP *cpi = args->cpi;
1899 ENTROPY_CONTEXT *a = args->t_above + blk_col;
1900 ENTROPY_CONTEXT *l = args->t_left + blk_row;
1901 const AV1_COMMON *cm = &cpi->common;
1902 int64_t rd1, rd2, rd;
1903 RD_STATS this_rd_stats;
1904
1905 #if CONFIG_DIST_8X8
1906 // If sub8x8 tx, 8x8 or larger partition, and luma channel,
1907 // dist-8x8 disables early skip, because the distortion metrics for
1908 // sub8x8 tx (MSE) and reference distortion from 8x8 or larger partition
1909 // (new distortion metric) are different.
1910 // Exception is: dist-8x8 is enabled but still MSE is used,
1911 // i.e. "--tune=" encoder option is not used.
1912 int disable_early_skip =
1913 x->using_dist_8x8 && plane == 0 && plane_bsize >= BLOCK_8X8 &&
1914 (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
1915 x->tune_metric != AOM_TUNE_PSNR;
1916 #endif // CONFIG_DIST_8X8
1917
1918 #if !CONFIG_SUPERTX && !CONFIG_VAR_TX
1919 assert(tx_size == av1_get_tx_size(plane, xd));
1920 #endif // !CONFIG_SUPERTX
1921
1922 av1_init_rd_stats(&this_rd_stats);
1923
1924 if (args->exit_early) return;
1925
1926 if (!is_inter_block(mbmi)) {
1927 av1_predict_intra_block_facade(cm, xd, plane, block, blk_col, blk_row,
1928 tx_size);
1929 av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
1930 }
1931
1932 #if !CONFIG_TXK_SEL
1933 // full forward transform and quantization
1934 const int coeff_ctx = combine_entropy_contexts(*a, *l);
1935 #if DISABLE_TRELLISQ_SEARCH
1936 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
1937 coeff_ctx, AV1_XFORM_QUANT_B);
1938 #else
1939 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
1940 coeff_ctx, AV1_XFORM_QUANT_FP);
1941
1942 const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
1943 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
1944 tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
1945 const int buffer_length = tx_size_2d[tx_size];
1946 int64_t tmp_dist;
1947 int64_t tmp;
1948 #if CONFIG_HIGHBITDEPTH
1949 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1950 tmp_dist =
1951 av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd);
1952 else
1953 #endif
1954 tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp);
1955 tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);
1956
1957 if (
1958 #if CONFIG_DIST_8X8
1959 disable_early_skip ||
1960 #endif
1961 RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
1962 av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
1963 a, l, 1);
1964 } else {
1965 args->exit_early = 1;
1966 return;
1967 }
1968 #endif // DISABLE_TRELLISQ_SEARCH
1969
1970 #if CONFIG_MRC_TX
1971 if (mbmi->tx_type == MRC_DCT && !mbmi->valid_mrc_mask) {
1972 args->exit_early = 1;
1973 return;
1974 }
1975 #endif // CONFIG_MRC_TX
1976
1977 if (!is_inter_block(mbmi)) {
1978 struct macroblock_plane *const p = &x->plane[plane];
1979 av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
1980 p->eobs[block]);
1981 av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
1982 tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
1983 OUTPUT_HAS_DECODED_PIXELS);
1984 } else {
1985 av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
1986 tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
1987 OUTPUT_HAS_PREDICTED_PIXELS);
1988 }
1989 #if CONFIG_CFL
1990 if (plane == AOM_PLANE_Y && xd->cfl->store_y) {
1991 #if CONFIG_CHROMA_SUB8X8
1992 assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
1993 #else
1994 assert(!is_inter_block(mbmi));
1995 #endif // CONFIG_CHROMA_SUB8X8
1996 cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
1997 }
1998 #endif // CONFIG_CFL
1999 rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
2000 if (args->this_rd + rd > args->best_rd) {
2001 args->exit_early = 1;
2002 return;
2003 }
2004 #if !CONFIG_PVQ
2005 const PLANE_TYPE plane_type = get_plane_type(plane);
2006 const TX_TYPE tx_type =
2007 av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
2008 const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
2009 this_rd_stats.rate =
2010 av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
2011 scan_order, a, l, args->use_fast_coef_costing);
2012 #else // !CONFIG_PVQ
2013 this_rd_stats.rate = x->rate;
2014 #endif // !CONFIG_PVQ
2015 #else // !CONFIG_TXK_SEL
2016 av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
2017 tx_size, a, l, args->use_fast_coef_costing,
2018 &this_rd_stats);
2019 #endif // !CONFIG_TXK_SEL
2020
2021 #if !CONFIG_PVQ
2022 #if CONFIG_RD_DEBUG
2023 av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
2024 this_rd_stats.rate);
2025 #endif // CONFIG_RD_DEBUG
2026 av1_set_txb_context(x, plane, block, tx_size, a, l);
2027 #endif // !CONFIG_PVQ
2028
2029 rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
2030 rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse);
2031
2032 // TODO(jingning): temporarily enabled only for luma component
2033 rd = AOMMIN(rd1, rd2);
2034
2035 #if !CONFIG_PVQ
2036 this_rd_stats.skip &= !x->plane[plane].eobs[block];
2037 #else
2038 this_rd_stats.skip &= x->pvq_skip[plane];
2039 #endif // !CONFIG_PVQ
2040 av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
2041
2042 args->this_rd += rd;
2043
2044 #if CONFIG_DIST_8X8
2045 if (!disable_early_skip)
2046 #endif
2047 if (args->this_rd > args->best_rd) {
2048 args->exit_early = 1;
2049 return;
2050 }
2051 }
2052
2053 #if CONFIG_DIST_8X8
dist_8x8_sub8x8_txfm_rd(const AV1_COMP * const cpi,MACROBLOCK * x,BLOCK_SIZE bsize,struct rdcost_block_args * args)2054 static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
2055 BLOCK_SIZE bsize,
2056 struct rdcost_block_args *args) {
2057 MACROBLOCKD *const xd = &x->e_mbd;
2058 const struct macroblockd_plane *const pd = &xd->plane[0];
2059 const struct macroblock_plane *const p = &x->plane[0];
2060 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2061 const int src_stride = p->src.stride;
2062 const int dst_stride = pd->dst.stride;
2063 const uint8_t *src = &p->src.buf[0];
2064 const uint8_t *dst = &pd->dst.buf[0];
2065 const int16_t *pred = &pd->pred[0];
2066 int bw = block_size_wide[bsize];
2067 int bh = block_size_high[bsize];
2068 int visible_w = bw;
2069 int visible_h = bh;
2070
2071 int i, j;
2072 int64_t rd, rd1, rd2;
2073 unsigned int tmp1, tmp2;
2074 int qindex = x->qindex;
2075
2076 assert((bw & 0x07) == 0);
2077 assert((bh & 0x07) == 0);
2078
2079 get_txb_dimensions(xd, 0, bsize, 0, 0, bsize, &bw, &bh, &visible_w,
2080 &visible_h);
2081
2082 #if CONFIG_HIGHBITDEPTH
2083 uint8_t *pred8;
2084 DECLARE_ALIGNED(16, uint16_t, pred16[MAX_TX_SQUARE]);
2085
2086 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
2087 pred8 = CONVERT_TO_BYTEPTR(pred16);
2088 else
2089 pred8 = (uint8_t *)pred16;
2090 #else
2091 DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
2092 #endif // CONFIG_HIGHBITDEPTH
2093
2094 #if CONFIG_HIGHBITDEPTH
2095 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2096 for (j = 0; j < bh; j++)
2097 for (i = 0; i < bw; i++)
2098 CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i];
2099 } else {
2100 #endif
2101 for (j = 0; j < bh; j++)
2102 for (i = 0; i < bw; i++) pred8[j * bw + i] = (uint8_t)pred[j * bw + i];
2103 #if CONFIG_HIGHBITDEPTH
2104 }
2105 #endif // CONFIG_HIGHBITDEPTH
2106
2107 tmp1 = (unsigned)av1_dist_8x8(cpi, x, src, src_stride, pred8, bw, bsize, bw,
2108 bh, visible_w, visible_h, qindex);
2109 tmp2 = (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, bsize,
2110 bw, bh, visible_w, visible_h, qindex);
2111
2112 if (!is_inter_block(mbmi)) {
2113 if (x->tune_metric == AOM_TUNE_PSNR) {
2114 assert(args->rd_stats.sse == tmp1 * 16);
2115 assert(args->rd_stats.dist == tmp2 * 16);
2116 }
2117 args->rd_stats.sse = (int64_t)tmp1 * 16;
2118 args->rd_stats.dist = (int64_t)tmp2 * 16;
2119 } else {
2120 // For inter mode, the decoded pixels are provided in pd->pred,
2121 // while the predicted pixels are in dst.
2122 if (x->tune_metric == AOM_TUNE_PSNR) {
2123 assert(args->rd_stats.sse == tmp2 * 16);
2124 assert(args->rd_stats.dist == tmp1 * 16);
2125 }
2126 args->rd_stats.sse = (int64_t)tmp2 * 16;
2127 args->rd_stats.dist = (int64_t)tmp1 * 16;
2128 }
2129
2130 rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist);
2131 rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse);
2132 rd = AOMMIN(rd1, rd2);
2133
2134 args->rd_stats.rdcost = rd;
2135 args->this_rd = rd;
2136
2137 if (args->this_rd > args->best_rd) args->exit_early = 1;
2138 }
2139 #endif // CONFIG_DIST_8X8
2140
txfm_rd_in_plane(MACROBLOCK * x,const AV1_COMP * cpi,RD_STATS * rd_stats,int64_t ref_best_rd,int plane,BLOCK_SIZE bsize,TX_SIZE tx_size,int use_fast_coef_casting)2141 static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
2142 RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
2143 BLOCK_SIZE bsize, TX_SIZE tx_size,
2144 int use_fast_coef_casting) {
2145 MACROBLOCKD *const xd = &x->e_mbd;
2146 const struct macroblockd_plane *const pd = &xd->plane[plane];
2147 struct rdcost_block_args args;
2148 av1_zero(args);
2149 args.x = x;
2150 args.cpi = cpi;
2151 args.best_rd = ref_best_rd;
2152 args.use_fast_coef_costing = use_fast_coef_casting;
2153 av1_init_rd_stats(&args.rd_stats);
2154
2155 if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
2156
2157 av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
2158
2159 av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
2160 &args);
2161 #if CONFIG_DIST_8X8
2162 if (x->using_dist_8x8 && !args.exit_early && plane == 0 &&
2163 bsize >= BLOCK_8X8 &&
2164 (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
2165 dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args);
2166 #endif
2167
2168 if (args.exit_early) {
2169 av1_invalid_rd_stats(rd_stats);
2170 } else {
2171 *rd_stats = args.rd_stats;
2172 }
2173 }
2174
2175 #if CONFIG_SUPERTX
av1_txfm_rd_in_plane_supertx(MACROBLOCK * x,const AV1_COMP * cpi,int * rate,int64_t * distortion,int * skippable,int64_t * sse,int64_t ref_best_rd,int plane,BLOCK_SIZE bsize,TX_SIZE tx_size,int use_fast_coef_casting)2176 void av1_txfm_rd_in_plane_supertx(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
2177 int64_t *distortion, int *skippable,
2178 int64_t *sse, int64_t ref_best_rd, int plane,
2179 BLOCK_SIZE bsize, TX_SIZE tx_size,
2180 int use_fast_coef_casting) {
2181 MACROBLOCKD *const xd = &x->e_mbd;
2182 const struct macroblockd_plane *const pd = &xd->plane[plane];
2183 struct rdcost_block_args args;
2184 av1_zero(args);
2185 args.cpi = cpi;
2186 args.x = x;
2187 args.best_rd = ref_best_rd;
2188 args.use_fast_coef_costing = use_fast_coef_casting;
2189
2190 #if CONFIG_EXT_TX
2191 assert(tx_size < TX_SIZES);
2192 #endif // CONFIG_EXT_TX
2193
2194 if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
2195
2196 av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
2197
2198 block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), tx_size,
2199 &args);
2200
2201 if (args.exit_early) {
2202 *rate = INT_MAX;
2203 *distortion = INT64_MAX;
2204 *sse = INT64_MAX;
2205 *skippable = 0;
2206 } else {
2207 *distortion = args.rd_stats.dist;
2208 *rate = args.rd_stats.rate;
2209 *sse = args.rd_stats.sse;
2210 *skippable = !x->plane[plane].eobs[0];
2211 }
2212 }
2213 #endif // CONFIG_SUPERTX
2214
tx_size_cost(const AV1_COMP * const cpi,const MACROBLOCK * const x,BLOCK_SIZE bsize,TX_SIZE tx_size)2215 static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
2216 BLOCK_SIZE bsize, TX_SIZE tx_size) {
2217 const AV1_COMMON *const cm = &cpi->common;
2218 const MACROBLOCKD *const xd = &x->e_mbd;
2219 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2220
2221 if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) {
2222 const int is_inter = is_inter_block(mbmi);
2223 const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
2224 : intra_tx_size_cat_lookup[bsize];
2225 const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
2226 const int depth = tx_size_to_depth(coded_tx_size);
2227 const int tx_size_ctx = get_tx_size_context(xd);
2228 int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
2229 #if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
2230 if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
2231 r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
2232 tx_size == quarter_txsize_lookup[bsize]);
2233 #endif
2234 return r_tx_size;
2235 } else {
2236 return 0;
2237 }
2238 }
2239
2240 #if CONFIG_LGT_FROM_PRED
av1_lgt_cost(const AV1_COMMON * cm,const MACROBLOCK * x,const MACROBLOCKD * xd,BLOCK_SIZE bsize,int plane,TX_SIZE tx_size,int use_lgt)2241 int av1_lgt_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
2242 const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
2243 TX_SIZE tx_size, int use_lgt) {
2244 if (plane > 0) return 0;
2245 const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2246 const int is_inter = is_inter_block(mbmi);
2247
2248 assert(is_lgt_allowed(mbmi->mode, tx_size));
2249 if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
2250 !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
2251 const int ext_tx_set =
2252 get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
2253 if (LGT_FROM_PRED_INTRA && !is_inter && ext_tx_set > 0 &&
2254 ALLOW_INTRA_EXT_TX)
2255 return x->intra_lgt_cost[txsize_sqr_map[tx_size]][mbmi->mode][use_lgt];
2256 if (LGT_FROM_PRED_INTRA && is_inter && ext_tx_set > 0)
2257 return x->inter_lgt_cost[txsize_sqr_map[tx_size]][use_lgt];
2258 }
2259 return 0;
2260 }
2261 #endif // CONFIG_LGT_FROM_PRED
2262
// TODO(angiebird): use this function whenever it's possible
// Rate cost of signalling tx_type for the luma plane; chroma derives its
// transform type and costs nothing. Returns 0 whenever tx_type is implicit
// (lossless segment, fixed tx type, or a single-entry transform set).
int av1_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
                     const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                     TX_SIZE tx_size, TX_TYPE tx_type) {
  if (plane > 0) return 0;

#if CONFIG_LGT_FROM_PRED
  assert(!xd->mi[0]->mbmi.use_lgt);
#endif
#if CONFIG_VAR_TX
  // With variable tx, signalling is based on the minimum tx size.
  tx_size = get_min_tx_size(tx_size);
#endif

  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int is_inter = is_inter_block(mbmi);
#if CONFIG_EXT_TX
  // Only signalled when more than one tx type is available for this
  // size/block combination and the segment is not lossless.
  if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    const int ext_tx_set =
        get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
    if (is_inter) {
      if (ext_tx_set > 0)
        return x
            ->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]][tx_type];
    } else {
      if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
        return x->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
                                     [mbmi->mode][tx_type];
    }
  }
#else
  (void)bsize;
  (void)cm;
  if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
      !FIXED_TX_TYPE) {
    if (is_inter) {
      return x->inter_tx_type_costs[tx_size][tx_type];
    } else {
      // Intra tx type is conditioned on the prediction mode.
      return x->intra_tx_type_costs[tx_size]
                                   [intra_mode_to_tx_type_context[mbmi->mode]]
                                   [tx_type];
    }
  }
#endif  // CONFIG_EXT_TX
  return 0;
}
txfm_yrd(const AV1_COMP * const cpi,MACROBLOCK * x,RD_STATS * rd_stats,int64_t ref_best_rd,BLOCK_SIZE bs,TX_TYPE tx_type,TX_SIZE tx_size)2309 static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
2310 RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
2311 TX_TYPE tx_type, TX_SIZE tx_size) {
2312 const AV1_COMMON *const cm = &cpi->common;
2313 MACROBLOCKD *const xd = &x->e_mbd;
2314 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2315 int64_t rd = INT64_MAX;
2316 aom_prob skip_prob = av1_get_skip_prob(cm, xd);
2317 int s0, s1;
2318 const int is_inter = is_inter_block(mbmi);
2319 const int tx_select =
2320 cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
2321
2322 const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size);
2323
2324 #if CONFIG_PVQ
2325 assert(tx_size >= TX_4X4);
2326 #endif // CONFIG_PVQ
2327 assert(skip_prob > 0);
2328 #if CONFIG_EXT_TX && CONFIG_RECT_TX
2329 assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
2330 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2331
2332 s0 = av1_cost_bit(skip_prob, 0);
2333 s1 = av1_cost_bit(skip_prob, 1);
2334
2335 mbmi->tx_type = tx_type;
2336 mbmi->tx_size = tx_size;
2337 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, tx_size,
2338 cpi->sf.use_fast_coef_costing);
2339 if (rd_stats->rate == INT_MAX) return INT64_MAX;
2340 #if !CONFIG_TXK_SEL
2341 int plane = 0;
2342 #if CONFIG_LGT_FROM_PRED
2343 if (is_lgt_allowed(mbmi->mode, tx_size))
2344 rd_stats->rate +=
2345 av1_lgt_cost(cm, x, xd, bs, plane, tx_size, mbmi->use_lgt);
2346 if (!mbmi->use_lgt)
2347 rd_stats->rate += av1_tx_type_cost(cm, x, xd, bs, plane, tx_size, tx_type);
2348 #else
2349 rd_stats->rate += av1_tx_type_cost(cm, x, xd, bs, plane, tx_size, tx_type);
2350 #endif // CONFIG_LGT_FROM_PRED
2351 #endif
2352
2353 if (rd_stats->skip) {
2354 if (is_inter) {
2355 rd = RDCOST(x->rdmult, s1, rd_stats->sse);
2356 } else {
2357 rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse);
2358 }
2359 } else {
2360 rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select,
2361 rd_stats->dist);
2362 }
2363
2364 if (tx_select) rd_stats->rate += r_tx_size;
2365
2366 if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
2367 !(rd_stats->skip))
2368 rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));
2369
2370 return rd;
2371 }
2372
skip_txfm_search(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bs,TX_TYPE tx_type,TX_SIZE tx_size)2373 static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
2374 TX_TYPE tx_type, TX_SIZE tx_size) {
2375 const MACROBLOCKD *const xd = &x->e_mbd;
2376 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2377 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
2378 const int is_inter = is_inter_block(mbmi);
2379 int prune = 0;
2380 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
2381 // passing -1 in for tx_type indicates that all 1D
2382 // transforms should be considered for pruning
2383 prune = prune_tx_types(cpi, bs, x, xd, -1);
2384
2385 #if CONFIG_MRC_TX
2386 // MRC_DCT only implemented for TX_32X32 so only include this tx in
2387 // the search for TX_32X32
2388 if (tx_type == MRC_DCT &&
2389 ((is_inter && !USE_MRC_INTER) || (!is_inter && !USE_MRC_INTRA) ||
2390 tx_size != TX_32X32))
2391 return 1;
2392 #endif // CONFIG_MRC_TX
2393 #if CONFIG_LGT_FROM_PRED
2394 if (mbmi->use_lgt && mbmi->ref_mv_idx > 0) return 1;
2395 #endif // CONFIG_LGT_FROM_PRED
2396 if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
2397 if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
2398 return 1;
2399 if (!is_inter && x->use_default_intra_tx_type &&
2400 tx_type != get_default_tx_type(0, xd, 0, tx_size))
2401 return 1;
2402 if (is_inter && x->use_default_inter_tx_type &&
2403 tx_type != get_default_tx_type(0, xd, 0, tx_size))
2404 return 1;
2405 if (max_tx_size >= TX_32X32 && tx_size == TX_4X4) return 1;
2406 #if CONFIG_EXT_TX
2407 const AV1_COMMON *const cm = &cpi->common;
2408 const TxSetType tx_set_type =
2409 get_ext_tx_set_type(tx_size, bs, is_inter, cm->reduced_tx_set_used);
2410 if (!av1_ext_tx_used[tx_set_type][tx_type]) return 1;
2411 if (is_inter) {
2412 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
2413 if (!do_tx_type_search(tx_type, prune)) return 1;
2414 }
2415 } else {
2416 if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
2417 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) return 1;
2418 }
2419 }
2420 #else // CONFIG_EXT_TX
2421 if (tx_size >= TX_32X32 && tx_type != DCT_DCT) return 1;
2422 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
2423 !do_tx_type_search(tx_type, prune))
2424 return 1;
2425 #endif // CONFIG_EXT_TX
2426 return 0;
2427 }
2428
2429 #if (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA)
estimate_yrd_for_sb(const AV1_COMP * const cpi,BLOCK_SIZE bs,MACROBLOCK * x,int * r,int64_t * d,int * s,int64_t * sse,int64_t ref_best_rd)2430 static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
2431 MACROBLOCK *x, int *r, int64_t *d, int *s,
2432 int64_t *sse, int64_t ref_best_rd) {
2433 RD_STATS rd_stats;
2434 int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, DCT_DCT,
2435 max_txsize_lookup[bs]);
2436 *r = rd_stats.rate;
2437 *d = rd_stats.dist;
2438 *s = rd_stats.skip;
2439 *sse = rd_stats.sse;
2440 return rd;
2441 }
#endif  // (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA)
2443
// Evaluates the RD cost of coding the luma plane with the largest transform
// size allowed by cm->tx_mode, searching over the permitted transform types.
// On return, |rd_stats| holds the stats of the best candidate and
// mbmi->tx_type (and, when enabled, mbmi->use_lgt) reflect the winner.
// mbmi->tx_size is set unconditionally; ties/failures fall back to DCT_DCT.
static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
                                   RD_STATS *rd_stats, int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int64_t this_rd, best_rd = INT64_MAX;
  aom_prob skip_prob = av1_get_skip_prob(cm, xd);
  int s0 = av1_cost_bit(skip_prob, 0);  // Cost of signaling "not skipped".
  int s1 = av1_cost_bit(skip_prob, 1);  // Cost of signaling "skipped".
  const int is_inter = is_inter_block(mbmi);
  int prune = 0;
  const int plane = 0;
#if CONFIG_LGT_FROM_PRED
  int is_lgt_best = 0;
  // Bug fix: '!' binds tighter than '>', so the previous
  //   !cpi->sf.tx_type_search.prune_mode > NO_PRUNE
  // compared (!prune_mode) against NO_PRUNE, which matched the intent only
  // because NO_PRUNE happens to be 0. Parenthesize to state the intent
  // explicitly: search LGT only when no transform-type pruning is active.
  int search_lgt = is_inter
                       ? LGT_FROM_PRED_INTER && !x->use_default_inter_tx_type &&
                             !(cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
                       : LGT_FROM_PRED_INTRA && !x->use_default_intra_tx_type &&
                             ALLOW_INTRA_EXT_TX;
#endif  // CONFIG_LGT_FROM_PRED
  av1_invalid_rd_stats(rd_stats);

  mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
#if CONFIG_VAR_TX
  mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
#endif  // CONFIG_VAR_TX
#if CONFIG_EXT_TX
  int ext_tx_set =
      get_ext_tx_set(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);
  const TxSetType tx_set_type =
      get_ext_tx_set_type(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);
#endif  // CONFIG_EXT_TX

  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
#if CONFIG_EXT_TX
    prune = prune_tx_types(cpi, bs, x, xd, ext_tx_set);
#else
    prune = prune_tx_types(cpi, bs, x, xd, 0);
#endif  // CONFIG_EXT_TX
#if CONFIG_EXT_TX
  if (get_ext_tx_types(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used) >
          1 &&
      !xd->lossless[mbmi->segment_id]) {
#if CONFIG_PVQ
    od_rollback_buffer pre_buf, post_buf;

    od_encode_checkpoint(&x->daala_enc, &pre_buf);
    od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif  // CONFIG_PVQ

    // Exhaustive search over the allowed transform types for this tx set.
    for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
      if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
      RD_STATS this_rd_stats;
      if (is_inter) {
        if (x->use_default_inter_tx_type &&
            tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
          continue;
        if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
          if (!do_tx_type_search(tx_type, prune)) continue;
        }
      } else {
        if (x->use_default_intra_tx_type &&
            tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
          continue;
        if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
          if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
        }
      }

      mbmi->tx_type = tx_type;

      txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
                       mbmi->tx_size, cpi->sf.use_fast_coef_costing);
#if CONFIG_PVQ
      od_encode_rollback(&x->daala_enc, &pre_buf);
#endif  // CONFIG_PVQ
      if (this_rd_stats.rate == INT_MAX) continue;
      av1_tx_type_cost(cm, x, xd, bs, plane, mbmi->tx_size, tx_type);

      if (this_rd_stats.skip)
        this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
      else
        this_rd =
            RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
      // For non-lossless inter blocks, also consider coding the block as
      // fully skipped and keep the cheaper option.
      if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
          !this_rd_stats.skip)
        this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));

      if (this_rd < best_rd) {
        best_rd = this_rd;
        best_tx_type = mbmi->tx_type;
        *rd_stats = this_rd_stats;
#if CONFIG_PVQ
        od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif  // CONFIG_PVQ
      }
    }
#if CONFIG_PVQ
    od_encode_rollback(&x->daala_enc, &post_buf);
#endif  // CONFIG_PVQ
#if CONFIG_LGT_FROM_PRED
    // search LGT
    if (search_lgt && is_lgt_allowed(mbmi->mode, mbmi->tx_size) &&
        !cm->reduced_tx_set_used) {
      RD_STATS this_rd_stats;
      mbmi->use_lgt = 1;
      txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
                       mbmi->tx_size, cpi->sf.use_fast_coef_costing);
      if (this_rd_stats.rate != INT_MAX) {
        av1_lgt_cost(cm, x, xd, bs, plane, mbmi->tx_size, 1);
        if (this_rd_stats.skip)
          this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
        else
          this_rd =
              RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
        if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
            !this_rd_stats.skip)
          this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));
        if (this_rd < best_rd) {
          best_rd = this_rd;
          is_lgt_best = 1;
          *rd_stats = this_rd_stats;
        }
      }
      mbmi->use_lgt = 0;
    }
#endif  // CONFIG_LGT_FROM_PRED
  } else {
    // Single transform type available (or lossless): just cost DCT_DCT.
    mbmi->tx_type = DCT_DCT;
    txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
                     cpi->sf.use_fast_coef_costing);
  }
#else   // CONFIG_EXT_TX
  if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) {
    for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
      RD_STATS this_rd_stats;
      if (!is_inter && x->use_default_intra_tx_type &&
          tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
        continue;
      if (is_inter && x->use_default_inter_tx_type &&
          tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
        continue;
      mbmi->tx_type = tx_type;
      txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
                       mbmi->tx_size, cpi->sf.use_fast_coef_costing);
      if (this_rd_stats.rate == INT_MAX) continue;

      av1_tx_type_cost(cm, x, xd, bs, plane, mbmi->tx_size, tx_type);
      if (is_inter) {
        if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
            !do_tx_type_search(tx_type, prune))
          continue;
      }
      if (this_rd_stats.skip)
        this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
      else
        this_rd =
            RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
      if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip)
        this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));

      if (this_rd < best_rd) {
        best_rd = this_rd;
        best_tx_type = mbmi->tx_type;
        *rd_stats = this_rd_stats;
      }
    }
  } else {
    mbmi->tx_type = DCT_DCT;
    txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
                     cpi->sf.use_fast_coef_costing);
  }
#endif  // CONFIG_EXT_TX
  mbmi->tx_type = best_tx_type;
#if CONFIG_LGT_FROM_PRED
  mbmi->use_lgt = is_lgt_best;
#endif  // CONFIG_LGT_FROM_PRED
}
2624
// Lossless path: transform size/type are fixed (4x4 DCT), so simply set them
// on the mode info and evaluate the resulting RD stats for the luma plane.
static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    RD_STATS *rd_stats, int64_t ref_best_rd,
                                    BLOCK_SIZE bs) {
  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;

  mbmi->tx_type = DCT_DCT;
  mbmi->tx_size = TX_4X4;
#if CONFIG_VAR_TX
  mbmi->min_tx_size = get_min_tx_size(TX_4X4);
#endif  // CONFIG_VAR_TX

  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
                   cpi->sf.use_fast_coef_costing);
}
2640
2641 #if CONFIG_TXK_SEL || CONFIG_VAR_TX
bsize_to_num_blk(BLOCK_SIZE bsize)2642 static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
2643 int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
2644 return num_blk;
2645 }
2646 #endif // CONFIG_TXK_SEL || CONFIG_VAR_TX
2647
// Joint RD search over transform size and transform type for the luma plane.
// Evaluates (in order): rectangular tx sizes, 1:4/4:1 "quarter" tx sizes,
// then a descending sweep of square tx sizes, plus (when enabled) the
// LGT-from-prediction transform at each size. The winning combination is
// written into mbmi (tx_size, tx_type, use_lgt, min_tx_size) and its stats
// into |rd_stats|.
static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
                                        MACROBLOCK *x, RD_STATS *rd_stats,
                                        int64_t ref_best_rd, BLOCK_SIZE bs) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd = INT64_MAX;
  int n;
  int start_tx, end_tx;
  int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  TX_SIZE best_tx_size = max_tx_size;
  TX_TYPE best_tx_type = DCT_DCT;
#if CONFIG_LGT_FROM_PRED
  int breakout = 0;
  int is_lgt_best = 0;
  mbmi->use_lgt = 0;
#endif  // CONFIG_LGT_FROM_PRED
#if CONFIG_TXK_SEL
  TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#endif  // CONFIG_TXK_SEL
  const int tx_select = cm->tx_mode == TX_MODE_SELECT;
  const int is_inter = is_inter_block(mbmi);
#if CONFIG_PVQ
  od_rollback_buffer buf;
  od_encode_checkpoint(&x->daala_enc, &buf);
#endif  // CONFIG_PVQ

  av1_invalid_rd_stats(rd_stats);

#if CONFIG_EXT_TX && CONFIG_RECT_TX
  // Phase 1: rectangular transform sizes.
  int evaluate_rect_tx = 0;
  if (tx_select) {
    evaluate_rect_tx = is_rect_tx_allowed(xd, mbmi);
  } else {
    const TX_SIZE chosen_tx_size =
        tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
    evaluate_rect_tx = is_rect_tx(chosen_tx_size);
    assert(IMPLIES(evaluate_rect_tx, is_rect_tx_allowed(xd, mbmi)));
  }
  if (evaluate_rect_tx) {
    TX_TYPE tx_start = DCT_DCT;
    TX_TYPE tx_end = TX_TYPES;
#if CONFIG_TXK_SEL
    // The tx_type becomes dummy when lv_map is on. The tx_type search will be
    // performed in av1_search_txk_type()
    tx_end = DCT_DCT + 1;
#endif
    TX_TYPE tx_type;
    for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
      // Extra ref-MV candidates only try DCT_DCT to bound the search cost.
      if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
      const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
      RD_STATS this_rd_stats;
      const TxSetType tx_set_type = get_ext_tx_set_type(
          rect_tx_size, bs, is_inter, cm->reduced_tx_set_used);
      if (av1_ext_tx_used[tx_set_type][tx_type]) {
        rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type,
                      rect_tx_size);
        ref_best_rd = AOMMIN(rd, ref_best_rd);
        if (rd < best_rd) {
#if CONFIG_TXK_SEL
          // NOTE(review): hard-coded count of 256 entries; presumably bounded
          // by sizeof(best_txk_type) -- confirm against MAX_SB_SQUARE.
          memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256);
#endif
          best_tx_type = tx_type;
          best_tx_size = rect_tx_size;
          best_rd = rd;
          *rd_stats = this_rd_stats;
        }
      }
#if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
      const int is_inter = is_inter_block(mbmi);
      if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
#endif  // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    }
#if CONFIG_LGT_FROM_PRED
    const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
    if (is_lgt_allowed(mbmi->mode, rect_tx_size) && !cm->reduced_tx_set_used) {
      RD_STATS this_rd_stats;
      mbmi->use_lgt = 1;
      rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, 0, rect_tx_size);
      if (rd < best_rd) {
        is_lgt_best = 1;
        best_tx_size = rect_tx_size;
        best_rd = rd;
        *rd_stats = this_rd_stats;
      }
      mbmi->use_lgt = 0;
    }
#endif  // CONFIG_LGT_FROM_PRED
  }

#if CONFIG_RECT_TX_EXT
  // Phase 2: 1:4/4:1 "quarter" transform sizes.
  // test 1:4/4:1 tx
  int evaluate_quarter_tx = 0;
  if (is_quarter_tx_allowed(xd, mbmi, is_inter)) {
    if (tx_select) {
      evaluate_quarter_tx = 1;
    } else {
      const TX_SIZE chosen_tx_size =
          tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
      evaluate_quarter_tx = chosen_tx_size == quarter_txsize_lookup[bs];
    }
  }
  if (evaluate_quarter_tx) {
    TX_TYPE tx_start = DCT_DCT;
    TX_TYPE tx_end = TX_TYPES;
#if CONFIG_TXK_SEL
    // The tx_type becomes dummy when lv_map is on. The tx_type search will be
    // performed in av1_search_txk_type()
    tx_end = DCT_DCT + 1;
#endif
    TX_TYPE tx_type;
    for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
      if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
      const TX_SIZE tx_size = quarter_txsize_lookup[bs];
      RD_STATS this_rd_stats;
      const TxSetType tx_set_type =
          get_ext_tx_set_type(tx_size, bs, is_inter, cm->reduced_tx_set_used);
      if (av1_ext_tx_used[tx_set_type][tx_type]) {
        rd =
            txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, tx_size);
        if (rd < best_rd) {
#if CONFIG_TXK_SEL
          // NOTE(review): 'num_blk' is not declared in this function; this
          // arm likely fails to compile with CONFIG_RECT_TX_EXT +
          // CONFIG_TXK_SEL both enabled -- verify.
          memcpy(best_txk_type, mbmi->txk_type,
                 sizeof(best_txk_type[0]) * num_blk);
#endif
          best_tx_type = tx_type;
#if CONFIG_LGT_FROM_PRED
          is_lgt_best = 0;
#endif
          best_tx_size = tx_size;
          best_rd = rd;
          *rd_stats = this_rd_stats;
        }
      }
#if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
      const int is_inter = is_inter_block(mbmi);
      if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
#endif  // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    }
#if CONFIG_LGT_FROM_PRED
    // NOTE(review): 'tx_size' in this condition is declared on the following
    // line *inside* the if body; the name used here is out of scope (the
    // loop-local tx_size above has ended). Likely a compile error when
    // CONFIG_RECT_TX_EXT and CONFIG_LGT_FROM_PRED are both enabled -- verify.
    if (is_lgt_allowed(mbmi->mode, tx_size) && !cm->reduced_tx_set_used) {
      const TX_SIZE tx_size = quarter_txsize_lookup[bs];
      RD_STATS this_rd_stats;
      mbmi->use_lgt = 1;
      rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, 0, tx_size);
      if (rd < best_rd) {
        is_lgt_best = 1;
        best_tx_size = tx_size;
        best_rd = rd;
        *rd_stats = this_rd_stats;
      }
      mbmi->use_lgt = 0;
    }
#endif  // CONFIG_LGT_FROM_PRED
  }
#endif  // CONFIG_RECT_TX_EXT
#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX

  // Phase 3: descending sweep over square transform sizes.
  if (tx_select) {
    start_tx = max_tx_size;
    end_tx = (max_tx_size >= TX_32X32) ? TX_8X8 : TX_4X4;
  } else {
    const TX_SIZE chosen_tx_size =
        tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  last_rd = INT64_MAX;
  for (n = start_tx; n >= end_tx; --n) {
#if CONFIG_EXT_TX && CONFIG_RECT_TX
    if (is_rect_tx(n)) break;
#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
    TX_TYPE tx_start = DCT_DCT;
    TX_TYPE tx_end = TX_TYPES;
#if CONFIG_TXK_SEL
    // The tx_type becomes dummy when lv_map is on. The tx_type search will be
    // performed in av1_search_txk_type()
    tx_end = DCT_DCT + 1;
#endif
    TX_TYPE tx_type;
    for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
      RD_STATS this_rd_stats;
      if (skip_txfm_search(cpi, x, bs, tx_type, n)) continue;
      rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, n);
#if CONFIG_PVQ
      od_encode_rollback(&x->daala_enc, &buf);
#endif  // CONFIG_PVQ
      // Early termination in transform size search.
      if (cpi->sf.tx_size_search_breakout &&
          (rd == INT64_MAX ||
           (this_rd_stats.skip == 1 && tx_type != DCT_DCT && n < start_tx) ||
           (n < (int)max_tx_size && rd > last_rd))) {
#if CONFIG_LGT_FROM_PRED
        breakout = 1;
#endif
        break;
      }

      last_rd = rd;
      ref_best_rd = AOMMIN(rd, ref_best_rd);
      if (rd < best_rd) {
#if CONFIG_TXK_SEL
        // NOTE(review): same hard-coded 256-entry copy as above -- confirm.
        memcpy(best_txk_type, mbmi->txk_type, sizeof(best_txk_type[0]) * 256);
#endif
        best_tx_type = tx_type;
#if CONFIG_LGT_FROM_PRED
        is_lgt_best = 0;
#endif
        best_tx_size = n;
        best_rd = rd;
        *rd_stats = this_rd_stats;
      }
#if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
      const int is_inter = is_inter_block(mbmi);
      if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
#endif  // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    }
#if CONFIG_LGT_FROM_PRED
    mbmi->use_lgt = 1;
    if (is_lgt_allowed(mbmi->mode, n) && !skip_txfm_search(cpi, x, bs, 0, n) &&
        !breakout) {
      RD_STATS this_rd_stats;
      rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, 0, n);
      if (rd < best_rd) {
        is_lgt_best = 1;
        best_tx_size = n;
        best_rd = rd;
        *rd_stats = this_rd_stats;
      }
    }
    mbmi->use_lgt = 0;
#endif  // CONFIG_LGT_FROM_PRED
  }
  // Commit the winning combination to the mode info.
  mbmi->tx_size = best_tx_size;
  mbmi->tx_type = best_tx_type;
#if CONFIG_LGT_FROM_PRED
  mbmi->use_lgt = is_lgt_best;
  assert(!is_lgt_best || is_lgt_allowed(mbmi->mode, mbmi->tx_size));
#endif  // CONFIG_LGT_FROM_PRED
#if CONFIG_TXK_SEL
  memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * 256);
#endif

#if CONFIG_VAR_TX
  mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
#endif  // CONFIG_VAR_TX

#if !CONFIG_EXT_TX
  if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
#endif  // !CONFIG_EXT_TX
#if CONFIG_PVQ
  // Re-run the winner so the PVQ encoder state matches the chosen transform.
  if (best_rd != INT64_MAX) {
    txfm_yrd(cpi, x, rd_stats, ref_best_rd, bs, best_tx_type, best_tx_size);
  }
#endif  // CONFIG_PVQ
}
2906
// Dispatches the luma transform RD search for superblock |bs| based on
// lossless mode and the configured tx-size search method; results land in
// |rd_stats|.
static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
                            RD_STATS *rd_stats, BLOCK_SIZE bs,
                            int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  av1_init_rd_stats(rd_stats);

  assert(bs == xd->mi[0]->mbmi.sb_type);

  // Lossless coding pins the transform to 4x4 DCT.
  if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
    return;
  }

  if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
    choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
  else
    choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
}
2923
// Returns 1 if the oblique intra mode |mode| can be skipped because the best
// mode found so far is not one of its neighboring directional modes.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
2940
2941 // Model based RD estimation for luma intra blocks.
intra_model_yrd(const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,int mode_cost)2942 static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
2943 BLOCK_SIZE bsize, int mode_cost) {
2944 const AV1_COMMON *cm = &cpi->common;
2945 MACROBLOCKD *const xd = &x->e_mbd;
2946 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2947 assert(!is_inter_block(mbmi));
2948 RD_STATS this_rd_stats;
2949 int row, col;
2950 int64_t temp_sse, this_rd;
2951 const TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cpi->common.tx_mode, 0);
2952 const int stepr = tx_size_high_unit[tx_size];
2953 const int stepc = tx_size_wide_unit[tx_size];
2954 const int max_blocks_wide = max_block_wide(xd, bsize, 0);
2955 const int max_blocks_high = max_block_high(xd, bsize, 0);
2956 mbmi->tx_size = tx_size;
2957 // Prediction.
2958 const int step = stepr * stepc;
2959 int block = 0;
2960 for (row = 0; row < max_blocks_high; row += stepr) {
2961 for (col = 0; col < max_blocks_wide; col += stepc) {
2962 av1_predict_intra_block_facade(cm, xd, 0, block, col, row, tx_size);
2963 block += step;
2964 }
2965 }
2966 // RD estimation.
2967 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
2968 &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
2969 #if CONFIG_EXT_INTRA
2970 if (av1_is_directional_mode(mbmi->mode, bsize) &&
2971 av1_use_angle_delta(bsize)) {
2972 mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
2973 MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
2974 }
2975 #endif // CONFIG_EXT_INTRA
2976 #if CONFIG_FILTER_INTRA
2977 if (mbmi->mode == DC_PRED) {
2978 const aom_prob prob = cpi->common.fc->filter_intra_probs[0];
2979 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
2980 const int mode = mbmi->filter_intra_mode_info.filter_intra_mode[0];
2981 mode_cost += (av1_cost_bit(prob, 1) +
2982 write_uniform_cost(FILTER_INTRA_MODES, mode));
2983 } else {
2984 mode_cost += av1_cost_bit(prob, 0);
2985 }
2986 }
2987 #endif // CONFIG_FILTER_INTRA
2988 this_rd =
2989 RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist);
2990 return this_rd;
2991 }
2992
2993 // Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
2994 // new_height'. Extra rows and columns are filled in by copying last valid
2995 // row/column.
extend_palette_color_map(uint8_t * const color_map,int orig_width,int orig_height,int new_width,int new_height)2996 static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
2997 int orig_height, int new_width,
2998 int new_height) {
2999 int j;
3000 assert(new_width >= orig_width);
3001 assert(new_height >= orig_height);
3002 if (new_width == orig_width && new_height == orig_height) return;
3003
3004 for (j = orig_height - 1; j >= 0; --j) {
3005 memmove(color_map + j * new_width, color_map + j * orig_width, orig_width);
3006 // Copy last column to extra columns.
3007 memset(color_map + j * new_width + orig_width,
3008 color_map[j * new_width + orig_width - 1], new_width - orig_width);
3009 }
3010 // Copy last row to extra rows.
3011 for (j = orig_height; j < new_height; ++j) {
3012 memcpy(color_map + j * new_width, color_map + (orig_height - 1) * new_width,
3013 new_width);
3014 }
3015 }
3016
3017 #if CONFIG_PALETTE_DELTA_ENCODING
3018 // Bias toward using colors in the cache.
3019 // TODO(huisu): Try other schemes to improve compression.
optimize_palette_colors(uint16_t * color_cache,int n_cache,int n_colors,int stride,float * centroids)3020 static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
3021 int n_colors, int stride,
3022 float *centroids) {
3023 if (n_cache <= 0) return;
3024 for (int i = 0; i < n_colors * stride; i += stride) {
3025 float min_diff = fabsf(centroids[i] - color_cache[0]);
3026 int idx = 0;
3027 for (int j = 1; j < n_cache; ++j) {
3028 float this_diff = fabsf(centroids[i] - color_cache[j]);
3029 if (this_diff < min_diff) {
3030 min_diff = this_diff;
3031 idx = j;
3032 }
3033 }
3034 if (min_diff < 1.5) centroids[i] = color_cache[idx];
3035 }
3036 }
3037 #endif // CONFIG_PALETTE_DELTA_ENCODING
3038
rd_pick_palette_intra_sby(const AV1_COMP * const cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int palette_ctx,int dc_mode_cost,MB_MODE_INFO * best_mbmi,uint8_t * best_palette_color_map,int64_t * best_rd,int64_t * best_model_rd,int * rate,int * rate_tokenonly,int64_t * distortion,int * skippable)3039 static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
3040 BLOCK_SIZE bsize, int palette_ctx,
3041 int dc_mode_cost, MB_MODE_INFO *best_mbmi,
3042 uint8_t *best_palette_color_map,
3043 int64_t *best_rd, int64_t *best_model_rd,
3044 int *rate, int *rate_tokenonly,
3045 int64_t *distortion, int *skippable) {
3046 int rate_overhead = 0;
3047 MACROBLOCKD *const xd = &x->e_mbd;
3048 MODE_INFO *const mic = xd->mi[0];
3049 MB_MODE_INFO *const mbmi = &mic->mbmi;
3050 assert(!is_inter_block(mbmi));
3051 assert(bsize >= BLOCK_8X8);
3052 int this_rate, colors, n;
3053 const int src_stride = x->plane[0].src.stride;
3054 const uint8_t *const src = x->plane[0].src.buf;
3055 uint8_t *const color_map = xd->plane[0].color_index_map;
3056 int block_width, block_height, rows, cols;
3057 av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
3058 &cols);
3059
3060 assert(cpi->common.allow_screen_content_tools);
3061
3062 #if CONFIG_HIGHBITDEPTH
3063 if (cpi->common.use_highbitdepth)
3064 colors = av1_count_colors_highbd(src, src_stride, rows, cols,
3065 cpi->common.bit_depth);
3066 else
3067 #endif // CONFIG_HIGHBITDEPTH
3068 colors = av1_count_colors(src, src_stride, rows, cols);
3069 #if CONFIG_FILTER_INTRA
3070 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
3071 #endif // CONFIG_FILTER_INTRA
3072
3073 if (colors > 1 && colors <= 64) {
3074 int r, c, i, k, palette_mode_cost;
3075 const int max_itr = 50;
3076 float *const data = x->palette_buffer->kmeans_data_buf;
3077 float centroids[PALETTE_MAX_SIZE];
3078 float lb, ub, val;
3079 RD_STATS tokenonly_rd_stats;
3080 int64_t this_rd, this_model_rd;
3081 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3082 #if CONFIG_HIGHBITDEPTH
3083 uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
3084 if (cpi->common.use_highbitdepth)
3085 lb = ub = src16[0];
3086 else
3087 #endif // CONFIG_HIGHBITDEPTH
3088 lb = ub = src[0];
3089
3090 #if CONFIG_HIGHBITDEPTH
3091 if (cpi->common.use_highbitdepth) {
3092 for (r = 0; r < rows; ++r) {
3093 for (c = 0; c < cols; ++c) {
3094 val = src16[r * src_stride + c];
3095 data[r * cols + c] = val;
3096 if (val < lb)
3097 lb = val;
3098 else if (val > ub)
3099 ub = val;
3100 }
3101 }
3102 } else {
3103 #endif // CONFIG_HIGHBITDEPTH
3104 for (r = 0; r < rows; ++r) {
3105 for (c = 0; c < cols; ++c) {
3106 val = src[r * src_stride + c];
3107 data[r * cols + c] = val;
3108 if (val < lb)
3109 lb = val;
3110 else if (val > ub)
3111 ub = val;
3112 }
3113 }
3114 #if CONFIG_HIGHBITDEPTH
3115 }
3116 #endif // CONFIG_HIGHBITDEPTH
3117
3118 mbmi->mode = DC_PRED;
3119 #if CONFIG_FILTER_INTRA
3120 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
3121 #endif // CONFIG_FILTER_INTRA
3122
3123 if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return 0;
3124
3125 #if CONFIG_PALETTE_DELTA_ENCODING
3126 uint16_t color_cache[2 * PALETTE_MAX_SIZE];
3127 const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
3128 #endif // CONFIG_PALETTE_DELTA_ENCODING
3129
3130 for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
3131 --n) {
3132 if (colors == PALETTE_MIN_SIZE) {
3133 // Special case: These colors automatically become the centroids.
3134 assert(colors == n);
3135 assert(colors == 2);
3136 centroids[0] = lb;
3137 centroids[1] = ub;
3138 k = 2;
3139 } else {
3140 for (i = 0; i < n; ++i) {
3141 centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
3142 }
3143 av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
3144 #if CONFIG_PALETTE_DELTA_ENCODING
3145 optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
3146 #endif // CONFIG_PALETTE_DELTA_ENCODING
3147 k = av1_remove_duplicates(centroids, n);
3148 if (k < PALETTE_MIN_SIZE) {
3149 // Too few unique colors to create a palette. And DC_PRED will work
3150 // well for that case anyway. So skip.
3151 continue;
3152 }
3153 }
3154
3155 #if CONFIG_HIGHBITDEPTH
3156 if (cpi->common.use_highbitdepth)
3157 for (i = 0; i < k; ++i)
3158 pmi->palette_colors[i] =
3159 clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth);
3160 else
3161 #endif // CONFIG_HIGHBITDEPTH
3162 for (i = 0; i < k; ++i)
3163 pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
3164 pmi->palette_size[0] = k;
3165
3166 av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
3167 extend_palette_color_map(color_map, cols, rows, block_width,
3168 block_height);
3169 palette_mode_cost =
3170 dc_mode_cost +
3171 x->palette_y_size_cost[bsize - BLOCK_8X8][k - PALETTE_MIN_SIZE] +
3172 write_uniform_cost(k, color_map[0]) +
3173 av1_cost_bit(
3174 av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx],
3175 1);
3176 palette_mode_cost += av1_palette_color_cost_y(pmi,
3177 #if CONFIG_PALETTE_DELTA_ENCODING
3178 color_cache, n_cache,
3179 #endif // CONFIG_PALETTE_DELTA_ENCODING
3180 cpi->common.bit_depth);
3181 palette_mode_cost +=
3182 av1_cost_color_map(x, 0, 0, bsize, mbmi->tx_size, PALETTE_MAP);
3183 this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
3184 if (*best_model_rd != INT64_MAX &&
3185 this_model_rd > *best_model_rd + (*best_model_rd >> 1))
3186 continue;
3187 if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
3188 super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
3189 if (tokenonly_rd_stats.rate == INT_MAX) continue;
3190 this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
3191 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
3192 if (!xd->lossless[mbmi->segment_id] &&
3193 block_signals_txsize(mbmi->sb_type)) {
3194 tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
3195 }
3196 if (this_rd < *best_rd) {
3197 *best_rd = this_rd;
3198 memcpy(best_palette_color_map, color_map,
3199 block_width * block_height * sizeof(color_map[0]));
3200 *best_mbmi = *mbmi;
3201 rate_overhead = this_rate - tokenonly_rd_stats.rate;
3202 if (rate) *rate = this_rate;
3203 if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
3204 if (distortion) *distortion = tokenonly_rd_stats.dist;
3205 if (skippable) *skippable = tokenonly_rd_stats.skip;
3206 }
3207 }
3208 }
3209
3210 if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
3211 memcpy(color_map, best_palette_color_map,
3212 block_width * block_height * sizeof(best_palette_color_map[0]));
3213 }
3214 *mbmi = *best_mbmi;
3215 return rate_overhead;
3216 }
3217
rd_pick_intra_sub_8x8_y_subblock_mode(const AV1_COMP * const cpi,MACROBLOCK * x,int row,int col,PREDICTION_MODE * best_mode,const int * bmode_costs,ENTROPY_CONTEXT * a,ENTROPY_CONTEXT * l,int * bestrate,int * bestratey,int64_t * bestdistortion,BLOCK_SIZE bsize,TX_SIZE tx_size,int * y_skip,int64_t rd_thresh)3218 static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
3219 const AV1_COMP *const cpi, MACROBLOCK *x, int row, int col,
3220 PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
3221 ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
3222 BLOCK_SIZE bsize, TX_SIZE tx_size, int *y_skip, int64_t rd_thresh) {
3223 const AV1_COMMON *const cm = &cpi->common;
3224 PREDICTION_MODE mode;
3225 MACROBLOCKD *const xd = &x->e_mbd;
3226 assert(!is_inter_block(&xd->mi[0]->mbmi));
3227 int64_t best_rd = rd_thresh;
3228 struct macroblock_plane *p = &x->plane[0];
3229 struct macroblockd_plane *pd = &xd->plane[0];
3230 const int src_stride = p->src.stride;
3231 const int dst_stride = pd->dst.stride;
3232 const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
3233 uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
3234 #if CONFIG_CHROMA_2X2
3235 // TODO(jingning): This is a temporal change. The whole function should be
3236 // out when cb4x4 is enabled.
3237 ENTROPY_CONTEXT ta[4], tempa[4];
3238 ENTROPY_CONTEXT tl[4], templ[4];
3239 #else
3240 ENTROPY_CONTEXT ta[2], tempa[2];
3241 ENTROPY_CONTEXT tl[2], templ[2];
3242 #endif // CONFIG_CHROMA_2X2
3243
3244 const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
3245 const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
3246 const int tx_width_unit = tx_size_wide_unit[tx_size];
3247 const int tx_height_unit = tx_size_high_unit[tx_size];
3248 const int pred_block_width = block_size_wide[bsize];
3249 const int pred_block_height = block_size_high[bsize];
3250 const int tx_width = tx_size_wide[tx_size];
3251 const int tx_height = tx_size_high[tx_size];
3252 const int pred_width_in_transform_blocks = pred_block_width / tx_width;
3253 const int pred_height_in_transform_blocks = pred_block_height / tx_height;
3254 int idx, idy;
3255 int best_can_skip = 0;
3256 uint8_t best_dst[8 * 8];
3257 #if CONFIG_HIGHBITDEPTH
3258 uint16_t best_dst16[8 * 8];
3259 #endif // CONFIG_HIGHBITDEPTH
3260 const int is_lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
3261 #if CONFIG_EXT_TX && CONFIG_RECT_TX
3262 const int sub_bsize = bsize;
3263 #else
3264 const int sub_bsize = BLOCK_4X4;
3265 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
3266
3267 #if CONFIG_PVQ
3268 od_rollback_buffer pre_buf, post_buf;
3269 od_encode_checkpoint(&x->daala_enc, &pre_buf);
3270 od_encode_checkpoint(&x->daala_enc, &post_buf);
3271 #endif // CONFIG_PVQ
3272
3273 assert(bsize < BLOCK_8X8);
3274 assert(tx_width < 8 || tx_height < 8);
3275 #if CONFIG_EXT_TX && CONFIG_RECT_TX
3276 if (is_lossless)
3277 assert(tx_width == 4 && tx_height == 4);
3278 else
3279 assert(tx_width == pred_block_width && tx_height == pred_block_height);
3280 #else
3281 assert(tx_width == 4 && tx_height == 4);
3282 #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
3283
3284 memcpy(ta, a, pred_width_in_transform_blocks * sizeof(a[0]));
3285 memcpy(tl, l, pred_height_in_transform_blocks * sizeof(l[0]));
3286
3287 xd->mi[0]->mbmi.tx_size = tx_size;
3288
3289 xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
3290
3291 #if CONFIG_HIGHBITDEPTH
3292 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3293 #if CONFIG_PVQ
3294 od_encode_checkpoint(&x->daala_enc, &pre_buf);
3295 #endif
3296 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
3297 int64_t this_rd;
3298 int ratey = 0;
3299 int64_t distortion = 0;
3300 int rate = bmode_costs[mode];
3301 int can_skip = 1;
3302
3303 if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
3304 (1 << mode)))
3305 continue;
3306
3307 // Only do the oblique modes if the best so far is
3308 // one of the neighboring directional modes
3309 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3310 if (conditional_skipintra(mode, *best_mode)) continue;
3311 }
3312
3313 memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
3314 memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
3315
3316 for (idy = 0; idy < pred_height_in_transform_blocks; ++idy) {
3317 for (idx = 0; idx < pred_width_in_transform_blocks; ++idx) {
3318 const int block_raster_idx = (row + idy) * 2 + (col + idx);
3319 const int block =
3320 av1_raster_order_to_block_index(tx_size, block_raster_idx);
3321 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
3322 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
3323 #if !CONFIG_PVQ
3324 int16_t *const src_diff = av1_raster_block_offset_int16(
3325 BLOCK_8X8, block_raster_idx, p->src_diff);
3326 #endif
3327 int skip;
3328 assert(block < 4);
3329 assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
3330 idx == 0 && idy == 0));
3331 assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
3332 block == 0 || block == 2));
3333 xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
3334 av1_predict_intra_block(
3335 cm, xd, pd->width, pd->height, txsize_to_bsize[tx_size], mode,
3336 dst, dst_stride, dst, dst_stride, col + idx, row + idy, 0);
3337 #if !CONFIG_PVQ
3338 aom_highbd_subtract_block(tx_height, tx_width, src_diff, 8, src,
3339 src_stride, dst, dst_stride, xd->bd);
3340 #endif
3341 if (is_lossless) {
3342 TX_TYPE tx_type =
3343 av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
3344 const SCAN_ORDER *scan_order =
3345 get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
3346 const int coeff_ctx =
3347 combine_entropy_contexts(tempa[idx], templ[idy]);
3348 #if !CONFIG_PVQ
3349 av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
3350 tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
3351 ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size,
3352 scan_order, tempa + idx, templ + idy,
3353 cpi->sf.use_fast_coef_costing);
3354 skip = (p->eobs[block] == 0);
3355 can_skip &= skip;
3356 tempa[idx] = !skip;
3357 templ[idy] = !skip;
3358 #if CONFIG_EXT_TX
3359 if (tx_size == TX_8X4) {
3360 tempa[idx + 1] = tempa[idx];
3361 } else if (tx_size == TX_4X8) {
3362 templ[idy + 1] = templ[idy];
3363 }
3364 #endif // CONFIG_EXT_TX
3365 #else
3366 (void)scan_order;
3367
3368 av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
3369 tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
3370
3371 ratey += x->rate;
3372 skip = x->pvq_skip[0];
3373 tempa[idx] = !skip;
3374 templ[idy] = !skip;
3375 can_skip &= skip;
3376 #endif
3377 if (RDCOST(x->rdmult, ratey, distortion) >= best_rd)
3378 goto next_highbd;
3379 #if CONFIG_PVQ
3380 if (!skip)
3381 #endif
3382 av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
3383 #if CONFIG_LGT_FROM_PRED
3384 mode,
3385 #endif
3386 #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
3387 BLOCK_OFFSET(xd->mrc_mask, block),
3388 #endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
3389 DCT_DCT, tx_size, dst, dst_stride,
3390 p->eobs[block]);
3391 } else {
3392 int64_t dist;
3393 unsigned int tmp;
3394 TX_TYPE tx_type =
3395 av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
3396 const SCAN_ORDER *scan_order =
3397 get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
3398 const int coeff_ctx =
3399 combine_entropy_contexts(tempa[idx], templ[idy]);
3400 #if !CONFIG_PVQ
3401 #if DISABLE_TRELLISQ_SEARCH
3402 av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
3403 tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
3404 #else
3405 av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
3406 tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
3407 av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size,
3408 tempa + idx, templ + idy, 1);
3409 #endif // DISABLE_TRELLISQ_SEARCH
3410 ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size,
3411 scan_order, tempa + idx, templ + idy,
3412 cpi->sf.use_fast_coef_costing);
3413 skip = (p->eobs[block] == 0);
3414 can_skip &= skip;
3415 tempa[idx] = !skip;
3416 templ[idy] = !skip;
3417 #if CONFIG_EXT_TX
3418 if (tx_size == TX_8X4) {
3419 tempa[idx + 1] = tempa[idx];
3420 } else if (tx_size == TX_4X8) {
3421 templ[idy + 1] = templ[idy];
3422 }
3423 #endif // CONFIG_EXT_TX
3424 #else
3425 (void)scan_order;
3426
3427 av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
3428 tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
3429 ratey += x->rate;
3430 skip = x->pvq_skip[0];
3431 tempa[idx] = !skip;
3432 templ[idy] = !skip;
3433 can_skip &= skip;
3434 #endif
3435 #if CONFIG_PVQ
3436 if (!skip)
3437 #endif
3438 av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
3439 #if CONFIG_LGT_FROM_PRED
3440 mode,
3441 #endif
3442 #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
3443 BLOCK_OFFSET(xd->mrc_mask, block),
3444 #endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
3445 tx_type, tx_size, dst, dst_stride,
3446 p->eobs[block]);
3447 cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
3448 dist = (int64_t)tmp << 4;
3449 distortion += dist;
3450 if (RDCOST(x->rdmult, ratey, distortion) >= best_rd)
3451 goto next_highbd;
3452 }
3453 }
3454 }
3455
3456 rate += ratey;
3457 this_rd = RDCOST(x->rdmult, rate, distortion);
3458
3459 if (this_rd < best_rd) {
3460 *bestrate = rate;
3461 *bestratey = ratey;
3462 *bestdistortion = distortion;
3463 best_rd = this_rd;
3464 best_can_skip = can_skip;
3465 *best_mode = mode;
3466 memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
3467 memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
3468 #if CONFIG_PVQ
3469 od_encode_checkpoint(&x->daala_enc, &post_buf);
3470 #endif
3471 for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
3472 memcpy(best_dst16 + idy * 8,
3473 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
3474 pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
3475 }
3476 }
3477 next_highbd : {}
3478 #if CONFIG_PVQ
3479 od_encode_rollback(&x->daala_enc, &pre_buf);
3480 #endif
3481 }
3482
3483 if (best_rd >= rd_thresh) return best_rd;
3484
3485 #if CONFIG_PVQ
3486 od_encode_rollback(&x->daala_enc, &post_buf);
3487 #endif
3488
3489 if (y_skip) *y_skip &= best_can_skip;
3490
3491 for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
3492 memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
3493 best_dst16 + idy * 8,
3494 pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
3495 }
3496
3497 return best_rd;
3498 }
3499 #endif // CONFIG_HIGHBITDEPTH
3500
3501 #if CONFIG_PVQ
3502 od_encode_checkpoint(&x->daala_enc, &pre_buf);
3503 #endif // CONFIG_PVQ
3504
3505 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
3506 int64_t this_rd;
3507 int ratey = 0;
3508 int64_t distortion = 0;
3509 int rate = bmode_costs[mode];
3510 int can_skip = 1;
3511
3512 if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
3513 (1 << mode))) {
3514 continue;
3515 }
3516
3517 // Only do the oblique modes if the best so far is
3518 // one of the neighboring directional modes
3519 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3520 if (conditional_skipintra(mode, *best_mode)) continue;
3521 }
3522
3523 memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
3524 memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
3525
3526 for (idy = 0; idy < pred_height_in_4x4_blocks; idy += tx_height_unit) {
3527 for (idx = 0; idx < pred_width_in_4x4_blocks; idx += tx_width_unit) {
3528 const int block_raster_idx = (row + idy) * 2 + (col + idx);
3529 int block = av1_raster_order_to_block_index(tx_size, block_raster_idx);
3530 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
3531 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
3532 #if !CONFIG_PVQ
3533 int16_t *const src_diff = av1_raster_block_offset_int16(
3534 BLOCK_8X8, block_raster_idx, p->src_diff);
3535 #endif // !CONFIG_PVQ
3536 int skip;
3537 assert(block < 4);
3538 assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
3539 idx == 0 && idy == 0));
3540 assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
3541 block == 0 || block == 2));
3542 xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
3543 av1_predict_intra_block(cm, xd, pd->width, pd->height,
3544 txsize_to_bsize[tx_size], mode, dst, dst_stride,
3545 dst, dst_stride,
3546 #if CONFIG_CB4X4
3547 2 * (col + idx), 2 * (row + idy),
3548 #else
3549 col + idx, row + idy,
3550 #endif // CONFIG_CB4X4
3551 0);
3552 #if !CONFIG_PVQ
3553 aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride,
3554 dst, dst_stride);
3555 #endif // !CONFIG_PVQ
3556 TX_TYPE tx_type =
3557 av1_get_tx_type(PLANE_TYPE_Y, xd, 0, 0, block, tx_size);
3558 const SCAN_ORDER *scan_order =
3559 get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
3560 const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]);
3561 #if CONFIG_CB4X4
3562 block = 4 * block;
3563 #endif // CONFIG_CB4X4
3564 #if !CONFIG_PVQ
3565 #if DISABLE_TRELLISQ_SEARCH
3566 av1_xform_quant(cm, x, 0, block,
3567 #if CONFIG_CB4X4
3568 2 * (row + idy), 2 * (col + idx),
3569 #else
3570 row + idy, col + idx,
3571 #endif // CONFIG_CB4X4
3572 BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
3573 #else
3574 const AV1_XFORM_QUANT xform_quant =
3575 is_lossless ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP;
3576 av1_xform_quant(cm, x, 0, block,
3577 #if CONFIG_CB4X4
3578 2 * (row + idy), 2 * (col + idx),
3579 #else
3580 row + idy, col + idx,
3581 #endif // CONFIG_CB4X4
3582 BLOCK_8X8, tx_size, coeff_ctx, xform_quant);
3583
3584 av1_optimize_b(cm, x, 0, 0, 0, block, BLOCK_8X8, tx_size, tempa + idx,
3585 templ + idy, 1);
3586 #endif // DISABLE_TRELLISQ_SEARCH
3587 ratey += av1_cost_coeffs(cpi, x, 0, 0, 0, block, tx_size, scan_order,
3588 tempa + idx, templ + idy,
3589 cpi->sf.use_fast_coef_costing);
3590 skip = (p->eobs[block] == 0);
3591 can_skip &= skip;
3592 tempa[idx] = !skip;
3593 templ[idy] = !skip;
3594 #if CONFIG_EXT_TX
3595 if (tx_size == TX_8X4) {
3596 tempa[idx + 1] = tempa[idx];
3597 } else if (tx_size == TX_4X8) {
3598 templ[idy + 1] = templ[idy];
3599 }
3600 #endif // CONFIG_EXT_TX
3601 #else
3602 (void)scan_order;
3603
3604 av1_xform_quant(cm, x, 0, block,
3605 #if CONFIG_CB4X4
3606 2 * (row + idy), 2 * (col + idx),
3607 #else
3608 row + idy, col + idx,
3609 #endif // CONFIG_CB4X4
3610 BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
3611
3612 ratey += x->rate;
3613 skip = x->pvq_skip[0];
3614 tempa[idx] = !skip;
3615 templ[idy] = !skip;
3616 can_skip &= skip;
3617 #endif // !CONFIG_PVQ
3618
3619 if (!is_lossless) { // To use the pixel domain distortion, we need to
3620 // calculate inverse txfm *before* calculating RD
3621 // cost. Compared to calculating the distortion in
3622 // the frequency domain, the overhead of encoding
3623 // effort is low.
3624 #if CONFIG_PVQ
3625 if (!skip)
3626 #endif // CONFIG_PVQ
3627 av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
3628 #if CONFIG_LGT_FROM_PRED
3629 mode,
3630 #endif
3631 #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
3632 BLOCK_OFFSET(xd->mrc_mask, block),
3633 #endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
3634 tx_type, tx_size, dst, dst_stride,
3635 p->eobs[block]);
3636 unsigned int tmp;
3637 cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
3638 const int64_t dist = (int64_t)tmp << 4;
3639 distortion += dist;
3640 }
3641
3642 if (RDCOST(x->rdmult, ratey, distortion) >= best_rd) goto next;
3643
3644 if (is_lossless) { // Calculate inverse txfm *after* RD cost.
3645 #if CONFIG_PVQ
3646 if (!skip)
3647 #endif // CONFIG_PVQ
3648 av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
3649 #if CONFIG_LGT_FROM_PRED
3650 mode,
3651 #endif
3652 #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
3653 BLOCK_OFFSET(xd->mrc_mask, block),
3654 #endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
3655 DCT_DCT, tx_size, dst, dst_stride,
3656 p->eobs[block]);
3657 }
3658 }
3659 }
3660
3661 rate += ratey;
3662 this_rd = RDCOST(x->rdmult, rate, distortion);
3663
3664 if (this_rd < best_rd) {
3665 *bestrate = rate;
3666 *bestratey = ratey;
3667 *bestdistortion = distortion;
3668 best_rd = this_rd;
3669 best_can_skip = can_skip;
3670 *best_mode = mode;
3671 memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
3672 memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
3673 #if CONFIG_PVQ
3674 od_encode_checkpoint(&x->daala_enc, &post_buf);
3675 #endif // CONFIG_PVQ
3676 for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
3677 memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
3678 pred_width_in_transform_blocks * 4);
3679 }
3680 next : {}
3681 #if CONFIG_PVQ
3682 od_encode_rollback(&x->daala_enc, &pre_buf);
3683 #endif // CONFIG_PVQ
3684 } // mode decision loop
3685
3686 if (best_rd >= rd_thresh) return best_rd;
3687
3688 #if CONFIG_PVQ
3689 od_encode_rollback(&x->daala_enc, &post_buf);
3690 #endif // CONFIG_PVQ
3691
3692 if (y_skip) *y_skip &= best_can_skip;
3693
3694 for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
3695 memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
3696 pred_width_in_transform_blocks * 4);
3697
3698 return best_rd;
3699 }
3700
// Picks the best intra prediction mode for each luma prediction sub-block
// (4x4, 4x8, or 8x4) of an 8x8 coding block.  Used only for intra-only
// frames (see caller); accumulates rate/distortion over the sub-blocks,
// adds the transform-type signaling cost for non-lossless coding, and
// writes the totals to *rate, *rate_y and *distortion.
//
// Returns the total RD cost of the chosen modes, or INT64_MAX as soon as
// the accumulated cost can no longer beat best_rd.
// y_skip may be NULL; when non-NULL it is reduced (logical AND across
// sub-blocks) to whether the whole luma block can be coded as skip.
static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
                                            MACROBLOCK *mb, int *rate,
                                            int *rate_y, int64_t *distortion,
                                            int *y_skip, int64_t best_rd) {
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  MB_MODE_INFO *const mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
  const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  int cost = 0;
  int64_t total_distortion = 0;
  int tot_rate_y = 0;
  int64_t total_rd = 0;
  const int *bmode_costs = mb->mbmode_cost[0];
  const int is_lossless = xd->lossless[mbmi->segment_id];
#if CONFIG_EXT_TX && CONFIG_RECT_TX
  // Lossless coding is restricted to 4x4 transforms; otherwise use the
  // largest (possibly rectangular) transform that fits the block.
  const TX_SIZE tx_size = is_lossless ? TX_4X4 : max_txsize_rect_lookup[bsize];
#else
  const TX_SIZE tx_size = TX_4X4;
#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX

#if CONFIG_EXT_INTRA
#if CONFIG_INTRA_INTERP
  mbmi->intra_filter = INTRA_FILTER_LINEAR;
#endif  // CONFIG_INTRA_INTERP
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif  // CONFIG_FILTER_INTRA

  // TODO(any): Add search of the tx_type to improve rd performance at the
  // expense of speed.
  mbmi->tx_type = DCT_DCT;
  mbmi->tx_size = tx_size;
#if CONFIG_LGT_FROM_PRED
  mbmi->use_lgt = 0;
#endif

  // Start from "all sub-blocks skippable"; the sub-block search ANDs its
  // per-sub-block skip flag into *y_skip.
  if (y_skip) *y_skip = 1;

  // Pick modes for each prediction sub-block (of size 4x4, 4x8, or 8x4) in this
  // 8x8 coding block.
  for (idy = 0; idy < 2; idy += pred_height_in_4x4_blocks) {
    for (idx = 0; idx < 2; idx += pred_width_in_4x4_blocks) {
      PREDICTION_MODE best_mode = DC_PRED;
      int r = INT_MAX, ry = INT_MAX;
      int64_t d = INT64_MAX, this_rd = INT64_MAX;
      int j;
      const int pred_block_idx = idy * 2 + idx;
      if (cpi->common.frame_type == KEY_FRAME) {
        // On key frames the intra mode cost table is conditioned on the
        // modes of the above and left neighboring blocks.
        const PREDICTION_MODE A =
            av1_above_block_mode(mic, above_mi, pred_block_idx);
        const PREDICTION_MODE L =
            av1_left_block_mode(mic, left_mi, pred_block_idx);

#if CONFIG_KF_CTX
        const int above_ctx = intra_mode_context[A];
        const int left_ctx = intra_mode_context[L];
        bmode_costs = mb->y_mode_costs[above_ctx][left_ctx];
#else
        bmode_costs = mb->y_mode_costs[A][L];
#endif
      }
      // Search modes for this sub-block, with the remaining RD budget
      // (best_rd - total_rd) as the pruning threshold.
      this_rd = rd_pick_intra_sub_8x8_y_subblock_mode(
          cpi, mb, idy, idx, &best_mode, bmode_costs,
          xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
          &ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
#if CONFIG_DIST_8X8
      // With dist-8x8 the distortion is recomputed over the whole 8x8 block
      // below, so the per-sub-block early exit is bypassed.
      if (!cpi->oxcf.using_dist_8x8)
#endif
        if (this_rd >= best_rd - total_rd) return INT64_MAX;

      total_rd += this_rd;
      cost += r;
      total_distortion += d;
      tot_rate_y += ry;

      // Replicate the winning mode into every 4x4 unit covered by this
      // prediction sub-block.
      mic->bmi[pred_block_idx].as_mode = best_mode;
      for (j = 1; j < pred_height_in_4x4_blocks; ++j)
        mic->bmi[pred_block_idx + j * 2].as_mode = best_mode;
      for (j = 1; j < pred_width_in_4x4_blocks; ++j)
        mic->bmi[pred_block_idx + j].as_mode = best_mode;

      if (total_rd >= best_rd) return INT64_MAX;
    }
  }
  // The block-level mode is taken from the last (bottom-right) 4x4 unit.
  mbmi->mode = mic->bmi[3].as_mode;

#if CONFIG_DIST_8X8
  if (cpi->oxcf.using_dist_8x8) {
    const struct macroblock_plane *p = &mb->plane[0];
    const struct macroblockd_plane *pd = &xd->plane[0];
    const int src_stride = p->src.stride;
    const int dst_stride = pd->dst.stride;
    uint8_t *src = p->src.buf;
    uint8_t *dst = pd->dst.buf;

    // Daala-defined distortion computed for the block of 8x8 pixels
    total_distortion = av1_dist_8x8(cpi, mb, src, src_stride, dst, dst_stride,
                                    BLOCK_8X8, 8, 8, 8, 8, mb->qindex)
                       << 4;
  }
#endif  // CONFIG_DIST_8X8
  // Add in the cost of the transform type
  if (!is_lossless) {
    int rate_tx_type = 0;
#if CONFIG_EXT_TX
    // The tx_type is only signaled when more than one type is available for
    // this tx size / block size combination.
    if (get_ext_tx_types(tx_size, bsize, 0, cpi->common.reduced_tx_set_used) >
        1) {
      const int eset =
          get_ext_tx_set(tx_size, bsize, 0, cpi->common.reduced_tx_set_used);
#if CONFIG_LGT_FROM_PRED
      if (LGT_FROM_PRED_INTRA && is_lgt_allowed(mbmi->mode, tx_size))
        rate_tx_type += mb->intra_lgt_cost[txsize_sqr_map[tx_size]][mbmi->mode]
                                          [mbmi->use_lgt];
      if (!LGT_FROM_PRED_INTRA || !mbmi->use_lgt)
#endif  // CONFIG_LGT_FROM_PRED
        rate_tx_type += mb->intra_tx_type_costs[eset][txsize_sqr_map[tx_size]]
                                               [mbmi->mode][mbmi->tx_type];
    }
#else
    rate_tx_type =
        mb->intra_tx_type_costs[txsize_sqr_map[tx_size]]
                               [intra_mode_to_tx_type_context[mbmi->mode]]
                               [mbmi->tx_type];
#endif  // CONFIG_EXT_TX
    assert(mbmi->tx_size == tx_size);
    cost += rate_tx_type;
    tot_rate_y += rate_tx_type;
  }

  *rate = cost;
  *rate_y = tot_rate_y;
  *distortion = total_distortion;

  return RDCOST(mb->rdmult, cost, total_distortion);
}
3843
#if CONFIG_FILTER_INTRA
// Return 1 if a filter intra mode is selected; return 0 otherwise.
// Searches all FILTER_INTRA_MODES (minus those excluded by skip_mask) for
// the luma plane.  On success the winning mode, tx size and tx type are
// committed to mbmi, *best_rd / *best_model_rd are lowered, and the rate /
// distortion / skip outputs describe the winner.  mode_cost is the cost of
// signaling the underlying DC_PRED mode.
static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, int *skippable,
                                    BLOCK_SIZE bsize, int mode_cost,
                                    int64_t *best_rd, int64_t *best_model_rd,
                                    uint16_t skip_mask) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int filter_intra_selected_flag = 0;
  FILTER_INTRA_MODE mode;
  TX_SIZE best_tx_size = TX_4X4;
  FILTER_INTRA_MODE_INFO filter_intra_mode_info;
  // Only read when filter_intra_selected_flag is set, which guarantees an
  // assignment happened in the search loop below.
  TX_TYPE best_tx_type;
#if CONFIG_LGT_FROM_PRED
  int use_lgt_when_selected;
#endif

  av1_zero(filter_intra_mode_info);
  // Filter intra is coded on top of DC_PRED with no palette.
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
  mbmi->mode = DC_PRED;
  mbmi->palette_mode_info.palette_size[0] = 0;

  for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
    int this_rate;
    int64_t this_rd, this_model_rd;
    RD_STATS tokenonly_rd_stats;
    // skip_mask is a caller-provided bitmask of modes to exclude.
    if (skip_mask & (1 << mode)) continue;
    mbmi->filter_intra_mode_info.filter_intra_mode[0] = mode;
    this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
    // Prune modes whose model-based RD estimate exceeds 1.5x the best
    // model RD seen so far.
    if (*best_model_rd != INT64_MAX &&
        this_model_rd > *best_model_rd + (*best_model_rd >> 1))
      continue;
    if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
    super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
    if (tokenonly_rd_stats.rate == INT_MAX) continue;
    // Total rate = token cost + "filter intra on" flag + uniform code for
    // the filter intra mode + cost of the underlying DC_PRED mode.
    this_rate = tokenonly_rd_stats.rate +
                av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) +
                write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < *best_rd) {
      *best_rd = this_rd;
      best_tx_size = mic->mbmi.tx_size;
      filter_intra_mode_info = mbmi->filter_intra_mode_info;
      best_tx_type = mic->mbmi.tx_type;
#if CONFIG_LGT_FROM_PRED
      use_lgt_when_selected = mic->mbmi.use_lgt;
#endif
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
      filter_intra_selected_flag = 1;
    }
  }

  if (filter_intra_selected_flag) {
    // Commit the winning configuration back into the block's mode info.
    mbmi->mode = DC_PRED;
    mbmi->tx_size = best_tx_size;
#if CONFIG_LGT_FROM_PRED
    mbmi->use_lgt = use_lgt_when_selected;
#endif
    mbmi->filter_intra_mode_info.use_filter_intra_mode[0] =
        filter_intra_mode_info.use_filter_intra_mode[0];
    mbmi->filter_intra_mode_info.filter_intra_mode[0] =
        filter_intra_mode_info.filter_intra_mode[0];
    mbmi->tx_type = best_tx_type;
    return 1;
  } else {
    return 0;
  }
}
#endif  // CONFIG_FILTER_INTRA
3920
#if CONFIG_EXT_INTRA
// Run RD calculation with the given luma intra prediction angle delta, and
// return the RD cost.  Updates the best-mode info (angle delta, tx size,
// tx type, filter, rate/dist/skip) if the RD cost beats *best_rd.
// best_rd_in is a (possibly inflated) pruning threshold for the token-rate
// search; *best_rd is the true best cost so far.  Returns INT64_MAX when
// the candidate is pruned by the model RD or the token search.
static int64_t calc_rd_given_intra_angle(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
    int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
    RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
    TX_TYPE *best_tx_type,
#if CONFIG_LGT_FROM_PRED
    int *use_lgt_when_selected,
#endif
#if CONFIG_INTRA_INTERP
    INTRA_FILTER *best_filter,
#endif  // CONFIG_INTRA_INTERP
    int64_t *best_rd, int64_t *best_model_rd) {
  int this_rate;
  RD_STATS tokenonly_rd_stats;
  int64_t this_rd, this_model_rd;
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  assert(!is_inter_block(mbmi));

  mbmi->angle_delta[0] = angle_delta;
  // Cheap model-based RD estimate: prune if it exceeds 1.5x the best model
  // RD observed so far.
  this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
  if (*best_model_rd != INT64_MAX &&
      this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    return INT64_MAX;
  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
  if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;

  // Total rate = token cost + mode cost + uniform code for the angle delta
  // over the range [-max_angle_delta, max_angle_delta].
  this_rate = tokenonly_rd_stats.rate + mode_cost +
              write_uniform_cost(2 * max_angle_delta + 1,
                                 mbmi->angle_delta[0] + max_angle_delta);
  this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

  if (this_rd < *best_rd) {
    *best_rd = this_rd;
    *best_angle_delta = mbmi->angle_delta[0];
    *best_tx_size = mbmi->tx_size;
#if CONFIG_INTRA_INTERP
    *best_filter = mbmi->intra_filter;
#endif  // CONFIG_INTRA_INTERP
    *best_tx_type = mbmi->tx_type;
#if CONFIG_LGT_FROM_PRED
    *use_lgt_when_selected = mbmi->use_lgt;
#endif
    *rate = this_rate;
    rd_stats->rate = tokenonly_rd_stats.rate;
    rd_stats->dist = tokenonly_rd_stats.dist;
    rd_stats->skip = tokenonly_rd_stats.skip;
  }
  return this_rd;
}
3974
// With given luma directional intra prediction mode, pick the best angle delta
// Return the RD cost corresponding to the best angle delta.
//
// Two-pass search: pass 1 evaluates even angle deltas (both signs) and
// caches their RD costs in rd_cost[]; pass 2 evaluates an odd delta only if
// at least one of its even neighbors came close to the best cost.  With
// CONFIG_INTRA_INTERP a final refinement tries non-linear intra filters at
// the winning angle.  The winning angle delta / tx size / tx type / filter
// are committed to mbmi before returning.
static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int *rate, RD_STATS *rd_stats,
                                       BLOCK_SIZE bsize, int mode_cost,
                                       int64_t best_rd,
                                       int64_t *best_model_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  int i, angle_delta, best_angle_delta = 0;
  int first_try = 1;
#if CONFIG_INTRA_INTERP
  int p_angle;
  const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
  INTRA_FILTER filter, best_filter = INTRA_FILTER_LINEAR;
#endif  // CONFIG_INTRA_INTERP
  // rd_cost[2 * delta + i] caches the pass-1 cost for signed delta
  // (1 - 2 * i) * delta; consulted in pass 2 to skip unpromising deltas.
  int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
  TX_SIZE best_tx_size = mic->mbmi.tx_size;
  TX_TYPE best_tx_type = mbmi->tx_type;
#if CONFIG_LGT_FROM_PRED
  int use_lgt_when_selected = mbmi->use_lgt;
#endif

  for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;

  // Pass 1: even angle deltas, both signs (delta 0 is tried only once).
  for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
#if CONFIG_INTRA_INTERP
    for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
      if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
      mic->mbmi.intra_filter = filter;
#endif  // CONFIG_INTRA_INTERP
      for (i = 0; i < 2; ++i) {
        // Allow a wider pruning margin on the very first evaluation.
        best_rd_in = (best_rd == INT64_MAX)
                         ? INT64_MAX
                         : (best_rd + (best_rd >> (first_try ? 3 : 5)));
        this_rd = calc_rd_given_intra_angle(
            cpi, x, bsize,
#if CONFIG_INTRA_INTERP
            mode_cost + x->intra_filter_cost[intra_filter_ctx][filter],
#else
            mode_cost,
#endif  // CONFIG_INTRA_INTERP
            best_rd_in, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
            rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
#if CONFIG_LGT_FROM_PRED
            &use_lgt_when_selected,
#endif
#if CONFIG_INTRA_INTERP
            &best_filter,
#endif  // CONFIG_INTRA_INTERP
            &best_rd, best_model_rd);
        rd_cost[2 * angle_delta + i] = this_rd;
        // If even delta 0 is pruned on the first try, give up immediately.
        if (first_try && this_rd == INT64_MAX) return best_rd;
        first_try = 0;
        if (angle_delta == 0) {
          // Both signs of delta 0 are identical; record and move on.
          rd_cost[1] = this_rd;
          break;
        }
      }
#if CONFIG_INTRA_INTERP
    }
#endif  // CONFIG_INTRA_INTERP
  }

  assert(best_rd != INT64_MAX);
  // Pass 2: odd angle deltas, tried only when an even neighbor from pass 1
  // landed within ~3% (best_rd >> 5) of the best cost.
  for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    int64_t rd_thresh;
#if CONFIG_INTRA_INTERP
    for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
      if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
      mic->mbmi.intra_filter = filter;
#endif  // CONFIG_INTRA_INTERP
      for (i = 0; i < 2; ++i) {
        int skip_search = 0;
        rd_thresh = best_rd + (best_rd >> 5);
        if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
            rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
          skip_search = 1;
        if (!skip_search) {
          calc_rd_given_intra_angle(
              cpi, x, bsize,
#if CONFIG_INTRA_INTERP
              mode_cost + x->intra_filter_cost[intra_filter_ctx][filter],
#else
              mode_cost,
#endif  // CONFIG_INTRA_INTERP
              best_rd, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
              rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
#if CONFIG_LGT_FROM_PRED
              &use_lgt_when_selected,
#endif
#if CONFIG_INTRA_INTERP
              &best_filter,
#endif  // CONFIG_INTRA_INTERP
              &best_rd, best_model_rd);
        }
      }
#if CONFIG_INTRA_INTERP
    }
#endif  // CONFIG_INTRA_INTERP
  }

#if CONFIG_INTRA_INTERP
  // Fast search skipped non-linear filters above; refine the winner's
  // filter choice here when the prediction angle allows switching.
  if (FILTER_FAST_SEARCH && rd_stats->rate < INT_MAX) {
    p_angle = mode_to_angle_map[mbmi->mode] + best_angle_delta * ANGLE_STEP;
    if (av1_is_intra_filter_switchable(p_angle)) {
      for (filter = INTRA_FILTER_LINEAR + 1; filter < INTRA_FILTERS; ++filter) {
        mic->mbmi.intra_filter = filter;
        this_rd = calc_rd_given_intra_angle(
            cpi, x, bsize,
            mode_cost + x->intra_filter_cost[intra_filter_ctx][filter], best_rd,
            best_angle_delta, MAX_ANGLE_DELTA, rate, rd_stats,
            &best_angle_delta, &best_tx_size, &best_tx_type,
#if CONFIG_LGT_FROM_PRED
            &use_lgt_when_selected,
#endif
            &best_filter, &best_rd, best_model_rd);
      }
    }
  }
#endif  // CONFIG_INTRA_INTERP

  // Commit the winning configuration back into the block's mode info.
  mbmi->tx_size = best_tx_size;
  mbmi->angle_delta[0] = best_angle_delta;
#if CONFIG_INTRA_INTERP
  mic->mbmi.intra_filter = best_filter;
#endif  // CONFIG_INTRA_INTERP
  mbmi->tx_type = best_tx_type;
#if CONFIG_LGT_FROM_PRED
  mbmi->use_lgt = use_lgt_when_selected;
#endif
  return best_rd;
}
4110
// Indices are sign, integer, and fractional part of the gradient value:
//   [sign]     = (dx > 0) ^ (dy > 0)
//   [integer]  = min(|dx| / |dy|, 6)
//   [fraction] = min((|dx| % |dy|) * 16 / |dy|, 15)
// Each entry is the directional-histogram bin (0..7) that the gradient
// falls into; consumed by the angle estimation routines below.
static const uint8_t gradient_to_angle_bin[2][7][16] = {
  {
      { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
  {
      { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
      { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
};
4132
/* clang-format off */
// Gradient-histogram bin associated with each intra prediction mode.
// Only consulted for modes where av1_is_directional_mode() is true (see the
// angle estimation routines below); other entries are placeholders.
static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
  0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
  0,
};
/* clang-format on */
4139
// Builds a histogram of local gradient directions over the source block and
// marks, in directional_mode_skip_mask, every directional intra mode whose
// direction carries too small a share of the total gradient energy, so the
// mode search can skip it.  No modes are skipped when angle deltas are not
// used for this block size.
static void angle_estimation(const uint8_t *src, int src_stride, int rows,
                             int cols, BLOCK_SIZE bsize,
                             uint8_t *directional_mode_skip_mask) {
  memset(directional_mode_skip_mask, 0,
         INTRA_MODES * sizeof(*directional_mode_skip_mask));
  // Check if angle_delta is used; if not, there is nothing to prune.
  if (!av1_use_angle_delta(bsize)) return;

  uint64_t hist[DIRECTIONAL_MODES] = { 0 };
  // Start on the second row/column so both neighbors needed for the
  // gradient always exist.
  const uint8_t *row_ptr = src + src_stride;
  for (int row = 1; row < rows; ++row, row_ptr += src_stride) {
    for (int col = 1; col < cols; ++col) {
      int gx = row_ptr[col] - row_ptr[col - 1];
      int gy = row_ptr[col] - row_ptr[col - src_stride];
      const int magnitude = gx * gx + gy * gy;
      int bin;
      if (gy == 0) {
        // Gradient vector is (gx, 0): purely horizontal.
        bin = 2;
      } else {
        const int sign = (gx > 0) ^ (gy > 0);
        gx = abs(gx);
        gy = abs(gy);
        const int quot = gx / gy;
        const int remd = (gx % gy) * 16 / gy;
        bin = gradient_to_angle_bin[sign][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
      }
      hist[bin] += magnitude;
    }
  }

  uint64_t hist_sum = 0;
  for (int k = 0; k < DIRECTIONAL_MODES; ++k) hist_sum += hist[k];
  for (int m = 0; m < INTRA_MODES; ++m) {
    if (!av1_is_directional_mode(m, bsize)) continue;
    const uint8_t angle_bin = mode_to_angle_bin[m];
    // Score the mode's bin together with its immediate neighbors, weighting
    // the center bin double.
    uint64_t score = 2 * hist[angle_bin];
    int weight = 2;
    if (angle_bin > 0) {
      score += hist[angle_bin - 1];
      ++weight;
    }
    if (angle_bin < DIRECTIONAL_MODES - 1) {
      score += hist[angle_bin + 1];
      ++weight;
    }
    // Skip the mode when its neighborhood's share of the total gradient
    // energy falls below the threshold.
    if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
      directional_mode_skip_mask[m] = 1;
  }
}
4193
#if CONFIG_HIGHBITDEPTH
// High bit-depth counterpart of angle_estimation(): builds a gradient
// direction histogram over the (16-bit) source block and marks directional
// intra modes with weak gradient support in directional_mode_skip_mask.
// src8 is a CONVERT_TO_SHORTPTR-style handle to 16-bit samples.
static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
                                    int rows, int cols, BLOCK_SIZE bsize,
                                    uint8_t *directional_mode_skip_mask) {
  memset(directional_mode_skip_mask, 0,
         INTRA_MODES * sizeof(*directional_mode_skip_mask));
  // Check if angle_delta is used; if not, there is nothing to prune.
  if (!av1_use_angle_delta(bsize)) return;

  uint64_t hist[DIRECTIONAL_MODES] = { 0 };
  // Start on the second row/column so both neighbors needed for the
  // gradient always exist.
  const uint16_t *row_ptr = CONVERT_TO_SHORTPTR(src8) + src_stride;
  for (int row = 1; row < rows; ++row, row_ptr += src_stride) {
    for (int col = 1; col < cols; ++col) {
      int gx = row_ptr[col] - row_ptr[col - 1];
      int gy = row_ptr[col] - row_ptr[col - src_stride];
      const int magnitude = gx * gx + gy * gy;
      int bin;
      if (gy == 0) {
        // Gradient vector is (gx, 0): purely horizontal.
        bin = 2;
      } else {
        const int sign = (gx > 0) ^ (gy > 0);
        gx = abs(gx);
        gy = abs(gy);
        const int quot = gx / gy;
        const int remd = (gx % gy) * 16 / gy;
        bin = gradient_to_angle_bin[sign][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
      }
      hist[bin] += magnitude;
    }
  }

  uint64_t hist_sum = 0;
  for (int k = 0; k < DIRECTIONAL_MODES; ++k) hist_sum += hist[k];
  for (int m = 0; m < INTRA_MODES; ++m) {
    if (!av1_is_directional_mode(m, bsize)) continue;
    const uint8_t angle_bin = mode_to_angle_bin[m];
    // Score the mode's bin together with its immediate neighbors, weighting
    // the center bin double.
    uint64_t score = 2 * hist[angle_bin];
    int weight = 2;
    if (angle_bin > 0) {
      score += hist[angle_bin - 1];
      ++weight;
    }
    if (angle_bin < DIRECTIONAL_MODES - 1) {
      score += hist[angle_bin + 1];
      ++weight;
    }
    // Skip the mode when its neighborhood's share of the total gradient
    // energy falls below the threshold.
    if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
      directional_mode_skip_mask[m] = 1;
  }
}
#endif  // CONFIG_HIGHBITDEPTH
4250 #endif // CONFIG_EXT_INTRA
4251
// This function is used only for intra_only frames.
// Luma (Y) intra-mode RD search for one block: iterates over the intra
// prediction modes (plus one extra FINAL_MODE_SEARCH pass that re-evaluates
// the winner with the full tx-type search when the fast default-tx shortcut
// was active), then tries palette and filter-intra coding. The winning mode
// info is written back into xd->mi[0]; rate / rate_tokenonly / distortion /
// skippable are returned through the pointer arguments. Returns the best RD
// cost found (the incoming best_rd if nothing beat it).
static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize, int64_t best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_model_rd = INT64_MAX;
#if CONFIG_EXT_INTRA
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];
#if CONFIG_INTRA_INTERP
  const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
#endif // CONFIG_INTRA_INTERP
  int is_directional_mode;
  uint8_t directional_mode_skip_mask[INTRA_MODES];
  const int src_stride = x->plane[0].src.stride;
  const uint8_t *src = x->plane[0].src.buf;
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  int beat_best_rd = 0;
  // Bit i set => filter-intra search seeded from mode i is still allowed.
  uint16_t filter_intra_mode_skip_mask = (1 << FILTER_INTRA_MODES) - 1;
#endif // CONFIG_FILTER_INTRA
  const int *bmode_costs;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  int palette_y_mode_ctx = 0;
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
  uint8_t *best_palette_color_map =
      try_palette ? x->palette_buffer->best_palette_color_map : NULL;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
  // Sentinel loop index for the extra pass that re-searches the best mode.
  const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
#if CONFIG_PVQ
  od_rollback_buffer pre_buf, post_buf;

  od_encode_checkpoint(&x->daala_enc, &pre_buf);
  od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif // CONFIG_PVQ

  // Mode-signaling cost is conditioned on the above/left neighbor modes.
#if CONFIG_KF_CTX
  const int above_ctx = intra_mode_context[A];
  const int left_ctx = intra_mode_context[L];
  bmode_costs = x->y_mode_costs[above_ctx][left_ctx];
#else
  bmode_costs = x->y_mode_costs[A][L];
#endif

#if CONFIG_EXT_INTRA
  mbmi->angle_delta[0] = 0;
  // Analyze source gradients to pre-prune unlikely directional modes.
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    highbd_angle_estimation(src, src_stride, rows, cols, bsize,
                            directional_mode_skip_mask);
  else
#endif // CONFIG_HIGHBITDEPTH
    angle_estimation(src, src_stride, rows, cols, bsize,
                     directional_mode_skip_mask);
#endif // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif // CONFIG_FILTER_INTRA
  pmi->palette_size[0] = 0;
  if (try_palette) {
    // Palette-Y flag context: count of neighbors already using a palette.
    if (above_mi) {
      palette_y_mode_ctx +=
          (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    }
    if (left_mi) {
      palette_y_mode_ctx +=
          (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    }
  }

  if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
    x->use_default_intra_tx_type = 1;
  else
    x->use_default_intra_tx_type = 0;

  /* Y Search for intra prediction mode */
  for (int mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
    RD_STATS this_rd_stats;
    int this_rate, this_rate_tokenonly, s;
    int64_t this_distortion, this_rd, this_model_rd;
    if (mode_idx == FINAL_MODE_SEARCH) {
      // Extra pass: re-run the winning mode with full tx-type search.
      // Only needed when the earlier passes used the default-tx shortcut.
      if (x->use_default_intra_tx_type == 0) break;
      mbmi->mode = best_mbmi.mode;
      x->use_default_intra_tx_type = 0;
    } else {
      assert(mode_idx < INTRA_MODES);
      mbmi->mode = intra_rd_search_mode_order[mode_idx];
    }
#if CONFIG_PVQ
    od_encode_rollback(&x->daala_enc, &pre_buf);
#endif // CONFIG_PVQ
#if CONFIG_EXT_INTRA
    mbmi->angle_delta[0] = 0;
#endif // CONFIG_EXT_INTRA
    // Cheap model-based RD estimate; prune modes clearly worse than best.
    this_model_rd = intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode]);
    if (best_model_rd != INT64_MAX &&
        this_model_rd > best_model_rd + (best_model_rd >> 1))
      continue;
    if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
#if CONFIG_EXT_INTRA
    is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
    if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
    if (is_directional_mode && av1_use_angle_delta(bsize)) {
      this_rd_stats.rate = INT_MAX;
      rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
                              bmode_costs[mbmi->mode], best_rd, &best_model_rd);
    } else {
      super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
    }
#else
    super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
#endif // CONFIG_EXT_INTRA
    this_rate_tokenonly = this_rd_stats.rate;
    this_distortion = this_rd_stats.dist;
    s = this_rd_stats.skip;

    if (this_rate_tokenonly == INT_MAX) continue;

    this_rate = this_rate_tokenonly + bmode_costs[mbmi->mode];

    if (!xd->lossless[mbmi->segment_id] &&
        block_signals_txsize(mbmi->sb_type)) {
      // super_block_yrd above includes the cost of the tx_size in the
      // tokenonly rate, but for intra blocks, tx_size is always coded
      // (prediction granularity), so we account for it in the full rate,
      // not the tokenonly rate.
      this_rate_tokenonly -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
    }
    if (try_palette && mbmi->mode == DC_PRED) {
      // Cost of signaling "no palette" for this DC_PRED block.
      this_rate +=
          av1_cost_bit(av1_default_palette_y_mode_prob[bsize - BLOCK_8X8]
                                                      [palette_y_mode_ctx],
                       0);
    }
#if CONFIG_FILTER_INTRA
    // Cost of signaling "no filter-intra" for DC_PRED.
    if (mbmi->mode == DC_PRED)
      this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 0);
#endif // CONFIG_FILTER_INTRA
#if CONFIG_EXT_INTRA
    if (is_directional_mode) {
#if CONFIG_INTRA_INTERP
      const int p_angle =
          mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
      if (av1_is_intra_filter_switchable(p_angle))
        this_rate += x->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
#endif // CONFIG_INTRA_INTERP
      if (av1_use_angle_delta(bsize)) {
        this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                        MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
      }
    }
#endif // CONFIG_EXT_INTRA
#if CONFIG_INTRABC
    if (bsize >= BLOCK_8X8 && cpi->common.allow_screen_content_tools)
      this_rate += x->intrabc_cost[0];
#endif // CONFIG_INTRABC
    this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
#if CONFIG_FILTER_INTRA
    if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) {
      // NOTE(review): '^=' toggles the bit, so a mode passing this test an
      // even number of times flips back to "skip" — confirm intended.
      filter_intra_mode_skip_mask ^= (1 << mbmi->mode);
    }
#endif // CONFIG_FILTER_INTRA

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
#if CONFIG_FILTER_INTRA
      beat_best_rd = 1;
#endif // CONFIG_FILTER_INTRA
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
#if CONFIG_PVQ
      od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif // CONFIG_PVQ
    }
  }

#if CONFIG_PVQ
  od_encode_rollback(&x->daala_enc, &post_buf);
#endif // CONFIG_PVQ

  // Palette search may further improve on the best regular intra mode.
  if (try_palette) {
    rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
                              bmode_costs[DC_PRED], &best_mbmi,
                              best_palette_color_map, &best_rd, &best_model_rd,
                              rate, rate_tokenonly, distortion, skippable);
  }

#if CONFIG_FILTER_INTRA
  if (beat_best_rd) {
    if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
                                 skippable, bsize, bmode_costs[DC_PRED],
                                 &best_rd, &best_model_rd,
                                 filter_intra_mode_skip_mask)) {
      best_mbmi = *mbmi;
    }
  }
#endif // CONFIG_FILTER_INTRA

  *mbmi = best_mbmi;
  return best_rd;
}
4465
4466 // Return value 0: early termination triggered, no valid rd cost available;
4467 // 1: rd cost values are valid.
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
// Accumulates the chroma (U and V) RD stats for the block at the chosen
// uniform chroma transform size, terminating early when the running cost
// already exceeds ref_best_rd.
static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
                            RD_STATS *rd_stats, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
  int valid = (ref_best_rd >= 0);

  av1_init_rd_stats(rd_stats);

#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
  // Blocks with no associated chroma coding skip the search entirely.
  if (x->skip_chroma_rd) return valid;

  bsize = scale_chroma_bsize(bsize, xd->plane[1].subsampling_x,
                             xd->plane[1].subsampling_y);
#endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2

#if !CONFIG_PVQ
  // Inter blocks need the prediction residual computed up front.
  if (valid && is_inter_block(mbmi)) {
    int pln;
    for (pln = 1; pln < MAX_MB_PLANE; ++pln) av1_subtract_plane(x, bsize, pln);
  }
#endif // !CONFIG_PVQ

  if (valid) {
    int pln;
    for (pln = 1; pln < MAX_MB_PLANE; ++pln) {
      RD_STATS plane_stats;
      txfm_rd_in_plane(x, cpi, &plane_stats, ref_best_rd, pln, bsize,
                       uv_tx_size, cpi->sf.use_fast_coef_costing);
      if (plane_stats.rate == INT_MAX) {
        valid = 0;
        break;
      }
      av1_merge_rd_stats(rd_stats, &plane_stats);
      // Stop once neither the coded nor the skip cost can beat the bound.
      if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd &&
          RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) {
        valid = 0;
        break;
      }
    }
  }

  if (!valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }

  return valid;
}
4519
4520 #if CONFIG_VAR_TX
// Computes and accumulates RD stats for one transform block: forward
// transform + quantization, optional trellis coefficient optimization,
// inverse transform into a local reconstruction buffer, then pixel-domain
// distortion and coefficient rate are added into rd_stats.
//   a, l: above/left entropy contexts for this transform block.
// With CONFIG_TXK_SEL the whole job is delegated to av1_search_txk_type.
void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
                       int blk_row, int blk_col, int plane, int block,
                       int plane_bsize, const ENTROPY_CONTEXT *a,
                       const ENTROPY_CONTEXT *l, RD_STATS *rd_stats) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];

#if CONFIG_TXK_SEL
  av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
                      tx_size, a, l, 0, rd_stats);
  return;
#endif

  int64_t tmp;
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
  uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block);
#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
  PLANE_TYPE plane_type = get_plane_type(plane);
  TX_TYPE tx_type =
      av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
  const SCAN_ORDER *const scan_order =
      get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
  BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
  int bh = block_size_high[txm_bsize];
  int bw = block_size_wide[txm_bsize];
  int src_stride = p->src.stride;
  uint8_t *src =
      &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
  uint8_t *dst =
      &pd->dst
           .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
#if CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
  uint8_t *rec_buffer;
#else
  DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
#endif // CONFIG_HIGHBITDEPTH
  const int diff_stride = block_size_wide[plane_bsize];
  const int16_t *diff =
      &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
  int txb_coeff_cost;

  assert(tx_size < TX_SIZES_ALL);

  int coeff_ctx = get_entropy_context(tx_size, a, l);

  // Prediction-residual SSE; doubles as the distortion fallback when the
  // block ends up coded as all-zero (eob == 0).
  tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
                        plane_bsize, txm_bsize);

#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
#endif // CONFIG_HIGHBITDEPTH
  // << 4 (i.e. *16) matches the transform-domain distortion scale.
  rd_stats->sse += tmp << 4;

  if (rd_stats->invalid_rate) {
    // Rate already marked invalid upstream: charge the zero-block rate and
    // treat the block as skipped without doing any transform work.
    rd_stats->dist += tmp << 4;
    rd_stats->rate += rd_stats->zero_rate;
    rd_stats->skip = 1;
    return;
  }

  // TODO(any): Use av1_dist_block to compute distortion
  // Seed the scratch reconstruction buffer with the current prediction.
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
    aom_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL,
                             0, NULL, 0, bw, bh, xd->bd);
  } else {
    rec_buffer = (uint8_t *)rec_buffer16;
    aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0,
                      NULL, 0, bw, bh);
  }
#else
  aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL,
                    0, bw, bh);
#endif // CONFIG_HIGHBITDEPTH

#if DISABLE_TRELLISQ_SEARCH
  av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                  coeff_ctx, AV1_XFORM_QUANT_B);

#else
  av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                  coeff_ctx, AV1_XFORM_QUANT_FP);

  // Transform-domain distortion estimate used to decide whether running
  // the trellis (av1_optimize_b) can possibly pay off.
  const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  const int buffer_length = tx_size_2d[tx_size];
  int64_t tmp_dist, tmp_sse;
#if CONFIG_DIST_8X8
  int disable_early_skip =
      x->using_dist_8x8 && plane == 0 && plane_bsize >= BLOCK_8X8 &&
      (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
      x->tune_metric != AOM_TUNE_PSNR;
#endif // CONFIG_DIST_8X8

#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    tmp_dist =
        av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp_sse, xd->bd);
  else
#endif
    tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp_sse);

  tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);

#if CONFIG_MRC_TX
  if (tx_type == MRC_DCT && !xd->mi[0]->mbmi.valid_mrc_mask) {
    av1_invalid_rd_stats(rd_stats);
    return;
  }
#endif // CONFIG_MRC_TX
  if (
#if CONFIG_DIST_8X8
      disable_early_skip ||
#endif
      RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
    av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
                   a, l, 1);
  } else {
    // Even a zero-rate block cannot beat the reference RD cost: flag the
    // rate invalid and take the skip path.
    rd_stats->rate += rd_stats->zero_rate;
    rd_stats->dist += tmp << 4;
    rd_stats->skip = 1;
    rd_stats->invalid_rate = 1;
    return;
  }
#endif // DISABLE_TRELLISQ_SEARCH

  const int eob = p->eobs[block];

  av1_inverse_transform_block(xd, dqcoeff,
#if CONFIG_LGT_FROM_PRED
                              xd->mi[0]->mbmi.mode,
#endif
#if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                              mrc_mask,
#endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
                              tx_type, tx_size, rec_buffer, MAX_TX_SIZE, eob);
  if (eob > 0) {
#if CONFIG_DIST_8X8
    if (x->using_dist_8x8 && plane == 0 && (bw < 8 && bh < 8)) {
      // Save sub8x8 luma decoded pixels
      // since 8x8 luma decoded pixels are not available for daala-dist
      // after recursive split of BLOCK_8x8 is done.
      const int pred_stride = block_size_wide[plane_bsize];
      const int pred_idx = (blk_row * pred_stride + blk_col)
                           << tx_size_wide_log2[0];
      int16_t *decoded = &pd->pred[pred_idx];
      int i, j;

#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        for (j = 0; j < bh; j++)
          for (i = 0; i < bw; i++)
            decoded[j * pred_stride + i] =
                CONVERT_TO_SHORTPTR(rec_buffer)[j * MAX_TX_SIZE + i];
      } else {
#endif
        for (j = 0; j < bh; j++)
          for (i = 0; i < bw; i++)
            decoded[j * pred_stride + i] = rec_buffer[j * MAX_TX_SIZE + i];
#if CONFIG_HIGHBITDEPTH
      }
#endif // CONFIG_HIGHBITDEPTH
    }
#endif // CONFIG_DIST_8X8
    tmp = pixel_dist(cpi, x, plane, src, src_stride, rec_buffer, MAX_TX_SIZE,
                     blk_row, blk_col, plane_bsize, txm_bsize);
  }
  // When eob == 0, 'tmp' still holds the prediction SSE, so distortion
  // correctly equals sse for an all-zero block.
  rd_stats->dist += tmp * 16;
  txb_coeff_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block,
                                   tx_size, scan_order, a, l, 0);
  rd_stats->rate += txb_coeff_cost;
  rd_stats->skip &= (eob == 0);

#if CONFIG_RD_DEBUG
  av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
                            txb_coeff_cost);
#endif // CONFIG_RD_DEBUG
}
4705
// Recursive transform-partition RD search for one transform block.
// Compares coding the block whole at tx_size (and, with
// CONFIG_RECT_TX_EXT, as a pair of quarter-size transforms) against
// splitting into four sub-transforms, and commits the cheaper choice:
// updates entropy / txfm-partition contexts, mbmi->inter_tx_size and
// tx_size, per-block skip flags and eobs. *is_cost_valid is cleared when
// no valid cost exists within ref_best_rd.
//
// Fixes vs. previous revision (CONFIG_RECT_TX_EXT path): the INT_MAX rate
// checks after the two quarter-tx av1_tx_block_rd_b() calls inspected
// rd_stats->rate (already known valid) instead of the freshly computed
// quarter-tx stats, and the first quarter-tx "not skipped" branch cleared
// rd_stats->skip instead of rd_stats_qttx.skip, clobbering the full-tx
// result. All three now reference the intended structs.
static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                            int blk_col, int plane, int block, TX_SIZE tx_size,
                            int depth, BLOCK_SIZE plane_bsize,
                            ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                            TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                            RD_STATS *rd_stats, int64_t ref_best_rd,
                            int *is_cost_valid) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  TX_SIZE(*const inter_tx_size)
  [MAX_MIB_SIZE] =
      (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  int64_t this_rd = INT64_MAX;
  ENTROPY_CONTEXT *pta = ta + blk_col;
  ENTROPY_CONTEXT *ptl = tl + blk_row;
  int i;
  int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
                                   mbmi->sb_type, tx_size);
  int64_t sum_rd = INT64_MAX;
  int tmp_eob = 0;
  int zero_blk_rate;
  RD_STATS sum_rd_stats;
#if CONFIG_TXK_SEL
  TX_TYPE best_tx_type = TX_TYPES;
  int txk_idx = (blk_row << 4) + blk_col;
#endif
#if CONFIG_RECT_TX_EXT
  TX_SIZE quarter_txsize = quarter_txsize_lookup[mbmi->sb_type];
  int check_qttx = is_quarter_tx_allowed(xd, mbmi, is_inter_block(mbmi)) &&
                   tx_size == max_txsize_rect_lookup[mbmi->sb_type] &&
                   quarter_txsize != tx_size;
  int is_qttx_picked = 0;
  int eobs_qttx[2] = { 0, 0 };
  int skip_qttx[2] = { 0, 0 };
  int block_offset_qttx = check_qttx
                              ? tx_size_wide_unit[quarter_txsize] *
                                    tx_size_high_unit[quarter_txsize]
                              : 0;
  int blk_row_offset, blk_col_offset;
  int is_wide_qttx =
      tx_size_wide_unit[quarter_txsize] > tx_size_high_unit[quarter_txsize];
  blk_row_offset = is_wide_qttx ? tx_size_high_unit[quarter_txsize] : 0;
  blk_col_offset = is_wide_qttx ? 0 : tx_size_wide_unit[quarter_txsize];
#endif

  av1_init_rd_stats(&sum_rd_stats);

  assert(tx_size < TX_SIZES_ALL);

  if (ref_best_rd < 0) {
    *is_cost_valid = 0;
    return;
  }

  av1_init_rd_stats(rd_stats);

  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  // Rate of coding this block as all-zero (skip): used as an RD floor.
#if CONFIG_LV_MAP
  TX_SIZE txs_ctx = get_txsize_context(tx_size);
  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, pta, ptl, &txb_ctx);

#if LV_MAP_PROB
  zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(plane)]
                      .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
  zero_blk_rate =
      av1_cost_bit(xd->fc->txb_skip[txs_ctx][txb_ctx.txb_skip_ctx], 1);
#endif // LV_MAP_PROB
#else
  TX_SIZE tx_size_ctx = txsize_sqr_map[tx_size];
  int coeff_ctx = get_entropy_context(tx_size, pta, ptl);
  zero_blk_rate =
      x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
#endif

  rd_stats->ref_rdcost = ref_best_rd;
  rd_stats->zero_rate = zero_blk_rate;
  if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    // Candidate 1: code the block whole at the current tx_size.
    inter_tx_size[0][0] = tx_size;
    av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                      plane_bsize, pta, ptl, rd_stats);
    if (rd_stats->rate == INT_MAX) return;

    // Force the skip path when zeroing out the block is no worse in RD.
    if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
             RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
         rd_stats->skip == 1) &&
        !xd->lossless[mbmi->segment_id]) {
#if CONFIG_RD_DEBUG
      av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
                                zero_blk_rate - rd_stats->rate);
#endif // CONFIG_RD_DEBUG
      rd_stats->rate = zero_blk_rate;
      rd_stats->dist = rd_stats->sse;
      rd_stats->skip = 1;
      x->blk_skip[plane][blk_row * bw + blk_col] = 1;
      p->eobs[block] = 0;
#if CONFIG_TXK_SEL
      mbmi->txk_type[txk_idx] = DCT_DCT;
#endif
    } else {
      x->blk_skip[plane][blk_row * bw + blk_col] = 0;
      rd_stats->skip = 0;
    }

    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate +=
          av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
#if CONFIG_RECT_TX_EXT
    if (check_qttx) {
      assert(blk_row == 0 && blk_col == 0);
      rd_stats->rate += av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 0);
    }
#endif
    this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
#if CONFIG_LV_MAP
    tmp_eob = p->txb_entropy_ctx[block];
#else
    tmp_eob = p->eobs[block];
#endif

#if CONFIG_TXK_SEL
    best_tx_type = mbmi->txk_type[txk_idx];
#endif

#if CONFIG_RECT_TX_EXT
    // Candidate 2: code the block as two quarter-size transforms.
    if (check_qttx) {
      assert(blk_row == 0 && blk_col == 0 && block == 0 && plane == 0);

      RD_STATS rd_stats_tmp, rd_stats_qttx;
      int64_t rd_qttx;

      av1_init_rd_stats(&rd_stats_qttx);
      av1_init_rd_stats(&rd_stats_tmp);

      av1_tx_block_rd_b(cpi, x, quarter_txsize, 0, 0, plane, 0, plane_bsize,
                        pta, ptl, &rd_stats_qttx);
      // Fix: check the stats just computed for the first quarter tx;
      // rd_stats->rate was already verified valid above.
      if (rd_stats_qttx.rate == INT_MAX) return;

      tx_size_ctx = txsize_sqr_map[quarter_txsize];
      coeff_ctx = get_entropy_context(quarter_txsize, pta, ptl);
      zero_blk_rate =
          x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
      if ((RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist) >=
               RDCOST(x->rdmult, zero_blk_rate, rd_stats_qttx.sse) ||
           rd_stats_qttx.skip == 1) &&
          !xd->lossless[mbmi->segment_id]) {
#if CONFIG_RD_DEBUG
        av1_update_txb_coeff_cost(&rd_stats_qttx, plane, quarter_txsize, 0, 0,
                                  zero_blk_rate - rd_stats_qttx.rate);
#endif // CONFIG_RD_DEBUG
        rd_stats_qttx.rate = zero_blk_rate;
        rd_stats_qttx.dist = rd_stats_qttx.sse;
        rd_stats_qttx.skip = 1;
        x->blk_skip[plane][blk_row * bw + blk_col] = 1;
        skip_qttx[0] = 1;
        p->eobs[block] = 0;
      } else {
        x->blk_skip[plane][blk_row * bw + blk_col] = 0;
        skip_qttx[0] = 0;
        // Fix: clear the quarter-tx stats' own skip flag (mirrors the
        // rd_stats_tmp.skip = 0 in the second-block branch below); the
        // previous code cleared rd_stats->skip, corrupting the full-tx
        // candidate's result.
        rd_stats_qttx.skip = 0;
      }

      // Second tx block
      av1_tx_block_rd_b(cpi, x, quarter_txsize, blk_row_offset, blk_col_offset,
                        plane, block_offset_qttx, plane_bsize, pta, ptl,
                        &rd_stats_tmp);

      // Fix: likewise check the second quarter transform's own rate.
      if (rd_stats_tmp.rate == INT_MAX) return;

#if !CONFIG_PVQ
      av1_set_txb_context(x, plane, 0, quarter_txsize, pta, ptl);
#endif // !CONFIG_PVQ
      coeff_ctx = get_entropy_context(quarter_txsize, pta + blk_col_offset,
                                      ptl + blk_row_offset);
      zero_blk_rate =
          x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
      if ((RDCOST(x->rdmult, rd_stats_tmp.rate, rd_stats_tmp.dist) >=
               RDCOST(x->rdmult, zero_blk_rate, rd_stats_tmp.sse) ||
           rd_stats_tmp.skip == 1) &&
          !xd->lossless[mbmi->segment_id]) {
#if CONFIG_RD_DEBUG
        av1_update_txb_coeff_cost(&rd_stats_tmp, plane, quarter_txsize, 0, 0,
                                  zero_blk_rate - rd_stats_tmp.rate);
#endif // CONFIG_RD_DEBUG
        rd_stats_tmp.rate = zero_blk_rate;
        rd_stats_tmp.dist = rd_stats_tmp.sse;
        rd_stats_tmp.skip = 1;
        x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 1;
        skip_qttx[1] = 1;
        p->eobs[block_offset_qttx] = 0;
      } else {
        x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = 0;
        skip_qttx[1] = 0;
        rd_stats_tmp.skip = 0;
      }

      av1_merge_rd_stats(&rd_stats_qttx, &rd_stats_tmp);

      if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
        rd_stats_qttx.rate +=
            av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
      }
      rd_stats_qttx.rate +=
          av1_cost_bit(cpi->common.fc->quarter_tx_size_prob, 1);
      rd_qttx = RDCOST(x->rdmult, rd_stats_qttx.rate, rd_stats_qttx.dist);
#if CONFIG_LV_MAP
      eobs_qttx[0] = p->txb_entropy_ctx[0];
      eobs_qttx[1] = p->txb_entropy_ctx[block_offset_qttx];
#else
      eobs_qttx[0] = p->eobs[0];
      eobs_qttx[1] = p->eobs[block_offset_qttx];
#endif
      if (rd_qttx < this_rd) {
        is_qttx_picked = 1;
        this_rd = rd_qttx;
        rd_stats->rate = rd_stats_qttx.rate;
        rd_stats->dist = rd_stats_qttx.dist;
        rd_stats->sse = rd_stats_qttx.sse;
        rd_stats->skip = rd_stats_qttx.skip;
        rd_stats->rdcost = rd_stats_qttx.rdcost;
      }
      av1_get_entropy_contexts(plane_bsize, 0, pd, ta, tl);
    }
#endif
  }

  // Candidate 3: split into four sub-transforms and recurse.
  if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH
#if CONFIG_MRC_TX
      // If the tx type we are trying is MRC_DCT, we cannot partition the
      // transform into anything smaller than TX_32X32
      && mbmi->tx_type != MRC_DCT
#endif // CONFIG_MRC_TX
      ) {
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsl = tx_size_wide_unit[sub_txs];
    int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
    RD_STATS this_rd_stats;
    int this_cost_valid = 1;
    int64_t tmp_rd = 0;
#if CONFIG_DIST_8X8
    int sub8x8_eob[4];
#endif
    sum_rd_stats.rate =
        av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);

    assert(tx_size < TX_SIZES_ALL);

    ref_best_rd = AOMMIN(this_rd, ref_best_rd);

    for (i = 0; i < 4 && this_cost_valid; ++i) {
      int offsetr = blk_row + (i >> 1) * bsl;
      int offsetc = blk_col + (i & 0x01) * bsl;

      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;

      select_tx_block(cpi, x, offsetr, offsetc, plane, block, sub_txs,
                      depth + 1, plane_bsize, ta, tl, tx_above, tx_left,
                      &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid);
#if CONFIG_DIST_8X8
      if (x->using_dist_8x8 && plane == 0 && tx_size == TX_8X8) {
        sub8x8_eob[i] = p->eobs[block];
      }
#endif // CONFIG_DIST_8X8
      av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);

      tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
#if CONFIG_DIST_8X8
      if (!x->using_dist_8x8)
#endif
        // Early out once the split cost already exceeds the whole-block RD.
        if (this_rd < tmp_rd) break;
      block += sub_step;
    }
#if CONFIG_DIST_8X8
    if (x->using_dist_8x8 && this_cost_valid && plane == 0 &&
        tx_size == TX_8X8) {
      // Recompute the 8x8 distortion with the daala-dist metric from the
      // four reconstructed 4x4 sub-blocks.
      const int src_stride = p->src.stride;
      const int dst_stride = pd->dst.stride;

      const uint8_t *src =
          &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
      const uint8_t *dst =
          &pd->dst
               .buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];

      int64_t dist_8x8;
      int qindex = x->qindex;
      const int pred_stride = block_size_wide[plane_bsize];
      const int pred_idx = (blk_row * pred_stride + blk_col)
                           << tx_size_wide_log2[0];
      int16_t *pred = &pd->pred[pred_idx];
      int j;
      int row, col;

#if CONFIG_HIGHBITDEPTH
      uint8_t *pred8;
      DECLARE_ALIGNED(16, uint16_t, pred8_16[8 * 8]);
#else
      DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
#endif // CONFIG_HIGHBITDEPTH

      dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
                              BLOCK_8X8, 8, 8, 8, 8, qindex) *
                 16;
      sum_rd_stats.sse = dist_8x8;

#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
        pred8 = CONVERT_TO_BYTEPTR(pred8_16);
      else
        pred8 = (uint8_t *)pred8_16;
#endif

      // Assemble the decoded 8x8 block: saved reconstruction where a
      // sub-block coded coefficients, prediction (dst) where it skipped.
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        for (row = 0; row < 2; ++row) {
          for (col = 0; col < 2; ++col) {
            int idx = row * 2 + col;
            int eob = sub8x8_eob[idx];

            if (eob > 0) {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  CONVERT_TO_SHORTPTR(pred8)
                  [(row * 4 + j) * 8 + 4 * col + i] =
                      pred[(row * 4 + j) * pred_stride + 4 * col + i];
            } else {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  CONVERT_TO_SHORTPTR(pred8)
                  [(row * 4 + j) * 8 + 4 * col + i] = CONVERT_TO_SHORTPTR(
                      dst)[(row * 4 + j) * dst_stride + 4 * col + i];
            }
          }
        }
      } else {
#endif
        for (row = 0; row < 2; ++row) {
          for (col = 0; col < 2; ++col) {
            int idx = row * 2 + col;
            int eob = sub8x8_eob[idx];

            if (eob > 0) {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  pred8[(row * 4 + j) * 8 + 4 * col + i] =
                      (uint8_t)pred[(row * 4 + j) * pred_stride + 4 * col + i];
            } else {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  pred8[(row * 4 + j) * 8 + 4 * col + i] =
                      dst[(row * 4 + j) * dst_stride + 4 * col + i];
            }
          }
        }
#if CONFIG_HIGHBITDEPTH
      }
#endif // CONFIG_HIGHBITDEPTH
      dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, pred8, 8, BLOCK_8X8, 8,
                              8, 8, 8, qindex) *
                 16;
      sum_rd_stats.dist = dist_8x8;
      tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
    }
#endif // CONFIG_DIST_8X8
    if (this_cost_valid) sum_rd = tmp_rd;
  }

  if (this_rd < sum_rd) {
    // Whole-block (or quarter-tx) coding wins: commit its contexts/sizes.
    int idx, idy;
#if CONFIG_RECT_TX_EXT
    TX_SIZE tx_size_selected = is_qttx_picked ? quarter_txsize : tx_size;
#else
    TX_SIZE tx_size_selected = tx_size;
#endif

#if CONFIG_RECT_TX_EXT
    if (is_qttx_picked) {
      assert(blk_row == 0 && blk_col == 0 && plane == 0);
#if CONFIG_LV_MAP
      p->txb_entropy_ctx[0] = eobs_qttx[0];
      p->txb_entropy_ctx[block_offset_qttx] = eobs_qttx[1];
#else
      p->eobs[0] = eobs_qttx[0];
      p->eobs[block_offset_qttx] = eobs_qttx[1];
#endif
    } else {
#endif
#if CONFIG_LV_MAP
      p->txb_entropy_ctx[block] = tmp_eob;
#else
      p->eobs[block] = tmp_eob;
#endif
#if CONFIG_RECT_TX_EXT
    }
#endif

#if !CONFIG_PVQ
    av1_set_txb_context(x, plane, block, tx_size_selected, pta, ptl);
#if CONFIG_RECT_TX_EXT
    if (is_qttx_picked)
      av1_set_txb_context(x, plane, block_offset_qttx, tx_size_selected,
                          pta + blk_col_offset, ptl + blk_row_offset);
#endif // CONFIG_RECT_TX_EXT
#endif // !CONFIG_PVQ

    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                          tx_size);
    inter_tx_size[0][0] = tx_size_selected;
    for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
      for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
        inter_tx_size[idy][idx] = tx_size_selected;
    mbmi->tx_size = tx_size_selected;
#if CONFIG_TXK_SEL
    mbmi->txk_type[txk_idx] = best_tx_type;
#endif
    if (this_rd == INT64_MAX) *is_cost_valid = 0;
#if CONFIG_RECT_TX_EXT
    if (is_qttx_picked) {
      x->blk_skip[plane][0] = skip_qttx[0];
      x->blk_skip[plane][blk_row_offset * bw + blk_col_offset] = skip_qttx[1];
    } else {
#endif
      x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
#if CONFIG_RECT_TX_EXT
    }
#endif
  } else {
    // Split wins: report the accumulated sub-transform stats.
    *rd_stats = sum_rd_stats;
    if (sum_rd == INT64_MAX) *is_cost_valid = 0;
  }
}
5146
// Rate-distortion cost of the luma plane of an inter block under the
// recursive transform-size search. On failure (a sub-search yields no valid
// rate, or the total cost exceeds ref_best_rd) rd_stats is invalidated.
static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
                            RD_STATS *rd_stats, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int valid = (ref_best_rd >= 0);
  int64_t rd_so_far = 0;

  av1_init_rd_stats(rd_stats);

  if (valid) {
    const struct macroblockd_plane *const pd = &xd->plane[0];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
    const int unit_h = tx_size_high_unit[max_tx_size];
    const int unit_w = tx_size_wide_unit[max_tx_size];
    const int blk_step = unit_w * unit_h;
    const int init_depth =
        (mi_height != mi_width) ? RECT_VARTX_DEPTH_INIT : SQR_VARTX_DEPTH_INIT;
    ENTROPY_CONTEXT ctx_above[2 * MAX_MIB_SIZE];
    ENTROPY_CONTEXT ctx_left[2 * MAX_MIB_SIZE];
    TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
    TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
    RD_STATS unit_stats;
    int blk_idx = 0;

    av1_init_rd_stats(&unit_stats);
    av1_get_entropy_contexts(bsize, 0, pd, ctx_above, ctx_left);
    memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
    memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);

    // Visit every maximum-size transform unit of the block.
    for (int row = 0; row < mi_height; row += unit_h) {
      for (int col = 0; col < mi_width; col += unit_w) {
        select_tx_block(cpi, x, row, col, 0, blk_idx, max_tx_size, init_depth,
                        plane_bsize, ctx_above, ctx_left, tx_above, tx_left,
                        &unit_stats, ref_best_rd - rd_so_far, &valid);
        if (unit_stats.rate == INT_MAX) {
          av1_invalid_rd_stats(rd_stats);
          return;
        }
        av1_merge_rd_stats(rd_stats, &unit_stats);
        // Track the running cost as the cheaper of coding the coefficients or
        // skipping, which tightens the budget handed to later units.
        rd_so_far += AOMMIN(RDCOST(x->rdmult, unit_stats.rate, unit_stats.dist),
                            RDCOST(x->rdmult, 0, unit_stats.sse));
        blk_idx += blk_step;
      }
    }
  }

  rd_so_far = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist),
                     RDCOST(x->rdmult, 0, rd_stats->sse));
  if (rd_so_far > ref_best_rd) valid = 0;

  if (!valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }
}
5209
// Runs the recursive transform-size search (inter_block_yrd) with a fixed
// transform type and returns the resulting RD cost including the cost of
// signaling the tx_type and the skip flag. Returns INT64_MAX when the
// search produces no valid rate.
static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t ref_best_rd, TX_TYPE tx_type) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int is_inter = is_inter_block(mbmi);
  aom_prob skip_prob = av1_get_skip_prob(cm, xd);
  int s0 = av1_cost_bit(skip_prob, 0);  // cost of signaling "not skipped"
  int s1 = av1_cost_bit(skip_prob, 1);  // cost of signaling "skipped"
  int64_t rd;
  int row, col;
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);

  mbmi->tx_type = tx_type;
  inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd);
  // Seed min_tx_size from the top-left unit before scanning the whole grid.
  mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]);

  if (rd_stats->rate == INT_MAX) return INT64_MAX;

  // min_tx_size is the smallest transform actually chosen anywhere in the
  // block; it selects the context used to cost the tx_type below.
  for (row = 0; row < max_blocks_high / 2; ++row)
    for (col = 0; col < max_blocks_wide / 2; ++col)
      mbmi->min_tx_size = AOMMIN(
          mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col]));

#if !CONFIG_TXK_SEL
#if CONFIG_EXT_TX
  // Add the rate of signaling the transform type. Skipped for lossless
  // blocks and when the extended-tx set contains a single entry.
  if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
                       cm->reduced_tx_set_used) > 1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    const int ext_tx_set = get_ext_tx_set(mbmi->min_tx_size, bsize, is_inter,
                                          cm->reduced_tx_set_used);
#if CONFIG_LGT_FROM_PRED
    if (is_lgt_allowed(mbmi->mode, mbmi->min_tx_size)) {
      if (LGT_FROM_PRED_INTRA && !is_inter && ext_tx_set > 0 &&
          ALLOW_INTRA_EXT_TX)
        rd_stats->rate += x->intra_lgt_cost[txsize_sqr_map[mbmi->min_tx_size]]
                                           [mbmi->mode][mbmi->use_lgt];
      if (LGT_FROM_PRED_INTER && is_inter && ext_tx_set > 0)
        rd_stats->rate +=
            x->inter_lgt_cost[txsize_sqr_map[mbmi->min_tx_size]][mbmi->use_lgt];
    }
    if (!mbmi->use_lgt) {
#endif  // CONFIG_LGT_FROM_PRED
      if (is_inter) {
        if (ext_tx_set > 0)
          rd_stats->rate +=
              x->inter_tx_type_costs[ext_tx_set]
                                    [txsize_sqr_map[mbmi->min_tx_size]]
                                    [mbmi->tx_type];
      } else {
        if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
          rd_stats->rate +=
              x->intra_tx_type_costs[ext_tx_set][mbmi->min_tx_size][mbmi->mode]
                                    [mbmi->tx_type];
      }
    }
#if CONFIG_LGT_FROM_PRED
  }
#endif
#else
  if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
    rd_stats->rate += x->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
#endif  // CONFIG_EXT_TX
#endif  // CONFIG_TXK_SEL

  // Final cost: either code the block as skipped (s1 + sse) or code the
  // coefficients (rate + s0 + dist). For non-lossless inter blocks, also
  // consider forcing skip even when the search did not choose it.
  if (rd_stats->skip)
    rd = RDCOST(x->rdmult, s1, rd_stats->sse);
  else
    rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);

  if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
      !(rd_stats->skip))
    rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));

  return rd;
}
5288
// Hashes the clamped luma residual of the block, folding the block size into
// the low 7 bits, to produce a lookup key for the transform RD record cache.
static uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
  const int height = block_size_high[bsize];
  const int width = block_size_wide[bsize];
  const struct macroblock_plane *const p = &x->plane[0];
  const int16_t *row_diff = &p->src_diff[0];
  uint8_t hash_data[MAX_SB_SQUARE];
  uint8_t *out = hash_data;
  for (int y = 0; y < height; ++y) {
    // The residual buffer is stored with a stride equal to the block width.
    for (int pos = 0; pos < width; ++pos)
      *out++ = clip_pixel(row_diff[pos] + 128);
    row_diff += width;
  }
  const uint32_t crc = av1_get_crc_value(&x->tx_rd_record.crc_calculator,
                                         hash_data, height * width);
  return (crc << 7) + bsize;
}
5307
// Stores the outcome of a transform RD search, keyed by the residue hash,
// into tx_rd_info so a later block with an identical residual can reuse the
// decision. n4 is the number of 4x4 blocks covered by the partition.
static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
                            const RD_STATS *const rd_stats,
                            TX_RD_INFO *const tx_rd_info) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  tx_rd_info->hash_value = hash;
  tx_rd_info->tx_type = mbmi->tx_type;
  tx_rd_info->tx_size = mbmi->tx_size;
#if CONFIG_VAR_TX
  tx_rd_info->min_tx_size = mbmi->min_tx_size;
  // Per-4x4 luma skip flags.
  memcpy(tx_rd_info->blk_skip, x->blk_skip[0],
         sizeof(tx_rd_info->blk_skip[0]) * n4);
  // Recursive transform-size grid, stored at 8x8 granularity.
  for (int idy = 0; idy < xd->n8_h; ++idy)
    for (int idx = 0; idx < xd->n8_w; ++idx)
      tx_rd_info->inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
#endif  // CONFIG_VAR_TX
#if CONFIG_TXK_SEL
  av1_copy(tx_rd_info->txk_type, mbmi->txk_type);
#endif  // CONFIG_TXK_SEL
  tx_rd_info->rd_stats = *rd_stats;
}
5329
// Restores a previously saved transform RD decision (tx type/size, per-block
// skip flags and RD stats) from tx_rd_info back into the encoder state.
// Mirrors save_tx_rd_info; n4 is the number of 4x4 blocks covered.
static void fetch_tx_rd_info(int n4, const TX_RD_INFO *const tx_rd_info,
                             RD_STATS *const rd_stats, MACROBLOCK *const x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  mbmi->tx_type = tx_rd_info->tx_type;
  mbmi->tx_size = tx_rd_info->tx_size;
#if CONFIG_VAR_TX
  mbmi->min_tx_size = tx_rd_info->min_tx_size;
  memcpy(x->blk_skip[0], tx_rd_info->blk_skip,
         sizeof(tx_rd_info->blk_skip[0]) * n4);
  for (int idy = 0; idy < xd->n8_h; ++idy)
    for (int idx = 0; idx < xd->n8_w; ++idx)
      mbmi->inter_tx_size[idy][idx] = tx_rd_info->inter_tx_size[idy][idx];
#endif  // CONFIG_VAR_TX
#if CONFIG_TXK_SEL
  av1_copy(mbmi->txk_type, tx_rd_info->txk_type);
#endif  // CONFIG_TXK_SEL
  *rd_stats = tx_rd_info->rd_stats;
}
5349
5350 // Uses simple features on top of DCT coefficients to quickly predict
5351 // whether optimal RD decision is to skip encoding the residual.
predict_skip_flag_8bit(const MACROBLOCK * x,BLOCK_SIZE bsize)5352 static int predict_skip_flag_8bit(const MACROBLOCK *x, BLOCK_SIZE bsize) {
5353 if (bsize > BLOCK_16X16) return 0;
5354 // Tuned for target false-positive rate of 5% for all block sizes:
5355 const uint32_t threshold_table[] = { 50, 50, 50, 55, 47, 47, 53, 22, 22, 37 };
5356 const struct macroblock_plane *const p = &x->plane[0];
5357 const int bw = block_size_wide[bsize];
5358 const int bh = block_size_high[bsize];
5359 tran_low_t DCT_coefs[32 * 32];
5360 TxfmParam param;
5361 param.tx_type = DCT_DCT;
5362 #if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
5363 param.tx_size = max_txsize_rect_lookup[bsize];
5364 #else
5365 param.tx_size = max_txsize_lookup[bsize];
5366 #endif
5367 param.bd = 8;
5368 param.lossless = 0;
5369 av1_fwd_txfm(p->src_diff, DCT_coefs, bw, ¶m);
5370
5371 uint32_t dc = (uint32_t)av1_dc_quant(x->qindex, 0, AOM_BITS_8);
5372 uint32_t ac = (uint32_t)av1_ac_quant(x->qindex, 0, AOM_BITS_8);
5373 uint32_t max_quantized_coef = (100 * (uint32_t)abs(DCT_coefs[0])) / dc;
5374 for (int i = 1; i < bw * bh; i++) {
5375 uint32_t cur_quantized_coef = (100 * (uint32_t)abs(DCT_coefs[i])) / ac;
5376 if (cur_quantized_coef > max_quantized_coef)
5377 max_quantized_coef = cur_quantized_coef;
5378 }
5379
5380 return max_quantized_coef < threshold_table[AOMMAX(bsize - BLOCK_4X4, 0)];
5381 }
5382
5383 // Used to set proper context for early termination with skip = 1.
set_skip_flag(const AV1_COMP * cpi,MACROBLOCK * x,RD_STATS * rd_stats,int bsize)5384 static void set_skip_flag(const AV1_COMP *cpi, MACROBLOCK *x,
5385 RD_STATS *rd_stats, int bsize) {
5386 MACROBLOCKD *const xd = &x->e_mbd;
5387 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
5388 const int n4 = bsize_to_num_blk(bsize);
5389 #if CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
5390 const TX_SIZE tx_size = max_txsize_rect_lookup[bsize];
5391 #else
5392 const TX_SIZE tx_size = max_txsize_lookup[bsize];
5393 #endif
5394 mbmi->tx_type = DCT_DCT;
5395 for (int idy = 0; idy < xd->n8_h; ++idy)
5396 for (int idx = 0; idx < xd->n8_w; ++idx)
5397 mbmi->inter_tx_size[idy][idx] = tx_size;
5398 mbmi->tx_size = tx_size;
5399 mbmi->min_tx_size = get_min_tx_size(tx_size);
5400 memset(x->blk_skip[0], 1, sizeof(uint8_t) * n4);
5401 rd_stats->skip = 1;
5402
5403 // Rate.
5404 const int tx_size_ctx = txsize_sqr_map[tx_size];
5405 ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
5406 ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
5407 av1_get_entropy_contexts(bsize, 0, &xd->plane[0], ctxa, ctxl);
5408 int coeff_ctx = get_entropy_context(tx_size, ctxa, ctxl);
5409 int rate = x->token_head_costs[tx_size_ctx][PLANE_TYPE_Y][1][0][coeff_ctx][0];
5410 if (tx_size > TX_4X4) {
5411 int ctx = txfm_partition_context(
5412 xd->above_txfm_context, xd->left_txfm_context, mbmi->sb_type, tx_size);
5413 rate += av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
5414 }
5415 #if !CONFIG_TXK_SEL
5416 #if CONFIG_EXT_TX
5417 const AV1_COMMON *cm = &cpi->common;
5418 const int ext_tx_set = get_ext_tx_set(max_txsize_lookup[bsize], bsize, 1,
5419 cm->reduced_tx_set_used);
5420 if (get_ext_tx_types(mbmi->min_tx_size, bsize, 1, cm->reduced_tx_set_used) >
5421 1 &&
5422 !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
5423 if (ext_tx_set > 0)
5424 rate +=
5425 x->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->min_tx_size]]
5426 [mbmi->tx_type];
5427 }
5428 #else
5429 if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
5430 rd_stats->rate += x->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
5431 #endif // CONFIG_EXT_TX
5432 #endif // CONFIG_TXK_SEL
5433 rd_stats->rate = rate;
5434
5435 // Distortion.
5436 int64_t tmp = pixel_diff_dist(x, 0, x->plane[0].src_diff,
5437 block_size_wide[bsize], 0, 0, bsize, bsize);
5438 #if CONFIG_HIGHBITDEPTH
5439 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
5440 tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
5441 #endif // CONFIG_HIGHBITDEPTH
5442 rd_stats->dist = rd_stats->sse = (tmp << 4);
5443 }
5444
// Chooses the best transform type — and, per type, the best recursive
// transform-size partitioning — for the luma plane of a block. Results are
// cached in x->tx_rd_record keyed by a hash of the residual, and a fast
// skip prediction may bypass the search entirely for 8-bit inter blocks.
static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
                               RD_STATS *rd_stats, BLOCK_SIZE bsize,
                               int64_t ref_best_rd) {
  const AV1_COMMON *cm = &cpi->common;
  const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd = INT64_MAX;
  int64_t best_rd = INT64_MAX;
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  const int is_inter = is_inter_block(mbmi);
  TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
  TX_SIZE best_tx = max_txsize_lookup[bsize];
  TX_SIZE best_min_tx_size = TX_SIZES_ALL;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
  TX_TYPE txk_start = DCT_DCT;
#if CONFIG_TXK_SEL
  // Per-transform-block type selection handles tx_type at a finer
  // granularity, so only DCT_DCT is tried at this level.
  TX_TYPE txk_end = DCT_DCT + 1;
#else
  TX_TYPE txk_end = TX_TYPES;
#endif
  const int n4 = bsize_to_num_blk(bsize);
  int idx, idy;
  int prune = 0;
#if CONFIG_EXT_TX
  const TxSetType tx_set_type = get_ext_tx_set_type(
      max_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
  const int ext_tx_set =
      get_ext_tx_set(max_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
#endif  // CONFIG_EXT_TX

  av1_invalid_rd_stats(rd_stats);

#if CONFIG_LGT_FROM_PRED
  mbmi->use_lgt = 0;
  // NOTE(review): '!cpi->sf.tx_type_search.prune_mode > NO_PRUNE' parses as
  // '(!prune_mode) > NO_PRUNE'. With NO_PRUNE == 0 this happens to equal
  // 'prune_mode == NO_PRUNE', but the intended condition should be confirmed.
  int search_lgt = is_inter
                       ? LGT_FROM_PRED_INTER &&
                             (!cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
                       : LGT_FROM_PRED_INTRA && ALLOW_INTRA_EXT_TX;
#endif  // CONFIG_LGT_FROM_PRED

  const uint32_t hash = get_block_residue_hash(x, bsize);
  TX_RD_RECORD *tx_rd_record = &x->tx_rd_record;

  if (ref_best_rd != INT64_MAX) {
    for (int i = 0; i < tx_rd_record->num; ++i) {
      const int index = (tx_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN;
      // If there is a match in the tx_rd_record, fetch the RD decision and
      // terminate early.
      if (tx_rd_record->tx_rd_info[index].hash_value == hash) {
        TX_RD_INFO *tx_rd_info = &tx_rd_record->tx_rd_info[index];
        fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x);
        return;
      }
    }
  }

  // If we predict that skip is the optimal RD decision - set the respective
  // context and terminate early. (The predictor only supports 8-bit input.)
#if CONFIG_HIGHBITDEPTH
  if (!(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH))
#endif  // CONFIG_HIGHBITDEPTH
  {
    if (is_inter && cpi->sf.tx_type_search.use_skip_flag_prediction &&
        predict_skip_flag_8bit(x, bsize)) {
      set_skip_flag(cpi, x, rd_stats, bsize);
      return;
    }
  }

  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
#if CONFIG_EXT_TX
    prune = prune_tx_types(cpi, bsize, x, xd, ext_tx_set);
#else
    prune = prune_tx_types(cpi, bsize, x, xd, 0);
#endif  // CONFIG_EXT_TX

  int found = 0;

  // Main search: evaluate each candidate transform type.
  for (tx_type = txk_start; tx_type < txk_end; ++tx_type) {
    RD_STATS this_rd_stats;
    av1_init_rd_stats(&this_rd_stats);
#if CONFIG_MRC_TX
    // MRC_DCT only implemented for TX_32X32 so only include this tx in
    // the search for TX_32X32
    if (tx_type == MRC_DCT &&
        (max_tx_size != TX_32X32 || (is_inter && !USE_MRC_INTER) ||
         (!is_inter && !USE_MRC_INTRA)))
      continue;
#endif  // CONFIG_MRC_TX
#if CONFIG_EXT_TX
    if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
    if (is_inter) {
      if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
        if (!do_tx_type_search(tx_type, prune)) continue;
      }
    } else {
      if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
        if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
      }
    }
#else   // CONFIG_EXT_TX
    if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
        !do_tx_type_search(tx_type, prune))
      continue;
#endif  // CONFIG_EXT_TX
    if (is_inter && x->use_default_inter_tx_type &&
        tx_type != get_default_tx_type(0, xd, 0, max_tx_size))
      continue;

    // Lossless coding only permits DCT_DCT.
    if (xd->lossless[mbmi->segment_id])
      if (tx_type != DCT_DCT) continue;

    rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
                                 tx_type);
    // Tighten the budget so later candidates can terminate earlier.
    ref_best_rd = AOMMIN(rd, ref_best_rd);
    if (rd < best_rd) {
      best_rd = rd;
      *rd_stats = this_rd_stats;
      best_tx_type = mbmi->tx_type;
      best_tx = mbmi->tx_size;
      best_min_tx_size = mbmi->min_tx_size;
      memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
      found = 1;
      for (idy = 0; idy < xd->n8_h; ++idy)
        for (idx = 0; idx < xd->n8_w; ++idx)
          best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
    }
  }

  // We should always find at least one candidate unless ref_best_rd is less
  // than INT64_MAX (in which case, all the calls to select_tx_size_fix_type
  // might have failed to find something better)
  assert(IMPLIES(!found, ref_best_rd != INT64_MAX));
  if (!found) return;

#if CONFIG_LGT_FROM_PRED
  // Optionally try the prediction-based line graph transform as well.
  if (search_lgt && is_lgt_allowed(mbmi->mode, max_tx_size) &&
      !cm->reduced_tx_set_used) {
    RD_STATS this_rd_stats;
    mbmi->use_lgt = 1;
    rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd, 0);
    if (rd < best_rd) {
      best_rd = rd;
      *rd_stats = this_rd_stats;
      best_tx = mbmi->tx_size;
      best_min_tx_size = mbmi->min_tx_size;
      memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
      for (idy = 0; idy < xd->n8_h; ++idy)
        for (idx = 0; idx < xd->n8_w; ++idx)
          best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
    } else {
      mbmi->use_lgt = 0;
    }
  }
#endif  // CONFIG_LGT_FROM_PRED
  // We found a candidate transform to use. Copy our results from the "best"
  // array into mbmi.
  mbmi->tx_type = best_tx_type;
  for (idy = 0; idy < xd->n8_h; ++idy)
    for (idx = 0; idx < xd->n8_w; ++idx)
      mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
  mbmi->tx_size = best_tx;
  mbmi->min_tx_size = best_min_tx_size;
  memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);

  // Save the RD search results into tx_rd_record (a circular buffer: evict
  // the oldest entry once full).
  int index;
  if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
    index =
        (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
    ++tx_rd_record->num;
  } else {
    index = tx_rd_record->index_start;
    tx_rd_record->index_start =
        (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
  }
  save_tx_rd_info(n4, hash, x, rd_stats, &tx_rd_record->tx_rd_info[index]);
}
5624
// Accumulates rate-distortion stats for one transform block of a plane,
// recursing down to match the transform partitioning that was selected for
// luma and recorded in mbmi->inter_tx_size.
static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                        int blk_col, int plane, int block, TX_SIZE tx_size,
                        BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
                        ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  // inter_tx_size is indexed at luma resolution; map this block's position
  // through the plane's chroma subsampling.
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  TX_SIZE plane_tx_size;
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);

  assert(tx_size < TX_SIZES_ALL);

  // Blocks outside the visible frame area carry no cost.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  plane_tx_size =
      plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
            : mbmi->inter_tx_size[tx_row][tx_col];

  if (tx_size == plane_tx_size) {
    // Leaf: cost this transform block and update the entropy contexts.
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
    av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                      plane_bsize, ta, tl, rd_stats);
#if !CONFIG_PVQ
    av1_set_txb_context(x, plane, block, tx_size, ta, tl);
#endif  // !CONFIG_PVQ
  } else {
    // Split: recurse into the four quadrants at the next smaller tx size.
    const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    const int bsl = tx_size_wide_unit[sub_txs];
    int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
    int i;

    assert(bsl > 0);

    for (i = 0; i < 4; ++i) {
      int offsetr = blk_row + (i >> 1) * bsl;
      int offsetc = blk_col + (i & 0x01) * bsl;

      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;

      tx_block_rd(cpi, x, offsetr, offsetc, plane, block, sub_txs, plane_bsize,
                  above_ctx, left_ctx, rd_stats);
      block += step;
    }
  }
}
5675
// Computes the chroma rate-distortion cost of a block under the transform
// partitioning inherited from luma (see tx_block_rd).
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
                            RD_STATS *rd_stats, BLOCK_SIZE bsize,
                            int64_t ref_best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int plane;
  int is_cost_valid = 1;
  int64_t this_rd;

  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
  if (x->skip_chroma_rd) return is_cost_valid;
  bsize = scale_chroma_bsize(mbmi->sb_type, xd->plane[1].subsampling_x,
                             xd->plane[1].subsampling_y);
#endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2

#if CONFIG_EXT_TX && CONFIG_RECT_TX
  // Rectangular transforms take the non-recursive path.
  if (is_rect_tx(mbmi->tx_size)) {
    return super_block_uvrd(cpi, x, rd_stats, bsize, ref_best_rd);
  }
#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX

  // Compute the chroma residuals up front for inter blocks.
  if (is_inter_block(mbmi) && is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      av1_subtract_plane(x, bsize, plane);
  }

  for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
    const int bh = tx_size_high_unit[max_tx_size];
    const int bw = tx_size_wide_unit[max_tx_size];
    int idx, idy;
    int block = 0;
    const int step = bh * bw;
    ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
    ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
    RD_STATS pn_rd_stats;
    av1_init_rd_stats(&pn_rd_stats);

    av1_get_entropy_contexts(bsize, 0, pd, ta, tl);

    // Accumulate cost over all maximum-size transform units of the plane.
    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bw) {
        tx_block_rd(cpi, x, idy, idx, plane, block, max_tx_size, plane_bsize,
                    ta, tl, &pn_rd_stats);
        block += step;
      }
    }

    if (pn_rd_stats.rate == INT_MAX) {
      is_cost_valid = 0;
      break;
    }

    av1_merge_rd_stats(rd_stats, &pn_rd_stats);

    // Terminate as soon as the running cost (cheaper of coding coefficients
    // or skipping) can no longer beat the reference cost.
    this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist),
                     RDCOST(x->rdmult, 0, rd_stats->sse));

    if (this_rd > ref_best_rd) {
      is_cost_valid = 0;
      break;
    }
  }

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }

  return is_cost_valid;
}
5757 #endif // CONFIG_VAR_TX
5758
// Searches palette modes for the chroma planes (U and V jointly) of an
// intra block. Runs 2-D k-means over (U,V) sample pairs for decreasing
// palette sizes; when a palette beats *best_rd, records the winning mode in
// best_mbmi / best_palette_color_map and updates the output rate/distortion.
static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int dc_mode_cost,
                                       uint8_t *best_palette_color_map,
                                       MB_MODE_INFO *const best_mbmi,
                                       int64_t *best_rd, int *rate,
                                       int *rate_tokenonly, int64_t *distortion,
                                       int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  assert(bsize >= BLOCK_8X8);
  int this_rate;
  int64_t this_rd;
  int colors_u, colors_v, colors;
  const int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  uint8_t *const color_map = xd->plane[1].color_index_map;
  RD_STATS tokenonly_rd_stats;
  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);
  if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return;

  mbmi->uv_mode = UV_DC_PRED;
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA

  // Count distinct colors per plane; a palette is only considered when the
  // larger count is in (1, 64].
#if CONFIG_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth) {
    colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
                                       cpi->common.bit_depth);
    colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
                                       cpi->common.bit_depth);
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    colors_u = av1_count_colors(src_u, src_stride, rows, cols);
    colors_v = av1_count_colors(src_v, src_stride, rows, cols);
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH

#if CONFIG_PALETTE_DELTA_ENCODING
  // Palette colors of neighboring blocks, used to cheapen delta coding.
  uint16_t color_cache[2 * PALETTE_MAX_SIZE];
  const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
#endif  // CONFIG_PALETTE_DELTA_ENCODING

  colors = colors_u > colors_v ? colors_u : colors_v;
  if (colors > 1 && colors <= 64) {
    int r, c, n, i, j;
    const int max_itr = 50;
    float lb_u, ub_u, val_u;
    float lb_v, ub_v, val_v;
    float *const data = x->palette_buffer->kmeans_data_buf;
    float centroids[2 * PALETTE_MAX_SIZE];

    // Gather interleaved (U,V) sample pairs into 'data' and track each
    // channel's min/max to seed the k-means centroids.
#if CONFIG_HIGHBITDEPTH
    uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
    uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
    if (cpi->common.use_highbitdepth) {
      lb_u = src_u16[0];
      ub_u = src_u16[0];
      lb_v = src_v16[0];
      ub_v = src_v16[0];
    } else {
#endif  // CONFIG_HIGHBITDEPTH
      lb_u = src_u[0];
      ub_u = src_u[0];
      lb_v = src_v[0];
      ub_v = src_v[0];
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH

    for (r = 0; r < rows; ++r) {
      for (c = 0; c < cols; ++c) {
#if CONFIG_HIGHBITDEPTH
        if (cpi->common.use_highbitdepth) {
          val_u = src_u16[r * src_stride + c];
          val_v = src_v16[r * src_stride + c];
          data[(r * cols + c) * 2] = val_u;
          data[(r * cols + c) * 2 + 1] = val_v;
        } else {
#endif  // CONFIG_HIGHBITDEPTH
          val_u = src_u[r * src_stride + c];
          val_v = src_v[r * src_stride + c];
          data[(r * cols + c) * 2] = val_u;
          data[(r * cols + c) * 2 + 1] = val_v;
#if CONFIG_HIGHBITDEPTH
        }
#endif  // CONFIG_HIGHBITDEPTH
        if (val_u < lb_u)
          lb_u = val_u;
        else if (val_u > ub_u)
          ub_u = val_u;
        if (val_v < lb_v)
          lb_v = val_v;
        else if (val_v > ub_v)
          ub_v = val_v;
      }
    }

    // Try palette sizes from the largest useful size down to 2, seeding the
    // centroids uniformly across each channel's observed range.
    for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
         --n) {
      for (i = 0; i < n; ++i) {
        centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
        centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
      }
      av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
#if CONFIG_PALETTE_DELTA_ENCODING
      optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
      // Sort the U channel colors in ascending order.
      for (i = 0; i < 2 * (n - 1); i += 2) {
        int min_idx = i;
        float min_val = centroids[i];
        for (j = i + 2; j < 2 * n; j += 2)
          if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
        if (min_idx != i) {
          float temp_u = centroids[i], temp_v = centroids[i + 1];
          centroids[i] = centroids[min_idx];
          centroids[i + 1] = centroids[min_idx + 1];
          centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
        }
      }
      // Re-map indices after the centroids were adjusted/sorted.
      av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
#endif  // CONFIG_PALETTE_DELTA_ENCODING
      extend_palette_color_map(color_map, cols, rows, plane_block_width,
                               plane_block_height);
      pmi->palette_size[1] = n;
      // Write the centroids back as the U (i == 1) and V (i == 2) palettes.
      for (i = 1; i < 3; ++i) {
        for (j = 0; j < n; ++j) {
#if CONFIG_HIGHBITDEPTH
          if (cpi->common.use_highbitdepth)
            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
                (int)centroids[j * 2 + i - 1], cpi->common.bit_depth);
          else
#endif  // CONFIG_HIGHBITDEPTH
            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
                clip_pixel((int)centroids[j * 2 + i - 1]);
        }
      }

      super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
      if (tokenonly_rd_stats.rate == INT_MAX) continue;
      // Total rate: residual + mode/size/color-index/palette signaling.
      this_rate =
          tokenonly_rd_stats.rate + dc_mode_cost +
          x->palette_uv_size_cost[bsize - BLOCK_8X8][n - PALETTE_MIN_SIZE] +
          write_uniform_cost(n, color_map[0]) +
          av1_cost_bit(
              av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 1);
      this_rate += av1_palette_color_cost_uv(pmi,
#if CONFIG_PALETTE_DELTA_ENCODING
                                             color_cache, n_cache,
#endif  // CONFIG_PALETTE_DELTA_ENCODING
                                             cpi->common.bit_depth);
      this_rate +=
          av1_cost_color_map(x, 1, 0, bsize, mbmi->tx_size, PALETTE_MAP);
      this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        *best_mbmi = *mbmi;
        memcpy(best_palette_color_map, color_map,
               plane_block_width * plane_block_height *
                   sizeof(best_palette_color_map[0]));
        *rate = this_rate;
        *distortion = tokenonly_rd_stats.dist;
        *rate_tokenonly = tokenonly_rd_stats.rate;
        *skippable = tokenonly_rd_stats.skip;
      }
    }
  }
  // Restore the winning color map, since later trials overwrote color_map.
  if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
    memcpy(color_map, best_palette_color_map,
           plane_block_width * plane_block_height *
               sizeof(best_palette_color_map[0]));
  }
}
5939
5940 #if CONFIG_FILTER_INTRA
5941 // Return 1 if an filter intra mode is selected; return 0 otherwise.
static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                     int *rate, int *rate_tokenonly,
                                     int64_t *distortion, int *skippable,
                                     BLOCK_SIZE bsize, int64_t *best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  FILTER_INTRA_MODE_INFO best_fi_info;
  int found_better = 0;
  FILTER_INTRA_MODE m;

  av1_zero(best_fi_info);
  // Filter intra is evaluated on top of the chroma DC mode with palette off.
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->palette_mode_info.palette_size[1] = 0;

  for (m = 0; m < FILTER_INTRA_MODES; ++m) {
    RD_STATS tokenonly_rd_stats;
    mbmi->filter_intra_mode_info.filter_intra_mode[1] = m;
    // super_block_uvrd() prunes candidates that cannot beat *best_rd.
    if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd))
      continue;

    const int this_rate =
        tokenonly_rd_stats.rate +
        av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) +
        x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
        write_uniform_cost(FILTER_INTRA_MODES, m);
    const int64_t this_rd =
        RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
    if (this_rd < *best_rd) {
      *best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
      best_fi_info = mbmi->filter_intra_mode_info;
      found_better = 1;
    }
  }

  if (!found_better) return 0;

  // Leave the winning filter-intra configuration in mbmi for the caller.
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
      best_fi_info.use_filter_intra_mode[1];
  mbmi->filter_intra_mode_info.filter_intra_mode[1] =
      best_fi_info.filter_intra_mode[1];
  return 1;
}
5992 #endif // CONFIG_FILTER_INTRA
5993
5994 #if CONFIG_EXT_INTRA
// Run RD calculation with a given chroma intra prediction angle, and return
// the RD cost. Update the best mode info if the RD cost is the best so far.
static int64_t pick_intra_angle_routine_sbuv(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
    int *best_angle_delta, int64_t *best_rd) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  RD_STATS uv_stats;

  // Token-only search; bail out when it cannot beat the pruning threshold.
  if (!super_block_uvrd(cpi, x, &uv_stats, bsize, best_rd_in))
    return INT64_MAX;

  const int total_rate = uv_stats.rate + rate_overhead;
  const int64_t rd = RDCOST(x->rdmult, total_rate, uv_stats.dist);
  if (rd < *best_rd) {
    // New best: record the RD cost, the angle and the stats for the caller.
    *best_rd = rd;
    *best_angle_delta = mbmi->angle_delta[1];
    *rate = total_rate;
    rd_stats->rate = uv_stats.rate;
    rd_stats->dist = uv_stats.dist;
    rd_stats->skip = uv_stats.skip;
  }
  return rd;
}
6021
6022 // With given chroma directional intra prediction mode, pick the best angle
6023 // delta. Return true if a RD cost that is smaller than the input one is found.
static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bsize, int rate_overhead,
                                    int64_t best_rd, int *rate,
                                    RD_STATS *rd_stats) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  int i, angle_delta, best_angle_delta = 0;
  // rd_cost[2 * delta + i] caches the RD cost of candidate (delta, sign i)
  // from the first (even-delta) pass so the second pass can prune neighbors.
  int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];

  rd_stats->rate = INT_MAX;
  rd_stats->skip = 0;
  rd_stats->dist = INT64_MAX;
  for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;

  // First pass: evaluate delta 0 and the even angle deltas for both signs.
  for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    for (i = 0; i < 2; ++i) {
      // Allow a slightly worse-than-best pruning threshold so near misses
      // still yield a cached cost; delta 0 uses a tighter margin (>> 3).
      best_rd_in = (best_rd == INT64_MAX)
                       ? INT64_MAX
                       : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
      mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;  // i==0: +, i==1: -
      this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
                                              best_rd_in, rate, rd_stats,
                                              &best_angle_delta, &best_rd);
      rd_cost[2 * angle_delta + i] = this_rd;
      if (angle_delta == 0) {
        // Delta 0 has no sign. If even delta 0 is pruned, give up entirely.
        if (this_rd == INT64_MAX) return 0;
        rd_cost[1] = this_rd;
        break;
      }
    }
  }

  assert(best_rd != INT64_MAX);
  // Second pass: evaluate the odd angle deltas, skipping those whose two
  // even neighbors were both clearly worse than the current best.
  for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    int64_t rd_thresh;
    for (i = 0; i < 2; ++i) {
      int skip_search = 0;
      rd_thresh = best_rd + (best_rd >> 5);
      if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
        skip_search = 1;
      if (!skip_search) {
        mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
        pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                      rate, rd_stats, &best_angle_delta,
                                      &best_rd);
      }
    }
  }

  mbmi->angle_delta[1] = best_angle_delta;
  return rd_stats->rate != INT_MAX;
}
6078 #endif // CONFIG_EXT_INTRA
6079
6080 #if CONFIG_CFL
cfl_alpha_dist_lbd(const int16_t * pred_buf_q3,const uint8_t * src,int src_stride,int width,int height,int dc_pred,int alpha_q3,int64_t * dist_neg_out)6081 static int64_t cfl_alpha_dist_lbd(const int16_t *pred_buf_q3,
6082 const uint8_t *src, int src_stride, int width,
6083 int height, int dc_pred, int alpha_q3,
6084 int64_t *dist_neg_out) {
6085 int64_t dist = 0;
6086 int diff;
6087
6088 if (alpha_q3 == 0) {
6089 for (int j = 0; j < height; j++) {
6090 for (int i = 0; i < width; i++) {
6091 diff = src[i] - dc_pred;
6092 dist += diff * diff;
6093 }
6094 src += src_stride;
6095 }
6096
6097 if (dist_neg_out) *dist_neg_out = dist;
6098
6099 return dist;
6100 }
6101
6102 int64_t dist_neg = 0;
6103 for (int j = 0; j < height; j++) {
6104 for (int i = 0; i < width; i++) {
6105 const int uv = src[i];
6106 const int scaled_luma = get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]);
6107
6108 diff = uv - clip_pixel(scaled_luma + dc_pred);
6109 dist += diff * diff;
6110
6111 diff = uv - clip_pixel(-scaled_luma + dc_pred);
6112 dist_neg += diff * diff;
6113 }
6114 pred_buf_q3 += MAX_SB_SIZE;
6115 src += src_stride;
6116 }
6117
6118 if (dist_neg_out) *dist_neg_out = dist_neg;
6119
6120 return dist;
6121 }
6122 #if CONFIG_HIGHBITDEPTH
cfl_alpha_dist_hbd(const int16_t * pred_buf_q3,const uint16_t * src,int src_stride,int width,int height,int dc_pred,int alpha_q3,int bit_depth,int64_t * dist_neg_out)6123 static int64_t cfl_alpha_dist_hbd(const int16_t *pred_buf_q3,
6124 const uint16_t *src, int src_stride,
6125 int width, int height, int dc_pred,
6126 int alpha_q3, int bit_depth,
6127 int64_t *dist_neg_out) {
6128 const int shift = 2 * (bit_depth - 8);
6129 const int rounding = shift > 0 ? (1 << shift) >> 1 : 0;
6130 int64_t dist = 0;
6131 int diff;
6132
6133 if (alpha_q3 == 0) {
6134 for (int j = 0; j < height; j++) {
6135 for (int i = 0; i < width; i++) {
6136 diff = src[i] - dc_pred;
6137 dist += diff * diff;
6138 }
6139 src += src_stride;
6140 }
6141 dist = (dist + rounding) >> shift;
6142
6143 if (dist_neg_out) *dist_neg_out = dist;
6144
6145 return dist;
6146 }
6147
6148 int64_t dist_neg = 0;
6149 for (int j = 0; j < height; j++) {
6150 for (int i = 0; i < width; i++) {
6151 const int uv = src[i];
6152 const int scaled_luma = get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]);
6153
6154 diff = uv - clip_pixel_highbd(scaled_luma + dc_pred, bit_depth);
6155 dist += diff * diff;
6156
6157 diff = uv - clip_pixel_highbd(-scaled_luma + dc_pred, bit_depth);
6158 dist_neg += diff * diff;
6159 }
6160 pred_buf_q3 += MAX_SB_SIZE;
6161 src += src_stride;
6162 }
6163
6164 if (dist_neg_out) *dist_neg_out = (dist_neg + rounding) >> shift;
6165
6166 return (dist + rounding) >> shift;
6167 }
6168 #endif // CONFIG_HIGHBITDEPTH
// Dispatch the CfL alpha SSE computation to the high- or low-bit-depth
// implementation depending on the active pixel pipeline.
static int64_t cfl_alpha_dist(const int16_t *pred_buf_q3, const uint8_t *src,
                              int src_stride, int width, int height,
                              int dc_pred, int alpha_q3, int use_hbd,
                              int bit_depth, int64_t *dist_neg_out) {
#if CONFIG_HIGHBITDEPTH
  if (use_hbd) {
    return cfl_alpha_dist_hbd(pred_buf_q3, CONVERT_TO_SHORTPTR(src), src_stride,
                              width, height, dc_pred, alpha_q3, bit_depth,
                              dist_neg_out);
  }
#endif  // CONFIG_HIGHBITDEPTH
  (void)use_hbd;
  (void)bit_depth;
  return cfl_alpha_dist_lbd(pred_buf_q3, src, src_stride, width, height,
                            dc_pred, alpha_q3, dist_neg_out);
}
6185
// Search every CfL (chroma-from-luma) joint sign and alpha index combination,
// store the winner in mbmi (cfl_alpha_idx / cfl_alpha_signs), and return the
// winner's signaling rate.
static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
  const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
  const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
  const uint8_t *const src_u = p_u->src.buf;
  const uint8_t *const src_v = p_v->src.buf;
  const int src_stride_u = p_u->src.stride;
  const int src_stride_v = p_v->src.stride;

  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

  CFL_CTX *const cfl = xd->cfl;
  cfl_compute_parameters(xd, tx_size);
  const int width = cfl->uv_width;
  const int height = cfl->uv_height;
  const int dc_pred_u = cfl->dc_pred[CFL_PRED_U];
  const int dc_pred_v = cfl->dc_pred[CFL_PRED_V];
  const int16_t *pred_buf_q3 = cfl->pred_buf_q3;
  const int use_hbd = get_bitdepth_data_path_index(xd);

  // sse[plane][m] layout: slot 0 holds alpha == 0; for magnitude c + 1,
  // slot 2c+1 holds +alpha and slot 2c+2 holds -alpha.
  int64_t sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
  sse[CFL_PRED_U][0] =
      cfl_alpha_dist(pred_buf_q3, src_u, src_stride_u, width, height, dc_pred_u,
                     0, use_hbd, xd->bd, NULL);
  sse[CFL_PRED_V][0] =
      cfl_alpha_dist(pred_buf_q3, src_v, src_stride_v, width, height, dc_pred_v,
                     0, use_hbd, xd->bd, NULL);

  // One cfl_alpha_dist() call yields both the +alpha SSE (returned, slot m)
  // and the -alpha SSE (written through the out pointer, slot m + 1).
  for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
    const int m = c * 2 + 1;
    const int abs_alpha_q3 = c + 1;
    sse[CFL_PRED_U][m] = cfl_alpha_dist(
        pred_buf_q3, src_u, src_stride_u, width, height, dc_pred_u,
        abs_alpha_q3, use_hbd, xd->bd, &sse[CFL_PRED_U][m + 1]);
    sse[CFL_PRED_V][m] = cfl_alpha_dist(
        pred_buf_q3, src_v, src_stride_v, width, height, dc_pred_v,
        abs_alpha_q3, use_hbd, xd->bd, &sse[CFL_PRED_V][m + 1]);
  }

  int64_t dist;
  int64_t cost;
  int64_t best_cost = INT64_MAX;
  int best_rate = 0;

  // Compute least squares parameter of the entire block
  int ind = 0;
  int signs = 0;

  // Exhaustively evaluate every joint sign / alpha index combination,
  // trading the alpha signaling rate against the prediction distortion.
  for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
    const int sign_u = CFL_SIGN_U(joint_sign);
    const int sign_v = CFL_SIGN_V(joint_sign);
    // A zero sign for a plane collapses its alphabet to the single alpha 0.
    const int size_u = (sign_u == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
    const int size_v = (sign_v == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
    for (int u = 0; u < size_u; u++) {
      const int idx_u = (sign_u == CFL_SIGN_ZERO) ? 0 : u * 2 + 1;
      for (int v = 0; v < size_v; v++) {
        const int idx_v = (sign_v == CFL_SIGN_ZERO) ? 0 : v * 2 + 1;
        // +1 selects the -alpha slot when the plane's sign is negative.
        dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
               sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
        dist *= 16;  // NOTE(review): scales SSE for RDCOST — confirm units.
        const int rate = x->cfl_cost[joint_sign][CFL_PRED_U][u] +
                         x->cfl_cost[joint_sign][CFL_PRED_V][v];
        cost = RDCOST(x->rdmult, rate, dist);
        if (cost < best_cost) {
          best_cost = cost;
          best_rate = rate;
          ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
          signs = joint_sign;
        }
      }
    }
  }

  mbmi->cfl_alpha_idx = ind;
  mbmi->cfl_alpha_signs = signs;
  return best_rate;
}
6263 #endif // CONFIG_CFL
6264
// Reset the chroma mode decisions in mbmi to the defaults (DC prediction,
// no palette, no filter intra) before a fresh chroma mode search.
static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->palette_mode_info.palette_size[1] = 0;
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA
}
6272
// Chroma intra mode RD search. Tries every allowed UV intra mode (plus
// palette and filter intra when applicable), leaves the winner in *mbmi,
// fills the rate/distortion/skip outputs, and returns the best RD cost.
static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_rd = INT64_MAX, this_rd;
#if CONFIG_PVQ
  od_rollback_buffer buf;
  // Checkpoint the PVQ encoder state so every mode trial starts identically.
  od_encode_checkpoint(&x->daala_enc, &buf);
#endif  // CONFIG_PVQ
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);

  for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
    int this_rate;
    RD_STATS tokenonly_rd_stats;
    UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
#if CONFIG_EXT_INTRA
    const int is_directional_mode =
        av1_is_directional_mode(get_uv_mode(mode), mbmi->sb_type);
#endif  // CONFIG_EXT_INTRA
    // Honor the speed feature's per-tx-size intra mode mask.
    if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
          (1 << mode)))
      continue;

    mbmi->uv_mode = mode;
#if CONFIG_CFL
    int cfl_alpha_rate = 0;
    if (mode == UV_CFL_PRED) {
      assert(!is_directional_mode);
      const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
      // Pick the best CfL alpha for this block and note its signaling rate.
      cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size);
    }
#endif
#if CONFIG_EXT_INTRA
    mbmi->angle_delta[1] = 0;
    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
      // Directional mode: search the best angle delta as well.
      const int rate_overhead = x->intra_uv_mode_cost[mbmi->mode][mode] +
                                write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                    &this_rate, &tokenonly_rd_stats))
        continue;
    } else {
#endif  // CONFIG_EXT_INTRA
      if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
#if CONFIG_PVQ
        od_encode_rollback(&x->daala_enc, &buf);
#endif  // CONFIG_PVQ
        continue;
      }
#if CONFIG_EXT_INTRA
    }
#endif  // CONFIG_EXT_INTRA
    this_rate =
        tokenonly_rd_stats.rate + x->intra_uv_mode_cost[mbmi->mode][mode];

#if CONFIG_CFL
    if (mode == UV_CFL_PRED) {
      this_rate += cfl_alpha_rate;
    }
#endif
#if CONFIG_EXT_INTRA
    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
      // Add the cost of signaling the chosen angle delta.
      this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                      MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
    }
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
    // DC mode must also pay for the "filter intra off" flag.
    if (mbmi->sb_type >= BLOCK_8X8 && mode == UV_DC_PRED)
      this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 0);
#endif  // CONFIG_FILTER_INTRA
    // DC mode must also pay for the "palette off" flag when palette is legal.
    if (try_palette && mode == UV_DC_PRED)
      this_rate += av1_cost_bit(
          av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);

#if CONFIG_PVQ
    od_encode_rollback(&x->daala_enc, &buf);
#endif  // CONFIG_PVQ
    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
    }
  }

  // Palette and filter intra searches update best_mbmi/best_rd in place.
  if (try_palette) {
    uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
    rd_pick_palette_intra_sbuv(cpi, x,
                               x->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED],
                               best_palette_color_map, &best_mbmi, &best_rd,
                               rate, rate_tokenonly, distortion, skippable);
  }

#if CONFIG_FILTER_INTRA
  if (mbmi->sb_type >= BLOCK_8X8) {
    if (rd_pick_filter_intra_sbuv(cpi, x, rate, rate_tokenonly, distortion,
                                  skippable, bsize, &best_rd))
      best_mbmi = *mbmi;
  }
#endif  // CONFIG_FILTER_INTRA

  *mbmi = best_mbmi;
  // Make sure we actually chose a mode
  assert(best_rd < INT64_MAX);
  return best_rd;
}
6388
// Pick the best chroma intra mode for the block and report its rate,
// token-only rate, distortion, skip flag and mode via the output pointers.
static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 UV_PREDICTION_MODE *mode_uv) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  init_sbuv_mode(mbmi);
#if CONFIG_CB4X4
#if !CONFIG_CHROMA_2X2
  // No chroma RD for this block: report a zero-cost DC mode and return.
  if (x->skip_chroma_rd) {
    *rate_uv = 0;
    *rate_uv_tokenonly = 0;
    *dist_uv = 0;
    *skip_uv = 1;
    *mode_uv = UV_DC_PRED;
    return;
  }
  bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
                             xd->plane[AOM_PLANE_U].subsampling_y);
#endif  // !CONFIG_CHROMA_2X2
#if CONFIG_CFL
  // Only store reconstructed luma when there's chroma RDO. When there's no
  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
  xd->cfl->store_y = !x->skip_chroma_rd;
#endif  // CONFIG_CFL
#else
  bsize = bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize;
#if CONFIG_CFL
  xd->cfl->store_y = 1;
#endif  // CONFIG_CFL
#endif  // CONFIG_CB4X4
#if CONFIG_CFL
  if (xd->cfl->store_y) {
    // Perform one extra call to txfm_rd_in_plane(), with the values chosen
    // during luma RDO, so we can store reconstructed luma values
    RD_STATS this_rd_stats;
    txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
                     mbmi->sb_type, mbmi->tx_size,
                     cpi->sf.use_fast_coef_costing);
    xd->cfl->store_y = 0;
  }
#endif  // CONFIG_CFL
  rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                          bsize, max_tx_size);
  *mode_uv = mbmi->uv_mode;
}
6438
// Rate (in bits cost) of signaling the given inter prediction mode under
// the supplied mode context, following the NEWMV -> ZEROMV -> REFMV
// cascade of binary decisions.
static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return x
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
#if CONFIG_COMPOUND_SINGLEREF
  } else if (is_inter_singleref_comp_mode(mode)) {
    return x->inter_singleref_comp_mode_cost[mode_context]
                                            [INTER_SINGLEREF_COMP_OFFSET(mode)];
#endif  // CONFIG_COMPOUND_SINGLEREF
  }

  assert(is_inter_mode(mode));

  // First decision: NEWMV vs. the rest.
  int16_t ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return x->newmv_mode_cost[ctx][0];

  int cost = x->newmv_mode_cost[ctx][1];
  ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;

  // When every candidate MV is zero the remaining decisions are implied.
  if (mode_context & (1 << ALL_ZERO_FLAG_OFFSET)) return cost;

  // Second decision: ZEROMV vs. NEAREST/NEAR.
  if (mode == ZEROMV) return cost + x->zeromv_mode_cost[ctx][0];

  cost += x->zeromv_mode_cost[ctx][1];
  ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) ctx = 6;
  if (mode_context & (1 << SKIP_NEARMV_OFFSET)) ctx = 7;
  if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) ctx = 8;

  // Final decision: NEARESTMV vs. NEARMV.
  return cost + x->refmv_mode_cost[ctx][mode != NEARESTMV];
}
6482
6483 #if (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
// Number of extra bits needed to signal the parameters of the given
// inter-inter compound prediction type.
static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
                                             COMPOUND_TYPE comp_type) {
  (void)bsize;
  if (comp_type == COMPOUND_AVERAGE) return 0;
#if CONFIG_WEDGE
  if (comp_type == COMPOUND_WEDGE) return get_interinter_wedge_bits(bsize);
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
  if (comp_type == COMPOUND_SEG) return 1;
#endif  // CONFIG_COMPOUND_SEGMENT
  assert(0);
  return 0;
}
6498 #endif // (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
6499
// Per-candidate RD statistics kept while searching sub-block (segment)
// inter modes.
typedef struct {
  int eobs;          // End-of-block value of the coded coefficients.
  int brate;         // Rate of this candidate.
  int byrate;        // Token-only rate (presumably luma; verify at users).
  int64_t bdist;     // Distortion of this candidate.
  int64_t bsse;      // Sum of squared error against the source.
  int64_t brdcost;   // Combined RD cost of this candidate.
  int_mv mvs[2];     // Chosen motion vectors, one per reference.
  int_mv pred_mv[2]; // Predicted motion vectors.
  int_mv ref_mv[2];  // Reference motion vectors.

#if CONFIG_CHROMA_2X2
  // Entropy context snapshots taken after coding this candidate.
  ENTROPY_CONTEXT ta[4];
  ENTROPY_CONTEXT tl[4];
#else
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
#endif  // CONFIG_CHROMA_2X2
} SEG_RDSTAT;
6519
// Best-so-far results of the per-segment (sub8x8) inter mode search.
typedef struct {
  int_mv *ref_mv[2];  // Reference MVs for up to two reference frames.
  int_mv mvp;         // Motion vector predictor.

  int64_t segment_rd;  // Best RD cost found for the whole segment.
  int r;               // Rate of the best configuration.
  int64_t d;           // Distortion of the best configuration.
  int64_t sse;         // SSE of the best configuration.
  int segment_yrate;   // Token-only (luma) rate of the best configuration.
  PREDICTION_MODE modes[4];  // Winning mode for each of the 4 sub-blocks.
  // Per-sub-block, per-mode RD statistics gathered during the search.
#if CONFIG_COMPOUND_SINGLEREF
  SEG_RDSTAT rdstat[4][INTER_MODES + INTER_SINGLEREF_COMP_MODES +
                       INTER_COMPOUND_MODES];
#else   // !CONFIG_COMPOUND_SINGLEREF
  SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
#endif  // CONFIG_COMPOUND_SINGLEREF
  int mvthresh;  // MV threshold — NOTE(review): confirm use at call sites.
} BEST_SEG_INFO;
6538
mv_check_bounds(const MvLimits * mv_limits,const MV * mv)6539 static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
6540 return (mv->row >> 3) < mv_limits->row_min ||
6541 (mv->row >> 3) > mv_limits->row_max ||
6542 (mv->col >> 3) < mv_limits->col_min ||
6543 (mv->col >> 3) > mv_limits->col_max;
6544 }
6545
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
6547 // TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const AV1_COMP *const cpi, const MACROBLOCK *const x,
    const int16_t mode_context[TOTAL_REFS_PER_FRAME],
    const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
    int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
    const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
    int mi_row, int mi_col) {
  int_mv zeromv[2] = { {.as_int = 0 } };
#if CONFIG_GLOBAL_MOTION
  int comp_pred_mode = ref_frames[1] > INTRA_FRAME;
#endif
  (void)mi_row;
  (void)mi_col;
  (void)cpi;
#if CONFIG_GLOBAL_MOTION
  // Under global motion, "zero" MV means the global-motion-predicted vector
  // for each reference rather than a literal (0, 0).
  if (this_mode == ZEROMV || this_mode == ZERO_ZEROMV) {
    for (int cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
      zeromv[cur_frm].as_int =
          gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
                               cpi->common.allow_high_precision_mv, bsize,
                               mi_col, mi_row, block
#if CONFIG_AMVR
                               ,
                               cpi->common.cur_frame_mv_precision_level
#endif
                               )
              .as_int;
    }
  }
#endif  // CONFIG_GLOBAL_MOTION

  // Single-reference case: this candidate mode resolves to zero motion.
  if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
      frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
      (ref_frames[1] <= INTRA_FRAME ||
       frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
    int16_t rfc =
        av1_mode_context_analyzer(mode_context, ref_frames, bsize, block);
    // Cost of signaling the same zero motion through each competing mode.
    int c1 = cost_mv_ref(x, NEARMV, rfc);
    int c2 = cost_mv_ref(x, NEARESTMV, rfc);
    int c3 = cost_mv_ref(x, ZEROMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == ZEROMV);
      // ZEROMV is redundant when NEARESTMV/NEARMV already carry zero motion
      // at no greater cost.
      if (ref_frames[1] <= INTRA_FRAME) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
          return 0;
      }
    }
  } else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
              this_mode == ZERO_ZEROMV) &&
             frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
             frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
    // Compound case: same redundancy check using the compound mode costs.
    int16_t rfc = compound_mode_context[ref_frames[0]];
    int c2 = cost_mv_ref(x, NEAREST_NEARESTMV, rfc);
    int c3 = cost_mv_ref(x, ZERO_ZEROMV, rfc);
    int c5 = cost_mv_ref(x, NEAR_NEARMV, rfc);

    if (this_mode == NEAREST_NEARESTMV) {
      if (c2 > c3) return 0;
    } else if (this_mode == NEAR_NEARMV) {
      if (c5 > c3) return 0;
    } else {
      assert(this_mode == ZERO_ZEROMV);
      if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
          (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0))
        return 0;
    }
  }
  return 1;
}
6631
joint_motion_search(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int_mv * frame_mv,int_mv * frame_comp_mv,int mi_row,int mi_col,int_mv * ref_mv_sub8x8[2],const uint8_t * mask,int mask_stride,int * rate_mv,const int block)6632 static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
6633 BLOCK_SIZE bsize, int_mv *frame_mv,
6634 #if CONFIG_COMPOUND_SINGLEREF
6635 int_mv *frame_comp_mv,
6636 #endif // CONFIG_COMPOUND_SINGLEREF
6637 int mi_row, int mi_col,
6638 int_mv *ref_mv_sub8x8[2], const uint8_t *mask,
6639 int mask_stride, int *rate_mv,
6640 const int block) {
6641 const AV1_COMMON *const cm = &cpi->common;
6642 const int pw = block_size_wide[bsize];
6643 const int ph = block_size_high[bsize];
6644 MACROBLOCKD *xd = &x->e_mbd;
6645 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6646 // This function should only ever be called for compound modes
6647 #if CONFIG_COMPOUND_SINGLEREF
6648 if (!has_second_ref(mbmi)) {
6649 assert(is_inter_singleref_comp_mode(mbmi->mode));
6650 assert(frame_comp_mv);
6651 }
6652 assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode));
6653 const int refs[2] = { mbmi->ref_frame[0],
6654 has_second_ref(mbmi) ? mbmi->ref_frame[1]
6655 : mbmi->ref_frame[0] };
6656 #else
6657 assert(has_second_ref(mbmi));
6658 const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
6659 #endif // CONFIG_COMPOUND_SINGLEREF
6660 int_mv ref_mv[2];
6661 int ite, ref;
6662 struct scale_factors sf;
6663 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6664 // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
6665 const int ic = block & 1;
6666 const int ir = (block - ic) >> 1;
6667 struct macroblockd_plane *const pd = &xd->plane[0];
6668 const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
6669 const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
6670 #if CONFIG_GLOBAL_MOTION
6671 int is_global[2];
6672 #if CONFIG_COMPOUND_SINGLEREF
6673 for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref)
6674 #else
6675 for (ref = 0; ref < 2; ++ref)
6676 #endif // CONFIG_COMPOUND_SINGLEREF
6677 {
6678 WarpedMotionParams *const wm =
6679 &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
6680 is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
6681 }
6682 #if CONFIG_COMPOUND_SINGLEREF
6683 if (!has_second_ref(mbmi)) is_global[1] = is_global[0];
6684 #endif // CONFIG_COMPOUND_SINGLEREF
6685 #endif // CONFIG_GLOBAL_MOTION
6686 #else // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6687 (void)block;
6688 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6689
6690 // Do joint motion search in compound mode to get more accurate mv.
6691 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
6692 int last_besterr[2] = { INT_MAX, INT_MAX };
6693 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
6694 av1_get_scaled_ref_frame(cpi, refs[0]),
6695 av1_get_scaled_ref_frame(cpi, refs[1])
6696 };
6697
6698 // Prediction buffer from second frame.
6699 #if CONFIG_HIGHBITDEPTH
6700 DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
6701 uint8_t *second_pred;
6702 #else
6703 DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
6704 #endif // CONFIG_HIGHBITDEPTH
6705
6706 #if CONFIG_CB4X4
6707 (void)ref_mv_sub8x8;
6708 #endif // CONFIG_CB4X4
6709
6710 #if CONFIG_COMPOUND_SINGLEREF
6711 for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref)
6712 #else
6713 for (ref = 0; ref < 2; ++ref)
6714 #endif // CONFIG_COMPOUND_SINGLEREF
6715 {
6716 #if !CONFIG_CB4X4
6717 if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
6718 ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
6719 else
6720 #endif // !CONFIG_CB4X4
6721 ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
6722
6723 if (scaled_ref_frame[ref]) {
6724 int i;
6725 // Swap out the reference frame for a version that's been scaled to
6726 // match the resolution of the current frame, allowing the existing
6727 // motion search code to be used without additional modifications.
6728 for (i = 0; i < MAX_MB_PLANE; i++)
6729 backup_yv12[ref][i] = xd->plane[i].pre[ref];
6730 av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
6731 NULL);
6732 }
6733 }
6734
6735 #if CONFIG_COMPOUND_SINGLEREF
6736 if (!has_second_ref(mbmi)) {
6737 assert(is_inter_singleref_comp_mode(mbmi->mode));
6738 // NOTE: For single ref comp mode, set up the 2nd set of ref_mv/pre_planes
6739 // all from the 1st reference frame, i.e. refs[0].
6740 ref_mv[1] = x->mbmi_ext->ref_mvs[refs[0]][0];
6741 if (scaled_ref_frame[0]) {
6742 int i;
6743 // Swap out the reference frame for a version that's been scaled to
6744 // match the resolution of the current frame, allowing the existing
6745 // motion search code to be used without additional modifications.
6746 for (i = 0; i < MAX_MB_PLANE; i++)
6747 backup_yv12[1][i] = xd->plane[i].pre[1];
6748 av1_setup_pre_planes(xd, 1, scaled_ref_frame[0], mi_row, mi_col, NULL);
6749 }
6750 }
6751 #endif // CONFIG_COMPOUND_SINGLEREF
6752
6753 // Since we have scaled the reference frames to match the size of the current
6754 // frame we must use a unit scaling factor during mode selection.
6755 #if CONFIG_HIGHBITDEPTH
6756 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
6757 cm->height, cm->use_highbitdepth);
6758 #else
6759 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
6760 cm->height);
6761 #endif // CONFIG_HIGHBITDEPTH
6762
6763 // Allow joint search multiple times iteratively for each reference frame
6764 // and break out of the search loop if it couldn't find a better mv.
6765 #if CONFIG_COMPOUND_SINGLEREF
6766 const int num_ites =
6767 (has_second_ref(mbmi) || mbmi->mode == SR_NEW_NEWMV) ? 4 : 1;
6768 const int start_ite = has_second_ref(mbmi) ? 0 : 1;
6769 for (ite = start_ite; ite < (start_ite + num_ites); ite++)
6770 #else
6771 for (ite = 0; ite < 4; ite++)
6772 #endif // CONFIG_COMPOUND_SINGLEREF
6773 {
6774 struct buf_2d ref_yv12[2];
6775 int bestsme = INT_MAX;
6776 int sadpb = x->sadperbit16;
6777 MV *const best_mv = &x->best_mv.as_mv;
6778 int search_range = 3;
6779
6780 MvLimits tmp_mv_limits = x->mv_limits;
6781 int id = ite % 2; // Even iterations search in the first reference frame,
6782 // odd iterations search in the second. The predictor
6783 // found for the 'other' reference frame is factored in.
6784 const int plane = 0;
6785 ConvolveParams conv_params = get_conv_params(!id, 0, plane);
6786 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6787 WarpTypesAllowed warp_types;
6788 #if CONFIG_GLOBAL_MOTION
6789 warp_types.global_warp_allowed = is_global[!id];
6790 #endif // CONFIG_GLOBAL_MOTION
6791 #if CONFIG_WARPED_MOTION
6792 warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
6793 #endif // CONFIG_WARPED_MOTION
6794 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6795
6796 // Initialized here because of compiler problem in Visual Studio.
6797 ref_yv12[0] = xd->plane[plane].pre[0];
6798 ref_yv12[1] = xd->plane[plane].pre[1];
6799
6800 // Get the prediction block from the 'other' reference frame.
6801 #if CONFIG_COMPOUND_SINGLEREF
6802 MV *const the_other_mv = (has_second_ref(mbmi) || id)
6803 ? &frame_mv[refs[!id]].as_mv
6804 : &frame_comp_mv[refs[0]].as_mv;
6805 #endif // CONFIG_COMPOUND_SINGLEREF
6806
6807 #if CONFIG_HIGHBITDEPTH
6808 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6809 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
6810 av1_highbd_build_inter_predictor(
6811 ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
6812 #if CONFIG_COMPOUND_SINGLEREF
6813 the_other_mv,
6814 #else // !(CONFIG_COMPOUND_SINGLEREF)
6815 &frame_mv[refs[!id]].as_mv,
6816 #endif // CONFIG_COMPOUND_SINGLEREF
6817 &sf, pw, ph, 0, mbmi->interp_filters,
6818 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6819 &warp_types, p_col, p_row,
6820 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6821 plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
6822 } else {
6823 second_pred = (uint8_t *)second_pred_alloc_16;
6824 #endif // CONFIG_HIGHBITDEPTH
6825 av1_build_inter_predictor(
6826 ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
6827 #if CONFIG_COMPOUND_SINGLEREF
6828 the_other_mv,
6829 #else // !(CONFIG_COMPOUND_SINGLEREF)
6830 &frame_mv[refs[!id]].as_mv,
6831 #endif // CONFIG_COMPOUND_SINGLEREF
6832 &sf, pw, ph, &conv_params, mbmi->interp_filters,
6833 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6834 &warp_types, p_col, p_row, plane, !id,
6835 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6836 MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
6837 #if CONFIG_HIGHBITDEPTH
6838 }
6839 #endif // CONFIG_HIGHBITDEPTH
6840
6841 // Do compound motion search on the current reference frame.
6842 if (id) xd->plane[plane].pre[0] = ref_yv12[id];
6843 av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
6844
    // Use the mv result from the single mode as mv predictor.
6847 #if CONFIG_COMPOUND_SINGLEREF
6848 if (!has_second_ref(mbmi) && id)
6849 *best_mv = frame_comp_mv[refs[0]].as_mv;
6850 else
6851 #endif // CONFIG_COMPOUND_SINGLEREF
6852 *best_mv = frame_mv[refs[id]].as_mv;
6853
6854 best_mv->col >>= 3;
6855 best_mv->row >>= 3;
6856
6857 #if CONFIG_COMPOUND_SINGLEREF
6858 if (!has_second_ref(mbmi))
6859 av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
6860 else
6861 #endif // CONFIG_COMPOUND_SINGLEREF
6862 av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
6863
6864 // Small-range full-pixel motion search.
6865 bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
6866 &cpi->fn_ptr[bsize], mask, mask_stride,
6867 id, &ref_mv[id].as_mv, second_pred);
6868 if (bestsme < INT_MAX) {
6869 if (mask)
6870 bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
6871 second_pred, mask, mask_stride, id,
6872 &cpi->fn_ptr[bsize], 1);
6873 else
6874 bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
6875 second_pred, &cpi->fn_ptr[bsize], 1);
6876 }
6877
6878 x->mv_limits = tmp_mv_limits;
6879
6880 #if CONFIG_AMVR
6881 if (cpi->common.cur_frame_mv_precision_level) {
6882 x->best_mv.as_mv.row *= 8;
6883 x->best_mv.as_mv.col *= 8;
6884 }
6885 if (bestsme < INT_MAX && cpi->common.cur_frame_mv_precision_level == 0)
6886 #else
6887 if (bestsme < INT_MAX)
6888 #endif
6889 {
6890 int dis; /* TODO: use dis in distortion calculation later. */
6891 unsigned int sse;
6892 bestsme = cpi->find_fractional_mv_step(
6893 x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
6894 x->errorperbit, &cpi->fn_ptr[bsize], 0,
6895 cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
6896 &dis, &sse, second_pred, mask, mask_stride, id, pw, ph,
6897 cpi->sf.use_upsampled_references);
6898 }
6899
6900 // Restore the pointer to the first (possibly scaled) prediction buffer.
6901 if (id) xd->plane[plane].pre[0] = ref_yv12[0];
6902
6903 if (bestsme < last_besterr[id]) {
6904 #if CONFIG_COMPOUND_SINGLEREF
6905 // NOTE: For single ref comp mode, frame_mv stores the first mv and
6906 // frame_comp_mv stores the second mv.
6907 if (!has_second_ref(mbmi) && id)
6908 frame_comp_mv[refs[0]].as_mv = *best_mv;
6909 else
6910 #endif // CONFIG_COMPOUND_SINGLEREF
6911 frame_mv[refs[id]].as_mv = *best_mv;
6912 last_besterr[id] = bestsme;
6913 #if CONFIG_COMPOUND_SINGLEREF
6914 if (!has_second_ref(mbmi)) last_besterr[!id] = last_besterr[id];
6915 #endif // CONFIG_COMPOUND_SINGLEREF
6916 } else {
6917 break;
6918 }
6919 }
6920
6921 *rate_mv = 0;
6922
6923 #if CONFIG_COMPOUND_SINGLEREF
6924 for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref)
6925 #else
6926 for (ref = 0; ref < 2; ++ref)
6927 #endif // CONFIG_COMPOUND_SINGLEREF
6928 {
6929 if (scaled_ref_frame[ref]) {
6930 // Restore the prediction frame pointers to their unscaled versions.
6931 int i;
6932 for (i = 0; i < MAX_MB_PLANE; i++)
6933 xd->plane[i].pre[ref] = backup_yv12[ref][i];
6934 }
6935
6936 #if CONFIG_COMPOUND_SINGLEREF
6937 if (!has_second_ref(mbmi))
6938 av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
6939 else
6940 #endif // CONFIG_COMPOUND_SINGLEREF
6941 av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
6942
6943 #if CONFIG_COMPOUND_SINGLEREF
6944 if (!has_second_ref(mbmi)) {
6945 // NOTE: For single ref comp mode, i.e. !has_second_ref(mbmi) is true, the
6946 // first mv is stored in frame_mv[] and the second mv is stored in
6947 // frame_comp_mv[].
6948 if (compound_ref0_mode(mbmi->mode) == NEWMV) // SR_NEW_NEWMV
6949 *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
6950 &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
6951 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6952 assert(compound_ref1_mode(mbmi->mode) == NEWMV);
6953 *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv,
6954 &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
6955 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6956 } else {
6957 #endif // CONFIG_COMPOUND_SINGLEREF
6958 #if !CONFIG_CB4X4
6959 if (bsize >= BLOCK_8X8)
6960 #endif // !CONFIG_CB4X4
6961 *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
6962 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
6963 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6964 #if !CONFIG_CB4X4
6965 else
6966 *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
6967 &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
6968 x->mvcost, MV_COST_WEIGHT);
6969 #endif // !CONFIG_CB4X4
6970 #if CONFIG_COMPOUND_SINGLEREF
6971 }
6972 #endif // CONFIG_COMPOUND_SINGLEREF
6973 }
6974
6975 #if CONFIG_COMPOUND_SINGLEREF
6976 if (!has_second_ref(mbmi)) {
6977 if (scaled_ref_frame[0]) {
6978 // Restore the prediction frame pointers to their unscaled versions.
6979 int i;
6980 for (i = 0; i < MAX_MB_PLANE; i++)
6981 xd->plane[i].pre[1] = backup_yv12[1][i];
6982 }
6983 }
6984 #endif // CONFIG_COMPOUND_SINGLEREF
6985 }
6986
// Computes the bit costs of signalling each possible reference-frame choice
// for the current block context, so the RD loop can charge the proper rate to
// every candidate mode:
//   - ref_costs_single[ref]: cost of coding 'ref' as a single reference.
//   - ref_costs_comp[...]:   cost of the compound-reference choice. Under
//     CONFIG_EXT_COMP_REFS this is a 2-D [ref0][ref1] table; otherwise a 1-D
//     per-frame table.
//   - *comp_mode_p: probability used to signal single vs. compound
//     prediction; 128 (neutral) unless the frame uses REFERENCE_MODE_SELECT.
// Costs are built by walking the binary signalling trees and accumulating
// av1_cost_bit() for each decision bit on the path to the reference.
static void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id,
    unsigned int *ref_costs_single,
#if CONFIG_EXT_COMP_REFS
    unsigned int (*ref_costs_comp)[TOTAL_REFS_PER_FRAME],
#else
    unsigned int *ref_costs_comp,
#endif  // CONFIG_EXT_COMP_REFS
    aom_prob *comp_mode_p) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // The segment feature fixes the reference frame, so no reference bits are
    // coded for this block: all costs are zero, compound prob is neutral.
    memset(ref_costs_single, 0,
           TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
#if CONFIG_EXT_COMP_REFS
    int ref_frame;
    for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             TOTAL_REFS_PER_FRAME * sizeof((*ref_costs_comp)[0]));
#else
    memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
#endif  // CONFIG_EXT_COMP_REFS

    *comp_mode_p = 128;
  } else {
    aom_prob intra_inter_p = av1_get_intra_inter_prob(cm, xd);
    aom_prob comp_inter_p = 128;

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      comp_inter_p = av1_get_reference_mode_prob(cm, xd);
      *comp_mode_p = comp_inter_p;
    } else {
      *comp_mode_p = 128;
    }

    // Intra costs one "is inter?" bit with value 0.
    ref_costs_single[INTRA_FRAME] = av1_cost_bit(intra_inter_p, 0);

    if (cm->reference_mode != COMPOUND_REFERENCE) {
      // Single-reference costs: every inter ref pays the "is inter" bit
      // (base_cost) plus the bits of its path down the single-ref tree.
      aom_prob ref_single_p1 = av1_get_pred_prob_single_ref_p1(cm, xd);
      aom_prob ref_single_p2 = av1_get_pred_prob_single_ref_p2(cm, xd);
#if CONFIG_EXT_REFS
      aom_prob ref_single_p3 = av1_get_pred_prob_single_ref_p3(cm, xd);
      aom_prob ref_single_p4 = av1_get_pred_prob_single_ref_p4(cm, xd);
      aom_prob ref_single_p5 = av1_get_pred_prob_single_ref_p5(cm, xd);
      aom_prob ref_single_p6 = av1_get_pred_prob_single_ref_p6(cm, xd);
#endif  // CONFIG_EXT_REFS

      unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);

      // Seed every single-ref entry with the "is inter" cost; the chained
      // assignment covers the extended refs only when they are compiled in.
      ref_costs_single[LAST_FRAME] =
#if CONFIG_EXT_REFS
          ref_costs_single[LAST2_FRAME] = ref_costs_single[LAST3_FRAME] =
              ref_costs_single[BWDREF_FRAME] =
                  ref_costs_single[ALTREF2_FRAME] =
#endif  // CONFIG_EXT_REFS
                      ref_costs_single[GOLDEN_FRAME] =
                          ref_costs_single[ALTREF_FRAME] = base_cost;

#if CONFIG_EXT_REFS
      // p1 splits {LAST,LAST2,LAST3,GOLDEN} (bit 0) from
      // {BWDREF,ALTREF2,ALTREF} (bit 1).
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);

      // p3 splits {LAST,LAST2} from {LAST3,GOLDEN}.
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p3, 0);
      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p3, 1);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);

      // p2 splits {BWDREF,ALTREF2} from {ALTREF}.
      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);

      // Leaf decisions: p4 LAST vs LAST2, p5 LAST3 vs GOLDEN,
      // p6 BWDREF vs ALTREF2.
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p4, 1);

      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);

      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p6, 0);
      ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p6, 1);
#else   // !CONFIG_EXT_REFS
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);

      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
#endif  // CONFIG_EXT_REFS
    } else {
      // Frame is compound-only: single refs are never coded here; 512 is the
      // cost of one max-uncertainty bit, used as a placeholder.
      ref_costs_single[LAST_FRAME] = 512;
#if CONFIG_EXT_REFS
      ref_costs_single[LAST2_FRAME] = 512;
      ref_costs_single[LAST3_FRAME] = 512;
      ref_costs_single[BWDREF_FRAME] = 512;
      ref_costs_single[ALTREF2_FRAME] = 512;
#endif  // CONFIG_EXT_REFS
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }

    if (cm->reference_mode != SINGLE_REFERENCE) {
      aom_prob ref_comp_p = av1_get_pred_prob_comp_ref_p(cm, xd);
#if CONFIG_EXT_REFS
      aom_prob ref_comp_p1 = av1_get_pred_prob_comp_ref_p1(cm, xd);
      aom_prob ref_comp_p2 = av1_get_pred_prob_comp_ref_p2(cm, xd);
      aom_prob bwdref_comp_p = av1_get_pred_prob_comp_bwdref_p(cm, xd);
      aom_prob bwdref_comp_p1 = av1_get_pred_prob_comp_bwdref_p1(cm, xd);
#endif  // CONFIG_EXT_REFS

      unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);

#if CONFIG_EXT_COMP_REFS
      // Bidirectional compound: cost each forward and each backward ref
      // separately, then sum per (forward, backward) pair below.
      aom_prob comp_ref_type_p = av1_get_comp_reference_type_prob(cm, xd);
      unsigned int ref_bicomp_costs[TOTAL_REFS_PER_FRAME] = { 0 };

      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
#if USE_UNI_COMP_REFS
              base_cost + av1_cost_bit(comp_ref_type_p, 1);
#else
              base_cost;
#endif  // USE_UNI_COMP_REFS
      // Backward refs carry no base cost: it is charged on the forward side.
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
      ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);

      ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
      ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);

      ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
      ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);

      ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_bicomp_costs[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_bicomp_costs[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);

      ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p1, 0);
      ref_bicomp_costs[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p1, 1);

      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // Unidirectional compound pairs get their own signalling tree.
      aom_prob uni_comp_ref_p = av1_get_pred_prob_uni_comp_ref_p(cm, xd);
      aom_prob uni_comp_ref_p1 = av1_get_pred_prob_uni_comp_ref_p1(cm, xd);
      aom_prob uni_comp_ref_p2 = av1_get_pred_prob_uni_comp_ref_p2(cm, xd);

      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 0);
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
          av1_cost_bit(uni_comp_ref_p2, 0);
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
          av1_cost_bit(uni_comp_ref_p2, 1);

      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 1);

#else  // !CONFIG_EXT_COMP_REFS

      // 1-D compound table: cost of the forward (and, with EXT_REFS, the
      // backward) reference of the compound pair.
      ref_costs_comp[LAST_FRAME] =
#if CONFIG_EXT_REFS
          ref_costs_comp[LAST2_FRAME] = ref_costs_comp[LAST3_FRAME] =
#endif  // CONFIG_EXT_REFS
              ref_costs_comp[GOLDEN_FRAME] = base_cost;

#if CONFIG_EXT_REFS
      ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF2_FRAME] =
          ref_costs_comp[ALTREF_FRAME] = 0;
#endif  // CONFIG_EXT_REFS

#if CONFIG_EXT_REFS
      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);

      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);

      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);

      // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
      //               more bit.
      ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_costs_comp[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);

      ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p1, 0);
      ref_costs_comp[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p1, 1);
#else   // !CONFIG_EXT_REFS
      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
#endif  // CONFIG_EXT_REFS
#endif  // CONFIG_EXT_COMP_REFS
    } else {
      // Frame is single-reference only: compound entries are placeholders.
#if CONFIG_EXT_COMP_REFS
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
#else   // !CONFIG_EXT_COMP_REFS
      ref_costs_comp[LAST_FRAME] = 512;
#if CONFIG_EXT_REFS
      ref_costs_comp[LAST2_FRAME] = 512;
      ref_costs_comp[LAST3_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME] = 512;
      ref_costs_comp[ALTREF2_FRAME] = 512;
      ref_costs_comp[ALTREF_FRAME] = 512;
#endif  // CONFIG_EXT_REFS
      ref_costs_comp[GOLDEN_FRAME] = 512;
#endif  // CONFIG_EXT_COMP_REFS
    }
  }
}
7224
// Snapshot the coding decisions for the current block into the pick-mode
// context, so the chosen mode can be restored later if the RD search settles
// on this candidate.
static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                                 int mode_index,
                                 int64_t comp_pred_diff[REFERENCE_MODES],
                                 int skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;

  // Mode selection and mode info snapshot.
  ctx->best_mode_index = mode_index;
  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;

  // Skip flags.
  ctx->skip = x->skip;
  ctx->skippable = skippable;

  // Per-reference-mode RD differences, truncated to int.
  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
  ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
  ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
}
7242
// Prepares everything needed to evaluate inter prediction from 'ref_frame'
// at (mi_row, mi_col):
//   - sets up the per-plane prediction buffers in yv12_mb[ref_frame],
//   - gathers and refines motion-vector candidates into
//     frame_nearest_mv / frame_near_mv and the mbmi_ext candidate lists,
//   - runs encoder-side MV prediction (av1_mv_pred) to pick a search centre.
static void setup_buffer_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, int mi_row, int mi_col,
    int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
    int_mv frame_near_mv[TOTAL_REFS_PER_FRAME],
    struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  // frame_refs is indexed from 0, reference frames from 1 (INTRA_FRAME is 0).
  const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  assert(yv12 != NULL);

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
                   mbmi_ext->ref_mv_stack[ref_frame],
                   mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
                   NULL, NULL, mbmi_ext->mode_context);

  // Candidate refinement carried out at encoder and decoder
#if CONFIG_AMVR
  av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
                        &frame_nearest_mv[ref_frame], &frame_near_mv[ref_frame],
                        cm->cur_frame_mv_precision_level);
#else
  av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
                        &frame_nearest_mv[ref_frame],
                        &frame_near_mv[ref_frame]);
#endif
  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the centre point for subsequent searches.
  // The current implementation doesn't support scaling.
#if CONFIG_CB4X4
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
              block_size);
#else
  // Without CB4X4, MV prediction is skipped for scaled refs and sub-8x8.
  if (!av1_is_scaled(sf) && block_size >= BLOCK_8X8)
    av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
                block_size);
#endif  // CONFIG_CB4X4
}
7291
// Runs the full single-reference motion search for the block:
//   1. optionally swaps in a scaled version of the reference frame,
//   2. full-pixel search (diamond/hash or OBMC variant) from a predicted MV,
//   3. sub-pixel refinement (with an optional second-best-MV retry when
//      upsampled references are in use),
//   4. writes the winning MV to x->best_mv and its bit cost to *rate_mv,
//      then restores any swapped reference buffers.
static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 int ref_idx, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const AV1_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
#if CONFIG_COMPOUND_SINGLEREF
  // Single-ref compound modes always search the one (first) reference.
  int ref =
      has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0];
#else   // !CONFIG_COMPOUND_SINGLEREF
  int ref = mbmi->ref_frame[ref_idx];
#endif  // CONFIG_COMPOUND_SINGLEREF
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;

  MvLimits tmp_mv_limits = x->mv_limits;
  int cost_list[5];

  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  // Candidate full-pel search centres: the two best ref MVs plus the MV
  // remembered from previous searches of this reference.
  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[ref_idx];

    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  av1_set_mv_search_range(&x->mv_limits, &ref_mv);

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take the weighted average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param =
        (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
        2;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
    // Smaller blocks get a coarser first step relative to the superblock.
    int boffset =
        2 * (b_width_log2_lookup[cm->sb_size] -
             AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    step_param = AOMMAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    // Normalized prediction SAD; low values suggest a good predictor, so a
    // larger first step (fewer search points) is acceptable.
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5) {
      step_param += 2;
      step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 1);
    }

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_RANDOM) {
      int i;
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        // If another reference predicts this block far better, skip the
        // search for this reference entirely (early return).
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          x->best_mv.as_int = INVALID_MV;

          if (scaled_ref_frame) {
            int j;
            for (j = 0; j < MAX_MB_PLANE; ++j)
              xd->plane[j].pre[ref_idx] = backup_yv12[j];
          }
          return;
        }
      }
    }
  }

  av1_set_mv_search_range(&x->mv_limits, &ref_mv);

#if CONFIG_MOTION_VAR
  if (mbmi->motion_mode != SIMPLE_TRANSLATION)
    mvp_full = mbmi->mv[0].as_mv;
  else
#endif  // CONFIG_MOTION_VAR
    mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  // Convert the predictor from 1/8-pel to full-pel units.
  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;

#if CONFIG_MOTION_VAR
  switch (mbmi->motion_mode) {
    case SIMPLE_TRANSLATION:
#endif  // CONFIG_MOTION_VAR
#if CONFIG_HASH_ME
      bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                      sadpb, cond_cost_list(cpi, cost_list),
                                      &ref_mv, INT_MAX, 1, (MI_SIZE * mi_col),
                                      (MI_SIZE * mi_row), 0);
#else
      bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                                      cond_cost_list(cpi, cost_list), &ref_mv,
                                      INT_MAX, 1);
#endif
#if CONFIG_MOTION_VAR
      break;
    case OBMC_CAUSAL:
      bestsme = av1_obmc_full_pixel_diamond(
          cpi, x, &mvp_full, step_param, sadpb,
          MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
          &(x->best_mv.as_mv), 0);
      break;
    default: assert(0 && "Invalid motion mode!\n");
  }
#endif  // CONFIG_MOTION_VAR

  x->mv_limits = tmp_mv_limits;

#if CONFIG_AMVR
  // Integer-MV mode: scale the full-pel result back to 1/8-pel units and
  // skip sub-pixel refinement.
  if (cpi->common.cur_frame_mv_precision_level) {
    x->best_mv.as_mv.row *= 8;
    x->best_mv.as_mv.col *= 8;
  }
  if (bestsme < INT_MAX && cpi->common.cur_frame_mv_precision_level == 0) {
#else
  if (bestsme < INT_MAX) {
#endif
    int dis; /* TODO: use dis in distortion calculation later. */
#if CONFIG_MOTION_VAR
    switch (mbmi->motion_mode) {
      case SIMPLE_TRANSLATION:
#endif  // CONFIG_MOTION_VAR
        if (cpi->sf.use_upsampled_references) {
          int best_mv_var;
          // Also try refining from the runner-up full-pel MV, if distinct.
          const int try_second = x->second_best_mv.as_int != INVALID_MV &&
                                 x->second_best_mv.as_int != x->best_mv.as_int;
          const int pw = block_size_wide[bsize];
          const int ph = block_size_high[bsize];

          best_mv_var = cpi->find_fractional_mv_step(
              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
              0, 0, pw, ph, 1);

          if (try_second) {
            // Clamp so a 1/8-pel refined MV cannot leave the legal MV range.
            const int minc =
                AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
            const int maxc =
                AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
            const int minr =
                AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
            const int maxr =
                AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
            int this_var;
            MV best_mv = x->best_mv.as_mv;

            x->best_mv = x->second_best_mv;
            if (x->best_mv.as_mv.row * 8 <= maxr &&
                x->best_mv.as_mv.row * 8 >= minr &&
                x->best_mv.as_mv.col * 8 <= maxc &&
                x->best_mv.as_mv.col * 8 >= minc) {
              this_var = cpi->find_fractional_mv_step(
                  x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
                  &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
                  cpi->sf.mv.subpel_iters_per_step,
                  cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
                  &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1);
              // Keep whichever start point produced the lower variance.
              if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
              x->best_mv.as_mv = best_mv;
            }
          }
        } else {
          cpi->find_fractional_mv_step(
              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
              0, 0, 0, 0, 0);
        }
#if CONFIG_MOTION_VAR
        break;
      case OBMC_CAUSAL:
        av1_find_best_obmc_sub_pixel_tree_up(
            x, &x->best_mv.as_mv, &ref_mv, cm->allow_high_precision_mv,
            x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
            cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis,
            &x->pred_sse[ref], 0, cpi->sf.use_upsampled_references);
        break;
      default: assert(0 && "Invalid motion mode!\n");
    }
#endif  // CONFIG_MOTION_VAR
  }
  *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
                             x->mvcost, MV_COST_WEIGHT);

  // Remember the result to seed future searches of this reference.
#if CONFIG_MOTION_VAR
  if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
#else
  if (cpi->sf.adaptive_motion_search)
#endif  // CONFIG_MOTION_VAR
    x->pred_mv[ref] = x->best_mv.as_mv;

  if (scaled_ref_frame) {
    // Restore the prediction frame pointers to their unscaled versions.
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[ref_idx] = backup_yv12[i];
  }
}
7521
7522 static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) {
7523 int i;
7524 for (i = 0; i < MAX_MB_PLANE; i++) {
7525 xd->plane[i].dst.buf = dst.plane[i];
7526 xd->plane[i].dst.stride = dst.stride[i];
7527 }
7528 }
7529
7530 static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
7531 BLOCK_SIZE bsize, const MV *other_mv,
7532 int mi_row, int mi_col, const int block,
7533 int ref_idx, uint8_t *second_pred) {
7534 const AV1_COMMON *const cm = &cpi->common;
7535 const int pw = block_size_wide[bsize];
7536 const int ph = block_size_high[bsize];
7537 MACROBLOCKD *xd = &x->e_mbd;
7538 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
7539 #if CONFIG_COMPOUND_SINGLEREF
7540 const int other_ref =
7541 has_second_ref(mbmi) ? mbmi->ref_frame[!ref_idx] : mbmi->ref_frame[0];
7542 #else // !CONFIG_COMPOUND_SINGLEREF
7543 const int other_ref = mbmi->ref_frame[!ref_idx];
7544 #endif // CONFIG_COMPOUND_SINGLEREF
7545 struct scale_factors sf;
7546 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
7547 struct macroblockd_plane *const pd = &xd->plane[0];
7548 // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
7549 const int ic = block & 1;
7550 const int ir = (block - ic) >> 1;
7551 const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
7552 const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
7553 #if CONFIG_GLOBAL_MOTION
7554 WarpedMotionParams *const wm = &xd->global_motion[other_ref];
7555 int is_global = is_global_mv_block(xd->mi[0], block, wm->wmtype);
7556 #endif // CONFIG_GLOBAL_MOTION
7557 #else
7558 (void)block;
7559 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
7560
7561 // This function should only ever be called for compound modes
7562 #if CONFIG_COMPOUND_SINGLEREF
7563 assert(has_second_ref(mbmi) || is_inter_singleref_comp_mode(mbmi->mode));
7564 #else // !CONFIG_COMPOUND_SINGLEREF
7565 assert(has_second_ref(mbmi));
7566 #endif // CONFIG_COMPOUND_SINGLEREF
7567
7568 struct buf_2d backup_yv12[MAX_MB_PLANE];
7569 const YV12_BUFFER_CONFIG *const scaled_ref_frame =
7570 av1_get_scaled_ref_frame(cpi, other_ref);
7571
7572 if (scaled_ref_frame) {
7573 int i;
7574 // Swap out the reference frame for a version that's been scaled to
7575 // match the resolution of the current frame, allowing the existing
7576 // motion search code to be used without additional modifications.
7577 for (i = 0; i < MAX_MB_PLANE; i++)
7578 backup_yv12[i] = xd->plane[i].pre[!ref_idx];
7579 av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
7580 }
7581
7582 // Since we have scaled the reference frames to match the size of the current
7583 // frame we must use a unit scaling factor during mode selection.
7584 #if CONFIG_HIGHBITDEPTH
7585 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
7586 cm->height, cm->use_highbitdepth);
7587 #else
7588 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
7589 cm->height);
7590 #endif // CONFIG_HIGHBITDEPTH
7591
7592 struct buf_2d ref_yv12;
7593
7594 const int plane = 0;
7595 ConvolveParams conv_params = get_conv_params(!ref_idx, 0, plane);
7596 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
7597 WarpTypesAllowed warp_types;
7598 #if CONFIG_GLOBAL_MOTION
7599 warp_types.global_warp_allowed = is_global;
7600 #endif // CONFIG_GLOBAL_MOTION
7601 #if CONFIG_WARPED_MOTION
7602 warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
7603 #endif // CONFIG_WARPED_MOTION
7604 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
7605
7606 // Initialized here because of compiler problem in Visual Studio.
7607 ref_yv12 = xd->plane[plane].pre[!ref_idx];
7608
7609 // Get the prediction block from the 'other' reference frame.
7610 #if CONFIG_HIGHBITDEPTH
7611 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
7612 av1_highbd_build_inter_predictor(
7613 ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
7614 0, mbmi->interp_filters,
7615 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
7616 &warp_types, p_col, p_row,
7617 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
7618 plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
7619 } else {
7620 #endif // CONFIG_HIGHBITDEPTH
7621 av1_build_inter_predictor(
7622 ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
7623 &conv_params, mbmi->interp_filters,
7624 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
7625 &warp_types, p_col, p_row, plane, !ref_idx,
7626 #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
7627 MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
7628 #if CONFIG_HIGHBITDEPTH
7629 }
7630 #endif // CONFIG_HIGHBITDEPTH
7631
7632 if (scaled_ref_frame) {
7633 // Restore the prediction frame pointers to their unscaled versions.
7634 int i;
7635 for (i = 0; i < MAX_MB_PLANE; i++)
7636 xd->plane[i].pre[!ref_idx] = backup_yv12[i];
7637 }
7638 }
7639
7640 // Search for the best mv for one component of a compound,
7641 // given that the other component is fixed.
static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                          BLOCK_SIZE bsize, MV *this_mv,
                                          int mi_row, int mi_col,
                                          const uint8_t *second_pred,
                                          const uint8_t *mask, int mask_stride,
                                          int *rate_mv, int ref_idx) {
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
#if CONFIG_COMPOUND_SINGLEREF
  // Single-ref compound modes derive both components from ref_frame[0].
  const int ref =
      has_second_ref(mbmi) ? mbmi->ref_frame[ref_idx] : mbmi->ref_frame[0];
#else
  const int ref = mbmi->ref_frame[ref_idx];
#endif  // CONFIG_COMPOUND_SINGLEREF
  // MV predictor: first reference-MV candidate for this reference frame.
  int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
  struct macroblockd_plane *const pd = &xd->plane[0];

  struct buf_2d backup_yv12[MAX_MB_PLANE];
  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  // Check that this is either an interinter or an interintra block
#if CONFIG_COMPOUND_SINGLEREF
  assert(has_second_ref(mbmi) ||
         // or a single ref comp pred mode
         is_inter_singleref_comp_mode(mbmi->mode) ||
         (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
#else
  assert(has_second_ref(mbmi) ||
         (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
#endif  // CONFIG_COMPOUND_SINGLEREF

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[ref_idx];
    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  struct buf_2d orig_yv12;
  int bestsme = INT_MAX;
  int sadpb = x->sadperbit16;
  MV *const best_mv = &x->best_mv.as_mv;
  // Small refinement range around the single-mode MV predictor.
  int search_range = 3;

  MvLimits tmp_mv_limits = x->mv_limits;

  // Initialized here because of compiler problem in Visual Studio.
  // The search reads plane 0's pre[0]; when refining the second component,
  // temporarily point pre[0] at that component's buffer (restored below).
  if (ref_idx) {
    orig_yv12 = pd->pre[0];
    pd->pre[0] = pd->pre[ref_idx];
  }

  // Do compound motion search on the current reference frame.
  av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);

  // Use the mv result from the single mode as mv predictor.
  *best_mv = *this_mv;

  // Convert from 1/8-pel to full-pel units for the integer search.
  best_mv->col >>= 3;
  best_mv->row >>= 3;

#if CONFIG_COMPOUND_SINGLEREF
  if (!has_second_ref(mbmi))
    av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx);
  else
#endif  // CONFIG_COMPOUND_SINGLEREF
    av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);

  // Small-range full-pixel motion search.
  bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
                                     &cpi->fn_ptr[bsize], mask, mask_stride,
                                     ref_idx, &ref_mv.as_mv, second_pred);
  if (bestsme < INT_MAX) {
    // Re-evaluate the full-pel winner with the (masked) variance metric.
    if (mask)
      bestsme =
          av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
                                  mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
    else
      bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
                                      &cpi->fn_ptr[bsize], 1);
  }

  x->mv_limits = tmp_mv_limits;

#if CONFIG_AMVR
  // At reduced MV precision, scale the full-pel result back to 1/8-pel
  // units and skip the sub-pixel refinement below.
  if (cpi->common.cur_frame_mv_precision_level) {
    x->best_mv.as_mv.row *= 8;
    x->best_mv.as_mv.col *= 8;
  }
  if (bestsme < INT_MAX && cpi->common.cur_frame_mv_precision_level == 0) {
#else
  if (bestsme < INT_MAX) {
#endif
    int dis; /* TODO: use dis in distortion calculation later. */
    unsigned int sse;
    // Sub-pixel refinement around the full-pel winner.
    bestsme = cpi->find_fractional_mv_step(
        x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
        &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
        x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
        ref_idx, pw, ph, cpi->sf.use_upsampled_references);
  }

  // Restore the pointer to the first (possibly scaled) prediction buffer.
  if (ref_idx) pd->pre[0] = orig_yv12;

  if (bestsme < INT_MAX) *this_mv = *best_mv;

  *rate_mv = 0;

  if (scaled_ref_frame) {
    // Restore the prediction frame pointers to their unscaled versions.
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[ref_idx] = backup_yv12[i];
  }

#if CONFIG_COMPOUND_SINGLEREF
  if (!has_second_ref(mbmi))
    av1_set_mvcost(x, ref, 0, mbmi->ref_mv_idx);
  else
#endif  // CONFIG_COMPOUND_SINGLEREF
    av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
  // Cost of signaling the refined MV relative to its predictor.
  *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
                              x->mvcost, MV_COST_WEIGHT);
}
7773
7774 // Wrapper for compound_single_motion_search, for the common case
7775 // where the second prediction is also an inter mode.
static void compound_single_motion_search_interinter(
    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
#if CONFIG_COMPOUND_SINGLEREF
    int_mv *frame_comp_mv,
#endif  // CONFIG_COMPOUND_SINGLEREF
    int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
    const int block, int ref_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

  // This function should only ever be called for compound modes
#if CONFIG_COMPOUND_SINGLEREF
  int is_singleref_comp_mode =
      !has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode);
  assert(has_second_ref(mbmi) || is_singleref_comp_mode);
  // When refining the second component of a single-ref compound mode the
  // caller must supply the array holding that component's MVs.
  if (is_singleref_comp_mode && ref_idx) assert(frame_comp_mv);
#else   // !CONFIG_COMPOUND_SINGLEREF
  assert(has_second_ref(mbmi));
#endif  // CONFIG_COMPOUND_SINGLEREF

  // Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
  uint8_t *second_pred;
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
  else
    second_pred = (uint8_t *)second_pred_alloc_16;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif  // CONFIG_HIGHBITDEPTH

  // this_mv: the component being refined (selected by ref_idx);
  // other_mv: the component held fixed during the search.
#if CONFIG_COMPOUND_SINGLEREF
  MV *this_mv = has_second_ref(mbmi)
                    ? &frame_mv[mbmi->ref_frame[ref_idx]].as_mv
                    : (ref_idx ? &frame_comp_mv[mbmi->ref_frame[0]].as_mv
                               : &frame_mv[mbmi->ref_frame[0]].as_mv);
  const MV *other_mv =
      has_second_ref(mbmi)
          ? &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv
          : (ref_idx ? &frame_mv[mbmi->ref_frame[0]].as_mv
                     : &frame_comp_mv[mbmi->ref_frame[0]].as_mv);
#else   // !CONFIG_COMPOUND_SINGLEREF
  MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
  const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;
#endif  // CONFIG_COMPOUND_SINGLEREF

  // Build the fixed component's prediction, then refine the other one
  // against it.
  build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
                          ref_idx, second_pred);

  compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
                                second_pred, mask, mask_stride, rate_mv,
                                ref_idx);
}
7830
7831 #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
// Run masked compound motion search for the component(s) selected by
// `which` (0: refine MV 0 only, 1: refine MV 1 only, 2: joint search of
// both). Inputs are cur_mv[0..1]; refined MVs are returned in tmp_mv[0..1]
// and their signaling cost in *rate_mv.
static void do_masked_motion_search_indexed(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
    int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
  // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  BLOCK_SIZE sb_type = mbmi->sb_type;
  const uint8_t *mask;
  const int mask_stride = block_size_wide[bsize];

  // Blending mask (wedge or segmentation) for the current compound type.
  mask = av1_get_compound_type_mask(comp_data, sb_type);

  int_mv frame_mv[TOTAL_REFS_PER_FRAME];
#if CONFIG_COMPOUND_SINGLEREF
  int_mv frame_comp_mv[TOTAL_REFS_PER_FRAME];
#endif  // CONFIG_COMPOUND_SINGLEREF
  MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
  assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4);

  // Scatter the two component MVs into the per-reference-frame arrays the
  // search helpers expect.
  frame_mv[rf[0]].as_int = cur_mv[0].as_int;
#if CONFIG_COMPOUND_SINGLEREF
  if (!has_second_ref(mbmi))
    frame_comp_mv[rf[0]].as_int = cur_mv[1].as_int;
  else
#endif  // CONFIG_COMPOUND_SINGLEREF
    frame_mv[rf[1]].as_int = cur_mv[1].as_int;
  if (which == 0 || which == 1) {
    // Refine a single component; `which` doubles as the component index.
    compound_single_motion_search_interinter(
        cpi, x, bsize, frame_mv,
#if CONFIG_COMPOUND_SINGLEREF
        has_second_ref(mbmi) ? NULL : frame_comp_mv,
#endif  // CONFIG_COMPOUND_SINGLEREF
        mi_row, mi_col, mask, mask_stride, rate_mv, 0, which);
  } else if (which == 2) {
    // Refine both components jointly.
    joint_motion_search(cpi, x, bsize, frame_mv,
#if CONFIG_COMPOUND_SINGLEREF
                        has_second_ref(mbmi) ? NULL : frame_comp_mv,
#endif  // CONFIG_COMPOUND_SINGLEREF
                        mi_row, mi_col, NULL, mask, mask_stride, rate_mv, 0);
  }
  // Gather the results back into component order.
  tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
#if CONFIG_COMPOUND_SINGLEREF
  if (!has_second_ref(mbmi))
    tmp_mv[1].as_int = frame_comp_mv[rf[0]].as_int;
  else  // comp ref
#endif  // CONFIG_COMPOUND_SINGLEREF
    tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
}
7881 #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
7882
7883 // In some situations we want to discount the apparent cost of a new motion
7884 // vector. Where there is a subtle motion field and especially where there is
7885 // low spatial complexity then it can be hard to cover the cost of a new motion
7886 // vector in a single block, even if that motion vector reduces distortion.
7887 // However, once established that vector may be usable through the nearest and
7888 // near mv modes to reduce distortion in subsequent blocks and also improve
7889 // visual quality.
7890 static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
7891 int_mv this_mv,
7892 int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
7893 int ref_frame) {
7894 return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
7895 (this_mv.as_int != 0) &&
7896 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
7897 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
7898 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
7899 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
7900 }
7901
// Margins (in 1/8-pel units) kept between a motion vector and the frame
// border, leaving room for the interpolation filter's extension.
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)

// TODO(jingning): this mv clamping function should be block size dependent.
// Clamp *mv so the referenced area stays within the allowed border margins
// around the current block's position.
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
  clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
           xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
           xd->mb_to_top_edge - LEFT_TOP_MARGIN,
           xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
7912
7913 #if CONFIG_WEDGE
7914 static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
7915 const BLOCK_SIZE bsize, const uint8_t *pred0,
7916 int stride0, const uint8_t *pred1, int stride1) {
7917 const struct macroblock_plane *const p = &x->plane[0];
7918 const uint8_t *src = p->src.buf;
7919 int src_stride = p->src.stride;
7920 const int f_index = bsize - BLOCK_8X8;
7921 const int bw = block_size_wide[bsize];
7922 const int bh = block_size_high[bsize];
7923 uint32_t esq[2][4];
7924 int64_t tl, br;
7925
7926 #if CONFIG_HIGHBITDEPTH
7927 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
7928 pred0 = CONVERT_TO_BYTEPTR(pred0);
7929 pred1 = CONVERT_TO_BYTEPTR(pred1);
7930 }
7931 #endif // CONFIG_HIGHBITDEPTH
7932
7933 cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
7934 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
7935 &esq[0][1]);
7936 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
7937 pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
7938 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
7939 pred0 + bh / 2 * stride0 + bw / 2, stride0,
7940 &esq[0][3]);
7941 cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
7942 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
7943 &esq[1][1]);
7944 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
7945 pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
7946 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
7947 pred1 + bh / 2 * stride1 + bw / 2, stride0,
7948 &esq[1][3]);
7949
7950 tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
7951 (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
7952 br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
7953 (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
7954 return (tl + br > 0);
7955 }
7956 #endif // CONFIG_WEDGE
7957
7958 #if !CONFIG_DUAL_FILTER
7959 static InterpFilter predict_interp_filter(
7960 const AV1_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize,
7961 const int mi_row, const int mi_col,
7962 InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME]) {
7963 InterpFilter best_filter = SWITCHABLE;
7964 const AV1_COMMON *cm = &cpi->common;
7965 const MACROBLOCKD *xd = &x->e_mbd;
7966 int bsl = mi_width_log2_lookup[bsize];
7967 int pred_filter_search =
7968 cpi->sf.cb_pred_filter_search
7969 ? (((mi_row + mi_col) >> bsl) +
7970 get_chessboard_index(cm->current_video_frame)) &
7971 0x1
7972 : 0;
7973 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
7974 const int is_comp_pred = has_second_ref(mbmi);
7975 const int this_mode = mbmi->mode;
7976 int refs[2] = { mbmi->ref_frame[0],
7977 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
7978 if (pred_filter_search) {
7979 InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
7980 if (xd->up_available)
7981 af = av1_extract_interp_filter(
7982 xd->mi[-xd->mi_stride]->mbmi.interp_filters, 0);
7983 if (xd->left_available)
7984 lf = av1_extract_interp_filter(xd->mi[-1]->mbmi.interp_filters, 0);
7985
7986 if ((this_mode != NEWMV && this_mode != NEW_NEWMV) || (af == lf))
7987 best_filter = af;
7988 }
7989 if (is_comp_pred) {
7990 if (cpi->sf.adaptive_mode_search) {
7991 switch (this_mode) {
7992 case NEAREST_NEARESTMV:
7993 if (single_filter[NEARESTMV][refs[0]] ==
7994 single_filter[NEARESTMV][refs[1]])
7995 best_filter = single_filter[NEARESTMV][refs[0]];
7996 break;
7997 case NEAR_NEARMV:
7998 if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]])
7999 best_filter = single_filter[NEARMV][refs[0]];
8000 break;
8001 case ZERO_ZEROMV:
8002 if (single_filter[ZEROMV][refs[0]] == single_filter[ZEROMV][refs[1]])
8003 best_filter = single_filter[ZEROMV][refs[0]];
8004 break;
8005 case NEW_NEWMV:
8006 if (single_filter[NEWMV][refs[0]] == single_filter[NEWMV][refs[1]])
8007 best_filter = single_filter[NEWMV][refs[0]];
8008 break;
8009 case NEAREST_NEWMV:
8010 if (single_filter[NEARESTMV][refs[0]] ==
8011 single_filter[NEWMV][refs[1]])
8012 best_filter = single_filter[NEARESTMV][refs[0]];
8013 break;
8014 case NEAR_NEWMV:
8015 if (single_filter[NEARMV][refs[0]] == single_filter[NEWMV][refs[1]])
8016 best_filter = single_filter[NEARMV][refs[0]];
8017 break;
8018 case NEW_NEARESTMV:
8019 if (single_filter[NEWMV][refs[0]] ==
8020 single_filter[NEARESTMV][refs[1]])
8021 best_filter = single_filter[NEWMV][refs[0]];
8022 break;
8023 case NEW_NEARMV:
8024 if (single_filter[NEWMV][refs[0]] == single_filter[NEARMV][refs[1]])
8025 best_filter = single_filter[NEWMV][refs[0]];
8026 break;
8027 default:
8028 if (single_filter[this_mode][refs[0]] ==
8029 single_filter[this_mode][refs[1]])
8030 best_filter = single_filter[this_mode][refs[0]];
8031 break;
8032 }
8033 }
8034 }
8035 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
8036 best_filter = EIGHTTAP_REGULAR;
8037 }
8038 return best_filter;
8039 }
8040 #endif // !CONFIG_DUAL_FILTER
8041
8042 // Choose the best wedge index and sign
8043 #if CONFIG_WEDGE
// Exhaustively search the wedge codebook for bsize, choosing the wedge
// index and sign that minimize the modelled RD cost of blending predictors
// p0 and p1 (both bw-stride) against the source. Returns the best modelled
// RD cost; outputs via *best_wedge_sign / *best_wedge_index.
static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
                          const BLOCK_SIZE bsize, const uint8_t *const p0,
                          const uint8_t *const p1, int *const best_wedge_sign,
                          int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;  // number of pixels in the block
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_sign;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // Scale SSE back to an 8-bit-equivalent range for the RD model.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH

  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);   // src - p0 residual
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);   // src - p1 residual
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);  // p1 - p0 difference
  // Per-pixel residual deltas, filled by av1_wedge_compute_delta_squares.
  DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);

  int64_t sign_limit;

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif    // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  // Threshold used when deriving each wedge's sign from the residuals.
  sign_limit = ((int64_t)aom_sum_squares_i16(r0, N) -
                (int64_t)aom_sum_squares_i16(r1, N)) *
               (1 << WEDGE_WEIGHT_BITS) / 2;

  if (N < 64)
    av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
  else
    av1_wedge_compute_delta_squares(ds, r0, r1, N);

  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);

    // TODO(jingning): Make sse2 functions support N = 16 case
    if (N < 64)
      wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
    else
      wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);

    // Re-fetch the mask with the chosen sign and measure the blended SSE.
    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    if (N < 64)
      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    else
      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd = RDCOST(x->rdmult, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      *best_wedge_sign = wedge_sign;
      best_rd = rd;
    }
  }

  return best_rd;
}
8128
// Choose the best wedge index for the specified sign
// Searches only wedge indices (the sign is given), so it needs fewer
// residual buffers than pick_wedge. Returns the best modelled RD cost and
// the winning index via *best_wedge_index.
static int64_t pick_wedge_fixed_sign(
    const AV1_COMP *const cpi, const MACROBLOCK *const x,
    const BLOCK_SIZE bsize, const uint8_t *const p0, const uint8_t *const p1,
    const int wedge_sign, int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;  // number of pixels in the block
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // Scale SSE back to an 8-bit-equivalent range for the RD model.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH

  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);   // src - p1 residual
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);  // p1 - p0 difference

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif    // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    // The _c variant is used for small N (see the sse2 TODO in pick_wedge).
    if (N < 64)
      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    else
      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd = RDCOST(x->rdmult, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      best_rd = rd;
    }
  }

  return best_rd;
}
8188
8189 static int64_t pick_interinter_wedge(const AV1_COMP *const cpi,
8190 MACROBLOCK *const x,
8191 const BLOCK_SIZE bsize,
8192 const uint8_t *const p0,
8193 const uint8_t *const p1) {
8194 MACROBLOCKD *const xd = &x->e_mbd;
8195 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8196 const int bw = block_size_wide[bsize];
8197
8198 int64_t rd;
8199 int wedge_index = -1;
8200 int wedge_sign = 0;
8201
8202 assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
8203 assert(cpi->common.allow_masked_compound);
8204
8205 if (cpi->sf.fast_wedge_sign_estimate) {
8206 wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
8207 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, wedge_sign, &wedge_index);
8208 } else {
8209 rd = pick_wedge(cpi, x, bsize, p0, p1, &wedge_sign, &wedge_index);
8210 }
8211
8212 mbmi->wedge_sign = wedge_sign;
8213 mbmi->wedge_index = wedge_index;
8214 return rd;
8215 }
8216 #endif // CONFIG_WEDGE
8217
8218 #if CONFIG_COMPOUND_SEGMENT
// Search all compound segmentation mask types and pick the one with the
// lowest modelled RD cost for blending predictors p0 and p1 (both with
// stride bw) against the source. Stores the winner in mbmi->mask_type,
// leaves the final mask built in xd->seg_mask and returns the best RD cost.
static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
                                   MACROBLOCK *const x, const BLOCK_SIZE bsize,
                                   const uint8_t *const p0,
                                   const uint8_t *const p1) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;  // number of pixels in the block
  int rate;
  uint64_t sse;
  int64_t dist;
  int64_t rd0;
  SEG_MASK_TYPE cur_mask_type;
  int64_t best_rd = INT64_MAX;
  SEG_MASK_TYPE best_mask_type = 0;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // Scale SSE back to an 8-bit-equivalent range for the RD model.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);   // src - p0 residual
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);   // src - p1 residual
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);  // p1 - p0 difference

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif    // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  // try each mask type and its inverse
  for (cur_mask_type = 0; cur_mask_type < SEG_MASK_TYPES; cur_mask_type++) {
    // build mask and inverse
#if CONFIG_HIGHBITDEPTH
    if (hbd)
      build_compound_seg_mask_highbd(
          xd->seg_mask, cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
          CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
    else
#endif  // CONFIG_HIGHBITDEPTH
      build_compound_seg_mask(xd->seg_mask, cur_mask_type, p0, bw, p1, bw,
                              bsize, bh, bw);

    // compute rd for mask
    sse = av1_wedge_sse_from_residuals(r1, d10, xd->seg_mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd0 = RDCOST(x->rdmult, rate, dist);

    if (rd0 < best_rd) {
      best_mask_type = cur_mask_type;
      best_rd = rd0;
    }
  }

  // make final mask: rebuild the winner, since the loop above left the
  // last-tried mask (not necessarily the best) in xd->seg_mask.
  mbmi->mask_type = best_mask_type;
#if CONFIG_HIGHBITDEPTH
  if (hbd)
    build_compound_seg_mask_highbd(
        xd->seg_mask, mbmi->mask_type, CONVERT_TO_BYTEPTR(p0), bw,
        CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
  else
#endif  // CONFIG_HIGHBITDEPTH
    build_compound_seg_mask(xd->seg_mask, mbmi->mask_type, p0, bw, p1, bw,
                            bsize, bh, bw);

  return best_rd;
}
8302 #endif // CONFIG_COMPOUND_SEGMENT
8303
8304 #if CONFIG_WEDGE && CONFIG_INTERINTRA
8305 static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
8306 const MACROBLOCK *const x,
8307 const BLOCK_SIZE bsize,
8308 const uint8_t *const p0,
8309 const uint8_t *const p1) {
8310 const MACROBLOCKD *const xd = &x->e_mbd;
8311 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8312
8313 int64_t rd;
8314 int wedge_index = -1;
8315
8316 assert(is_interintra_wedge_used(bsize));
8317 assert(cpi->common.allow_interintra_compound);
8318
8319 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index);
8320
8321 mbmi->interintra_wedge_sign = 0;
8322 mbmi->interintra_wedge_index = wedge_index;
8323 return rd;
8324 }
8325 #endif // CONFIG_WEDGE && CONFIG_INTERINTRA
8326
8327 #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
8328 static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
8329 const BLOCK_SIZE bsize,
8330 const uint8_t *const p0,
8331 const uint8_t *const p1) {
8332 const COMPOUND_TYPE compound_type =
8333 x->e_mbd.mi[0]->mbmi.interinter_compound_type;
8334 switch (compound_type) {
8335 #if CONFIG_WEDGE
8336 case COMPOUND_WEDGE: return pick_interinter_wedge(cpi, x, bsize, p0, p1);
8337 #endif // CONFIG_WEDGE
8338 #if CONFIG_COMPOUND_SEGMENT
8339 case COMPOUND_SEG: return pick_interinter_seg(cpi, x, bsize, p0, p1);
8340 #endif // CONFIG_COMPOUND_SEGMENT
8341 default: assert(0); return 0;
8342 }
8343 }
8344
// Re-run motion search for the NEWMV component(s) of a masked compound
// mode, optimizing against the compound blending mask. Updates mbmi->mv[]
// in place and returns the MV rate of the refined vectors.
static int interinter_compound_motion_search(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int_mv tmp_mv[2];
  int tmp_rate_mv = 0;
  // Snapshot of the compound parameters used to fetch the blending mask.
  const INTERINTER_COMPOUND_DATA compound_data = {
#if CONFIG_WEDGE
    mbmi->wedge_index,
    mbmi->wedge_sign,
#endif  // CONFIG_WEDGE
#if CONFIG_COMPOUND_SEGMENT
    mbmi->mask_type,
    xd->seg_mask,
#endif  // CONFIG_COMPOUND_SEGMENT
    mbmi->interinter_compound_type
  };
#if CONFIG_COMPOUND_SINGLEREF
  // NOTE: Mode is needed to identify the compound mode prediction, regardless
  // of comp refs or single ref.
  mbmi->mode = this_mode;
#endif  // CONFIG_COMPOUND_SINGLEREF

  if (this_mode == NEW_NEWMV
#if CONFIG_COMPOUND_SINGLEREF
      || this_mode == SR_NEW_NEWMV
#endif  // CONFIG_COMPOUND_SINGLEREF
      ) {
    // Both components are NEW: joint search (which == 2).
    do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
    mbmi->mv[0].as_int = tmp_mv[0].as_int;
    mbmi->mv[1].as_int = tmp_mv[1].as_int;
  } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
    // Only the first component is NEW (which == 0).
    do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
    mbmi->mv[0].as_int = tmp_mv[0].as_int;
  } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV
#if CONFIG_COMPOUND_SINGLEREF
             // || this_mode == SR_NEAREST_NEWMV
             || this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV
#endif  // CONFIG_COMPOUND_SINGLEREF
             ) {
    // Only the second component is NEW (which == 1).
    do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
                                    mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
    mbmi->mv[1].as_int = tmp_mv[1].as_int;
  }
  return tmp_rate_mv;
}
8394
// Pick the best mask for the current compound type, optionally re-search
// the NEWMV component(s) with that mask, build the resulting prediction,
// and return its estimated RD cost. rs2 is the rate of signaling the
// compound type/mask; *out_rate_mv returns the (possibly updated) MV rate.
static int64_t build_and_cost_compound_type(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv,
    BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1,
    int *strides, int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int rate_sum;
  int64_t dist_sum;
  int64_t best_rd_cur = INT64_MAX;
  int64_t rd = INT64_MAX;
  int tmp_skip_txfm_sb;
  int64_t tmp_skip_sse_sb;
  const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;

  // RD estimate of the best mask with the original (unrefined) MVs.
  best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
  best_rd_cur += RDCOST(x->rdmult, rs2 + rate_mv, 0);

  if (have_newmv_in_inter_mode(this_mode) &&
      use_masked_motion_search(compound_type)) {
    // Refine the NEWMV component(s) against the chosen mask, rebuild the
    // prediction, and model its RD cost for comparison.
    *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
                                                     this_mode, mi_row, mi_col);
    av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
    model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
                    &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
    rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
    if (rd >= best_rd_cur) {
      // Refinement did not help: revert to the original MVs/rate and
      // rebuild the prediction from the pre-computed component buffers.
      mbmi->mv[0].as_int = cur_mv[0].as_int;
      mbmi->mv[1].as_int = cur_mv[1].as_int;
      *out_rate_mv = rate_mv;
      av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
#if CONFIG_SUPERTX
                                               0, 0,
#endif  // CONFIG_SUPERTX
                                               preds0, strides, preds1,
                                               strides);
    }
    // Replace the modelled estimate with a transform-based RD estimate.
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;

  } else {
    // No MV refinement: build the masked prediction and estimate its RD.
    av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
#if CONFIG_SUPERTX
                                             0, 0,
#endif  // CONFIG_SUPERTX
                                             preds0, strides, preds1, strides);
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;
  }
  return best_rd_cur;
}
8455 #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
8456
// Scratch state threaded through handle_inter_mode() for one block, so that
// per-reference motion-search results can be cached and reused across the
// candidate inter modes evaluated for that block.
typedef struct {
#if CONFIG_MOTION_VAR
  // Inter prediction buffers and respective strides
  uint8_t *above_pred_buf[MAX_MB_PLANE];
  int above_pred_stride[MAX_MB_PLANE];
  uint8_t *left_pred_buf[MAX_MB_PLANE];
  int left_pred_stride[MAX_MB_PLANE];
#endif  // CONFIG_MOTION_VAR
  // Cached single-reference NEWMV search results, indexed by reference frame.
  int_mv *single_newmv;
  // Pointer to array of motion vectors to use for each ref and their rates
  // Should point to first of 2 arrays in 2D array
  int *single_newmv_rate;
  // Pointer to array of predicted rate-distortion
  // Should point to first of 2 arrays in 2D array
  int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
  // Interpolation filter chosen per (mode, reference) — presumably from the
  // single-reference search; used as a prediction hint. TODO(review): confirm.
  InterpFilter single_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
} HandleInterModeArgs;
8474
// Runs the motion search(es) required by a NEWMV-class inter mode and fills
// in the resulting motion vector(s) (into mode_mv[this_mode][] and
// single_newmv[]) together with their signalling rate (*rate_mv).
// Returns 0 on success, or INT64_MAX when the single-reference search
// produced an invalid MV; the compound paths always return 0.
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize,
                            int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
#if CONFIG_COMPOUND_SINGLEREF
                            int_mv (*const mode_comp_mv)[TOTAL_REFS_PER_FRAME],
#endif  // CONFIG_COMPOUND_SINGLEREF
                            const int mi_row, const int mi_col,
                            int *const rate_mv, int_mv *const single_newmv,
                            HandleInterModeArgs *const args) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
  int_mv *const frame_mv = mode_mv[this_mode];
#if CONFIG_COMPOUND_SINGLEREF
  int_mv *const frame_comp_mv = mode_comp_mv[this_mode];
#endif  // CONFIG_COMPOUND_SINGLEREF
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  int i;

  (void)args;

  if (is_comp_pred) {
    // Seed with the cached per-reference single-ref NEWMV results.
    for (i = 0; i < 2; ++i) {
      single_newmv[refs[i]].as_int = args->single_newmv[refs[i]].as_int;
    }

    if (this_mode == NEW_NEWMV) {
      // Both references use a new MV: either jointly refine them or just
      // cost the cached single-ref MVs, depending on the speed feature.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv,
#if CONFIG_COMPOUND_SINGLEREF
                            NULL,  // int_mv *frame_comp_mv
#endif  // CONFIG_COMPOUND_SINGLEREF
                            mi_row, mi_col, NULL, NULL, 0, rate_mv, 0);
      } else {
        *rate_mv = 0;
        for (i = 0; i < 2; ++i) {
          av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
          *rate_mv += av1_mv_bit_cost(
              &frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        }
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second reference uses a new MV; the first is fixed to its
      // NEAREST/NEAR candidate.
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[0]].as_int =
            mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
        compound_single_motion_search_interinter(cpi, x, bsize, frame_mv,
#if CONFIG_COMPOUND_SINGLEREF
                                                 NULL,
#endif  // CONFIG_COMPOUND_SINGLEREF
                                                 mi_row, mi_col, NULL, 0,
                                                 rate_mv, 0, 1);
      } else {
        av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
    } else {
      // Mirror case: only the first reference uses a new MV.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[1]].as_int =
            mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
        compound_single_motion_search_interinter(cpi, x, bsize, frame_mv,
#if CONFIG_COMPOUND_SINGLEREF
                                                 NULL,
#endif  // CONFIG_COMPOUND_SINGLEREF
                                                 mi_row, mi_col, NULL, 0,
                                                 rate_mv, 0, 0);
      } else {
        av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
    }
#if CONFIG_COMPOUND_SINGLEREF
  } else if (is_inter_singleref_comp_mode(this_mode)) {
    // Single ref comp mode
    const int mode0 = compound_ref0_mode(this_mode);

    single_newmv[refs[0]].as_int = args->single_newmv[refs[0]].as_int;
    frame_mv[refs[0]].as_int = (mode0 == NEWMV)
                                   ? single_newmv[refs[0]].as_int
                                   : mode_mv[mode0][refs[0]].as_int;
    // The second MV of the single-ref compound pair is always a new MV.
    assert(compound_ref1_mode(this_mode) == NEWMV);
    frame_comp_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;

    if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
      if (this_mode == SR_NEW_NEWMV) {
        joint_motion_search(cpi, x, bsize, frame_mv, frame_comp_mv, mi_row,
                            mi_col, NULL, NULL, 0, rate_mv, 0);
      } else {
        assert(  // this_mode == SR_NEAREST_NEWMV ||
            this_mode == SR_NEAR_NEWMV || this_mode == SR_ZERO_NEWMV);
        compound_single_motion_search_interinter(cpi, x, bsize, frame_mv,
                                                 frame_comp_mv, mi_row, mi_col,
                                                 NULL, 0, rate_mv, 0, 1);
      }
    } else {
      // No joint refinement: just accumulate the MV signalling cost.
      *rate_mv = 0;
      av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
      if (mode0 == NEWMV)
        *rate_mv += av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                    &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      *rate_mv += av1_mv_bit_cost(&frame_comp_mv[refs[0]].as_mv,
                                  &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                  x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
    }
#endif  // CONFIG_COMPOUND_SINGLEREF
  } else {
    // Plain single-reference NEWMV.
    if (is_comp_interintra_pred) {
      // Interintra reuses the cached single-ref search result.
      x->best_mv = args->single_newmv[refs[0]];
      *rate_mv = args->single_newmv_rate[refs[0]];
    } else {
      single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
      // Cache for reuse by later modes (e.g. interintra above).
      args->single_newmv[refs[0]] = x->best_mv;
      args->single_newmv_rate[refs[0]] = *rate_mv;
    }

    if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;

    frame_mv[refs[0]] = x->best_mv;
    xd->mi[0]->bmi[0].as_mv[0] = x->best_mv;

    // Estimate the rate implications of a new mv but discount this
    // under certain circumstances where we want to help initiate a weak
    // motion field, where the distortion gain for a single block may not
    // be enough to overcome the cost of a new mv.
    if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0])) {
      *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
    }
  }

  return 0;
}
8621
// Chooses the interpolation filter(s) for the current block.  When the frame
// uses a switchable filter and a filter search is warranted, every candidate
// filter (or dual-filter combination) is evaluated by rebuilding the
// prediction and modelling its RD cost.  Outputs: mbmi->interp_filters holds
// the winner, *switchable_rate its signalling rate, *rd the modelled RD cost,
// and *skip_txfm_sb / *skip_sse_sb the model's skip estimate.  The winning
// prediction is left in orig_dst (tmp_dst is used as scratch).  Returns 0.
int64_t interpolation_filter_search(
    MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
    BUFFER_SET *const orig_dst,
    InterpFilter (*const single_filter)[TOTAL_REFS_PER_FRAME],
    int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
    int64_t *const skip_sse_sb) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int i;
  int tmp_rate;
  int64_t tmp_dist;

  (void)single_filter;

  InterpFilter assign_filter = SWITCHABLE;

  if (cm->interp_filter == SWITCHABLE) {
#if !CONFIG_DUAL_FILTER
    // Without dual filters we may be able to predict the filter and skip
    // the search entirely when no sub-pel interpolation is needed.
    assign_filter = av1_is_interp_needed(xd)
                        ? predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
                                                single_filter)
                        : cm->interp_filter;
#endif  // !CONFIG_DUAL_FILTER
  } else {
    assign_filter = cm->interp_filter;
  }

  set_default_interp_filters(mbmi, assign_filter);

  // Baseline cost with the default filter assignment.
  *switchable_rate = av1_get_switchable_rate(cm, x, xd);
  av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
  model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
                  skip_txfm_sb, skip_sse_sb);
  *rd = RDCOST(x->rdmult, *switchable_rate + tmp_rate, tmp_dist);

  if (assign_filter == SWITCHABLE) {
    // do interp_filter search
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd)) {
#if CONFIG_DUAL_FILTER
      const int filter_set_size = DUAL_FILTER_SET_SIZE;
#else
      const int filter_set_size = SWITCHABLE_FILTERS;
#endif  // CONFIG_DUAL_FILTER
      // best_in_temp tracks which of the two buffers currently holds the
      // best prediction; we ping-pong between tmp_dst and orig_dst so the
      // best result is never overwritten by the next candidate.
      int best_in_temp = 0;
      InterpFilters best_filters = mbmi->interp_filters;
      restore_dst_buf(xd, *tmp_dst);
      // EIGHTTAP_REGULAR mode is calculated beforehand
      for (i = 1; i < filter_set_size; ++i) {
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;
        int tmp_rs;
        int64_t tmp_rd;
#if CONFIG_DUAL_FILTER
        mbmi->interp_filters =
            av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);
#else
        mbmi->interp_filters = av1_broadcast_interp_filter((InterpFilter)i);
#endif  // CONFIG_DUAL_FILTER
        tmp_rs = av1_get_switchable_rate(cm, x, xd);
        av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
        model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                        &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
        tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

        if (tmp_rd < *rd) {
          // New best: record its stats and switch the scratch buffer so the
          // just-built prediction is preserved.
          *rd = tmp_rd;
          *switchable_rate = av1_get_switchable_rate(cm, x, xd);
          best_filters = mbmi->interp_filters;
          *skip_txfm_sb = tmp_skip_sb;
          *skip_sse_sb = tmp_skip_sse;
          best_in_temp = !best_in_temp;
          if (best_in_temp) {
            restore_dst_buf(xd, *orig_dst);
          } else {
            restore_dst_buf(xd, *tmp_dst);
          }
        }
      }
      // Copy-free finish: point xd at whichever buffer holds the winner.
      if (best_in_temp) {
        restore_dst_buf(xd, *tmp_dst);
      } else {
        restore_dst_buf(xd, *orig_dst);
      }
      mbmi->interp_filters = best_filters;
    } else {
      assert(mbmi->interp_filters ==
             av1_broadcast_interp_filter(EIGHTTAP_REGULAR));
    }
  }

  return 0;
}
8716
#if CONFIG_DUAL_FILTER
// For each of the two filter dimensions, keep the given interpolation
// filter only when the block's MV actually has a sub-pel component in that
// dimension; otherwise fall back to EIGHTTAP_REGULAR (presumably because
// full-pel motion makes the filter choice moot).
static InterpFilters condition_interp_filters_on_mv(
    InterpFilters interp_filters, const MACROBLOCKD *xd) {
  InterpFilter filter0 = EIGHTTAP_REGULAR;
  InterpFilter filter1 = EIGHTTAP_REGULAR;
  if (has_subpel_mv_component(xd->mi[0], xd, 0))
    filter0 = av1_extract_interp_filter(interp_filters, 0);
  if (has_subpel_mv_component(xd->mi[0], xd, 1))
    filter1 = av1_extract_interp_filter(interp_filters, 1);
  return av1_make_interp_filters(filter0, filter1);
}
#endif
8729
// TODO(afergs): Refactor the MBMI references in here - there's four
// TODO(afergs): Refactor optional args - add them to a struct or remove
// Evaluates each allowed motion mode (simple translation, and when enabled
// OBMC and/or warped motion) for the current inter mode, computing the full
// RD cost (including transform search) of each, and leaves the best one in
// *mbmi and rd_stats / rd_stats_y / rd_stats_uv.  Returns 0 on success, or
// INT64_MAX when no motion mode produced a valid RD cost.
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
    RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
    int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
    int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
    const int *refs, int rate_mv,
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    // only used when WARPED_MOTION is on?
    int_mv *const single_newmv, int rate2_bmc_nocoeff,
    MB_MODE_INFO *best_bmc_mbmi, int rate_mv_bmc,
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

  (void)mode_mv;
  (void)mi_row;
  (void)mi_col;
  (void)args;
  (void)refs;
  (void)rate_mv;
  (void)is_comp_pred;
  (void)this_mode;
#if !CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
  (void)single_newmv;
#endif

#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
  MOTION_MODE motion_mode, last_motion_mode_allowed;
  int rate2_nocoeff = 0, best_xskip, best_disable_skip = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  MB_MODE_INFO base_mbmi, best_mbmi;
#if CONFIG_VAR_TX
  uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
#endif  // CONFIG_VAR_TX
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION

#if CONFIG_WARPED_MOTION
#if WARPED_MOTION_SORT_SAMPLES
  int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
  int pts_mv0[SAMPLES_ARRAY_SIZE];
  int total_samples;
#else
  int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#endif  // WARPED_MOTION_SORT_SAMPLES
#endif  // CONFIG_WARPED_MOTION

#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
  av1_invalid_rd_stats(&best_rd_stats);
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION

  if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
#if CONFIG_WARPED_MOTION
  aom_clear_system_state();
  // Collect the neighbouring-MV samples used to fit the warp model.
#if WARPED_MOTION_SORT_SAMPLES
  mbmi->num_proj_ref[0] =
      findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0);
  total_samples = mbmi->num_proj_ref[0];
#else
  mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
#endif  // WARPED_MOTION_SORT_SAMPLES
  best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
#endif  // CONFIG_WARPED_MOTION
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
  rate2_nocoeff = rd_stats->rate;
  last_motion_mode_allowed = motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION
      0, xd->global_motion,
#endif  // CONFIG_GLOBAL_MOTION
#if CONFIG_WARPED_MOTION
      xd,
#endif
      mi);
  base_mbmi = *mbmi;
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION

#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
  int64_t best_rd = INT64_MAX;
  // Try every allowed motion mode and keep the one with the lowest RD cost.
  for (motion_mode = SIMPLE_TRANSLATION;
       motion_mode <= last_motion_mode_allowed; motion_mode++) {
    int64_t tmp_rd = INT64_MAX;
    int tmp_rate;
    int64_t tmp_dist;
    int tmp_rate2 =
        motion_mode != SIMPLE_TRANSLATION ? rate2_bmc_nocoeff : rate2_nocoeff;

#if CONFIG_NCOBMC_ADAPT_WEIGHT
    // We cannot estimate the rd cost for the motion mode NCOBMC_ADAPT_WEIGHT
    // right now since it requires mvs from all neighboring blocks. We will
    // check if this mode is beneficial after all the mv's in the current
    // superblock are selected.
    if (motion_mode == NCOBMC_ADAPT_WEIGHT) continue;
#endif

    *mbmi = base_mbmi;
    mbmi->motion_mode = motion_mode;
#if CONFIG_MOTION_VAR
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      // OBMC starts from the best simple-translation result, optionally
      // re-running the single MV search for NEWMV-class single-ref modes.
      *mbmi = *best_bmc_mbmi;
      mbmi->motion_mode = OBMC_CAUSAL;
      if (!is_comp_pred &&
#if CONFIG_COMPOUND_SINGLEREF
          !is_inter_singleref_comp_mode(this_mode) &&
#endif  // CONFIG_COMPOUND_SINGLEREF
          have_newmv_in_inter_mode(this_mode)) {
        int tmp_rate_mv = 0;

        single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, &tmp_rate_mv);
        mbmi->mv[0].as_int = x->best_mv.as_int;
        if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
                                refs[0])) {
          tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
        }
        // Swap the old MV rate for the re-searched one.
        tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
#if CONFIG_DUAL_FILTER
        mbmi->interp_filters =
            condition_interp_filters_on_mv(mbmi->interp_filters, xd);
#endif  // CONFIG_DUAL_FILTER
        av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
      } else {
        av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
      }
      av1_build_obmc_inter_prediction(
          cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
      model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                      &tmp_dist, skip_txfm_sb, skip_sse_sb);
    }
#endif  // CONFIG_MOTION_VAR

#if CONFIG_WARPED_MOTION
    if (mbmi->motion_mode == WARPED_CAUSAL) {
#if WARPED_MOTION_SORT_SAMPLES
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#endif  // WARPED_MOTION_SORT_SAMPLES
      *mbmi = *best_bmc_mbmi;
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters = av1_broadcast_interp_filter(
          av1_unswitchable_filter(cm->interp_filter));

#if WARPED_MOTION_SORT_SAMPLES
      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Rank the samples by motion vector difference
      if (mbmi->num_proj_ref[0] > 1) {
        mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts,
                                            pts_inref, mbmi->num_proj_ref[0]);
        best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
      }
#endif  // WARPED_MOTION_SORT_SAMPLES

      // Only proceed if a warp model can be fitted to the samples.
      if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
                           mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                           &mbmi->wm_params[0], mi_row, mi_col)) {
        // Refine MV for NEWMV mode
        if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
          int tmp_rate_mv = 0;
          const int_mv mv0 = mbmi->mv[0];
          WarpedMotionParams wm_params0 = mbmi->wm_params[0];
#if WARPED_MOTION_SORT_SAMPLES
          int num_proj_ref0 = mbmi->num_proj_ref[0];

          // Refine MV in a small range.
          av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0,
                               pts_mv0, total_samples);
#else
          // Refine MV in a small range.
          av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
#endif  // WARPED_MOTION_SORT_SAMPLES

          // Keep the refined MV and WM parameters.
          if (mv0.as_int != mbmi->mv[0].as_int) {
            const int ref = refs[0];
            const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;

            tmp_rate_mv =
                av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv, x->nmvjointcost,
                                x->mvcost, MV_COST_WEIGHT);

            if (cpi->sf.adaptive_motion_search)
              x->pred_mv[ref] = mbmi->mv[0].as_mv;

            single_newmv[ref] = mbmi->mv[0];

            if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
                                    refs[0])) {
              tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
            }
#if WARPED_MOTION_SORT_SAMPLES
            best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
#endif  // WARPED_MOTION_SORT_SAMPLES
            tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
#if CONFIG_DUAL_FILTER
            mbmi->interp_filters =
                condition_interp_filters_on_mv(mbmi->interp_filters, xd);
#endif  // CONFIG_DUAL_FILTER
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params[0] = wm_params0;
#if WARPED_MOTION_SORT_SAMPLES
            mbmi->num_proj_ref[0] = num_proj_ref0;
#endif  // WARPED_MOTION_SORT_SAMPLES
          }
        }

        av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
        model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                        &tmp_dist, skip_txfm_sb, skip_sse_sb);
      } else {
        // No valid warp model for this block; skip WARPED_CAUSAL.
        continue;
      }
    }
#endif  // CONFIG_WARPED_MOTION
    x->skip = 0;

    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip = 1;
    rd_stats->rate = tmp_rate2;
    // Add the cost of signalling the motion mode when more than one is
    // allowed for this block.
    if (last_motion_mode_allowed > SIMPLE_TRANSLATION) {
#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
      if (last_motion_mode_allowed == WARPED_CAUSAL)
#endif  // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
        rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode];
#if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
      else
        rd_stats->rate += x->motion_mode_cost1[bsize][mbmi->motion_mode];
#endif  // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
    }
#if CONFIG_WARPED_MOTION
    if (mbmi->motion_mode == WARPED_CAUSAL) {
      // Warped motion forces a fixed filter, so the switchable-filter rate
      // added earlier is removed again.
      rd_stats->rate -= rs;
    }
#endif  // CONFIG_WARPED_MOTION
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    if (!*skip_txfm_sb) {
      int64_t rdcosty = INT64_MAX;
      int is_cost_valid_uv = 0;

      // cost and distortion
      av1_subtract_plane(x, bsize, 0);
#if CONFIG_VAR_TX
      if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
        select_tx_type_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
      } else {
        int idx, idy;
        super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
        for (idy = 0; idy < xd->n8_h; ++idy)
          for (idx = 0; idx < xd->n8_w; ++idx)
            mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
        memset(x->blk_skip[0], rd_stats_y->skip,
               sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
      }
#else
      /* clang-format off */
      super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
      /* clang-format on */
#endif  // CONFIG_VAR_TX

      if (rd_stats_y->rate == INT_MAX) {
        // Luma transform search failed: skip this motion mode, or give up
        // entirely if even simple translation is not codeable.
        av1_invalid_rd_stats(rd_stats);
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
        if (mbmi->motion_mode != SIMPLE_TRANSLATION) {
          continue;
        } else {
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
          restore_dst_buf(xd, *orig_dst);
          return INT64_MAX;
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
        }
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
      }

      av1_merge_rd_stats(rd_stats, rd_stats_y);

      rdcosty = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, 0, rd_stats->sse));
      /* clang-format off */
#if CONFIG_VAR_TX
      is_cost_valid_uv =
          inter_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty);
#else
      is_cost_valid_uv =
          super_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty);
#endif  // CONFIG_VAR_TX
      if (!is_cost_valid_uv) {
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
        continue;
#else
        restore_dst_buf(xd, *orig_dst);
        return INT64_MAX;
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
      }
      /* clang-format on */
      av1_merge_rd_stats(rd_stats, rd_stats_uv);
#if CONFIG_RD_DEBUG
      // record transform block coefficient cost
      // TODO(angiebird): So far rd_debug tool only detects discrepancy of
      // coefficient cost. Therefore, it is fine to copy rd_stats into mbmi
      // here because we already collect the coefficient cost. Move this part to
      // other place when we need to compare non-coefficient cost.
      mbmi->rd_stats = *rd_stats;
#endif  // CONFIG_RD_DEBUG
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
      // Decide between coding the residual and signalling skip, whichever
      // has the lower RD cost.
      if (rd_stats->skip) {
        rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
        rd_stats_y->rate = 0;
        rd_stats_uv->rate = 0;
        rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
        mbmi->skip = 0;
        // here mbmi->skip temporarily plays a role as what this_skip2 does
      } else if (!xd->lossless[mbmi->segment_id] &&
                 (RDCOST(x->rdmult,
                         rd_stats_y->rate + rd_stats_uv->rate +
                             av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
                         rd_stats->dist) >=
                  RDCOST(x->rdmult, av1_cost_bit(av1_get_skip_prob(cm, xd), 1),
                         rd_stats->sse))) {
        rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
        rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
        rd_stats->dist = rd_stats->sse;
        rd_stats_y->rate = 0;
        rd_stats_uv->rate = 0;
        mbmi->skip = 1;
      } else {
        rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
        mbmi->skip = 0;
      }
      *disable_skip = 0;
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    } else {
      // The model predicts a skippable block: take the skip path directly.
      x->skip = 1;
      *disable_skip = 1;
      mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 1);

      // The cost of skip bit needs to be added.
#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
      mbmi->skip = 0;
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
      rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);

      rd_stats->dist = *skip_sse_sb;
      rd_stats->sse = *skip_sse_sb;
      rd_stats_y->rate = 0;
      rd_stats_uv->rate = 0;
      rd_stats->skip = 1;
    }

#if CONFIG_GLOBAL_MOTION
    if (this_mode == ZEROMV || this_mode == ZERO_ZEROMV) {
      // Non-translational global motion implies a fixed filter, so remove
      // the switchable-filter rate added earlier.
      if (is_nontrans_global_motion(xd)) {
        rd_stats->rate -= rs;
        mbmi->interp_filters = av1_broadcast_interp_filter(
            av1_unswitchable_filter(cm->interp_filter));
      }
    }
#endif  // CONFIG_GLOBAL_MOTION

#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    // SIMPLE_TRANSLATION always seeds the best-so-far state; later modes
    // must strictly beat it.
    if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) {
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rd_stats_uv = *rd_stats_uv;
#if CONFIG_VAR_TX
      for (int i = 0; i < MAX_MB_PLANE; ++i)
        memcpy(best_blk_skip[i], x->blk_skip[i],
               sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
#endif  // CONFIG_VAR_TX
      best_xskip = x->skip;
      best_disable_skip = *disable_skip;
    }
  }

  if (best_rd == INT64_MAX) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst);
    return INT64_MAX;
  }
  // Restore the winning motion mode's state into the outputs.
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
#if CONFIG_VAR_TX
  for (int i = 0; i < MAX_MB_PLANE; ++i)
    memcpy(x->blk_skip[i], best_blk_skip[i],
           sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
#endif  // CONFIG_VAR_TX
  x->skip = best_xskip;
  *disable_skip = best_disable_skip;
#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION

  restore_dst_buf(xd, *orig_dst);
  return 0;
}
9135
9136 static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
9137 BLOCK_SIZE bsize, RD_STATS *rd_stats,
9138 RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
9139 int *disable_skip,
9140 int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
9141 #if CONFIG_COMPOUND_SINGLEREF
9142 int_mv (*mode_comp_mv)[TOTAL_REFS_PER_FRAME],
9143 #endif // CONFIG_COMPOUND_SINGLEREF
9144 int mi_row, int mi_col,
9145 HandleInterModeArgs *args,
9146 const int64_t ref_best_rd) {
9147 const AV1_COMMON *cm = &cpi->common;
9148 MACROBLOCKD *xd = &x->e_mbd;
9149 MODE_INFO *mi = xd->mi[0];
9150 MB_MODE_INFO *mbmi = &mi->mbmi;
9151 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
9152 const int is_comp_pred = has_second_ref(mbmi);
9153 const int this_mode = mbmi->mode;
9154 #if CONFIG_COMPOUND_SINGLEREF
9155 const int is_singleref_comp_mode = is_inter_singleref_comp_mode(this_mode);
9156 #endif // CONFIG_COMPOUND_SINGLEREF
9157 int_mv *frame_mv = mode_mv[this_mode];
9158 #if CONFIG_COMPOUND_SINGLEREF
9159 // The comp mv for the compound mode in single ref
9160 int_mv *frame_comp_mv = mode_comp_mv[this_mode];
9161 #endif // CONFIG_COMPOUND_SINGLEREF
9162 int i;
9163 int refs[2] = { mbmi->ref_frame[0],
9164 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
9165 int_mv cur_mv[2];
9166 int rate_mv = 0;
9167 int pred_exists = 1;
9168 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT || CONFIG_INTERINTRA
9169 const int bw = block_size_wide[bsize];
9170 #endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
9171 int_mv single_newmv[TOTAL_REFS_PER_FRAME];
9172 #if CONFIG_INTERINTRA
9173 const int *const interintra_mode_cost =
9174 x->interintra_mode_cost[size_group_lookup[bsize]];
9175 #endif // CONFIG_INTERINTRA
9176 const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
9177 uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
9178 #if CONFIG_HIGHBITDEPTH
9179 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
9180 #else
9181 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[MAX_MB_PLANE * MAX_SB_SQUARE]);
9182 #endif // CONFIG_HIGHBITDEPTH
9183 uint8_t *tmp_buf;
9184
9185 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9186 int rate2_bmc_nocoeff;
9187 MB_MODE_INFO best_bmc_mbmi;
9188 int rate_mv_bmc;
9189 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9190 int64_t rd = INT64_MAX;
9191 BUFFER_SET orig_dst, tmp_dst;
9192 int rs = 0;
9193
9194 int skip_txfm_sb = 0;
9195 int64_t skip_sse_sb = INT64_MAX;
9196 int16_t mode_ctx;
9197 #if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_MOTION_VAR
9198 // dummy fillers
9199 mbmi->ncobmc_mode[0] = NO_OVERLAP;
9200 mbmi->ncobmc_mode[1] = NO_OVERLAP;
9201 #endif
9202
9203 #if CONFIG_INTERINTRA
9204 int compmode_interintra_cost = 0;
9205 mbmi->use_wedge_interintra = 0;
9206 #endif
9207 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
9208 int compmode_interinter_cost = 0;
9209 mbmi->interinter_compound_type = COMPOUND_AVERAGE;
9210 #endif
9211 #if CONFIG_LGT_FROM_PRED
9212 mbmi->use_lgt = 0;
9213 #endif
9214
9215 #if CONFIG_INTERINTRA
9216 if (!cm->allow_interintra_compound && is_comp_interintra_pred)
9217 return INT64_MAX;
9218 #endif // CONFIG_INTERINTRA
9219
9220 // is_comp_interintra_pred implies !is_comp_pred
9221 assert(!is_comp_interintra_pred || (!is_comp_pred));
9222 // is_comp_interintra_pred implies is_interintra_allowed(mbmi->sb_type)
9223 assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
9224
9225 #if CONFIG_COMPOUND_SINGLEREF
9226 if (is_comp_pred || is_singleref_comp_mode)
9227 #else // !CONFIG_COMPOUND_SINGLEREF
9228 if (is_comp_pred)
9229 #endif // CONFIG_COMPOUND_SINGLEREF
9230 mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
9231 else
9232 mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
9233 mbmi->ref_frame, bsize, -1);
9234
9235 #if CONFIG_HIGHBITDEPTH
9236 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
9237 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf_);
9238 else
9239 #endif // CONFIG_HIGHBITDEPTH
9240 tmp_buf = tmp_buf_;
9241 // Make sure that we didn't leave the plane destination buffers set
9242 // to tmp_buf at the end of the last iteration
9243 assert(xd->plane[0].dst.buf != tmp_buf);
9244
9245 #if CONFIG_WARPED_MOTION
9246 mbmi->num_proj_ref[0] = 0;
9247 mbmi->num_proj_ref[1] = 0;
9248 #endif // CONFIG_WARPED_MOTION
9249
9250 if (is_comp_pred) {
9251 if (frame_mv[refs[0]].as_int == INVALID_MV ||
9252 frame_mv[refs[1]].as_int == INVALID_MV)
9253 return INT64_MAX;
9254 #if CONFIG_COMPOUND_SINGLEREF
9255 } else if (is_singleref_comp_mode) {
9256 if (frame_mv[refs[0]].as_int == INVALID_MV ||
9257 frame_comp_mv[refs[0]].as_int == INVALID_MV)
9258 return INT64_MAX;
9259 #endif // CONFIG_COMPOUND_SINGLEREF
9260 }
9261
9262 mbmi->motion_mode = SIMPLE_TRANSLATION;
9263 if (have_newmv_in_inter_mode(this_mode)) {
9264 const int64_t ret_val =
9265 handle_newmv(cpi, x, bsize, mode_mv,
9266 #if CONFIG_COMPOUND_SINGLEREF
9267 mode_comp_mv,
9268 #endif // CONFIG_COMPOUND_SINGLEREF
9269 mi_row, mi_col, &rate_mv, single_newmv, args);
9270 if (ret_val != 0)
9271 return ret_val;
9272 else
9273 rd_stats->rate += rate_mv;
9274 }
9275 for (i = 0; i < is_comp_pred + 1; ++i) {
9276 cur_mv[i] = frame_mv[refs[i]];
9277 // Clip "next_nearest" so that it does not extend to far out of image
9278 if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);
9279 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
9280 mbmi->mv[i].as_int = cur_mv[i].as_int;
9281 }
9282
9283 #if CONFIG_COMPOUND_SINGLEREF
9284 if (!is_comp_pred && is_singleref_comp_mode) {
9285 cur_mv[1] = frame_comp_mv[refs[0]];
9286 // Clip "next_nearest" so that it does not extend to far out of image
9287 if (this_mode != NEWMV) clamp_mv2(&cur_mv[1].as_mv, xd);
9288 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
9289 mbmi->mv[1].as_int = cur_mv[1].as_int;
9290 }
9291 #endif // CONFIG_COMPOUND_SINGLEREF
9292
9293 if (this_mode == NEAREST_NEARESTMV) {
9294 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
9295 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
9296 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
9297
9298 for (i = 0; i < 2; ++i) {
9299 clamp_mv2(&cur_mv[i].as_mv, xd);
9300 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
9301 mbmi->mv[i].as_int = cur_mv[i].as_int;
9302 }
9303 }
9304 }
9305
9306 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
9307 #if CONFIG_COMPOUND_SINGLEREF
9308 if (this_mode == NEAREST_NEWMV || // this_mode == SR_NEAREST_NEWMV ||
9309 this_mode == SR_NEAREST_NEARMV)
9310 #else // !CONFIG_COMPOUND_SINGLEREF
9311 if (this_mode == NEAREST_NEWMV)
9312 #endif // CONFIG_COMPOUND_SINGLEREF
9313 {
9314 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
9315
9316 #if CONFIG_AMVR
9317 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
9318 cm->cur_frame_mv_precision_level);
9319 #else
9320 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
9321 #endif
9322 clamp_mv2(&cur_mv[0].as_mv, xd);
9323 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
9324 mbmi->mv[0].as_int = cur_mv[0].as_int;
9325 }
9326
9327 if (this_mode == NEW_NEARESTMV) {
9328 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
9329
9330 #if CONFIG_AMVR
9331 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
9332 cm->cur_frame_mv_precision_level);
9333 #else
9334 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
9335 #endif
9336 clamp_mv2(&cur_mv[1].as_mv, xd);
9337 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
9338 mbmi->mv[1].as_int = cur_mv[1].as_int;
9339 }
9340 }
9341
9342 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
9343 int ref_mv_idx = mbmi->ref_mv_idx + 1;
9344 if (this_mode == NEAR_NEWMV ||
9345 #if CONFIG_COMPOUND_SINGLEREF
9346 this_mode == SR_NEAR_NEWMV ||
9347 #endif // CONFIG_COMPOUND_SINGLEREF
9348 this_mode == NEAR_NEARMV) {
9349 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
9350
9351 #if CONFIG_AMVR
9352 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
9353 cm->cur_frame_mv_precision_level);
9354 #else
9355 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
9356 #endif
9357 clamp_mv2(&cur_mv[0].as_mv, xd);
9358 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
9359 mbmi->mv[0].as_int = cur_mv[0].as_int;
9360 }
9361
9362 if (this_mode == NEW_NEARMV ||
9363 #if CONFIG_COMPOUND_SINGLEREF
9364 this_mode == SR_NEAREST_NEARMV ||
9365 #endif // CONFIG_COMPOUND_SINGLEREF
9366 this_mode == NEAR_NEARMV) {
9367 #if CONFIG_COMPOUND_SINGLEREF
9368 if (this_mode == SR_NEAREST_NEARMV)
9369 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
9370 else
9371 #endif // CONFIG_COMPOUND_SINGLEREF
9372 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
9373
9374 #if CONFIG_AMVR
9375 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
9376 cm->cur_frame_mv_precision_level);
9377 #else
9378 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
9379 #endif
9380 clamp_mv2(&cur_mv[1].as_mv, xd);
9381 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
9382 mbmi->mv[1].as_int = cur_mv[1].as_int;
9383 }
9384 }
9385
9386 // do first prediction into the destination buffer. Do the next
9387 // prediction into a temporary buffer. Then keep track of which one
9388 // of these currently holds the best predictor, and use the other
9389 // one for future predictions. In the end, copy from tmp_buf to
9390 // dst if necessary.
9391 for (i = 0; i < MAX_MB_PLANE; i++) {
9392 tmp_dst.plane[i] = tmp_buf + i * MAX_SB_SQUARE;
9393 tmp_dst.stride[i] = MAX_SB_SIZE;
9394 }
9395 for (i = 0; i < MAX_MB_PLANE; i++) {
9396 orig_dst.plane[i] = xd->plane[i].dst.buf;
9397 orig_dst.stride[i] = xd->plane[i].dst.stride;
9398 }
9399
9400 // We don't include the cost of the second reference here, because there
9401 // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
9402 // words if you present them in that order, the second one is always known
9403 // if the first is known.
9404 //
9405 // Under some circumstances we discount the cost of new mv mode to encourage
9406 // initiation of a motion field.
9407 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
9408 refs[0])) {
9409 rd_stats->rate += AOMMIN(
9410 cost_mv_ref(x, this_mode, mode_ctx),
9411 cost_mv_ref(x, is_comp_pred ? NEAREST_NEARESTMV : NEARESTMV, mode_ctx));
9412 } else {
9413 rd_stats->rate += cost_mv_ref(x, this_mode, mode_ctx);
9414 }
9415
9416 if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
9417 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV)
9418 return INT64_MAX;
9419
9420 int64_t ret_val = interpolation_filter_search(
9421 x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst, args->single_filter,
9422 &rd, &rs, &skip_txfm_sb, &skip_sse_sb);
9423 if (ret_val != 0) return ret_val;
9424
9425 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9426 best_bmc_mbmi = *mbmi;
9427 rate2_bmc_nocoeff = rd_stats->rate;
9428 if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs;
9429 rate_mv_bmc = rate_mv;
9430 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9431
9432 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
9433 #if CONFIG_COMPOUND_SINGLEREF
9434 if (is_comp_pred || is_singleref_comp_mode)
9435 #else
9436 if (is_comp_pred)
9437 #endif // CONFIG_COMPOUND_SINGLEREF
9438 {
9439 int rate_sum, rs2;
9440 int64_t dist_sum;
9441 int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX;
9442 INTERINTER_COMPOUND_DATA best_compound_data;
9443 int_mv best_mv[2];
9444 int best_tmp_rate_mv = rate_mv;
9445 int tmp_skip_txfm_sb;
9446 int64_t tmp_skip_sse_sb;
9447 DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]);
9448 DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]);
9449 uint8_t *preds0[1] = { pred0 };
9450 uint8_t *preds1[1] = { pred1 };
9451 int strides[1] = { bw };
9452 int tmp_rate_mv;
9453 int masked_compound_used = is_any_masked_compound_used(bsize);
9454 #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
9455 masked_compound_used = masked_compound_used && cm->allow_masked_compound;
9456 #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
9457 COMPOUND_TYPE cur_type;
9458 int best_compmode_interinter_cost = 0;
9459
9460 best_mv[0].as_int = cur_mv[0].as_int;
9461 best_mv[1].as_int = cur_mv[1].as_int;
9462 memset(&best_compound_data, 0, sizeof(best_compound_data));
9463 #if CONFIG_COMPOUND_SEGMENT
9464 uint8_t tmp_mask_buf[2 * MAX_SB_SQUARE];
9465 best_compound_data.seg_mask = tmp_mask_buf;
9466 #endif // CONFIG_COMPOUND_SEGMENT
9467
9468 #if CONFIG_COMPOUND_SINGLEREF
9469 // TODO(zoeliu): To further check whether the following setups are needed.
9470 // Single ref compound mode: Prepare the 2nd ref frame predictor the same as
9471 // the 1st one.
9472 if (!is_comp_pred && is_singleref_comp_mode) {
9473 xd->block_refs[1] = xd->block_refs[0];
9474 for (i = 0; i < MAX_MB_PLANE; i++)
9475 xd->plane[i].pre[1] = xd->plane[i].pre[0];
9476 }
9477 #endif // CONFIG_COMPOUND_SINGLEREF
9478
9479 if (masked_compound_used) {
9480 // get inter predictors to use for masked compound modes
9481 av1_build_inter_predictors_for_planes_single_buf(
9482 xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
9483 av1_build_inter_predictors_for_planes_single_buf(
9484 xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
9485 }
9486
9487 for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
9488 if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
9489 if (!is_interinter_compound_used(cur_type, bsize)) continue;
9490 tmp_rate_mv = rate_mv;
9491 best_rd_cur = INT64_MAX;
9492 mbmi->interinter_compound_type = cur_type;
9493 int masked_type_cost = 0;
9494 if (masked_compound_used) {
9495 #if CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
9496 if (!is_interinter_compound_used(COMPOUND_WEDGE, bsize))
9497 masked_type_cost += av1_cost_literal(1);
9498 else
9499 #endif // CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
9500 masked_type_cost +=
9501 x->compound_type_cost[bsize][mbmi->interinter_compound_type];
9502 }
9503 rs2 = av1_cost_literal(get_interinter_compound_type_bits(
9504 bsize, mbmi->interinter_compound_type)) +
9505 masked_type_cost;
9506
9507 switch (cur_type) {
9508 case COMPOUND_AVERAGE:
9509 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
9510 bsize);
9511 av1_subtract_plane(x, bsize, 0);
9512 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
9513 &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
9514 INT64_MAX);
9515 if (rd != INT64_MAX)
9516 best_rd_cur = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
9517 best_rd_compound = best_rd_cur;
9518 break;
9519 #if CONFIG_WEDGE
9520 case COMPOUND_WEDGE:
9521 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
9522 best_rd_compound / 3 < ref_best_rd) {
9523 best_rd_cur = build_and_cost_compound_type(
9524 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
9525 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
9526 }
9527 break;
9528 #endif // CONFIG_WEDGE
9529 #if CONFIG_COMPOUND_SEGMENT
9530 case COMPOUND_SEG:
9531 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
9532 best_rd_compound / 3 < ref_best_rd) {
9533 best_rd_cur = build_and_cost_compound_type(
9534 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
9535 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
9536 }
9537 break;
9538 #endif // CONFIG_COMPOUND_SEGMENT
9539 default: assert(0); return 0;
9540 }
9541
9542 if (best_rd_cur < best_rd_compound) {
9543 best_rd_compound = best_rd_cur;
9544 #if CONFIG_WEDGE
9545 best_compound_data.wedge_index = mbmi->wedge_index;
9546 best_compound_data.wedge_sign = mbmi->wedge_sign;
9547 #endif // CONFIG_WEDGE
9548 #if CONFIG_COMPOUND_SEGMENT
9549 best_compound_data.mask_type = mbmi->mask_type;
9550 memcpy(best_compound_data.seg_mask, xd->seg_mask,
9551 2 * MAX_SB_SQUARE * sizeof(uint8_t));
9552 #endif // CONFIG_COMPOUND_SEGMENT
9553 best_compound_data.interinter_compound_type =
9554 mbmi->interinter_compound_type;
9555 best_compmode_interinter_cost = rs2;
9556 if (have_newmv_in_inter_mode(this_mode)) {
9557 if (use_masked_motion_search(cur_type)) {
9558 best_tmp_rate_mv = tmp_rate_mv;
9559 best_mv[0].as_int = mbmi->mv[0].as_int;
9560 best_mv[1].as_int = mbmi->mv[1].as_int;
9561 } else {
9562 best_mv[0].as_int = cur_mv[0].as_int;
9563 best_mv[1].as_int = cur_mv[1].as_int;
9564 }
9565 }
9566 }
9567 // reset to original mvs for next iteration
9568 mbmi->mv[0].as_int = cur_mv[0].as_int;
9569 mbmi->mv[1].as_int = cur_mv[1].as_int;
9570 }
9571 #if CONFIG_WEDGE
9572 mbmi->wedge_index = best_compound_data.wedge_index;
9573 mbmi->wedge_sign = best_compound_data.wedge_sign;
9574 #endif // CONFIG_WEDGE
9575 #if CONFIG_COMPOUND_SEGMENT
9576 mbmi->mask_type = best_compound_data.mask_type;
9577 memcpy(xd->seg_mask, best_compound_data.seg_mask,
9578 2 * MAX_SB_SQUARE * sizeof(uint8_t));
9579 #endif // CONFIG_COMPOUND_SEGMENT
9580 mbmi->interinter_compound_type =
9581 best_compound_data.interinter_compound_type;
9582 if (have_newmv_in_inter_mode(this_mode)) {
9583 mbmi->mv[0].as_int = best_mv[0].as_int;
9584 mbmi->mv[1].as_int = best_mv[1].as_int;
9585 xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
9586 xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
9587 if (use_masked_motion_search(mbmi->interinter_compound_type)) {
9588 rd_stats->rate += best_tmp_rate_mv - rate_mv;
9589 rate_mv = best_tmp_rate_mv;
9590 }
9591 }
9592
9593 if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) {
9594 restore_dst_buf(xd, orig_dst);
9595 return INT64_MAX;
9596 }
9597
9598 pred_exists = 0;
9599
9600 compmode_interinter_cost = best_compmode_interinter_cost;
9601 }
9602 #endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
9603
9604 #if CONFIG_INTERINTRA
9605 if (is_comp_interintra_pred) {
9606 INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
9607 int64_t best_interintra_rd = INT64_MAX;
9608 int rmode, rate_sum;
9609 int64_t dist_sum;
9610 int j;
9611 int tmp_rate_mv = 0;
9612 int tmp_skip_txfm_sb;
9613 int64_t tmp_skip_sse_sb;
9614 DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_SB_SQUARE]);
9615 uint8_t *intrapred;
9616
9617 #if CONFIG_HIGHBITDEPTH
9618 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
9619 intrapred = CONVERT_TO_BYTEPTR(intrapred_);
9620 else
9621 #endif // CONFIG_HIGHBITDEPTH
9622 intrapred = intrapred_;
9623
9624 mbmi->ref_frame[1] = NONE_FRAME;
9625 for (j = 0; j < MAX_MB_PLANE; j++) {
9626 xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
9627 xd->plane[j].dst.stride = bw;
9628 }
9629 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize);
9630 restore_dst_buf(xd, orig_dst);
9631 mbmi->ref_frame[1] = INTRA_FRAME;
9632 mbmi->use_wedge_interintra = 0;
9633
9634 for (j = 0; j < INTERINTRA_MODES; ++j) {
9635 mbmi->interintra_mode = (INTERINTRA_MODE)j;
9636 rmode = interintra_mode_cost[mbmi->interintra_mode];
9637 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, &orig_dst,
9638 intrapred, bw);
9639 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
9640 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
9641 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
9642 rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
9643 if (rd < best_interintra_rd) {
9644 best_interintra_rd = rd;
9645 best_interintra_mode = mbmi->interintra_mode;
9646 }
9647 }
9648 mbmi->interintra_mode = best_interintra_mode;
9649 rmode = interintra_mode_cost[mbmi->interintra_mode];
9650 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, &orig_dst,
9651 intrapred, bw);
9652 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
9653 av1_subtract_plane(x, bsize, 0);
9654 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
9655 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
9656 if (rd != INT64_MAX)
9657 rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum, dist_sum);
9658 best_interintra_rd = rd;
9659
9660 if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) {
9661 // Don't need to call restore_dst_buf here
9662 return INT64_MAX;
9663 }
9664 #if CONFIG_WEDGE
9665 if (is_interintra_wedge_used(bsize)) {
9666 int64_t best_interintra_rd_nowedge = INT64_MAX;
9667 int64_t best_interintra_rd_wedge = INT64_MAX;
9668 int_mv tmp_mv;
9669 int rwedge = av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0);
9670 if (rd != INT64_MAX)
9671 rd = RDCOST(x->rdmult, rmode + rate_mv + rwedge + rate_sum, dist_sum);
9672 best_interintra_rd_nowedge = best_interintra_rd;
9673
9674 // Disable wedge search if source variance is small
9675 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
9676 mbmi->use_wedge_interintra = 1;
9677
9678 rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) +
9679 av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1);
9680
9681 best_interintra_rd_wedge =
9682 pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
9683
9684 best_interintra_rd_wedge +=
9685 RDCOST(x->rdmult, rmode + rate_mv + rwedge, 0);
9686 // Refine motion vector.
9687 if (have_newmv_in_inter_mode(this_mode)) {
9688 // get negative of mask
9689 const uint8_t *mask = av1_get_contiguous_soft_mask(
9690 mbmi->interintra_wedge_index, 1, bsize);
9691 tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
9692 compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
9693 mi_col, intrapred, mask, bw,
9694 &tmp_rate_mv, 0);
9695 mbmi->mv[0].as_int = tmp_mv.as_int;
9696 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
9697 bsize);
9698 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
9699 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
9700 rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
9701 dist_sum);
9702 if (rd >= best_interintra_rd_wedge) {
9703 tmp_mv.as_int = cur_mv[0].as_int;
9704 tmp_rate_mv = rate_mv;
9705 }
9706 } else {
9707 tmp_mv.as_int = cur_mv[0].as_int;
9708 tmp_rate_mv = rate_mv;
9709 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
9710 }
9711 // Evaluate closer to true rd
9712 av1_subtract_plane(x, bsize, 0);
9713 rd =
9714 estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
9715 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
9716 if (rd != INT64_MAX)
9717 rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
9718 dist_sum);
9719 best_interintra_rd_wedge = rd;
9720 if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
9721 mbmi->use_wedge_interintra = 1;
9722 mbmi->mv[0].as_int = tmp_mv.as_int;
9723 rd_stats->rate += tmp_rate_mv - rate_mv;
9724 rate_mv = tmp_rate_mv;
9725 } else {
9726 mbmi->use_wedge_interintra = 0;
9727 mbmi->mv[0].as_int = cur_mv[0].as_int;
9728 }
9729 } else {
9730 mbmi->use_wedge_interintra = 0;
9731 }
9732 }
9733 #endif // CONFIG_WEDGE
9734
9735 pred_exists = 0;
9736 compmode_interintra_cost =
9737 av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1) +
9738 interintra_mode_cost[mbmi->interintra_mode];
9739 if (is_interintra_wedge_used(bsize)) {
9740 compmode_interintra_cost += av1_cost_bit(
9741 cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
9742 if (mbmi->use_wedge_interintra) {
9743 compmode_interintra_cost +=
9744 av1_cost_literal(get_interintra_wedge_bits(bsize));
9745 }
9746 }
9747 } else if (is_interintra_allowed(mbmi)) {
9748 compmode_interintra_cost =
9749 av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 0);
9750 }
9751 #endif // CONFIG_INTERINTRA
9752
9753 if (pred_exists == 0) {
9754 int tmp_rate;
9755 int64_t tmp_dist;
9756 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
9757 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
9758 &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
9759 rd = RDCOST(x->rdmult, rs + tmp_rate, tmp_dist);
9760 }
9761
9762 if (!is_comp_pred)
9763 args->single_filter[this_mode][refs[0]] =
9764 av1_extract_interp_filter(mbmi->interp_filters, 0);
9765
9766 if (args->modelled_rd != NULL) {
9767 if (is_comp_pred) {
9768 const int mode0 = compound_ref0_mode(this_mode);
9769 const int mode1 = compound_ref1_mode(this_mode);
9770 const int64_t mrd = AOMMIN(args->modelled_rd[mode0][refs[0]],
9771 args->modelled_rd[mode1][refs[1]]);
9772 if (rd / 4 * 3 > mrd && ref_best_rd < INT64_MAX) {
9773 restore_dst_buf(xd, orig_dst);
9774 return INT64_MAX;
9775 }
9776 } else if (!is_comp_interintra_pred) {
9777 args->modelled_rd[this_mode][refs[0]] = rd;
9778 }
9779 }
9780
9781 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
9782 // if current pred_error modeled rd is substantially more than the best
9783 // so far, do not bother doing full rd
9784 if (rd / 2 > ref_best_rd) {
9785 restore_dst_buf(xd, orig_dst);
9786 return INT64_MAX;
9787 }
9788 }
9789
9790 #if CONFIG_INTERINTRA
9791 rd_stats->rate += compmode_interintra_cost;
9792 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9793 rate2_bmc_nocoeff += compmode_interintra_cost;
9794 #endif
9795 #endif
9796 #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
9797 rd_stats->rate += compmode_interinter_cost;
9798 #endif
9799
9800 ret_val = motion_mode_rd(
9801 cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, disable_skip, mode_mv,
9802 mi_row, mi_col, args, ref_best_rd, refs, rate_mv,
9803 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9804 single_newmv, rate2_bmc_nocoeff, &best_bmc_mbmi, rate_mv_bmc,
9805 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9806 rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst);
9807 if (ret_val != 0) return ret_val;
9808
9809 return 0; // The rate-distortion cost will be re-calculated by caller.
9810 }
9811
9812 #if CONFIG_INTRABC
// RD search for the IntraBC (intra block copy) mode: finds the best
// displacement vector (DV) pointing into the already-reconstructed region of
// the current frame and evaluates its rate-distortion cost. When a candidate
// beats `best_rd`, the winning mode info is written back into
// xd->mi[0]->mbmi, *rd_cost and x->skip. Returns the best RD cost found, or
// INT64_MAX when IntraBC is not allowed for this block.
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  if (!av1_allow_intrabc(bsize, cm)) return INT64_MAX;

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MODE_INFO *const mi = xd->mi[0];
  // Recover mi_row/mi_col from the distances to the frame edges
  // (mb_to_top_edge / mb_to_left_edge are in 1/8-pel units).
  const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
  const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row / MAX_MIB_SIZE;
  const int sb_col = mi_col / MAX_MIB_SIZE;

  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  // Gather DV prediction candidates from spatial neighbors; INTRA_FRAME acts
  // as the "reference frame" for intra block copy.
  av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
                   mbmi_ext->ref_mv_stack[ref_frame],
                   mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
                   NULL, NULL, mbmi_ext->mode_context);

  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);

  // Prefer the nearest candidate; if both are zero, fall back to a default
  // reference DV (a zero DV would copy the block onto itself).
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, mi_row, mi_col);
  mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref;

  // Point the pred[0] buffers at the current (partially reconstructed) frame
  // itself, since IntraBC predicts from it rather than a reference frame.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL);
  for (int i = 0; i < MAX_MB_PLANE; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO *mbmi = &mi->mbmi;
  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdcost = *rd_cost;
  int best_skip = x->skip;

  // Search two candidate source regions separately: superblock rows strictly
  // above, and the already-coded area to the left in the current SB row.
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    const MvLimits tmp_mv_limits = x->mv_limits;
    switch (dir) {
      case IBC_MOTION_ABOVE:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        x->mv_limits.row_max = (sb_row * MAX_MIB_SIZE - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (sb_col * MAX_MIB_SIZE - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * MAX_MIB_SIZE, tile->mi_row_end);
        x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // The direction-specific limits must only tighten the original limits.
    assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
    av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);

    // Skip this direction if the tightened search window is empty.
    if (x->mv_limits.col_max < x->mv_limits.col_min ||
        x->mv_limits.row_max < x->mv_limits.row_min) {
      x->mv_limits = tmp_mv_limits;
      continue;
    }

    int step_param = cpi->mv_step_param;
    // Full-pel starting point for the search (dv_ref is in 1/8-pel units).
    MV mvp_full = dv_ref.as_mv;
    mvp_full.col >>= 3;
    mvp_full.row >>= 3;
    int sadpb = x->sadperbit16;
    int cost_list[5];
#if CONFIG_HASH_ME
    int bestsme = av1_full_pixel_search(
        cpi, x, bsize, &mvp_full, step_param, sadpb,
        cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
        (MI_SIZE * mi_col), (MI_SIZE * mi_row), 1);
#else
    int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                        sadpb, cond_cost_list(cpi, cost_list),
                                        &dv_ref.as_mv, INT_MAX, 1);
#endif

    x->mv_limits = tmp_mv_limits;
    if (bestsme == INT_MAX) continue;
    mvp_full = x->best_mv.as_mv;
    // Convert the full-pel search result back to 1/8-pel DV units.
    MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
    if (mv_check_bounds(&x->mv_limits, &dv)) continue;
    if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) continue;

    // Configure mbmi for IntraBC: DC_PRED luma/chroma with a BILINEAR filter
    // for the block-copy prediction.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip = 0;
    x->skip = 0;
    av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);

    assert(x->mvcost == x->mv_cost_stack[0]);
    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost,
                                  x->mvcost, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->intrabc_cost[1];
    RD_STATS rd_stats, rd_stats_uv;
    av1_subtract_plane(x, bsize, 0);
    super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
    super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
#if CONFIG_RD_DEBUG
    mbmi->rd_stats = rd_stats;
#endif

#if CONFIG_VAR_TX
    // TODO(aconverse@google.com): Evaluate allowing VAR TX on intrabc blocks
    const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
    const int height = block_size_high[bsize] >> tx_size_high_log2[0];
    int idx, idy;
    for (idy = 0; idy < height; ++idy)
      for (idx = 0; idx < width; ++idx)
        mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
    mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
#endif  // CONFIG_VAR_TX

    const aom_prob skip_prob = av1_get_skip_prob(cm, xd);

    // Evaluate the non-skip cost (coefficients coded).
    RD_STATS rdc_noskip;
    av1_init_rd_stats(&rdc_noskip);
    rdc_noskip.rate =
        rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0);
    rdc_noskip.dist = rd_stats.dist;
    rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist);
    if (rdc_noskip.rdcost < best_rd) {
      best_rd = rdc_noskip.rdcost;
      best_mbmi = *mbmi;
      best_skip = x->skip;
      best_rdcost = rdc_noskip;
    }

    // Evaluate the skip cost (no coefficients; distortion is the SSE).
    x->skip = 1;
    mbmi->skip = 1;
    RD_STATS rdc_skip;
    av1_init_rd_stats(&rdc_skip);
    rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1);
    rdc_skip.dist = rd_stats.sse;
    rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist);
    if (rdc_skip.rdcost < best_rd) {
      best_rd = rdc_skip.rdcost;
      best_mbmi = *mbmi;
      best_skip = x->skip;
      best_rdcost = rdc_skip;
    }
  }
  // Commit the best configuration found (may be the caller's original one).
  *mbmi = best_mbmi;
  *rd_cost = best_rdcost;
  x->skip = best_skip;
  return best_rd;
}
9989 #endif // CONFIG_INTRABC
9990
// Picks the best intra coding mode for a superblock: luma mode first, then
// chroma, and (when CONFIG_INTRABC) an IntraBC candidate. Results are written
// into *rd_cost and the pick-mode context `ctx`; rd_cost->rate is set to
// INT_MAX when nothing beats `best_rd`.
void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                               RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = xd->plane;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0;
  TX_SIZE max_uv_tx_size;
  const int unify_bsize = CONFIG_CB4X4;

  // Start from a clean intra configuration.
  ctx->skip = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_INTRABC
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
#endif  // CONFIG_INTRABC
#if CONFIG_LGT_FROM_PRED
  mbmi->use_lgt = 0;
#endif

  // Luma mode search; sub-8x8 blocks use a dedicated path unless CB4X4
  // unifies the block sizes.
  const int64_t intra_yrd =
      (bsize >= BLOCK_8X8 || unify_bsize)
          ? rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                   &y_skip, bsize, best_rd)
          : rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                         &dist_y, &y_skip, best_rd);

  if (intra_yrd < best_rd) {
#if CONFIG_CFL
#if CONFIG_CB4X4
    // Only store reconstructed luma when there's chroma RDO. When there's no
    // chroma RDO, the reconstructed luma will be stored in encode_superblock().
    xd->cfl->store_y = !x->skip_chroma_rd;
#else
    xd->cfl->store_y = 1;
#endif  // CONFIG_CB4X4
    if (xd->cfl->store_y) {
      // Perform one extra call to txfm_rd_in_plane(), with the values chosen
      // during luma RDO, so we can store reconstructed luma values
      RD_STATS this_rd_stats;
      txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
                       mbmi->sb_type, mbmi->tx_size,
                       cpi->sf.use_fast_coef_costing);
      xd->cfl->store_y = 0;
    }
#endif  // CONFIG_CFL
    // Derive the max chroma tx size from the chosen luma tx size and the
    // chroma subsampling, then search chroma modes.
    max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x]
                                     [pd[1].subsampling_y];
    init_sbuv_mode(mbmi);
#if CONFIG_CB4X4
    if (!x->skip_chroma_rd)
      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                              &uv_skip, bsize, max_uv_tx_size);
#else
    rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                            &uv_skip, AOMMAX(BLOCK_8X8, bsize), max_uv_tx_size);
#endif  // CONFIG_CB4X4

    // When both planes skip coefficients, replace the token costs with the
    // cost of signaling the skip flag instead.
    if (y_skip && (uv_skip || x->skip_chroma_rd)) {
      rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                      av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
      rd_cost->dist = dist_y + dist_uv;
    } else {
      rd_cost->rate =
          rate_y + rate_uv + av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
      rd_cost->dist = dist_y + dist_uv;
    }
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  } else {
    // Nothing beat the caller's threshold; mark the result invalid.
    rd_cost->rate = INT_MAX;
  }

#if CONFIG_INTRABC
  // Let IntraBC compete against the best regular intra result found so far.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
    ctx->skip = x->skip;  // FIXME where is the proper place to set this?!
    assert(rd_cost->rate != INT_MAX);
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  }
#endif
  if (rd_cost->rate == INT_MAX) return;

  // Save the winning mode info into the pick-mode context for the caller.
  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
}
10081
10082 // Do we have an internal image edge (e.g. formatting bars).
10083 int av1_internal_image_edge(const AV1_COMP *cpi) {
10084 return (cpi->oxcf.pass == 2) &&
10085 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
10086 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
10087 }
10088
10089 // Checks to see if a super block is on a horizontal image edge.
10090 // In most cases this is the "real" edge unless there are formatting
10091 // bars embedded in the stream.
10092 int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
10093 int top_edge = 0;
10094 int bottom_edge = cpi->common.mi_rows;
10095 int is_active_h_edge = 0;
10096
10097 // For two pass account for any formatting bars detected.
10098 if (cpi->oxcf.pass == 2) {
10099 const TWO_PASS *const twopass = &cpi->twopass;
10100
10101 // The inactive region is specified in MBs not mi units.
10102 // The image edge is in the following MB row.
10103 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
10104
10105 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
10106 bottom_edge = AOMMAX(top_edge, bottom_edge);
10107 }
10108
10109 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
10110 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
10111 is_active_h_edge = 1;
10112 }
10113 return is_active_h_edge;
10114 }
10115
10116 // Checks to see if a super block is on a vertical image edge.
10117 // In most cases this is the "real" edge unless there are formatting
10118 // bars embedded in the stream.
10119 int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
10120 int left_edge = 0;
10121 int right_edge = cpi->common.mi_cols;
10122 int is_active_v_edge = 0;
10123
10124 // For two pass account for any formatting bars detected.
10125 if (cpi->oxcf.pass == 2) {
10126 const TWO_PASS *const twopass = &cpi->twopass;
10127
10128 // The inactive region is specified in MBs not mi units.
10129 // The image edge is in the following MB row.
10130 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
10131
10132 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
10133 right_edge = AOMMAX(left_edge, right_edge);
10134 }
10135
10136 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
10137 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
10138 is_active_v_edge = 1;
10139 }
10140 return is_active_v_edge;
10141 }
10142
10143 // Checks to see if a super block is at the edge of the active image.
10144 // In most cases this is the "real" edge unless there are formatting
10145 // bars embedded in the stream.
10146 int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
10147 return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
10148 av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
10149 }
10150
// Rebuilds the chroma palette color-index map for the current block by
// re-assigning each (U, V) source pixel pair to its nearest palette color
// (the palette colors already stored in the block's PALETTE_MODE_INFO are
// used as fixed centroids).
static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  assert(bsize >= BLOCK_8X8);
  // NOTE(review): the U-plane stride is also used to index the V plane
  // below — presumably both chroma planes share a stride; confirm.
  int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  // Scratch buffer holding interleaved (U, V) samples for av1_calc_indices.
  float *const data = x->palette_buffer->kmeans_data_buf;
  float centroids[2 * PALETTE_MAX_SIZE];
  uint8_t *const color_map = xd->plane[1].color_index_map;
  int r, c;
#if CONFIG_HIGHBITDEPTH
  const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
  const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
#endif // CONFIG_HIGHBITDEPTH
  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);
  (void)cpi;

  // Gather the source chroma samples as interleaved (U, V) float pairs, one
  // pair per pixel, in raster order.
  for (r = 0; r < rows; ++r) {
    for (c = 0; c < cols; ++c) {
#if CONFIG_HIGHBITDEPTH
      if (cpi->common.use_highbitdepth) {
        data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
        data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
      } else {
#endif // CONFIG_HIGHBITDEPTH
        data[(r * cols + c) * 2] = src_u[r * src_stride + c];
        data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
#if CONFIG_HIGHBITDEPTH
      }
#endif // CONFIG_HIGHBITDEPTH
    }
  }

  // Load the stored U (r == 1) and V (r == 2) palette colors as interleaved
  // (U, V) centroids matching the layout of `data`.
  for (r = 1; r < 3; ++r) {
    for (c = 0; c < pmi->palette_size[1]; ++c) {
      centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
    }
  }

  // Assign every pixel to its nearest centroid, then replicate map entries
  // so the index map covers the full plane block dimensions.
  av1_calc_indices(data, centroids, color_map, rows * cols,
                   pmi->palette_size[1], 2);
  extend_palette_color_map(color_map, cols, rows, plane_block_width,
                           plane_block_height);
}
10200
#if CONFIG_FILTER_INTRA
// Evaluates DC_PRED with the filter-intra extension as one extra candidate
// during the inter-frame RD mode search.  If its RD cost beats *best_rd, the
// caller's "best mode" state (best_mbmode, rd_cost, best_rd, best_skip2,
// best_mode_skippable, best_mode_index) is overwritten in place.
//
// rate_uv_intra / rate_uv_tokenonly / dist_uv / skip_uv / mode_uv /
// pmi_uv / filter_intra_mode_info_uv (and uv_angle_delta) are per-tx-size
// caches of previously computed chroma intra decisions, indexed by uv_tx;
// a rate_uv_intra[] entry of INT_MAX marks a slot not yet computed.
static void pick_filter_intra_interframe(
    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
    int mi_col, int *rate_uv_intra, int *rate_uv_tokenonly, int64_t *dist_uv,
    int *skip_uv, UV_PREDICTION_MODE *mode_uv,
    FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
#if CONFIG_EXT_INTRA
    int8_t *uv_angle_delta,
#endif // CONFIG_EXT_INTRA
    PALETTE_MODE_INFO *pmi_uv, int palette_ctx, int skip_mask,
    unsigned int *ref_costs_single, int64_t *best_rd, int64_t *best_intra_rd,
    PREDICTION_MODE *best_intra_mode, int *best_mode_index, int *best_skip2,
    int *best_mode_skippable,
#if CONFIG_SUPERTX
    int *returnrate_nocoef,
#endif // CONFIG_SUPERTX
    int64_t *best_pred_rd, MB_MODE_INFO *best_mbmode, RD_STATS *rd_cost) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int try_palette =
      av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
  int rate2 = 0, rate_y = INT_MAX, skippable = 0, rate_uv, rate_dummy, i;
  int dc_mode_index;
  const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
  int64_t distortion2 = 0, distortion_y = 0, this_rd = *best_rd;
  int64_t distortion_uv, model_rd = INT64_MAX;
  TX_SIZE uv_tx;

  // Locate the {DC_PRED, INTRA_FRAME} entry in the mode-search order table,
  // so best_mode_index can be reported in the caller's indexing scheme.
  for (i = 0; i < MAX_MODES; ++i)
    if (av1_mode_order[i].mode == DC_PRED &&
        av1_mode_order[i].ref_frame[0] == INTRA_FRAME)
      break;
  dc_mode_index = i;
  assert(i < MAX_MODES);

  // TODO(huisu): use skip_mask for further speedup.
  (void)skip_mask;
  // Configure the current block as an intra DC_PRED block, then search the
  // filter-intra luma modes.
  mbmi->mode = DC_PRED;
  mbmi->uv_mode = UV_DC_PRED;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  if (!rd_pick_filter_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
                                &skippable, bsize, intra_mode_cost[mbmi->mode],
                                &this_rd, &model_rd, 0)) {
    return;
  }
  if (rate_y == INT_MAX) return;

  // Pick (or reuse from cache) the chroma intra decision for the chroma tx
  // size implied by the chosen luma tx size and the chroma subsampling.
  uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
                          [xd->plane[1].subsampling_y];
  if (rate_uv_intra[uv_tx] == INT_MAX) {
    choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
                         &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
                         &skip_uv[uv_tx], &mode_uv[uv_tx]);
    if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
    filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
#if CONFIG_EXT_INTRA
    uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
#endif // CONFIG_EXT_INTRA
  }

  // Install the cached chroma decision into the current mode info.
  rate_uv = rate_uv_tokenonly[uv_tx];
  distortion_uv = dist_uv[uv_tx];
  skippable = skippable && skip_uv[uv_tx];
  mbmi->uv_mode = mode_uv[uv_tx];
  if (cm->allow_screen_content_tools) {
    pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
    memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
           pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
           2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
  }
#if CONFIG_EXT_INTRA
  mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
#endif // CONFIG_EXT_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
      filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
  if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
    mbmi->filter_intra_mode_info.filter_intra_mode[1] =
        filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
  }

  // Accumulate the total mode rate: luma + luma mode signaling + chroma +
  // chroma mode signaling.
  rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
          x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
  // When palette is available, a DC_PRED block also signals "no palette".
  if (try_palette && mbmi->mode == DC_PRED)
    rate2 += av1_cost_bit(
        av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);

  if (!xd->lossless[mbmi->segment_id]) {
    // super_block_yrd above includes the cost of the tx_size in the
    // tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
  }

  // Signaling cost of the luma filter-intra flag and mode.
  rate2 += av1_cost_bit(cm->fc->filter_intra_probs[0],
                        mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
  rate2 += write_uniform_cost(
      FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
#if CONFIG_EXT_INTRA
  // Directional chroma modes additionally signal an angle delta.
  if (av1_is_directional_mode(get_uv_mode(mbmi->uv_mode), bsize) &&
      av1_use_angle_delta(bsize)) {
    rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
  }
#endif // CONFIG_EXT_INTRA
  // Chroma filter-intra flag (and mode, if used) is only signaled for
  // DC_PRED luma.
  if (mbmi->mode == DC_PRED) {
    rate2 +=
        av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
                     mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
    if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
      rate2 +=
          write_uniform_cost(FILTER_INTRA_MODES,
                             mbmi->filter_intra_mode_info.filter_intra_mode[1]);
  }
  distortion2 = distortion_y + distortion_uv;
  av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, 0, 0, mi_row,
                               mi_col);

  // Intra blocks in an inter frame also pay the intra-frame reference cost.
  rate2 += ref_costs_single[INTRA_FRAME];

  // If the whole block is skippable, the token rates are replaced by the
  // cost of the skip flag.
  if (skippable) {
    rate2 -= (rate_y + rate_uv);
    rate_y = 0;
    rate_uv = 0;
    rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
  } else {
    rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
  }
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  // Update the best-intra and per-reference-mode RD trackers.
  if (this_rd < *best_intra_rd) {
    *best_intra_rd = this_rd;
    *best_intra_mode = mbmi->mode;
  }
  for (i = 0; i < REFERENCE_MODES; ++i)
    best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);

  // If this candidate is the new overall best, commit it to the caller's
  // best-mode state.
  if (this_rd < *best_rd) {
    *best_mode_index = dc_mode_index;
    mbmi->mv[0].as_int = 0;
    rd_cost->rate = rate2;
#if CONFIG_SUPERTX
    if (x->skip)
      *returnrate_nocoef = rate2;
    else
      *returnrate_nocoef = rate2 - rate_y - rate_uv;
    *returnrate_nocoef -= av1_cost_bit(av1_get_skip_prob(cm, xd), skippable);
    *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
                                       mbmi->ref_frame[0] != INTRA_FRAME);
#endif // CONFIG_SUPERTX
    rd_cost->dist = distortion2;
    rd_cost->rdcost = this_rd;
    *best_rd = this_rd;
    *best_mbmode = *mbmi;
    *best_skip2 = 0;
    *best_mode_skippable = skippable;
  }
}
#endif // CONFIG_FILTER_INTRA
10363
10364 #if CONFIG_MOTION_VAR
10365 static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
10366 const MACROBLOCKD *xd, int mi_row,
10367 int mi_col, const uint8_t *above,
10368 int above_stride, const uint8_t *left,
10369 int left_stride);
10370 #endif // CONFIG_MOTION_VAR
10371
10372 void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
10373 MACROBLOCK *x, int mi_row, int mi_col,
10374 RD_STATS *rd_cost,
10375 #if CONFIG_SUPERTX
10376 int *returnrate_nocoef,
10377 #endif // CONFIG_SUPERTX
10378 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
10379 int64_t best_rd_so_far) {
10380 const AV1_COMMON *const cm = &cpi->common;
10381 const RD_OPT *const rd_opt = &cpi->rd;
10382 const SPEED_FEATURES *const sf = &cpi->sf;
10383 MACROBLOCKD *const xd = &x->e_mbd;
10384 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
10385 const int try_palette =
10386 av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
10387 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
10388 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
10389 const struct segmentation *const seg = &cm->seg;
10390 PREDICTION_MODE this_mode;
10391 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
10392 unsigned char segment_id = mbmi->segment_id;
10393 int comp_pred, i, k;
10394 int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
10395 #if CONFIG_COMPOUND_SINGLEREF
10396 int_mv frame_comp_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
10397 #endif // CONFIG_COMPOUND_SINGLEREF
10398 struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
10399 int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
10400 int single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
10401 int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
10402 static const int flag_list[TOTAL_REFS_PER_FRAME] = {
10403 0,
10404 AOM_LAST_FLAG,
10405 #if CONFIG_EXT_REFS
10406 AOM_LAST2_FLAG,
10407 AOM_LAST3_FLAG,
10408 #endif // CONFIG_EXT_REFS
10409 AOM_GOLD_FLAG,
10410 #if CONFIG_EXT_REFS
10411 AOM_BWD_FLAG,
10412 AOM_ALT2_FLAG,
10413 #endif // CONFIG_EXT_REFS
10414 AOM_ALT_FLAG
10415 };
10416 int64_t best_rd = best_rd_so_far;
10417 int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
10418 int64_t best_pred_diff[REFERENCE_MODES];
10419 int64_t best_pred_rd[REFERENCE_MODES];
10420 MB_MODE_INFO best_mbmode;
10421 int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
10422 int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
10423 int best_mode_skippable = 0;
10424 int midx, best_mode_index = -1;
10425 unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
10426 #if CONFIG_EXT_COMP_REFS
10427 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
10428 #else
10429 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
10430 #endif // CONFIG_EXT_COMP_REFS
10431 aom_prob comp_mode_p;
10432 int64_t best_intra_rd = INT64_MAX;
10433 unsigned int best_pred_sse = UINT_MAX;
10434 PREDICTION_MODE best_intra_mode = DC_PRED;
10435 int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
10436 int64_t dist_uvs[TX_SIZES_ALL];
10437 int skip_uvs[TX_SIZES_ALL];
10438 UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
10439 PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
10440 #if CONFIG_EXT_INTRA
10441 int8_t uv_angle_delta[TX_SIZES_ALL];
10442 int is_directional_mode, angle_stats_ready = 0;
10443 uint8_t directional_mode_skip_mask[INTRA_MODES];
10444 #endif // CONFIG_EXT_INTRA
10445 #if CONFIG_FILTER_INTRA
10446 int8_t dc_skipped = 1;
10447 FILTER_INTRA_MODE_INFO filter_intra_mode_info_uv[TX_SIZES_ALL];
10448 #endif // CONFIG_FILTER_INTRA
10449 const int intra_cost_penalty = av1_get_intra_cost_penalty(
10450 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
10451 const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
10452 int best_skip2 = 0;
10453 uint16_t ref_frame_skip_mask[2] = { 0 };
10454 uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
10455 #if CONFIG_INTERINTRA
10456 MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
10457 int64_t best_single_inter_rd = INT64_MAX;
10458 #endif // CONFIG_INTERINTRA
10459 int mode_skip_start = sf->mode_skip_start + 1;
10460 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
10461 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
10462 int64_t mode_threshold[MAX_MODES];
10463 int *mode_map = tile_data->mode_map[bsize];
10464 const int mode_search_skip_flags = sf->mode_search_skip_flags;
10465 #if CONFIG_PVQ
10466 od_rollback_buffer pre_buf;
10467 #endif // CONFIG_PVQ
10468
10469 HandleInterModeArgs args = {
10470 #if CONFIG_MOTION_VAR
10471 { NULL },
10472 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
10473 { NULL },
10474 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
10475 #endif // CONFIG_MOTION_VAR
10476 NULL,
10477 NULL,
10478 NULL,
10479 { { 0 } },
10480 };
10481
10482 const int rows = block_size_high[bsize];
10483 const int cols = block_size_wide[bsize];
10484 int palette_ctx = 0;
10485 const MODE_INFO *above_mi = xd->above_mi;
10486 const MODE_INFO *left_mi = xd->left_mi;
10487 #if CONFIG_MOTION_VAR
10488 int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
10489 int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
10490 int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
10491 int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
10492
10493 #if CONFIG_HIGHBITDEPTH
10494 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
10495 int len = sizeof(uint16_t);
10496 args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
10497 args.above_pred_buf[1] =
10498 CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
10499 args.above_pred_buf[2] =
10500 CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len);
10501 args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
10502 args.left_pred_buf[1] =
10503 CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
10504 args.left_pred_buf[2] =
10505 CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len);
10506 } else {
10507 #endif // CONFIG_HIGHBITDEPTH
10508 args.above_pred_buf[0] = x->above_pred_buf;
10509 args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE;
10510 args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE;
10511 args.left_pred_buf[0] = x->left_pred_buf;
10512 args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE;
10513 args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE;
10514 #if CONFIG_HIGHBITDEPTH
10515 }
10516 #endif // CONFIG_HIGHBITDEPTH
10517 #endif // CONFIG_MOTION_VAR
10518
10519 av1_zero(best_mbmode);
10520
10521 av1_zero(pmi_uv);
10522 if (try_palette) {
10523 if (above_mi)
10524 palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
10525 if (left_mi)
10526 palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
10527 }
10528
10529 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
10530 &comp_mode_p);
10531
10532 for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
10533 for (i = 0; i < TX_SIZES_ALL; i++) rate_uv_intra[i] = INT_MAX;
10534 for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
10535 for (i = 0; i < MB_MODE_COUNT; ++i) {
10536 for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
10537 args.single_filter[i][k] = SWITCHABLE;
10538 }
10539 }
10540
10541 rd_cost->rate = INT_MAX;
10542 #if CONFIG_SUPERTX
10543 *returnrate_nocoef = INT_MAX;
10544 #endif // CONFIG_SUPERTX
10545
10546 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
10547 x->pred_mv_sad[ref_frame] = INT_MAX;
10548 x->mbmi_ext->mode_context[ref_frame] = 0;
10549 x->mbmi_ext->compound_mode_context[ref_frame] = 0;
10550 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
10551 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
10552 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
10553 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
10554 }
10555 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
10556 #if CONFIG_GLOBAL_MOTION
10557 frame_mv[ZEROMV][ref_frame].as_int =
10558 gm_get_motion_vector(&cm->global_motion[ref_frame],
10559 cm->allow_high_precision_mv, bsize, mi_col, mi_row,
10560 0
10561 #if CONFIG_AMVR
10562 ,
10563 cm->cur_frame_mv_precision_level
10564 #endif
10565 )
10566 .as_int;
10567 #else // CONFIG_GLOBAL_MOTION
10568 frame_mv[ZEROMV][ref_frame].as_int = 0;
10569 #endif // CONFIG_GLOBAL_MOTION
10570 frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
10571 #if CONFIG_COMPOUND_SINGLEREF
10572 frame_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV;
10573 frame_comp_mv[SR_NEW_NEWMV][ref_frame].as_int = INVALID_MV;
10574 #endif // CONFIG_COMPOUND_SINGLEREF
10575 #if CONFIG_GLOBAL_MOTION
10576 frame_mv[ZERO_ZEROMV][ref_frame].as_int =
10577 gm_get_motion_vector(&cm->global_motion[ref_frame],
10578 cm->allow_high_precision_mv, bsize, mi_col, mi_row,
10579 0
10580 #if CONFIG_AMVR
10581 ,
10582 cm->cur_frame_mv_precision_level
10583 #endif
10584 )
10585 .as_int;
10586 #else // CONFIG_GLOBAL_MOTION
10587 frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0;
10588 #endif // CONFIG_GLOBAL_MOTION
10589 }
10590
10591 for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
10592 MODE_INFO *const mi = xd->mi[0];
10593 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
10594 x->mbmi_ext->mode_context[ref_frame] = 0;
10595 av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
10596 mbmi_ext->ref_mv_stack[ref_frame],
10597 mbmi_ext->compound_mode_context, candidates, mi_row,
10598 mi_col, NULL, NULL, mbmi_ext->mode_context);
10599 if (mbmi_ext->ref_mv_count[ref_frame] < 2) {
10600 MV_REFERENCE_FRAME rf[2];
10601 av1_set_ref_frame(rf, ref_frame);
10602 if (mbmi_ext->ref_mvs[rf[0]][0].as_int !=
10603 frame_mv[ZEROMV][rf[0]].as_int ||
10604 mbmi_ext->ref_mvs[rf[0]][1].as_int !=
10605 frame_mv[ZEROMV][rf[0]].as_int ||
10606 mbmi_ext->ref_mvs[rf[1]][0].as_int !=
10607 frame_mv[ZEROMV][rf[1]].as_int ||
10608 mbmi_ext->ref_mvs[rf[1]][1].as_int != frame_mv[ZEROMV][rf[1]].as_int)
10609 mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
10610 }
10611 }
10612
10613 #if CONFIG_MOTION_VAR
10614 av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
10615
10616 if (check_num_overlappable_neighbors(mbmi) &&
10617 is_motion_variation_allowed_bsize(bsize)) {
10618 av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
10619 args.above_pred_buf, dst_width1,
10620 dst_height1, args.above_pred_stride);
10621 av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
10622 args.left_pred_buf, dst_width2,
10623 dst_height2, args.left_pred_stride);
10624 av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
10625 mi_col);
10626 calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0],
10627 args.above_pred_stride[0], args.left_pred_buf[0],
10628 args.left_pred_stride[0]);
10629 }
10630 #endif // CONFIG_MOTION_VAR
10631
10632 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
10633 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
10634 // Skip checking missing references in both single and compound reference
10635 // modes. Note that a mode will be skipped iff both reference frames
10636 // are masked out.
10637 ref_frame_skip_mask[0] |= (1 << ref_frame);
10638 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
10639 } else {
10640 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
10641 // Skip fixed mv modes for poor references
10642 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
10643 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
10644 break;
10645 }
10646 }
10647 }
10648 // If the segment reference frame feature is enabled....
10649 // then do nothing if the current ref frame is not allowed..
10650 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
10651 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
10652 ref_frame_skip_mask[0] |= (1 << ref_frame);
10653 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
10654 }
10655 }
10656
10657 // Disable this drop out case if the ref frame
10658 // segment level feature is enabled for this segment. This is to
10659 // prevent the possibility that we end up unable to pick any mode.
10660 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
10661 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
10662 // unless ARNR filtering is enabled in which case we want
10663 // an unfiltered alternative. We allow near/nearest as well
10664 // because they may result in zero-zero MVs but be cheaper.
10665 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
10666 int_mv zeromv;
10667 ref_frame_skip_mask[0] = (1 << LAST_FRAME) |
10668 #if CONFIG_EXT_REFS
10669 (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
10670 (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) |
10671 #endif // CONFIG_EXT_REFS
10672 (1 << GOLDEN_FRAME);
10673 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
10674 // TODO(zoeliu): To further explore whether following needs to be done for
10675 // BWDREF_FRAME as well.
10676 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
10677 #if CONFIG_GLOBAL_MOTION
10678 zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME],
10679 cm->allow_high_precision_mv, bsize,
10680 mi_col, mi_row, 0
10681 #if CONFIG_AMVR
10682 ,
10683 cm->cur_frame_mv_precision_level
10684 #endif
10685 )
10686 .as_int;
10687 #else
10688 zeromv.as_int = 0;
10689 #endif // CONFIG_GLOBAL_MOTION
10690 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
10691 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
10692 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
10693 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
10694 if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
10695 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
10696 if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
10697 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
10698 #if CONFIG_COMPOUND_SINGLEREF
10699 if (frame_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int ||
10700 frame_comp_mv[SR_NEAREST_NEARMV][ALTREF_FRAME].as_int !=
10701 zeromv.as_int)
10702 mode_skip_mask[ALTREF_FRAME] |= (1 << SR_NEAREST_NEARMV);
10703 #endif // CONFIG_COMPOUND_SINGLEREF
10704 }
10705 }
10706
10707 if (cpi->rc.is_src_frame_alt_ref) {
10708 if (sf->alt_ref_search_fp) {
10709 assert(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
10710 mode_skip_mask[ALTREF_FRAME] = 0;
10711 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
10712 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
10713 }
10714 }
10715
10716 if (sf->alt_ref_search_fp)
10717 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
10718 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
10719 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
10720
10721 if (sf->adaptive_mode_search) {
10722 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
10723 cpi->rc.frames_since_golden >= 3)
10724 if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
10725 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
10726 }
10727
10728 if (bsize > sf->max_intra_bsize) {
10729 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
10730 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
10731 }
10732
10733 mode_skip_mask[INTRA_FRAME] |=
10734 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
10735
10736 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
10737 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
10738 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
10739
10740 midx = sf->schedule_mode_search ? mode_skip_start : 0;
10741 while (midx > 4) {
10742 uint8_t end_pos = 0;
10743 for (i = 5; i < midx; ++i) {
10744 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
10745 uint8_t tmp = mode_map[i];
10746 mode_map[i] = mode_map[i - 1];
10747 mode_map[i - 1] = tmp;
10748 end_pos = i;
10749 }
10750 }
10751 midx = end_pos;
10752 }
10753
10754 if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
10755 x->use_default_intra_tx_type = 1;
10756 else
10757 x->use_default_intra_tx_type = 0;
10758
10759 if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
10760 x->use_default_inter_tx_type = 1;
10761 else
10762 x->use_default_inter_tx_type = 0;
10763 #if CONFIG_PVQ
10764 od_encode_checkpoint(&x->daala_enc, &pre_buf);
10765 #endif // CONFIG_PVQ
10766 for (i = 0; i < MB_MODE_COUNT; ++i)
10767 for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
10768 modelled_rd[i][ref_frame] = INT64_MAX;
10769
10770 for (midx = 0; midx < MAX_MODES; ++midx) {
10771 int mode_index;
10772 int mode_excluded = 0;
10773 int64_t this_rd = INT64_MAX;
10774 int disable_skip = 0;
10775 int compmode_cost = 0;
10776 int rate2 = 0, rate_y = 0, rate_uv = 0;
10777 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
10778 int skippable = 0;
10779 int this_skip2 = 0;
10780 int64_t total_sse = INT64_MAX;
10781 uint8_t ref_frame_type;
10782 #if CONFIG_PVQ
10783 od_encode_rollback(&x->daala_enc, &pre_buf);
10784 #endif // CONFIG_PVQ
10785 mode_index = mode_map[midx];
10786 this_mode = av1_mode_order[mode_index].mode;
10787 ref_frame = av1_mode_order[mode_index].ref_frame[0];
10788 second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
10789 mbmi->ref_mv_idx = 0;
10790
10791 if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
10792 // Mode must by compatible
10793 if (!is_interintra_allowed_mode(this_mode)) continue;
10794 if (!is_interintra_allowed_bsize(bsize)) continue;
10795 }
10796
10797 if (is_inter_compound_mode(this_mode)) {
10798 frame_mv[this_mode][ref_frame].as_int =
10799 frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
10800 frame_mv[this_mode][second_ref_frame].as_int =
10801 frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
10802 #if CONFIG_COMPOUND_SINGLEREF
10803 } else if (is_inter_singleref_comp_mode(this_mode)) {
10804 frame_mv[this_mode][ref_frame].as_int =
10805 frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
10806 frame_comp_mv[this_mode][ref_frame].as_int =
10807 frame_mv[compound_ref1_mode(this_mode)][ref_frame].as_int;
10808 #endif // CONFIG_COMPOUND_SINGLEREF
10809 }
10810
10811 // Look at the reference frame of the best mode so far and set the
10812 // skip mask to look at a subset of the remaining modes.
10813 if (midx == mode_skip_start && best_mode_index >= 0) {
10814 switch (best_mbmode.ref_frame[0]) {
10815 case INTRA_FRAME: break;
10816 case LAST_FRAME:
10817 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
10818 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
10819 break;
10820 #if CONFIG_EXT_REFS
10821 case LAST2_FRAME:
10822 ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
10823 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
10824 break;
10825 case LAST3_FRAME:
10826 ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
10827 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
10828 break;
10829 #endif // CONFIG_EXT_REFS
10830 case GOLDEN_FRAME:
10831 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
10832 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
10833 break;
10834 #if CONFIG_EXT_REFS
10835 case BWDREF_FRAME:
10836 ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
10837 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
10838 break;
10839 case ALTREF2_FRAME:
10840 ref_frame_skip_mask[0] |= ALTREF2_FRAME_MODE_MASK;
10841 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
10842 break;
10843 #endif // CONFIG_EXT_REFS
10844 case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
10845 #if CONFIG_EXT_REFS
10846 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
10847 #endif // CONFIG_EXT_REFS
10848 break;
10849 case NONE_FRAME:
10850 case TOTAL_REFS_PER_FRAME:
10851 assert(0 && "Invalid Reference frame");
10852 break;
10853 }
10854 }
10855
10856 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
10857 (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
10858 continue;
10859
10860 #if CONFIG_EXT_COMP_REFS
10861 // TODO(zoeliu): Following toggle between #if 0/1 and the bug will manifest
10862 // itself.
10863 #if 0
10864 if (!(cpi->ref_frame_flags & flag_list[ref_frame]) ||
10865 (second_ref_frame > INTRA_FRAME &&
10866 (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))))
10867 printf("Frame=%d, bsize=%d, (mi_row,mi_col)=(%d,%d), ref_frame=%d, "
10868 "second_ref_frame=%d\n", cm->current_video_frame, bsize, mi_row,
10869 mi_col, ref_frame, second_ref_frame);
10870
10871 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
10872 if (second_ref_frame > INTRA_FRAME &&
10873 (!(cpi->ref_frame_flags & flag_list[second_ref_frame])))
10874 continue;
10875 #endif // 0
10876
10877 #if !USE_UNI_COMP_REFS
10878 // NOTE(zoeliu): Temporarily disable uni-directional comp refs
10879 if (second_ref_frame > INTRA_FRAME) {
10880 if (!((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME)))
10881 continue;
10882 }
10883 assert(second_ref_frame <= INTRA_FRAME ||
10884 ((ref_frame < BWDREF_FRAME) ^ (second_ref_frame < BWDREF_FRAME)));
10885 #endif // !USE_UNI_COMP_REFS
10886 #endif // CONFIG_EXT_COMP_REFS
10887
10888 if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
10889
10890 // Test best rd so far against threshold for trying this mode.
10891 if (best_mode_skippable && sf->schedule_mode_search)
10892 mode_threshold[mode_index] <<= 1;
10893
10894 if (best_rd < mode_threshold[mode_index]) continue;
10895
10896 // This is only used in motion vector unit test.
10897 if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
10898
10899 #if CONFIG_ONE_SIDED_COMPOUND && !CONFIG_EXT_COMP_REFS // Changes LL bitstream
10900 #if CONFIG_EXT_REFS
10901 if (cpi->oxcf.pass == 0) {
10902 // Complexity-compression trade-offs
10903 // if (ref_frame == ALTREF_FRAME) continue;
10904 // if (ref_frame == BWDREF_FRAME) continue;
10905 if (second_ref_frame == ALTREF_FRAME) continue;
10906 // if (second_ref_frame == BWDREF_FRAME) continue;
10907 }
10908 #endif // CONFIG_EXT_REFS
10909 #endif // CONFIG_ONE_SIDED_COMPOUND && !CONFIG_EXT_COMP_REFS
10910 comp_pred = second_ref_frame > INTRA_FRAME;
10911 if (comp_pred) {
10912 if (!cpi->allow_comp_inter_inter) continue;
10913
10914 // Skip compound inter modes if ARF is not available.
10915 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
10916
10917 // Do not allow compound prediction if the segment level reference frame
10918 // feature is in use as in this case there can only be one reference.
10919 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
10920
10921 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
10922 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
10923 continue;
10924
10925 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
10926 } else {
10927 if (ref_frame != INTRA_FRAME)
10928 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
10929 }
10930
10931 if (ref_frame == INTRA_FRAME) {
10932 if (sf->adaptive_mode_search)
10933 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
10934 continue;
10935
10936 if (this_mode != DC_PRED) {
10937 // Disable intra modes other than DC_PRED for blocks with low variance
10938 // Threshold for intra skipping based on source variance
10939 // TODO(debargha): Specialize the threshold for super block sizes
10940 const unsigned int skip_intra_var_thresh = 64;
10941 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
10942 x->source_variance < skip_intra_var_thresh)
10943 continue;
10944 // Only search the oblique modes if the best so far is
10945 // one of the neighboring directional modes
10946 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
10947 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
10948 if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
10949 continue;
10950 }
10951 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
10952 if (conditional_skipintra(this_mode, best_intra_mode)) continue;
10953 }
10954 }
10955 #if CONFIG_GLOBAL_MOTION
10956 } else if (cm->global_motion[ref_frame].wmtype == IDENTITY &&
10957 (!comp_pred ||
10958 cm->global_motion[second_ref_frame].wmtype == IDENTITY)) {
10959 #else // CONFIG_GLOBAL_MOTION
10960 } else {
10961 #endif // CONFIG_GLOBAL_MOTION
10962 const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
10963 if (!check_best_zero_mv(cpi, x, mbmi_ext->mode_context,
10964 mbmi_ext->compound_mode_context, frame_mv,
10965 this_mode, ref_frames, bsize, -1, mi_row, mi_col))
10966 continue;
10967 }
10968
10969 mbmi->mode = this_mode;
10970 mbmi->uv_mode = UV_DC_PRED;
10971 mbmi->ref_frame[0] = ref_frame;
10972 mbmi->ref_frame[1] = second_ref_frame;
10973 pmi->palette_size[0] = 0;
10974 pmi->palette_size[1] = 0;
10975 #if CONFIG_FILTER_INTRA
10976 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
10977 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
10978 #endif // CONFIG_FILTER_INTRA
10979 // Evaluate all sub-pel filters irrespective of whether we can use
10980 // them for this frame.
10981
10982 set_default_interp_filters(mbmi, cm->interp_filter);
10983
10984 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
10985 mbmi->motion_mode = SIMPLE_TRANSLATION;
10986
10987 x->skip = 0;
10988 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
10989
10990 // Select prediction reference frames.
10991 for (i = 0; i < MAX_MB_PLANE; i++) {
10992 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
10993 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
10994 }
10995
10996 #if CONFIG_COMPOUND_SINGLEREF
10997 // Single ref compound mode
10998 if (!comp_pred && is_inter_singleref_comp_mode(mbmi->mode)) {
10999 xd->block_refs[1] = xd->block_refs[0];
11000 for (i = 0; i < MAX_MB_PLANE; i++)
11001 xd->plane[i].pre[1] = xd->plane[i].pre[0];
11002 }
11003 #endif // CONFIG_COMPOUND_SINGLEREF
11004
11005 #if CONFIG_INTERINTRA
11006 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
11007 #endif // CONFIG_INTERINTRA
11008
11009 if (ref_frame == INTRA_FRAME) {
11010 RD_STATS rd_stats_y;
11011 TX_SIZE uv_tx;
11012 struct macroblockd_plane *const pd = &xd->plane[1];
11013 #if CONFIG_EXT_INTRA
11014 is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
11015 if (is_directional_mode && av1_use_angle_delta(bsize)) {
11016 int rate_dummy;
11017 int64_t model_rd = INT64_MAX;
11018 if (!angle_stats_ready) {
11019 const int src_stride = x->plane[0].src.stride;
11020 const uint8_t *src = x->plane[0].src.buf;
11021 #if CONFIG_HIGHBITDEPTH
11022 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
11023 highbd_angle_estimation(src, src_stride, rows, cols, bsize,
11024 directional_mode_skip_mask);
11025 else
11026 #endif // CONFIG_HIGHBITDEPTH
11027 angle_estimation(src, src_stride, rows, cols, bsize,
11028 directional_mode_skip_mask);
11029 angle_stats_ready = 1;
11030 }
11031 if (directional_mode_skip_mask[mbmi->mode]) continue;
11032 rd_stats_y.rate = INT_MAX;
11033 rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rd_stats_y, bsize,
11034 intra_mode_cost[mbmi->mode], best_rd,
11035 &model_rd);
11036 } else {
11037 mbmi->angle_delta[0] = 0;
11038 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
11039 }
11040 #else
11041 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
11042 #endif // CONFIG_EXT_INTRA
11043 rate_y = rd_stats_y.rate;
11044 distortion_y = rd_stats_y.dist;
11045 skippable = rd_stats_y.skip;
11046
11047 if (rate_y == INT_MAX) continue;
11048
11049 #if CONFIG_FILTER_INTRA
11050 if (mbmi->mode == DC_PRED) dc_skipped = 0;
11051 #endif // CONFIG_FILTER_INTRA
11052
11053 uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][pd->subsampling_x]
11054 [pd->subsampling_y];
11055 if (rate_uv_intra[uv_tx] == INT_MAX) {
11056 choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
11057 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
11058 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
11059 if (try_palette) pmi_uv[uv_tx] = *pmi;
11060
11061 #if CONFIG_EXT_INTRA
11062 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
11063 #endif // CONFIG_EXT_INTRA
11064 #if CONFIG_FILTER_INTRA
11065 filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
11066 #endif // CONFIG_FILTER_INTRA
11067 }
11068
11069 rate_uv = rate_uv_tokenonly[uv_tx];
11070 distortion_uv = dist_uvs[uv_tx];
11071 skippable = skippable && skip_uvs[uv_tx];
11072 mbmi->uv_mode = mode_uv[uv_tx];
11073 if (try_palette) {
11074 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
11075 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
11076 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
11077 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
11078 }
11079
11080 #if CONFIG_EXT_INTRA
11081 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
11082 #endif // CONFIG_EXT_INTRA
11083 #if CONFIG_FILTER_INTRA
11084 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
11085 filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
11086 if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
11087 mbmi->filter_intra_mode_info.filter_intra_mode[1] =
11088 filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
11089 }
11090 #endif // CONFIG_FILTER_INTRA
11091
11092 #if CONFIG_CB4X4
11093 rate2 = rate_y + intra_mode_cost[mbmi->mode];
11094 if (!x->skip_chroma_rd)
11095 rate2 += rate_uv + x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
11096 #else
11097 rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
11098 x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
11099 #endif // CONFIG_CB4X4
11100
11101 if (try_palette && mbmi->mode == DC_PRED) {
11102 rate2 += av1_cost_bit(
11103 av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
11104 }
11105
11106 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
11107 // super_block_yrd above includes the cost of the tx_size in the
11108 // tokenonly rate, but for intra blocks, tx_size is always coded
11109 // (prediction granularity), so we account for it in the full rate,
11110 // not the tokenonly rate.
11111 rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
11112 }
11113 #if CONFIG_EXT_INTRA
11114 if (is_directional_mode) {
11115 #if CONFIG_INTRA_INTERP
11116 const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
11117 const int p_angle =
11118 mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
11119 if (av1_is_intra_filter_switchable(p_angle))
11120 rate2 += x->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
11121 #endif // CONFIG_INTRA_INTERP
11122 if (av1_use_angle_delta(bsize)) {
11123 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
11124 MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
11125 }
11126 }
11127 if (av1_is_directional_mode(get_uv_mode(mbmi->uv_mode), bsize) &&
11128 av1_use_angle_delta(bsize)) {
11129 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
11130 MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
11131 }
11132 #endif // CONFIG_EXT_INTRA
11133 #if CONFIG_FILTER_INTRA
11134 if (mbmi->mode == DC_PRED) {
11135 rate2 +=
11136 av1_cost_bit(cm->fc->filter_intra_probs[0],
11137 mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
11138 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
11139 rate2 += write_uniform_cost(
11140 FILTER_INTRA_MODES,
11141 mbmi->filter_intra_mode_info.filter_intra_mode[0]);
11142 }
11143 }
11144 if (mbmi->uv_mode == UV_DC_PRED) {
11145 rate2 +=
11146 av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
11147 mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
11148 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
11149 rate2 += write_uniform_cost(
11150 FILTER_INTRA_MODES,
11151 mbmi->filter_intra_mode_info.filter_intra_mode[1]);
11152 }
11153 #endif // CONFIG_FILTER_INTRA
11154 if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
11155 rate2 += intra_cost_penalty;
11156 distortion2 = distortion_y + distortion_uv;
11157 } else {
11158 int_mv backup_ref_mv[2];
11159
11160 if (!is_comp_ref_allowed(bsize) && mbmi->ref_frame[1] > INTRA_FRAME)
11161 continue;
11162
11163 backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
11164 if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
11165 #if CONFIG_INTERINTRA
11166 if (second_ref_frame == INTRA_FRAME) {
11167 if (best_single_inter_ref != ref_frame) continue;
11168 mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode];
11169 // TODO(debargha|geza.lore):
11170 // Should we use ext_intra modes for interintra?
11171 #if CONFIG_EXT_INTRA
11172 mbmi->angle_delta[0] = 0;
11173 mbmi->angle_delta[1] = 0;
11174 #if CONFIG_INTRA_INTERP
11175 mbmi->intra_filter = INTRA_FILTER_LINEAR;
11176 #endif // CONFIG_INTRA_INTERP
11177 #endif // CONFIG_EXT_INTRA
11178 #if CONFIG_FILTER_INTRA
11179 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
11180 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
11181 #endif // CONFIG_FILTER_INTRA
11182 }
11183 #endif // CONFIG_INTERINTRA
11184 mbmi->ref_mv_idx = 0;
11185 ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
11186
11187 if (comp_pred) {
11188 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
11189 int ref_mv_idx = 0;
11190 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
11191 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
11192 // mbmi->ref_mv_idx (like NEWMV)
11193 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
11194 ref_mv_idx = 1;
11195
11196 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
11197 int_mv this_mv =
11198 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
11199 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11200 xd->n8_h << MI_SIZE_LOG2, xd);
11201 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
11202 }
11203 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
11204 int_mv this_mv =
11205 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
11206 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11207 xd->n8_h << MI_SIZE_LOG2, xd);
11208 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
11209 }
11210 }
11211 #if CONFIG_COMPOUND_SINGLEREF
11212 } else if (is_inter_singleref_comp_mode(mbmi->mode)) {
11213 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
11214 // TODO(zoeliu): To further investigate which ref_mv_idx should be
11215 // chosen for the mode of SR_NEAR_NEWMV.
11216 int ref_mv_idx = 0;
          // Special case: SR_NEAR_NEWMV mode uses
11218 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
11219 // mbmi->ref_mv_idx (like NEWMV)
11220 if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1;
11221
11222 if (compound_ref0_mode(mbmi->mode) == NEWMV ||
11223 compound_ref1_mode(mbmi->mode) == NEWMV) {
11224 int_mv this_mv =
11225 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
11226 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11227 xd->n8_h << MI_SIZE_LOG2, xd);
11228 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
11229 }
11230 }
11231 #endif // CONFIG_COMPOUND_SINGLEREF
11232 } else {
11233 if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
11234 int ref;
11235 for (ref = 0; ref < 1 + comp_pred; ++ref) {
11236 int_mv this_mv =
11237 (ref == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
11238 : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
11239 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11240 xd->n8_h << MI_SIZE_LOG2, xd);
11241 mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
11242 }
11243 }
11244 }
11245 {
11246 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
11247 av1_init_rd_stats(&rd_stats);
11248 rd_stats.rate = rate2;
11249
11250 // Point to variables that are maintained between loop iterations
11251 args.single_newmv = single_newmv;
11252 args.single_newmv_rate = single_newmv_rate;
11253 args.modelled_rd = modelled_rd;
11254 this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
11255 &rd_stats_uv, &disable_skip, frame_mv,
11256 #if CONFIG_COMPOUND_SINGLEREF
11257 frame_comp_mv,
11258 #endif // CONFIG_COMPOUND_SINGLEREF
11259 mi_row, mi_col, &args, best_rd);
11260
11261 rate2 = rd_stats.rate;
11262 skippable = rd_stats.skip;
11263 distortion2 = rd_stats.dist;
11264 total_sse = rd_stats.sse;
11265 rate_y = rd_stats_y.rate;
11266 rate_uv = rd_stats_uv.rate;
11267 }
11268
11269 // TODO(jingning): This needs some refactoring to improve code quality
11270 // and reduce redundant steps.
11271 #if CONFIG_COMPOUND_SINGLEREF
11272 if ((have_nearmv_in_inter_mode(mbmi->mode) &&
11273 mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
11274 ((mbmi->mode == NEWMV || mbmi->mode == SR_NEW_NEWMV ||
11275 mbmi->mode == NEW_NEWMV) &&
11276 mbmi_ext->ref_mv_count[ref_frame_type] > 1))
11277 #else // !CONFIG_COMPOUND_SINGLEREF
11278 if ((have_nearmv_in_inter_mode(mbmi->mode) &&
11279 mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
11280 ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
11281 mbmi_ext->ref_mv_count[ref_frame_type] > 1))
11282 #endif // CONFIG_COMPOUND_SINGLEREF
11283 {
11284 int_mv backup_mv = frame_mv[NEARMV][ref_frame];
11285 MB_MODE_INFO backup_mbmi = *mbmi;
11286 int backup_skip = x->skip;
11287 int64_t tmp_ref_rd = this_rd;
11288 int ref_idx;
11289
11290 // TODO(jingning): This should be deprecated shortly.
11291 int idx_offset = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
11292 int ref_set =
11293 AOMMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
11294
11295 uint8_t drl_ctx =
11296 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx_offset);
11297 // Dummy
11298 int_mv backup_fmv[2];
11299 backup_fmv[0] = frame_mv[NEWMV][ref_frame];
11300 if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
11301
11302 rate2 += (rate2 < INT_MAX ? x->drl_mode_cost0[drl_ctx][0] : 0);
11303
11304 if (this_rd < INT64_MAX) {
11305 if (RDCOST(x->rdmult, rate_y + rate_uv, distortion2) <
11306 RDCOST(x->rdmult, 0, total_sse))
11307 tmp_ref_rd = RDCOST(
11308 x->rdmult, rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
11309 distortion2);
11310 else
11311 tmp_ref_rd =
11312 RDCOST(x->rdmult,
11313 rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
11314 rate_y - rate_uv,
11315 total_sse);
11316 }
11317 #if CONFIG_VAR_TX
11318 for (i = 0; i < MAX_MB_PLANE; ++i)
11319 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
11320 sizeof(uint8_t) * ctx->num_4x4_blk);
11321 #endif // CONFIG_VAR_TX
11322
11323 for (ref_idx = 0; ref_idx < ref_set; ++ref_idx) {
11324 int64_t tmp_alt_rd = INT64_MAX;
11325 int dummy_disable_skip = 0;
11326 int ref;
11327 int_mv cur_mv;
11328 RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
11329
11330 av1_invalid_rd_stats(&tmp_rd_stats);
11331
11332 x->skip = 0;
11333
11334 mbmi->ref_mv_idx = 1 + ref_idx;
11335
11336 if (comp_pred) {
11337 int ref_mv_idx = mbmi->ref_mv_idx;
11338 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
11339 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
11340 // mbmi->ref_mv_idx (like NEWMV)
11341 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
11342 ref_mv_idx = 1 + mbmi->ref_mv_idx;
11343
11344 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
11345 int_mv this_mv =
11346 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
11347 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11348 xd->n8_h << MI_SIZE_LOG2, xd);
11349 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
11350 } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV) {
11351 int_mv this_mv =
11352 mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
11353 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11354 xd->n8_h << MI_SIZE_LOG2, xd);
11355 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
11356 }
11357
11358 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
11359 int_mv this_mv =
11360 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
11361 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11362 xd->n8_h << MI_SIZE_LOG2, xd);
11363 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
11364 } else if (compound_ref1_mode(mbmi->mode) == NEARESTMV) {
11365 int_mv this_mv =
11366 mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
11367 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11368 xd->n8_h << MI_SIZE_LOG2, xd);
11369 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
11370 }
11371 #if CONFIG_COMPOUND_SINGLEREF
11372 } else if (is_inter_singleref_comp_mode(mbmi->mode)) {
11373 int ref_mv_idx = mbmi->ref_mv_idx;
            // Special case: SR_NEAR_NEWMV mode uses
11375 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
11376 // mbmi->ref_mv_idx (like NEWMV)
11377 if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx = 1 + mbmi->ref_mv_idx;
11378
11379 // TODO(zoeliu): For the mode of SR_NEAREST_NEWMV, as it only runs
11380 // the "if", not the "else if",
11381 // mbmi_ext->ref_mvs[mbmi->ref_frame[0]] takes the
11382 // value for "NEWMV", instead of "NEARESTMV".
11383 if (compound_ref0_mode(mbmi->mode) == NEWMV ||
11384 compound_ref1_mode(mbmi->mode) == NEWMV) {
11385 int_mv this_mv =
11386 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
11387 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11388 xd->n8_h << MI_SIZE_LOG2, xd);
11389 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
11390 } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV ||
11391 compound_ref1_mode(mbmi->mode) == NEARESTMV) {
11392 int_mv this_mv =
11393 mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
11394 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11395 xd->n8_h << MI_SIZE_LOG2, xd);
11396 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
11397 }
11398 #endif // CONFIG_COMPOUND_SINGLEREF
11399 } else {
11400 for (ref = 0; ref < 1 + comp_pred; ++ref) {
11401 int_mv this_mv =
11402 (ref == 0)
11403 ? mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
11404 .this_mv
11405 : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
11406 .comp_mv;
11407 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
11408 xd->n8_h << MI_SIZE_LOG2, xd);
11409 mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
11410 }
11411 }
11412
11413 cur_mv =
11414 mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
11415 .this_mv;
11416 clamp_mv2(&cur_mv.as_mv, xd);
11417
11418 if (!mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
11419 int_mv dummy_single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
11420 int dummy_single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
11421
11422 frame_mv[NEARMV][ref_frame] = cur_mv;
11423 av1_init_rd_stats(&tmp_rd_stats);
11424
11425 // Point to variables that are not maintained between iterations
11426 args.single_newmv = dummy_single_newmv;
11427 args.single_newmv_rate = dummy_single_newmv_rate;
11428 args.modelled_rd = NULL;
11429 tmp_alt_rd = handle_inter_mode(cpi, x, bsize, &tmp_rd_stats,
11430 &tmp_rd_stats_y, &tmp_rd_stats_uv,
11431 &dummy_disable_skip, frame_mv,
11432 #if CONFIG_COMPOUND_SINGLEREF
11433 frame_comp_mv,
11434 #endif // CONFIG_COMPOUND_SINGLEREF
11435 mi_row, mi_col, &args, best_rd);
11436 // Prevent pointers from escaping local scope
11437 args.single_newmv = NULL;
11438 args.single_newmv_rate = NULL;
11439 }
11440
11441 for (i = 0; i < mbmi->ref_mv_idx; ++i) {
11442 uint8_t drl1_ctx = 0;
11443 drl1_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
11444 i + idx_offset);
11445 tmp_rd_stats.rate +=
11446 (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][1]
11447 : 0);
11448 }
11449
11450 if (mbmi_ext->ref_mv_count[ref_frame_type] >
11451 mbmi->ref_mv_idx + idx_offset + 1 &&
11452 ref_idx < ref_set - 1) {
11453 uint8_t drl1_ctx =
11454 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
11455 mbmi->ref_mv_idx + idx_offset);
11456 tmp_rd_stats.rate +=
11457 (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][0]
11458 : 0);
11459 }
11460
11461 if (tmp_alt_rd < INT64_MAX) {
11462 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
11463 tmp_alt_rd =
11464 RDCOST(x->rdmult, tmp_rd_stats.rate, tmp_rd_stats.dist);
11465 #else
11466 if (RDCOST(x->rdmult, tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
11467 tmp_rd_stats.dist) <
11468 RDCOST(x->rdmult, 0, tmp_rd_stats.sse))
11469 tmp_alt_rd =
11470 RDCOST(x->rdmult,
11471 tmp_rd_stats.rate +
11472 av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
11473 tmp_rd_stats.dist);
11474 else
11475 tmp_alt_rd =
11476 RDCOST(x->rdmult,
11477 tmp_rd_stats.rate +
11478 av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
11479 tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
11480 tmp_rd_stats.sse);
11481 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
11482 }
11483
11484 if (tmp_ref_rd > tmp_alt_rd) {
11485 rate2 = tmp_rd_stats.rate;
11486 disable_skip = dummy_disable_skip;
11487 distortion2 = tmp_rd_stats.dist;
11488 skippable = tmp_rd_stats.skip;
11489 rate_y = tmp_rd_stats_y.rate;
11490 rate_uv = tmp_rd_stats_uv.rate;
11491 total_sse = tmp_rd_stats.sse;
11492 this_rd = tmp_alt_rd;
11493 tmp_ref_rd = tmp_alt_rd;
11494 backup_mbmi = *mbmi;
11495 backup_skip = x->skip;
11496 #if CONFIG_VAR_TX
11497 for (i = 0; i < MAX_MB_PLANE; ++i)
11498 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
11499 sizeof(uint8_t) * ctx->num_4x4_blk);
11500 #endif // CONFIG_VAR_TX
11501 } else {
11502 *mbmi = backup_mbmi;
11503 x->skip = backup_skip;
11504 }
11505 }
11506
11507 frame_mv[NEARMV][ref_frame] = backup_mv;
11508 frame_mv[NEWMV][ref_frame] = backup_fmv[0];
11509 if (comp_pred) frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
11510 #if CONFIG_VAR_TX
11511 for (i = 0; i < MAX_MB_PLANE; ++i)
11512 memcpy(x->blk_skip[i], x->blk_skip_drl[i],
11513 sizeof(uint8_t) * ctx->num_4x4_blk);
11514 #endif // CONFIG_VAR_TX
11515 }
11516 mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
11517 if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
11518
11519 if (this_rd == INT64_MAX) continue;
11520
11521 if (is_comp_ref_allowed(mbmi->sb_type))
11522 compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
11523
11524 if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
11525 }
11526
11527 // Estimate the reference frame signaling cost and add it
11528 // to the rolling cost variable.
11529 if (comp_pred) {
11530 #if CONFIG_EXT_COMP_REFS
11531 rate2 += ref_costs_comp[ref_frame][second_ref_frame];
11532 #else // !CONFIG_EXT_COMP_REFS
11533 rate2 += ref_costs_comp[ref_frame];
11534 #if CONFIG_EXT_REFS
11535 rate2 += ref_costs_comp[second_ref_frame];
11536 #endif // CONFIG_EXT_REFS
11537 #endif // CONFIG_EXT_COMP_REFS
11538 } else {
11539 rate2 += ref_costs_single[ref_frame];
11540 }
11541
11542 #if CONFIG_COMPOUND_SINGLEREF
11543 // Add the cost to signal single/comp mode in single ref.
11544 if (!comp_pred && cm->reference_mode != COMPOUND_REFERENCE) {
11545 aom_prob singleref_comp_mode_p = av1_get_inter_mode_prob(cm, xd);
11546 rate2 += av1_cost_bit(singleref_comp_mode_p,
11547 is_inter_singleref_comp_mode(mbmi->mode));
11548 }
11549 #endif // CONFIG_COMPOUND_SINGLEREF
11550
11551 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
11552 if (ref_frame == INTRA_FRAME)
11553 #else
11554 if (!disable_skip)
11555 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
11556 {
11557 if (skippable) {
11558 // Back out the coefficient coding costs
11559 rate2 -= (rate_y + rate_uv);
11560 rate_y = 0;
11561 rate_uv = 0;
11562 // Cost the skip mb case
11563 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
11564 } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
11565 if (RDCOST(x->rdmult, rate_y + rate_uv + rate_skip0, distortion2) <
11566 RDCOST(x->rdmult, rate_skip1, total_sse)) {
11567 // Add in the cost of the no skip flag.
11568 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
11569 } else {
11570 // FIXME(rbultje) make this work for splitmv also
11571 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
11572 distortion2 = total_sse;
11573 assert(total_sse >= 0);
11574 rate2 -= (rate_y + rate_uv);
11575 this_skip2 = 1;
11576 rate_y = 0;
11577 rate_uv = 0;
11578 }
11579 } else {
11580 // Add in the cost of the no skip flag.
11581 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
11582 }
11583
11584 // Calculate the final RD estimate for this mode.
11585 this_rd = RDCOST(x->rdmult, rate2, distortion2);
11586 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
11587 } else {
11588 this_skip2 = mbmi->skip;
11589 this_rd = RDCOST(x->rdmult, rate2, distortion2);
11590 if (this_skip2) {
11591 rate_y = 0;
11592 rate_uv = 0;
11593 }
11594 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
11595 }
11596
11597 if (ref_frame == INTRA_FRAME) {
11598 // Keep record of best intra rd
11599 if (this_rd < best_intra_rd) {
11600 best_intra_rd = this_rd;
11601 best_intra_mode = mbmi->mode;
11602 }
11603 #if CONFIG_INTERINTRA
11604 } else if (second_ref_frame == NONE_FRAME) {
11605 if (this_rd < best_single_inter_rd) {
11606 best_single_inter_rd = this_rd;
11607 best_single_inter_ref = mbmi->ref_frame[0];
11608 }
11609 #endif // CONFIG_INTERINTRA
11610 }
11611
11612 if (!disable_skip && ref_frame == INTRA_FRAME) {
11613 for (i = 0; i < REFERENCE_MODES; ++i)
11614 best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
11615 }
11616
11617 // Did this mode help.. i.e. is it the new best mode
11618 if (this_rd < best_rd || x->skip) {
11619 if (!mode_excluded) {
11620 // Note index of best mode so far
11621 best_mode_index = mode_index;
11622
11623 if (ref_frame == INTRA_FRAME) {
11624 /* required for left and above block mv */
11625 mbmi->mv[0].as_int = 0;
11626 } else {
11627 best_pred_sse = x->pred_sse[ref_frame];
11628 }
11629
11630 rd_cost->rate = rate2;
11631 #if CONFIG_SUPERTX
11632 if (x->skip)
11633 *returnrate_nocoef = rate2;
11634 else
11635 *returnrate_nocoef = rate2 - rate_y - rate_uv;
11636 *returnrate_nocoef -= av1_cost_bit(
11637 av1_get_skip_prob(cm, xd), disable_skip || skippable || this_skip2);
11638 *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
11639 mbmi->ref_frame[0] != INTRA_FRAME);
11640 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
11641 #if CONFIG_WARPED_MOTION
11642 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
11643 #endif
11644 #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
11645 MODE_INFO *const mi = xd->mi[0];
11646 const MOTION_MODE motion_allowed = motion_mode_allowed(
11647 #if CONFIG_GLOBAL_MOTION
11648 0, xd->global_motion,
11649 #endif // CONFIG_GLOBAL_MOTION
11650 #if CONFIG_WARPED_MOTION
11651 xd,
11652 #endif
11653 mi);
11654 if (motion_allowed == WARPED_CAUSAL)
11655 *returnrate_nocoef -= x->motion_mode_cost[bsize][mbmi->motion_mode];
11656 else if (motion_allowed == OBMC_CAUSAL)
11657 *returnrate_nocoef -= x->motion_mode_cost1[bsize][mbmi->motion_mode];
11658 #else
11659 *returnrate_nocoef -= x->motion_mode_cost[bsize][mbmi->motion_mode];
11660 #endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
11661 #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
11662 #endif // CONFIG_SUPERTX
11663 rd_cost->dist = distortion2;
11664 rd_cost->rdcost = this_rd;
11665 best_rd = this_rd;
11666 best_mbmode = *mbmi;
11667 best_skip2 = this_skip2;
11668 best_mode_skippable = skippable;
11669 best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
11670 this_skip2 || skippable);
11671 best_rate_uv = rate_uv;
11672 #if CONFIG_VAR_TX
11673 for (i = 0; i < MAX_MB_PLANE; ++i)
11674 memcpy(ctx->blk_skip[i], x->blk_skip[i],
11675 sizeof(uint8_t) * ctx->num_4x4_blk);
11676 #endif // CONFIG_VAR_TX
11677 }
11678 }
11679
11680 /* keep record of best compound/single-only prediction */
11681 if (!disable_skip && ref_frame != INTRA_FRAME) {
11682 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
11683
11684 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
11685 single_rate = rate2 - compmode_cost;
11686 hybrid_rate = rate2;
11687 } else {
11688 single_rate = rate2;
11689 hybrid_rate = rate2 + compmode_cost;
11690 }
11691
11692 single_rd = RDCOST(x->rdmult, single_rate, distortion2);
11693 hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);
11694
11695 if (!comp_pred) {
11696 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
11697 best_pred_rd[SINGLE_REFERENCE] = single_rd;
11698 } else {
11699 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
11700 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
11701 }
11702 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
11703 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
11704 }
11705
11706 if (x->skip && !comp_pred) break;
11707 }
11708
11709 if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
11710 ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
11711 is_inter_mode(best_mbmode.mode)) ||
11712 (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
11713 !is_inter_mode(best_mbmode.mode)))) {
11714 int skip_blk = 0;
11715 RD_STATS rd_stats_y, rd_stats_uv;
11716
11717 x->use_default_inter_tx_type = 0;
11718 x->use_default_intra_tx_type = 0;
11719
11720 *mbmi = best_mbmode;
11721
11722 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
11723
11724 // Select prediction reference frames.
11725 for (i = 0; i < MAX_MB_PLANE; i++) {
11726 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
11727 if (has_second_ref(mbmi))
11728 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
11729 }
11730
11731 #if CONFIG_COMPOUND_SINGLEREF
11732 // Single ref compound mode
11733 if (!has_second_ref(mbmi) && is_inter_singleref_comp_mode(mbmi->mode)) {
11734 xd->block_refs[1] = xd->block_refs[0];
11735 for (i = 0; i < MAX_MB_PLANE; i++)
11736 xd->plane[i].pre[1] = xd->plane[i].pre[0];
11737 }
11738 #endif // CONFIG_COMPOUND_SINGLEREF
11739
11740 if (is_inter_mode(mbmi->mode)) {
11741 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
11742 #if CONFIG_MOTION_VAR
11743 if (mbmi->motion_mode == OBMC_CAUSAL) {
11744 av1_build_obmc_inter_prediction(
11745 cm, xd, mi_row, mi_col, args.above_pred_buf, args.above_pred_stride,
11746 args.left_pred_buf, args.left_pred_stride);
11747 }
11748 #endif // CONFIG_MOTION_VAR
11749 av1_subtract_plane(x, bsize, 0);
11750 #if CONFIG_VAR_TX
11751 if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
11752 select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
11753 assert(rd_stats_y.rate != INT_MAX);
11754 } else {
11755 int idx, idy;
11756 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
11757 for (idy = 0; idy < xd->n8_h; ++idy)
11758 for (idx = 0; idx < xd->n8_w; ++idx)
11759 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
11760 memset(x->blk_skip[0], rd_stats_y.skip,
11761 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
11762 }
11763
11764 inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
11765 #else
11766 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
11767 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
11768 #endif // CONFIG_VAR_TX
11769 } else {
11770 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
11771 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
11772 }
11773
11774 if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
11775 (rd_stats_y.dist + rd_stats_uv.dist)) >
11776 RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
11777 skip_blk = 1;
11778 rd_stats_y.rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
11779 rd_stats_uv.rate = 0;
11780 rd_stats_y.dist = rd_stats_y.sse;
11781 rd_stats_uv.dist = rd_stats_uv.sse;
11782 } else {
11783 skip_blk = 0;
11784 rd_stats_y.rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
11785 }
11786
11787 if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
11788 RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
11789 (rd_stats_y.dist + rd_stats_uv.dist))) {
11790 #if CONFIG_VAR_TX
11791 int idx, idy;
11792 #endif // CONFIG_VAR_TX
11793 best_mbmode.tx_type = mbmi->tx_type;
11794 best_mbmode.tx_size = mbmi->tx_size;
11795 #if CONFIG_LGT_FROM_PRED
11796 best_mbmode.use_lgt = mbmi->use_lgt;
11797 #endif
11798 #if CONFIG_VAR_TX
11799 for (idy = 0; idy < xd->n8_h; ++idy)
11800 for (idx = 0; idx < xd->n8_w; ++idx)
11801 best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
11802
11803 for (i = 0; i < MAX_MB_PLANE; ++i)
11804 memcpy(ctx->blk_skip[i], x->blk_skip[i],
11805 sizeof(uint8_t) * ctx->num_4x4_blk);
11806
11807 best_mbmode.min_tx_size = mbmi->min_tx_size;
11808 #endif // CONFIG_VAR_TX
11809 rd_cost->rate +=
11810 (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
11811 rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
11812 rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
11813 best_skip2 = skip_blk;
11814 }
11815 }
11816
11817 // Only try palette mode when the best mode so far is an intra mode.
11818 if (try_palette && !is_inter_mode(best_mbmode.mode)) {
11819 int rate2 = 0;
11820 #if CONFIG_SUPERTX
11821 int best_rate_nocoef;
11822 #endif // CONFIG_SUPERTX
11823 int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
11824 best_model_rd_palette = INT64_MAX;
11825 int skippable = 0, rate_overhead_palette = 0;
11826 RD_STATS rd_stats_y;
11827 TX_SIZE uv_tx;
11828 uint8_t *const best_palette_color_map =
11829 x->palette_buffer->best_palette_color_map;
11830 uint8_t *const color_map = xd->plane[0].color_index_map;
11831 MB_MODE_INFO best_mbmi_palette = best_mbmode;
11832
11833 mbmi->mode = DC_PRED;
11834 mbmi->uv_mode = UV_DC_PRED;
11835 mbmi->ref_frame[0] = INTRA_FRAME;
11836 mbmi->ref_frame[1] = NONE_FRAME;
11837 rate_overhead_palette = rd_pick_palette_intra_sby(
11838 cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
11839 &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
11840 &best_model_rd_palette, NULL, NULL, NULL, NULL);
11841 if (pmi->palette_size[0] == 0) goto PALETTE_EXIT;
11842 memcpy(color_map, best_palette_color_map,
11843 rows * cols * sizeof(best_palette_color_map[0]));
11844 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
11845 if (rd_stats_y.rate == INT_MAX) goto PALETTE_EXIT;
11846 uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
11847 [xd->plane[1].subsampling_y];
11848 if (rate_uv_intra[uv_tx] == INT_MAX) {
11849 choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
11850 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
11851 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
11852 pmi_uv[uv_tx] = *pmi;
11853 #if CONFIG_EXT_INTRA
11854 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
11855 #endif // CONFIG_EXT_INTRA
11856 #if CONFIG_FILTER_INTRA
11857 filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
11858 #endif // CONFIG_FILTER_INTRA
11859 }
11860 mbmi->uv_mode = mode_uv[uv_tx];
11861 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
11862 if (pmi->palette_size[1] > 0) {
11863 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
11864 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
11865 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
11866 }
11867 #if CONFIG_EXT_INTRA
11868 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
11869 #endif // CONFIG_EXT_INTRA
11870 #if CONFIG_FILTER_INTRA
11871 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
11872 filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
11873 if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
11874 mbmi->filter_intra_mode_info.filter_intra_mode[1] =
11875 filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
11876 }
11877 #endif // CONFIG_FILTER_INTRA
11878 skippable = rd_stats_y.skip && skip_uvs[uv_tx];
11879 distortion2 = rd_stats_y.dist + dist_uvs[uv_tx];
11880 rate2 = rd_stats_y.rate + rate_overhead_palette + rate_uv_intra[uv_tx];
11881 rate2 += ref_costs_single[INTRA_FRAME];
11882
11883 if (skippable) {
11884 rate2 -= (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
11885 #if CONFIG_SUPERTX
11886 best_rate_nocoef = rate2;
11887 #endif // CONFIG_SUPERTX
11888 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
11889 } else {
11890 #if CONFIG_SUPERTX
11891 best_rate_nocoef = rate2 - (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
11892 #endif // CONFIG_SUPERTX
11893 rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
11894 }
11895 this_rd = RDCOST(x->rdmult, rate2, distortion2);
11896 if (this_rd < best_rd) {
11897 best_mode_index = 3;
11898 mbmi->mv[0].as_int = 0;
11899 rd_cost->rate = rate2;
11900 #if CONFIG_SUPERTX
11901 *returnrate_nocoef = best_rate_nocoef;
11902 #endif // CONFIG_SUPERTX
11903 rd_cost->dist = distortion2;
11904 rd_cost->rdcost = this_rd;
11905 best_rd = this_rd;
11906 best_mbmode = *mbmi;
11907 best_skip2 = 0;
11908 best_mode_skippable = skippable;
11909 }
11910 }
11911 PALETTE_EXIT:
11912
11913 #if CONFIG_FILTER_INTRA
11914 // TODO(huisu): filter-intra is turned off in lossless mode for now to
11915 // avoid a unit test failure
11916 if (!xd->lossless[mbmi->segment_id] && pmi->palette_size[0] == 0 &&
11917 !dc_skipped && best_mode_index >= 0 &&
11918 best_intra_rd < (best_rd + (best_rd >> 3))) {
11919 pick_filter_intra_interframe(
11920 cpi, x, bsize, mi_row, mi_col, rate_uv_intra, rate_uv_tokenonly,
11921 dist_uvs, skip_uvs, mode_uv, filter_intra_mode_info_uv,
11922 #if CONFIG_EXT_INTRA
11923 uv_angle_delta,
11924 #endif // CONFIG_EXT_INTRA
11925 pmi_uv, palette_ctx, 0, ref_costs_single, &best_rd, &best_intra_rd,
11926 &best_intra_mode, &best_mode_index, &best_skip2, &best_mode_skippable,
11927 #if CONFIG_SUPERTX
11928 returnrate_nocoef,
11929 #endif // CONFIG_SUPERTX
11930 best_pred_rd, &best_mbmode, rd_cost);
11931 }
11932 #endif // CONFIG_FILTER_INTRA
11933
11934 // The inter modes' rate costs are not calculated precisely in some cases.
11935 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
11936 // ZEROMV. Here, checks are added for those cases, and the mode decisions
11937 // are corrected.
11938 #if CONFIG_COMPOUND_SINGLEREF
11939 // NOTE: For SR_NEW_NEWMV, no need to check as the two mvs from the same ref
11940 // are surely different from each other.
11941 #endif // CONFIG_COMPOUND_SINGLEREF
11942 if (best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV) {
11943 const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
11944 best_mbmode.ref_frame[1] };
11945 int comp_pred_mode = refs[1] > INTRA_FRAME;
11946 int_mv zeromv[2];
11947 const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
11948 #if CONFIG_GLOBAL_MOTION
11949 zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
11950 cm->allow_high_precision_mv, bsize,
11951 mi_col, mi_row, 0
11952 #if CONFIG_AMVR
11953 ,
11954 cm->cur_frame_mv_precision_level
11955 #endif
11956 )
11957 .as_int;
11958 zeromv[1].as_int =
11959 comp_pred_mode
11960 ? gm_get_motion_vector(&cm->global_motion[refs[1]],
11961 cm->allow_high_precision_mv, bsize, mi_col,
11962 mi_row, 0
11963 #if CONFIG_AMVR
11964 ,
11965 cm->cur_frame_mv_precision_level
11966 #endif
11967 )
11968 .as_int
11969 : 0;
11970 #else
11971 zeromv[0].as_int = 0;
11972 zeromv[1].as_int = 0;
11973 #endif // CONFIG_GLOBAL_MOTION
11974 if (!comp_pred_mode) {
11975 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
11976 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
11977 : INT_MAX;
11978
11979 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
11980 int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
11981 if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
11982 best_mbmode.mode = NEARMV;
11983 best_mbmode.ref_mv_idx = i;
11984 }
11985 }
11986
11987 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
11988 best_mbmode.mode = NEARESTMV;
11989 else if (best_mbmode.mv[0].as_int == zeromv[0].as_int)
11990 best_mbmode.mode = ZEROMV;
11991 } else {
11992 int_mv nearestmv[2];
11993 int_mv nearmv[2];
11994
11995 if (mbmi_ext->ref_mv_count[rf_type] > 1) {
11996 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
11997 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
11998 } else {
11999 nearmv[0] = frame_mv[NEARMV][refs[0]];
12000 nearmv[1] = frame_mv[NEARMV][refs[1]];
12001 }
12002 if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
12003 nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
12004 nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
12005 } else {
12006 nearestmv[0] = frame_mv[NEARESTMV][refs[0]];
12007 nearestmv[1] = frame_mv[NEARESTMV][refs[1]];
12008 }
12009
12010 if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
12011 nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
12012 best_mbmode.mode = NEAREST_NEARESTMV;
12013 } else {
12014 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
12015 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
12016 : INT_MAX;
12017
12018 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
12019 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
12020 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
12021
12022 // Try switching to the NEAR_NEARMV mode
12023 if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
12024 nearmv[1].as_int == best_mbmode.mv[1].as_int) {
12025 best_mbmode.mode = NEAR_NEARMV;
12026 best_mbmode.ref_mv_idx = i;
12027 }
12028 }
12029
12030 if (best_mbmode.mode == NEW_NEWMV &&
12031 best_mbmode.mv[0].as_int == zeromv[0].as_int &&
12032 best_mbmode.mv[1].as_int == zeromv[1].as_int)
12033 best_mbmode.mode = ZERO_ZEROMV;
12034 }
12035 }
12036 }
12037
12038 // Make sure that the ref_mv_idx is only nonzero when we're
12039 // using a mode which can support ref_mv_idx
12040 if (best_mbmode.ref_mv_idx != 0 &&
12041 #if CONFIG_COMPOUND_SINGLEREF
12042 !(best_mbmode.mode == NEWMV || best_mbmode.mode == SR_NEW_NEWMV ||
12043 best_mbmode.mode == NEW_NEWMV ||
12044 have_nearmv_in_inter_mode(best_mbmode.mode)))
12045 #else // !CONFIG_COMPOUND_SINGLEREF
12046 !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
12047 have_nearmv_in_inter_mode(best_mbmode.mode)))
12048 #endif // CONFIG_COMPOUND_SINGLEREF
12049 {
12050 best_mbmode.ref_mv_idx = 0;
12051 }
12052
12053 if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
12054 best_mbmode.ref_frame[1] <= INTRA_FRAME) {
12055 int8_t ref_frame_type = av1_ref_frame_type(best_mbmode.ref_frame);
12056 int16_t mode_ctx = mbmi_ext->mode_context[ref_frame_type];
12057 if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
12058 int_mv zeromv;
12059 #if CONFIG_GLOBAL_MOTION
12060 const MV_REFERENCE_FRAME ref = best_mbmode.ref_frame[0];
12061 zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ref],
12062 cm->allow_high_precision_mv, bsize,
12063 mi_col, mi_row, 0
12064 #if CONFIG_AMVR
12065 ,
12066 cm->cur_frame_mv_precision_level
12067 #endif
12068 )
12069 .as_int;
12070 #else
12071 zeromv.as_int = 0;
12072 #endif // CONFIG_GLOBAL_MOTION
12073 if (best_mbmode.mv[0].as_int == zeromv.as_int) {
12074 best_mbmode.mode = ZEROMV;
12075 }
12076 }
12077 }
12078
12079 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
12080 rd_cost->rate = INT_MAX;
12081 rd_cost->rdcost = INT64_MAX;
12082 return;
12083 }
12084
12085 assert((cm->interp_filter == SWITCHABLE) ||
12086 (cm->interp_filter ==
12087 av1_extract_interp_filter(best_mbmode.interp_filters, 0)) ||
12088 !is_inter_block(&best_mbmode));
12089 #if CONFIG_DUAL_FILTER
12090 assert((cm->interp_filter == SWITCHABLE) ||
12091 (cm->interp_filter ==
12092 av1_extract_interp_filter(best_mbmode.interp_filters, 1)) ||
12093 !is_inter_block(&best_mbmode));
12094 #endif // CONFIG_DUAL_FILTER
12095
12096 if (!cpi->rc.is_src_frame_alt_ref)
12097 av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
12098 sf->adaptive_rd_thresh, bsize, best_mode_index);
12099
12100 // macroblock modes
12101 *mbmi = best_mbmode;
12102 x->skip |= best_skip2;
12103
12104 // Note: this section is needed since the mode may have been forced to
12105 // ZEROMV by the all-zero mode handling of ref-mv.
12106 #if CONFIG_GLOBAL_MOTION
12107 if (mbmi->mode == ZEROMV || mbmi->mode == ZERO_ZEROMV) {
12108 #if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
12109 // Correct the motion mode for ZEROMV
12110 const MOTION_MODE last_motion_mode_allowed =
12111 motion_mode_allowed(0, xd->global_motion,
12112 #if CONFIG_WARPED_MOTION
12113 xd,
12114 #endif
12115 xd->mi[0]);
12116 if (mbmi->motion_mode > last_motion_mode_allowed)
12117 mbmi->motion_mode = last_motion_mode_allowed;
12118 #endif // CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
12119
12120 // Correct the interpolation filter for ZEROMV
12121 if (is_nontrans_global_motion(xd)) {
12122 mbmi->interp_filters = av1_broadcast_interp_filter(
12123 av1_unswitchable_filter(cm->interp_filter));
12124 }
12125 }
12126 #endif // CONFIG_GLOBAL_MOTION
12127
12128 for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
12129 if (mbmi->mode != NEWMV)
12130 mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
12131 else
12132 mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
12133 }
12134
12135 for (i = 0; i < REFERENCE_MODES; ++i) {
12136 if (best_pred_rd[i] == INT64_MAX)
12137 best_pred_diff[i] = INT_MIN;
12138 else
12139 best_pred_diff[i] = best_rd - best_pred_rd[i];
12140 }
12141
12142 x->skip |= best_mode_skippable;
12143
12144 assert(best_mode_index >= 0);
12145
12146 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
12147 best_mode_skippable);
12148
12149 if (pmi->palette_size[1] > 0) {
12150 assert(try_palette);
12151 restore_uv_color_map(cpi, x);
12152 }
12153 }
12154
12155 void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
12156 TileDataEnc *tile_data, MACROBLOCK *x,
12157 int mi_row, int mi_col,
12158 RD_STATS *rd_cost, BLOCK_SIZE bsize,
12159 PICK_MODE_CONTEXT *ctx,
12160 int64_t best_rd_so_far) {
12161 const AV1_COMMON *const cm = &cpi->common;
12162 MACROBLOCKD *const xd = &x->e_mbd;
12163 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
12164 unsigned char segment_id = mbmi->segment_id;
12165 const int comp_pred = 0;
12166 int i;
12167 int64_t best_pred_diff[REFERENCE_MODES];
12168 unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
12169 #if CONFIG_EXT_COMP_REFS
12170 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
12171 #else
12172 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
12173 #endif // CONFIG_EXT_COMP_REFS
12174 aom_prob comp_mode_p;
12175 InterpFilter best_filter = SWITCHABLE;
12176 int64_t this_rd = INT64_MAX;
12177 int rate2 = 0;
12178 const int64_t distortion2 = 0;
12179 (void)mi_row;
12180 (void)mi_col;
12181
12182 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
12183 &comp_mode_p);
12184
12185 for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
12186 for (i = LAST_FRAME; i < TOTAL_REFS_PER_FRAME; ++i)
12187 x->pred_mv_sad[i] = INT_MAX;
12188
12189 rd_cost->rate = INT_MAX;
12190
12191 assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
12192
12193 mbmi->palette_mode_info.palette_size[0] = 0;
12194 mbmi->palette_mode_info.palette_size[1] = 0;
12195
12196 #if CONFIG_FILTER_INTRA
12197 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
12198 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
12199 #endif // CONFIG_FILTER_INTRA
12200 mbmi->mode = ZEROMV;
12201 mbmi->motion_mode = SIMPLE_TRANSLATION;
12202 mbmi->uv_mode = UV_DC_PRED;
12203 mbmi->ref_frame[0] = LAST_FRAME;
12204 mbmi->ref_frame[1] = NONE_FRAME;
12205 #if CONFIG_GLOBAL_MOTION
12206 mbmi->mv[0].as_int =
12207 gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
12208 cm->allow_high_precision_mv, bsize, mi_col, mi_row, 0
12209 #if CONFIG_AMVR
12210 ,
12211 cm->cur_frame_mv_precision_level
12212 #endif
12213 )
12214 .as_int;
12215 #else // CONFIG_GLOBAL_MOTION
12216 mbmi->mv[0].as_int = 0;
12217 #endif // CONFIG_GLOBAL_MOTION
12218 mbmi->tx_size = max_txsize_lookup[bsize];
12219 x->skip = 1;
12220
12221 mbmi->ref_mv_idx = 0;
12222 mbmi->pred_mv[0].as_int = 0;
12223 #if CONFIG_LGT_FROM_PRED
12224 mbmi->use_lgt = 0;
12225 #endif
12226
12227 mbmi->motion_mode = SIMPLE_TRANSLATION;
12228 #if CONFIG_MOTION_VAR
12229 av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
12230 #endif
12231 #if CONFIG_WARPED_MOTION
12232 if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
12233 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
12234 #if WARPED_MOTION_SORT_SAMPLES
12235 int pts_mv[SAMPLES_ARRAY_SIZE];
12236 mbmi->num_proj_ref[0] =
12237 findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv);
12238 // Rank the samples by motion vector difference
12239 if (mbmi->num_proj_ref[0] > 1)
12240 mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
12241 pts_inref, mbmi->num_proj_ref[0]);
12242 #else
12243 mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
12244 #endif // WARPED_MOTION_SORT_SAMPLES
12245 }
12246 #endif
12247
12248 set_default_interp_filters(mbmi, cm->interp_filter);
12249
12250 if (cm->interp_filter != SWITCHABLE) {
12251 best_filter = cm->interp_filter;
12252 } else {
12253 best_filter = EIGHTTAP_REGULAR;
12254 if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
12255 x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
12256 int rs;
12257 int best_rs = INT_MAX;
12258 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
12259 mbmi->interp_filters = av1_broadcast_interp_filter(i);
12260 rs = av1_get_switchable_rate(cm, x, xd);
12261 if (rs < best_rs) {
12262 best_rs = rs;
12263 best_filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
12264 }
12265 }
12266 }
12267 }
12268 // Set the appropriate filter
12269 mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
12270 rate2 += av1_get_switchable_rate(cm, x, xd);
12271
12272 if (cm->reference_mode == REFERENCE_MODE_SELECT)
12273 rate2 += av1_cost_bit(comp_mode_p, comp_pred);
12274
12275 // Estimate the reference frame signaling cost and add it
12276 // to the rolling cost variable.
12277 rate2 += ref_costs_single[LAST_FRAME];
12278 this_rd = RDCOST(x->rdmult, rate2, distortion2);
12279
12280 rd_cost->rate = rate2;
12281 rd_cost->dist = distortion2;
12282 rd_cost->rdcost = this_rd;
12283
12284 if (this_rd >= best_rd_so_far) {
12285 rd_cost->rate = INT_MAX;
12286 rd_cost->rdcost = INT64_MAX;
12287 return;
12288 }
12289
12290 assert((cm->interp_filter == SWITCHABLE) ||
12291 (cm->interp_filter ==
12292 av1_extract_interp_filter(mbmi->interp_filters, 0)));
12293
12294 av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
12295 cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
12296
12297 av1_zero(best_pred_diff);
12298
12299 store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0);
12300 }
12301
12302 #if CONFIG_MOTION_VAR
12303
12304 struct calc_target_weighted_pred_ctxt {
12305 const MACROBLOCK *x;
12306 const uint8_t *tmp;
12307 int tmp_stride;
12308 int overlap;
12309 };
12310
12311 static INLINE void calc_target_weighted_pred_above(MACROBLOCKD *xd,
12312 int rel_mi_col,
12313 uint8_t nb_mi_width,
12314 MODE_INFO *nb_mi,
12315 void *fun_ctxt) {
12316 (void)nb_mi;
12317
12318 struct calc_target_weighted_pred_ctxt *ctxt =
12319 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
12320
12321 #if CONFIG_HIGHBITDEPTH
12322 const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
12323 #else
12324 const int is_hbd = 0;
12325 #endif // CONFIG_HIGHBITDEPTH
12326
12327 const int bw = xd->n8_w << MI_SIZE_LOG2;
12328 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
12329
12330 int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
12331 int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
12332 const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
12333
12334 if (!is_hbd) {
12335 for (int row = 0; row < ctxt->overlap; ++row) {
12336 const uint8_t m0 = mask1d[row];
12337 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
12338 for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
12339 wsrc[col] = m1 * tmp[col];
12340 mask[col] = m0;
12341 }
12342 wsrc += bw;
12343 mask += bw;
12344 tmp += ctxt->tmp_stride;
12345 }
12346 #if CONFIG_HIGHBITDEPTH
12347 } else {
12348 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
12349
12350 for (int row = 0; row < ctxt->overlap; ++row) {
12351 const uint8_t m0 = mask1d[row];
12352 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
12353 for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
12354 wsrc[col] = m1 * tmp16[col];
12355 mask[col] = m0;
12356 }
12357 wsrc += bw;
12358 mask += bw;
12359 tmp16 += ctxt->tmp_stride;
12360 }
12361 #endif // CONFIG_HIGHBITDEPTH
12362 }
12363 }
12364
12365 static INLINE void calc_target_weighted_pred_left(MACROBLOCKD *xd,
12366 int rel_mi_row,
12367 uint8_t nb_mi_height,
12368 MODE_INFO *nb_mi,
12369 void *fun_ctxt) {
12370 (void)nb_mi;
12371
12372 struct calc_target_weighted_pred_ctxt *ctxt =
12373 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
12374
12375 #if CONFIG_HIGHBITDEPTH
12376 const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
12377 #else
12378 const int is_hbd = 0;
12379 #endif // CONFIG_HIGHBITDEPTH
12380
12381 const int bw = xd->n8_w << MI_SIZE_LOG2;
12382 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
12383
12384 int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
12385 int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
12386 const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
12387
12388 if (!is_hbd) {
12389 for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
12390 for (int col = 0; col < ctxt->overlap; ++col) {
12391 const uint8_t m0 = mask1d[col];
12392 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
12393 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
12394 (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
12395 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
12396 }
12397 wsrc += bw;
12398 mask += bw;
12399 tmp += ctxt->tmp_stride;
12400 }
12401 #if CONFIG_HIGHBITDEPTH
12402 } else {
12403 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
12404
12405 for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
12406 for (int col = 0; col < ctxt->overlap; ++col) {
12407 const uint8_t m0 = mask1d[col];
12408 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
12409 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
12410 (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
12411 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
12412 }
12413 wsrc += bw;
12414 mask += bw;
12415 tmp16 += ctxt->tmp_stride;
12416 }
12417 #endif // CONFIG_HIGHBITDEPTH
12418 }
12419 }
12420
12421 // This function has a structure similar to av1_build_obmc_inter_prediction
12422 //
12423 // The OBMC predictor is computed as:
12424 //
12425 // PObmc(x,y) =
12426 // AOM_BLEND_A64(Mh(x),
12427 // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
12428 // PLeft(x, y))
12429 //
12430 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
12431 // rounding, this can be written as:
12432 //
12433 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
12434 // Mh(x) * Mv(y) * P(x,y) +
12435 // Mh(x) * Cv(y) * Pabove(x,y) +
12436 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
12437 //
12438 // Where :
12439 //
12440 // Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
12441 // Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
12442 //
12443 // This function computes 'wsrc' and 'mask' as:
12444 //
12445 // wsrc(x, y) =
12446 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
12447 // Mh(x) * Cv(y) * Pabove(x,y) +
12448 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
12449 //
12450 // mask(x, y) = Mh(x) * Mv(y)
12451 //
12452 // These can then be used to efficiently approximate the error for any
12453 // predictor P in the context of the provided neighbouring predictors by
12454 // computing:
12455 //
12456 // error(x, y) =
12457 // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
12458 //
12459 static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
12460 const MACROBLOCKD *xd, int mi_row,
12461 int mi_col, const uint8_t *above,
12462 int above_stride, const uint8_t *left,
12463 int left_stride) {
12464 const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
12465 const int bw = xd->n8_w << MI_SIZE_LOG2;
12466 const int bh = xd->n8_h << MI_SIZE_LOG2;
12467 int32_t *mask_buf = x->mask_buf;
12468 int32_t *wsrc_buf = x->wsrc_buf;
12469
12470 const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
12471 #if CONFIG_HIGHBITDEPTH
12472 const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
12473 #else
12474 const int is_hbd = 0;
12475 #endif // CONFIG_HIGHBITDEPTH
12476
12477 // plane 0 should not be subsampled
12478 assert(xd->plane[0].subsampling_x == 0);
12479 assert(xd->plane[0].subsampling_y == 0);
12480
12481 av1_zero_array(wsrc_buf, bw * bh);
12482 for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
12483
12484 // handle above row
12485 if (xd->up_available) {
12486 const int overlap =
12487 AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
12488 struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
12489 overlap };
12490 foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, mi_col,
12491 max_neighbor_obmc[b_width_log2_lookup[bsize]],
12492 calc_target_weighted_pred_above, &ctxt);
12493 }
12494
12495 for (int i = 0; i < bw * bh; ++i) {
12496 wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
12497 mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
12498 }
12499
12500 // handle left column
12501 if (xd->left_available) {
12502 const int overlap =
12503 AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
12504 struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
12505 overlap };
12506 foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd, mi_row,
12507 max_neighbor_obmc[b_height_log2_lookup[bsize]],
12508 calc_target_weighted_pred_left, &ctxt);
12509 }
12510
12511 if (!is_hbd) {
12512 const uint8_t *src = x->plane[0].src.buf;
12513
12514 for (int row = 0; row < bh; ++row) {
12515 for (int col = 0; col < bw; ++col) {
12516 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
12517 }
12518 wsrc_buf += bw;
12519 src += x->plane[0].src.stride;
12520 }
12521 #if CONFIG_HIGHBITDEPTH
12522 } else {
12523 const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
12524
12525 for (int row = 0; row < bh; ++row) {
12526 for (int col = 0; col < bw; ++col) {
12527 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
12528 }
12529 wsrc_buf += bw;
12530 src += x->plane[0].src.stride;
12531 }
12532 #endif // CONFIG_HIGHBITDEPTH
12533 }
12534 }
12535
12536 #if CONFIG_NCOBMC
12537 void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
12538 int mi_row, int mi_col) {
12539 const AV1_COMMON *const cm = &cpi->common;
12540 MACROBLOCKD *const xd = &x->e_mbd;
12541 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
12542 MB_MODE_INFO backup_mbmi;
12543 BLOCK_SIZE bsize = mbmi->sb_type;
12544 int ref, skip_blk, backup_skip = x->skip;
12545 int64_t rd_causal;
12546 RD_STATS rd_stats_y, rd_stats_uv;
12547 int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
12548 int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
12549
12550 // Recompute the best causal predictor and rd
12551 mbmi->motion_mode = SIMPLE_TRANSLATION;
12552 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
12553 for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
12554 YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
12555 assert(cfg != NULL);
12556 av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
12557 &xd->block_refs[ref]->sf);
12558 }
12559 av1_setup_dst_planes(x->e_mbd.plane, bsize,
12560 get_frame_new_buffer(&cpi->common), mi_row, mi_col);
12561
12562 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
12563
12564 av1_subtract_plane(x, bsize, 0);
12565 #if CONFIG_VAR_TX
12566 if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
12567 select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
12568 } else {
12569 int idx, idy;
12570 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
12571 for (idy = 0; idy < xd->n8_h; ++idy)
12572 for (idx = 0; idx < xd->n8_w; ++idx)
12573 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
12574 memset(x->blk_skip[0], rd_stats_y.skip,
12575 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
12576 }
12577 inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
12578 #else
12579 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
12580 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
12581 #endif
12582 assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
12583 if (rd_stats_y.skip && rd_stats_uv.skip) {
12584 rd_stats_y.rate = rate_skip1;
12585 rd_stats_uv.rate = 0;
12586 rd_stats_y.dist = rd_stats_y.sse;
12587 rd_stats_uv.dist = rd_stats_uv.sse;
12588 skip_blk = 0;
12589 } else if (RDCOST(x->rdmult,
12590 (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
12591 (rd_stats_y.dist + rd_stats_uv.dist)) >
12592 RDCOST(x->rdmult, rate_skip1,
12593 (rd_stats_y.sse + rd_stats_uv.sse))) {
12594 rd_stats_y.rate = rate_skip1;
12595 rd_stats_uv.rate = 0;
12596 rd_stats_y.dist = rd_stats_y.sse;
12597 rd_stats_uv.dist = rd_stats_uv.sse;
12598 skip_blk = 1;
12599 } else {
12600 rd_stats_y.rate += rate_skip0;
12601 skip_blk = 0;
12602 }
12603 backup_skip = skip_blk;
12604 backup_mbmi = *mbmi;
12605 rd_causal = RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate),
12606 (rd_stats_y.dist + rd_stats_uv.dist));
12607 rd_causal +=
12608 RDCOST(x->rdmult, av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0);
12609
12610 // Check non-causal mode
12611 mbmi->motion_mode = OBMC_CAUSAL;
12612 av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
12613
12614 av1_subtract_plane(x, bsize, 0);
12615 #if CONFIG_VAR_TX
12616 if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
12617 select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
12618 } else {
12619 int idx, idy;
12620 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
12621 for (idy = 0; idy < xd->n8_h; ++idy)
12622 for (idx = 0; idx < xd->n8_w; ++idx)
12623 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
12624 memset(x->blk_skip[0], rd_stats_y.skip,
12625 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
12626 }
12627 inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
12628 #else
12629 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
12630 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
12631 #endif
12632 assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
12633 if (rd_stats_y.skip && rd_stats_uv.skip) {
12634 rd_stats_y.rate = rate_skip1;
12635 rd_stats_uv.rate = 0;
12636 rd_stats_y.dist = rd_stats_y.sse;
12637 rd_stats_uv.dist = rd_stats_uv.sse;
12638 skip_blk = 0;
12639 } else if (RDCOST(x->rdmult,
12640 (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
12641 (rd_stats_y.dist + rd_stats_uv.dist)) >
12642 RDCOST(x->rdmult, rate_skip1,
12643 (rd_stats_y.sse + rd_stats_uv.sse))) {
12644 rd_stats_y.rate = rate_skip1;
12645 rd_stats_uv.rate = 0;
12646 rd_stats_y.dist = rd_stats_y.sse;
12647 rd_stats_uv.dist = rd_stats_uv.sse;
12648 skip_blk = 1;
12649 } else {
12650 rd_stats_y.rate += rate_skip0;
12651 skip_blk = 0;
12652 }
12653
12654 if (rd_causal >
12655 RDCOST(x->rdmult,
12656 rd_stats_y.rate + rd_stats_uv.rate +
12657 av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
12658 (rd_stats_y.dist + rd_stats_uv.dist))) {
12659 x->skip = skip_blk;
12660 } else {
12661 *mbmi = backup_mbmi;
12662 x->skip = backup_skip;
12663 }
12664 }
12665 #endif // CONFIG_NCOBMC
12666
// Computes the rate-distortion cost of the block at (mi_row, mi_col) for the
// motion mode currently set in mbmi->motion_mode: builds the corresponding
// inter prediction, evaluates the luma/chroma residual RD, folds in the
// skip-flag cost and the motion-mode signaling cost, and returns the total.
// Outputs:
//   *skip_blk    - whether the block should be coded as skip.
//   *backup_mbmi - copy of the (possibly updated) mode info, if non-NULL.
int64_t get_prediction_rd_cost(const struct AV1_COMP *cpi, struct macroblock *x,
                               int mi_row, int mi_col, int *skip_blk,
                               MB_MODE_INFO *backup_mbmi) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  BLOCK_SIZE bsize = mbmi->sb_type;
#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
  // Largest motion mode this block may signal; selects which of the
  // motion-mode cost tables is used at the bottom of the function.
  const MOTION_MODE motion_allowed = motion_mode_allowed(
#if CONFIG_GLOBAL_MOTION
      0, xd->global_motion,
#endif  // CONFIG_GLOBAL_MOTION
#if CONFIG_WARPED_MOTION
      xd,
#endif
      xd->mi[0]);
#endif  // CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
  RD_STATS rd_stats_y, rd_stats_uv;
  // Cost of signaling skip = 0 / skip = 1 in the current context.
  int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
  int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
  int64_t this_rd;
  int ref;

#if CONFIG_CB4X4
  // Sub-8x8 luma blocks may not carry chroma information; skip chroma RD then.
  x->skip_chroma_rd =
      !is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
                           xd->plane[1].subsampling_y);
#endif

  // Point the prediction sources at the reference frame(s) and the
  // destination at the frame being encoded.
  set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
  for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
    YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
    assert(cfg != NULL);
    av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                         &xd->block_refs[ref]->sf);
  }
  av1_setup_dst_planes(x->e_mbd.plane, bsize,
                       get_frame_new_buffer(&cpi->common), mi_row, mi_col);

#if CONFIG_NCOBMC_ADAPT_WEIGHT
  // NCOBMC_ADAPT_WEIGHT fetches its prediction from the interpolation
  // buffers below, so the regular predictor build is skipped for it.
  if (mbmi->motion_mode != NCOBMC_ADAPT_WEIGHT)
#endif
    av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);

#if CONFIG_MOTION_VAR
  if (mbmi->motion_mode == OBMC_CAUSAL) {
#if CONFIG_NCOBMC
    av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
#else
    av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
#endif
  }
#endif  // CONFIG_MOTION_VAR

#if CONFIG_NCOBMC_ADAPT_WEIGHT
  if (mbmi->motion_mode == NCOBMC_ADAPT_WEIGHT)
    for (int plane = 0; plane < MAX_MB_PLANE; ++plane)
      get_pred_from_intrpl_buf(xd, mi_row, mi_col, bsize, plane);
#endif
  av1_subtract_plane(x, bsize, 0);

  // Rate/distortion of the luma and chroma residuals for this prediction.
#if CONFIG_VAR_TX
  if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
    select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
  } else {
    int idx, idy;
    super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
    // Uniform transform size: replicate it over the whole inter_tx_size grid.
    for (idy = 0; idy < xd->n8_h; ++idy)
      for (idx = 0; idx < xd->n8_w; ++idx)
        mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
    memset(x->blk_skip[0], rd_stats_y.skip,
           sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
  }
  inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
#else
  super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
  super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
#endif
  assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);

  // Decide whether coding the block as skip (no residual, distortion = sse)
  // is cheaper than coding the residual plus the skip=0 flag.
  if (rd_stats_y.skip && rd_stats_uv.skip) {
    rd_stats_y.rate = rate_skip1;
    rd_stats_uv.rate = 0;
    rd_stats_y.dist = rd_stats_y.sse;
    rd_stats_uv.dist = rd_stats_uv.sse;
    *skip_blk = 1;
  } else if (RDCOST(x->rdmult,
                    (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
                    (rd_stats_y.dist + rd_stats_uv.dist)) >
             RDCOST(x->rdmult, rate_skip1,
                    (rd_stats_y.sse + rd_stats_uv.sse))) {
    rd_stats_y.rate = rate_skip1;
    rd_stats_uv.rate = 0;
    rd_stats_y.dist = rd_stats_y.sse;
    rd_stats_uv.dist = rd_stats_uv.sse;
    *skip_blk = 1;
  } else {
    rd_stats_y.rate += rate_skip0;
    *skip_blk = 0;
  }

  if (backup_mbmi) *backup_mbmi = *mbmi;

  this_rd = RDCOST(x->rdmult, (rd_stats_y.rate + rd_stats_uv.rate),
                   (rd_stats_y.dist + rd_stats_uv.dist));
#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
  // Add the cost of signaling the motion mode, using the cost table that
  // matches the set of modes this block is allowed to signal.
  if (motion_allowed == NCOBMC_ADAPT_WEIGHT) {
    assert(mbmi->motion_mode <= NCOBMC_ADAPT_WEIGHT);
    this_rd +=
        RDCOST(x->rdmult, x->motion_mode_cost2[bsize][mbmi->motion_mode], 0);
  } else if (motion_allowed == OBMC_CAUSAL) {
    assert(mbmi->motion_mode <= OBMC_CAUSAL);
    this_rd +=
        RDCOST(x->rdmult, x->motion_mode_cost1[bsize][mbmi->motion_mode], 0);
  } else {
#endif  // CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
    this_rd +=
        RDCOST(x->rdmult, x->motion_mode_cost[bsize][mbmi->motion_mode], 0);
#if CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
  }
#endif  // CONFIG_NCOBMC_ADAPT_WEIGHT && CONFIG_WARPED_MOTION
  return this_rd;
}
12790
12791 #if CONFIG_NCOBMC_ADAPT_WEIGHT
12792 void av1_check_ncobmc_adapt_weight_rd(const struct AV1_COMP *cpi,
12793 struct macroblock *x, int mi_row,
12794 int mi_col) {
12795 MACROBLOCKD *const xd = &x->e_mbd;
12796 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
12797 BLOCK_SIZE bsize = mbmi->sb_type;
12798 #if CONFIG_VAR_TX
12799 const int n4 = bsize_to_num_blk(bsize);
12800 uint8_t st_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
12801 uint8_t obmc_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
12802 uint8_t ncobmc_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
12803 #endif
12804 MB_MODE_INFO st_mbmi, obmc_mbmi, ncobmc_mbmi;
12805 int st_skip, obmc_skip, ncobmc_skip;
12806 int64_t st_rd, obmc_rd, ncobmc_rd;
12807 #if CONFIG_WARPED_MOTION
12808 const AV1_COMMON *const cm = &cpi->common;
12809 const int is_warp_motion = mbmi->motion_mode == WARPED_CAUSAL;
12810 const int rs = RDCOST(x->rdmult, av1_get_switchable_rate(cm, x, xd), 0);
12811 MB_MODE_INFO warp_mbmi;
12812 int64_t warp_rd;
12813 int warp_skip;
12814 #endif
12815
12816 // Recompute the rd for the motion mode decided in rd loop
12817 mbmi->motion_mode = SIMPLE_TRANSLATION;
12818 st_rd = get_prediction_rd_cost(cpi, x, mi_row, mi_col, &st_skip, &st_mbmi);
12819 #if CONFIG_WARPED_MOTION
12820 st_rd += rs;
12821 #endif
12822 #if CONFIG_VAR_TX
12823 memcpy(st_blk_skip, x->blk_skip[0], sizeof(st_blk_skip[0]) * n4);
12824 #endif
12825
12826 mbmi->motion_mode = OBMC_CAUSAL;
12827 obmc_rd =
12828 get_prediction_rd_cost(cpi, x, mi_row, mi_col, &obmc_skip, &obmc_mbmi);
12829 #if CONFIG_WARPED_MOTION
12830 obmc_rd += rs;
12831 #endif
12832 #if CONFIG_VAR_TX
12833 memcpy(obmc_blk_skip, x->blk_skip[0], sizeof(obmc_blk_skip[0]) * n4);
12834 #endif
12835
12836 // Compute the rd cost for ncobmc adaptive weight
12837 mbmi->motion_mode = NCOBMC_ADAPT_WEIGHT;
12838 ncobmc_rd = get_prediction_rd_cost(cpi, x, mi_row, mi_col, &ncobmc_skip,
12839 &ncobmc_mbmi);
12840 #if CONFIG_WARPED_MOTION
12841 ncobmc_rd += rs;
12842 #endif
12843 // Calculate the ncobmc mode costs
12844 {
12845 ADAPT_OVERLAP_BLOCK aob = adapt_overlap_block_lookup[bsize];
12846 ncobmc_rd +=
12847 RDCOST(x->rdmult, x->ncobmc_mode_cost[aob][mbmi->ncobmc_mode[0]], 0);
12848 if (mi_size_wide[bsize] != mi_size_high[bsize])
12849 ncobmc_rd +=
12850 RDCOST(x->rdmult, x->ncobmc_mode_cost[aob][mbmi->ncobmc_mode[1]], 0);
12851 }
12852 #if CONFIG_VAR_TX
12853 memcpy(ncobmc_blk_skip, x->blk_skip[0], sizeof(ncobmc_blk_skip[0]) * n4);
12854 #endif
12855
12856 #if CONFIG_WARPED_MOTION
12857 if (is_warp_motion) {
12858 mbmi->motion_mode = WARPED_CAUSAL;
12859 warp_rd =
12860 get_prediction_rd_cost(cpi, x, mi_row, mi_col, &warp_skip, &warp_mbmi);
12861 } else {
12862 warp_rd = INT64_MAX;
12863 }
12864 #endif
12865
12866 #if CONFIG_WARPED_MOTION
12867 if (AOMMIN(ncobmc_rd, warp_rd) < AOMMIN(st_rd, obmc_rd)) {
12868 if (ncobmc_rd < warp_rd) {
12869 x->skip = ncobmc_skip;
12870 *mbmi = ncobmc_mbmi;
12871 #if CONFIG_VAR_TX
12872 memcpy(x->blk_skip[0], ncobmc_blk_skip, sizeof(ncobmc_blk_skip[0]) * n4);
12873 #endif
12874 } else {
12875 x->skip = warp_skip;
12876 *mbmi = warp_mbmi;
12877 }
12878 #else
12879 if (ncobmc_rd < AOMMIN(st_rd, obmc_rd)) {
12880 x->skip = ncobmc_skip;
12881 *mbmi = ncobmc_mbmi;
12882 #if CONFIG_VAR_TX
12883 memcpy(x->blk_skip[0], ncobmc_blk_skip, sizeof(ncobmc_blk_skip[0]) * n4);
12884 #endif
12885 #endif // CONFIG_WARPED_MOTION
12886 } else {
12887 if (obmc_rd < st_rd) {
12888 *mbmi = obmc_mbmi;
12889 x->skip = obmc_skip;
12890 #if CONFIG_VAR_TX
12891 memcpy(x->blk_skip[0], obmc_blk_skip, sizeof(obmc_blk_skip[0]) * n4);
12892 #endif
12893 } else {
12894 *mbmi = st_mbmi;
12895 x->skip = st_skip;
12896 #if CONFIG_VAR_TX
12897 memcpy(x->blk_skip[0], st_blk_skip, sizeof(st_blk_skip[0]) * n4);
12898 #endif
12899 }
12900 }
12901 }
12902
12903 int64_t get_ncobmc_error(MACROBLOCKD *xd, int pxl_row, int pxl_col,
12904 BLOCK_SIZE bsize, int plane, struct buf_2d *src) {
12905 const int wide = AOMMIN(mi_size_wide[bsize] * MI_SIZE,
12906 (xd->sb_mi_bd.mi_col_end + 1) * MI_SIZE - pxl_col);
12907 const int high = AOMMIN(mi_size_high[bsize] * MI_SIZE,
12908 (xd->sb_mi_bd.mi_row_end + 1) * MI_SIZE - pxl_row);
12909 const int ss_x = xd->plane[plane].subsampling_x;
12910 const int ss_y = xd->plane[plane].subsampling_y;
12911 int row_offset = (pxl_row - xd->sb_mi_bd.mi_row_begin * MI_SIZE) >> ss_y;
12912 int col_offset = (pxl_col - xd->sb_mi_bd.mi_col_begin * MI_SIZE) >> ss_x;
12913 int dst_stride = xd->ncobmc_pred_buf_stride[plane];
12914 int dst_offset = row_offset * dst_stride + col_offset;
12915 int src_stride = src->stride;
12916
12917 int r, c;
12918 int64_t tmp, error = 0;
12919
12920 for (r = 0; r < (high >> ss_y); ++r) {
12921 for (c = 0; c < (wide >> ss_x); ++c) {
12922 tmp = xd->ncobmc_pred_buf[plane][r * dst_stride + c + dst_offset] -
12923 src->buf[r * src_stride + c];
12924 error += tmp * tmp;
12925 }
12926 }
12927 return error;
12928 }
12929
12930 int get_ncobmc_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
12931 MACROBLOCKD *xd, int mi_row, int mi_col, int bsize) {
12932 const AV1_COMMON *const cm = &cpi->common;
12933 uint8_t *pred_buf[4][MAX_MB_PLANE];
12934
12935 // TODO(weitinglin): stride size needs to be fixed for high-bit depth
12936 int pred_stride[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
12937
12938 // target block in pxl
12939 int pxl_row = mi_row << MI_SIZE_LOG2;
12940 int pxl_col = mi_col << MI_SIZE_LOG2;
12941 int64_t error, best_error = INT64_MAX;
12942 int plane, tmp_mode, best_mode = 0;
12943 #if CONFIG_HIGHBITDEPTH
12944 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
12945 int len = sizeof(uint16_t);
12946 ASSIGN_ALIGNED_PTRS_HBD(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE,
12947 len);
12948 ASSIGN_ALIGNED_PTRS_HBD(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE,
12949 len);
12950 ASSIGN_ALIGNED_PTRS_HBD(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE,
12951 len);
12952 ASSIGN_ALIGNED_PTRS_HBD(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE,
12953 len);
12954 } else {
12955 #endif // CONFIG_HIGHBITDEPTH
12956 ASSIGN_ALIGNED_PTRS(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE);
12957 ASSIGN_ALIGNED_PTRS(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE);
12958 ASSIGN_ALIGNED_PTRS(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE);
12959 ASSIGN_ALIGNED_PTRS(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE);
12960 #if CONFIG_HIGHBITDEPTH
12961 }
12962 #endif
12963
12964 av1_get_ext_blk_preds(cm, xd, bsize, mi_row, mi_col, pred_buf, pred_stride);
12965 av1_get_ori_blk_pred(cm, xd, bsize, mi_row, mi_col, pred_buf[3], pred_stride);
12966
12967 for (tmp_mode = 0; tmp_mode < MAX_NCOBMC_MODES; ++tmp_mode) {
12968 error = 0;
12969 for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
12970 build_ncobmc_intrpl_pred(cm, xd, plane, pxl_row, pxl_col, bsize, pred_buf,
12971 pred_stride, tmp_mode);
12972 error += get_ncobmc_error(xd, pxl_row, pxl_col, bsize, plane,
12973 &x->plane[plane].src);
12974 }
12975 if (error < best_error) {
12976 best_mode = tmp_mode;
12977 best_error = error;
12978 }
12979 }
12980
12981 for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
12982 build_ncobmc_intrpl_pred(cm, xd, plane, pxl_row, pxl_col, bsize, pred_buf,
12983 pred_stride, best_mode);
12984 }
12985
12986 return best_mode;
12987 }
12988
12989 #endif // CONFIG_NCOBMC_ADAPT_WEIGHT
12990 #endif // CONFIG_MOTION_VAR
12991