1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 #include <math.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 
17 #include "config/aom_config.h"
18 #include "config/aom_dsp_rtcd.h"
19 #include "config/av1_rtcd.h"
20 
21 #include "aom_dsp/aom_dsp_common.h"
22 #include "aom_dsp/binary_codes_writer.h"
23 #include "aom_ports/mem.h"
24 #include "aom_ports/aom_timer.h"
25 
26 #include "av1/common/reconinter.h"
27 #include "av1/common/blockd.h"
28 
29 #include "av1/encoder/encodeframe.h"
30 #include "av1/encoder/var_based_part.h"
31 #include "av1/encoder/reconinter_enc.h"
32 
// Table of bytes defined outside the visible part of this file; only its
// declaration is needed here.
extern const uint8_t AV1_VAR_OFFS[];
34 
// Size-independent view of one element of the variance tree. It is filled in
// by tree_to_node() from the concrete VP128x128 / VP64x64 / ... / VP4x4
// structure so that the same code can process every level of the tree.
typedef struct {
  // Partition variances (none / horz[2] / vert[2]) of the element itself.
  VPVariance *part_variances;
  // Variance record of each of the four children: the "none" entry of the
  // child's own partition variances, or the child itself for a 4x4 element.
  VPartVar *split[4];
} variance_node;
39 
tree_to_node(void * data,BLOCK_SIZE bsize,variance_node * node)40 static AOM_INLINE void tree_to_node(void *data, BLOCK_SIZE bsize,
41                                     variance_node *node) {
42   int i;
43   node->part_variances = NULL;
44   switch (bsize) {
45     case BLOCK_128X128: {
46       VP128x128 *vt = (VP128x128 *)data;
47       node->part_variances = &vt->part_variances;
48       for (i = 0; i < 4; i++)
49         node->split[i] = &vt->split[i].part_variances.none;
50       break;
51     }
52     case BLOCK_64X64: {
53       VP64x64 *vt = (VP64x64 *)data;
54       node->part_variances = &vt->part_variances;
55       for (i = 0; i < 4; i++)
56         node->split[i] = &vt->split[i].part_variances.none;
57       break;
58     }
59     case BLOCK_32X32: {
60       VP32x32 *vt = (VP32x32 *)data;
61       node->part_variances = &vt->part_variances;
62       for (i = 0; i < 4; i++)
63         node->split[i] = &vt->split[i].part_variances.none;
64       break;
65     }
66     case BLOCK_16X16: {
67       VP16x16 *vt = (VP16x16 *)data;
68       node->part_variances = &vt->part_variances;
69       for (i = 0; i < 4; i++)
70         node->split[i] = &vt->split[i].part_variances.none;
71       break;
72     }
73     case BLOCK_8X8: {
74       VP8x8 *vt = (VP8x8 *)data;
75       node->part_variances = &vt->part_variances;
76       for (i = 0; i < 4; i++)
77         node->split[i] = &vt->split[i].part_variances.none;
78       break;
79     }
80     default: {
81       VP4x4 *vt = (VP4x4 *)data;
82       assert(bsize == BLOCK_4X4);
83       node->part_variances = &vt->part_variances;
84       for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
85       break;
86     }
87   }
88 }
89 
90 // Set variance values given sum square error, sum error, count.
fill_variance(uint32_t s2,int32_t s,int c,VPartVar * v)91 static AOM_INLINE void fill_variance(uint32_t s2, int32_t s, int c,
92                                      VPartVar *v) {
93   v->sum_square_error = s2;
94   v->sum_error = s;
95   v->log2_count = c;
96 }
97 
get_variance(VPartVar * v)98 static AOM_INLINE void get_variance(VPartVar *v) {
99   v->variance =
100       (int)(256 * (v->sum_square_error -
101                    (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
102                               v->log2_count)) >>
103             v->log2_count);
104 }
105 
sum_2_variances(const VPartVar * a,const VPartVar * b,VPartVar * r)106 static AOM_INLINE void sum_2_variances(const VPartVar *a, const VPartVar *b,
107                                        VPartVar *r) {
108   assert(a->log2_count == b->log2_count);
109   fill_variance(a->sum_square_error + b->sum_square_error,
110                 a->sum_error + b->sum_error, a->log2_count + 1, r);
111 }
112 
fill_variance_tree(void * data,BLOCK_SIZE bsize)113 static AOM_INLINE void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
114   variance_node node;
115   memset(&node, 0, sizeof(node));
116   tree_to_node(data, bsize, &node);
117   sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
118   sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
119   sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
120   sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
121   sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
122                   &node.part_variances->none);
123 }
124 
set_block_size(AV1_COMP * const cpi,MACROBLOCK * const x,MACROBLOCKD * const xd,int mi_row,int mi_col,BLOCK_SIZE bsize)125 static AOM_INLINE void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x,
126                                       MACROBLOCKD *const xd, int mi_row,
127                                       int mi_col, BLOCK_SIZE bsize) {
128   if (cpi->common.mi_params.mi_cols > mi_col &&
129       cpi->common.mi_params.mi_rows > mi_row) {
130     set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
131                           mi_row, mi_col);
132     xd->mi[0]->bsize = bsize;
133   }
134 }
135 
// Tries to settle the partitioning of the square block `bsize` at
// (mi_row, mi_col) from the variance-tree element `data`, without a 4-way
// split.
//
// Returns 1 when a block size was written with set_block_size(): either the
// whole block, or its two vertical or two horizontal halves. Returns 0
// otherwise: force_split == 1, bsize below bsize_min, the block does not fit
// in the tile, or no candidate has variance below `threshold`.
//
// On intra-only frames the "none" variance is computed here; on other frames
// it is expected to have been computed already (when force_split was set).
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd,
                               const TileInfo *const tile, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  AV1_COMMON *const cm = &cpi->common;
  const int block_width = mi_size_wide[bsize];
  const int block_height = mi_size_high[bsize];
  variance_node vt;

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  if (force_split == 1 || bsize < bsize_min) return 0;

  VPVariance *const pv = vt.part_variances;
  const int is_intra = frame_is_intra_only(cm);
  // Whether the full block / its first half stays inside the tile.
  const int full_cols_fit = mi_col + block_width <= tile->mi_col_end;
  const int full_rows_fit = mi_row + block_height <= tile->mi_row_end;
  const int half_cols_fit = mi_col + block_width / 2 <= tile->mi_col_end;
  const int half_rows_fit = mi_row + block_height / 2 <= tile->mi_row_end;

  // Variance already computed to set the force_split (non-intra frames).
  if (is_intra) get_variance(&pv->none);

  if (bsize == bsize_min) {
    // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
    // variance is below threshold, otherwise split will be selected.
    // No check for vert/horiz split as too few samples for variance.
    if (full_cols_fit && full_rows_fit && pv->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  }

  // From here on bsize > bsize_min.

  // For key frame: take split for bsize above 32X32 or very high variance.
  if (is_intra &&
      (bsize > BLOCK_32X32 || pv->none.variance > (threshold << 4))) {
    return 0;
  }

  // If variance is low, take the bsize (no split).
  if (full_cols_fit && full_rows_fit && pv->none.variance < threshold) {
    set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
    return 1;
  }

  // Check vertical split.
  if (full_rows_fit && half_cols_fit) {
    const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
    get_variance(&pv->vert[0]);
    get_variance(&pv->vert[1]);
    if (pv->vert[0].variance < threshold &&
        pv->vert[1].variance < threshold &&
        get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                             xd->plane[1].subsampling_y) < BLOCK_INVALID) {
      set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
      set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
      return 1;
    }
  }

  // Check horizontal split.
  if (full_cols_fit && half_rows_fit) {
    const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
    get_variance(&pv->horz[0]);
    get_variance(&pv->horz[1]);
    if (pv->horz[0].variance < threshold &&
        pv->horz[1].variance < threshold &&
        get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                             xd->plane[1].subsampling_y) < BLOCK_INVALID) {
      set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
      set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
      return 1;
    }
  }

  return 0;
}
215 
fill_variance_8x8avg(const uint8_t * s,int sp,const uint8_t * d,int dp,int x16_idx,int y16_idx,VP16x16 * vst,int highbd_flag,int pixels_wide,int pixels_high,int is_key_frame)216 static AOM_INLINE void fill_variance_8x8avg(const uint8_t *s, int sp,
217                                             const uint8_t *d, int dp,
218                                             int x16_idx, int y16_idx,
219                                             VP16x16 *vst,
220 #if CONFIG_AV1_HIGHBITDEPTH
221                                             int highbd_flag,
222 #endif
223                                             int pixels_wide, int pixels_high,
224                                             int is_key_frame) {
225   int k;
226   for (k = 0; k < 4; k++) {
227     int x8_idx = x16_idx + ((k & 1) << 3);
228     int y8_idx = y16_idx + ((k >> 1) << 3);
229     unsigned int sse = 0;
230     int sum = 0;
231     if (x8_idx < pixels_wide && y8_idx < pixels_high) {
232       int s_avg;
233       int d_avg = 128;
234 #if CONFIG_AV1_HIGHBITDEPTH
235       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
236         s_avg = aom_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
237         if (!is_key_frame)
238           d_avg = aom_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
239       } else {
240         s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
241         if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
242       }
243 #else
244       s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
245       if (!is_key_frame) d_avg = aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
246 #endif
247       sum = s_avg - d_avg;
248       sse = sum * sum;
249     }
250     fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
251   }
252 }
253 
// For the 16x16 block at (x16_idx, y16_idx), calls aom_minmax_8x8() (or the
// high-bitdepth variant) on each 8x8 sub-block whose origin lies inside
// pixels_wide x pixels_high, and takes (max - min) of its outputs as that
// sub-block's range. Returns the largest range minus the smallest range.
// The running extremes start at 0 and 255 respectively.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int largest_range = 0;
  int smallest_range = 255;

  // Loop over the 4 8x8 subblocks.
  for (int k = 0; k < 4; k++) {
    const int x8_idx = x16_idx + ((k & 1) << 3);
    const int y8_idx = y16_idx + ((k >> 1) << 3);
    if (x8_idx >= pixels_wide || y8_idx >= pixels_high) continue;

    const uint8_t *const src8 = s + y8_idx * sp + x8_idx;
    const uint8_t *const ref8 = d + y8_idx * dp + x8_idx;
    int min = 0;
    int max = 0;
#if CONFIG_AV1_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      aom_highbd_minmax_8x8(src8, sp, ref8, dp, &min, &max);
    } else {
      aom_minmax_8x8(src8, sp, ref8, dp, &min, &max);
    }
#else
    aom_minmax_8x8(src8, sp, ref8, dp, &min, &max);
#endif
    const int range = max - min;
    if (range > largest_range) largest_range = range;
    if (range < smallest_range) smallest_range = range;
  }
  return largest_range - smallest_range;
}
288 
fill_variance_4x4avg(const uint8_t * s,int sp,const uint8_t * d,int dp,int x8_idx,int y8_idx,VP8x8 * vst,int highbd_flag,int pixels_wide,int pixels_high,int is_key_frame)289 static AOM_INLINE void fill_variance_4x4avg(const uint8_t *s, int sp,
290                                             const uint8_t *d, int dp,
291                                             int x8_idx, int y8_idx, VP8x8 *vst,
292 #if CONFIG_AV1_HIGHBITDEPTH
293                                             int highbd_flag,
294 #endif
295                                             int pixels_wide, int pixels_high,
296                                             int is_key_frame) {
297   int k;
298   for (k = 0; k < 4; k++) {
299     int x4_idx = x8_idx + ((k & 1) << 2);
300     int y4_idx = y8_idx + ((k >> 1) << 2);
301     unsigned int sse = 0;
302     int sum = 0;
303     if (x4_idx < pixels_wide && y4_idx < pixels_high) {
304       int s_avg;
305       int d_avg = 128;
306 #if CONFIG_AV1_HIGHBITDEPTH
307       if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
308         s_avg = aom_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
309         if (!is_key_frame)
310           d_avg = aom_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
311       } else {
312         s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
313         if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
314       }
315 #else
316       s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
317       if (!is_key_frame) d_avg = aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
318 #endif
319 
320       sum = s_avg - d_avg;
321       sse = sum * sum;
322     }
323     fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
324   }
325 }
326 
// TODO(kyslov) Bring back threshold adjustment based on content state
//
// Scales the base partition threshold:
//   - by 3/2 when the frame is a non-reference frame, then
//   - by 5/4 when speed >= 8.
// Both scalings use integer multiply followed by a right shift. `width` and
// `height` are currently unused.
static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
                                         int width, int height,
                                         int non_reference_frame) {
  (void)width;
  (void)height;
  const int64_t scaled =
      non_reference_frame ? ((3 * threshold_base) >> 1) : threshold_base;
  return (speed >= 8) ? ((5 * scaled) >> 2) : scaled;
}
340 
set_vbp_thresholds(AV1_COMP * cpi,int64_t thresholds[],int q,int content_lowsumdiff,int source_sad,int segment_id)341 static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
342                                           int q, int content_lowsumdiff,
343                                           int source_sad, int segment_id) {
344   AV1_COMMON *const cm = &cpi->common;
345   const int is_key_frame = frame_is_intra_only(cm);
346   const int threshold_multiplier = is_key_frame ? 120 : 1;
347   int64_t threshold_base =
348       (int64_t)(threshold_multiplier *
349                 cpi->enc_quant_dequant_params.dequants.y_dequant_QTX[q][1]);
350   const int current_qindex = cm->quant_params.base_qindex;
351 
352   if (is_key_frame) {
353     if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
354       threshold_base <<= cpi->oxcf.speed - 7;
355     }
356     thresholds[0] = threshold_base;
357     thresholds[1] = threshold_base;
358     if (cm->width * cm->height < 1280 * 720) {
359       thresholds[2] = threshold_base / 3;
360       thresholds[3] = threshold_base >> 1;
361     } else {
362       int shift_val = 2;
363       if (cpi->sf.rt_sf.force_large_partition_blocks_intra) {
364         shift_val = 0;
365       }
366 
367       thresholds[2] = threshold_base >> shift_val;
368       thresholds[3] = threshold_base >> shift_val;
369     }
370     thresholds[4] = threshold_base << 2;
371   } else {
372     // Increase partition thresholds for noisy content. Apply it only for
373     // superblocks where sumdiff is low, as we assume the sumdiff of superblock
374     // whose only change is due to noise will be low (i.e, noise will average
375     // out over large block).
376     if (cpi->noise_estimate.enabled && content_lowsumdiff &&
377         (cm->width * cm->height > 640 * 480) &&
378         cm->current_frame.frame_number > 60) {
379       NOISE_LEVEL noise_level =
380           av1_noise_estimate_extract_level(&cpi->noise_estimate);
381       if (noise_level == kHigh)
382         threshold_base = (5 * threshold_base) >> 1;
383       else if (noise_level == kMedium &&
384                !cpi->sf.rt_sf.force_large_partition_blocks)
385         threshold_base = (5 * threshold_base) >> 2;
386     }
387     // TODO(kyslov) Enable var based partition adjusment on temporal denoising
388 #if 0  // CONFIG_AV1_TEMPORAL_DENOISING
389     if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
390         cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow)
391       threshold_base =
392           av1_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level,
393                                 content_state, cpi->svc.temporal_layer_id);
394     else
395       threshold_base =
396         scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
397                                   cm->height, cpi->svc.non_reference_frame);
398 #else
399     // Increase base variance threshold based on content_state/sum_diff level.
400     threshold_base =
401         scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
402                                   cm->height, cpi->svc.non_reference_frame);
403 #endif
404     thresholds[0] = threshold_base >> 1;
405     thresholds[1] = threshold_base;
406     thresholds[3] = threshold_base << cpi->oxcf.speed;
407     if (cm->width >= 1280 && cm->height >= 720)
408       thresholds[3] = thresholds[3] << 1;
409     if (cm->width * cm->height <= 352 * 288) {
410       if (current_qindex >= QINDEX_HIGH_THR) {
411         threshold_base = (5 * threshold_base) >> 1;
412         thresholds[1] = threshold_base >> 3;
413         thresholds[2] = threshold_base << 2;
414         thresholds[3] = threshold_base << 5;
415       } else if (current_qindex < QINDEX_LOW_THR) {
416         thresholds[1] = threshold_base >> 3;
417         thresholds[2] = threshold_base >> 1;
418         thresholds[3] = threshold_base << 3;
419       } else {
420         int64_t qi_diff_low = current_qindex - QINDEX_LOW_THR;
421         int64_t qi_diff_high = QINDEX_HIGH_THR - current_qindex;
422         int64_t threshold_diff = QINDEX_HIGH_THR - QINDEX_LOW_THR;
423         int64_t threshold_base_high = (5 * threshold_base) >> 1;
424 
425         threshold_diff = threshold_diff > 0 ? threshold_diff : 1;
426         threshold_base = (qi_diff_low * threshold_base_high +
427                           qi_diff_high * threshold_base) /
428                          threshold_diff;
429         thresholds[1] = threshold_base >> 3;
430         thresholds[2] = ((qi_diff_low * threshold_base) +
431                          qi_diff_high * (threshold_base >> 1)) /
432                         threshold_diff;
433         thresholds[3] = ((qi_diff_low * (threshold_base << 5)) +
434                          qi_diff_high * (threshold_base << 3)) /
435                         threshold_diff;
436       }
437     } else if (cm->width < 1280 && cm->height < 720) {
438       thresholds[2] = (5 * threshold_base) >> 2;
439     } else if (cm->width < 1920 && cm->height < 1080) {
440       thresholds[2] = threshold_base << 1;
441     } else {
442       thresholds[2] = (5 * threshold_base) >> 1;
443     }
444     if (cpi->sf.rt_sf.force_large_partition_blocks) {
445       double weight;
446       const int win = 20;
447       if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
448         weight = 1.0;
449       else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win)
450         weight = 0.0;
451       else
452         weight =
453             1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win);
454       if (cm->width * cm->height > 640 * 480) {
455         for (int i = 0; i < 4; i++) {
456           thresholds[i] <<= 1;
457         }
458       }
459       if (cm->width * cm->height <= 352 * 288) {
460         thresholds[1] <<= 2;
461         thresholds[2] <<= 5;
462         thresholds[3] = INT32_MAX;
463         // Condition the increase of partition thresholds on the segment
464         // and the content. Avoid the increase for superblocks which have
465         // high source sad, unless the whole frame has very high motion
466         // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
467         // have high source sad).
468       } else if (cm->width * cm->height > 640 * 480 && segment_id == 0 &&
469                  (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
470         thresholds[0] = (3 * thresholds[0]) >> 1;
471         thresholds[3] = INT32_MAX;
472         if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
473           thresholds[1] = (int)((1 - weight) * (thresholds[1] << 1) +
474                                 weight * thresholds[1]);
475           thresholds[2] = (int)((1 - weight) * (thresholds[2] << 1) +
476                                 weight * thresholds[2]);
477         }
478       } else if (current_qindex > QINDEX_LARGE_BLOCK_THR && segment_id == 0 &&
479                  (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
480         thresholds[1] =
481             (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
482         thresholds[2] =
483             (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
484         thresholds[3] = INT32_MAX;
485       }
486     }
487   }
488 }
489 
490 // Set temporal variance low flag for superblock 64x64.
491 // Only first 25 in the array are used in this case.
set_low_temp_var_flag_64x64(CommonModeInfoParams * mi_params,PartitionSearchInfo * part_info,MACROBLOCKD * xd,VP64x64 * vt,const int64_t thresholds[],int mi_col,int mi_row)492 static AOM_INLINE void set_low_temp_var_flag_64x64(
493     CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
494     MACROBLOCKD *xd, VP64x64 *vt, const int64_t thresholds[], int mi_col,
495     int mi_row) {
496   if (xd->mi[0]->bsize == BLOCK_64X64) {
497     if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
498       part_info->variance_low[0] = 1;
499   } else if (xd->mi[0]->bsize == BLOCK_64X32) {
500     for (int i = 0; i < 2; i++) {
501       if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
502         part_info->variance_low[i + 1] = 1;
503     }
504   } else if (xd->mi[0]->bsize == BLOCK_32X64) {
505     for (int i = 0; i < 2; i++) {
506       if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
507         part_info->variance_low[i + 3] = 1;
508     }
509   } else {
510     static const int idx[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
511     for (int i = 0; i < 4; i++) {
512       const int idx_str =
513           mi_params->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
514       MB_MODE_INFO **this_mi = mi_params->mi_grid_base + idx_str;
515 
516       if (mi_params->mi_cols <= mi_col + idx[i][1] ||
517           mi_params->mi_rows <= mi_row + idx[i][0])
518         continue;
519 
520       if (*this_mi == NULL) continue;
521 
522       if ((*this_mi)->bsize == BLOCK_32X32) {
523         int64_t threshold_32x32 = (5 * thresholds[1]) >> 3;
524         if (vt->split[i].part_variances.none.variance < threshold_32x32)
525           part_info->variance_low[i + 5] = 1;
526       } else {
527         // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
528         // inside.
529         if ((*this_mi)->bsize == BLOCK_16X16 ||
530             (*this_mi)->bsize == BLOCK_32X16 ||
531             (*this_mi)->bsize == BLOCK_16X32) {
532           for (int j = 0; j < 4; j++) {
533             if (vt->split[i].split[j].part_variances.none.variance <
534                 (thresholds[2] >> 8))
535               part_info->variance_low[(i << 2) + j + 9] = 1;
536           }
537         }
538       }
539     }
540   }
541 }
542 
set_low_temp_var_flag_128x128(CommonModeInfoParams * mi_params,PartitionSearchInfo * part_info,MACROBLOCKD * xd,VP128x128 * vt,const int64_t thresholds[],int mi_col,int mi_row)543 static AOM_INLINE void set_low_temp_var_flag_128x128(
544     CommonModeInfoParams *mi_params, PartitionSearchInfo *part_info,
545     MACROBLOCKD *xd, VP128x128 *vt, const int64_t thresholds[], int mi_col,
546     int mi_row) {
547   if (xd->mi[0]->bsize == BLOCK_128X128) {
548     if (vt->part_variances.none.variance < (thresholds[0] >> 1))
549       part_info->variance_low[0] = 1;
550   } else if (xd->mi[0]->bsize == BLOCK_128X64) {
551     for (int i = 0; i < 2; i++) {
552       if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
553         part_info->variance_low[i + 1] = 1;
554     }
555   } else if (xd->mi[0]->bsize == BLOCK_64X128) {
556     for (int i = 0; i < 2; i++) {
557       if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
558         part_info->variance_low[i + 3] = 1;
559     }
560   } else {
561     static const int idx64[4][2] = {
562       { 0, 0 }, { 0, 16 }, { 16, 0 }, { 16, 16 }
563     };
564     static const int idx32[4][2] = { { 0, 0 }, { 0, 8 }, { 8, 0 }, { 8, 8 } };
565     for (int i = 0; i < 4; i++) {
566       const int idx_str =
567           mi_params->mi_stride * (mi_row + idx64[i][0]) + mi_col + idx64[i][1];
568       MB_MODE_INFO **mi_64 = mi_params->mi_grid_base + idx_str;
569       if (*mi_64 == NULL) continue;
570       if (mi_params->mi_cols <= mi_col + idx64[i][1] ||
571           mi_params->mi_rows <= mi_row + idx64[i][0])
572         continue;
573       const int64_t threshold_64x64 = (5 * thresholds[1]) >> 3;
574       if ((*mi_64)->bsize == BLOCK_64X64) {
575         if (vt->split[i].part_variances.none.variance < threshold_64x64)
576           part_info->variance_low[5 + i] = 1;
577       } else if ((*mi_64)->bsize == BLOCK_64X32) {
578         for (int j = 0; j < 2; j++)
579           if (vt->split[i].part_variances.horz[j].variance <
580               (threshold_64x64 >> 1))
581             part_info->variance_low[9 + (i << 1) + j] = 1;
582       } else if ((*mi_64)->bsize == BLOCK_32X64) {
583         for (int j = 0; j < 2; j++)
584           if (vt->split[i].part_variances.vert[j].variance <
585               (threshold_64x64 >> 1))
586             part_info->variance_low[17 + (i << 1) + j] = 1;
587       } else {
588         for (int k = 0; k < 4; k++) {
589           const int idx_str1 = mi_params->mi_stride * idx32[k][0] + idx32[k][1];
590           MB_MODE_INFO **mi_32 = mi_params->mi_grid_base + idx_str + idx_str1;
591           if (*mi_32 == NULL) continue;
592 
593           if (mi_params->mi_cols <= mi_col + idx64[i][1] + idx32[k][1] ||
594               mi_params->mi_rows <= mi_row + idx64[i][0] + idx32[k][0])
595             continue;
596           const int64_t threshold_32x32 = (5 * thresholds[2]) >> 3;
597           if ((*mi_32)->bsize == BLOCK_32X32) {
598             if (vt->split[i].split[k].part_variances.none.variance <
599                 threshold_32x32)
600               part_info->variance_low[25 + (i << 2) + k] = 1;
601           } else {
602             // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
603             // inside.
604             if ((*mi_32)->bsize == BLOCK_16X16 ||
605                 (*mi_32)->bsize == BLOCK_32X16 ||
606                 (*mi_32)->bsize == BLOCK_16X32) {
607               for (int j = 0; j < 4; j++) {
608                 if (vt->split[i]
609                         .split[k]
610                         .split[j]
611                         .part_variances.none.variance < (thresholds[3] >> 8))
612                   part_info->variance_low[41 + (i << 4) + (k << 2) + j] = 1;
613               }
614             }
615           }
616         }
617       }
618     }
619   }
620 }
621 
set_low_temp_var_flag(AV1_COMP * cpi,PartitionSearchInfo * part_info,MACROBLOCKD * xd,VP128x128 * vt,int64_t thresholds[],MV_REFERENCE_FRAME ref_frame_partition,int mi_col,int mi_row)622 static AOM_INLINE void set_low_temp_var_flag(
623     AV1_COMP *cpi, PartitionSearchInfo *part_info, MACROBLOCKD *xd,
624     VP128x128 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition,
625     int mi_col, int mi_row) {
626   AV1_COMMON *const cm = &cpi->common;
627   const int mv_thr = cm->width > 640 ? 8 : 4;
628   // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
629   // int_pro mv is small. If the temporal variance is small set the flag
630   // variance_low for the block. The variance threshold can be adjusted, the
631   // higher the more aggressive.
632   if (ref_frame_partition == LAST_FRAME &&
633       (cpi->sf.rt_sf.short_circuit_low_temp_var == 1 ||
634        (cpi->sf.rt_sf.estimate_motion_for_var_based_partition &&
635         xd->mi[0]->mv[0].as_mv.col < mv_thr &&
636         xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
637         xd->mi[0]->mv[0].as_mv.row < mv_thr &&
638         xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
639     const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
640     if (is_small_sb)
641       set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd,
642                                   &(vt->split[0]), thresholds, mi_col, mi_row);
643     else
644       set_low_temp_var_flag_128x128(&cm->mi_params, part_info, xd, vt,
645                                     thresholds, mi_col, mi_row);
646   }
647 }
648 
// Refreshes cpi->vbp_info for quantizer index `q`. Does nothing unless the
// partition search type is VAR_BASED_PARTITION. Otherwise the thresholds are
// recomputed with set_vbp_thresholds() (source_sad and segment_id both 0) and
// threshold_minmax is set to 15 + q / 8.
void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
                                           int content_lowsumdiff) {
  if (cpi->sf.part_sf.partition_search_type != VAR_BASED_PARTITION) return;

  set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, q, content_lowsumdiff,
                     /*source_sad=*/0, /*segment_id=*/0);
  // The threshold below is not changed locally.
  cpi->vbp_info.threshold_minmax = 15 + (q >> 3);
}
661 
chroma_check(AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,unsigned int y_sad,int is_key_frame)662 static AOM_INLINE void chroma_check(AV1_COMP *cpi, MACROBLOCK *x,
663                                     BLOCK_SIZE bsize, unsigned int y_sad,
664                                     int is_key_frame) {
665   int i;
666   MACROBLOCKD *xd = &x->e_mbd;
667 
668   if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;
669 
670   for (i = 1; i <= 2; ++i) {
671     unsigned int uv_sad = UINT_MAX;
672     struct macroblock_plane *p = &x->plane[i];
673     struct macroblockd_plane *pd = &xd->plane[i];
674     const BLOCK_SIZE bs =
675         get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
676 
677     if (bs != BLOCK_INVALID)
678       uv_sad = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
679                                         pd->dst.stride);
680 
681     if (uv_sad > (y_sad >> 1))
682       x->color_sensitivity_sb[i - 1] = 1;
683     else if (uv_sad < (y_sad >> 3))
684       x->color_sensitivity_sb[i - 1] = 0;
685     // Borderline case: to be refined at coding block level in nonrd_pickmode,
686     // for coding block size < sb_size.
687     else
688       x->color_sensitivity_sb[i - 1] = 2;
689   }
690 }
691 
// Fills the leaves of the variance tree `vt` for the current superblock and
// initializes the forced-split flags and the per-32x32 statistics.
//
// The superblock is walked as 64x64 (m) -> 32x32 (i) -> 16x16 (j) blocks.
// force_split[] is indexed as:
//   [0]                        whole superblock (only ever set to 1 here),
//   [1 + m]                    64x64 blocks,
//   [5 + 4 * m + i]            32x32 blocks,
//   [21 + 16 * m + 4 * i + j]  16x16 blocks.
// variance4x4downsample[] is indexed by 16 * m + 4 * i + j.
//
// Frames that are not intra-only: every 16x16 block is filled from 8x8
// averages, its variance is computed, and avg_16x16[m][i] (a running sum),
// minvar_16x16[m][i] and maxvar_16x16[m][i] are updated. A 16x16 variance
// above thresholds[3] forces a split of that block and of all its ancestors.
//
// Intra-only frames: every 8x8 sub-block is filled from 4x4 averages and
// variance4x4downsample[] is set to 1; no variance is computed here, so the
// avg/max/min outputs keep their initial values (0 / 0 / INT_MAX).
//
// NOTE(review): the key-frame decision here comes from frame_is_intra_only()
// alone, while av1_choose_var_based_partitioning() may additionally treat a
// frame as key (SVC key layer, missing or rescaled LAST reference). Confirm
// that the two are meant to differ.
static void fill_variance_tree_leaves(
    AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, VP16x16 *vt2,
    unsigned char *force_split, int avg_16x16[][4], int maxvar_16x16[][4],
    int minvar_16x16[][4], int *variance4x4downsample, int64_t *thresholds,
    uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride) {
  AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int is_key_frame = frame_is_intra_only(cm);
  const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;
  // TODO(kyslov) Bring back compute_minmax_variance with content type detection
  const int compute_minmax_variance = 0;
  const int segment_id = xd->mi[0]->segment_id;
  const int sb_pixels = is_small_sb ? 64 : 128;
  int pixels_wide = sb_pixels;
  int pixels_high = sb_pixels;

  // The 4x4 down-sampled path below only runs for intra-only frames, where
  // the leaves live in `vt` itself, so `vt2` is never touched here.
  (void)vt2;

  // Clip the visible area when the superblock crosses the right or bottom
  // frame edge (the edge distances are scaled down by 8 to get pixels).
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);

  for (int m = 0; m < num_64x64_blocks; m++) {
    const int x64_idx = ((m & 1) << 6);
    const int y64_idx = ((m >> 1) << 6);
    const int m2 = m << 2;
    const int idx_64x64 = m + 1;
    force_split[idx_64x64] = 0;

    for (int i = 0; i < 4; i++) {
      const int x32_idx = x64_idx + ((i & 1) << 5);
      const int y32_idx = y64_idx + ((i >> 1) << 5);
      const int i2 = (m2 + i) << 2;
      const int idx_32x32 = 5 + m2 + i;
      force_split[idx_32x32] = 0;
      avg_16x16[m][i] = 0;
      maxvar_16x16[m][i] = 0;
      minvar_16x16[m][i] = INT_MAX;

      for (int j = 0; j < 4; j++) {
        const int x16_idx = x32_idx + ((j & 1) << 4);
        const int y16_idx = y32_idx + ((j >> 1) << 4);
        const int leaf_idx = i2 + j;
        const int split_index = 21 + leaf_idx;
        VP16x16 *vst = &vt->split[m].split[i].split[j];
        force_split[split_index] = 0;
        variance4x4downsample[leaf_idx] = is_key_frame ? 1 : 0;

        if (is_key_frame) {
          // Go down to 4x4 down-sampling for variance.
          for (int k = 0; k < 4; k++) {
            const int x8_idx = x16_idx + ((k & 1) << 3);
            const int y8_idx = y16_idx + ((k >> 1) << 3);
            fill_variance_4x4avg(src, src_stride, dst, dst_stride, x8_idx,
                                 y8_idx, &vst->split[k],
#if CONFIG_AV1_HIGHBITDEPTH
                                 xd->cur_buf->flags,
#endif
                                 pixels_wide, pixels_high, is_key_frame);
          }
          continue;
        }

        fill_variance_8x8avg(src, src_stride, dst, dst_stride, x16_idx,
                             y16_idx, vst,
#if CONFIG_AV1_HIGHBITDEPTH
                             xd->cur_buf->flags,
#endif
                             pixels_wide, pixels_high, is_key_frame);
        fill_variance_tree(vst, BLOCK_16X16);
        get_variance(&vst->part_variances.none);

        avg_16x16[m][i] += vst->part_variances.none.variance;
        if (vst->part_variances.none.variance < minvar_16x16[m][i])
          minvar_16x16[m][i] = vst->part_variances.none.variance;
        if (vst->part_variances.none.variance > maxvar_16x16[m][i])
          maxvar_16x16[m][i] = vst->part_variances.none.variance;

        int split_16x16 = 0;
        if (vst->part_variances.none.variance > thresholds[3]) {
          // 16X16 variance is above threshold for split, so force split to
          // 8x8 for this 16x16 block (this also forces splits for upper
          // levels).
          split_16x16 = 1;
        } else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
                   compute_minmax_variance &&
                   vst->part_variances.none.variance > thresholds[2]) {
          // We have some nominal amount of 16x16 variance (based on average),
          // compute the minmax over the 8x8 sub-blocks, and if above
          // threshold, force split to 8x8 block for this 16x16 block.
          const int minmax = compute_minmax_8x8(src, src_stride, dst,
                                                dst_stride, x16_idx, y16_idx,
#if CONFIG_AV1_HIGHBITDEPTH
                                                xd->cur_buf->flags,
#endif
                                                pixels_wide, pixels_high);
          const int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
          split_16x16 = (minmax > thresh_minmax);
        }

        if (split_16x16) {
          // Propagate the split decision to every enclosing block.
          force_split[split_index] = 1;
          force_split[idx_32x32] = 1;
          force_split[idx_64x64] = 1;
          force_split[0] = 1;
        }
      }
    }
  }
}
806 
// Selects the reference frame used for variance-based partitioning of the
// current superblock and builds the luma inter predictor from it.
//
// On return:
//   *y_sad               luma SAD against the chosen reference. If the caller
//                        passes UINT_MAX and motion estimation did not run,
//                        it is computed here against LAST_FRAME.
//   *y_sad_g             luma SAD against GOLDEN_FRAME, written only when
//                        GOLDEN is evaluated (non-SVC, GOLDEN flag set, and
//                        its buffer exists and differs from LAST).
//   *ref_frame_partition LAST_FRAME or GOLDEN_FRAME.
// The mode info of xd->mi[0] is overwritten (single reference, zero motion
// vector unless the motion estimation call changes it, bilinear filter,
// superblock block size) and x->nonrd_prune_ref_frame_search is updated.
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
                         unsigned int *y_sad_g,
                         MV_REFERENCE_FRAME *ref_frame_partition, int mi_row,
                         int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const BLOCK_SIZE bsize =
      (sb_size == BLOCK_64X64) ? BLOCK_64X64 : BLOCK_128X128;
  // TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it
  // is!!
  const YV12_BUFFER_CONFIG *last_buf = get_ref_frame_yv12_buf(cm, LAST_FRAME);
  assert(last_buf != NULL);
  const YV12_BUFFER_CONFIG *golden_buf = NULL;

  // For non-SVC, GOLDEN is another temporal reference. Measure its
  // zero-motion SAD so it can compete with LAST below.
  if (!cpi->ppi->use_svc && (cpi->ref_frame_flags & AOM_GOLD_FLAG)) {
    golden_buf = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
    if (golden_buf && golden_buf != last_buf) {
      av1_setup_pre_planes(xd, 0, golden_buf, mi_row, mi_col,
                           get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
      *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    }
  }

  // Start from LAST_FRAME with zero motion.
  av1_setup_pre_planes(xd, 0, last_buf, mi_row, mi_col,
                       get_ref_scale_factors(cm, LAST_FRAME), num_planes);
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NONE_FRAME;
  mi->bsize = sb_size;
  mi->mv[0].as_int = 0;
  mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);

  // Motion estimation is only attempted for superblocks lying fully inside
  // the frame.
  if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition &&
      xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
    const MV zero_mv = { 0, 0 };
    *y_sad = av1_int_pro_motion_estimation(cpi, x, sb_size, mi_row, mi_col,
                                           &zero_mv);
  }
  if (*y_sad == UINT_MAX) {
    *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
        x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
        xd->plane[0].pre[0].stride);
  }

  // Pick the ref frame for partitioning; golden wins only if its SAD is
  // below 90% of the LAST SAD.
  const int use_golden = (*y_sad_g < 0.9 * *y_sad);
  if (use_golden) {
    av1_setup_pre_planes(xd, 0, golden_buf, mi_row, mi_col,
                         get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
    mi->ref_frame[0] = GOLDEN_FRAME;
    mi->mv[0].as_int = 0;
    *y_sad = *y_sad_g;
    *ref_frame_partition = GOLDEN_FRAME;
    x->nonrd_prune_ref_frame_search = 0;
  } else {
    *ref_frame_partition = LAST_FRAME;
    x->nonrd_prune_ref_frame_search =
        cpi->sf.rt_sf.nonrd_prune_ref_frame_search;
  }

  set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, sb_size,
                                AOM_PLANE_Y, AOM_PLANE_Y);
}
877 
// Chooses the partitioning of the superblock at (mi_row, mi_col) from block
// variances and applies it through set_vt_partitioning() / set_block_size().
//
// Steps:
//   1. Derive per-superblock thresholds (boosted cyclic-refresh segments use
//      the segment's q index).
//   2. For non-key frames, pick the partitioning reference and build the luma
//      predictor (setup_planes()); for key frames the "prediction" is the
//      constant AV1_VAR_OFFS buffer with stride 0.
//   3. Fill the variance tree leaves, then aggregate variances bottom-up
//      (16x16 -> 32x32 -> 64x64 -> 128x128), recording forced splits.
//   4. Walk the tree top-down, splitting every block whose partition cannot
//      be set at the current size.
//   5. Optionally set the low-temporal-variance flags and run chroma_check().
//
// `tile` supplies the tile bounds used to decide whether a 128x128 partition
// is attempted; `td` supplies the storage for the 64x64 sub-trees
// (td->vt64x64). Block offsets passed to the partition setters are in mi
// units (16 per 64x64 block).
//
// Always returns 0.
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                      ThreadData *td, MACROBLOCK *x, int mi_row,
                                      int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  const int64_t *const vbp_thresholds = cpi->vbp_info.thresholds;

  int i, j, k, m;
  VP128x128 *vt;
  VP16x16 *vt2 = NULL;
  unsigned char force_split[85];
  int avg_64x64;
  int max_var_32x32[4];
  int min_var_32x32[4];
  int var_32x32;
  int var_64x64;
  int min_var_64x64 = INT_MAX;
  int max_var_64x64 = 0;
  int avg_16x16[4][4];
  int maxvar_16x16[4][4];
  int minvar_16x16[4][4];
  int64_t threshold_4x4avg;
  uint8_t *s;
  const uint8_t *d;
  int sp;
  int dp;
  NOISE_LEVEL noise_level = kLow;

  int is_key_frame =
      (frame_is_intra_only(cm) ||
       (cpi->ppi->use_svc &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));

  assert(cm->seq_params->sb_size == BLOCK_64X64 ||
         cm->seq_params->sb_size == BLOCK_128X128);
  const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;

  unsigned int y_sad = UINT_MAX;
  unsigned int y_sad_g = UINT_MAX;
  BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;

  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;

  CHECK_MEM_ERROR(cm, vt, aom_malloc(sizeof(*vt)));

  // The 64x64 sub-trees are not owned by vt; they come from the thread data.
  vt->split = td->vt64x64;

  // Local copy so the per-superblock adjustment below does not modify the
  // frame-level thresholds.
  int64_t thresholds[5] = { vbp_thresholds[0], vbp_thresholds[1],
                            vbp_thresholds[2], vbp_thresholds[3],
                            vbp_thresholds[4] };

  const int low_res = (cm->width <= 352 && cm->height <= 288);
  // One entry per 16x16 block of a 128x128 superblock.
  int variance4x4downsample[64];
  const size_t num_16x16_blocks =
      sizeof(variance4x4downsample) / sizeof(variance4x4downsample[0]);
  const int segment_id = xd->mi[0]->segment_id;

  if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      cyclic_refresh_segment_id_boosted(segment_id) &&
      cpi->sf.rt_sf.use_nonrd_pick_mode) {
    int q = av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
    set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff,
                       x->content_state_sb.source_sad, 1);
  } else {
    set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
                       x->content_state_sb.low_sumdiff,
                       x->content_state_sb.source_sad, 0);
  }

  // The 4x4 average for non-key frames is currently disabled: with the
  // threshold pinned at INT64_MAX, vt2 is never allocated below.
  threshold_4x4avg = INT64_MAX;

  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // Index for force_split: 0 for the whole superblock (128x128), 1-4 for the
  // 64x64 blocks, 5-20 for the 32x32 blocks, 21-84 for the 16x16 blocks.
  force_split[0] = 0;
  memset(x->part_search_info.variance_low, 0,
         sizeof(x->part_search_info.variance_low));

  // Check if LAST frame is NULL or if the resolution of LAST is
  // different than the current frame resolution, and if so, treat this frame
  // as a key frame, for the purpose of the superblock partitioning.
  // LAST == NULL can happen in cases where enhancement spatial layers are
  // enabled dynamically and the only reference is the spatial(GOLDEN).
  // TODO(marpan): Check use of scaled references for the different resoln.
  if (!frame_is_intra_only(cm)) {
    const YV12_BUFFER_CONFIG *const ref =
        get_ref_frame_yv12_buf(cm, LAST_FRAME);
    if (ref == NULL || ref->y_crop_height != cm->height ||
        ref->y_crop_width != cm->width) {
      is_key_frame = 1;
    }
  }

  if (!is_key_frame) {
    setup_planes(cpi, x, &y_sad, &y_sad_g, &ref_frame_partition, mi_row,
                 mi_col);
    d = xd->plane[0].dst.buf;
    dp = xd->plane[0].dst.stride;
  } else {
    // No usable reference: measure variance against a constant buffer.
    d = AV1_VAR_OFFS;
    dp = 0;
  }
  if (cpi->noise_estimate.enabled)
    noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);

  // vt2 is indexed per 16x16 block (vt2[i2 + j], 0..63), so it needs one
  // VP16x16 per block rather than a single element.
  if (low_res && threshold_4x4avg < INT64_MAX) {
    CHECK_MEM_ERROR(cm, vt2, aom_malloc(num_16x16_blocks * sizeof(*vt2)));
  }
  // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
  // for splits.
  // NOTE(review): fill_variance_tree_leaves() re-derives its own key-frame
  // flag from frame_is_intra_only() only, so it can disagree with the widened
  // is_key_frame computed above (SVC key layer, missing/rescaled LAST).
  // Confirm this is intended.
  fill_variance_tree_leaves(cpi, x, vt, vt2, force_split, avg_16x16,
                            maxvar_16x16, minvar_16x16, variance4x4downsample,
                            thresholds, s, sp, d, dp);

  avg_64x64 = 0;
  for (m = 0; m < num_64x64_blocks; ++m) {
    max_var_32x32[m] = 0;
    min_var_32x32[m] = INT_MAX;
    const int m2 = m << 2;
    for (i = 0; i < 4; i++) {
      const int i2 = (m2 + i) << 2;
      for (j = 0; j < 4; j++) {
        const int split_index = 21 + i2 + j;
        if (variance4x4downsample[i2 + j] == 1) {
          // Leaves were filled from 4x4 averages: build the 8x8 and 16x16
          // levels of this block now.
          VP16x16 *vtemp =
              (!is_key_frame) ? &vt2[i2 + j] : &vt->split[m].split[i].split[j];
          for (k = 0; k < 4; k++)
            fill_variance_tree(&vtemp->split[k], BLOCK_8X8);
          fill_variance_tree(vtemp, BLOCK_16X16);
          // If variance of this 16x16 block is above the threshold, force block
          // to split. This also forces a split on the upper levels.
          get_variance(&vtemp->part_variances.none);
          if (vtemp->part_variances.none.variance > thresholds[3]) {
            force_split[split_index] = 1;
            force_split[5 + m2 + i] = 1;
            force_split[m + 1] = 1;
            force_split[0] = 1;
          }
        }
      }
      fill_variance_tree(&vt->split[m].split[i], BLOCK_32X32);
      // If variance of this 32x32 block is above the threshold, or if it is
      // above (some threshold of) the average variance over the sub-16x16
      // blocks, then force this block to split. This also forces a split on
      // the upper (64x64) level.
      if (!force_split[5 + m2 + i]) {
        get_variance(&vt->split[m].split[i].part_variances.none);
        var_32x32 = vt->split[m].split[i].part_variances.none.variance;
        max_var_32x32[m] = AOMMAX(var_32x32, max_var_32x32[m]);
        min_var_32x32[m] = AOMMIN(var_32x32, min_var_32x32[m]);
        if (vt->split[m].split[i].part_variances.none.variance >
                thresholds[2] ||
            (!is_key_frame &&
             vt->split[m].split[i].part_variances.none.variance >
                 (thresholds[2] >> 1) &&
             vt->split[m].split[i].part_variances.none.variance >
                 (avg_16x16[m][i] >> 1))) {
          force_split[5 + m2 + i] = 1;
          force_split[m + 1] = 1;
          force_split[0] = 1;
        } else if (!is_key_frame && cm->height <= 360 &&
                   (maxvar_16x16[m][i] - minvar_16x16[m][i]) >
                       (thresholds[2] >> 1) &&
                   maxvar_16x16[m][i] > thresholds[2]) {
          // Small frames: also split on a wide spread of 16x16 variances.
          force_split[5 + m2 + i] = 1;
          force_split[m + 1] = 1;
          force_split[0] = 1;
        }
      }
    }
    if (!force_split[1 + m]) {
      fill_variance_tree(&vt->split[m], BLOCK_64X64);
      get_variance(&vt->split[m].part_variances.none);
      var_64x64 = vt->split[m].part_variances.none.variance;
      max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
      min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
      // If the difference between the max and min variances of the 32x32
      // sub-blocks and the max sub-block variance are both above their
      // thresholds, force this block to split. Only checked when the noise
      // level is >= medium, the encoder is in SVC mode, large partition
      // blocks are already being forced, or nonrd pick mode is not used.

      if (!is_key_frame &&
          (max_var_32x32[m] - min_var_32x32[m]) > 3 * (thresholds[1] >> 3) &&
          max_var_32x32[m] > thresholds[1] >> 1 &&
          (noise_level >= kMedium || cpi->ppi->use_svc ||
           cpi->sf.rt_sf.force_large_partition_blocks ||
           !cpi->sf.rt_sf.use_nonrd_pick_mode)) {
        force_split[1 + m] = 1;
        force_split[0] = 1;
      }
      avg_64x64 += var_64x64;
    }
    // A 64x64 superblock has no 128x128 level, so the root is always split.
    if (is_small_sb) force_split[0] = 1;
  }

  if (!force_split[0]) {
    fill_variance_tree(vt, BLOCK_128X128);
    get_variance(&vt->part_variances.none);
    if (!is_key_frame &&
        vt->part_variances.none.variance > (9 * avg_64x64) >> 5)
      force_split[0] = 1;

    if (!is_key_frame &&
        (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
        max_var_64x64 > thresholds[0] >> 1)
      force_split[0] = 1;
  }

  // The 128x128 partition is only tried when the whole superblock (32 mi
  // units per side) lies inside the tile.
  if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
      !set_vt_partitioning(cpi, x, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (m = 0; m < num_64x64_blocks; ++m) {
      const int x64_idx = ((m & 1) << 4);
      const int y64_idx = ((m >> 1) << 4);
      const int m2 = m << 2;

      // Now go through the entire structure, splitting every block size until
      // we get to one that's got a variance lower than our threshold.
      if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m], BLOCK_64X64,
                               mi_row + y64_idx, mi_col + x64_idx,
                               thresholds[1], BLOCK_16X16,
                               force_split[1 + m])) {
        for (i = 0; i < 4; ++i) {
          const int x32_idx = ((i & 1) << 3);
          const int y32_idx = ((i >> 1) << 3);
          const int i2 = (m2 + i) << 2;
          if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m].split[i],
                                   BLOCK_32X32, (mi_row + y64_idx + y32_idx),
                                   (mi_col + x64_idx + x32_idx), thresholds[2],
                                   BLOCK_16X16, force_split[5 + m2 + i])) {
            for (j = 0; j < 4; ++j) {
              const int x16_idx = ((j & 1) << 2);
              const int y16_idx = ((j >> 1) << 2);
              const int split_index = 21 + i2 + j;
              // For inter frames: if variance4x4downsample[] == 1 for this
              // 16x16 block, then the variance is based on 4x4 down-sampling,
              // so use vt2 in set_vt_partitioning(), otherwise use vt.
              VP16x16 *vtemp =
                  (!is_key_frame && variance4x4downsample[i2 + j] == 1)
                      ? &vt2[i2 + j]
                      : &vt->split[m].split[i].split[j];
              if (!set_vt_partitioning(cpi, x, xd, tile, vtemp, BLOCK_16X16,
                                       mi_row + y64_idx + y32_idx + y16_idx,
                                       mi_col + x64_idx + x32_idx + x16_idx,
                                       thresholds[3], BLOCK_8X8,
                                       force_split[split_index])) {
                // Smallest size handled here: fall back to four 8x8 blocks.
                for (k = 0; k < 4; ++k) {
                  const int x8_idx = (k & 1) << 1;
                  const int y8_idx = (k >> 1) << 1;
                  set_block_size(
                      cpi, x, xd,
                      (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
                      (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
                      BLOCK_8X8);
                }
              }
            }
          }
        }
      }
    }
  }

  if (cpi->sf.rt_sf.short_circuit_low_temp_var) {
    set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
                          ref_frame_partition, mi_col, mi_row);
  }
  chroma_check(cpi, x, bsize, y_sad, is_key_frame);

  // aom_free() accepts NULL, so no guards are needed.
  aom_free(vt2);
  aom_free(vt);
  return 0;
}
1153