1 // Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
2 // Copyright (c) 2017-2020, The rav1e contributors. All rights reserved
3 //
4 // This source code is subject to the terms of the BSD 2 Clause License and
5 // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 // was not distributed with this source code in the LICENSE file, you can
7 // obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 // Media Patent License 1.0 was not distributed with this source code in the
9 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
11 #![allow(non_camel_case_types)]
12
13 use crate::api::*;
14 use crate::cdef::*;
15 use crate::context::*;
16 use crate::cpu_features::CpuFeatureLevel;
17 use crate::deblock::*;
18 use crate::dist::*;
19 use crate::ec::{Writer, WriterCounter, OD_BITRES};
20 use crate::encode_block_with_modes;
21 use crate::encoder::{FrameInvariants, IMPORTANCE_BLOCK_SIZE};
22 use crate::frame::Frame;
23 use crate::frame::*;
24 use crate::header::ReferenceMode;
25 use crate::lrf::*;
26 use crate::luma_ac;
27 use crate::mc::MotionVector;
28 use crate::me::*;
29 use crate::motion_compensate;
30 use crate::partition::RefType::*;
31 use crate::partition::*;
32 use crate::predict::{
33 AngleDelta, IntraEdgeFilterParameters, IntraParam, PredictionMode,
34 RAV1E_INTER_COMPOUND_MODES, RAV1E_INTER_MODES_MINIMAL, RAV1E_INTRA_MODES,
35 };
36 use crate::rdo_tables::*;
37 use crate::tiling::*;
38 use crate::transform::{TxSet, TxSize, TxType, RAV1E_TX_TYPES};
39 use crate::util::{init_slice_repeat_mut, Aligned, CastFromPrimitive, Pixel};
40 use crate::write_tx_blocks;
41 use crate::write_tx_tree;
42 use crate::Tune;
43 use crate::{encode_block_post_cdef, encode_block_pre_cdef};
44
45 use crate::partition::PartitionType::*;
46 use arrayvec::*;
47 use itertools::izip;
48 use std::fmt;
49 use std::mem::MaybeUninit;
50
/// Selects which distortion measure and which rate measure are used while
/// evaluating a coding choice.
#[derive(Copy, Clone, PartialEq)]
pub enum RDOType {
  /// Pixel-domain distortion and exact ec rate
  PixelDistRealRate,
  /// Tx-domain distortion and exact ec rate
  TxDistRealRate,
  /// Tx-domain distortion and txdist-based rate
  TxDistEstRate,
}

impl RDOType {
  /// Returns `true` for the variants that measure distortion in the
  /// transform domain.
  #[inline]
  pub fn needs_tx_dist(self) -> bool {
    match self {
      Self::PixelDistRealRate => false,
      Self::TxDistRealRate | Self::TxDistEstRate => true,
    }
  }

  /// Returns `true` for the variants that use the exact entropy-coder rate,
  /// i.e. every variant except the one with a txdist-based rate.
  #[inline]
  pub fn needs_coeff_rate(self) -> bool {
    match self {
      Self::PixelDistRealRate | Self::TxDistRealRate => true,
      Self::TxDistEstRate => false,
    }
  }
}
79
/// A partition type together with the parameters chosen for each of its
/// partitions and an associated RD cost.
// NOTE(review): no user of this type is visible in this part of the file;
// the field notes below are assumptions to confirm against the callers.
#[derive(Clone)]
pub struct PartitionGroupParameters {
  /// RD cost for the group — presumably the cost of coding all of
  /// `part_modes` with `part_type`; TODO confirm.
  pub rd_cost: f64,
  /// Partition type the group was evaluated with.
  pub part_type: PartitionType,
  /// Parameters of the individual partitions; the container holds at most
  /// four entries.
  pub part_modes: ArrayVec<[PartitionParameters; 4]>,
}
86
/// The coding parameters selected for a single block, together with the RD
/// cost they were selected with. This is the value returned by
/// `rdo_mode_decision`.
#[derive(Clone, Debug)]
pub struct PartitionParameters {
  /// Lowest RD cost found so far (as produced by `compute_rd_cost`).
  pub rd_cost: f64,
  /// Offset of the block inside the tile.
  pub bo: TileBlockOffset,
  /// Size of the block.
  pub bsize: BlockSize,
  /// Luma prediction mode.
  pub pred_mode_luma: PredictionMode,
  /// Chroma prediction mode.
  pub pred_mode_chroma: PredictionMode,
  /// CfL parameters; only replaced when the CfL trial in
  /// `rdo_mode_decision` wins.
  pub pred_cfl_params: CFLParams,
  /// Angle deltas that were passed to the encode together with the modes.
  pub angle_delta: AngleDelta,
  /// Reference frames used by the chosen mode.
  pub ref_frames: [RefType; 2],
  /// Motion vectors used by the chosen mode.
  pub mvs: [MotionVector; 2],
  /// Whether the chosen candidate was evaluated as a skip block.
  pub skip: bool,
  /// First value returned by `encode_block_post_cdef` for the chosen
  /// candidate.
  pub has_coeff: bool,
  /// Transform size picked by `rdo_tx_size_type`.
  pub tx_size: TxSize,
  /// Transform type picked by `rdo_tx_size_type`.
  pub tx_type: TxType,
  /// Segmentation index the chosen candidate was evaluated with.
  pub sidx: u8,
}
104
105 impl Default for PartitionParameters {
default() -> Self106 fn default() -> Self {
107 PartitionParameters {
108 rd_cost: std::f64::MAX,
109 bo: TileBlockOffset::default(),
110 bsize: BlockSize::BLOCK_INVALID,
111 pred_mode_luma: PredictionMode::default(),
112 pred_mode_chroma: PredictionMode::default(),
113 pred_cfl_params: CFLParams::default(),
114 angle_delta: AngleDelta::default(),
115 ref_frames: [RefType::INTRA_FRAME, RefType::NONE_FRAME],
116 mvs: [MotionVector::default(); 2],
117 skip: false,
118 has_coeff: true,
119 tx_size: TxSize::TX_4X4,
120 tx_type: TxType::DCT_DCT,
121 sidx: 0,
122 }
123 }
124 }
125
estimate_rate(qindex: u8, ts: TxSize, fast_distortion: u64) -> u64126 pub fn estimate_rate(qindex: u8, ts: TxSize, fast_distortion: u64) -> u64 {
127 let bs_index = ts as usize;
128 let q_bin_idx = (qindex as usize) / RDO_QUANT_DIV;
129 let bin_idx_down =
130 ((fast_distortion) / RATE_EST_BIN_SIZE).min((RDO_NUM_BINS - 2) as u64);
131 let bin_idx_up = (bin_idx_down + 1).min((RDO_NUM_BINS - 1) as u64);
132 let x0 = (bin_idx_down * RATE_EST_BIN_SIZE) as i64;
133 let x1 = (bin_idx_up * RATE_EST_BIN_SIZE) as i64;
134 let y0 = RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_down as usize] as i64;
135 let y1 = RDO_RATE_TABLE[q_bin_idx][bs_index][bin_idx_up as usize] as i64;
136 let slope = ((y1 - y0) << 8) / (x1 - x0);
137 (y0 + (((fast_distortion as i64 - x0) * slope) >> 8)).max(0) as u64
138 }
139
140 // The microbenchmarks perform better with inlining turned off
141 #[inline(never)]
cdef_dist_wxh_8x8<T: Pixel>( src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, bit_depth: usize, ) -> RawDistortion142 fn cdef_dist_wxh_8x8<T: Pixel>(
143 src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, bit_depth: usize,
144 ) -> RawDistortion {
145 debug_assert!(src1.plane_cfg.xdec == 0);
146 debug_assert!(src1.plane_cfg.ydec == 0);
147 debug_assert!(src2.plane_cfg.xdec == 0);
148 debug_assert!(src2.plane_cfg.ydec == 0);
149
150 let coeff_shift = bit_depth - 8;
151
152 // Sum into columns to improve auto-vectorization
153 let mut sum_s_cols: [u16; 8] = [0; 8];
154 let mut sum_d_cols: [u16; 8] = [0; 8];
155 let mut sum_s2_cols: [u32; 8] = [0; 8];
156 let mut sum_d2_cols: [u32; 8] = [0; 8];
157 let mut sum_sd_cols: [u32; 8] = [0; 8];
158
159 for j in 0..8 {
160 let row1 = &src1[j][0..8];
161 let row2 = &src2[j][0..8];
162 for (sum_s, sum_d, sum_s2, sum_d2, sum_sd, s, d) in izip!(
163 &mut sum_s_cols,
164 &mut sum_d_cols,
165 &mut sum_s2_cols,
166 &mut sum_d2_cols,
167 &mut sum_sd_cols,
168 row1,
169 row2
170 ) {
171 // Don't convert directly to u32 to allow better vectorization
172 let s: u16 = u16::cast_from(*s);
173 let d: u16 = u16::cast_from(*d);
174 *sum_s += s;
175 *sum_d += d;
176
177 // Convert to u32 to avoid overflows when multiplying
178 let s: u32 = s as u32;
179 let d: u32 = d as u32;
180
181 *sum_s2 += s * s;
182 *sum_d2 += d * d;
183 *sum_sd += s * d;
184 }
185 }
186
187 // Sum together the sum of columns
188 let sum_s: i64 =
189 sum_s_cols.iter().map(|&a| u32::cast_from(a)).sum::<u32>() as i64;
190 let sum_d: i64 =
191 sum_d_cols.iter().map(|&a| u32::cast_from(a)).sum::<u32>() as i64;
192 let sum_s2: i64 = sum_s2_cols.iter().sum::<u32>() as i64;
193 let sum_d2: i64 = sum_d2_cols.iter().sum::<u32>() as i64;
194 let sum_sd: i64 = sum_sd_cols.iter().sum::<u32>() as i64;
195
196 // Use sums to calculate distortion
197 let svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
198 let dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
199 let sse = (sum_d2 + sum_s2 - 2 * sum_sd) as f64;
200 //The two constants were tuned for CDEF, but can probably be better tuned for use in general RDO
201 let ssim_boost = (4033_f64 / 16_384_f64)
202 * (svar + dvar + (16_384 << (2 * coeff_shift))) as f64
203 / f64::sqrt(((16_265_089i64 << (4 * coeff_shift)) + svar * dvar) as f64);
204 RawDistortion::new((sse * ssim_boost + 0.5_f64) as u64)
205 }
206
207 #[allow(unused)]
cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>( src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize, bit_depth: usize, compute_bias: F, ) -> Distortion208 pub fn cdef_dist_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>(
209 src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
210 bit_depth: usize, compute_bias: F,
211 ) -> Distortion {
212 assert!(w & 0x7 == 0);
213 assert!(h & 0x7 == 0);
214 debug_assert!(src1.plane_cfg.xdec == 0);
215 debug_assert!(src1.plane_cfg.ydec == 0);
216 debug_assert!(src2.plane_cfg.xdec == 0);
217 debug_assert!(src2.plane_cfg.ydec == 0);
218
219 let mut sum = Distortion::zero();
220 for j in 0isize..h as isize / 8 {
221 for i in 0isize..w as isize / 8 {
222 let area = Area::StartingAt { x: i * 8, y: j * 8 };
223 let value = cdef_dist_wxh_8x8(
224 &src1.subregion(area),
225 &src2.subregion(area),
226 bit_depth,
227 );
228
229 // cdef is always called on non-subsampled planes, so BLOCK_8X8 is
230 // correct here.
231 sum += value * compute_bias(area, BlockSize::BLOCK_8X8);
232 }
233 }
234 sum
235 }
236
237 /// Sum of Squared Error for a wxh block
238 /// Currently limited to w and h of valid blocks
sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>( src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize, compute_bias: F, bit_depth: usize, cpu: CpuFeatureLevel, ) -> Distortion239 pub fn sse_wxh<T: Pixel, F: Fn(Area, BlockSize) -> DistortionScale>(
240 src1: &PlaneRegion<'_, T>, src2: &PlaneRegion<'_, T>, w: usize, h: usize,
241 compute_bias: F, bit_depth: usize, cpu: CpuFeatureLevel,
242 ) -> Distortion {
243 // See get_weighted_sse in src/dist.rs.
244 // Provide a scale to get_weighted_sse for each square region of this size.
245 const CHUNK_SIZE: usize = IMPORTANCE_BLOCK_SIZE >> 1;
246
247 // To bias the distortion correctly, compute it in blocks up to the size
248 // importance block size in a non-subsampled plane.
249 let imp_block_w = CHUNK_SIZE << src1.plane_cfg.xdec;
250 let imp_block_h = CHUNK_SIZE << src1.plane_cfg.ydec;
251
252 let imp_bsize = BlockSize::from_width_and_height(imp_block_w, imp_block_h);
253
254 let n_imp_blocks_w = (w + CHUNK_SIZE - 1) / CHUNK_SIZE;
255 let n_imp_blocks_h = (h + CHUNK_SIZE - 1) / CHUNK_SIZE;
256
257 // TODO: Copying biases into a buffer is slow. It would be best if biases were
258 // passed directly. To do this, we would need different versions of the
259 // weighted sse function for decimated/subsampled data. Also requires
260 // eliminating use of unbiased sse.
261 // It should also be noted that the current copy code does not auto-vectorize.
262
263 // Copy biases into a buffer.
264 let mut buf_storage = Aligned::new(
265 [MaybeUninit::<u32>::uninit(); 128 / CHUNK_SIZE * 128 / CHUNK_SIZE],
266 );
267 let buf_stride = n_imp_blocks_w.next_power_of_two();
268 let buf = init_slice_repeat_mut(
269 &mut buf_storage.data[..buf_stride * n_imp_blocks_h],
270 0,
271 );
272
273 for block_y in 0..n_imp_blocks_h {
274 for block_x in 0..n_imp_blocks_w {
275 let block = Area::StartingAt {
276 x: (block_x * CHUNK_SIZE) as isize,
277 y: (block_y * CHUNK_SIZE) as isize,
278 };
279 buf[block_y * buf_stride + block_x] = compute_bias(block, imp_bsize).0;
280 }
281 }
282
283 Distortion(get_weighted_sse(
284 src1, src2, buf, buf_stride, w, h, bit_depth, cpu,
285 ))
286 }
287
clip_visible_bsize( frame_w: usize, frame_h: usize, bsize: BlockSize, x: usize, y: usize, ) -> (usize, usize)288 pub fn clip_visible_bsize(
289 frame_w: usize, frame_h: usize, bsize: BlockSize, x: usize, y: usize,
290 ) -> (usize, usize) {
291 let blk_w = bsize.width();
292 let blk_h = bsize.height();
293
294 let visible_w: usize = if x + blk_w <= frame_w {
295 blk_w
296 } else if x >= frame_w {
297 0
298 } else {
299 frame_w - x
300 };
301
302 let visible_h: usize = if y + blk_h <= frame_h {
303 blk_h
304 } else if y >= frame_h {
305 0
306 } else {
307 frame_h - y
308 };
309
310 (visible_w, visible_h)
311 }
312
313 // Compute the pixel-domain distortion for an encode
compute_distortion<T: Pixel>( fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize, is_chroma_block: bool, tile_bo: TileBlockOffset, luma_only: bool, ) -> ScaledDistortion314 fn compute_distortion<T: Pixel>(
315 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
316 is_chroma_block: bool, tile_bo: TileBlockOffset, luma_only: bool,
317 ) -> ScaledDistortion {
318 let area = Area::BlockStartingAt { bo: tile_bo.0 };
319 let input_region = ts.input_tile.planes[0].subregion(area);
320 let rec_region = ts.rec.planes[0].subregion(area);
321
322 // clip a block to have visible pixles only
323 let frame_bo = ts.to_frame_block_offset(tile_bo);
324 let (visible_w, visible_h) = clip_visible_bsize(
325 fi.width,
326 fi.height,
327 bsize,
328 frame_bo.0.x << MI_SIZE_LOG2,
329 frame_bo.0.y << MI_SIZE_LOG2,
330 );
331
332 if visible_w == 0 || visible_h == 0 {
333 return ScaledDistortion::zero();
334 }
335
336 let mut distortion = match fi.config.tune {
337 Tune::Psychovisual
338 if bsize.width() >= 8
339 && bsize.height() >= 8
340 && (visible_w & 0x7 == 0)
341 && (visible_h & 0x7 == 0) =>
342 {
343 cdef_dist_wxh(
344 &input_region,
345 &rec_region,
346 visible_w,
347 visible_h,
348 fi.sequence.bit_depth,
349 |bias_area, bsize| {
350 distortion_scale(
351 fi,
352 input_region.subregion(bias_area).frame_block_offset(),
353 bsize,
354 )
355 },
356 )
357 }
358 Tune::Psnr | Tune::Psychovisual => sse_wxh(
359 &input_region,
360 &rec_region,
361 visible_w,
362 visible_h,
363 |bias_area, bsize| {
364 distortion_scale(
365 fi,
366 input_region.subregion(bias_area).frame_block_offset(),
367 bsize,
368 )
369 },
370 fi.sequence.bit_depth,
371 fi.cpu_feature_level,
372 ),
373 } * fi.dist_scale[0];
374
375 if is_chroma_block
376 && !luma_only
377 && fi.sequence.chroma_sampling != ChromaSampling::Cs400
378 {
379 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
380 let chroma_w = if bsize.width() >= 8 || xdec == 0 {
381 (visible_w + xdec) >> xdec
382 } else {
383 (4 + visible_w + xdec) >> xdec
384 };
385 let chroma_h = if bsize.height() >= 8 || ydec == 0 {
386 (visible_h + ydec) >> ydec
387 } else {
388 (4 + visible_h + ydec) >> ydec
389 };
390
391 for p in 1..3 {
392 let input_region = ts.input_tile.planes[p].subregion(area);
393 let rec_region = ts.rec.planes[p].subregion(area);
394 distortion += sse_wxh(
395 &input_region,
396 &rec_region,
397 chroma_w,
398 chroma_h,
399 |bias_area, bsize| {
400 distortion_scale(
401 fi,
402 input_region.subregion(bias_area).frame_block_offset(),
403 bsize,
404 )
405 },
406 fi.sequence.bit_depth,
407 fi.cpu_feature_level,
408 ) * fi.dist_scale[p];
409 }
410 }
411 distortion
412 }
413
414 // Compute the transform-domain distortion for an encode
compute_tx_distortion<T: Pixel>( fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize, is_chroma_block: bool, tile_bo: TileBlockOffset, tx_dist: ScaledDistortion, skip: bool, luma_only: bool, ) -> ScaledDistortion415 fn compute_tx_distortion<T: Pixel>(
416 fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, bsize: BlockSize,
417 is_chroma_block: bool, tile_bo: TileBlockOffset, tx_dist: ScaledDistortion,
418 skip: bool, luma_only: bool,
419 ) -> ScaledDistortion {
420 assert!(fi.config.tune == Tune::Psnr);
421 let area = Area::BlockStartingAt { bo: tile_bo.0 };
422 let input_region = ts.input_tile.planes[0].subregion(area);
423 let rec_region = ts.rec.planes[0].subregion(area);
424
425 let (visible_w, visible_h) = if !skip {
426 (bsize.width(), bsize.height())
427 } else {
428 let frame_bo = ts.to_frame_block_offset(tile_bo);
429 clip_visible_bsize(
430 fi.width,
431 fi.height,
432 bsize,
433 frame_bo.0.x << MI_SIZE_LOG2,
434 frame_bo.0.y << MI_SIZE_LOG2,
435 )
436 };
437
438 if visible_w == 0 || visible_h == 0 {
439 return ScaledDistortion::zero();
440 }
441
442 let mut distortion = if skip {
443 sse_wxh(
444 &input_region,
445 &rec_region,
446 visible_w,
447 visible_h,
448 |bias_area, bsize| {
449 distortion_scale(
450 fi,
451 input_region.subregion(bias_area).frame_block_offset(),
452 bsize,
453 )
454 },
455 fi.sequence.bit_depth,
456 fi.cpu_feature_level,
457 ) * fi.dist_scale[0]
458 } else {
459 tx_dist
460 };
461
462 if is_chroma_block
463 && !luma_only
464 && skip
465 && fi.sequence.chroma_sampling != ChromaSampling::Cs400
466 {
467 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
468 let chroma_w = if bsize.width() >= 8 || xdec == 0 {
469 (visible_w + xdec) >> xdec
470 } else {
471 (4 + visible_w + xdec) >> xdec
472 };
473 let chroma_h = if bsize.height() >= 8 || ydec == 0 {
474 (visible_h + ydec) >> ydec
475 } else {
476 (4 + visible_h + ydec) >> ydec
477 };
478
479 for p in 1..3 {
480 let input_region = ts.input_tile.planes[p].subregion(area);
481 let rec_region = ts.rec.planes[p].subregion(area);
482 distortion += sse_wxh(
483 &input_region,
484 &rec_region,
485 chroma_w,
486 chroma_h,
487 |bias_area, bsize| {
488 distortion_scale(
489 fi,
490 input_region.subregion(bias_area).frame_block_offset(),
491 bsize,
492 )
493 },
494 fi.sequence.bit_depth,
495 fi.cpu_feature_level,
496 ) * fi.dist_scale[p];
497 }
498 }
499 distortion
500 }
501
502 /// Compute a scaling factor to multiply the distortion of a block by,
503 /// this factor is determined using temporal RDO.
distortion_scale<T: Pixel>( fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize, ) -> DistortionScale504 pub fn distortion_scale<T: Pixel>(
505 fi: &FrameInvariants<T>, frame_bo: PlaneBlockOffset, bsize: BlockSize,
506 ) -> DistortionScale {
507 if !fi.config.temporal_rdo() {
508 return DistortionScale::default();
509 }
510 // EncoderConfig::temporal_rdo() should always return false in situations
511 // where distortion is computed on > 8x8 blocks, so we should never hit this
512 // assert.
513 assert!(bsize <= BlockSize::BLOCK_8X8);
514
515 let x = frame_bo.0.x >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
516 let y = frame_bo.0.y >> IMPORTANCE_BLOCK_TO_BLOCK_SHIFT;
517
518 fi.distortion_scales[y * fi.w_in_imp_b + x]
519 }
520
distortion_scale_for( propagate_cost: f64, intra_cost: f64, ) -> DistortionScale521 pub fn distortion_scale_for(
522 propagate_cost: f64, intra_cost: f64,
523 ) -> DistortionScale {
524 // The mbtree paper \cite{mbtree} uses the following formula:
525 //
526 // QP_delta = -strength * log2(1 + (propagate_cost / intra_cost))
527 //
528 // Since this is H.264, this corresponds to the following quantizer:
529 //
530 // Q' = Q * 2^(QP_delta/6)
531 //
532 // Since lambda is proportial to Q^2, this means we want to minimize:
533 //
534 // D + lambda' * R
535 // = D + 2^(QP_delta / 3) * lambda * R
536 //
537 // If we want to keep lambda fixed, we can instead scale distortion and
538 // minimize:
539 //
540 // D * scale + lambda * R
541 //
542 // where:
543 //
544 // scale = 2^(QP_delta / -3)
545 // = (1 + (propagate_cost / intra_cost))^(strength / 3)
546 //
547 // The original paper empirically chooses strength = 2.0, but strength = 1.0
548 // seems to work best in rav1e currently, this may have something to do with
549 // the fact that they use 16x16 blocks whereas our "importance blocks" are
550 // 8x8, but everything should be scale invariant here so that's weird.
551 //
552 // @article{mbtree,
553 // title={A novel macroblock-tree algorithm for high-performance
554 // optimization of dependent video coding in H.264/AVC},
555 // author={Garrett-Glaser, Jason},
556 // journal={Tech. Rep.},
557 // year={2009},
558 // url={https://pdfs.semanticscholar.org/032f/1ab7d9db385780a02eb2d579af8303b266d2.pdf}
559 // }
560
561 if intra_cost == 0. {
562 return DistortionScale::default(); // no scaling
563 }
564
565 let strength = 1.0; // empirical, see comment above
566 let frac = (intra_cost + propagate_cost) / intra_cost;
567 DistortionScale::new(frac.powf(strength / 3.0))
568 }
569
/// Fixed point arithmetic version of distortion scale
#[repr(transparent)]
#[derive(Copy, Clone)]
pub struct DistortionScale(pub u32);

/// A distortion value that has not been multiplied by a
/// [`DistortionScale`] yet; doing so yields a [`Distortion`].
#[repr(transparent)]
pub struct RawDistortion(u64);

/// A distortion value; multiplying it by an `f64` weight yields a
/// [`ScaledDistortion`].
#[repr(transparent)]
pub struct Distortion(pub u64);

/// A distortion value after the final `f64` weighting.
#[repr(transparent)]
pub struct ScaledDistortion(u64);

impl DistortionScale {
  /// Bits past the radix point
  const SHIFT: u32 = 12;
  /// Number of bits used. Determines the max value.
  /// 24 bits is likely excessive.
  const BITS: u32 = 24;

  /// Converts a floating point scale to fixed point, rounding to nearest
  /// and clamping to the largest value representable in `BITS` bits.
  #[inline]
  pub fn new(scale: f64) -> Self {
    let one = f64::from(1u32 << Self::SHIFT);
    let largest = ((1u64 << Self::BITS) - 1) as f64;
    let rounded = scale * one + 0.5;
    Self(rounded.min(largest) as u32)
  }

  /// Multiply, round and shift
  /// Internal implementation, so don't use multiply trait.
  #[inline]
  fn mul_u64(self, dist: u64) -> u64 {
    // Half of one unit in the last place, for round-to-nearest.
    let half: u64 = (1 << Self::SHIFT) >> 1;
    (u64::from(self.0) * dist + half) >> Self::SHIFT
  }
}

// Default value for DistortionScale is a fixed point 1
impl Default for DistortionScale {
  #[inline]
  fn default() -> Self {
    DistortionScale(1 << Self::SHIFT)
  }
}

// Debug output is the scale converted back to floating point.
impl fmt::Debug for DistortionScale {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    let as_float = f64::from(*self);
    write!(f, "{}", as_float)
  }
}

impl From<DistortionScale> for f64 {
  /// Converts the fixed point scale back to floating point.
  #[inline]
  fn from(scale: DistortionScale) -> Self {
    let one = f64::from(1u32 << DistortionScale::SHIFT);
    f64::from(scale.0) / one
  }
}

impl RawDistortion {
  /// Wraps an unscaled distortion value.
  #[inline]
  pub const fn new(dist: u64) -> Self {
    RawDistortion(dist)
  }
}

impl std::ops::Mul<DistortionScale> for RawDistortion {
  type Output = Distortion;

  /// Applies a fixed point scale with rounding.
  #[inline]
  fn mul(self, rhs: DistortionScale) -> Distortion {
    let RawDistortion(raw) = self;
    Distortion(rhs.mul_u64(raw))
  }
}

impl Distortion {
  /// A distortion of zero, the starting value for accumulation.
  #[inline]
  pub const fn zero() -> Self {
    Distortion(0)
  }
}

impl std::ops::Mul<f64> for Distortion {
  type Output = ScaledDistortion;

  /// Applies a floating point weight; the product is truncated to `u64`.
  #[inline]
  fn mul(self, rhs: f64) -> ScaledDistortion {
    let weighted = self.0 as f64 * rhs;
    ScaledDistortion(weighted as u64)
  }
}

impl std::ops::AddAssign for Distortion {
  #[inline]
  fn add_assign(&mut self, other: Self) {
    let Distortion(rhs) = other;
    self.0 += rhs;
  }
}

impl ScaledDistortion {
  /// A distortion of zero, the starting value for accumulation.
  #[inline]
  pub const fn zero() -> Self {
    ScaledDistortion(0)
  }
}

impl std::ops::AddAssign for ScaledDistortion {
  #[inline]
  fn add_assign(&mut self, other: Self) {
    let ScaledDistortion(rhs) = other;
    self.0 += rhs;
  }
}
678
compute_rd_cost<T: Pixel>( fi: &FrameInvariants<T>, rate: u32, distortion: ScaledDistortion, ) -> f64679 pub fn compute_rd_cost<T: Pixel>(
680 fi: &FrameInvariants<T>, rate: u32, distortion: ScaledDistortion,
681 ) -> f64 {
682 let rate_in_bits = (rate as f64) / ((1 << OD_BITRES) as f64);
683 distortion.0 as f64 + fi.lambda * rate_in_bits
684 }
685
rdo_tx_size_type<T: Pixel>( fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset, luma_mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2], skip: bool, ) -> (TxSize, TxType)686 pub fn rdo_tx_size_type<T: Pixel>(
687 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
688 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
689 luma_mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
690 skip: bool,
691 ) -> (TxSize, TxType) {
692 let is_inter = !luma_mode.is_intra();
693 let mut tx_size = max_txsize_rect_lookup[bsize as usize];
694
695 if fi.enable_inter_txfm_split && is_inter && !skip {
696 tx_size = sub_tx_size_map[tx_size as usize]; // Always choose one level split size
697 }
698
699 let mut best_tx_type = TxType::DCT_DCT;
700 let mut best_tx_size = tx_size;
701 let mut best_rd = std::f64::MAX;
702
703 let do_rdo_tx_size =
704 fi.tx_mode_select && fi.config.speed_settings.rdo_tx_decision && !is_inter;
705 let rdo_tx_depth = if do_rdo_tx_size { 2 } else { 0 };
706 let mut cw_checkpoint: Option<ContextWriterCheckpoint> = None;
707
708 for _ in 0..=rdo_tx_depth {
709 let tx_set = get_tx_set(tx_size, is_inter, fi.use_reduced_tx_set);
710
711 let do_rdo_tx_type = tx_set > TxSet::TX_SET_DCTONLY
712 && fi.config.speed_settings.rdo_tx_decision
713 && !is_inter
714 && !skip;
715
716 if !do_rdo_tx_size && !do_rdo_tx_type {
717 return (best_tx_size, best_tx_type);
718 };
719
720 let tx_types =
721 if do_rdo_tx_type { RAV1E_TX_TYPES } else { &[TxType::DCT_DCT] };
722
723 // Luma plane transform type decision
724 let (tx_type, rd_cost) = rdo_tx_type_decision(
725 fi,
726 ts,
727 cw,
728 &mut cw_checkpoint,
729 luma_mode,
730 ref_frames,
731 mvs,
732 bsize,
733 tile_bo,
734 tx_size,
735 tx_set,
736 tx_types,
737 );
738
739 if rd_cost < best_rd {
740 best_tx_size = tx_size;
741 best_tx_type = tx_type;
742 best_rd = rd_cost;
743 }
744
745 debug_assert!(tx_size.width_log2() <= bsize.width_log2());
746 debug_assert!(tx_size.height_log2() <= bsize.height_log2());
747 debug_assert!(
748 tx_size.sqr() <= TxSize::TX_32X32 || tx_type == TxType::DCT_DCT
749 );
750
751 let next_tx_size = sub_tx_size_map[tx_size as usize];
752
753 if next_tx_size == tx_size {
754 break;
755 } else {
756 tx_size = next_tx_size;
757 };
758 }
759
760 (best_tx_size, best_tx_type)
761 }
762
763 #[inline]
dmv_in_range(mv: MotionVector, ref_mv: MotionVector) -> bool764 fn dmv_in_range(mv: MotionVector, ref_mv: MotionVector) -> bool {
765 let diff_row = mv.row as i32 - ref_mv.row as i32;
766 let diff_col = mv.col as i32 - ref_mv.col as i32;
767 diff_row >= MV_LOW
768 && diff_row <= MV_UPP
769 && diff_col >= MV_LOW
770 && diff_col <= MV_UPP
771 }
772
/// Evaluates one luma prediction mode against every chroma mode in
/// `mode_set_chroma` and records the cheapest combination in `best`.
///
/// Each candidate is trial-encoded with a counting writer, its RD cost is
/// computed with `compute_rd_cost`, and `cw` is rolled back to
/// `cw_checkpoint` afterwards. `best` is only overwritten by a candidate
/// whose cost is strictly lower than `best.rd_cost`.
///
/// For inter modes the skip variant is tried first, and the non-skip
/// variant is omitted when the skip pass left a zero-distortion candidate
/// in `best`. Intra modes are only tried as non-skip.
///
/// Returns without evaluating anything when a mode that codes a new motion
/// vector has a vector whose difference to its reference is out of range
/// (see `dmv_in_range`).
#[inline]
fn luma_chroma_mode_rdo<T: Pixel>(
  luma_mode: PredictionMode, fi: &FrameInvariants<T>, bsize: BlockSize,
  tile_bo: TileBlockOffset, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, rdo_type: RDOType,
  cw_checkpoint: &ContextWriterCheckpoint, best: &mut PartitionParameters,
  mvs: [MotionVector; 2], ref_frames: [RefType; 2],
  mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool,
  mode_context: usize, mv_stack: &ArrayVec<[CandidateMV; 9]>,
  angle_delta: AngleDelta,
) {
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;

  let is_chroma_block =
    has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);

  if !luma_mode_is_intra {
    // Reference vectors come from the first entry of the MV stack, or are
    // zero when the stack is empty.
    let ref_mvs = if mv_stack.is_empty() {
      [MotionVector::default(); 2]
    } else {
      [mv_stack[0].this_mv, mv_stack[0].comp_mv]
    };

    // Modes that code a new first vector.
    if (luma_mode == PredictionMode::NEWMV
      || luma_mode == PredictionMode::NEW_NEWMV
      || luma_mode == PredictionMode::NEW_NEARESTMV)
      && !dmv_in_range(mvs[0], ref_mvs[0])
    {
      return;
    }

    // Modes that code a new second vector.
    if (luma_mode == PredictionMode::NEW_NEWMV
      || luma_mode == PredictionMode::NEAREST_NEWMV)
      && !dmv_in_range(mvs[1], ref_mvs[1])
    {
      return;
    }
  }

  // Find the best chroma prediction mode for the current luma prediction mode
  //
  // Returns whether the last candidate that improved `best` during this
  // call had zero distortion (false when nothing improved `best`).
  let mut chroma_rdo = |skip: bool| -> bool {
    let mut zero_distortion = false;

    // If skip is true or segmentation is turned off, sidx is not coded.
    // Otherwise index 2 is only tried when the base quantizer index plus
    // segment 2's ALT_Q value is at least 1.
    let sidx_range = if skip || !fi.enable_segmentation {
      0..=0
    } else if fi.base_q_idx as i16
      + ts.segmentation.data[2][SegLvl::SEG_LVL_ALT_Q as usize]
      < 1
    {
      0..=1
    } else {
      0..=2
    };

    for sidx in sidx_range {
      cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, sidx);

      // The transform choice is made once per segmentation index and
      // shared by all chroma modes.
      let (tx_size, tx_type) = rdo_tx_size_type(
        fi, ts, cw, bsize, tile_bo, luma_mode, ref_frames, mvs, skip,
      );
      for &chroma_mode in mode_set_chroma.iter() {
        // Counting writer: only the number of bits written is of interest.
        let wr = &mut WriterCounter::new();
        let tell = wr.tell_frac();

        if bsize >= BlockSize::BLOCK_8X8 && bsize.is_sqr() {
          cw.write_partition(
            wr,
            tile_bo,
            PartitionType::PARTITION_NONE,
            bsize,
          );
        }

        // TODO(yushin): luma and chroma would have different decision based on chroma format
        // Intra blocks whose transform is not the size of the block
        // request reconstructed pixels.
        let need_recon_pixel =
          luma_mode_is_intra && tx_size.block_size() != bsize;

        encode_block_pre_cdef(&fi.sequence, ts, cw, wr, bsize, tile_bo, skip);
        let (has_coeff, tx_dist) = encode_block_post_cdef(
          fi,
          ts,
          cw,
          wr,
          luma_mode,
          chroma_mode,
          angle_delta,
          ref_frames,
          mvs,
          bsize,
          tile_bo,
          skip,
          CFLParams::default(),
          tx_size,
          tx_type,
          mode_context,
          mv_stack,
          rdo_type,
          need_recon_pixel,
          false,
        );

        // Rate is everything written since `tell` was taken.
        let rate = wr.tell_frac() - tell;
        // Transform-domain distortion is only used when enabled and when
        // pixel reconstruction was not requested.
        let distortion = if fi.use_tx_domain_distortion && !need_recon_pixel {
          compute_tx_distortion(
            fi,
            ts,
            bsize,
            is_chroma_block,
            tile_bo,
            tx_dist,
            skip,
            false,
          )
        } else {
          compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false)
        };
        let is_zero_dist = distortion.0 == 0;
        let rd = compute_rd_cost(fi, rate, distortion);
        if rd < best.rd_cost {
          best.rd_cost = rd;
          best.pred_mode_luma = luma_mode;
          best.pred_mode_chroma = chroma_mode;
          best.angle_delta = angle_delta;
          best.ref_frames = ref_frames;
          best.mvs = mvs;
          best.skip = skip;
          best.has_coeff = has_coeff;
          best.tx_size = tx_size;
          best.tx_type = tx_type;
          best.sidx = sidx;
          zero_distortion = is_zero_dist;
        }

        // Undo the trial encode before the next candidate.
        cw.rollback(cw_checkpoint);
      }
    }

    zero_distortion
  };

  // Don't skip when using intra modes
  let zero_distortion =
    if !luma_mode_is_intra { chroma_rdo(true) } else { false };
  // early skip
  if !zero_distortion {
    chroma_rdo(false);
  }
}
923
// RDO-based mode decision
//
// Picks the prediction parameters for the block at `tile_bo`:
//
// 1. On frames that allow inter prediction, runs
//    `inter_frame_rdo_mode_decision`; otherwise starts from the default
//    parameters.
// 2. Unless the best candidate so far is a skip block, runs
//    `intra_frame_rdo_mode_decision`, passing the current best along.
// 3. If the winner is intra, the block carries chroma and CfL is allowed
//    for `bsize`, additionally tries `UV_CFL_PRED` as the chroma mode.
//
// Before returning, the chosen luma mode, reference frames and motion
// vectors are stored in the block context of `cw`.
//
// Panics if the frame allows inter prediction but is a key frame, or if the
// final RD cost is not >= 0.
pub fn rdo_mode_decision<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
  inter_cfg: &InterConfig,
) -> PartitionParameters {
  let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
  // Checkpoint handed to the inter and intra searches.
  let cw_checkpoint = cw.checkpoint();

  // The estimated-rate setting takes precedence over the plain tx-domain
  // distortion setting.
  let rdo_type = if fi.use_tx_domain_rate {
    RDOType::TxDistEstRate
  } else if fi.use_tx_domain_distortion {
    RDOType::TxDistRealRate
  } else {
    RDOType::PixelDistRealRate
  };

  let mut best = if fi.frame_type.has_inter() {
    assert!(fi.frame_type != FrameType::KEY);

    inter_frame_rdo_mode_decision(
      fi,
      ts,
      cw,
      bsize,
      tile_bo,
      inter_cfg,
      &cw_checkpoint,
      rdo_type,
    )
  } else {
    PartitionParameters::default()
  };

  let is_chroma_block =
    has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);

  // A skip winner from the inter search is kept as is; intra modes are not
  // tried in that case.
  if !best.skip {
    best = intra_frame_rdo_mode_decision(
      fi,
      ts,
      cw,
      bsize,
      tile_bo,
      &cw_checkpoint,
      rdo_type,
      best,
      is_chroma_block,
    );
  }

  // CfL trial on top of the winning intra mode.
  if best.pred_mode_luma.is_intra() && is_chroma_block && bsize.cfl_allowed() {
    cw.bc.blocks.set_segmentation_idx(tile_bo, bsize, best.sidx);

    let chroma_mode = PredictionMode::UV_CFL_PRED;
    // Fresh checkpoint for this trial; shadows the one taken at the top.
    let cw_checkpoint = cw.checkpoint();
    let wr: &mut dyn Writer = &mut WriterCounter::new();
    let angle_delta = AngleDelta { y: best.angle_delta.y, uv: 0 };

    // Run the transform blocks with the best luma mode and discard what was
    // written to `cw`.
    // NOTE(review): presumably this leaves reconstructed luma in `ts` for
    // `rdo_cfl_alpha` below — confirm against `write_tx_blocks`.
    write_tx_blocks(
      fi,
      ts,
      cw,
      wr,
      best.pred_mode_luma,
      best.pred_mode_luma,
      angle_delta,
      tile_bo,
      bsize,
      best.tx_size,
      best.tx_type,
      false,
      CFLParams::default(),
      true,
      rdo_type,
      true,
    );
    cw.rollback(&cw_checkpoint);
    if fi.sequence.chroma_sampling != ChromaSampling::Cs400 {
      // Only continue when `rdo_cfl_alpha` yields CfL parameters.
      if let Some(cfl) = rdo_cfl_alpha(ts, tile_bo, bsize, best.tx_size, fi) {
        let wr: &mut dyn Writer = &mut WriterCounter::new();
        let tell = wr.tell_frac();

        encode_block_pre_cdef(
          &fi.sequence,
          ts,
          cw,
          wr,
          bsize,
          tile_bo,
          best.skip,
        );
        let (has_coeff, _) = encode_block_post_cdef(
          fi,
          ts,
          cw,
          wr,
          best.pred_mode_luma,
          chroma_mode,
          angle_delta,
          best.ref_frames,
          best.mvs,
          bsize,
          tile_bo,
          best.skip,
          cfl,
          best.tx_size,
          best.tx_type,
          0,
          &[],
          rdo_type,
          true, // For CFL, luma should be always reconstructed.
          false,
        );

        let rate = wr.tell_frac() - tell;

        // For CFL, tx-domain distortion is not an option.
        let distortion =
          compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, false);
        let rd = compute_rd_cost(fi, rate, distortion);
        // Only the chroma-related fields change when CfL wins.
        if rd < best.rd_cost {
          best.rd_cost = rd;
          best.pred_mode_chroma = chroma_mode;
          best.angle_delta = angle_delta;
          best.has_coeff = has_coeff;
          best.pred_cfl_params = cfl;
        }

        cw.rollback(&cw_checkpoint);
      }
    }
  }

  // Record the decision in the block context.
  cw.bc.blocks.set_mode(tile_bo, bsize, best.pred_mode_luma);
  cw.bc.blocks.set_ref_frames(tile_bo, bsize, best.ref_frames);
  cw.bc.blocks.set_motion_vectors(tile_bo, bsize, best.mvs);

  assert!(best.rd_cost >= 0_f64);

  // Same as `best`, but with the block position and size filled in.
  PartitionParameters {
    bo: tile_bo,
    bsize,
    pred_mode_luma: best.pred_mode_luma,
    pred_mode_chroma: best.pred_mode_chroma,
    pred_cfl_params: best.pred_cfl_params,
    angle_delta: best.angle_delta,
    ref_frames: best.ref_frames,
    mvs: best.mvs,
    rd_cost: best.rd_cost,
    skip: best.skip,
    has_coeff: best.has_coeff,
    tx_size: best.tx_size,
    tx_type: best.tx_type,
    sidx: best.sidx,
  }
}
1081
/// Runs the inter-prediction part of the mode search for one block.
///
/// The function collects candidate `(prediction mode, reference set index)`
/// pairs for the allowed reference frames, works out the motion vectors each
/// candidate would use, optionally ranks the candidates by luma SATD, and
/// hands the leading ones to `luma_chroma_mode_rdo` together with
/// `&mut best`.
///
/// Returns `best`, which starts out as `PartitionParameters::default()`.
fn inter_frame_rdo_mode_decision<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
  inter_cfg: &InterConfig, cw_checkpoint: &ContextWriterCheckpoint,
  rdo_type: RDOType,
) -> PartitionParameters {
  let mut best = PartitionParameters::default();

  // we can never have more than 7 reference frame sets
  let mut ref_frames_set = ArrayVec::<[_; 7]>::new();
  // again, max of 7 ref slots
  let mut ref_slot_set = ArrayVec::<[_; 7]>::new();
  // our implementation never returns more than 3 at the moment
  // NOTE(review): one entry is pushed per `ref_frames_set` entry plus one for
  // the compound candidate, so this capacity presumably relies on the allowed
  // reference frames mapping to few distinct slots — confirm.
  let mut mvs_from_me = ArrayVec::<[_; 3]>::new();
  // Indices into `ref_frames_set` of the first forward / backward reference
  // encountered; they are combined into the compound candidate further down.
  let mut fwdref = None;
  let mut bwdref = None;

  for i in inter_cfg.allowed_ref_frames().iter().copied() {
    // Don't search LAST3 since it's used only for probs
    if i == LAST3_FRAME {
      continue;
    }

    // Only add a reference frame whose slot has not been seen yet, so the
    // same slot is not searched twice.
    if !ref_slot_set.contains(&fi.ref_frames[i.to_index()]) {
      if fwdref == None && i.is_fwd_ref() {
        fwdref = Some(ref_frames_set.len());
      }
      if bwdref == None && i.is_bwd_ref() {
        bwdref = Some(ref_frames_set.len());
      }
      ref_frames_set.push([i, NONE_FRAME]);
      let slot_idx = fi.ref_frames[i.to_index()];
      ref_slot_set.push(slot_idx);
    }
  }
  assert!(!ref_frames_set.is_empty());

  // `inter_mode_set`, `mvs_set` and `satds` are filled in lockstep (one entry
  // per candidate) and zipped together before the final search.
  let mut inter_mode_set = ArrayVec::<[(PredictionMode, usize); 20]>::new();
  let mut mvs_set = ArrayVec::<[[MotionVector; 2]; 20]>::new();
  let mut satds = ArrayVec::<[u32; 20]>::new();
  // `mv_stacks` and `mode_contexts` are indexed like `ref_frames_set`.
  let mut mv_stacks = ArrayVec::<[_; 20]>::new();
  let mut mode_contexts = ArrayVec::<[_; 7]>::new();

  for (i, &ref_frames) in ref_frames_set.iter().enumerate() {
    let mut mv_stack = ArrayVec::<[CandidateMV; 9]>::new();
    mode_contexts.push(cw.find_mvrefs(
      tile_bo,
      ref_frames,
      &mut mv_stack,
      bsize,
      fi,
      false,
    ));

    // Up to two predictors from the top of the MV stack are passed to motion
    // estimation; missing entries stay at the default vector.
    let mut pmv = [MotionVector::default(); 2];
    if !mv_stack.is_empty() {
      pmv[0] = mv_stack[0].this_mv;
    }
    if mv_stack.len() > 1 {
      pmv[1] = mv_stack[1].this_mv;
    }

    let res = motion_estimation(fi, ts, bsize, tile_bo, ref_frames[0], pmv);
    let b_me = res.0;

    mvs_from_me.push([b_me, MotionVector::default()]);

    for &x in RAV1E_INTER_MODES_MINIMAL {
      inter_mode_set.push((x, i));
    }
    if !mv_stack.is_empty() {
      inter_mode_set.push((PredictionMode::NEAR0MV, i));
    }
    if mv_stack.len() >= 2 {
      inter_mode_set.push((PredictionMode::GLOBALMV, i));
    }
    let include_near_mvs = fi.config.speed_settings.include_near_mvs;
    if include_near_mvs {
      if mv_stack.len() >= 3 {
        inter_mode_set.push((PredictionMode::NEAR1MV, i));
      }
      if mv_stack.len() >= 4 {
        inter_mode_set.push((PredictionMode::NEAR2MV, i));
      }
    }
    // NEWMV is only added when the estimated vector is non-zero and does not
    // coincide with one of the leading MV stack entries (the first 4 when
    // near MVs are enabled, otherwise the first 2).
    let same_row_col = |x: &CandidateMV| {
      x.this_mv.row == mvs_from_me[i][0].row
        && x.this_mv.col == mvs_from_me[i][0].col
    };
    if !mv_stack
      .iter()
      .take(if include_near_mvs { 4 } else { 2 })
      .any(same_row_col)
      && (mvs_from_me[i][0].row != 0 || mvs_from_me[i][0].col != 0)
    {
      inter_mode_set.push((PredictionMode::NEWMV, i));
    }

    mv_stacks.push(mv_stack);
  }

  let sz = bsize.width_mi().min(bsize.height_mi());

  // To use non single reference modes, block width and height must be greater than 4.
  if fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 {
    // Adding compound candidate
    if let Some(r0) = fwdref {
      if let Some(r1) = bwdref {
        // The compound entry is appended after all single-reference entries,
        // so its index is `ref_frames_set.len() - 1` from here on.
        let ref_frames = [ref_frames_set[r0][0], ref_frames_set[r1][0]];
        ref_frames_set.push(ref_frames);
        // Reuse the motion estimation results of the two single references.
        let mv0 = mvs_from_me[r0][0];
        let mv1 = mvs_from_me[r1][0];
        mvs_from_me.push([mv0, mv1]);
        let mut mv_stack = ArrayVec::<[CandidateMV; 9]>::new();
        mode_contexts.push(cw.find_mvrefs(
          tile_bo,
          ref_frames,
          &mut mv_stack,
          bsize,
          fi,
          true,
        ));
        for &x in RAV1E_INTER_COMPOUND_MODES {
          // exclude any NEAR mode based on speed setting
          if fi.config.speed_settings.include_near_mvs || !x.has_nearmv() {
            let mv_stack_idx = ref_frames_set.len() - 1;
            // exclude NEAR modes if the mv_stack is too short
            if !(x.has_nearmv() && x.ref_mv_idx() >= mv_stack.len()) {
              inter_mode_set.push((x, mv_stack_idx));
            }
          }
        }
        mv_stacks.push(mv_stack);
      }
    }
  }

  // Number of candidates that go through the full RDO search below.
  let num_modes_rdo = if fi.config.speed_settings.prediction_modes
    >= PredictionModesSetting::ComplexAll
  {
    inter_mode_set.len()
  } else {
    9 // This number is determined by AWCY test
  };

  inter_mode_set.iter().for_each(|&(luma_mode, i)| {
    // Motion vectors implied by the mode: from motion estimation for NEW
    // modes, from the MV stack for NEAREST/NEAR modes, and the default vector
    // for GLOBAL modes or when the stack has no matching entry.
    let mvs = match luma_mode {
      PredictionMode::NEWMV | PredictionMode::NEW_NEWMV => mvs_from_me[i],
      PredictionMode::NEARESTMV | PredictionMode::NEAREST_NEARESTMV => {
        if !mv_stacks[i].is_empty() {
          [mv_stacks[i][0].this_mv, mv_stacks[i][0].comp_mv]
        } else {
          [MotionVector::default(); 2]
        }
      }
      PredictionMode::NEAR0MV | PredictionMode::NEAR_NEAR0MV => {
        if mv_stacks[i].len() > 1 {
          [mv_stacks[i][1].this_mv, mv_stacks[i][1].comp_mv]
        } else {
          [MotionVector::default(); 2]
        }
      }
      PredictionMode::NEAR1MV
      | PredictionMode::NEAR2MV
      | PredictionMode::NEAR_NEAR1MV
      | PredictionMode::NEAR_NEAR2MV => [
        mv_stacks[i][luma_mode.ref_mv_idx()].this_mv,
        mv_stacks[i][luma_mode.ref_mv_idx()].comp_mv,
      ],
      PredictionMode::NEAREST_NEWMV => {
        [mv_stacks[i][0].this_mv, mvs_from_me[i][1]]
      }
      PredictionMode::NEW_NEARESTMV => {
        [mvs_from_me[i][0], mv_stacks[i][0].comp_mv]
      }
      PredictionMode::GLOBALMV | PredictionMode::GLOBAL_GLOBALMV => {
        [MotionVector::default(); 2]
      }
      _ => {
        unimplemented!();
      }
    };
    mvs_set.push(mvs);

    // Calculate SATD for each mode
    // (only when the candidate list will be cut down to `num_modes_rdo`;
    // otherwise a zero placeholder keeps `satds` aligned with the other
    // arrays)
    if num_modes_rdo != inter_mode_set.len() {
      let tile_rect = ts.tile_rect();
      let rec = &mut ts.rec.planes[0];
      let po = tile_bo.plane_offset(rec.plane_cfg);
      let mut rec_region =
        rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });

      // Predict the luma plane into the reconstruction buffer ...
      luma_mode.predict_inter(
        fi,
        tile_rect,
        0,
        po,
        &mut rec_region,
        bsize.width(),
        bsize.height(),
        ref_frames_set[i],
        mvs,
        &mut ts.inter_compound_buffers,
      );

      // ... and compare the prediction against the source block.
      let plane_org = ts.input_tile.planes[0]
        .subregion(Area::BlockStartingAt { bo: tile_bo.0 });
      let plane_ref = rec_region.as_const();

      let satd = get_satd(
        &plane_org,
        &plane_ref,
        bsize,
        fi.sequence.bit_depth,
        fi.cpu_feature_level,
      );
      satds.push(satd);
    } else {
      satds.push(0);
    }
  });

  // Order candidates by ascending SATD when only a subset will be searched.
  let mut sorted =
    izip!(inter_mode_set, mvs_set, satds).collect::<ArrayVec<[_; 20]>>();
  if num_modes_rdo != sorted.len() {
    sorted.sort_by_key(|((_mode, _i), _mvs, satd)| *satd);
  }

  sorted.iter().take(num_modes_rdo).for_each(
    |&((luma_mode, i), mvs, _satd)| {
      // The only chroma mode offered alongside an inter luma mode is the
      // luma mode itself.
      let mode_set_chroma = ArrayVec::from([luma_mode]);

      luma_chroma_mode_rdo(
        luma_mode,
        fi,
        bsize,
        tile_bo,
        ts,
        cw,
        rdo_type,
        cw_checkpoint,
        &mut best,
        mvs,
        ref_frames_set[i],
        &mode_set_chroma,
        false,
        mode_contexts[i],
        &mv_stacks[i],
        AngleDelta::default(),
      );
    },
  );

  best
}
1337
/// Runs the intra-prediction part of the mode search for one block.
///
/// The intra modes are first ordered using the intra-mode CDF, then all but
/// the leading `num_modes_rdo / 2` are re-ordered by luma SATD. The first
/// `num_modes_rdo` modes of the resulting list are passed to
/// `luma_chroma_mode_rdo` together with `&mut best`. When
/// `fine_directional_intra` is enabled and the block is at least 8x8, the
/// angle deltas of the current best mode are searched as well.
///
/// `best` is the result so far (e.g. from the inter search); the possibly
/// updated value is returned.
fn intra_frame_rdo_mode_decision<T: Pixel>(
  fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
  cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
  cw_checkpoint: &ContextWriterCheckpoint, rdo_type: RDOType,
  mut best: PartitionParameters, is_chroma_block: bool,
) -> PartitionParameters {
  let num_modes_rdo: usize;
  let mut modes = ArrayVec::<[_; INTRA_MODES]>::new();

  // Reduce number of prediction modes at higher speed levels
  num_modes_rdo = if (fi.frame_type == FrameType::KEY
    && fi.config.speed_settings.prediction_modes
      >= PredictionModesSetting::ComplexKeyframes)
    || (fi.frame_type.has_inter()
      && fi.config.speed_settings.prediction_modes
        >= PredictionModesSetting::ComplexAll)
  {
    7
  } else {
    3
  };

  let intra_mode_set = RAV1E_INTRA_MODES;

  // Find mode with lowest rate cost
  {
    // For each mode, take the difference between consecutive CDF entries
    // (starting from 32768). The bitwise NOT makes the key decrease as the
    // difference grows, so the ascending sort below puts the mode with the
    // largest difference first.
    let probs_all = if fi.frame_type.has_inter() {
      cw.get_cdf_intra_mode(bsize)
    } else {
      cw.get_cdf_intra_mode_kf(tile_bo)
    }
    .iter()
    .take(INTRA_MODES)
    .scan(32768, |z, &a| {
      let d = *z - a;
      *z = a;
      Some(!d)
    })
    .collect::<ArrayVec<[_; INTRA_MODES]>>();

    modes.try_extend_from_slice(intra_mode_set).unwrap();
    modes.sort_by_key(|&a| probs_all[a as usize]);
  }

  // If tx partition (i.e. fi.tx_mode_select) is enabled, the below intra prediction screening
  // may be improved by emulating prediction for each tx block.
  {
    let satds = {
      // FIXME: If tx partition is used, this whole sads block should be fixed
      let tx_size = bsize.tx_size();
      let edge_buf = {
        let rec = &ts.rec.planes[0].as_const();
        let po = tile_bo.plane_offset(rec.plane_cfg);
        // FIXME: If tx partition is used, get_intra_edges() should be called for each tx block
        get_intra_edges(
          rec,
          tile_bo,
          0,
          0,
          bsize,
          po,
          tx_size,
          fi.sequence.bit_depth,
          None,
          fi.sequence.enable_intra_edge_filter,
          IntraParam::None,
        )
      };

      // Edge filter parameters are only built when the sequence enables the
      // intra edge filter, and only handed to directional modes below.
      let ief_params = if fi.sequence.enable_intra_edge_filter {
        let above_block_info = ts.above_block_info(tile_bo, 0, 0);
        let left_block_info = ts.left_block_info(tile_bo, 0, 0);
        Some(IntraEdgeFilterParameters::new(
          0,
          above_block_info,
          left_block_info,
        ))
      } else {
        None
      };

      // Indexed by mode value. The leading `num_modes_rdo / 2` modes are
      // skipped, so their entries stay 0; they are not part of the slice that
      // is re-sorted afterwards.
      let mut satds_all = [0; INTRA_MODES];
      for &luma_mode in modes.iter().skip(num_modes_rdo / 2) {
        let tile_rect = ts.tile_rect();
        let rec = &mut ts.rec.planes[0];
        let mut rec_region =
          rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
        // FIXME: If tx partition is used, luma_mode.predict_intra() should be called for each tx block
        luma_mode.predict_intra(
          tile_rect,
          &mut rec_region,
          tx_size,
          fi.sequence.bit_depth,
          &[0i16; 2],
          IntraParam::None,
          if luma_mode.is_directional() { ief_params } else { None },
          &edge_buf,
          fi.cpu_feature_level,
        );

        // Compare the prediction just written to the reconstruction buffer
        // against the source block.
        let plane_org = ts.input_tile.planes[0]
          .subregion(Area::BlockStartingAt { bo: tile_bo.0 });
        let plane_ref = rec_region.as_const();

        satds_all[luma_mode as usize] = get_satd(
          &plane_org,
          &plane_ref,
          tx_size.block_size(),
          fi.sequence.bit_depth,
          fi.cpu_feature_level,
        );
      }
      satds_all
    };

    // Keep the leading modes in CDF order; order the rest by ascending SATD.
    modes[num_modes_rdo / 2..].sort_by_key(|&a| satds[a as usize]);
  }

  debug_assert!(num_modes_rdo >= 1);

  modes.iter().take(num_modes_rdo).for_each(|&luma_mode| {
    let mvs = [MotionVector::default(); 2];
    let ref_frames = [INTRA_FRAME, NONE_FRAME];
    // Chroma candidates: the luma mode itself, plus DC_PRED when the block
    // has chroma and the luma mode is not already DC_PRED.
    let mut mode_set_chroma = ArrayVec::<[_; 2]>::new();
    mode_set_chroma.push(luma_mode);
    if is_chroma_block && luma_mode != PredictionMode::DC_PRED {
      mode_set_chroma.push(PredictionMode::DC_PRED);
    }
    luma_chroma_mode_rdo(
      luma_mode,
      fi,
      bsize,
      tile_bo,
      ts,
      cw,
      rdo_type,
      cw_checkpoint,
      &mut best,
      mvs,
      ref_frames,
      &mode_set_chroma,
      true,
      0,
      &ArrayVec::<[CandidateMV; 9]>::new(),
      AngleDelta::default(),
    );
  });

  if fi.config.speed_settings.fine_directional_intra
    && bsize >= BlockSize::BLOCK_8X8
  {
    // Find the best angle delta for the current best prediction mode
    let luma_deltas = best.pred_mode_luma.angle_delta_count();
    let chroma_deltas = best.pred_mode_chroma.angle_delta_count();

    let mvs = [MotionVector::default(); 2];
    let ref_frames = [INTRA_FRAME, NONE_FRAME];
    let mode_set_chroma = [best.pred_mode_chroma];
    let mv_stack = ArrayVec::<[_; 9]>::new();
    let mut best_angle_delta = best.angle_delta;
    // Evaluates one (y, uv) delta pair unless it equals the pair already
    // stored in `best`, then reports the pair `best` holds afterwards.
    let mut angle_delta_rdo = |y, uv| -> AngleDelta {
      if best.angle_delta.y != y || best.angle_delta.uv != uv {
        luma_chroma_mode_rdo(
          best.pred_mode_luma,
          fi,
          bsize,
          tile_bo,
          ts,
          cw,
          rdo_type,
          cw_checkpoint,
          &mut best,
          mvs,
          ref_frames,
          &mode_set_chroma,
          true,
          0,
          &mv_stack,
          AngleDelta { y, uv },
        );
      }
      best.angle_delta
    };

    // Sweep the luma delta first (keeping the current chroma delta), then the
    // chroma delta (keeping the luma delta found so far). With more than one
    // delta the sweep starts at -MAX_ANGLE_DELTA.
    for i in 0..luma_deltas {
      let angle_delta_y =
        if luma_deltas == 1 { 0 } else { i - MAX_ANGLE_DELTA as i8 };
      best_angle_delta = angle_delta_rdo(angle_delta_y, best_angle_delta.uv);
    }
    for j in 0..chroma_deltas {
      let angle_delta_uv =
        if chroma_deltas == 1 { 0 } else { j - MAX_ANGLE_DELTA as i8 };
      best_angle_delta = angle_delta_rdo(best_angle_delta.y, angle_delta_uv);
    }
  }

  best
}
1536
rdo_cfl_alpha<T: Pixel>( ts: &mut TileStateMut<'_, T>, tile_bo: TileBlockOffset, bsize: BlockSize, luma_tx_size: TxSize, fi: &FrameInvariants<T>, ) -> Option<CFLParams>1537 pub fn rdo_cfl_alpha<T: Pixel>(
1538 ts: &mut TileStateMut<'_, T>, tile_bo: TileBlockOffset, bsize: BlockSize,
1539 luma_tx_size: TxSize, fi: &FrameInvariants<T>,
1540 ) -> Option<CFLParams> {
1541 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
1542 let uv_tx_size = bsize.largest_chroma_tx_size(xdec, ydec);
1543 debug_assert!(bsize.subsampled_size(xdec, ydec) == uv_tx_size.block_size());
1544
1545 let frame_bo = ts.to_frame_block_offset(tile_bo);
1546 let (visible_tx_w, visible_tx_h) = clip_visible_bsize(
1547 (fi.width + xdec) >> xdec,
1548 (fi.height + ydec) >> ydec,
1549 uv_tx_size.block_size(),
1550 (frame_bo.0.x << MI_SIZE_LOG2) >> xdec,
1551 (frame_bo.0.y << MI_SIZE_LOG2) >> ydec,
1552 );
1553
1554 if visible_tx_w == 0 || visible_tx_h == 0 {
1555 return None;
1556 };
1557 let mut ac: Aligned<[i16; 32 * 32]> = Aligned::uninitialized();
1558 luma_ac(&mut ac.data, ts, tile_bo, bsize, luma_tx_size, fi);
1559 let best_alpha: ArrayVec<[i16; 2]> = (1..3)
1560 .map(|p| {
1561 let &PlaneConfig { xdec, ydec, .. } = ts.rec.planes[p].plane_cfg;
1562 let tile_rect = ts.tile_rect().decimated(xdec, ydec);
1563 let rec = &mut ts.rec.planes[p];
1564 let input = &ts.input_tile.planes[p];
1565 let po = tile_bo.plane_offset(rec.plane_cfg);
1566 let edge_buf = get_intra_edges(
1567 &rec.as_const(),
1568 tile_bo,
1569 0,
1570 0,
1571 bsize,
1572 po,
1573 uv_tx_size,
1574 fi.sequence.bit_depth,
1575 Some(PredictionMode::UV_CFL_PRED),
1576 fi.sequence.enable_intra_edge_filter,
1577 IntraParam::None,
1578 );
1579 let mut alpha_cost = |alpha: i16| -> u64 {
1580 let mut rec_region =
1581 rec.subregion_mut(Area::BlockStartingAt { bo: tile_bo.0 });
1582 PredictionMode::UV_CFL_PRED.predict_intra(
1583 tile_rect,
1584 &mut rec_region,
1585 uv_tx_size,
1586 fi.sequence.bit_depth,
1587 &ac.data,
1588 IntraParam::Alpha(alpha),
1589 None,
1590 &edge_buf,
1591 fi.cpu_feature_level,
1592 );
1593 sse_wxh(
1594 &input.subregion(Area::BlockStartingAt { bo: tile_bo.0 }),
1595 &rec_region.as_const(),
1596 visible_tx_w,
1597 visible_tx_h,
1598 |_, _| DistortionScale::default(), // We're not doing RDO here.
1599 fi.sequence.bit_depth,
1600 fi.cpu_feature_level,
1601 )
1602 .0
1603 };
1604 let mut best = (alpha_cost(0), 0);
1605 let mut count = 2;
1606 for alpha in 1i16..=16i16 {
1607 let cost = (alpha_cost(alpha), alpha_cost(-alpha));
1608 if cost.0 < best.0 {
1609 best = (cost.0, alpha);
1610 count += 2;
1611 }
1612 if cost.1 < best.0 {
1613 best = (cost.1, -alpha);
1614 count += 2;
1615 }
1616 if count < alpha {
1617 break;
1618 }
1619 }
1620 best.1
1621 })
1622 .collect();
1623
1624 if best_alpha[0] == 0 && best_alpha[1] == 0 {
1625 None
1626 } else {
1627 Some(CFLParams::from_alpha(best_alpha[0], best_alpha[1]))
1628 }
1629 }
1630
1631 /// RDO-based transform type decision
1632 /// If cw_checkpoint is None, a checkpoint for cw's (ContextWriter) current
1633 /// state is created and stored for later use.
rdo_tx_type_decision<T: Pixel>( fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, cw_checkpoint: &mut Option<ContextWriterCheckpoint>, mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2], bsize: BlockSize, tile_bo: TileBlockOffset, tx_size: TxSize, tx_set: TxSet, tx_types: &[TxType], ) -> (TxType, f64)1634 pub fn rdo_tx_type_decision<T: Pixel>(
1635 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1636 cw: &mut ContextWriter, cw_checkpoint: &mut Option<ContextWriterCheckpoint>,
1637 mode: PredictionMode, ref_frames: [RefType; 2], mvs: [MotionVector; 2],
1638 bsize: BlockSize, tile_bo: TileBlockOffset, tx_size: TxSize, tx_set: TxSet,
1639 tx_types: &[TxType],
1640 ) -> (TxType, f64) {
1641 let mut best_type = TxType::DCT_DCT;
1642 let mut best_rd = std::f64::MAX;
1643
1644 let PlaneConfig { xdec, ydec, .. } = ts.input.planes[1].cfg;
1645 let is_chroma_block =
1646 has_chroma(tile_bo, bsize, xdec, ydec, fi.sequence.chroma_sampling);
1647
1648 let is_inter = !mode.is_intra();
1649
1650 if cw_checkpoint.is_none() {
1651 // Only run the first call
1652 // Prevents creating multiple checkpoints for own version of cw
1653 *cw_checkpoint = Some(cw.checkpoint());
1654 }
1655
1656 let rdo_type = if fi.use_tx_domain_distortion {
1657 RDOType::TxDistRealRate
1658 } else {
1659 RDOType::PixelDistRealRate
1660 };
1661 let need_recon_pixel = tx_size.block_size() != bsize && !is_inter;
1662
1663 for &tx_type in tx_types {
1664 // Skip unsupported transform types
1665 if av1_tx_used[tx_set as usize][tx_type as usize] == 0 {
1666 continue;
1667 }
1668
1669 if is_inter {
1670 motion_compensate(
1671 fi, ts, cw, mode, ref_frames, mvs, bsize, tile_bo, true,
1672 );
1673 }
1674
1675 let wr: &mut dyn Writer = &mut WriterCounter::new();
1676 let tell = wr.tell_frac();
1677 let (_, tx_dist) = if is_inter {
1678 write_tx_tree(
1679 fi,
1680 ts,
1681 cw,
1682 wr,
1683 mode,
1684 0,
1685 tile_bo,
1686 bsize,
1687 tx_size,
1688 tx_type,
1689 false,
1690 true,
1691 rdo_type,
1692 need_recon_pixel,
1693 )
1694 } else {
1695 write_tx_blocks(
1696 fi,
1697 ts,
1698 cw,
1699 wr,
1700 mode,
1701 mode,
1702 AngleDelta::default(),
1703 tile_bo,
1704 bsize,
1705 tx_size,
1706 tx_type,
1707 false,
1708 CFLParams::default(), // Unused.
1709 true,
1710 rdo_type,
1711 need_recon_pixel,
1712 )
1713 };
1714
1715 let rate = wr.tell_frac() - tell;
1716 let distortion = if fi.use_tx_domain_distortion {
1717 compute_tx_distortion(
1718 fi,
1719 ts,
1720 bsize,
1721 is_chroma_block,
1722 tile_bo,
1723 tx_dist,
1724 false,
1725 true,
1726 )
1727 } else {
1728 compute_distortion(fi, ts, bsize, is_chroma_block, tile_bo, true)
1729 };
1730 let rd = compute_rd_cost(fi, rate, distortion);
1731 if rd < best_rd {
1732 best_rd = rd;
1733 best_type = tx_type;
1734 }
1735
1736 cw.rollback(cw_checkpoint.as_ref().unwrap());
1737 }
1738
1739 assert!(best_rd >= 0_f64);
1740
1741 (best_type, best_rd)
1742 }
1743
get_sub_partitions( four_partitions: &[TileBlockOffset; 4], partition: PartitionType, ) -> ArrayVec<[TileBlockOffset; 4]>1744 pub fn get_sub_partitions(
1745 four_partitions: &[TileBlockOffset; 4], partition: PartitionType,
1746 ) -> ArrayVec<[TileBlockOffset; 4]> {
1747 let mut partition_offsets = ArrayVec::<[TileBlockOffset; 4]>::new();
1748
1749 partition_offsets.push(four_partitions[0]);
1750
1751 if partition == PARTITION_NONE {
1752 return partition_offsets;
1753 }
1754 if partition == PARTITION_VERT || partition == PARTITION_SPLIT {
1755 partition_offsets.push(four_partitions[1]);
1756 };
1757 if partition == PARTITION_HORZ || partition == PARTITION_SPLIT {
1758 partition_offsets.push(four_partitions[2]);
1759 };
1760 if partition == PARTITION_SPLIT {
1761 partition_offsets.push(four_partitions[3]);
1762 };
1763
1764 partition_offsets
1765 }
1766
1767 #[inline(always)]
rdo_partition_none<T: Pixel>( fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset, inter_cfg: &InterConfig, child_modes: &mut ArrayVec<[PartitionParameters; 4]>, ) -> f641768 fn rdo_partition_none<T: Pixel>(
1769 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1770 cw: &mut ContextWriter, bsize: BlockSize, tile_bo: TileBlockOffset,
1771 inter_cfg: &InterConfig,
1772 child_modes: &mut ArrayVec<[PartitionParameters; 4]>,
1773 ) -> f64 {
1774 debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height);
1775
1776 let mode = rdo_mode_decision(fi, ts, cw, bsize, tile_bo, inter_cfg);
1777 let cost = mode.rd_cost;
1778
1779 child_modes.push(mode);
1780
1781 cost
1782 }
1783
1784 // VERTICAL, HORIZONTAL or simple SPLIT
1785 #[inline(always)]
rdo_partition_simple<T: Pixel, W: Writer>( fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W, bsize: BlockSize, tile_bo: TileBlockOffset, inter_cfg: &InterConfig, partition: PartitionType, rdo_type: RDOType, best_rd: f64, child_modes: &mut ArrayVec<[PartitionParameters; 4]>, ) -> Option<f64>1786 fn rdo_partition_simple<T: Pixel, W: Writer>(
1787 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1788 cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
1789 bsize: BlockSize, tile_bo: TileBlockOffset, inter_cfg: &InterConfig,
1790 partition: PartitionType, rdo_type: RDOType, best_rd: f64,
1791 child_modes: &mut ArrayVec<[PartitionParameters; 4]>,
1792 ) -> Option<f64> {
1793 debug_assert!(tile_bo.0.x < ts.mi_width && tile_bo.0.y < ts.mi_height);
1794 let subsize = bsize.subsize(partition);
1795
1796 debug_assert!(subsize != BlockSize::BLOCK_INVALID);
1797
1798 let cost = if bsize >= BlockSize::BLOCK_8X8 {
1799 let w: &mut W = if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
1800 let tell = w.tell_frac();
1801 cw.write_partition(w, tile_bo, partition, bsize);
1802 compute_rd_cost(fi, w.tell_frac() - tell, ScaledDistortion::zero())
1803 } else {
1804 0.0
1805 };
1806
1807 let hbsw = subsize.width_mi(); // Half the block size width in blocks
1808 let hbsh = subsize.height_mi(); // Half the block size height in blocks
1809 let four_partitions = [
1810 tile_bo,
1811 TileBlockOffset(BlockOffset {
1812 x: tile_bo.0.x + hbsw as usize,
1813 y: tile_bo.0.y,
1814 }),
1815 TileBlockOffset(BlockOffset {
1816 x: tile_bo.0.x,
1817 y: tile_bo.0.y + hbsh as usize,
1818 }),
1819 TileBlockOffset(BlockOffset {
1820 x: tile_bo.0.x + hbsw as usize,
1821 y: tile_bo.0.y + hbsh as usize,
1822 }),
1823 ];
1824
1825 let partitions = get_sub_partitions(&four_partitions, partition);
1826
1827 let mut rd_cost_sum = 0.0;
1828
1829 for offset in partitions {
1830 let hbs = subsize.width_mi() >> 1;
1831 let has_cols = offset.0.x + hbs < ts.mi_width;
1832 let has_rows = offset.0.y + hbs < ts.mi_height;
1833
1834 if has_cols && has_rows {
1835 let mode_decision =
1836 rdo_mode_decision(fi, ts, cw, subsize, offset, inter_cfg);
1837
1838 rd_cost_sum += mode_decision.rd_cost;
1839
1840 if fi.enable_early_exit && rd_cost_sum > best_rd {
1841 return None;
1842 }
1843 if subsize >= BlockSize::BLOCK_8X8 && subsize.is_sqr() {
1844 let w: &mut W =
1845 if cw.bc.cdef_coded { w_post_cdef } else { w_pre_cdef };
1846 cw.write_partition(w, offset, PartitionType::PARTITION_NONE, subsize);
1847 }
1848 encode_block_with_modes(
1849 fi,
1850 ts,
1851 cw,
1852 w_pre_cdef,
1853 w_post_cdef,
1854 subsize,
1855 offset,
1856 &mode_decision,
1857 rdo_type,
1858 false,
1859 );
1860 child_modes.push(mode_decision);
1861 } else {
1862 //rd_cost_sum += std::f64::MAX;
1863 return None;
1864 }
1865 }
1866
1867 Some(cost + rd_cost_sum)
1868 }
1869
1870 // RDO-based single level partitioning decision
rdo_partition_decision<T: Pixel, W: Writer>( fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W, bsize: BlockSize, tile_bo: TileBlockOffset, cached_block: &PartitionGroupParameters, partition_types: &[PartitionType], rdo_type: RDOType, inter_cfg: &InterConfig, ) -> PartitionGroupParameters1871 pub fn rdo_partition_decision<T: Pixel, W: Writer>(
1872 fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>,
1873 cw: &mut ContextWriter, w_pre_cdef: &mut W, w_post_cdef: &mut W,
1874 bsize: BlockSize, tile_bo: TileBlockOffset,
1875 cached_block: &PartitionGroupParameters, partition_types: &[PartitionType],
1876 rdo_type: RDOType, inter_cfg: &InterConfig,
1877 ) -> PartitionGroupParameters {
1878 let mut best_partition = cached_block.part_type;
1879 let mut best_rd = cached_block.rd_cost;
1880 let mut best_pred_modes = cached_block.part_modes.clone();
1881
1882 let cw_checkpoint = cw.checkpoint();
1883 let w_pre_checkpoint = w_pre_cdef.checkpoint();
1884 let w_post_checkpoint = w_post_cdef.checkpoint();
1885
1886 for &partition in partition_types {
1887 // Do not re-encode results we already have
1888 if partition == cached_block.part_type {
1889 continue;
1890 }
1891
1892 let mut child_modes = ArrayVec::<[_; 4]>::new();
1893
1894 let cost = match partition {
1895 PARTITION_NONE if bsize <= BlockSize::BLOCK_64X64 => {
1896 Some(rdo_partition_none(
1897 fi,
1898 ts,
1899 cw,
1900 bsize,
1901 tile_bo,
1902 inter_cfg,
1903 &mut child_modes,
1904 ))
1905 }
1906 PARTITION_SPLIT | PARTITION_HORZ | PARTITION_VERT => {
1907 rdo_partition_simple(
1908 fi,
1909 ts,
1910 cw,
1911 w_pre_cdef,
1912 w_post_cdef,
1913 bsize,
1914 tile_bo,
1915 inter_cfg,
1916 partition,
1917 rdo_type,
1918 best_rd,
1919 &mut child_modes,
1920 )
1921 }
1922 _ => {
1923 unreachable!();
1924 }
1925 };
1926
1927 if let Some(rd) = cost {
1928 if rd < best_rd {
1929 best_rd = rd;
1930 best_partition = partition;
1931 best_pred_modes = child_modes.clone();
1932 }
1933 }
1934 cw.rollback(&cw_checkpoint);
1935 w_pre_cdef.rollback(&w_pre_checkpoint);
1936 w_post_cdef.rollback(&w_post_checkpoint);
1937 }
1938
1939 assert!(best_rd >= 0_f64);
1940
1941 PartitionGroupParameters {
1942 rd_cost: best_rd,
1943 part_type: best_partition,
1944 part_modes: best_pred_modes,
1945 }
1946 }
1947
rdo_loop_plane_error<T: Pixel>( base_sbo: TileSuperBlockOffset, offset_sbo: TileSuperBlockOffset, sb_w: usize, sb_h: usize, fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>, blocks: &TileBlocks<'_>, test: &Frame<T>, src: &Tile<'_, T>, pli: usize, ) -> ScaledDistortion1948 fn rdo_loop_plane_error<T: Pixel>(
1949 base_sbo: TileSuperBlockOffset, offset_sbo: TileSuperBlockOffset,
1950 sb_w: usize, sb_h: usize, fi: &FrameInvariants<T>, ts: &TileStateMut<'_, T>,
1951 blocks: &TileBlocks<'_>, test: &Frame<T>, src: &Tile<'_, T>, pli: usize,
1952 ) -> ScaledDistortion {
1953 let sb_w_blocks =
1954 if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_w;
1955 let sb_h_blocks =
1956 if fi.sequence.use_128x128_superblock { 16 } else { 8 } * sb_h;
1957 // Each direction block is 8x8 in y, potentially smaller if subsampled in chroma
1958 // accumulating in-frame and unpadded
1959 let mut err = Distortion::zero();
1960 for by in 0..sb_h_blocks {
1961 for bx in 0..sb_w_blocks {
1962 let loop_bo = offset_sbo.block_offset(bx << 1, by << 1);
1963 if loop_bo.0.x < blocks.cols() && loop_bo.0.y < blocks.rows() {
1964 let src_plane = &src.planes[pli];
1965 let test_plane = &test.planes[pli];
1966 let PlaneConfig { xdec, ydec, .. } = *src_plane.plane_cfg;
1967 debug_assert_eq!(xdec, test_plane.cfg.xdec);
1968 debug_assert_eq!(ydec, test_plane.cfg.ydec);
1969
1970 // Unfortunately, our distortion biases are only available via
1971 // Frame-absolute addressing, so we need a block offset
1972 // relative to the full frame origin (not the tile or analysis
1973 // area)
1974 let frame_bo = (base_sbo + offset_sbo).block_offset(bx << 1, by << 1);
1975 let bias = distortion_scale(
1976 fi,
1977 ts.to_frame_block_offset(frame_bo),
1978 BlockSize::BLOCK_8X8,
1979 );
1980
1981 let src_region =
1982 src_plane.subregion(Area::BlockStartingAt { bo: loop_bo.0 });
1983 let test_region =
1984 test_plane.region(Area::BlockStartingAt { bo: loop_bo.0 });
1985
1986 err += if pli == 0 {
1987 // For loop filters, We intentionally use cdef_dist even with
1988 // `--tune Psnr`. Using SSE instead gives no PSNR gain but has a
1989 // significant negative impact on other metrics and visual quality.
1990 cdef_dist_wxh_8x8(&src_region, &test_region, fi.sequence.bit_depth)
1991 * bias
1992 } else {
1993 sse_wxh(
1994 &src_region,
1995 &test_region,
1996 8 >> xdec,
1997 8 >> ydec,
1998 |_, _| bias,
1999 fi.sequence.bit_depth,
2000 fi.cpu_feature_level,
2001 )
2002 };
2003 }
2004 }
2005 }
2006 err * fi.dist_scale[pli]
2007 }
2008
2009 // Passed in a superblock offset representing the upper left corner of
2010 // the LRU area we're optimizing. This area covers the largest LRU in
2011 // any of the present planes, but may consist of a number of
2012 // superblocks and full, smaller LRUs in the other planes
rdo_loop_decision<T: Pixel>( base_sbo: TileSuperBlockOffset, fi: &FrameInvariants<T>, ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, w: &mut dyn Writer, deblock_p: bool, )2013 pub fn rdo_loop_decision<T: Pixel>(
2014 base_sbo: TileSuperBlockOffset, fi: &FrameInvariants<T>,
2015 ts: &mut TileStateMut<'_, T>, cw: &mut ContextWriter, w: &mut dyn Writer,
2016 deblock_p: bool,
2017 ) {
2018 let planes = if fi.sequence.chroma_sampling == ChromaSampling::Cs400 {
2019 1
2020 } else {
2021 MAX_PLANES
2022 };
2023 assert!(fi.sequence.enable_cdef || fi.sequence.enable_restoration);
2024 // Determine area of optimization: Which plane has the largest LRUs?
2025 // How many LRUs for each?
2026 let mut sb_w = 1; // how many superblocks wide the largest LRU
2027 // is/how many SBs we're processing (same thing)
2028 let mut sb_h = 1; // how many superblocks wide the largest LRU
2029 // is/how many SBs we're processing (same thing)
2030 let mut lru_w = [0; MAX_PLANES]; // how many LRUs we're processing
2031 let mut lru_h = [0; MAX_PLANES]; // how many LRUs we're processing
2032 for pli in 0..planes {
2033 let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2034 let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2035 if sb_w < (1 << sb_h_shift) {
2036 sb_w = 1 << sb_h_shift;
2037 }
2038 if sb_h < (1 << sb_v_shift) {
2039 sb_h = 1 << sb_v_shift;
2040 }
2041 }
2042 for pli in 0..planes {
2043 let sb_h_shift = ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2044 let sb_v_shift = ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2045 lru_w[pli] = sb_w / (1 << sb_h_shift);
2046 lru_h[pli] = sb_h / (1 << sb_v_shift);
2047 }
2048
2049 // The superblock width/height determinations may be calling for us
2050 // to compute over superblocks that do not actually exist in the
2051 // frame (off the right or lower edge). Trim sb width/height down
2052 // to actual superblocks. Note that these last superblocks on the
2053 // right/bottom may themselves still span the edge of the frame, but
2054 // they do hold at least some visible pixels.
2055 sb_w = sb_w.min(ts.sb_width - base_sbo.0.x);
2056 sb_h = sb_h.min(ts.sb_height - base_sbo.0.y);
2057
2058 // We have need to know the Y visible pixel limits as well (the
2059 // sb_w/sb_h figures above can be used to determine how many
2060 // allocated pixels, possibly beyond the visible frame, exist).
2061 let crop_w =
2062 fi.width - ((ts.sbo.0.x + base_sbo.0.x) << SUPERBLOCK_TO_PLANE_SHIFT);
2063 let crop_h =
2064 fi.height - ((ts.sbo.0.y + base_sbo.0.y) << SUPERBLOCK_TO_PLANE_SHIFT);
2065 let pixel_w = crop_w.min(sb_w << SUPERBLOCK_TO_PLANE_SHIFT);
2066 let pixel_h = crop_h.min(sb_h << SUPERBLOCK_TO_PLANE_SHIFT);
2067
2068 // Based on `RestorationState::new`
2069 const MAX_SB_SHIFT: usize = 4;
2070 const MAX_SB_SIZE: usize = 1 << MAX_SB_SHIFT;
2071 const MAX_LRU_SIZE: usize = MAX_SB_SIZE;
2072
2073 // Static allocation relies on the "minimal LRU area for all N planes" invariant.
2074 let mut best_index = [-1; MAX_SB_SIZE * MAX_SB_SIZE];
2075 let mut best_lrf =
2076 [[RestorationFilter::None; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2077
2078 // due to imprecision in the reconstruction parameter solver, we
2079 // need to make sure we don't fall into a limit cycle. Track our
2080 // best cost at LRF so that we can break if we get a solution that doesn't
2081 // improve at the reconstruction stage.
2082 let mut best_lrf_cost = [[-1.0; MAX_PLANES]; MAX_LRU_SIZE * MAX_LRU_SIZE];
2083
2084 // Loop filter RDO is an iterative process and we need temporary
2085 // scratch data to hold the results of deblocking, cdef, and the
2086 // loop reconstruction filter so that each can be partially updated
2087 // without recomputing the entire stack. Construct
2088 // largest-LRU-sized frames for each, accounting for padding
2089 // required by deblocking, cdef and [optionally] LR.
2090 let mut rec_subset = ts
2091 .rec
2092 .subregion(Area::BlockRect {
2093 bo: base_sbo.block_offset(0, 0).0,
2094 width: (pixel_w + 7) >> 3 << 3,
2095 height: (pixel_h + 7) >> 3 << 3,
2096 })
2097 .scratch_copy();
2098
2099 // sub-setted region of the TileBlocks for our working frame area.
2100 // Note that the size of this subset is what signals CDEF as to the
2101 // actual coded size.
2102 let mut tileblocks_subset = cw.bc.blocks.subregion_mut(
2103 base_sbo.block_offset(0, 0).0.x,
2104 base_sbo.block_offset(0, 0).0.y,
2105 sb_w << SUPERBLOCK_TO_BLOCK_SHIFT,
2106 sb_h << SUPERBLOCK_TO_BLOCK_SHIFT,
2107 );
2108
2109 // const, no need to copy, just need the subregion (but do zero the
2110 // origin to match the other copies/new backing frames).
2111 let src_subset = ts
2112 .input_tile
2113 .subregion(Area::BlockRect {
2114 bo: base_sbo.block_offset(0, 0).0,
2115 width: (pixel_w + 7) >> 3 << 3,
2116 height: (pixel_h + 7) >> 3 << 3,
2117 })
2118 .home();
2119
2120 if deblock_p {
2121 // Find a good deblocking filter solution for the passed in area.
2122 // This is not RDO of deblocking itself, merely a solution to get
2123 // better results from CDEF/LRF RDO.
2124 let deblock_levels = deblock_filter_optimize(
2125 fi,
2126 &rec_subset.as_tile(),
2127 &src_subset,
2128 &tileblocks_subset.as_const(),
2129 crop_w,
2130 crop_h,
2131 );
2132
2133 // Deblock the contents of our reconstruction copy.
2134 if deblock_levels[0] != 0 || deblock_levels[1] != 0 {
2135 // copy ts.deblock because we need to set some of our own values here
2136 let mut deblock_copy = *ts.deblock;
2137 deblock_copy.levels = deblock_levels;
2138
2139 // finally, deblock the temp frame
2140 deblock_filter_frame(
2141 &deblock_copy,
2142 &mut rec_subset.as_tile_mut(),
2143 &tileblocks_subset.as_const(),
2144 crop_w,
2145 crop_h,
2146 fi.sequence.bit_depth,
2147 planes,
2148 );
2149 }
2150 }
2151
2152 let mut cdef_work =
2153 if fi.sequence.enable_cdef { Some(rec_subset.clone()) } else { None };
2154 let mut lrf_work = if fi.sequence.enable_restoration {
2155 Some(Frame {
2156 planes: {
2157 let new_plane = |pli: usize| {
2158 let PlaneConfig { xdec, ydec, width, height, .. } =
2159 rec_subset.planes[pli].cfg;
2160 Plane::new(width, height, xdec, ydec, 0, 0)
2161 };
2162 [new_plane(0), new_plane(1), new_plane(2)]
2163 },
2164 })
2165 } else {
2166 None
2167 };
2168
2169 // Precompute directional analysis for CDEF
2170 let cdef_data = {
2171 if cdef_work.is_some() {
2172 Some((
2173 &rec_subset,
2174 cdef_analyze_superblock_range(
2175 fi,
2176 &rec_subset,
2177 &tileblocks_subset.as_const(),
2178 sb_w,
2179 sb_h,
2180 ),
2181 ))
2182 } else {
2183 None
2184 }
2185 };
2186
2187 // CDEF/LRF decision iteration
2188 // Start with a default of CDEF 0 and RestorationFilter::None
2189 // Try all CDEF options for each sb with current LRF; if new CDEF+LRF choice is better, select it.
2190 // Then try all LRF options with current CDEFs; if new CDEFs+LRF choice is better, select it.
2191 // If LRF choice changed for any plane, repeat until no changes
2192 // Limit iterations and where we break based on speed setting (in the TODO list ;-)
2193 let mut cdef_change = true;
2194 let mut lrf_change = true;
2195 while cdef_change || lrf_change {
2196 // search for improved cdef indices, superblock by superblock, if cdef is enabled.
2197 if let (Some((rec_copy, cdef_dirs)), Some(cdef_ref)) =
2198 (&cdef_data, &mut cdef_work.as_mut())
2199 {
2200 for sby in 0..sb_h {
2201 for sbx in 0..sb_w {
2202 let prev_best_index = best_index[sby * sb_w + sbx];
2203 let mut best_cost = -1.;
2204 let mut best_new_index = -1i8;
2205
2206 /* offset of the superblock we're currently testing within the larger
2207 analysis area */
2208 let loop_sbo =
2209 TileSuperBlockOffset(SuperBlockOffset { x: sbx, y: sby });
2210
2211 /* cdef index testing loop */
2212 for cdef_index in 0..(1 << fi.cdef_bits) {
2213 let mut err = ScaledDistortion::zero();
2214 let mut rate = 0;
2215
2216 cdef_filter_superblock(
2217 fi,
2218 &rec_subset,
2219 &mut cdef_ref.as_tile_mut(),
2220 &tileblocks_subset.as_const(),
2221 loop_sbo,
2222 cdef_index,
2223 &cdef_dirs[sby * sb_w + sbx],
2224 );
2225 // apply LRF if any
2226 for pli in 0..planes {
2227 // We need the cropped-to-visible-frame area of this SB
2228 let wh =
2229 if fi.sequence.use_128x128_superblock { 128 } else { 64 };
2230 let PlaneConfig { xdec, ydec, .. } = cdef_ref.planes[pli].cfg;
2231 let vis_width = (wh >> xdec).min(
2232 (crop_w >> xdec)
2233 - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).x
2234 as usize,
2235 );
2236 let vis_height = (wh >> ydec).min(
2237 (crop_h >> ydec)
2238 - loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg).y
2239 as usize,
2240 );
2241 // which LRU are we currently testing against?
2242 if let (Some((lru_x, lru_y)), Some(lrf_ref)) = {
2243 let rp = &ts.restoration.planes[pli];
2244 (
2245 rp.restoration_unit_offset(base_sbo, loop_sbo, false),
2246 &mut lrf_work,
2247 )
2248 } {
2249 // We have a valid LRU, apply LRF, compute error
2250 match best_lrf[lru_y * lru_w[pli] + lru_x][pli] {
2251 RestorationFilter::None {} => {
2252 err += rdo_loop_plane_error(
2253 base_sbo,
2254 loop_sbo,
2255 1,
2256 1,
2257 fi,
2258 ts,
2259 &tileblocks_subset.as_const(),
2260 cdef_ref,
2261 &src_subset,
2262 pli,
2263 );
2264 rate += if fi.sequence.enable_restoration {
2265 cw.fc.count_lrf_switchable(
2266 w,
2267 &ts.restoration.as_const(),
2268 best_lrf[lru_y * lru_w[pli] + lru_x][pli],
2269 pli,
2270 )
2271 } else {
2272 0 // no relative cost differeneces to different
2273 // CDEF params. If cdef is on, it's a wash.
2274 };
2275 }
2276 RestorationFilter::Sgrproj { set, xqd } => {
2277 // only run on this single superblock
2278 let loop_po =
2279 loop_sbo.plane_offset(&cdef_ref.planes[pli].cfg);
2280 // todo: experiment with borrowing border pixels
2281 // rather than edge-extending. Right now this is
2282 // hard-clipping to the superblock boundary.
2283 setup_integral_image(
2284 &mut ts.integral_buffer,
2285 SOLVE_IMAGE_STRIDE,
2286 vis_width,
2287 vis_height,
2288 vis_width,
2289 vis_height,
2290 &cdef_ref.planes[pli].slice(loop_po),
2291 &cdef_ref.planes[pli].slice(loop_po),
2292 );
2293 sgrproj_stripe_filter(
2294 set,
2295 xqd,
2296 fi,
2297 &ts.integral_buffer,
2298 SOLVE_IMAGE_STRIDE,
2299 &cdef_ref.planes[pli].slice(loop_po),
2300 &mut lrf_ref.planes[pli].region_mut(Area::Rect {
2301 x: loop_po.x,
2302 y: loop_po.y,
2303 width: vis_width,
2304 height: vis_height,
2305 }),
2306 );
2307 err += rdo_loop_plane_error(
2308 base_sbo,
2309 loop_sbo,
2310 1,
2311 1,
2312 fi,
2313 ts,
2314 &tileblocks_subset.as_const(),
2315 lrf_ref,
2316 &src_subset,
2317 pli,
2318 );
2319 rate += cw.fc.count_lrf_switchable(
2320 w,
2321 &ts.restoration.as_const(),
2322 best_lrf[lru_y * lru_w[pli] + lru_x][pli],
2323 pli,
2324 );
2325 }
2326 RestorationFilter::Wiener { .. } => unreachable!(), // coming soon
2327 }
2328 } else {
2329 // No actual LRU here, compute error directly from CDEF output.
2330 err += rdo_loop_plane_error(
2331 base_sbo,
2332 loop_sbo,
2333 1,
2334 1,
2335 fi,
2336 ts,
2337 &tileblocks_subset.as_const(),
2338 cdef_ref,
2339 &src_subset,
2340 pli,
2341 );
2342 // no relative cost differeneces to different
2343 // CDEF params. If cdef is on, it's a wash.
2344 // rate += 0;
2345 }
2346 }
2347
2348 let cost = compute_rd_cost(fi, rate, err);
2349 if best_cost < 0. || cost < best_cost {
2350 best_cost = cost;
2351 best_new_index = cdef_index as i8;
2352 }
2353 }
2354
2355 // Did we change any preexisting choices?
2356 if best_new_index != prev_best_index {
2357 cdef_change = true;
2358 best_index[sby * sb_w + sbx] = best_new_index;
2359 tileblocks_subset.set_cdef(loop_sbo, best_new_index as u8);
2360 }
2361
2362 let mut cdef_ref_tm = TileMut::new(
2363 cdef_ref,
2364 TileRect {
2365 x: 0,
2366 y: 0,
2367 width: cdef_ref.planes[0].cfg.width,
2368 height: cdef_ref.planes[0].cfg.height,
2369 },
2370 );
2371
2372 // Keep cdef output up to date; we need it for restoration
2373 // both below and above (padding)
2374 cdef_filter_superblock(
2375 fi,
2376 rec_copy,
2377 &mut cdef_ref_tm,
2378 &tileblocks_subset.as_const(),
2379 loop_sbo,
2380 best_index[sby * sb_w + sbx] as u8,
2381 &cdef_dirs[sby * sb_w + sbx],
2382 );
2383 }
2384 }
2385 }
2386
2387 if !cdef_change {
2388 break;
2389 }
2390 cdef_change = false;
2391 lrf_change = false;
2392
2393 // search for improved restoration filter parameters if restoration is enabled
2394 if let Some(lrf_ref) = &mut lrf_work.as_mut() {
2395 let lrf_input = if cdef_work.is_some() {
2396 // When CDEF is enabled, we pull from the CDEF output
2397 &cdef_work.as_ref().unwrap()
2398 } else {
2399 // When CDEF is disabled, we pull from the [optionally
2400 // deblocked] reconstruction
2401 &rec_subset
2402 };
2403 for pli in 0..planes {
2404 // Nominal size of LRU in pixels before clipping to visible frame
2405 let unit_size = ts.restoration.planes[pli].rp_cfg.unit_size;
2406 // width, in sb, of an LRU in this plane
2407 let lru_sb_w = 1 << ts.restoration.planes[pli].rp_cfg.sb_h_shift;
2408 // height, in sb, of an LRU in this plane
2409 let lru_sb_h = 1 << ts.restoration.planes[pli].rp_cfg.sb_v_shift;
2410 let PlaneConfig { xdec, ydec, .. } = lrf_ref.planes[pli].cfg;
2411 for lru_y in 0..lru_h[pli] {
2412 // number of LRUs vertically
2413 for lru_x in 0..lru_w[pli] {
2414 // number of LRUs horizontally
2415 let loop_sbo = TileSuperBlockOffset(SuperBlockOffset {
2416 x: lru_x * lru_sb_w,
2417 y: lru_y * lru_sb_h,
2418 });
2419 if ts.restoration.has_restoration_unit(
2420 base_sbo + loop_sbo,
2421 pli,
2422 false,
2423 ) {
2424 let src_plane = &src_subset.planes[pli]; // uncompressed input for reference
2425 let lrf_in_plane = &lrf_input.planes[pli];
2426 let lrf_po = loop_sbo.plane_offset(src_plane.plane_cfg);
2427 let mut best_new_lrf = best_lrf[lru_y * lru_w[pli] + lru_x][pli];
2428 let mut best_cost =
2429 best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli];
2430
2431 // Check the no filter option
2432 {
2433 let err = rdo_loop_plane_error(
2434 base_sbo,
2435 loop_sbo,
2436 lru_sb_w,
2437 lru_sb_h,
2438 fi,
2439 ts,
2440 &tileblocks_subset.as_const(),
2441 lrf_input,
2442 &src_subset,
2443 pli,
2444 );
2445 let rate = cw.fc.count_lrf_switchable(
2446 w,
2447 &ts.restoration.as_const(),
2448 best_new_lrf,
2449 pli,
2450 );
2451
2452 let cost = compute_rd_cost(fi, rate, err);
2453 // Was this choice actually an improvement?
2454 if best_cost < 0. || cost < best_cost {
2455 best_cost = cost;
2456 best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost;
2457 best_new_lrf = RestorationFilter::None;
2458 }
2459 }
2460
2461 // Look for a self guided filter
2462 // We need the cropped-to-visible-frame computation area of this LRU
2463 let vis_width = unit_size.min(
2464 (crop_w >> xdec)
2465 - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).x as usize,
2466 );
2467 let vis_height = unit_size.min(
2468 (crop_h >> ydec)
2469 - loop_sbo.plane_offset(&lrf_ref.planes[pli].cfg).y as usize,
2470 );
2471
2472 // todo: experiment with borrowing border pixels
2473 // rather than edge-extending. Right now this is
2474 // hard-clipping to the superblock boundary.
2475 setup_integral_image(
2476 &mut ts.integral_buffer,
2477 SOLVE_IMAGE_STRIDE,
2478 vis_width,
2479 vis_height,
2480 vis_width,
2481 vis_height,
2482 &lrf_in_plane.slice(lrf_po),
2483 &lrf_in_plane.slice(lrf_po),
2484 );
2485
2486 for &set in get_sgr_sets(fi.config.speed_settings.sgr_complexity)
2487 {
2488 let (xqd0, xqd1) = sgrproj_solve(
2489 set,
2490 fi,
2491 &ts.integral_buffer,
2492 &src_plane
2493 .subregion(Area::StartingAt { x: lrf_po.x, y: lrf_po.y }),
2494 &lrf_in_plane.slice(lrf_po),
2495 vis_width,
2496 vis_height,
2497 );
2498 let current_lrf =
2499 RestorationFilter::Sgrproj { set, xqd: [xqd0, xqd1] };
2500 if let RestorationFilter::Sgrproj { set, xqd } = current_lrf {
2501 sgrproj_stripe_filter(
2502 set,
2503 xqd,
2504 fi,
2505 &ts.integral_buffer,
2506 SOLVE_IMAGE_STRIDE,
2507 &lrf_in_plane.slice(lrf_po),
2508 &mut lrf_ref.planes[pli].region_mut(Area::Rect {
2509 x: lrf_po.x,
2510 y: lrf_po.y,
2511 width: vis_width,
2512 height: vis_height,
2513 }),
2514 );
2515 }
2516 let err = rdo_loop_plane_error(
2517 base_sbo,
2518 loop_sbo,
2519 lru_sb_w,
2520 lru_sb_h,
2521 fi,
2522 ts,
2523 &tileblocks_subset.as_const(),
2524 lrf_ref,
2525 &src_subset,
2526 pli,
2527 );
2528 let rate = cw.fc.count_lrf_switchable(
2529 w,
2530 &ts.restoration.as_const(),
2531 current_lrf,
2532 pli,
2533 );
2534 let cost = compute_rd_cost(fi, rate, err);
2535 if cost < best_cost {
2536 best_cost = cost;
2537 best_lrf_cost[lru_y * lru_w[pli] + lru_x][pli] = cost;
2538 best_new_lrf = current_lrf;
2539 }
2540 }
2541
2542 if best_lrf[lru_y * lru_w[pli] + lru_x][pli]
2543 .notequal(best_new_lrf)
2544 {
2545 best_lrf[lru_y * lru_w[pli] + lru_x][pli] = best_new_lrf;
2546 lrf_change = true;
2547 if let Some(ru) = ts.restoration.planes[pli]
2548 .restoration_unit_mut(base_sbo + loop_sbo)
2549 {
2550 ru.filter = best_new_lrf;
2551 }
2552 }
2553 }
2554 }
2555 }
2556 }
2557 }
2558 }
2559 }
2560
/// Checks that `estimate_rate` called with all-zero numeric arguments and
/// the 4x4 transform size yields the very first `RDO_RATE_TABLE` entry.
#[test]
fn estimate_rate_test() {
  let expected = RDO_RATE_TABLE[0][0][0];
  let actual = estimate_rate(0, TxSize::TX_4X4, 0);
  assert_eq!(actual, expected);
}
2565