// Copyright (c) 2018-2021, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

use crate::api::lookahead::*;
use crate::api::{EncoderConfig, SceneDetectionSpeed};
use crate::cpu_features::CpuFeatureLevel;
use crate::encoder::Sequence;
use crate::frame::*;
use crate::sad_row;
use crate::util::Pixel;
use rust_hawktracer::*;
use std::sync::Arc;
use std::{cmp, u64};

// The fast implementation is based on a Python implementation at
// https://pyscenedetect.readthedocs.io/en/latest/reference/detection-methods/.
// The Python implementation uses HSV values and a threshold of 30. Comparing the
// YUV values was sufficient in most cases, and avoided a more costly YUV->RGB->HSV
// conversion, but the deltas needed to be scaled down. The deltas for keyframes
// in YUV were about 1/3 to 1/2 of what they were in HSV, but non-keyframes were
// very unlikely to have a delta greater than 3 in YUV, whereas they may reach into
// the double digits in HSV.
//
// Experiments have shown that these thresholds are optimal.
const FAST_THRESHOLD: f64 = 18.0;
const IMP_BLOCK_DIFF_THRESHOLD: f64 = 7.0;

/// Runs keyframe detection on frames from the lookahead queue.
pub struct SceneChangeDetector<T: Pixel> {
  /// Minimum average difference between YUV deltas that will trigger a scene change.
  threshold: f64,
  /// Fast scene cut detection mode, uses simple SAD instead of encoder cost estimates.
  speed_mode: SceneDetectionSpeed,
  /// Scaling factor for fast scene detection
  scale_factor: usize,
  /// Frame buffer for scaled frames
  downscaled_frame_buffer: Option<(
    Box<[Plane<T>; 2]>,
    // `true` if the data is valid and initialized, or `false`
    // if it should be assumed that the data is uninitialized.
    bool,
  )>,
  /// Frame buffer for holding references to source frames.
  ///
  /// Useful for not copying data into the downscaled frame buffer
  /// when using a downscale factor of 1.
  frame_ref_buffer: Option<Box<[Arc<Frame<T>>; 2]>>,
  /// Deque offset for current
  lookahead_offset: usize,
  /// Start deque offset based on lookahead
  deque_offset: usize,
  /// Scenechange results for adaptive threshold
  score_deque: Vec<ScenecutResult>,
  /// Number of pixels in scaled frame for fast mode
  pixels: usize,
  /// The bit depth of the video.
  bit_depth: usize,
  /// The CPU feature level to be used.
  cpu_feature_level: CpuFeatureLevel,
  encoder_config: EncoderConfig,
  sequence: Arc<Sequence>,
}

impl<T: Pixel> SceneChangeDetector<T> {
  pub fn new(
    encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel,
    lookahead_distance: usize, sequence: Arc<Sequence>,
  ) -> Self {
    let bit_depth = encoder_config.bit_depth;
    let speed_mode = if encoder_config.low_latency {
      SceneDetectionSpeed::Fast
    } else {
      encoder_config.speed_settings.fast_scene_detection
    };

    // Scale factor for fast and medium scene detection
    let scale_factor = detect_scale_factor(&sequence, speed_mode);

    // Set lookahead offset to 5 if normal lookahead available
    let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 };
    let deque_offset = lookahead_offset;

    let score_deque = Vec::with_capacity(5 + lookahead_distance);

    // Pixel count for fast scene detection
    let pixels = if speed_mode == SceneDetectionSpeed::Fast {
      (sequence.max_frame_height as usize / scale_factor)
        * (sequence.max_frame_width as usize / scale_factor)
    } else {
      1
    };

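    // The threshold scales linearly with bit depth: an 8-bit source keeps the
    // tuned value of 18.0, a 10-bit source uses 22.5 (18.0 * 10 / 8), and a
    // 12-bit source uses 27.0.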
    let threshold = FAST_THRESHOLD * (bit_depth as f64) / 8.0;

    Self {
      threshold,
      speed_mode,
      scale_factor,
      downscaled_frame_buffer: None,
      frame_ref_buffer: None,
      lookahead_offset,
      deque_offset,
      score_deque,
      pixels,
      bit_depth,
      cpu_feature_level,
      encoder_config,
      sequence,
    }
  }

  /// Runs keyframe detection on the next frame in the lookahead queue.
  ///
  /// This function requires that a subset of input frames
  /// is passed to it in order, and that `keyframes` is only
  /// updated from this method. `input_frameno` should correspond
  /// to the second frame in `frame_set`.
  ///
  /// This will gracefully handle the first frame in the video as well.
  #[hawktracer(analyze_next_frame)]
  pub fn analyze_next_frame(
    &mut self, frame_set: &[Arc<Frame<T>>], input_frameno: u64,
    previous_keyframe: u64,
  ) -> bool {
    // Use the score deque to compute an adaptive scene cut threshold.
    // The deque offset into the scene change scores is based on the lookahead.
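    //
    // Scores are pushed to the front of the deque in `run_comparison`, so
    // indices 0..deque_offset hold forward (future) frames, index deque_offset
    // holds the frame currently being evaluated, and higher indices hold
    // already-evaluated (past) frames.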

    // Find the distance to the previous keyframe.
    let distance = input_frameno - previous_keyframe;

    if frame_set.len() <= self.lookahead_offset {
      // Don't insert keyframes in the last few frames of the video.
      // This is basically a scene flash and a waste of bits.
      return false;
    }

    if self.encoder_config.speed_settings.no_scene_detection {
      if let Some(true) = self.handle_min_max_intervals(distance) {
        return true;
      };
      return false;
    }

    // Initialization of the score deque,
    // based on the frame set length.
    if self.deque_offset > 0
      && frame_set.len() > self.deque_offset + 1
      && self.score_deque.is_empty()
    {
      self.initialize_score_deque(frame_set, input_frameno, self.deque_offset);
    } else if self.score_deque.is_empty() {
      self.initialize_score_deque(
        frame_set,
        input_frameno,
        frame_set.len() - 1,
      );

      self.deque_offset = frame_set.len() - 2;
    }
    // Run a single frame comparison and add it to the deque.
    // Decrease the deque offset if there are no new frames.
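    // (Near the end of the video, when no new frames are available to score,
    // the current frame moves toward the front of the deque instead.)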
    if frame_set.len() > self.deque_offset + 1 {
      self.run_comparison(
        frame_set[self.deque_offset].clone(),
        frame_set[self.deque_offset + 1].clone(),
        input_frameno,
      );
    } else {
      self.deque_offset -= 1;
    }

    // Adaptive scenecut check
    let (scenecut, score) = self.adaptive_scenecut();
    let scenecut = self.handle_min_max_intervals(distance).unwrap_or(scenecut);
    debug!(
      "[SC-Detect] Frame {}: Raw={:5.1}  ImpBl={:5.1}  Bwd={:5.1}  Fwd={:5.1}  Th={:.1}  {}",
      input_frameno,
      score.inter_cost,
      score.imp_block_cost,
      score.backward_adjusted_cost,
      score.forward_adjusted_cost,
      score.threshold,
      if scenecut { "Scenecut" } else { "No cut" }
    );

    // Keep a score deque of 5 backward frames
    // plus forward frames covering the lookahead offset.
    if self.score_deque.len() > 5 + self.lookahead_offset {
      self.score_deque.pop();
    }

    scenecut
  }

  fn handle_min_max_intervals(&mut self, distance: u64) -> Option<bool> {
    // Handle minimum and maximum keyframe intervals.
    if distance < self.encoder_config.min_key_frame_interval {
      return Some(false);
    }
    if distance >= self.encoder_config.max_key_frame_interval {
      return Some(true);
    }
    None
  }

  // Initially fill the score deque with frame scores
  fn initialize_score_deque(
    &mut self, frame_set: &[Arc<Frame<T>>], input_frameno: u64,
    init_len: usize,
  ) {
    for x in 0..init_len {
      self.run_comparison(
        frame_set[x].clone(),
        frame_set[x + 1].clone(),
        input_frameno,
      );
    }
  }

  /// Runs a scene change comparison between 2 given frames.
  /// Inserts the result at the start of the score deque.
  fn run_comparison(
    &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
    input_frameno: u64,
  ) {
    let mut result = if self.speed_mode == SceneDetectionSpeed::Fast {
      self.fast_scenecut(frame1, frame2)
    } else {
      self.cost_scenecut(frame1, frame2)
    };

    // Subtract the highest metric value of the surrounding frames from the
    // current one. This makes the peaks in the metric more distinct.
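    // For example, if the current inter cost is 40.0 and the surrounding
    // costs in the deque are [12.0, 9.0, 35.0], the adjusted cost becomes
    // 40.0 - 35.0 = 5.0 (clamped to 0.0 if the difference goes negative).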
    if self.speed_mode != SceneDetectionSpeed::Fast && self.deque_offset > 0 {
      if input_frameno == 1 {
        // Accounts for the second frame not having a score to adjust against.
        // It should always be 0 because the first frame of the video is always a keyframe.
        result.backward_adjusted_cost = 0.0;
      } else {
        let mut adjusted_cost = f64::MAX;
        for other_cost in
          self.score_deque.iter().take(self.deque_offset).map(|i| i.inter_cost)
        {
          let this_cost = result.inter_cost - other_cost;
          if this_cost < adjusted_cost {
            adjusted_cost = this_cost;
          }
          if adjusted_cost < 0.0 {
            adjusted_cost = 0.0;
            break;
          }
        }
        result.backward_adjusted_cost = adjusted_cost;
      }
      if !self.score_deque.is_empty() {
        for i in 0..(cmp::min(self.deque_offset, self.score_deque.len())) {
          let adjusted_cost =
            self.score_deque[i].inter_cost - result.inter_cost;
          if i == 0
            || adjusted_cost < self.score_deque[i].forward_adjusted_cost
          {
            self.score_deque[i].forward_adjusted_cost = adjusted_cost;
          }
          if self.score_deque[i].forward_adjusted_cost < 0.0 {
            self.score_deque[i].forward_adjusted_cost = 0.0;
          }
        }
      }
    }
    self.score_deque.insert(0, result);
  }

  /// Compares the current scene score to an adaptive threshold based on previous scores.
  /// The value of the current frame is offset by the lookahead, if lookahead >= 5.
  /// Returns `true` if the current scene score is higher than the adaptive threshold.
  fn adaptive_scenecut(&mut self) -> (bool, ScenecutResult) {
    let score = self.score_deque[self.deque_offset];

    // We use the importance block algorithm's cost metrics as a secondary algorithm
    // because, although it struggles in certain scenarios such as
    // finding the end of a pan, it is very good at detecting hard scenecuts
    // or detecting if a pan exists.
    // Because of this, we only consider a frame for a scenechange if
    // the importance block algorithm is over the threshold either on this frame (hard scenecut)
    // or within the past few frames (pan). This helps filter out a few false positives
    // produced by the cost-based algorithm.
    let imp_block_threshold =
      IMP_BLOCK_DIFF_THRESHOLD * (self.bit_depth as f64) / 8.0;
    if !self.score_deque[self.deque_offset..]
      .iter()
      .any(|result| result.imp_block_cost >= imp_block_threshold)
    {
      return (false, score);
    }

    let cost = score.forward_adjusted_cost;
    if cost >= score.threshold {
      let back_deque = &self.score_deque[self.deque_offset + 1..];
      let forward_deque = &self.score_deque[..self.deque_offset];
      let back_over_tr_count = back_deque
        .iter()
        .filter(|result| result.backward_adjusted_cost >= result.threshold)
        .count();
      let forward_over_tr_count = forward_deque
        .iter()
        .filter(|result| result.forward_adjusted_cost >= result.threshold)
        .count();

      // Check for a scenecut after the flashes:
      // no frames over threshold forward
      // and some frames over threshold backward.
      let back_count_req = if self.speed_mode == SceneDetectionSpeed::Fast {
        // Fast scenecut is more sensitive to false flash detection,
        // so we want more "evidence" of there being a flash before creating a keyframe.
        2
      } else {
        1
      };
      if forward_over_tr_count == 0 && back_over_tr_count >= back_count_req {
        return (true, score);
      }

      // Check for a scenecut before a flash,
      // if the distance is longer than the max flash length.
      if back_over_tr_count == 0
        && forward_over_tr_count == 1
        && forward_deque[0].forward_adjusted_cost >= forward_deque[0].threshold
      {
        return (true, score);
      }

      if back_over_tr_count != 0 || forward_over_tr_count != 0 {
        return (false, score);
      }
    }

    (cost >= score.threshold, score)
  }

  /// The fast algorithm detects fast cuts using a raw difference
  /// in pixel values between the scaled frames.
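  ///
  /// Only the luma plane is compared, using the average per-pixel SAD
  /// computed by `delta_in_planes`.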
  #[hawktracer(fast_scenecut)]
  fn fast_scenecut(
    &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
  ) -> ScenecutResult {
    if self.scale_factor == 1 {
      if let Some(frame_buffer) = self.frame_ref_buffer.as_deref_mut() {
        frame_buffer.swap(0, 1);
        frame_buffer[1] = frame2;
      } else {
        self.frame_ref_buffer = Some(Box::new([frame1, frame2]));
      }

      if let Some(frame_buffer) = self.frame_ref_buffer.as_deref() {
        let delta = self.delta_in_planes(
          &frame_buffer[0].planes[0],
          &frame_buffer[1].planes[0],
        );

        ScenecutResult {
          threshold: self.threshold,
          inter_cost: delta,
          imp_block_cost: delta,
          backward_adjusted_cost: delta,
          forward_adjusted_cost: delta,
        }
      } else {
        unreachable!()
      }
    } else {
      // Downscale both frames for faster comparison.
      if let Some((frame_buffer, is_initialized)) =
        &mut self.downscaled_frame_buffer
      {
        let frame_buffer = &mut **frame_buffer;
        if *is_initialized {
          frame_buffer.swap(0, 1);
          frame2.planes[0]
            .downscale_in_place(self.scale_factor, &mut frame_buffer[1]);
        } else {
          // Both frames are in an irrelevant and invalid state, so we have to
          // reinitialize them, but we can reuse their allocations.
          frame1.planes[0]
            .downscale_in_place(self.scale_factor, &mut frame_buffer[0]);
          frame2.planes[0]
            .downscale_in_place(self.scale_factor, &mut frame_buffer[1]);
          *is_initialized = true;
        }
      } else {
        self.downscaled_frame_buffer = Some((
          Box::new([
            frame1.planes[0].downscale(self.scale_factor),
            frame2.planes[0].downscale(self.scale_factor),
          ]),
          true, // the frame buffer is initialized and in a valid state
        ));
      }

      if let Some((frame_buffer, _)) = &self.downscaled_frame_buffer {
        let frame_buffer = &**frame_buffer;
        let delta = self.delta_in_planes(&frame_buffer[0], &frame_buffer[1]);

        ScenecutResult {
          threshold: self.threshold,
          inter_cost: delta,
          imp_block_cost: delta,
          forward_adjusted_cost: delta,
          backward_adjusted_cost: delta,
        }
      } else {
        unreachable!()
      }
    }
  }

  /// Run a comparison between two frames to determine if they qualify for a scenecut.
  ///
  /// We gather both intra and inter costs for the frames,
  /// as well as an importance-block-based difference,
  /// and use all three metrics.
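  ///
  /// The intra cost sets the scenecut threshold, the inter cost (after
  /// forward and backward adjustment) is the primary score compared against
  /// it, and the importance block difference acts as a secondary gate in
  /// `adaptive_scenecut`.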
  #[hawktracer(cost_scenecut)]
  fn cost_scenecut(
    &self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
  ) -> ScenecutResult {
    let frame2_inter_ref = Arc::clone(&frame2);
    let frame1_imp_ref = Arc::clone(&frame1);
    let frame2_imp_ref = Arc::clone(&frame2);

    let mut intra_cost = 0.0;
    let mut mv_inter_cost = 0.0;
    let mut imp_block_cost = 0.0;
    crate::rayon::scope(|s| {
      s.spawn(|_| {
        let intra_costs = estimate_intra_costs(
          &*frame2,
          self.bit_depth,
          self.cpu_feature_level,
        );
        intra_cost = intra_costs.iter().map(|&cost| cost as u64).sum::<u64>()
          as f64
          / intra_costs.len() as f64
      });
      s.spawn(|_| {
        let inter_costs = estimate_inter_costs(
          frame2_inter_ref,
          frame1,
          self.bit_depth,
          self.encoder_config,
          self.sequence.clone(),
        );

        mv_inter_cost =
          inter_costs.iter().map(|&cost| cost as u64).sum::<u64>() as f64
            / inter_costs.len() as f64
      });
      s.spawn(|_| {
        let inter_costs =
          estimate_importance_block_difference(frame2_imp_ref, frame1_imp_ref);

        imp_block_cost =
          inter_costs.iter().map(|&cost| cost as u64).sum::<u64>() as f64
            / inter_costs.len() as f64
      });
    });

    // `BIAS` determines how likely we are
    // to choose a keyframe, between 0.0 and 1.0.
    // Higher values mean we are more likely to choose a keyframe.
    // This value was chosen based on trials using the new
    // adaptive scenecut code.
    const BIAS: f64 = 0.7;
    let threshold = intra_cost * (1.0 - BIAS);
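    // For example, an average intra cost of 100.0 gives a scenecut
    // threshold of 100.0 * (1.0 - 0.7) = 30.0.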

    ScenecutResult {
      inter_cost: mv_inter_cost,
      imp_block_cost,
      threshold,
      backward_adjusted_cost: 0.0,
      forward_adjusted_cost: 0.0,
    }
  }

  /// Calculates the average sum of absolute difference (SAD) per pixel between 2 planes
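  ///
  /// The result is normalized by the number of pixels in the (possibly
  /// downscaled) comparison frame, so the score is a per-pixel average
  /// rather than a raw sum.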
  #[hawktracer(delta_in_planes)]
  fn delta_in_planes(&self, plane1: &Plane<T>, plane2: &Plane<T>) -> f64 {
    let mut delta = 0;

    let lines = plane1.rows_iter().zip(plane2.rows_iter());

    for (l1, l2) in lines {
      let l1 = l1.get(..plane1.cfg.width).unwrap_or(l1);
      let l2 = l2.get(..plane1.cfg.width).unwrap_or(l2);
      delta += sad_row::sad_row(l1, l2, self.cpu_feature_level);
    }
    delta as f64 / self.pixels as f64
  }
}

/// Scaling factor for a frame in scene detection
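///
/// For example, a 1920x1080 sequence in fast mode has a small edge of 1080,
/// giving a scale factor of 8 and a 240x135 comparison frame.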
fn detect_scale_factor(
  sequence: &Arc<Sequence>, speed_mode: SceneDetectionSpeed,
) -> usize {
  let small_edge =
    cmp::min(sequence.max_frame_height, sequence.max_frame_width) as usize;
  let scale_factor = if speed_mode == SceneDetectionSpeed::Fast {
    match small_edge {
      0..=240 => 1,
      241..=480 => 2,
      481..=720 => 4,
      721..=1080 => 8,
      1081..=1600 => 16,
      _ => 32,
    }
  } else {
    match small_edge {
      0..=1600 => 1,
      1601..=2160 => 2,
      _ => 4,
    }
  };

  debug!(
    "Scene detection scale factor {}, [{},{}] -> [{},{}]",
    scale_factor,
    sequence.max_frame_width,
    sequence.max_frame_height,
    sequence.max_frame_width as usize / scale_factor,
    sequence.max_frame_height as usize / scale_factor
  );
  scale_factor
}

#[derive(Debug, Clone, Copy)]
struct ScenecutResult {
  inter_cost: f64,
  imp_block_cost: f64,
  backward_adjusted_cost: f64,
  forward_adjusted_cost: f64,
  threshold: f64,
}