1 // Copyright (c) 2018-2021, The rav1e contributors. All rights reserved
2 //
3 // This source code is subject to the terms of the BSD 2 Clause License and
4 // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5 // was not distributed with this source code in the LICENSE file, you can
6 // obtain it at www.aomedia.org/license/software. If the Alliance for Open
7 // Media Patent License 1.0 was not distributed with this source code in the
8 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10 use crate::api::lookahead::*;
11 use crate::api::{EncoderConfig, SceneDetectionSpeed};
12 use crate::cpu_features::CpuFeatureLevel;
13 use crate::encoder::Sequence;
14 use crate::frame::*;
15 use crate::sad_row;
16 use crate::util::Pixel;
17 use rust_hawktracer::*;
18 use std::sync::Arc;
19 use std::{cmp, u64};
20
21 // The fast implementation is based on a Python implementation at
22 // https://pyscenedetect.readthedocs.io/en/latest/reference/detection-methods/.
23 // The Python implementation uses HSV values and a threshold of 30. Comparing the
24 // YUV values was sufficient in most cases, and avoided a more costly YUV->RGB->HSV
25 // conversion, but the deltas needed to be scaled down. The deltas for keyframes
26 // in YUV were about 1/3 to 1/2 of what they were in HSV, but non-keyframes were
27 // very unlikely to have a delta greater than 3 in YUV, whereas they may reach into
28 // the double digits in HSV.
29 //
30 // Experiments have shown that these thresholds are optimal.
31 const FAST_THRESHOLD: f64 = 18.0;
32 const IMP_BLOCK_DIFF_THRESHOLD: f64 = 7.0;
33
34 /// Runs keyframe detection on frames from the lookahead queue.
35 pub struct SceneChangeDetector<T: Pixel> {
36 /// Minimum average difference between YUV deltas that will trigger a scene change.
37 threshold: f64,
38 /// Fast scene cut detection mode, uses simple SAD instead of encoder cost estimates.
39 speed_mode: SceneDetectionSpeed,
40 /// scaling factor for fast scene detection
41 scale_factor: usize,
42 /// Frame buffer for scaled frames
43 downscaled_frame_buffer: Option<(
44 Box<[Plane<T>; 2]>,
45 // `true` if the data is valid and initialized, or `false`
46 // if it should be assumed that the data is uninitialized.
47 bool,
48 )>,
49 /// Frame buffer for holding references to source frames.
50 ///
51 /// Useful for not copying data into the downscaled frame buffer
52 /// when using a downscale factor of 1.
53 frame_ref_buffer: Option<Box<[Arc<Frame<T>>; 2]>>,
54 /// Deque offset for current
55 lookahead_offset: usize,
56 /// Start deque offset based on lookahead
57 deque_offset: usize,
58 /// Scenechange results for adaptive threshold
59 score_deque: Vec<ScenecutResult>,
60 /// Number of pixels in scaled frame for fast mode
61 pixels: usize,
62 /// The bit depth of the video.
63 bit_depth: usize,
64 /// The CPU feature level to be used.
65 cpu_feature_level: CpuFeatureLevel,
66 encoder_config: EncoderConfig,
67 sequence: Arc<Sequence>,
68 }
69
70 impl<T: Pixel> SceneChangeDetector<T> {
new( encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel, lookahead_distance: usize, sequence: Arc<Sequence>, ) -> Self71 pub fn new(
72 encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel,
73 lookahead_distance: usize, sequence: Arc<Sequence>,
74 ) -> Self {
75 let bit_depth = encoder_config.bit_depth;
76 let speed_mode = if encoder_config.low_latency {
77 SceneDetectionSpeed::Fast
78 } else {
79 encoder_config.speed_settings.fast_scene_detection
80 };
81
82 // Scale factor for fast and medium scene detection
83 let scale_factor = detect_scale_factor(&sequence, speed_mode);
84
85 // Set lookahead offset to 5 if normal lookahead available
86 let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 };
87 let deque_offset = lookahead_offset;
88
89 let score_deque = Vec::with_capacity(5 + lookahead_distance);
90
91 // Pixel count for fast scenedetect
92 let pixels = if speed_mode == SceneDetectionSpeed::Fast {
93 (sequence.max_frame_height as usize / scale_factor)
94 * (sequence.max_frame_width as usize / scale_factor)
95 } else {
96 1
97 };
98
99 let threshold = FAST_THRESHOLD * (bit_depth as f64) / 8.0;
100
101 Self {
102 threshold,
103 speed_mode,
104 scale_factor,
105 downscaled_frame_buffer: None,
106 frame_ref_buffer: None,
107 lookahead_offset,
108 deque_offset,
109 score_deque,
110 pixels,
111 bit_depth,
112 cpu_feature_level,
113 encoder_config,
114 sequence,
115 }
116 }
117
118 /// Runs keyframe detection on the next frame in the lookahead queue.
119 ///
120 /// This function requires that a subset of input frames
121 /// is passed to it in order, and that `keyframes` is only
122 /// updated from this method. `input_frameno` should correspond
123 /// to the second frame in `frame_set`.
124 ///
125 /// This will gracefully handle the first frame in the video as well.
126 #[hawktracer(analyze_next_frame)]
analyze_next_frame( &mut self, frame_set: &[Arc<Frame<T>>], input_frameno: u64, previous_keyframe: u64, ) -> bool127 pub fn analyze_next_frame(
128 &mut self, frame_set: &[Arc<Frame<T>>], input_frameno: u64,
129 previous_keyframe: u64,
130 ) -> bool {
131 // Use score deque for adaptive threshold for scene cut
132 // Declare score_deque offset based on lookahead for scene change scores
133
134 // Find the distance to the previous keyframe.
135 let distance = input_frameno - previous_keyframe;
136
137 if frame_set.len() <= self.lookahead_offset {
138 // Don't insert keyframes in the last few frames of the video
139 // This is basically a scene flash and a waste of bits
140 return false;
141 }
142
143 if self.encoder_config.speed_settings.no_scene_detection {
144 if let Some(true) = self.handle_min_max_intervals(distance) {
145 return true;
146 };
147 return false;
148 }
149
150 // Initiallization of score deque
151 // based on frame set length
152 if self.deque_offset > 0
153 && frame_set.len() > self.deque_offset + 1
154 && self.score_deque.is_empty()
155 {
156 self.initialize_score_deque(frame_set, input_frameno, self.deque_offset);
157 } else if self.score_deque.is_empty() {
158 self.initialize_score_deque(
159 frame_set,
160 input_frameno,
161 frame_set.len() - 1,
162 );
163
164 self.deque_offset = frame_set.len() - 2;
165 }
166 // Running single frame comparison and adding it to deque
167 // Decrease deque offset if there is no new frames
168 if frame_set.len() > self.deque_offset + 1 {
169 self.run_comparison(
170 frame_set[self.deque_offset].clone(),
171 frame_set[self.deque_offset + 1].clone(),
172 input_frameno,
173 );
174 } else {
175 self.deque_offset -= 1;
176 }
177
178 // Adaptive scenecut check
179 let (scenecut, score) = self.adaptive_scenecut();
180 let scenecut = self.handle_min_max_intervals(distance).unwrap_or(scenecut);
181 debug!(
182 "[SC-Detect] Frame {}: Raw={:5.1} ImpBl={:5.1} Bwd={:5.1} Fwd={:5.1} Th={:.1} {}",
183 input_frameno,
184 score.inter_cost,
185 score.imp_block_cost,
186 score.backward_adjusted_cost,
187 score.forward_adjusted_cost,
188 score.threshold,
189 if scenecut { "Scenecut" } else { "No cut" }
190 );
191
192 // Keep score deque of 5 backward frames
193 // and forward frames of length of lookahead offset
194 if self.score_deque.len() > 5 + self.lookahead_offset {
195 self.score_deque.pop();
196 }
197
198 scenecut
199 }
200
handle_min_max_intervals(&mut self, distance: u64) -> Option<bool>201 fn handle_min_max_intervals(&mut self, distance: u64) -> Option<bool> {
202 // Handle minimum and maximum keyframe intervals.
203 if distance < self.encoder_config.min_key_frame_interval {
204 return Some(false);
205 }
206 if distance >= self.encoder_config.max_key_frame_interval {
207 return Some(true);
208 }
209 None
210 }
211
212 // Initially fill score deque with frame scores
initialize_score_deque( &mut self, frame_set: &[Arc<Frame<T>>], input_frameno: u64, init_len: usize, )213 fn initialize_score_deque(
214 &mut self, frame_set: &[Arc<Frame<T>>], input_frameno: u64,
215 init_len: usize,
216 ) {
217 for x in 0..init_len {
218 self.run_comparison(
219 frame_set[x].clone(),
220 frame_set[x + 1].clone(),
221 input_frameno,
222 );
223 }
224 }
225
226 /// Runs scene change comparison beetween 2 given frames
227 /// Insert result to start of score deque
run_comparison( &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>, input_frameno: u64, )228 fn run_comparison(
229 &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
230 input_frameno: u64,
231 ) {
232 let mut result = if self.speed_mode == SceneDetectionSpeed::Fast {
233 self.fast_scenecut(frame1, frame2)
234 } else {
235 self.cost_scenecut(frame1, frame2)
236 };
237
238 // Subtract the highest metric value of surrounding frames from the current one
239 // It makes the peaks in the metric more distinct
240 if self.speed_mode != SceneDetectionSpeed::Fast && self.deque_offset > 0 {
241 if input_frameno == 1 {
242 // Accounts for the second frame not having a score to adjust against.
243 // It should always be 0 because the first frame of the video is always a keyframe.
244 result.backward_adjusted_cost = 0.0;
245 } else {
246 let mut adjusted_cost = f64::MAX;
247 for other_cost in
248 self.score_deque.iter().take(self.deque_offset).map(|i| i.inter_cost)
249 {
250 let this_cost = result.inter_cost - other_cost;
251 if this_cost < adjusted_cost {
252 adjusted_cost = this_cost;
253 }
254 if adjusted_cost < 0.0 {
255 adjusted_cost = 0.0;
256 break;
257 }
258 }
259 result.backward_adjusted_cost = adjusted_cost;
260 }
261 if !self.score_deque.is_empty() {
262 for i in 0..(cmp::min(self.deque_offset, self.score_deque.len())) {
263 let adjusted_cost =
264 self.score_deque[i].inter_cost - result.inter_cost;
265 if i == 0
266 || adjusted_cost < self.score_deque[i].forward_adjusted_cost
267 {
268 self.score_deque[i].forward_adjusted_cost = adjusted_cost;
269 }
270 if self.score_deque[i].forward_adjusted_cost < 0.0 {
271 self.score_deque[i].forward_adjusted_cost = 0.0;
272 }
273 }
274 }
275 }
276 self.score_deque.insert(0, result);
277 }
278
279 /// Compares current scene score to adapted threshold based on previous scores
280 /// Value of current frame is offset by lookahead, if lookahead >=5
281 /// Returns true if current scene score is higher than adapted threshold
adaptive_scenecut(&mut self) -> (bool, ScenecutResult)282 fn adaptive_scenecut(&mut self) -> (bool, ScenecutResult) {
283 let score = self.score_deque[self.deque_offset];
284
285 // We use the importance block algorithm's cost metrics as a secondary algorithm
286 // because, although it struggles in certain scenarios such as
287 // finding the end of a pan, it is very good at detecting hard scenecuts
288 // or detecting if a pan exists.
289 // Because of this, we only consider a frame for a scenechange if
290 // the importance block algorithm is over the threshold either on this frame (hard scenecut)
291 // or within the past few frames (pan). This helps filter out a few false positives
292 // produced by the cost-based algorithm.
293 let imp_block_threshold =
294 IMP_BLOCK_DIFF_THRESHOLD * (self.bit_depth as f64) / 8.0;
295 if !&self.score_deque[self.deque_offset..]
296 .iter()
297 .any(|result| result.imp_block_cost >= imp_block_threshold)
298 {
299 return (false, score);
300 }
301
302 let cost = score.forward_adjusted_cost;
303 if cost >= score.threshold {
304 let back_deque = &self.score_deque[self.deque_offset + 1..];
305 let forward_deque = &self.score_deque[..self.deque_offset];
306 let back_over_tr_count = back_deque
307 .iter()
308 .filter(|result| result.backward_adjusted_cost >= result.threshold)
309 .count();
310 let forward_over_tr_count = forward_deque
311 .iter()
312 .filter(|result| result.forward_adjusted_cost >= result.threshold)
313 .count();
314
315 // Check for scenecut after the flashes
316 // No frames over threshold forward
317 // and some frames over threshold backward
318 let back_count_req = if self.speed_mode == SceneDetectionSpeed::Fast {
319 // Fast scenecut is more sensitive to false flash detection,
320 // so we want more "evidence" of there being a flash before creating a keyframe.
321 2
322 } else {
323 1
324 };
325 if forward_over_tr_count == 0 && back_over_tr_count >= back_count_req {
326 return (true, score);
327 }
328
329 // Check for scenecut before flash
330 // If distance longer than max flash length
331 if back_over_tr_count == 0
332 && forward_over_tr_count == 1
333 && forward_deque[0].forward_adjusted_cost >= forward_deque[0].threshold
334 {
335 return (true, score);
336 }
337
338 if back_over_tr_count != 0 || forward_over_tr_count != 0 {
339 return (false, score);
340 }
341 }
342
343 (cost >= score.threshold, score)
344 }
345
346 /// The fast algorithm detects fast cuts using a raw difference
347 /// in pixel values between the scaled frames.
348 #[hawktracer(fast_scenecut)]
fast_scenecut( &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>, ) -> ScenecutResult349 fn fast_scenecut(
350 &mut self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
351 ) -> ScenecutResult {
352 if self.scale_factor == 1 {
353 if let Some(frame_buffer) = self.frame_ref_buffer.as_deref_mut() {
354 frame_buffer.swap(0, 1);
355 frame_buffer[1] = frame2;
356 } else {
357 self.frame_ref_buffer = Some(Box::new([frame1, frame2]));
358 }
359
360 if let Some(frame_buffer) = self.frame_ref_buffer.as_deref() {
361 let delta = self.delta_in_planes(
362 &frame_buffer[0].planes[0],
363 &frame_buffer[1].planes[0],
364 );
365
366 ScenecutResult {
367 threshold: self.threshold as f64,
368 inter_cost: delta as f64,
369 imp_block_cost: delta as f64,
370 backward_adjusted_cost: delta as f64,
371 forward_adjusted_cost: delta as f64,
372 }
373 } else {
374 unreachable!()
375 }
376 } else {
377 // downscale both frames for faster comparison
378 if let Some((frame_buffer, is_initialized)) =
379 &mut self.downscaled_frame_buffer
380 {
381 let frame_buffer = &mut **frame_buffer;
382 if *is_initialized {
383 frame_buffer.swap(0, 1);
384 frame2.planes[0]
385 .downscale_in_place(self.scale_factor, &mut frame_buffer[1]);
386 } else {
387 // both frames are in an irrelevant and invalid state, so we have to reinitialize
388 // them, but we can reuse their allocations
389 frame1.planes[0]
390 .downscale_in_place(self.scale_factor, &mut frame_buffer[0]);
391 frame2.planes[0]
392 .downscale_in_place(self.scale_factor, &mut frame_buffer[1]);
393 *is_initialized = true;
394 }
395 } else {
396 self.downscaled_frame_buffer = Some((
397 Box::new([
398 frame1.planes[0].downscale(self.scale_factor),
399 frame2.planes[0].downscale(self.scale_factor),
400 ]),
401 true, // the frame buffer is initialized and in a valid state
402 ));
403 }
404
405 if let Some((frame_buffer, _)) = &self.downscaled_frame_buffer {
406 let frame_buffer = &**frame_buffer;
407 let delta = self.delta_in_planes(&frame_buffer[0], &frame_buffer[1]);
408
409 ScenecutResult {
410 threshold: self.threshold as f64,
411 inter_cost: delta as f64,
412 imp_block_cost: delta as f64,
413 forward_adjusted_cost: delta as f64,
414 backward_adjusted_cost: delta as f64,
415 }
416 } else {
417 unreachable!()
418 }
419 }
420 }
421
422 /// Run a comparison between two frames to determine if they qualify for a scenecut.
423 ///
424 /// We gather both intra and inter costs for the frames,
425 /// as well as an importance-block-based difference,
426 /// and use all three metrics.
427 #[hawktracer(cost_scenecut)]
cost_scenecut( &self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>, ) -> ScenecutResult428 fn cost_scenecut(
429 &self, frame1: Arc<Frame<T>>, frame2: Arc<Frame<T>>,
430 ) -> ScenecutResult {
431 let frame2_inter_ref = Arc::clone(&frame2);
432 let frame1_imp_ref = Arc::clone(&frame1);
433 let frame2_imp_ref = Arc::clone(&frame2);
434
435 let mut intra_cost = 0.0;
436 let mut mv_inter_cost = 0.0;
437 let mut imp_block_cost = 0.0;
438 crate::rayon::scope(|s| {
439 s.spawn(|_| {
440 let intra_costs = estimate_intra_costs(
441 &*frame2,
442 self.bit_depth,
443 self.cpu_feature_level,
444 );
445 intra_cost = intra_costs.iter().map(|&cost| cost as u64).sum::<u64>()
446 as f64
447 / intra_costs.len() as f64
448 });
449 s.spawn(|_| {
450 let inter_costs = estimate_inter_costs(
451 frame2_inter_ref,
452 frame1,
453 self.bit_depth,
454 self.encoder_config,
455 self.sequence.clone(),
456 );
457
458 mv_inter_cost =
459 inter_costs.iter().map(|&cost| cost as u64).sum::<u64>() as f64
460 / inter_costs.len() as f64
461 });
462 s.spawn(|_| {
463 let inter_costs =
464 estimate_importance_block_difference(frame2_imp_ref, frame1_imp_ref);
465
466 imp_block_cost =
467 inter_costs.iter().map(|&cost| cost as u64).sum::<u64>() as f64
468 / inter_costs.len() as f64
469 });
470 });
471
472 // `BIAS` determines how likely we are
473 // to choose a keyframe, between 0.0-1.0.
474 // Higher values mean we are more likely to choose a keyframe.
475 // This value was chosen based on trials using the new
476 // adaptive scenecut code.
477 const BIAS: f64 = 0.7;
478 let threshold = intra_cost * (1.0 - BIAS);
479
480 ScenecutResult {
481 inter_cost: mv_inter_cost,
482 imp_block_cost,
483 threshold,
484 backward_adjusted_cost: 0.0,
485 forward_adjusted_cost: 0.0,
486 }
487 }
488
489 /// Calculates the average sum of absolute difference (SAD) per pixel between 2 planes
490 #[hawktracer(delta_in_planes)]
delta_in_planes(&self, plane1: &Plane<T>, plane2: &Plane<T>) -> f64491 fn delta_in_planes(&self, plane1: &Plane<T>, plane2: &Plane<T>) -> f64 {
492 let mut delta = 0;
493
494 let lines = plane1.rows_iter().zip(plane2.rows_iter());
495
496 for (l1, l2) in lines {
497 let l1 = l1.get(..plane1.cfg.width).unwrap_or(l1);
498 let l2 = l2.get(..plane1.cfg.width).unwrap_or(l2);
499 delta += sad_row::sad_row(l1, l2, self.cpu_feature_level);
500 }
501 delta as f64 / self.pixels as f64
502 }
503 }
504
505 /// Scaling factor for frame in scene detection
detect_scale_factor( sequence: &Arc<Sequence>, speed_mode: SceneDetectionSpeed, ) -> usize506 fn detect_scale_factor(
507 sequence: &Arc<Sequence>, speed_mode: SceneDetectionSpeed,
508 ) -> usize {
509 let small_edge =
510 cmp::min(sequence.max_frame_height, sequence.max_frame_width) as usize;
511 let scale_factor;
512 if speed_mode == SceneDetectionSpeed::Fast {
513 scale_factor = match small_edge {
514 0..=240 => 1,
515 241..=480 => 2,
516 481..=720 => 4,
517 721..=1080 => 8,
518 1081..=1600 => 16,
519 1601..=usize::MAX => 32,
520 _ => 1,
521 } as usize
522 } else {
523 scale_factor = match small_edge {
524 0..=1600 => 1,
525 1601..=2160 => 2,
526 2161..=usize::MAX => 4,
527 _ => 1,
528 } as usize
529 };
530
531 debug!(
532 "Scene detection scale factor {}, [{},{}] -> [{},{}]",
533 scale_factor,
534 sequence.max_frame_width,
535 sequence.max_frame_height,
536 sequence.max_frame_width as usize / scale_factor,
537 sequence.max_frame_height as usize / scale_factor
538 );
539 scale_factor
540 }
541
542 #[derive(Debug, Clone, Copy)]
543 struct ScenecutResult {
544 inter_cost: f64,
545 imp_block_cost: f64,
546 backward_adjusted_cost: f64,
547 forward_adjusted_cost: f64,
548 threshold: f64,
549 }
550