// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/cast/sender/video_sender.h"

#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <cstring>
#include <utility>

#include "base/bind.h"
#include "base/logging.h"
#include "base/trace_event/trace_event.h"
#include "media/cast/net/cast_transport_config.h"
#include "media/cast/sender/performance_metrics_overlay.h"
#include "media/cast/sender/video_encoder.h"

namespace media {
namespace cast {

namespace {

// The following two constants are used to adjust the target
// playout delay (when allowed). They were calculated using
// a combination of cast_benchmark runs and manual testing.
//
// This is how many round trips we think we need on the network.
const int kRoundTripsNeeded = 4;

// This is an estimate of all the constant time needed independent of network
// quality (e.g., additional time that accounts for encode and decode time).
const int kConstantTimeMs = 75;

// The target maximum utilization of the encoder and network resources.  This
// is used to attenuate the actual measured utilization values in order to
// provide "breathing room" (i.e., to ensure there will be sufficient CPU and
// bandwidth available to handle the occasional more-complex frames).
const int kTargetUtilizationPercentage = 75;

// The minimum interval, in milliseconds, between key frame requests sent to
// the encoder in response to PLI messages.  This prevents sending multiple
// requests while the sender is still waiting for an encoded key frame, or
// while several PLI messages arrive within a short period.
const int64_t kMinKeyFrameRequestOnPliIntervalMs = 500;

// Extract capture begin/end timestamps from |video_frame|'s metadata and log
// them.
void LogVideoCaptureTimestamps(CastEnvironment* cast_environment,
                               const media::VideoFrame& video_frame,
                               RtpTimeTicks rtp_timestamp) {
  std::unique_ptr<FrameEvent> capture_begin_event(new FrameEvent());
  capture_begin_event->type = FRAME_CAPTURE_BEGIN;
  capture_begin_event->media_type = VIDEO_EVENT;
  capture_begin_event->rtp_timestamp = rtp_timestamp;

  std::unique_ptr<FrameEvent> capture_end_event(new FrameEvent());
  capture_end_event->type = FRAME_CAPTURE_END;
  capture_end_event->media_type = VIDEO_EVENT;
  capture_end_event->rtp_timestamp = rtp_timestamp;
  capture_end_event->width = video_frame.visible_rect().width();
  capture_end_event->height = video_frame.visible_rect().height();

  if (video_frame.metadata()->capture_begin_time.has_value() &&
      video_frame.metadata()->capture_end_time.has_value()) {
    capture_begin_event->timestamp =
        *video_frame.metadata()->capture_begin_time;
    capture_end_event->timestamp = *video_frame.metadata()->capture_end_time;
  } else {
    // The frame capture timestamps were not provided by the video capture
    // source.  Simply log the events as happening right now.
    capture_begin_event->timestamp = capture_end_event->timestamp =
        cast_environment->Clock()->NowTicks();
  }

  cast_environment->logger()->DispatchFrameEvent(
      std::move(capture_begin_event));
  cast_environment->logger()->DispatchFrameEvent(std::move(capture_end_event));
}

}  // namespace

// Note: we use a fixed bitrate value when an external video encoder is used.
// Some hardware encoders show bad behavior if we set the bitrate too
// frequently, e.g., quality drops or failure to honor the target bitrate.
// See details: crbug.com/392086.
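// When a fixed value is used, it is set to the midpoint of the configured
// minimum and maximum bitrates.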
VideoSender::VideoSender(
    scoped_refptr<CastEnvironment> cast_environment,
    const FrameSenderConfig& video_config,
    StatusChangeCallback status_change_cb,
    const CreateVideoEncodeAcceleratorCallback& create_vea_cb,
    const CreateVideoEncodeMemoryCallback& create_video_encode_mem_cb,
    CastTransport* const transport_sender,
    PlayoutDelayChangeCB playout_delay_change_cb,
    media::VideoCaptureFeedbackCB feedback_callback)
    : FrameSender(
          cast_environment,
          transport_sender,
          video_config,
          video_config.use_external_encoder
              ? NewFixedCongestionControl(
                    (video_config.min_bitrate + video_config.max_bitrate) / 2)
              : NewAdaptiveCongestionControl(cast_environment->Clock(),
                                             video_config.max_bitrate,
                                             video_config.min_bitrate,
                                             video_config.max_frame_rate)),
      frames_in_encoder_(0),
      last_bitrate_(0),
      playout_delay_change_cb_(std::move(playout_delay_change_cb)),
      feedback_cb_(feedback_callback),
      low_latency_mode_(false),
      last_reported_encoder_utilization_(-1.0),
      last_reported_lossy_utilization_(-1.0) {
  video_encoder_ = VideoEncoder::Create(
      cast_environment_,
      video_config,
      status_change_cb,
      create_vea_cb,
      create_video_encode_mem_cb);
  if (!video_encoder_) {
    cast_environment_->PostTask(
        CastEnvironment::MAIN, FROM_HERE,
        base::BindOnce(std::move(status_change_cb), STATUS_UNSUPPORTED_CODEC));
  }
}

VideoSender::~VideoSender() = default;

void VideoSender::InsertRawVideoFrame(
    scoped_refptr<media::VideoFrame> video_frame,
    const base::TimeTicks& reference_time) {
  DCHECK(cast_environment_->CurrentlyOn(CastEnvironment::MAIN));

  if (!video_encoder_) {
    NOTREACHED();
    return;
  }

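  // Convert the frame's media timestamp into RTP ticks.  kVideoFrequency is
  // the standard 90 kHz RTP video clock, so, for example, a 100 ms media
  // timestamp corresponds to 9000 RTP ticks.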
  const RtpTimeTicks rtp_timestamp =
      RtpTimeTicks::FromTimeDelta(video_frame->timestamp(), kVideoFrequency);
  LogVideoCaptureTimestamps(cast_environment_.get(), *video_frame,
                            rtp_timestamp);

  // Used by chrome/browser/media/cast_mirroring_performance_browsertest.cc
  TRACE_EVENT_INSTANT2("cast_perf_test", "InsertRawVideoFrame",
                       TRACE_EVENT_SCOPE_THREAD, "timestamp",
                       (reference_time - base::TimeTicks()).InMicroseconds(),
                       "rtp_timestamp", rtp_timestamp.lower_32_bits());

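  // Enter low-latency mode whenever the source marks its content as
  // interactive, and lower the target playout delay to the configured minimum
  // on the transition into that mode.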
  {
    bool new_low_latency_mode = video_frame->metadata()->interactive_content;
    if (new_low_latency_mode && !low_latency_mode_) {
      VLOG(1) << "Interactive mode playout time " << min_playout_delay_;
      playout_delay_change_cb_.Run(min_playout_delay_);
    }
    low_latency_mode_ = new_low_latency_mode;
  }

  // Drop the frame if either its RTP or reference timestamp is not an increase
  // over the last frame's.  This protects: 1) the duration calculations that
  // assume timestamps are monotonically non-decreasing, and 2) assumptions made
  // deeper in the implementation where each frame's RTP timestamp needs to be
  // unique.
  if (!last_enqueued_frame_reference_time_.is_null() &&
      (rtp_timestamp <= last_enqueued_frame_rtp_timestamp_ ||
       reference_time <= last_enqueued_frame_reference_time_)) {
    VLOG(1) << "Dropping video frame: RTP or reference time did not increase.";
    TRACE_EVENT_INSTANT2("cast.stream", "Video Frame Drop",
                         TRACE_EVENT_SCOPE_THREAD,
                         "rtp_timestamp", rtp_timestamp.lower_32_bits(),
                         "reason", "time did not increase");
    return;
  }

  // Request a key frame if a PLI message was received and enough time has
  // passed since the last key frame request that was sent in response to a
  // PLI.
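  // For example, with a 400 ms target playout delay, requests are spaced at
  // least max(500 ms, 6 * 400 ms) = 2400 ms apart.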
  if (picture_lost_at_receiver_) {
    const int64_t min_attempt_interval_ms =
        std::max(kMinKeyFrameRequestOnPliIntervalMs,
                 6 * target_playout_delay_.InMilliseconds());
    if (last_time_attempted_to_resolve_pli_.is_null() ||
        ((reference_time - last_time_attempted_to_resolve_pli_)
             .InMilliseconds() > min_attempt_interval_ms)) {
      video_encoder_->GenerateKeyFrame();
      last_time_attempted_to_resolve_pli_ = reference_time;
    }
  }

  // Two video frames are needed to compute the exact media duration added by
  // the next frame.  If there are no frames in the encoder, compute a guess
  // based on the configured |max_frame_rate_|.  Any error introduced by this
  // guess will be eliminated when |duration_in_encoder_| is updated in
  // OnEncodedVideoFrame().
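  // For example, with a configured maximum of 30 FPS, the guess is ~33 ms.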
  const base::TimeDelta duration_added_by_next_frame = frames_in_encoder_ > 0 ?
      reference_time - last_enqueued_frame_reference_time_ :
      base::TimeDelta::FromSecondsD(1.0 / max_frame_rate_);

  if (ShouldDropNextFrame(duration_added_by_next_frame)) {
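    // Compute a larger target playout delay that covers the estimated network
    // round trips plus a constant overhead, clamped to the configured maximum.
    // For example, with a 30 ms round-trip time this yields
    // min(4 * 30 ms + 75 ms, max_playout_delay_) = 195 ms, unless clamped.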
    base::TimeDelta new_target_delay = std::min(
        current_round_trip_time_ * kRoundTripsNeeded +
        base::TimeDelta::FromMilliseconds(kConstantTimeMs),
        max_playout_delay_);
    // In low-latency mode, we prefer dropping frames over increasing the
    // playout time.
    if (!low_latency_mode_ && new_target_delay > target_playout_delay_) {
      // If we detect that the user is no longer in low-latency mode and a
      // frame needs to be dropped, ensure the playout delay is at least the
      // starting value for playing animated content.  This is intended to
      // minimize freezes when moving from an interactive session to watching
      // animated content while being limited by the end-to-end delay.
      VLOG(1) << "Ensure playout time is at least " << animated_playout_delay_;
      if (new_target_delay < animated_playout_delay_)
        new_target_delay = animated_playout_delay_;
      VLOG(1) << "New target delay: " << new_target_delay.InMilliseconds();
      playout_delay_change_cb_.Run(new_target_delay);
    }

    // Some encoder implementations have a frame window for analysis. Since we
    // are dropping this frame, unless we instruct the encoder to flush all the
    // frames that have been enqueued for encoding, frames_in_encoder_ and
    // last_enqueued_frame_reference_time_ will never be updated and we will
    // drop every subsequent frame for the rest of the session.
    video_encoder_->EmitFrames();

    TRACE_EVENT_INSTANT2("cast.stream", "Video Frame Drop",
                         TRACE_EVENT_SCOPE_THREAD,
                         "rtp_timestamp", rtp_timestamp.lower_32_bits(),
                         "reason", "too much in flight");
    return;
  }

  if (video_frame->visible_rect().IsEmpty()) {
    VLOG(1) << "Rejecting empty video frame.";
    return;
  }

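  // Ask the congestion control for the bitrate to use for this frame, and
  // update the encoder only when the value changes (see the note above the
  // constructor about hardware encoders and frequent bitrate updates).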
  const int bitrate = congestion_control_->GetBitrate(
      reference_time + target_playout_delay_, target_playout_delay_);
  if (bitrate != last_bitrate_) {
    video_encoder_->SetBitRate(bitrate);
    last_bitrate_ = bitrate;
  }

  TRACE_COUNTER_ID1("cast.stream", "Video Target Bitrate", this, bitrate);

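  // Possibly superimpose a performance-metrics overlay onto the frame, then
  // hand it to the encoder.  OnEncodedVideoFrame() runs once encoding
  // completes.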
  const scoped_refptr<VideoFrame> frame_to_encode =
      MaybeRenderPerformanceMetricsOverlay(
          GetTargetPlayoutDelay(), low_latency_mode_, bitrate,
          frames_in_encoder_ + 1, last_reported_encoder_utilization_,
          last_reported_lossy_utilization_, std::move(video_frame));
  if (video_encoder_->EncodeVideoFrame(
          frame_to_encode, reference_time,
          base::BindOnce(&VideoSender::OnEncodedVideoFrame, AsWeakPtr(),
                         frame_to_encode, bitrate))) {
    TRACE_EVENT_ASYNC_BEGIN1("cast.stream", "Video Encode",
                             frame_to_encode.get(), "rtp_timestamp",
                             rtp_timestamp.lower_32_bits());
    frames_in_encoder_++;
    duration_in_encoder_ += duration_added_by_next_frame;
    last_enqueued_frame_rtp_timestamp_ = rtp_timestamp;
    last_enqueued_frame_reference_time_ = reference_time;
  } else {
    VLOG(1) << "Encoder rejected a frame.  Skipping...";
    TRACE_EVENT_INSTANT1("cast.stream", "Video Encode Reject",
                         TRACE_EVENT_SCOPE_THREAD,
                         "rtp_timestamp", rtp_timestamp.lower_32_bits());
  }
}

std::unique_ptr<VideoFrameFactory> VideoSender::CreateVideoFrameFactory() {
  return video_encoder_ ? video_encoder_->CreateVideoFrameFactory() : nullptr;
}

base::WeakPtr<VideoSender> VideoSender::AsWeakPtr() {
  return weak_factory_.GetWeakPtr();
}

int VideoSender::GetNumberOfFramesInEncoder() const {
  return frames_in_encoder_;
}

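// Returns the media duration spanning from the oldest frame still awaiting
// receiver acknowledgement through the most recently enqueued frame; if no
// frames are awaiting acknowledgement, falls back to the duration currently
// buffered in the encoder.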
base::TimeDelta VideoSender::GetInFlightMediaDuration() const {
  if (GetUnacknowledgedFrameCount() > 0) {
    const FrameId oldest_unacked_frame_id = latest_acked_frame_id_ + 1;
    return last_enqueued_frame_reference_time_ -
        GetRecordedReferenceTime(oldest_unacked_frame_id);
  } else {
    return duration_in_encoder_;
  }
}

void VideoSender::OnEncodedVideoFrame(
    scoped_refptr<media::VideoFrame> video_frame,
    int encoder_bitrate,
    std::unique_ptr<SenderEncodedFrame> encoded_frame) {
  DCHECK(cast_environment_->CurrentlyOn(CastEnvironment::MAIN));

  frames_in_encoder_--;
  DCHECK_GE(frames_in_encoder_, 0);

  // Encoding failed, so there is nothing to send.
  if (!encoded_frame)
    return;

  duration_in_encoder_ =
      last_enqueued_frame_reference_time_ - encoded_frame->reference_time;

  last_reported_encoder_utilization_ = encoded_frame->encoder_utilization;
  last_reported_lossy_utilization_ = encoded_frame->lossy_utilization;

  TRACE_EVENT_ASYNC_END2("cast.stream", "Video Encode", video_frame.get(),
                         "encoder_utilization",
                         last_reported_encoder_utilization_,
                         "lossy_utilization", last_reported_lossy_utilization_);

  // Report the resource utilization for processing this frame.  Take the
  // greater of the two utilization values and attenuate them such that the
  // target utilization is reported as the maximum sustainable amount.
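  // For example, with a 75% target, a measured utilization of 0.6 is reported
  // as 0.6 / 0.75 = 0.8.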
  const double attenuated_utilization =
      std::max(last_reported_encoder_utilization_,
               last_reported_lossy_utilization_) /
      (kTargetUtilizationPercentage / 100.0);
  if (attenuated_utilization >= 0.0) {
    // Key frames are artificially capped to 1.0 because their actual
    // utilization is atypical compared to the other frames in the stream, and
    // this can misguide the producer of the input video frames.
    VideoFrameFeedback feedback;
    feedback.resource_utilization =
        encoded_frame->dependency == EncodedFrame::KEY
            ? std::min(1.0, attenuated_utilization)
            : attenuated_utilization;
    if (feedback_cb_)
      feedback_cb_.Run(feedback);
  }

  SendEncodedFrame(encoder_bitrate, std::move(encoded_frame));
}

}  // namespace cast
}  // namespace media