1 /* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef API_VIDEO_CODECS_VIDEO_ENCODER_H_ 12 #define API_VIDEO_CODECS_VIDEO_ENCODER_H_ 13 14 #include <limits> 15 #include <memory> 16 #include <string> 17 #include <vector> 18 19 #include "absl/container/inlined_vector.h" 20 #include "absl/types/optional.h" 21 #include "api/fec_controller_override.h" 22 #include "api/units/data_rate.h" 23 #include "api/video/encoded_image.h" 24 #include "api/video/video_bitrate_allocation.h" 25 #include "api/video/video_codec_constants.h" 26 #include "api/video/video_frame.h" 27 #include "api/video_codecs/video_codec.h" 28 #include "rtc_base/checks.h" 29 #include "rtc_base/system/rtc_export.h" 30 31 namespace webrtc { 32 33 // TODO(pbos): Expose these through a public (root) header or change these APIs. 34 struct CodecSpecificInfo; 35 36 constexpr int kDefaultMinPixelsPerFrame = 320 * 180; 37 38 class RTC_EXPORT EncodedImageCallback { 39 public: ~EncodedImageCallback()40 virtual ~EncodedImageCallback() {} 41 42 struct Result { 43 enum Error { 44 OK, 45 46 // Failed to send the packet. 47 ERROR_SEND_FAILED, 48 }; 49 ResultResult50 explicit Result(Error error) : error(error) {} ResultResult51 Result(Error error, uint32_t frame_id) : error(error), frame_id(frame_id) {} 52 53 Error error; 54 55 // Frame ID assigned to the frame. The frame ID should be the same as the ID 56 // seen by the receiver for this frame. RTP timestamp of the frame is used 57 // as frame ID when RTP is used to send video. Must be used only when 58 // error=OK. 59 uint32_t frame_id = 0; 60 61 // Tells the encoder that the next frame is should be dropped. 62 bool drop_next_frame = false; 63 }; 64 65 // Used to signal the encoder about reason a frame is dropped. 66 // kDroppedByMediaOptimizations - dropped by MediaOptimizations (for rate 67 // limiting purposes). 68 // kDroppedByEncoder - dropped by encoder's internal rate limiter. 69 enum class DropReason : uint8_t { 70 kDroppedByMediaOptimizations, 71 kDroppedByEncoder 72 }; 73 74 // Callback function which is called when an image has been encoded. 75 virtual Result OnEncodedImage( 76 const EncodedImage& encoded_image, 77 const CodecSpecificInfo* codec_specific_info) = 0; 78 OnDroppedFrame(DropReason reason)79 virtual void OnDroppedFrame(DropReason reason) {} 80 }; 81 82 class RTC_EXPORT VideoEncoder { 83 public: 84 struct QpThresholds { QpThresholdsQpThresholds85 QpThresholds(int l, int h) : low(l), high(h) {} QpThresholdsQpThresholds86 QpThresholds() : low(-1), high(-1) {} 87 int low; 88 int high; 89 }; 90 91 // Quality scaling is enabled if thresholds are provided. 92 struct RTC_EXPORT ScalingSettings { 93 private: 94 // Private magic type for kOff, implicitly convertible to 95 // ScalingSettings. 96 struct KOff {}; 97 98 public: 99 // TODO(nisse): Would be nicer if kOff were a constant ScalingSettings 100 // rather than a magic value. However, absl::optional is not trivially copy 101 // constructible, and hence a constant ScalingSettings needs a static 102 // initializer, which is strongly discouraged in Chrome. We can hopefully 103 // fix this when we switch to absl::optional or std::optional. 104 static constexpr KOff kOff = {}; 105 106 ScalingSettings(int low, int high); 107 ScalingSettings(int low, int high, int min_pixels); 108 ScalingSettings(const ScalingSettings&); 109 ScalingSettings(KOff); // NOLINT(runtime/explicit) 110 ~ScalingSettings(); 111 112 absl::optional<QpThresholds> thresholds; 113 114 // We will never ask for a resolution lower than this. 115 // TODO(kthelgason): Lower this limit when better testing 116 // on MediaCodec and fallback implementations are in place. 117 // See https://bugs.chromium.org/p/webrtc/issues/detail?id=7206 118 int min_pixels_per_frame = kDefaultMinPixelsPerFrame; 119 120 private: 121 // Private constructor; to get an object without thresholds, use 122 // the magic constant ScalingSettings::kOff. 123 ScalingSettings(); 124 }; 125 126 // Bitrate limits for resolution. 127 struct ResolutionBitrateLimits { ResolutionBitrateLimitsResolutionBitrateLimits128 ResolutionBitrateLimits(int frame_size_pixels, 129 int min_start_bitrate_bps, 130 int min_bitrate_bps, 131 int max_bitrate_bps) 132 : frame_size_pixels(frame_size_pixels), 133 min_start_bitrate_bps(min_start_bitrate_bps), 134 min_bitrate_bps(min_bitrate_bps), 135 max_bitrate_bps(max_bitrate_bps) {} 136 // Size of video frame, in pixels, the bitrate thresholds are intended for. 137 int frame_size_pixels = 0; 138 // Recommended minimum bitrate to start encoding. 139 int min_start_bitrate_bps = 0; 140 // Recommended minimum bitrate. 141 int min_bitrate_bps = 0; 142 // Recommended maximum bitrate. 143 int max_bitrate_bps = 0; 144 145 bool operator==(const ResolutionBitrateLimits& rhs) const; 146 bool operator!=(const ResolutionBitrateLimits& rhs) const { 147 return !(*this == rhs); 148 } 149 }; 150 151 // Struct containing metadata about the encoder implementing this interface. 152 struct RTC_EXPORT EncoderInfo { 153 static constexpr uint8_t kMaxFramerateFraction = 154 std::numeric_limits<uint8_t>::max(); 155 156 EncoderInfo(); 157 EncoderInfo(const EncoderInfo&); 158 159 ~EncoderInfo(); 160 161 std::string ToString() const; 162 bool operator==(const EncoderInfo& rhs) const; 163 bool operator!=(const EncoderInfo& rhs) const { return !(*this == rhs); } 164 165 // Any encoder implementation wishing to use the WebRTC provided 166 // quality scaler must populate this field. 167 ScalingSettings scaling_settings; 168 169 // The width and height of the incoming video frames should be divisible 170 // by |requested_resolution_alignment|. If they are not, the encoder may 171 // drop the incoming frame. 172 // For example: With I420, this value would be a multiple of 2. 173 // Note that this field is unrelated to any horizontal or vertical stride 174 // requirements the encoder has on the incoming video frame buffers. 175 int requested_resolution_alignment; 176 177 // Same as above but if true, each simulcast layer should also be divisible 178 // by |requested_resolution_alignment|. 179 // Note that scale factors |scale_resolution_down_by| may be adjusted so a 180 // common multiple is not too large to avoid largely cropped frames and 181 // possibly with an aspect ratio far from the original. 182 // Warning: large values of scale_resolution_down_by could be changed 183 // considerably, especially if |requested_resolution_alignment| is large. 184 bool apply_alignment_to_all_simulcast_layers; 185 186 // If true, encoder supports working with a native handle (e.g. texture 187 // handle for hw codecs) rather than requiring a raw I420 buffer. 188 bool supports_native_handle; 189 190 // The name of this particular encoder implementation, e.g. "libvpx". 191 std::string implementation_name; 192 193 // If this field is true, the encoder rate controller must perform 194 // well even in difficult situations, and produce close to the specified 195 // target bitrate seen over a reasonable time window, drop frames if 196 // necessary in order to keep the rate correct, and react quickly to 197 // changing bitrate targets. If this method returns true, we disable the 198 // frame dropper in the media optimization module and rely entirely on the 199 // encoder to produce media at a bitrate that closely matches the target. 200 // Any overshooting may result in delay buildup. If this method returns 201 // false (default behavior), the media opt frame dropper will drop input 202 // frames if it suspect encoder misbehavior. Misbehavior is common, 203 // especially in hardware codecs. Disable media opt at your own risk. 204 bool has_trusted_rate_controller; 205 206 // If this field is true, the encoder uses hardware support and different 207 // thresholds will be used in CPU adaptation. 208 bool is_hardware_accelerated; 209 210 // If this field is true, the encoder uses internal camera sources, meaning 211 // that it does not require/expect frames to be delivered via 212 // webrtc::VideoEncoder::Encode. 213 // Internal source encoders are deprecated and support for them will be 214 // phased out. 215 bool has_internal_source; 216 217 // For each spatial layer (simulcast stream or SVC layer), represented as an 218 // element in |fps_allocation| a vector indicates how many temporal layers 219 // the encoder is using for that spatial layer. 220 // For each spatial/temporal layer pair, the frame rate fraction is given as 221 // an 8bit unsigned integer where 0 = 0% and 255 = 100%. 222 // 223 // If the vector is empty for a given spatial layer, it indicates that frame 224 // rates are not defined and we can't count on any specific frame rate to be 225 // generated. Likely this indicates Vp8TemporalLayersType::kBitrateDynamic. 226 // 227 // The encoder may update this on a per-frame basis in response to both 228 // internal and external signals. 229 // 230 // Spatial layers are treated independently, but temporal layers are 231 // cumulative. For instance, if: 232 // fps_allocation[0][0] = kFullFramerate / 2; 233 // fps_allocation[0][1] = kFullFramerate; 234 // Then half of the frames are in the base layer and half is in TL1, but 235 // since TL1 is assumed to depend on the base layer, the frame rate is 236 // indicated as the full 100% for the top layer. 237 // 238 // Defaults to a single spatial layer containing a single temporal layer 239 // with a 100% frame rate fraction. 240 absl::InlinedVector<uint8_t, kMaxTemporalStreams> 241 fps_allocation[kMaxSpatialLayers]; 242 243 // Recommended bitrate limits for different resolutions. 244 std::vector<ResolutionBitrateLimits> resolution_bitrate_limits; 245 246 // Obtains the limits from |resolution_bitrate_limits| that best matches the 247 // |frame_size_pixels|. 248 absl::optional<ResolutionBitrateLimits> 249 GetEncoderBitrateLimitsForResolution(int frame_size_pixels) const; 250 251 // If true, this encoder has internal support for generating simulcast 252 // streams. Otherwise, an adapter class will be needed. 253 // Even if true, the config provided to InitEncode() might not be supported, 254 // in such case the encoder should return 255 // WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED. 256 bool supports_simulcast; 257 258 // The list of pixel formats preferred by the encoder. It is assumed that if 259 // the list is empty and supports_native_handle is false, then {I420} is the 260 // preferred pixel format. The order of the formats does not matter. 261 absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats> 262 preferred_pixel_formats; 263 }; 264 265 struct RTC_EXPORT RateControlParameters { 266 RateControlParameters(); 267 RateControlParameters(const VideoBitrateAllocation& bitrate, 268 double framerate_fps); 269 RateControlParameters(const VideoBitrateAllocation& bitrate, 270 double framerate_fps, 271 DataRate bandwidth_allocation); 272 virtual ~RateControlParameters(); 273 274 // Target bitrate, per spatial/temporal layer. 275 // A target bitrate of 0bps indicates a layer should not be encoded at all. 276 VideoBitrateAllocation target_bitrate; 277 // Adjusted target bitrate, per spatial/temporal layer. May be lower or 278 // higher than the target depending on encoder behaviour. 279 VideoBitrateAllocation bitrate; 280 // Target framerate, in fps. A value <= 0.0 is invalid and should be 281 // interpreted as framerate target not available. In this case the encoder 282 // should fall back to the max framerate specified in |codec_settings| of 283 // the last InitEncode() call. 284 double framerate_fps; 285 // The network bandwidth available for video. This is at least 286 // |bitrate.get_sum_bps()|, but may be higher if the application is not 287 // network constrained. 288 DataRate bandwidth_allocation; 289 290 bool operator==(const RateControlParameters& rhs) const; 291 bool operator!=(const RateControlParameters& rhs) const; 292 }; 293 294 struct LossNotification { 295 // The timestamp of the last decodable frame *prior* to the last received. 296 // (The last received - described below - might itself be decodable or not.) 297 uint32_t timestamp_of_last_decodable; 298 // The timestamp of the last received frame. 299 uint32_t timestamp_of_last_received; 300 // Describes whether the dependencies of the last received frame were 301 // all decodable. 302 // |false| if some dependencies were undecodable, |true| if all dependencies 303 // were decodable, and |nullopt| if the dependencies are unknown. 304 absl::optional<bool> dependencies_of_last_received_decodable; 305 // Describes whether the received frame was decodable. 306 // |false| if some dependency was undecodable or if some packet belonging 307 // to the last received frame was missed. 308 // |true| if all dependencies were decodable and all packets belonging 309 // to the last received frame were received. 310 // |nullopt| if no packet belonging to the last frame was missed, but the 311 // last packet in the frame was not yet received. 312 absl::optional<bool> last_received_decodable; 313 }; 314 315 // Negotiated capabilities which the VideoEncoder may expect the other 316 // side to use. 317 struct Capabilities { CapabilitiesCapabilities318 explicit Capabilities(bool loss_notification) 319 : loss_notification(loss_notification) {} 320 bool loss_notification; 321 }; 322 323 struct Settings { SettingsSettings324 Settings(const Capabilities& capabilities, 325 int number_of_cores, 326 size_t max_payload_size) 327 : capabilities(capabilities), 328 number_of_cores(number_of_cores), 329 max_payload_size(max_payload_size) {} 330 331 Capabilities capabilities; 332 int number_of_cores; 333 size_t max_payload_size; 334 }; 335 336 static VideoCodecVP8 GetDefaultVp8Settings(); 337 static VideoCodecVP9 GetDefaultVp9Settings(); 338 static VideoCodecH264 GetDefaultH264Settings(); 339 ~VideoEncoder()340 virtual ~VideoEncoder() {} 341 342 // Set a FecControllerOverride, through which the encoder may override 343 // decisions made by FecController. 344 // TODO(bugs.webrtc.org/10769): Update downstream, then make pure-virtual. 345 virtual void SetFecControllerOverride( 346 FecControllerOverride* fec_controller_override); 347 348 // Initialize the encoder with the information from the codecSettings 349 // 350 // Input: 351 // - codec_settings : Codec settings 352 // - settings : Settings affecting the encoding itself. 353 // Input for deprecated version: 354 // - number_of_cores : Number of cores available for the encoder 355 // - max_payload_size : The maximum size each payload is allowed 356 // to have. Usually MTU - overhead. 357 // 358 // Return value : Set bit rate if OK 359 // <0 - Errors: 360 // WEBRTC_VIDEO_CODEC_ERR_PARAMETER 361 // WEBRTC_VIDEO_CODEC_ERR_SIZE 362 // WEBRTC_VIDEO_CODEC_MEMORY 363 // WEBRTC_VIDEO_CODEC_ERROR 364 // TODO(bugs.webrtc.org/10720): After updating downstream projects and posting 365 // an announcement to discuss-webrtc, remove the three-parameters variant 366 // and make the two-parameters variant pure-virtual. 367 /* RTC_DEPRECATED */ virtual int32_t InitEncode( 368 const VideoCodec* codec_settings, 369 int32_t number_of_cores, 370 size_t max_payload_size); 371 virtual int InitEncode(const VideoCodec* codec_settings, 372 const VideoEncoder::Settings& settings); 373 374 // Register an encode complete callback object. 375 // 376 // Input: 377 // - callback : Callback object which handles encoded images. 378 // 379 // Return value : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise. 380 virtual int32_t RegisterEncodeCompleteCallback( 381 EncodedImageCallback* callback) = 0; 382 383 // Free encoder memory. 384 // Return value : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise. 385 virtual int32_t Release() = 0; 386 387 // Encode an image (as a part of a video stream). The encoded image 388 // will be returned to the user through the encode complete callback. 389 // 390 // Input: 391 // - frame : Image to be encoded 392 // - frame_types : Frame type to be generated by the encoder. 393 // 394 // Return value : WEBRTC_VIDEO_CODEC_OK if OK 395 // <0 - Errors: 396 // WEBRTC_VIDEO_CODEC_ERR_PARAMETER 397 // WEBRTC_VIDEO_CODEC_MEMORY 398 // WEBRTC_VIDEO_CODEC_ERROR 399 virtual int32_t Encode(const VideoFrame& frame, 400 const std::vector<VideoFrameType>* frame_types) = 0; 401 402 // Sets rate control parameters: bitrate, framerate, etc. These settings are 403 // instantaneous (i.e. not moving averages) and should apply from now until 404 // the next call to SetRates(). 405 virtual void SetRates(const RateControlParameters& parameters) = 0; 406 407 // Inform the encoder when the packet loss rate changes. 408 // 409 // Input: - packet_loss_rate : The packet loss rate (0.0 to 1.0). 410 virtual void OnPacketLossRateUpdate(float packet_loss_rate); 411 412 // Inform the encoder when the round trip time changes. 413 // 414 // Input: - rtt_ms : The new RTT, in milliseconds. 415 virtual void OnRttUpdate(int64_t rtt_ms); 416 417 // Called when a loss notification is received. 418 virtual void OnLossNotification(const LossNotification& loss_notification); 419 420 // Returns meta-data about the encoder, such as implementation name. 421 // The output of this method may change during runtime. For instance if a 422 // hardware encoder fails, it may fall back to doing software encoding using 423 // an implementation with different characteristics. 424 virtual EncoderInfo GetEncoderInfo() const; 425 }; 426 } // namespace webrtc 427 #endif // API_VIDEO_CODECS_VIDEO_ENCODER_H_ 428