1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 #ifndef AOM_AOM_AOM_ENCODER_H_ 12 #define AOM_AOM_AOM_ENCODER_H_ 13 14 /*!\defgroup encoder Encoder Algorithm Interface 15 * \ingroup codec 16 * This abstraction allows applications using this encoder to easily support 17 * multiple video formats with minimal code duplication. This section describes 18 * the interface common to all encoders. 19 * @{ 20 */ 21 22 /*!\file 23 * \brief Describes the encoder algorithm interface to applications. 24 * 25 * This file describes the interface between an application and a 26 * video encoder algorithm. 27 * 28 */ 29 #ifdef __cplusplus 30 extern "C" { 31 #endif 32 33 #include "aom/aom_codec.h" 34 35 /*!\brief Current ABI version number 36 * 37 * \internal 38 * If this file is altered in any way that changes the ABI, this value 39 * must be bumped. Examples include, but are not limited to, changing 40 * types, removing or reassigning enums, adding/removing/rearranging 41 * fields to structures 42 */ 43 #define AOM_ENCODER_ABI_VERSION \ 44 (5 + AOM_CODEC_ABI_VERSION) /**<\hideinitializer*/ 45 46 /*! \brief Encoder capabilities bitfield 47 * 48 * Each encoder advertises the capabilities it supports as part of its 49 * ::aom_codec_iface_t interface structure. Capabilities are extra 50 * interfaces or functionality, and are not required to be supported 51 * by an encoder. 52 * 53 * The available flags are specified by AOM_CODEC_CAP_* defines. 54 */ 55 #define AOM_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */ 56 57 /*! Can support input images at greater than 8 bitdepth. 58 */ 59 #define AOM_CODEC_CAP_HIGHBITDEPTH 0x40000 60 61 /*! \brief Initialization-time Feature Enabling 62 * 63 * Certain codec features must be known at initialization time, to allow 64 * for proper memory allocation. 65 * 66 * The available flags are specified by AOM_CODEC_USE_* defines. 67 */ 68 #define AOM_CODEC_USE_PSNR 0x10000 /**< Calculate PSNR on each frame */ 69 /*!\brief Make the encoder output one partition at a time. */ 70 #define AOM_CODEC_USE_HIGHBITDEPTH 0x40000 /**< Use high bitdepth */ 71 72 /*!\brief Generic fixed size buffer structure 73 * 74 * This structure is able to hold a reference to any fixed size buffer. 75 */ 76 typedef struct aom_fixed_buf { 77 void *buf; /**< Pointer to the data */ 78 size_t sz; /**< Length of the buffer, in chars */ 79 } aom_fixed_buf_t; /**< alias for struct aom_fixed_buf */ 80 81 /*!\brief Time Stamp Type 82 * 83 * An integer, which when multiplied by the stream's time base, provides 84 * the absolute time of a sample. 85 */ 86 typedef int64_t aom_codec_pts_t; 87 88 /*!\brief Compressed Frame Flags 89 * 90 * This type represents a bitfield containing information about a compressed 91 * frame that may be useful to an application. The most significant 16 bits 92 * can be used by an algorithm to provide additional detail, for example to 93 * support frame types that are codec specific (MPEG-1 D-frames for example) 94 */ 95 typedef uint32_t aom_codec_frame_flags_t; 96 #define AOM_FRAME_IS_KEY 0x1 /**< frame is the start of a GOP */ 97 /*!\brief frame can be dropped without affecting the stream (no future frame 98 * depends on this one) */ 99 #define AOM_FRAME_IS_DROPPABLE 0x2 100 /*!\brief frame should be decoded but will not be shown */ 101 #define AOM_FRAME_IS_INVISIBLE 0x4 102 /*!\brief this is a fragment of the encoded frame */ 103 #define AOM_FRAME_IS_FRAGMENT 0x8 104 105 /*!\brief Error Resilient flags 106 * 107 * These flags define which error resilient features to enable in the 108 * encoder. The flags are specified through the 109 * aom_codec_enc_cfg::g_error_resilient variable. 110 */ 111 typedef uint32_t aom_codec_er_flags_t; 112 /*!\brief Improve resiliency against losses of whole frames */ 113 #define AOM_ERROR_RESILIENT_DEFAULT 0x1 114 115 /*!\brief Encoder output packet variants 116 * 117 * This enumeration lists the different kinds of data packets that can be 118 * returned by calls to aom_codec_get_cx_data(). Algorithms \ref MAY 119 * extend this list to provide additional functionality. 120 */ 121 enum aom_codec_cx_pkt_kind { 122 AOM_CODEC_CX_FRAME_PKT, /**< Compressed video frame */ 123 AOM_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */ 124 AOM_CODEC_FPMB_STATS_PKT, /**< first pass mb statistics for this frame */ 125 AOM_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ 126 AOM_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */ 127 }; 128 129 /*!\brief Encoder output packet 130 * 131 * This structure contains the different kinds of output data the encoder 132 * may produce while compressing a frame. 133 */ 134 typedef struct aom_codec_cx_pkt { 135 enum aom_codec_cx_pkt_kind kind; /**< packet variant */ 136 union { 137 struct { 138 void *buf; /**< compressed data buffer */ 139 size_t sz; /**< length of compressed data */ 140 /*!\brief time stamp to show frame (in timebase units) */ 141 aom_codec_pts_t pts; 142 /*!\brief duration to show frame (in timebase units) */ 143 unsigned long duration; 144 aom_codec_frame_flags_t flags; /**< flags for this frame */ 145 /*!\brief the partition id defines the decoding order of the partitions. 146 * Only applicable when "output partition" mode is enabled. First 147 * partition has id 0.*/ 148 int partition_id; 149 /*!\brief size of the visible frame in this packet */ 150 size_t vis_frame_size; 151 } frame; /**< data for compressed frame packet */ 152 aom_fixed_buf_t twopass_stats; /**< data for two-pass packet */ 153 aom_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */ 154 struct aom_psnr_pkt { 155 unsigned int samples[4]; /**< Number of samples, total/y/u/v */ 156 uint64_t sse[4]; /**< sum squared error, total/y/u/v */ 157 double psnr[4]; /**< PSNR, total/y/u/v */ 158 } psnr; /**< data for PSNR packet */ 159 aom_fixed_buf_t raw; /**< data for arbitrary packets */ 160 161 /* This packet size is fixed to allow codecs to extend this 162 * interface without having to manage storage for raw packets, 163 * i.e., if it's smaller than 128 bytes, you can store in the 164 * packet list directly. 165 */ 166 char pad[128 - sizeof(enum aom_codec_cx_pkt_kind)]; /**< fixed sz */ 167 } data; /**< packet data */ 168 } aom_codec_cx_pkt_t; /**< alias for struct aom_codec_cx_pkt */ 169 170 /*!\brief Rational Number 171 * 172 * This structure holds a fractional value. 173 */ 174 typedef struct aom_rational { 175 int num; /**< fraction numerator */ 176 int den; /**< fraction denominator */ 177 } aom_rational_t; /**< alias for struct aom_rational */ 178 179 /*!\brief Multi-pass Encoding Pass */ 180 enum aom_enc_pass { 181 AOM_RC_ONE_PASS, /**< Single pass mode */ 182 AOM_RC_FIRST_PASS, /**< First pass of multi-pass mode */ 183 AOM_RC_LAST_PASS /**< Final pass of multi-pass mode */ 184 }; 185 186 /*!\brief Rate control mode */ 187 enum aom_rc_mode { 188 AOM_VBR, /**< Variable Bit Rate (VBR) mode */ 189 AOM_CBR, /**< Constant Bit Rate (CBR) mode */ 190 AOM_CQ, /**< Constrained Quality (CQ) mode */ 191 AOM_Q, /**< Constant Quality (Q) mode */ 192 }; 193 194 /*!\brief Keyframe placement mode. 195 * 196 * This enumeration determines whether keyframes are placed automatically by 197 * the encoder or whether this behavior is disabled. Older releases of this 198 * SDK were implemented such that AOM_KF_FIXED meant keyframes were disabled. 199 * This name is confusing for this behavior, so the new symbols to be used 200 * are AOM_KF_AUTO and AOM_KF_DISABLED. 201 */ 202 enum aom_kf_mode { 203 AOM_KF_FIXED, /**< deprecated, implies AOM_KF_DISABLED */ 204 AOM_KF_AUTO, /**< Encoder determines optimal placement automatically */ 205 AOM_KF_DISABLED = 0 /**< Encoder does not place keyframes. */ 206 }; 207 208 /*!\brief Encoded Frame Flags 209 * 210 * This type indicates a bitfield to be passed to aom_codec_encode(), defining 211 * per-frame boolean values. By convention, bits common to all codecs will be 212 * named AOM_EFLAG_*, and bits specific to an algorithm will be named 213 * /algo/_eflag_*. The lower order 16 bits are reserved for common use. 214 */ 215 typedef long aom_enc_frame_flags_t; 216 #define AOM_EFLAG_FORCE_KF (1 << 0) /**< Force this frame to be a keyframe */ 217 218 /*!\brief Encoder configuration structure 219 * 220 * This structure contains the encoder settings that have common representations 221 * across all codecs. This doesn't imply that all codecs support all features, 222 * however. 223 */ 224 typedef struct aom_codec_enc_cfg { 225 /* 226 * generic settings (g) 227 */ 228 229 /*!\brief Algorithm specific "usage" value 230 * 231 * Algorithms may define multiple values for usage, which may convey the 232 * intent of how the application intends to use the stream. If this value 233 * is non-zero, consult the documentation for the codec to determine its 234 * meaning. 235 */ 236 unsigned int g_usage; 237 238 /*!\brief Maximum number of threads to use 239 * 240 * For multi-threaded implementations, use no more than this number of 241 * threads. The codec may use fewer threads than allowed. The value 242 * 0 is equivalent to the value 1. 243 */ 244 unsigned int g_threads; 245 246 /*!\brief Bitstream profile to use 247 * 248 * Some codecs support a notion of multiple bitstream profiles. Typically 249 * this maps to a set of features that are turned on or off. Often the 250 * profile to use is determined by the features of the intended decoder. 251 * Consult the documentation for the codec to determine the valid values 252 * for this parameter, or set to zero for a sane default. 253 */ 254 unsigned int g_profile; /**< profile of bitstream to use */ 255 256 /*!\brief Width of the frame 257 * 258 * This value identifies the presentation resolution of the frame, 259 * in pixels. Note that the frames passed as input to the encoder must 260 * have this resolution. Frames will be presented by the decoder in this 261 * resolution, independent of any spatial resampling the encoder may do. 262 */ 263 unsigned int g_w; 264 265 /*!\brief Height of the frame 266 * 267 * This value identifies the presentation resolution of the frame, 268 * in pixels. Note that the frames passed as input to the encoder must 269 * have this resolution. Frames will be presented by the decoder in this 270 * resolution, independent of any spatial resampling the encoder may do. 271 */ 272 unsigned int g_h; 273 274 /*!\brief Max number of frames to encode 275 * 276 */ 277 unsigned int g_limit; 278 279 /*!\brief Forced maximum width of the frame 280 * 281 * If this value is non-zero then it is used to force the maximum frame 282 * width written in write_sequence_header(). 283 */ 284 unsigned int g_forced_max_frame_width; 285 286 /*!\brief Forced maximum height of the frame 287 * 288 * If this value is non-zero then it is used to force the maximum frame 289 * height written in write_sequence_header(). 290 */ 291 unsigned int g_forced_max_frame_height; 292 293 /*!\brief Bit-depth of the codec 294 * 295 * This value identifies the bit_depth of the codec, 296 * Only certain bit-depths are supported as identified in the 297 * aom_bit_depth_t enum. 298 */ 299 aom_bit_depth_t g_bit_depth; 300 301 /*!\brief Bit-depth of the input frames 302 * 303 * This value identifies the bit_depth of the input frames in bits. 304 * Note that the frames passed as input to the encoder must have 305 * this bit-depth. 306 */ 307 unsigned int g_input_bit_depth; 308 309 /*!\brief Stream timebase units 310 * 311 * Indicates the smallest interval of time, in seconds, used by the stream. 312 * For fixed frame rate material, or variable frame rate material where 313 * frames are timed at a multiple of a given clock (ex: video capture), 314 * the \ref RECOMMENDED method is to set the timebase to the reciprocal 315 * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the 316 * pts to correspond to the frame number, which can be handy. For 317 * re-encoding video from containers with absolute time timestamps, the 318 * \ref RECOMMENDED method is to set the timebase to that of the parent 319 * container or multimedia framework (ex: 1/1000 for ms, as in FLV). 320 */ 321 struct aom_rational g_timebase; 322 323 /*!\brief Enable error resilient modes. 324 * 325 * The error resilient bitfield indicates to the encoder which features 326 * it should enable to take measures for streaming over lossy or noisy 327 * links. 328 */ 329 aom_codec_er_flags_t g_error_resilient; 330 331 /*!\brief Multi-pass Encoding Mode 332 * 333 * This value should be set to the current phase for multi-pass encoding. 334 * For single pass, set to #AOM_RC_ONE_PASS. 335 */ 336 enum aom_enc_pass g_pass; 337 338 /*!\brief Allow lagged encoding 339 * 340 * If set, this value allows the encoder to consume a number of input 341 * frames before producing output frames. This allows the encoder to 342 * base decisions for the current frame on future frames. This does 343 * increase the latency of the encoding pipeline, so it is not appropriate 344 * in all situations (ex: realtime encoding). 345 * 346 * Note that this is a maximum value -- the encoder may produce frames 347 * sooner than the given limit. Set this value to 0 to disable this 348 * feature. 349 */ 350 unsigned int g_lag_in_frames; 351 352 /* 353 * rate control settings (rc) 354 */ 355 356 /*!\brief Temporal resampling configuration, if supported by the codec. 357 * 358 * Temporal resampling allows the codec to "drop" frames as a strategy to 359 * meet its target data rate. This can cause temporal discontinuities in 360 * the encoded video, which may appear as stuttering during playback. This 361 * trade-off is often acceptable, but for many applications is not. It can 362 * be disabled in these cases. 363 * 364 * Note that not all codecs support this feature. All aom AVx codecs do. 365 * For other codecs, consult the documentation for that algorithm. 366 * 367 * This threshold is described as a percentage of the target data buffer. 368 * When the data buffer falls below this percentage of fullness, a 369 * dropped frame is indicated. Set the threshold to zero (0) to disable 370 * this feature. 371 */ 372 unsigned int rc_dropframe_thresh; 373 374 /*!\brief Mode for spatial resampling, if supported by the codec. 375 * 376 * Spatial resampling allows the codec to compress a lower resolution 377 * version of the frame, which is then upscaled by the decoder to the 378 * correct presentation resolution. This increases visual quality at 379 * low data rates, at the expense of CPU time on the encoder/decoder. 380 */ 381 unsigned int rc_resize_mode; 382 383 /*!\brief Frame resize denominator. 384 * 385 * The denominator for resize to use, assuming 8 as the numerator. 386 * 387 * Valid denominators are 8 - 16 for now. 388 */ 389 unsigned int rc_resize_denominator; 390 391 /*!\brief Keyframe resize denominator. 392 * 393 * The denominator for resize to use, assuming 8 as the numerator. 394 * 395 * Valid denominators are 8 - 16 for now. 396 */ 397 unsigned int rc_resize_kf_denominator; 398 399 /*!\brief Frame super-resolution scaling mode. 400 * 401 * Similar to spatial resampling, frame super-resolution integrates 402 * upscaling after the encode/decode process. Taking control of upscaling and 403 * using restoration filters should allow it to outperform normal resizing. 404 * 405 * Mode 0 is SUPERRES_NONE, mode 1 is SUPERRES_FIXED, mode 2 is 406 * SUPERRES_RANDOM and mode 3 is SUPERRES_QTHRESH. 407 */ 408 unsigned int rc_superres_mode; 409 410 /*!\brief Frame super-resolution denominator. 411 * 412 * The denominator for superres to use. If fixed it will only change if the 413 * cumulative scale change over resizing and superres is greater than 1/2; 414 * this forces superres to reduce scaling. 415 * 416 * Valid denominators are 8 to 16. 417 * 418 * Used only by SUPERRES_FIXED. 419 */ 420 unsigned int rc_superres_denominator; 421 422 /*!\brief Keyframe super-resolution denominator. 423 * 424 * The denominator for superres to use. If fixed it will only change if the 425 * cumulative scale change over resizing and superres is greater than 1/2; 426 * this forces superres to reduce scaling. 427 * 428 * Valid denominators are 8 - 16 for now. 429 */ 430 unsigned int rc_superres_kf_denominator; 431 432 /*!\brief Frame super-resolution q threshold. 433 * 434 * The q level threshold after which superres is used. 435 * Valid values are 1 to 63. 436 * 437 * Used only by SUPERRES_QTHRESH 438 */ 439 unsigned int rc_superres_qthresh; 440 441 /*!\brief Keyframe super-resolution q threshold. 442 * 443 * The q level threshold after which superres is used for key frames. 444 * Valid values are 1 to 63. 445 * 446 * Used only by SUPERRES_QTHRESH 447 */ 448 unsigned int rc_superres_kf_qthresh; 449 450 /*!\brief Rate control algorithm to use. 451 * 452 * Indicates whether the end usage of this stream is to be streamed over 453 * a bandwidth constrained link, indicating that Constant Bit Rate (CBR) 454 * mode should be used, or whether it will be played back on a high 455 * bandwidth link, as from a local disk, where higher variations in 456 * bitrate are acceptable. 457 */ 458 enum aom_rc_mode rc_end_usage; 459 460 /*!\brief Two-pass stats buffer. 461 * 462 * A buffer containing all of the stats packets produced in the first 463 * pass, concatenated. 464 */ 465 aom_fixed_buf_t rc_twopass_stats_in; 466 467 /*!\brief first pass mb stats buffer. 468 * 469 * A buffer containing all of the first pass mb stats packets produced 470 * in the first pass, concatenated. 471 */ 472 aom_fixed_buf_t rc_firstpass_mb_stats_in; 473 474 /*!\brief Target data rate 475 * 476 * Target bandwidth to use for this stream, in kilobits per second. 477 */ 478 unsigned int rc_target_bitrate; 479 480 /* 481 * quantizer settings 482 */ 483 484 /*!\brief Minimum (Best Quality) Quantizer 485 * 486 * The quantizer is the most direct control over the quality of the 487 * encoded image. The range of valid values for the quantizer is codec 488 * specific. Consult the documentation for the codec to determine the 489 * values to use. To determine the range programmatically, call 490 * aom_codec_enc_config_default() with a usage value of 0. 491 */ 492 unsigned int rc_min_quantizer; 493 494 /*!\brief Maximum (Worst Quality) Quantizer 495 * 496 * The quantizer is the most direct control over the quality of the 497 * encoded image. The range of valid values for the quantizer is codec 498 * specific. Consult the documentation for the codec to determine the 499 * values to use. To determine the range programmatically, call 500 * aom_codec_enc_config_default() with a usage value of 0. 501 */ 502 unsigned int rc_max_quantizer; 503 504 /* 505 * bitrate tolerance 506 */ 507 508 /*!\brief Rate control adaptation undershoot control 509 * 510 * This value, expressed as a percentage of the target bitrate, 511 * controls the maximum allowed adaptation speed of the codec. 512 * This factor controls the maximum amount of bits that can 513 * be subtracted from the target bitrate in order to compensate 514 * for prior overshoot. 515 * 516 * Valid values in the range 0-1000. 517 */ 518 unsigned int rc_undershoot_pct; 519 520 /*!\brief Rate control adaptation overshoot control 521 * 522 * This value, expressed as a percentage of the target bitrate, 523 * controls the maximum allowed adaptation speed of the codec. 524 * This factor controls the maximum amount of bits that can 525 * be added to the target bitrate in order to compensate for 526 * prior undershoot. 527 * 528 * Valid values in the range 0-1000. 529 */ 530 unsigned int rc_overshoot_pct; 531 532 /* 533 * decoder buffer model parameters 534 */ 535 536 /*!\brief Decoder Buffer Size 537 * 538 * This value indicates the amount of data that may be buffered by the 539 * decoding application. Note that this value is expressed in units of 540 * time (milliseconds). For example, a value of 5000 indicates that the 541 * client will buffer (at least) 5000ms worth of encoded data. Use the 542 * target bitrate (#rc_target_bitrate) to convert to bits/bytes, if 543 * necessary. 544 */ 545 unsigned int rc_buf_sz; 546 547 /*!\brief Decoder Buffer Initial Size 548 * 549 * This value indicates the amount of data that will be buffered by the 550 * decoding application prior to beginning playback. This value is 551 * expressed in units of time (milliseconds). Use the target bitrate 552 * (#rc_target_bitrate) to convert to bits/bytes, if necessary. 553 */ 554 unsigned int rc_buf_initial_sz; 555 556 /*!\brief Decoder Buffer Optimal Size 557 * 558 * This value indicates the amount of data that the encoder should try 559 * to maintain in the decoder's buffer. This value is expressed in units 560 * of time (milliseconds). Use the target bitrate (#rc_target_bitrate) 561 * to convert to bits/bytes, if necessary. 562 */ 563 unsigned int rc_buf_optimal_sz; 564 565 /* 566 * 2 pass rate control parameters 567 */ 568 569 /*!\brief Two-pass mode CBR/VBR bias 570 * 571 * Bias, expressed on a scale of 0 to 100, for determining target size 572 * for the current frame. The value 0 indicates the optimal CBR mode 573 * value should be used. The value 100 indicates the optimal VBR mode 574 * value should be used. Values in between indicate which way the 575 * encoder should "lean." 576 */ 577 unsigned int rc_2pass_vbr_bias_pct; 578 579 /*!\brief Two-pass mode per-GOP minimum bitrate 580 * 581 * This value, expressed as a percentage of the target bitrate, indicates 582 * the minimum bitrate to be used for a single GOP (aka "section") 583 */ 584 unsigned int rc_2pass_vbr_minsection_pct; 585 586 /*!\brief Two-pass mode per-GOP maximum bitrate 587 * 588 * This value, expressed as a percentage of the target bitrate, indicates 589 * the maximum bitrate to be used for a single GOP (aka "section") 590 */ 591 unsigned int rc_2pass_vbr_maxsection_pct; 592 593 /* 594 * keyframing settings (kf) 595 */ 596 597 /*!\brief Option to enable forward reference key frame 598 * 599 */ 600 int fwd_kf_enabled; 601 602 /*!\brief Keyframe placement mode 603 * 604 * This value indicates whether the encoder should place keyframes at a 605 * fixed interval, or determine the optimal placement automatically 606 * (as governed by the #kf_min_dist and #kf_max_dist parameters) 607 */ 608 enum aom_kf_mode kf_mode; 609 610 /*!\brief Keyframe minimum interval 611 * 612 * This value, expressed as a number of frames, prevents the encoder from 613 * placing a keyframe nearer than kf_min_dist to the previous keyframe. At 614 * least kf_min_dist frames non-keyframes will be coded before the next 615 * keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval. 616 */ 617 unsigned int kf_min_dist; 618 619 /*!\brief Keyframe maximum interval 620 * 621 * This value, expressed as a number of frames, forces the encoder to code 622 * a keyframe if one has not been coded in the last kf_max_dist frames. 623 * A value of 0 implies all frames will be keyframes. Set kf_min_dist 624 * equal to kf_max_dist for a fixed interval. 625 */ 626 unsigned int kf_max_dist; 627 628 /*!\brief sframe interval 629 * 630 * This value, expressed as a number of frames, forces the encoder to code 631 * an S-Frame every sframe_dist frames. 632 */ 633 unsigned int sframe_dist; 634 635 /*!\brief sframe insertion mode 636 * 637 * This value must be set to 1 or 2, and tells the encoder how to insert 638 * S-Frames. It will only have an effect if sframe_dist != 0. 639 * 640 * If altref is enabled: 641 * - if sframe_mode == 1, the considered frame will be made into an 642 * S-Frame only if it is an altref frame 643 * - if sframe_mode == 2, the next altref frame will be made into an 644 * S-Frame. 645 * 646 * Otherwise: the considered frame will be made into an S-Frame. 647 */ 648 unsigned int sframe_mode; 649 650 /*!\brief Tile coding mode 651 * 652 * This value indicates the tile coding mode. 653 * A value of 0 implies a normal non-large-scale tile coding. A value of 1 654 * implies a large-scale tile coding. 655 */ 656 unsigned int large_scale_tile; 657 658 /*!\brief Monochrome mode 659 * 660 * If this is nonzero, the encoder will generate a monochrome stream 661 * with no chroma planes. 662 */ 663 unsigned int monochrome; 664 665 /*!\brief full_still_picture_hdr 666 * 667 * If this is nonzero, the encoder will generate a full header even for 668 * still picture encoding. if zero, a reduced header is used for still 669 * picture. This flag has no effect when a regular video with more than 670 * a single frame is encoded. 671 */ 672 unsigned int full_still_picture_hdr; 673 674 /*!\brief Bitstream syntax mode 675 * 676 * This value indicates the bitstream syntax mode. 677 * A value of 0 indicates bitstream is saved as Section 5 bitstream. A value 678 * of 1 indicates the bitstream is saved in Annex-B format 679 */ 680 unsigned int save_as_annexb; 681 682 /*!\brief Number of explicit tile widths specified 683 * 684 * This value indicates the number of tile widths specified 685 * A value of 0 implies no tile widths are specified. 686 * Tile widths are given in the array tile_widths[] 687 */ 688 int tile_width_count; 689 690 /*!\brief Number of explicit tile heights specified 691 * 692 * This value indicates the number of tile heights specified 693 * A value of 0 implies no tile heights are specified. 694 * Tile heights are given in the array tile_heights[] 695 */ 696 int tile_height_count; 697 698 /*!\brief Maximum number of tile widths in tile widths array 699 * 700 * This define gives the maximum number of elements in the tile_widths array. 701 */ 702 #define MAX_TILE_WIDTHS 64 // maximum tile width array length 703 704 /*!\brief Array of specified tile widths 705 * 706 * This array specifies tile widths (and may be empty) 707 * The number of widths specified is given by tile_width_count 708 */ 709 int tile_widths[MAX_TILE_WIDTHS]; 710 711 /*!\brief Maximum number of tile heights in tile heights array. 712 * 713 * This define gives the maximum number of elements in the tile_heights array. 714 */ 715 #define MAX_TILE_HEIGHTS 64 // maximum tile height array length 716 717 /*!\brief Array of specified tile heights 718 * 719 * This array specifies tile heights (and may be empty) 720 * The number of heights specified is given by tile_height_count 721 */ 722 int tile_heights[MAX_TILE_HEIGHTS]; 723 724 /*!\brief Options defined per config file 725 * 726 */ 727 cfg_options_t cfg; 728 } aom_codec_enc_cfg_t; /**< alias for struct aom_codec_enc_cfg */ 729 730 /*!\brief Initialize an encoder instance 731 * 732 * Initializes a encoder context using the given interface. Applications 733 * should call the aom_codec_enc_init convenience macro instead of this 734 * function directly, to ensure that the ABI version number parameter 735 * is properly initialized. 736 * 737 * If the library was configured with --disable-multithread, this call 738 * is not thread safe and should be guarded with a lock if being used 739 * in a multithreaded context. 740 * 741 * \param[in] ctx Pointer to this instance's context. 742 * \param[in] iface Pointer to the algorithm interface to use. 743 * \param[in] cfg Configuration to use, if known. 744 * \param[in] flags Bitfield of AOM_CODEC_USE_* flags 745 * \param[in] ver ABI version number. Must be set to 746 * AOM_ENCODER_ABI_VERSION 747 * \retval #AOM_CODEC_OK 748 * The decoder algorithm initialized. 749 * \retval #AOM_CODEC_MEM_ERROR 750 * Memory allocation failed. 751 */ 752 aom_codec_err_t aom_codec_enc_init_ver(aom_codec_ctx_t *ctx, 753 aom_codec_iface_t *iface, 754 const aom_codec_enc_cfg_t *cfg, 755 aom_codec_flags_t flags, int ver); 756 757 /*!\brief Convenience macro for aom_codec_enc_init_ver() 758 * 759 * Ensures the ABI version parameter is properly set. 760 */ 761 #define aom_codec_enc_init(ctx, iface, cfg, flags) \ 762 aom_codec_enc_init_ver(ctx, iface, cfg, flags, AOM_ENCODER_ABI_VERSION) 763 764 /*!\brief Initialize multi-encoder instance 765 * 766 * Initializes multi-encoder context using the given interface. 767 * Applications should call the aom_codec_enc_init_multi convenience macro 768 * instead of this function directly, to ensure that the ABI version number 769 * parameter is properly initialized. 770 * 771 * \param[in] ctx Pointer to this instance's context. 772 * \param[in] iface Pointer to the algorithm interface to use. 773 * \param[in] cfg Configuration to use, if known. 774 * \param[in] num_enc Total number of encoders. 775 * \param[in] flags Bitfield of AOM_CODEC_USE_* flags 776 * \param[in] dsf Pointer to down-sampling factors. 777 * \param[in] ver ABI version number. Must be set to 778 * AOM_ENCODER_ABI_VERSION 779 * \retval #AOM_CODEC_OK 780 * The decoder algorithm initialized. 781 * \retval #AOM_CODEC_MEM_ERROR 782 * Memory allocation failed. 783 */ 784 aom_codec_err_t aom_codec_enc_init_multi_ver( 785 aom_codec_ctx_t *ctx, aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, 786 int num_enc, aom_codec_flags_t flags, aom_rational_t *dsf, int ver); 787 788 /*!\brief Convenience macro for aom_codec_enc_init_multi_ver() 789 * 790 * Ensures the ABI version parameter is properly set. 791 */ 792 #define aom_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf) \ 793 aom_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf, \ 794 AOM_ENCODER_ABI_VERSION) 795 796 /*!\brief Get a default configuration 797 * 798 * Initializes a encoder configuration structure with default values. Supports 799 * the notion of "usages" so that an algorithm may offer different default 800 * settings depending on the user's intended goal. This function \ref SHOULD 801 * be called by all applications to initialize the configuration structure 802 * before specializing the configuration with application specific values. 803 * 804 * \param[in] iface Pointer to the algorithm interface to use. 805 * \param[out] cfg Configuration buffer to populate. 806 * \param[in] reserved Must set to 0. 807 * 808 * \retval #AOM_CODEC_OK 809 * The configuration was populated. 810 * \retval #AOM_CODEC_INCAPABLE 811 * Interface is not an encoder interface. 812 * \retval #AOM_CODEC_INVALID_PARAM 813 * A parameter was NULL, or the usage value was not recognized. 814 */ 815 aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, 816 aom_codec_enc_cfg_t *cfg, 817 unsigned int reserved); 818 819 /*!\brief Set or change configuration 820 * 821 * Reconfigures an encoder instance according to the given configuration. 822 * 823 * \param[in] ctx Pointer to this instance's context 824 * \param[in] cfg Configuration buffer to use 825 * 826 * \retval #AOM_CODEC_OK 827 * The configuration was populated. 828 * \retval #AOM_CODEC_INCAPABLE 829 * Interface is not an encoder interface. 830 * \retval #AOM_CODEC_INVALID_PARAM 831 * A parameter was NULL, or the usage value was not recognized. 832 */ 833 aom_codec_err_t aom_codec_enc_config_set(aom_codec_ctx_t *ctx, 834 const aom_codec_enc_cfg_t *cfg); 835 836 /*!\brief Get global stream headers 837 * 838 * Retrieves a stream level global header packet, if supported by the codec. 839 * Calls to this function should be deferred until all configuration information 840 * has been passed to libaom. Otherwise the global header data may be 841 * invalidated by additional configuration changes. 842 * 843 * The AV1 implementation of this function returns an OBU. The OBU returned is 844 * in Low Overhead Bitstream Format. Specifically, the obu_has_size_field bit is 845 * set, and the buffer contains the obu_size field for the returned OBU. 846 * 847 * \param[in] ctx Pointer to this instance's context 848 * 849 * \retval NULL 850 * Encoder does not support global header, or an error occurred while 851 * generating the global header. 852 * 853 * \retval Non-NULL 854 * Pointer to buffer containing global header packet. The caller owns the 855 * memory associated with this buffer, and must free the 'buf' member of the 856 * aom_fixed_buf_t as well as the aom_fixed_buf_t pointer. Memory returned 857 * must be freed via call to free(). 858 */ 859 aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx); 860 861 /*!\brief Encode a frame 862 * 863 * Encodes a video frame at the given "presentation time." The presentation 864 * time stamp (PTS) \ref MUST be strictly increasing. 865 * 866 * When the last frame has been passed to the encoder, this function should 867 * continue to be called, with the img parameter set to NULL. This will 868 * signal the end-of-stream condition to the encoder and allow it to encode 869 * any held buffers. Encoding is complete when aom_codec_encode() is called 870 * and aom_codec_get_cx_data() returns no data. 871 * 872 * \param[in] ctx Pointer to this instance's context 873 * \param[in] img Image data to encode, NULL to flush. 874 * \param[in] pts Presentation time stamp, in timebase units. 875 * \param[in] duration Duration to show frame, in timebase units. 876 * \param[in] flags Flags to use for encoding this frame. 877 * 878 * \retval #AOM_CODEC_OK 879 * The configuration was populated. 880 * \retval #AOM_CODEC_INCAPABLE 881 * Interface is not an encoder interface. 882 * \retval #AOM_CODEC_INVALID_PARAM 883 * A parameter was NULL, the image format is unsupported, etc. 884 */ 885 aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, 886 aom_codec_pts_t pts, unsigned long duration, 887 aom_enc_frame_flags_t flags); 888 889 /*!\brief Set compressed data output buffer 890 * 891 * Sets the buffer that the codec should output the compressed data 892 * into. This call effectively sets the buffer pointer returned in the 893 * next AOM_CODEC_CX_FRAME_PKT packet. Subsequent packets will be 894 * appended into this buffer. The buffer is preserved across frames, 895 * so applications must periodically call this function after flushing 896 * the accumulated compressed data to disk or to the network to reset 897 * the pointer to the buffer's head. 898 * 899 * `pad_before` bytes will be skipped before writing the compressed 900 * data, and `pad_after` bytes will be appended to the packet. The size 901 * of the packet will be the sum of the size of the actual compressed 902 * data, pad_before, and pad_after. The padding bytes will be preserved 903 * (not overwritten). 904 * 905 * Note that calling this function does not guarantee that the returned 906 * compressed data will be placed into the specified buffer. In the 907 * event that the encoded data will not fit into the buffer provided, 908 * the returned packet \ref MAY point to an internal buffer, as it would 909 * if this call were never used. In this event, the output packet will 910 * NOT have any padding, and the application must free space and copy it 911 * to the proper place. This is of particular note in configurations 912 * that may output multiple packets for a single encoded frame (e.g., lagged 913 * encoding) or if the application does not reset the buffer periodically. 914 * 915 * Applications may restore the default behavior of the codec providing 916 * the compressed data buffer by calling this function with a NULL 917 * buffer. 918 * 919 * Applications \ref MUSTNOT call this function during iteration of 920 * aom_codec_get_cx_data(). 921 * 922 * \param[in] ctx Pointer to this instance's context 923 * \param[in] buf Buffer to store compressed data into 924 * \param[in] pad_before Bytes to skip before writing compressed data 925 * \param[in] pad_after Bytes to skip after writing compressed data 926 * 927 * \retval #AOM_CODEC_OK 928 * The buffer was set successfully. 929 * \retval #AOM_CODEC_INVALID_PARAM 930 * A parameter was NULL, the image format is unsupported, etc. 931 */ 932 aom_codec_err_t aom_codec_set_cx_data_buf(aom_codec_ctx_t *ctx, 933 const aom_fixed_buf_t *buf, 934 unsigned int pad_before, 935 unsigned int pad_after); 936 937 /*!\brief Encoded data iterator 938 * 939 * Iterates over a list of data packets to be passed from the encoder to the 940 * application. The different kinds of packets available are enumerated in 941 * #aom_codec_cx_pkt_kind. 942 * 943 * #AOM_CODEC_CX_FRAME_PKT packets should be passed to the application's 944 * muxer. Multiple compressed frames may be in the list. 945 * #AOM_CODEC_STATS_PKT packets should be appended to a global buffer. 946 * 947 * The application \ref MUST silently ignore any packet kinds that it does 948 * not recognize or support. 949 * 950 * The data buffers returned from this function are only guaranteed to be 951 * valid until the application makes another call to any aom_codec_* function. 952 * 953 * \param[in] ctx Pointer to this instance's context 954 * \param[in,out] iter Iterator storage, initialized to NULL 955 * 956 * \return Returns a pointer to an output data packet (compressed frame data, 957 * two-pass statistics, etc.) or NULL to signal end-of-list. 958 * 959 */ 960 const aom_codec_cx_pkt_t *aom_codec_get_cx_data(aom_codec_ctx_t *ctx, 961 aom_codec_iter_t *iter); 962 963 /*!\brief Get Preview Frame 964 * 965 * Returns an image that can be used as a preview. Shows the image as it would 966 * exist at the decompressor. The application \ref MUST NOT write into this 967 * image buffer. 968 * 969 * \param[in] ctx Pointer to this instance's context 970 * 971 * \return Returns a pointer to a preview image, or NULL if no image is 972 * available. 973 * 974 */ 975 const aom_image_t *aom_codec_get_preview_frame(aom_codec_ctx_t *ctx); 976 977 /*!@} - end defgroup encoder*/ 978 #ifdef __cplusplus 979 } 980 #endif 981 #endif // AOM_AOM_AOM_ENCODER_H_ 982