1 /*
2  *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <memory>
12 #include <vector>
13 #include "./ivfenc.h"
14 #include "vp9/common/vp9_entropymode.h"
15 #include "vp9/common/vp9_enums.h"
16 #include "vp9/common/vp9_onyxc_int.h"
17 #include "vp9/vp9_iface_common.h"
18 #include "vp9/encoder/vp9_encoder.h"
19 #include "vp9/encoder/vp9_firstpass.h"
20 #include "vp9/simple_encode.h"
21 #include "vp9/vp9_cx_iface.h"
22 
23 namespace vp9 {
24 
get_plane_height(vpx_img_fmt_t img_fmt,int frame_height,int plane)25 static int get_plane_height(vpx_img_fmt_t img_fmt, int frame_height,
26                             int plane) {
27   assert(plane < 3);
28   if (plane == 0) {
29     return frame_height;
30   }
31   switch (img_fmt) {
32     case VPX_IMG_FMT_I420:
33     case VPX_IMG_FMT_I440:
34     case VPX_IMG_FMT_YV12:
35     case VPX_IMG_FMT_I42016:
36     case VPX_IMG_FMT_I44016: return (frame_height + 1) >> 1;
37     default: return frame_height;
38   }
39 }
40 
get_plane_width(vpx_img_fmt_t img_fmt,int frame_width,int plane)41 static int get_plane_width(vpx_img_fmt_t img_fmt, int frame_width, int plane) {
42   assert(plane < 3);
43   if (plane == 0) {
44     return frame_width;
45   }
46   switch (img_fmt) {
47     case VPX_IMG_FMT_I420:
48     case VPX_IMG_FMT_YV12:
49     case VPX_IMG_FMT_I422:
50     case VPX_IMG_FMT_I42016:
51     case VPX_IMG_FMT_I42216: return (frame_width + 1) >> 1;
52     default: return frame_width;
53   }
54 }
55 
56 // TODO(angiebird): Merge this function with vpx_img_plane_width()
img_plane_width(const vpx_image_t * img,int plane)57 static int img_plane_width(const vpx_image_t *img, int plane) {
58   if (plane > 0 && img->x_chroma_shift > 0)
59     return (img->d_w + 1) >> img->x_chroma_shift;
60   else
61     return img->d_w;
62 }
63 
64 // TODO(angiebird): Merge this function with vpx_img_plane_height()
img_plane_height(const vpx_image_t * img,int plane)65 static int img_plane_height(const vpx_image_t *img, int plane) {
66   if (plane > 0 && img->y_chroma_shift > 0)
67     return (img->d_h + 1) >> img->y_chroma_shift;
68   else
69     return img->d_h;
70 }
71 
72 // TODO(angiebird): Merge this function with vpx_img_read()
img_read(vpx_image_t * img,FILE * file)73 static int img_read(vpx_image_t *img, FILE *file) {
74   int plane;
75 
76   for (plane = 0; plane < 3; ++plane) {
77     unsigned char *buf = img->planes[plane];
78     const int stride = img->stride[plane];
79     const int w = img_plane_width(img, plane) *
80                   ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
81     const int h = img_plane_height(img, plane);
82     int y;
83 
84     for (y = 0; y < h; ++y) {
85       if (fread(buf, 1, w, file) != (size_t)w) return 0;
86       buf += stride;
87     }
88   }
89 
90   return 1;
91 }
92 
// Size in bytes of each text buffer in EncodeConfig. Every VP9EncoderConfig
// name and value is assumed to be shorter than 100 characters.
#define ENCODE_CONFIG_BUF_SIZE 100

// One textual encoder setting: the name of a VP9EncoderConfig field and the
// value to store in it.
struct EncodeConfig {
  char name[ENCODE_CONFIG_BUF_SIZE];   // Field name.
  char value[ENCODE_CONFIG_BUF_SIZE];  // Field value as text.
};
99 
100 class SimpleEncode::EncodeImpl {
101  public:
102   VP9_COMP *cpi;
103   vpx_img_fmt_t img_fmt;
104   vpx_image_t tmp_img;
105   std::vector<FIRSTPASS_STATS> first_pass_stats;
106   std::vector<EncodeConfig> encode_config_list;
107 };
108 
init_encoder(const VP9EncoderConfig * oxcf,vpx_img_fmt_t img_fmt)109 static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf,
110                               vpx_img_fmt_t img_fmt) {
111   VP9_COMP *cpi;
112   BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool));
113   vp9_initialize_enc();
114   cpi = vp9_create_compressor(oxcf, buffer_pool);
115   vp9_update_compressor_with_img_fmt(cpi, img_fmt);
116   return cpi;
117 }
118 
free_encoder(VP9_COMP * cpi)119 static void free_encoder(VP9_COMP *cpi) {
120   BufferPool *buffer_pool = cpi->common.buffer_pool;
121   vp9_remove_compressor(cpi);
122   // buffer_pool needs to be free after cpi because buffer_pool contains
123   // allocated buffers that will be free in vp9_remove_compressor()
124   vpx_free(buffer_pool);
125 }
126 
make_vpx_rational(int num,int den)127 static INLINE vpx_rational_t make_vpx_rational(int num, int den) {
128   vpx_rational_t v;
129   v.num = num;
130   v.den = den;
131   return v;
132 }
133 
134 static INLINE FrameType
get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type)135 get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) {
136   switch (update_type) {
137     case KF_UPDATE: return kFrameTypeKey;
138     case ARF_UPDATE: return kFrameTypeAltRef;
139     case GF_UPDATE: return kFrameTypeGolden;
140     case OVERLAY_UPDATE: return kFrameTypeOverlay;
141     case LF_UPDATE: return kFrameTypeInter;
142     default:
143       fprintf(stderr, "Unsupported update_type %d\n", update_type);
144       abort();
145       return kFrameTypeInter;
146   }
147 }
148 
update_partition_info(const PARTITION_INFO * input_partition_info,const int num_rows_4x4,const int num_cols_4x4,PartitionInfo * output_partition_info)149 static void update_partition_info(const PARTITION_INFO *input_partition_info,
150                                   const int num_rows_4x4,
151                                   const int num_cols_4x4,
152                                   PartitionInfo *output_partition_info) {
153   const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
154   for (int i = 0; i < num_units_4x4; ++i) {
155     output_partition_info[i].row = input_partition_info[i].row;
156     output_partition_info[i].column = input_partition_info[i].column;
157     output_partition_info[i].row_start = input_partition_info[i].row_start;
158     output_partition_info[i].column_start =
159         input_partition_info[i].column_start;
160     output_partition_info[i].width = input_partition_info[i].width;
161     output_partition_info[i].height = input_partition_info[i].height;
162   }
163 }
164 
165 // translate MV_REFERENCE_FRAME to RefFrameType
mv_ref_frame_to_ref_frame_type(MV_REFERENCE_FRAME mv_ref_frame)166 static RefFrameType mv_ref_frame_to_ref_frame_type(
167     MV_REFERENCE_FRAME mv_ref_frame) {
168   switch (mv_ref_frame) {
169     case LAST_FRAME: return kRefFrameTypeLast;
170     case GOLDEN_FRAME: return kRefFrameTypePast;
171     case ALTREF_FRAME: return kRefFrameTypeFuture;
172     default: return kRefFrameTypeNone;
173   }
174 }
175 
update_motion_vector_info(const MOTION_VECTOR_INFO * input_motion_vector_info,const int num_rows_4x4,const int num_cols_4x4,MotionVectorInfo * output_motion_vector_info,int motion_vector_scale)176 static void update_motion_vector_info(
177     const MOTION_VECTOR_INFO *input_motion_vector_info, const int num_rows_4x4,
178     const int num_cols_4x4, MotionVectorInfo *output_motion_vector_info,
179     int motion_vector_scale) {
180   const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
181   for (int i = 0; i < num_units_4x4; ++i) {
182     const MV_REFERENCE_FRAME *in_ref_frame =
183         input_motion_vector_info[i].ref_frame;
184     output_motion_vector_info[i].mv_count =
185         (in_ref_frame[0] == INTRA_FRAME) ? 0
186                                          : ((in_ref_frame[1] == NONE) ? 1 : 2);
187     if (in_ref_frame[0] == NONE) {
188       fprintf(stderr, "in_ref_frame[0] shouldn't be NONE\n");
189       abort();
190     }
191     output_motion_vector_info[i].ref_frame[0] =
192         mv_ref_frame_to_ref_frame_type(in_ref_frame[0]);
193     output_motion_vector_info[i].ref_frame[1] =
194         mv_ref_frame_to_ref_frame_type(in_ref_frame[1]);
195     output_motion_vector_info[i].mv_row[0] =
196         (double)input_motion_vector_info[i].mv[0].as_mv.row /
197         motion_vector_scale;
198     output_motion_vector_info[i].mv_column[0] =
199         (double)input_motion_vector_info[i].mv[0].as_mv.col /
200         motion_vector_scale;
201     output_motion_vector_info[i].mv_row[1] =
202         (double)input_motion_vector_info[i].mv[1].as_mv.row /
203         motion_vector_scale;
204     output_motion_vector_info[i].mv_column[1] =
205         (double)input_motion_vector_info[i].mv[1].as_mv.col /
206         motion_vector_scale;
207   }
208 }
209 
update_frame_counts(const FRAME_COUNTS * input_counts,FrameCounts * output_counts)210 static void update_frame_counts(const FRAME_COUNTS *input_counts,
211                                 FrameCounts *output_counts) {
212   // Init array sizes.
213   output_counts->y_mode.resize(BLOCK_SIZE_GROUPS);
214   for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
215     output_counts->y_mode[i].resize(INTRA_MODES);
216   }
217 
218   output_counts->uv_mode.resize(INTRA_MODES);
219   for (int i = 0; i < INTRA_MODES; ++i) {
220     output_counts->uv_mode[i].resize(INTRA_MODES);
221   }
222 
223   output_counts->partition.resize(PARTITION_CONTEXTS);
224   for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
225     output_counts->partition[i].resize(PARTITION_TYPES);
226   }
227 
228   output_counts->coef.resize(TX_SIZES);
229   output_counts->eob_branch.resize(TX_SIZES);
230   for (int i = 0; i < TX_SIZES; ++i) {
231     output_counts->coef[i].resize(PLANE_TYPES);
232     output_counts->eob_branch[i].resize(PLANE_TYPES);
233     for (int j = 0; j < PLANE_TYPES; ++j) {
234       output_counts->coef[i][j].resize(REF_TYPES);
235       output_counts->eob_branch[i][j].resize(REF_TYPES);
236       for (int k = 0; k < REF_TYPES; ++k) {
237         output_counts->coef[i][j][k].resize(COEF_BANDS);
238         output_counts->eob_branch[i][j][k].resize(COEF_BANDS);
239         for (int l = 0; l < COEF_BANDS; ++l) {
240           output_counts->coef[i][j][k][l].resize(COEFF_CONTEXTS);
241           output_counts->eob_branch[i][j][k][l].resize(COEFF_CONTEXTS);
242           for (int m = 0; m < COEFF_CONTEXTS; ++m) {
243             output_counts->coef[i][j][k][l][m].resize(UNCONSTRAINED_NODES + 1);
244           }
245         }
246       }
247     }
248   }
249 
250   output_counts->switchable_interp.resize(SWITCHABLE_FILTER_CONTEXTS);
251   for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
252     output_counts->switchable_interp[i].resize(SWITCHABLE_FILTERS);
253   }
254 
255   output_counts->inter_mode.resize(INTER_MODE_CONTEXTS);
256   for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
257     output_counts->inter_mode[i].resize(INTER_MODES);
258   }
259 
260   output_counts->intra_inter.resize(INTRA_INTER_CONTEXTS);
261   for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
262     output_counts->intra_inter[i].resize(2);
263   }
264 
265   output_counts->comp_inter.resize(COMP_INTER_CONTEXTS);
266   for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
267     output_counts->comp_inter[i].resize(2);
268   }
269 
270   output_counts->single_ref.resize(REF_CONTEXTS);
271   for (int i = 0; i < REF_CONTEXTS; ++i) {
272     output_counts->single_ref[i].resize(2);
273     for (int j = 0; j < 2; ++j) {
274       output_counts->single_ref[i][j].resize(2);
275     }
276   }
277 
278   output_counts->comp_ref.resize(REF_CONTEXTS);
279   for (int i = 0; i < REF_CONTEXTS; ++i) {
280     output_counts->comp_ref[i].resize(2);
281   }
282 
283   output_counts->skip.resize(SKIP_CONTEXTS);
284   for (int i = 0; i < SKIP_CONTEXTS; ++i) {
285     output_counts->skip[i].resize(2);
286   }
287 
288   output_counts->tx.p32x32.resize(TX_SIZE_CONTEXTS);
289   output_counts->tx.p16x16.resize(TX_SIZE_CONTEXTS);
290   output_counts->tx.p8x8.resize(TX_SIZE_CONTEXTS);
291   for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
292     output_counts->tx.p32x32[i].resize(TX_SIZES);
293     output_counts->tx.p16x16[i].resize(TX_SIZES - 1);
294     output_counts->tx.p8x8[i].resize(TX_SIZES - 2);
295   }
296   output_counts->tx.tx_totals.resize(TX_SIZES);
297 
298   output_counts->mv.joints.resize(MV_JOINTS);
299   output_counts->mv.comps.resize(2);
300   for (int i = 0; i < 2; ++i) {
301     output_counts->mv.comps[i].sign.resize(2);
302     output_counts->mv.comps[i].classes.resize(MV_CLASSES);
303     output_counts->mv.comps[i].class0.resize(CLASS0_SIZE);
304     output_counts->mv.comps[i].bits.resize(MV_OFFSET_BITS);
305     for (int j = 0; j < MV_OFFSET_BITS; ++j) {
306       output_counts->mv.comps[i].bits[j].resize(2);
307     }
308     output_counts->mv.comps[i].class0_fp.resize(CLASS0_SIZE);
309     for (int j = 0; j < CLASS0_SIZE; ++j) {
310       output_counts->mv.comps[i].class0_fp[j].resize(MV_FP_SIZE);
311     }
312     output_counts->mv.comps[i].fp.resize(MV_FP_SIZE);
313     output_counts->mv.comps[i].class0_hp.resize(2);
314     output_counts->mv.comps[i].hp.resize(2);
315   }
316 
317   // Populate counts.
318   for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
319     for (int j = 0; j < INTRA_MODES; ++j) {
320       output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
321     }
322   }
323   for (int i = 0; i < INTRA_MODES; ++i) {
324     for (int j = 0; j < INTRA_MODES; ++j) {
325       output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
326     }
327   }
328   for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
329     for (int j = 0; j < PARTITION_TYPES; ++j) {
330       output_counts->partition[i][j] = input_counts->partition[i][j];
331     }
332   }
333   for (int i = 0; i < TX_SIZES; ++i) {
334     for (int j = 0; j < PLANE_TYPES; ++j) {
335       for (int k = 0; k < REF_TYPES; ++k) {
336         for (int l = 0; l < COEF_BANDS; ++l) {
337           for (int m = 0; m < COEFF_CONTEXTS; ++m) {
338             output_counts->eob_branch[i][j][k][l][m] =
339                 input_counts->eob_branch[i][j][k][l][m];
340             for (int n = 0; n < UNCONSTRAINED_NODES + 1; n++) {
341               output_counts->coef[i][j][k][l][m][n] =
342                   input_counts->coef[i][j][k][l][m][n];
343             }
344           }
345         }
346       }
347     }
348   }
349   for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
350     for (int j = 0; j < SWITCHABLE_FILTERS; ++j) {
351       output_counts->switchable_interp[i][j] =
352           input_counts->switchable_interp[i][j];
353     }
354   }
355   for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
356     for (int j = 0; j < INTER_MODES; ++j) {
357       output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
358     }
359   }
360   for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
361     for (int j = 0; j < 2; ++j) {
362       output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
363     }
364   }
365   for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
366     for (int j = 0; j < 2; ++j) {
367       output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
368     }
369   }
370   for (int i = 0; i < REF_CONTEXTS; ++i) {
371     for (int j = 0; j < 2; ++j) {
372       for (int k = 0; k < 2; ++k) {
373         output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
374       }
375     }
376   }
377   for (int i = 0; i < REF_CONTEXTS; ++i) {
378     for (int j = 0; j < 2; ++j) {
379       output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
380     }
381   }
382   for (int i = 0; i < SKIP_CONTEXTS; ++i) {
383     for (int j = 0; j < 2; ++j) {
384       output_counts->skip[i][j] = input_counts->skip[i][j];
385     }
386   }
387   for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
388     for (int j = 0; j < TX_SIZES; j++) {
389       output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
390     }
391     for (int j = 0; j < TX_SIZES - 1; j++) {
392       output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
393     }
394     for (int j = 0; j < TX_SIZES - 2; j++) {
395       output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
396     }
397   }
398   for (int i = 0; i < TX_SIZES; i++) {
399     output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
400   }
401   for (int i = 0; i < MV_JOINTS; i++) {
402     output_counts->mv.joints[i] = input_counts->mv.joints[i];
403   }
404   for (int k = 0; k < 2; k++) {
405     const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
406     for (int i = 0; i < 2; i++) {
407       output_counts->mv.comps[k].sign[i] = comps_t->sign[i];
408       output_counts->mv.comps[k].class0_hp[i] = comps_t->class0_hp[i];
409       output_counts->mv.comps[k].hp[i] = comps_t->hp[i];
410     }
411     for (int i = 0; i < MV_CLASSES; i++) {
412       output_counts->mv.comps[k].classes[i] = comps_t->classes[i];
413     }
414     for (int i = 0; i < CLASS0_SIZE; i++) {
415       output_counts->mv.comps[k].class0[i] = comps_t->class0[i];
416       for (int j = 0; j < MV_FP_SIZE; j++) {
417         output_counts->mv.comps[k].class0_fp[i][j] = comps_t->class0_fp[i][j];
418       }
419     }
420     for (int i = 0; i < MV_OFFSET_BITS; i++) {
421       for (int j = 0; j < 2; j++) {
422         output_counts->mv.comps[k].bits[i][j] = comps_t->bits[i][j];
423       }
424     }
425     for (int i = 0; i < MV_FP_SIZE; i++) {
426       output_counts->mv.comps[k].fp[i] = comps_t->fp[i];
427     }
428   }
429 }
430 
output_image_buffer(const ImageBuffer & image_buffer,std::FILE * out_file)431 void output_image_buffer(const ImageBuffer &image_buffer, std::FILE *out_file) {
432   for (int plane = 0; plane < 3; ++plane) {
433     const int w = image_buffer.plane_width[plane];
434     const int h = image_buffer.plane_height[plane];
435     const uint8_t *buf = image_buffer.plane_buffer[plane].get();
436     fprintf(out_file, "%d %d\n", h, w);
437     for (int i = 0; i < w * h; ++i) {
438       fprintf(out_file, "%d ", (int)buf[i]);
439     }
440     fprintf(out_file, "\n");
441   }
442 }
443 
init_image_buffer(ImageBuffer * image_buffer,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)444 static bool init_image_buffer(ImageBuffer *image_buffer, int frame_width,
445                               int frame_height, vpx_img_fmt_t img_fmt) {
446   for (int plane = 0; plane < 3; ++plane) {
447     const int w = get_plane_width(img_fmt, frame_width, plane);
448     const int h = get_plane_height(img_fmt, frame_height, plane);
449     image_buffer->plane_width[plane] = w;
450     image_buffer->plane_height[plane] = h;
451     image_buffer->plane_buffer[plane].reset(new (std::nothrow) uint8_t[w * h]);
452     if (image_buffer->plane_buffer[plane].get() == nullptr) {
453       return false;
454     }
455   }
456   return true;
457 }
458 
ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer & image_buffer,IMAGE_BUFFER * image_buffer_c)459 static void ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer &image_buffer,
460                                         IMAGE_BUFFER *image_buffer_c) {
461   image_buffer_c->allocated = 1;
462   for (int plane = 0; plane < 3; ++plane) {
463     image_buffer_c->plane_width[plane] = image_buffer.plane_width[plane];
464     image_buffer_c->plane_height[plane] = image_buffer.plane_height[plane];
465     image_buffer_c->plane_buffer[plane] =
466         image_buffer.plane_buffer[plane].get();
467   }
468 }
469 
// Returns the size in bytes of the buffer reserved for the coded data of one
// frame: three bytes per pixel of a |frame_width| x |frame_height| frame.
// The product is computed in size_t because the int product overflows for
// large frames. Both dimensions are expected to be non-negative.
static size_t get_max_coding_data_byte_size(int frame_width, int frame_height) {
  return static_cast<size_t>(frame_width) * static_cast<size_t>(frame_height) *
         3;
}
473 
init_encode_frame_result(EncodeFrameResult * encode_frame_result,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)474 static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
475                                      int frame_width, int frame_height,
476                                      vpx_img_fmt_t img_fmt) {
477   const size_t max_coding_data_byte_size =
478       get_max_coding_data_byte_size(frame_width, frame_height);
479 
480   encode_frame_result->coding_data.reset(
481       new (std::nothrow) uint8_t[max_coding_data_byte_size]);
482 
483   encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);
484   encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);
485   encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 *
486                                              encode_frame_result->num_cols_4x4);
487   encode_frame_result->motion_vector_info.resize(
488       encode_frame_result->num_rows_4x4 * encode_frame_result->num_cols_4x4);
489 
490   if (encode_frame_result->coding_data.get() == nullptr) {
491     return false;
492   }
493   return init_image_buffer(&encode_frame_result->coded_frame, frame_width,
494                            frame_height, img_fmt);
495 }
496 
encode_frame_result_update_rq_history(const RATE_QINDEX_HISTORY * rq_history,EncodeFrameResult * encode_frame_result)497 static void encode_frame_result_update_rq_history(
498     const RATE_QINDEX_HISTORY *rq_history,
499     EncodeFrameResult *encode_frame_result) {
500   encode_frame_result->recode_count = rq_history->recode_count;
501   for (int i = 0; i < encode_frame_result->recode_count; ++i) {
502     const int q_index = rq_history->q_index_history[i];
503     const int rate = rq_history->rate_history[i];
504     encode_frame_result->q_index_history.push_back(q_index);
505     encode_frame_result->rate_history.push_back(rate);
506   }
507 }
508 
update_encode_frame_result(EncodeFrameResult * encode_frame_result,const ENCODE_FRAME_RESULT * encode_frame_info)509 static void update_encode_frame_result(
510     EncodeFrameResult *encode_frame_result,
511     const ENCODE_FRAME_RESULT *encode_frame_info) {
512   encode_frame_result->coding_data_bit_size =
513       encode_frame_result->coding_data_byte_size * 8;
514   encode_frame_result->show_idx = encode_frame_info->show_idx;
515   encode_frame_result->coding_idx = encode_frame_info->frame_coding_index;
516   assert(kRefFrameTypeMax == MAX_INTER_REF_FRAMES);
517   for (int i = 0; i < kRefFrameTypeMax; ++i) {
518     encode_frame_result->ref_frame_info.coding_indexes[i] =
519         encode_frame_info->ref_frame_coding_indexes[i];
520     encode_frame_result->ref_frame_info.valid_list[i] =
521         encode_frame_info->ref_frame_valid_list[i];
522   }
523   encode_frame_result->frame_type =
524       get_frame_type_from_update_type(encode_frame_info->update_type);
525   encode_frame_result->psnr = encode_frame_info->psnr;
526   encode_frame_result->sse = encode_frame_info->sse;
527   encode_frame_result->quantize_index = encode_frame_info->quantize_index;
528   update_partition_info(encode_frame_info->partition_info,
529                         encode_frame_result->num_rows_4x4,
530                         encode_frame_result->num_cols_4x4,
531                         &encode_frame_result->partition_info[0]);
532   update_motion_vector_info(encode_frame_info->motion_vector_info,
533                             encode_frame_result->num_rows_4x4,
534                             encode_frame_result->num_cols_4x4,
535                             &encode_frame_result->motion_vector_info[0],
536                             kMotionVectorSubPixelPrecision);
537   update_frame_counts(&encode_frame_info->frame_counts,
538                       &encode_frame_result->frame_counts);
539   encode_frame_result_update_rq_history(&encode_frame_info->rq_history,
540                                         encode_frame_result);
541 }
542 
IncreaseGroupOfPictureIndex(GroupOfPicture * group_of_picture)543 static void IncreaseGroupOfPictureIndex(GroupOfPicture *group_of_picture) {
544   ++group_of_picture->next_encode_frame_index;
545 }
546 
IsGroupOfPictureFinished(const GroupOfPicture & group_of_picture)547 static int IsGroupOfPictureFinished(const GroupOfPicture &group_of_picture) {
548   return static_cast<size_t>(group_of_picture.next_encode_frame_index) ==
549          group_of_picture.encode_frame_list.size();
550 }
551 
operator ==(const RefFrameInfo & a,const RefFrameInfo & b)552 bool operator==(const RefFrameInfo &a, const RefFrameInfo &b) {
553   bool match = true;
554   for (int i = 0; i < kRefFrameTypeMax; ++i) {
555     match &= a.coding_indexes[i] == b.coding_indexes[i];
556     match &= a.valid_list[i] == b.valid_list[i];
557   }
558   return match;
559 }
560 
InitRefFrameInfo(RefFrameInfo * ref_frame_info)561 static void InitRefFrameInfo(RefFrameInfo *ref_frame_info) {
562   for (int i = 0; i < kRefFrameTypeMax; ++i) {
563     ref_frame_info->coding_indexes[i] = -1;
564     ref_frame_info->valid_list[i] = 0;
565   }
566 }
567 
568 // After finishing coding a frame, this function will update the coded frame
569 // into the ref_frame_info based on the frame_type and the coding_index.
PostUpdateRefFrameInfo(FrameType frame_type,int frame_coding_index,RefFrameInfo * ref_frame_info)570 static void PostUpdateRefFrameInfo(FrameType frame_type, int frame_coding_index,
571                                    RefFrameInfo *ref_frame_info) {
572   // This part is written based on the logics in vp9_configure_buffer_updates()
573   // and update_ref_frames()
574   int *ref_frame_coding_indexes = ref_frame_info->coding_indexes;
575   switch (frame_type) {
576     case kFrameTypeKey:
577       ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
578       ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
579       ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
580       break;
581     case kFrameTypeInter:
582       ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
583       break;
584     case kFrameTypeAltRef:
585       ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
586       break;
587     case kFrameTypeOverlay:
588       // Reserve the past coding_index in the future slot. This logic is from
589       // update_ref_frames() with condition vp9_preserve_existing_gf() == 1
590       // TODO(angiebird): Invetegate why we need this.
591       ref_frame_coding_indexes[kRefFrameTypeFuture] =
592           ref_frame_coding_indexes[kRefFrameTypePast];
593       ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
594       break;
595     case kFrameTypeGolden:
596       ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
597       ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
598       break;
599   }
600 
601   //  This part is written based on the logics in get_ref_frame_flags() but we
602   //  rename the flags alt, golden to future, past respectively. Mark
603   //  non-duplicated reference frames as valid. The priorities are
604   //  kRefFrameTypeLast > kRefFrameTypePast > kRefFrameTypeFuture.
605   const int last_index = ref_frame_coding_indexes[kRefFrameTypeLast];
606   const int past_index = ref_frame_coding_indexes[kRefFrameTypePast];
607   const int future_index = ref_frame_coding_indexes[kRefFrameTypeFuture];
608 
609   int *ref_frame_valid_list = ref_frame_info->valid_list;
610   for (int ref_frame_idx = 0; ref_frame_idx < kRefFrameTypeMax;
611        ++ref_frame_idx) {
612     ref_frame_valid_list[ref_frame_idx] = 1;
613   }
614 
615   if (past_index == last_index) {
616     ref_frame_valid_list[kRefFrameTypePast] = 0;
617   }
618 
619   if (future_index == last_index) {
620     ref_frame_valid_list[kRefFrameTypeFuture] = 0;
621   }
622 
623   if (future_index == past_index) {
624     ref_frame_valid_list[kRefFrameTypeFuture] = 0;
625   }
626 }
627 
SetGroupOfPicture(int first_is_key_frame,int use_alt_ref,int coding_frame_count,int first_show_idx,int last_gop_use_alt_ref,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)628 static void SetGroupOfPicture(int first_is_key_frame, int use_alt_ref,
629                               int coding_frame_count, int first_show_idx,
630                               int last_gop_use_alt_ref, int start_coding_index,
631                               const RefFrameInfo &start_ref_frame_info,
632                               GroupOfPicture *group_of_picture) {
633   // Clean up the state of previous group of picture.
634   group_of_picture->encode_frame_list.clear();
635   group_of_picture->next_encode_frame_index = 0;
636   group_of_picture->show_frame_count = coding_frame_count - use_alt_ref;
637   group_of_picture->start_show_index = first_show_idx;
638   group_of_picture->start_coding_index = start_coding_index;
639   group_of_picture->first_is_key_frame = first_is_key_frame;
640   group_of_picture->use_alt_ref = use_alt_ref;
641   group_of_picture->last_gop_use_alt_ref = last_gop_use_alt_ref;
642 
643   // We need to make a copy of start reference frame info because we
644   // use it to simulate the ref frame update.
645   RefFrameInfo ref_frame_info = start_ref_frame_info;
646 
647   {
648     // First frame in the group of pictures. It's either key frame or show inter
649     // frame.
650     EncodeFrameInfo encode_frame_info;
651     // Set frame_type
652     if (first_is_key_frame) {
653       encode_frame_info.frame_type = kFrameTypeKey;
654     } else {
655       if (last_gop_use_alt_ref) {
656         encode_frame_info.frame_type = kFrameTypeOverlay;
657       } else {
658         encode_frame_info.frame_type = kFrameTypeGolden;
659       }
660     }
661 
662     encode_frame_info.show_idx = first_show_idx;
663     encode_frame_info.coding_index = start_coding_index;
664 
665     encode_frame_info.ref_frame_info = ref_frame_info;
666     PostUpdateRefFrameInfo(encode_frame_info.frame_type,
667                            encode_frame_info.coding_index, &ref_frame_info);
668 
669     group_of_picture->encode_frame_list.push_back(encode_frame_info);
670   }
671 
672   const int show_frame_count = coding_frame_count - use_alt_ref;
673   if (use_alt_ref) {
674     // If there is alternate reference, it is always coded at the second place.
675     // Its show index (or timestamp) is at the last of this group
676     EncodeFrameInfo encode_frame_info;
677     encode_frame_info.frame_type = kFrameTypeAltRef;
678     encode_frame_info.show_idx = first_show_idx + show_frame_count;
679     encode_frame_info.coding_index = start_coding_index + 1;
680 
681     encode_frame_info.ref_frame_info = ref_frame_info;
682     PostUpdateRefFrameInfo(encode_frame_info.frame_type,
683                            encode_frame_info.coding_index, &ref_frame_info);
684 
685     group_of_picture->encode_frame_list.push_back(encode_frame_info);
686   }
687 
688   // Encode the rest show inter frames.
689   for (int i = 1; i < show_frame_count; ++i) {
690     EncodeFrameInfo encode_frame_info;
691     encode_frame_info.frame_type = kFrameTypeInter;
692     encode_frame_info.show_idx = first_show_idx + i;
693     encode_frame_info.coding_index = start_coding_index + use_alt_ref + i;
694 
695     encode_frame_info.ref_frame_info = ref_frame_info;
696     PostUpdateRefFrameInfo(encode_frame_info.frame_type,
697                            encode_frame_info.coding_index, &ref_frame_info);
698 
699     group_of_picture->encode_frame_list.push_back(encode_frame_info);
700   }
701 }
702 
703 // Gets group of picture information from VP9's decision, and update
704 // |group_of_picture| accordingly.
705 // This is called at the starting of encoding of each group of picture.
UpdateGroupOfPicture(const VP9_COMP * cpi,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)706 static void UpdateGroupOfPicture(const VP9_COMP *cpi, int start_coding_index,
707                                  const RefFrameInfo &start_ref_frame_info,
708                                  GroupOfPicture *group_of_picture) {
709   int first_is_key_frame;
710   int use_alt_ref;
711   int coding_frame_count;
712   int first_show_idx;
713   int last_gop_use_alt_ref;
714   vp9_get_next_group_of_picture(cpi, &first_is_key_frame, &use_alt_ref,
715                                 &coding_frame_count, &first_show_idx,
716                                 &last_gop_use_alt_ref);
717   SetGroupOfPicture(first_is_key_frame, use_alt_ref, coding_frame_count,
718                     first_show_idx, last_gop_use_alt_ref, start_coding_index,
719                     start_ref_frame_info, group_of_picture);
720 }
721 
// Helper for UpdateEncodeConfig(): when |config|.name equals the literal
// field name |field|, parses |config|.value with atoi(), stores the result in
// |structure|->field and sets |ret| to 1. Otherwise it does nothing.
// The body is wrapped in do { } while (0) so that the macro expands to exactly
// one statement (safe in an unbraced if/else), and the arguments are
// parenthesized so that expressions can be passed.
#define SET_STRUCT_VALUE(config, structure, ret, field) \
  do {                                                  \
    if (strcmp((config).name, #field) == 0) {           \
      (structure)->field = atoi((config).value);        \
      (ret) = 1;                                        \
    }                                                   \
  } while (0)
727 
UpdateEncodeConfig(const EncodeConfig & config,VP9EncoderConfig * oxcf)728 static void UpdateEncodeConfig(const EncodeConfig &config,
729                                VP9EncoderConfig *oxcf) {
730   int ret = 0;
731   SET_STRUCT_VALUE(config, oxcf, ret, key_freq);
732   SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmin_section);
733   SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmax_section);
734   SET_STRUCT_VALUE(config, oxcf, ret, under_shoot_pct);
735   SET_STRUCT_VALUE(config, oxcf, ret, over_shoot_pct);
736   SET_STRUCT_VALUE(config, oxcf, ret, max_threads);
737   SET_STRUCT_VALUE(config, oxcf, ret, frame_parallel_decoding_mode);
738   SET_STRUCT_VALUE(config, oxcf, ret, tile_columns);
739   SET_STRUCT_VALUE(config, oxcf, ret, arnr_max_frames);
740   SET_STRUCT_VALUE(config, oxcf, ret, arnr_strength);
741   SET_STRUCT_VALUE(config, oxcf, ret, lag_in_frames);
742   SET_STRUCT_VALUE(config, oxcf, ret, encode_breakout);
743   SET_STRUCT_VALUE(config, oxcf, ret, enable_tpl_model);
744   SET_STRUCT_VALUE(config, oxcf, ret, enable_auto_arf);
745   if (ret == 0) {
746     fprintf(stderr, "Ignored unsupported encode_config %s\n", config.name);
747   }
748 }
749 
GetEncodeConfig(int frame_width,int frame_height,vpx_rational_t frame_rate,int target_bitrate,int encode_speed,vpx_enc_pass enc_pass,const std::vector<EncodeConfig> & encode_config_list)750 static VP9EncoderConfig GetEncodeConfig(
751     int frame_width, int frame_height, vpx_rational_t frame_rate,
752     int target_bitrate, int encode_speed, vpx_enc_pass enc_pass,
753     const std::vector<EncodeConfig> &encode_config_list) {
754   VP9EncoderConfig oxcf =
755       vp9_get_encoder_config(frame_width, frame_height, frame_rate,
756                              target_bitrate, encode_speed, enc_pass);
757   for (const auto &config : encode_config_list) {
758     UpdateEncodeConfig(config, &oxcf);
759   }
760   if (enc_pass == VPX_RC_FIRST_PASS) {
761     oxcf.lag_in_frames = 0;
762   }
763   return oxcf;
764 }
765 
SimpleEncode(int frame_width,int frame_height,int frame_rate_num,int frame_rate_den,int target_bitrate,int num_frames,const char * infile_path,const char * outfile_path)766 SimpleEncode::SimpleEncode(int frame_width, int frame_height,
767                            int frame_rate_num, int frame_rate_den,
768                            int target_bitrate, int num_frames,
769                            const char *infile_path, const char *outfile_path) {
770   impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
771   frame_width_ = frame_width;
772   frame_height_ = frame_height;
773   frame_rate_num_ = frame_rate_num;
774   frame_rate_den_ = frame_rate_den;
775   target_bitrate_ = target_bitrate;
776   num_frames_ = num_frames;
777   encode_speed_ = 0;
778 
779   frame_coding_index_ = 0;
780   show_frame_count_ = 0;
781 
782   key_frame_group_index_ = 0;
783   key_frame_group_size_ = 0;
784 
785   // TODO(angirbid): Should we keep a file pointer here or keep the file_path?
786   assert(infile_path != nullptr);
787   in_file_ = fopen(infile_path, "r");
788   if (outfile_path != nullptr) {
789     out_file_ = fopen(outfile_path, "w");
790   } else {
791     out_file_ = nullptr;
792   }
793   impl_ptr_->cpi = nullptr;
794   impl_ptr_->img_fmt = VPX_IMG_FMT_I420;
795 
796   InitRefFrameInfo(&ref_frame_info_);
797 }
798 
// Stores |encode_speed| in encode_speed_. The stored value is passed to
// GetEncodeConfig() whenever an encoder configuration is built afterwards.
void SimpleEncode::SetEncodeSpeed(int encode_speed) {
  encode_speed_ = encode_speed;
}
802 
SetEncodeConfig(const char * name,const char * value)803 StatusCode SimpleEncode::SetEncodeConfig(const char *name, const char *value) {
804   if (name == nullptr || value == nullptr) {
805     fprintf(stderr, "SetEncodeConfig: null pointer, name %p value %p\n", name,
806             value);
807     return StatusError;
808   }
809   EncodeConfig config;
810   snprintf(config.name, ENCODE_CONFIG_BUF_SIZE, "%s", name);
811   snprintf(config.value, ENCODE_CONFIG_BUF_SIZE, "%s", value);
812   impl_ptr_->encode_config_list.push_back(config);
813   return StatusOk;
814 }
815 
DumpEncodeConfigs(int pass,FILE * fp)816 StatusCode SimpleEncode::DumpEncodeConfigs(int pass, FILE *fp) {
817   if (fp == nullptr) {
818     fprintf(stderr, "DumpEncodeConfigs: null pointer, fp %p\n", fp);
819     return StatusError;
820   }
821   vpx_enc_pass enc_pass;
822   if (pass == 1) {
823     enc_pass = VPX_RC_FIRST_PASS;
824   } else {
825     enc_pass = VPX_RC_LAST_PASS;
826   }
827   const vpx_rational_t frame_rate =
828       make_vpx_rational(frame_rate_num_, frame_rate_den_);
829   const VP9EncoderConfig oxcf =
830       GetEncodeConfig(frame_width_, frame_height_, frame_rate, target_bitrate_,
831                       encode_speed_, enc_pass, impl_ptr_->encode_config_list);
832   vp9_dump_encoder_config(&oxcf, fp);
833   return StatusOk;
834 }
835 
ComputeFirstPassStats()836 void SimpleEncode::ComputeFirstPassStats() {
837   vpx_rational_t frame_rate =
838       make_vpx_rational(frame_rate_num_, frame_rate_den_);
839   const VP9EncoderConfig oxcf = GetEncodeConfig(
840       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
841       VPX_RC_FIRST_PASS, impl_ptr_->encode_config_list);
842   VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
843   struct lookahead_ctx *lookahead = cpi->lookahead;
844   int i;
845   int use_highbitdepth = 0;
846   const int num_rows_16x16 = get_num_unit_16x16(frame_height_);
847   const int num_cols_16x16 = get_num_unit_16x16(frame_width_);
848 #if CONFIG_VP9_HIGHBITDEPTH
849   use_highbitdepth = cpi->common.use_highbitdepth;
850 #endif
851   vpx_image_t img;
852   vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);
853   rewind(in_file_);
854   impl_ptr_->first_pass_stats.clear();
855   for (i = 0; i < num_frames_; ++i) {
856     assert(!vp9_lookahead_full(lookahead));
857     if (img_read(&img, in_file_)) {
858       int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
859       int64_t ts_start =
860           timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx);
861       int64_t ts_end =
862           timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1);
863       YV12_BUFFER_CONFIG sd;
864       image2yuvconfig(&img, &sd);
865       vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
866       {
867         int64_t time_stamp;
868         int64_t time_end;
869         int flush = 1;  // Makes vp9_get_compressed_data process a frame
870         size_t size;
871         unsigned int frame_flags = 0;
872         ENCODE_FRAME_RESULT encode_frame_info;
873         vp9_init_encode_frame_result(&encode_frame_info);
874         // TODO(angiebird): Call vp9_first_pass directly
875         vp9_get_compressed_data(cpi, &frame_flags, &size, nullptr, &time_stamp,
876                                 &time_end, flush, &encode_frame_info);
877         // vp9_get_compressed_data only generates first pass stats not
878         // compresses data
879         assert(size == 0);
880         // Get vp9 first pass motion vector info.
881         std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);
882         update_motion_vector_info(cpi->fp_motion_vector_info, num_rows_16x16,
883                                   num_cols_16x16, mv_info.data(),
884                                   kMotionVectorFullPixelPrecision);
885         fp_motion_vector_info_.push_back(mv_info);
886       }
887       impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));
888     }
889   }
890   vp9_end_first_pass(cpi);
891   // TODO(angiebird): Store the total_stats apart form first_pass_stats
892   impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass));
893   free_encoder(cpi);
894   rewind(in_file_);
895   vpx_img_free(&img);
896 
897   // Generate key_frame_map based on impl_ptr_->first_pass_stats.
898   key_frame_map_ = ComputeKeyFrameMap();
899 }
900 
ObserveFirstPassStats()901 std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
902   std::vector<std::vector<double>> output_stats;
903   // TODO(angiebird): This function make several assumptions of
904   // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the
905   // last one. 2) The last entry of first_pass_stats is the total_stats.
906   // Change the code structure, so that we don't have to make these assumptions
907 
908   // Note the last entry of first_pass_stats is the total_stats, we don't need
909   // it.
910   for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) {
911     double *buf_start =
912         reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]);
913     // We use - 1 here because the last member in FIRSTPASS_STATS is not double
914     double *buf_end =
915         buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) -
916         1;
917     std::vector<double> this_stats(buf_start, buf_end);
918     output_stats.push_back(this_stats);
919   }
920   return output_stats;
921 }
922 
// Returns a copy of fp_motion_vector_info_. ComputeFirstPassStats() appends
// one vector of MotionVectorInfo to it for every frame it successfully reads.
std::vector<std::vector<MotionVectorInfo>>
SimpleEncode::ObserveFirstPassMotionVectors() {
  return fp_motion_vector_info_;
}
927 
SetExternalGroupOfPicturesMap(int * gop_map,int gop_map_size)928 void SimpleEncode::SetExternalGroupOfPicturesMap(int *gop_map,
929                                                  int gop_map_size) {
930   for (int i = 0; i < gop_map_size; ++i) {
931     gop_map_.push_back(gop_map[i]);
932   }
933   // The following will check and modify gop_map_ to make sure the
934   // gop_map_ satisfies the constraints.
935   // 1) Each key frame position should be at the start of a gop.
936   // 2) The last gop should not use an alt ref.
937   assert(gop_map_.size() == key_frame_map_.size());
938   int last_gop_start = 0;
939   for (int i = 0; static_cast<size_t>(i) < gop_map_.size(); ++i) {
940     if (key_frame_map_[i] == 1 && gop_map_[i] == 0) {
941       fprintf(stderr, "Add an extra gop start at show_idx %d\n", i);
942       // Insert a gop start at key frame location.
943       gop_map_[i] |= kGopMapFlagStart;
944       gop_map_[i] |= kGopMapFlagUseAltRef;
945     }
946     if (gop_map_[i] & kGopMapFlagStart) {
947       last_gop_start = i;
948     }
949   }
950   if (gop_map_[last_gop_start] & kGopMapFlagUseAltRef) {
951     fprintf(stderr,
952             "Last group of pictures starting at show_idx %d shouldn't use alt "
953             "ref\n",
954             last_gop_start);
955     gop_map_[last_gop_start] &= ~kGopMapFlagUseAltRef;
956   }
957 }
958 
// Returns a copy of gop_map_, i.e. the map stored by
// SetExternalGroupOfPicturesMap() including the adjustments that function
// makes (extra gop starts at key frames, no alt ref for the last gop).
std::vector<int> SimpleEncode::ObserveExternalGroupOfPicturesMap() {
  return gop_map_;
}
962 
// Returns a pointer to the first element of |v|, or nullptr when |v| holds no
// elements. The const qualifier of the elements is cast away, so the caller
// must only write through the returned pointer if the underlying vector
// object is not actually const.
template <typename T>
T *GetVectorData(const std::vector<T> &v) {
  return v.empty() ? nullptr : const_cast<T *>(v.data());
}
970 
GetGopCommand(const std::vector<int> & gop_map,int start_show_index)971 static GOP_COMMAND GetGopCommand(const std::vector<int> &gop_map,
972                                  int start_show_index) {
973   GOP_COMMAND gop_command;
974   if (gop_map.size() > 0) {
975     assert(static_cast<size_t>(start_show_index) < gop_map.size());
976     assert((gop_map[start_show_index] & kGopMapFlagStart) != 0);
977     int end_show_index = start_show_index + 1;
978     // gop_map[end_show_index] & kGopMapFlagStart == 0 means this is
979     // the start of a gop.
980     while (static_cast<size_t>(end_show_index) < gop_map.size() &&
981            (gop_map[end_show_index] & kGopMapFlagStart) == 0) {
982       ++end_show_index;
983     }
984     const int show_frame_count = end_show_index - start_show_index;
985     int use_alt_ref = (gop_map[start_show_index] & kGopMapFlagUseAltRef) != 0;
986     if (static_cast<size_t>(end_show_index) == gop_map.size()) {
987       // This is the last gop group, there must be no altref.
988       use_alt_ref = 0;
989     }
990     gop_command_on(&gop_command, show_frame_count, use_alt_ref);
991   } else {
992     gop_command_off(&gop_command);
993   }
994   return gop_command;
995 }
996 
StartEncode()997 void SimpleEncode::StartEncode() {
998   assert(impl_ptr_->first_pass_stats.size() > 0);
999   vpx_rational_t frame_rate =
1000       make_vpx_rational(frame_rate_num_, frame_rate_den_);
1001   VP9EncoderConfig oxcf = GetEncodeConfig(
1002       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1003       VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1004 
1005   vpx_fixed_buf_t stats;
1006   stats.buf = GetVectorData(impl_ptr_->first_pass_stats);
1007   stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
1008              impl_ptr_->first_pass_stats.size();
1009 
1010   vp9_set_first_pass_stats(&oxcf, &stats);
1011   assert(impl_ptr_->cpi == nullptr);
1012   impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
1013   vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_,
1014                 frame_height_, 1);
1015 
1016   frame_coding_index_ = 0;
1017   show_frame_count_ = 0;
1018 
1019   UpdateKeyFrameGroup(show_frame_count_);
1020 
1021   const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1022   encode_command_set_gop_command(&impl_ptr_->cpi->encode_command, gop_command);
1023   UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1024                        &group_of_picture_);
1025   rewind(in_file_);
1026 
1027   if (out_file_ != nullptr) {
1028     const char *fourcc = "VP90";
1029     // In SimpleEncode, we use time_base = 1 / TICKS_PER_SEC.
1030     // Based on that, the ivf_timestamp for each image is set to
1031     // show_idx * TICKS_PER_SEC / frame_rate
1032     // such that each image's actual timestamp in seconds can be computed as
1033     // ivf_timestamp * time_base == show_idx / frame_rate
1034     // TODO(angiebird): 1) Add unit test for ivf timestamp.
1035     // 2) Simplify the frame_rate setting process.
1036     vpx_rational_t time_base = make_vpx_rational(1, TICKS_PER_SEC);
1037     ivf_write_file_header_with_video_info(out_file_, *(const uint32_t *)fourcc,
1038                                           num_frames_, frame_width_,
1039                                           frame_height_, time_base);
1040   }
1041 }
1042 
EndEncode()1043 void SimpleEncode::EndEncode() {
1044   free_encoder(impl_ptr_->cpi);
1045   impl_ptr_->cpi = nullptr;
1046   vpx_img_free(&impl_ptr_->tmp_img);
1047   rewind(in_file_);
1048 }
1049 
UpdateKeyFrameGroup(int key_frame_show_index)1050 void SimpleEncode::UpdateKeyFrameGroup(int key_frame_show_index) {
1051   const VP9_COMP *cpi = impl_ptr_->cpi;
1052   key_frame_group_index_ = 0;
1053   key_frame_group_size_ = vp9_get_frames_to_next_key(
1054       &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info,
1055       key_frame_show_index, cpi->rc.min_gf_interval);
1056   assert(key_frame_group_size_ > 0);
1057   // Init the reference frame info when a new key frame group appears.
1058   InitRefFrameInfo(&ref_frame_info_);
1059 }
1060 
PostUpdateKeyFrameGroupIndex(FrameType frame_type)1061 void SimpleEncode::PostUpdateKeyFrameGroupIndex(FrameType frame_type) {
1062   if (frame_type != kFrameTypeAltRef) {
1063     // key_frame_group_index_ only counts show frames
1064     ++key_frame_group_index_;
1065   }
1066 }
1067 
// Returns key_frame_group_size_, which UpdateKeyFrameGroup() sets from
// vp9_get_frames_to_next_key().
int SimpleEncode::GetKeyFrameGroupSize() const { return key_frame_group_size_; }
1069 
// Returns a copy of group_of_picture_, the group of pictures most recently
// set up by UpdateGroupOfPicture().
GroupOfPicture SimpleEncode::ObserveGroupOfPicture() const {
  return group_of_picture_;
}
1073 
GetNextEncodeFrameInfo() const1074 EncodeFrameInfo SimpleEncode::GetNextEncodeFrameInfo() const {
1075   return group_of_picture_
1076       .encode_frame_list[group_of_picture_.next_encode_frame_index];
1077 }
1078 
PostUpdateState(const EncodeFrameResult & encode_frame_result)1079 void SimpleEncode::PostUpdateState(
1080     const EncodeFrameResult &encode_frame_result) {
1081   // This function needs to be called before the increament of
1082   // frame_coding_index_
1083   PostUpdateRefFrameInfo(encode_frame_result.frame_type, frame_coding_index_,
1084                          &ref_frame_info_);
1085   ++frame_coding_index_;
1086   if (encode_frame_result.frame_type != kFrameTypeAltRef) {
1087     // Only kFrameTypeAltRef is not a show frame
1088     ++show_frame_count_;
1089   }
1090 
1091   PostUpdateKeyFrameGroupIndex(encode_frame_result.frame_type);
1092   if (key_frame_group_index_ == key_frame_group_size_) {
1093     UpdateKeyFrameGroup(show_frame_count_);
1094   }
1095 
1096   IncreaseGroupOfPictureIndex(&group_of_picture_);
1097   if (IsGroupOfPictureFinished(group_of_picture_)) {
1098     const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1099     encode_command_set_gop_command(&impl_ptr_->cpi->encode_command,
1100                                    gop_command);
1101     // This function needs to be called after ref_frame_info_ is updated
1102     // properly in PostUpdateRefFrameInfo() and UpdateKeyFrameGroup().
1103     UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1104                          &group_of_picture_);
1105   }
1106 }
1107 
EncodeFrame(EncodeFrameResult * encode_frame_result)1108 void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
1109   VP9_COMP *cpi = impl_ptr_->cpi;
1110   struct lookahead_ctx *lookahead = cpi->lookahead;
1111   int use_highbitdepth = 0;
1112 #if CONFIG_VP9_HIGHBITDEPTH
1113   use_highbitdepth = cpi->common.use_highbitdepth;
1114 #endif
1115   // The lookahead's size is set to oxcf->lag_in_frames.
1116   // We want to fill lookahead to it's max capacity if possible so that the
1117   // encoder can construct alt ref frame in time.
1118   // In the other words, we hope vp9_get_compressed_data to encode a frame
1119   // every time in the function
1120   while (!vp9_lookahead_full(lookahead)) {
1121     // TODO(angiebird): Check whether we can move this file read logics to
1122     // lookahead
1123     if (img_read(&impl_ptr_->tmp_img, in_file_)) {
1124       int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
1125       int64_t ts_start =
1126           timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx);
1127       int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts,
1128                                                next_show_idx + 1);
1129       YV12_BUFFER_CONFIG sd;
1130       image2yuvconfig(&impl_ptr_->tmp_img, &sd);
1131       vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
1132     } else {
1133       break;
1134     }
1135   }
1136 
1137   if (init_encode_frame_result(encode_frame_result, frame_width_, frame_height_,
1138                                impl_ptr_->img_fmt)) {
1139     int64_t time_stamp;
1140     int64_t time_end;
1141     int flush = 1;  // Make vp9_get_compressed_data encode a frame
1142     unsigned int frame_flags = 0;
1143     ENCODE_FRAME_RESULT encode_frame_info;
1144     vp9_init_encode_frame_result(&encode_frame_info);
1145     ImageBuffer_to_IMAGE_BUFFER(encode_frame_result->coded_frame,
1146                                 &encode_frame_info.coded_frame);
1147     vp9_get_compressed_data(cpi, &frame_flags,
1148                             &encode_frame_result->coding_data_byte_size,
1149                             encode_frame_result->coding_data.get(), &time_stamp,
1150                             &time_end, flush, &encode_frame_info);
1151     if (out_file_ != nullptr) {
1152       ivf_write_frame_header(out_file_, time_stamp,
1153                              encode_frame_result->coding_data_byte_size);
1154       fwrite(encode_frame_result->coding_data.get(), 1,
1155              encode_frame_result->coding_data_byte_size, out_file_);
1156     }
1157 
1158     // vp9_get_compressed_data is expected to encode a frame every time, so the
1159     // data size should be greater than zero.
1160     if (encode_frame_result->coding_data_byte_size <= 0) {
1161       fprintf(stderr, "Coding data size <= 0\n");
1162       abort();
1163     }
1164     const size_t max_coding_data_byte_size =
1165         get_max_coding_data_byte_size(frame_width_, frame_height_);
1166     if (encode_frame_result->coding_data_byte_size >
1167         max_coding_data_byte_size) {
1168       fprintf(stderr, "Coding data size exceeds the maximum.\n");
1169       abort();
1170     }
1171 
1172     update_encode_frame_result(encode_frame_result, &encode_frame_info);
1173     PostUpdateState(*encode_frame_result);
1174   } else {
1175     // TODO(angiebird): Clean up encode_frame_result.
1176     fprintf(stderr, "init_encode_frame_result() failed.\n");
1177     this->EndEncode();
1178   }
1179 }
1180 
EncodeFrameWithQuantizeIndex(EncodeFrameResult * encode_frame_result,int quantize_index)1181 void SimpleEncode::EncodeFrameWithQuantizeIndex(
1182     EncodeFrameResult *encode_frame_result, int quantize_index) {
1183   encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command,
1184                                              quantize_index);
1185   EncodeFrame(encode_frame_result);
1186   encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command);
1187 }
1188 
EncodeFrameWithTargetFrameBits(EncodeFrameResult * encode_frame_result,int target_frame_bits,double percent_diff)1189 void SimpleEncode::EncodeFrameWithTargetFrameBits(
1190     EncodeFrameResult *encode_frame_result, int target_frame_bits,
1191     double percent_diff) {
1192   encode_command_set_target_frame_bits(&impl_ptr_->cpi->encode_command,
1193                                        target_frame_bits, percent_diff);
1194   EncodeFrame(encode_frame_result);
1195   encode_command_reset_target_frame_bits(&impl_ptr_->cpi->encode_command);
1196 }
1197 
GetCodingFrameNumFromGopMap(const std::vector<int> & gop_map)1198 static int GetCodingFrameNumFromGopMap(const std::vector<int> &gop_map) {
1199   int start_show_index = 0;
1200   int coding_frame_count = 0;
1201   while (static_cast<size_t>(start_show_index) < gop_map.size()) {
1202     const GOP_COMMAND gop_command = GetGopCommand(gop_map, start_show_index);
1203     start_show_index += gop_command.show_frame_count;
1204     coding_frame_count += gop_command_coding_frame_count(&gop_command);
1205   }
1206   assert(start_show_index == gop_map.size());
1207   return coding_frame_count;
1208 }
1209 
GetCodingFrameNum() const1210 int SimpleEncode::GetCodingFrameNum() const {
1211   assert(impl_ptr_->first_pass_stats.size() > 0);
1212   if (gop_map_.size() > 0) {
1213     return GetCodingFrameNumFromGopMap(gop_map_);
1214   }
1215 
1216   // These are the default settings for now.
1217   const int multi_layer_arf = 0;
1218   const int allow_alt_ref = 1;
1219   vpx_rational_t frame_rate =
1220       make_vpx_rational(frame_rate_num_, frame_rate_den_);
1221   const VP9EncoderConfig oxcf = GetEncodeConfig(
1222       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1223       VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1224   FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1225   FIRST_PASS_INFO first_pass_info;
1226   fps_init_first_pass_info(&first_pass_info,
1227                            GetVectorData(impl_ptr_->first_pass_stats),
1228                            num_frames_);
1229   return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info,
1230                                   multi_layer_arf, allow_alt_ref);
1231 }
1232 
ComputeKeyFrameMap() const1233 std::vector<int> SimpleEncode::ComputeKeyFrameMap() const {
1234   // The last entry of first_pass_stats is the overall stats.
1235   assert(impl_ptr_->first_pass_stats.size() == num_frames_ + 1);
1236   vpx_rational_t frame_rate =
1237       make_vpx_rational(frame_rate_num_, frame_rate_den_);
1238   const VP9EncoderConfig oxcf = GetEncodeConfig(
1239       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1240       VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1241   FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1242   FIRST_PASS_INFO first_pass_info;
1243   fps_init_first_pass_info(&first_pass_info,
1244                            GetVectorData(impl_ptr_->first_pass_stats),
1245                            num_frames_);
1246   std::vector<int> key_frame_map(num_frames_, 0);
1247   vp9_get_key_frame_map(&oxcf, &frame_info, &first_pass_info,
1248                         GetVectorData(key_frame_map));
1249   return key_frame_map;
1250 }
1251 
// Returns a copy of key_frame_map_, which ComputeFirstPassStats() fills in
// from ComputeKeyFrameMap().
std::vector<int> SimpleEncode::ObserveKeyFrameMap() const {
  return key_frame_map_;
}
1255 
GetFramePixelCount() const1256 uint64_t SimpleEncode::GetFramePixelCount() const {
1257   assert(frame_width_ % 2 == 0);
1258   assert(frame_height_ % 2 == 0);
1259   switch (impl_ptr_->img_fmt) {
1260     case VPX_IMG_FMT_I420: return frame_width_ * frame_height_ * 3 / 2;
1261     case VPX_IMG_FMT_I422: return frame_width_ * frame_height_ * 2;
1262     case VPX_IMG_FMT_I444: return frame_width_ * frame_height_ * 3;
1263     case VPX_IMG_FMT_I440: return frame_width_ * frame_height_ * 2;
1264     case VPX_IMG_FMT_I42016: return frame_width_ * frame_height_ * 3 / 2;
1265     case VPX_IMG_FMT_I42216: return frame_width_ * frame_height_ * 2;
1266     case VPX_IMG_FMT_I44416: return frame_width_ * frame_height_ * 3;
1267     case VPX_IMG_FMT_I44016: return frame_width_ * frame_height_ * 2;
1268     default: return 0;
1269   }
1270 }
1271 
~SimpleEncode()1272 SimpleEncode::~SimpleEncode() {
1273   if (in_file_ != nullptr) {
1274     fclose(in_file_);
1275   }
1276   if (out_file_ != nullptr) {
1277     fclose(out_file_);
1278   }
1279 }
1280 
1281 }  // namespace vp9
1282