1 /*
2  * Copyright (c) 2001-2017, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <limits.h>
16 #include "av1/common/odintrin.h"
17 #include "av1/encoder/ratectrl_xiph.h"
18 
/*Converts the integer v to Q57 fixed point (v * 2**57).
  The shift is carried out on an unsigned 64-bit value and the result is then
   converted to int64_t.*/
#define OD_Q57(v) ((int64_t)((uint64_t)(v) << 57))
/*Scales v by 2**45 (Q45 fixed point) and converts the product to int64_t.*/
#define OD_F_Q45(v) ((int64_t)(((v) * ((int64_t)1 << 45))))
/*Scales v by 2**12 (Q12 fixed point) and converts the product to int32_t.*/
#define OD_F_Q12(v) ((int32_t)(((v) * ((int32_t)1 << 12))))
22 
/*A rough lookup table for tan(x), 0 <= x < pi/2.
  The values are Q12 fixed-point and spaced at 5 degree intervals.
  These decisions are somewhat arbitrary, but sufficient for the 2nd order
   Bessel follower below.
  Values of x larger than 85 degrees are extrapolated from the last interval,
   which is way off, but "good enough".
  The table is read-only, so it is declared const.*/
static const uint16_t OD_ROUGH_TAN_LOOKUP[18] = { 0,     358,   722,  1098, 1491,
                                                  1910,  2365,  2868, 3437, 4096,
                                                  4881,  5850,  7094, 8784, 11254,
                                                  15286, 23230, 46817 };

/*Linearly interpolates OD_ROUGH_TAN_LOOKUP at position alpha*36.
  alpha: Q24 in the range [0,0.5); each table interval covers 1/36 of alpha.
  Return: The interpolated table value in 5.12 fixed point.*/
static int od_warp_alpha(int alpha) {
  int i;
  int d;
  int t0;
  int t1;
  /*Integer part of alpha*36 selects the table interval.*/
  i = alpha * 36 >> 24;
  /*Keep i + 1 inside the table; positions past the last interval are
     extrapolated from it.*/
  if (i >= 17) i = 16;
  t0 = OD_ROUGH_TAN_LOOKUP[i];
  t1 = OD_ROUGH_TAN_LOOKUP[i + 1];
  /*d is the Q24 offset of alpha*36 from the start of interval i.*/
  d = alpha * 36 - (i << 24);
  /*t0 + (t1 - t0)*d, evaluated with 32 fractional bits and truncated.*/
  return (int)((((int64_t)t0 << 32) + ((t1 - t0) << 8) * (int64_t)d) >> 32);
}
48 
/*Per-iteration constants for the CORDIC loops in od_bexp64() and od_blog64().
  NOTE(review): the entries appear to be
   round(2**(62 + i)*atanh(2**-(i + 1))/log(2)); this matches the first
   entries and the limit value numerically, but confirm against the original
   derivation.
  The sequence has converged by the end of the table (the last two entries are
   equal), which is why iterations past index 31 reuse entry 31.*/
static const int64_t OD_ATANH_LOG2[32] = {
  0x32B803473F7AD0F4LL, 0x2F2A71BD4E25E916LL, 0x2E68B244BB93BA06LL,
  0x2E39FB9198CE62E4LL, 0x2E2E683F68565C8FLL, 0x2E2B850BE2077FC1LL,
  0x2E2ACC58FE7B78DBLL, 0x2E2A9E2DE52FD5F2LL, 0x2E2A92A338D53EECLL,
  0x2E2A8FC08F5E19B6LL, 0x2E2A8F07E51A485ELL, 0x2E2A8ED9BA8AF388LL,
  0x2E2A8ECE2FE7384ALL, 0x2E2A8ECB4D3E4B1ALL, 0x2E2A8ECA94940FE8LL,
  0x2E2A8ECA6669811DLL, 0x2E2A8ECA5ADEDD6ALL, 0x2E2A8ECA57FC347ELL,
  0x2E2A8ECA57438A43LL, 0x2E2A8ECA57155FB4LL, 0x2E2A8ECA5709D510LL,
  0x2E2A8ECA5706F267LL, 0x2E2A8ECA570639BDLL, 0x2E2A8ECA57060B92LL,
  0x2E2A8ECA57060008LL, 0x2E2A8ECA5705FD25LL, 0x2E2A8ECA5705FC6CLL,
  0x2E2A8ECA5705FC3ELL, 0x2E2A8ECA5705FC33LL, 0x2E2A8ECA5705FC30LL,
  0x2E2A8ECA5705FC2FLL, 0x2E2A8ECA5705FC2FLL
};
62 
/*Integer binary logarithm of a 64-bit value.
  v: The value to measure.
  Return: For positive v, one plus the index of the highest set bit (so 1 maps
   to 1 and 255 maps to 8); 0 when v is 0.*/
static int od_ilog64(int64_t v) {
  static const unsigned char OD_DEBRUIJN_IDX64[64] = {
    0,  1,  2,  7,  3,  13, 8,  19, 4,  25, 14, 28, 9,  34, 20, 40,
    5,  17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
    63, 6,  12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
    62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58
  };
  int shift;
  int low_bit;
  /*Smear the highest set bit into every lower bit position.*/
  for (shift = 1; shift <= 32; shift <<= 1) {
    v |= v >> shift;
  }
  /*The lowest bit tells zero apart from everything else.*/
  low_bit = (int)v & 1;
  /*(v >> 1) + 1 is now a single power of two (or 1 for inputs 0 and 1);
     the multiply-and-shift hashes it to its bit index.*/
  v = (v >> 1) + 1;
  return low_bit +
         OD_DEBRUIJN_IDX64[v * UINT64_C(0x218A392CD3D5DBF) >> 58 & 0x3F];
}
82 
83 /*Computes the binary exponential of logq57.
84   input: a log base 2 in Q57 format
85   output: a 64 bit integer in Q0 (no fraction) */
od_bexp64(int64_t logq57)86 static int64_t od_bexp64(int64_t logq57) {
87   int64_t w;
88   int64_t z;
89   int ipart;
90   ipart = (int)(logq57 >> 57);
91   if (ipart < 0) return 0;
92   if (ipart >= 63) return 0x7FFFFFFFFFFFFFFFLL;
93   z = logq57 - OD_Q57(ipart);
94   if (z) {
95     int64_t mask;
96     int64_t wlo;
97     int i;
98     /*C doesn't give us 64x64->128 muls, so we use CORDIC.
99       This is not particularly fast, but it's not being used in time-critical
100        code; it is very accurate.*/
101     /*z is the fractional part of the log in Q62 format.
102       We need 1 bit of headroom since the magnitude can get larger than 1
103        during the iteration, and a sign bit.*/
104     z <<= 5;
105     /*w is the exponential in Q61 format (since it also needs headroom and can
106        get as large as 2.0); we could get another bit if we dropped the sign,
107        but we'll recover that bit later anyway.
108       Ideally this should start out as
109         \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
110        but in order to guarantee convergence we have to repeat iterations 4,
111         13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/
112     w = 0x26A3D0E401DD846DLL;
113     for (i = 0;; i++) {
114       mask = -(z < 0);
115       w += ((w >> (i + 1)) + mask) ^ mask;
116       z -= (OD_ATANH_LOG2[i] + mask) ^ mask;
117       /*Repeat iteration 4.*/
118       if (i >= 3) break;
119       z *= 2;
120     }
121     for (;; i++) {
122       mask = -(z < 0);
123       w += ((w >> (i + 1)) + mask) ^ mask;
124       z -= (OD_ATANH_LOG2[i] + mask) ^ mask;
125       /*Repeat iteration 13.*/
126       if (i >= 12) break;
127       z *= 2;
128     }
129     for (; i < 32; i++) {
130       mask = -(z < 0);
131       w += ((w >> (i + 1)) + mask) ^ mask;
132       z = (z - ((OD_ATANH_LOG2[i] + mask) ^ mask)) * 2;
133     }
134     wlo = 0;
135     /*Skip the remaining iterations unless we really require that much
136        precision.
137       We could have bailed out earlier for smaller iparts, but that would
138        require initializing w from a table, as the limit doesn't converge to
139        61-bit precision until n=30.*/
140     if (ipart > 30) {
141       /*For these iterations, we just update the low bits, as the high bits
142          can't possibly be affected.
143         OD_ATANH_LOG2 has also converged (it actually did so one iteration
144          earlier, but that's no reason for an extra special case).*/
145       for (;; i++) {
146         mask = -(z < 0);
147         wlo += ((w >> i) + mask) ^ mask;
148         z -= (OD_ATANH_LOG2[31] + mask) ^ mask;
149         /*Repeat iteration 40.*/
150         if (i >= 39) break;
151         z <<= 1;
152       }
153       for (; i < 61; i++) {
154         mask = -(z < 0);
155         wlo += ((w >> i) + mask) ^ mask;
156         z = (z - ((OD_ATANH_LOG2[31] + mask) ^ mask)) << 1;
157       }
158     }
159     w = (w << 1) + wlo;
160   } else {
161     w = (int64_t)1 << 62;
162   }
163   if (ipart < 62) {
164     w = ((w >> (61 - ipart)) + 1) >> 1;
165   }
166   return w;
167 }
168 
169 /*Computes the binary log of w
170   input: a 64-bit integer in Q0 (no fraction)
171   output: a 64-bit log in Q57 */
od_blog64(int64_t w)172 static int64_t od_blog64(int64_t w) {
173   int64_t z;
174   int ipart;
175   if (w <= 0) return -1;
176   ipart = od_ilog64(w) - 1;
177   if (ipart > 61) {
178     w >>= ipart - 61;
179   } else {
180     w <<= 61 - ipart;
181   }
182   z = 0;
183   if (w & (w - 1)) {
184     int64_t x;
185     int64_t y;
186     int64_t u;
187     int64_t mask;
188     int i;
189     /*C doesn't give us 64x64->128 muls, so we use CORDIC.
190       This is not particularly fast, but it's not being used in time-critical
191        code; it is very accurate.*/
192     /*z is the fractional part of the log in Q61 format.*/
193     /*x and y are the cosh() and sinh(), respectively, in Q61 format.
194       We are computing z = 2*atanh(y/x) = 2*atanh((w - 1)/(w + 1)).*/
195     x = w + ((int64_t)1 << 61);
196     y = w - ((int64_t)1 << 61);
197     for (i = 0; i < 4; i++) {
198       mask = -(y < 0);
199       z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
200       u = x >> (i + 1);
201       x -= ((y >> (i + 1)) + mask) ^ mask;
202       y -= (u + mask) ^ mask;
203     }
204     /*Repeat iteration 4.*/
205     for (i--; i < 13; i++) {
206       mask = -(y < 0);
207       z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
208       u = x >> (i + 1);
209       x -= ((y >> (i + 1)) + mask) ^ mask;
210       y -= (u + mask) ^ mask;
211     }
212     /*Repeat iteration 13.*/
213     for (i--; i < 32; i++) {
214       mask = -(y < 0);
215       z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
216       u = x >> (i + 1);
217       x -= ((y >> (i + 1)) + mask) ^ mask;
218       y -= (u + mask) ^ mask;
219     }
220     /*OD_ATANH_LOG2 has converged.*/
221     for (; i < 40; i++) {
222       mask = -(y < 0);
223       z += ((OD_ATANH_LOG2[31] >> i) + mask) ^ mask;
224       u = x >> (i + 1);
225       x -= ((y >> (i + 1)) + mask) ^ mask;
226       y -= (u + mask) ^ mask;
227     }
228     /*Repeat iteration 40.*/
229     for (i--; i < 62; i++) {
230       mask = -(y < 0);
231       z += ((OD_ATANH_LOG2[31] >> i) + mask) ^ mask;
232       u = x >> (i + 1);
233       x -= ((y >> (i + 1)) + mask) ^ mask;
234       y -= (u + mask) ^ mask;
235     }
236     z = (z + 8) >> 4;
237   }
238   return OD_Q57(ipart) + z;
239 }
240 
/*Converts a Q57 value to a 32-bit Q24 value, rounding and clamping.
  in: The input in Q57 format.
  Return: The same number in Q24, limited to the int32_t range.*/
static int32_t od_q57_to_q24(int64_t in) {
  /*Drop the 33 surplus fractional bits, adding half an LSB first.*/
  const int64_t rounded = (in + ((int64_t)1 << 32)) >> 33;
  /*0x80000000 is automatically converted to unsigned on 32-bit systems.
    -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to
    unsigned.*/
  return (int32_t)OD_CLAMPI(-0x7FFFFFFF - 1, rounded, 0x7FFFFFFF);
}
252 
/*Binary exponential of log_scale with 24-bit fractional precision and
   saturation.
  log_scale: A binary logarithm in Q57 format.
  Return: The binary exponential in Q24 format, saturated to 2**31-1 if
   log_scale was too large.*/
static int32_t od_bexp64_q24(int64_t log_scale) {
  int64_t scaled;
  /*An exponent of 8 or more cannot be represented in Q24 with 32 bits.*/
  if (log_scale >= OD_Q57(8)) return 0x7FFFFFFF;
  /*Adding 24 to the exponent yields the result directly in Q24.*/
  scaled = od_bexp64(log_scale + OD_Q57(24));
  if (scaled >= 0x7FFFFFFF) return 0x7FFFFFFF;
  return (int32_t)scaled;
}
266 
267 /*Re-initialize Bessel filter coefficients with the specified delay.
268   This does not alter the x/y state, but changes the reaction time of the
269    filter.
270   Altering the time constant of a reactive filter without alterning internal
271    state is something that has to be done carefuly, but our design operates at
272    high enough delays and with small enough time constant changes to make it
273    safe.*/
od_iir_bessel2_reinit(od_iir_bessel2 * f,int delay)274 static void od_iir_bessel2_reinit(od_iir_bessel2 *f, int delay) {
275   int alpha;
276   int64_t one48;
277   int64_t warp;
278   int64_t k1;
279   int64_t k2;
280   int64_t d;
281   int64_t a;
282   int64_t ik2;
283   int64_t b1;
284   int64_t b2;
285   /*This borrows some code from an unreleased version of Postfish.
286     See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
287      on deriving the filter coefficients.*/
288   /*alpha is Q24*/
289   alpha = (1 << 24) / delay;
290   one48 = (int64_t)1 << 48;
291   /*warp is 7.12*/
292   warp = OD_MAXI(od_warp_alpha(alpha), 1);
293   /*k1 is 9.12*/
294   k1 = 3 * warp;
295   /*k2 is 16.24.*/
296   k2 = k1 * warp;
297   /*d is 16.15.*/
298   d = ((((1 << 12) + k1) << 12) + k2 + 256) >> 9;
299   /*a is 0.32, since d is larger than both 1.0 and k2.*/
300   a = (k2 << 23) / d;
301   /*ik2 is 25.24.*/
302   ik2 = one48 / k2;
303   /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/
304   b1 = 2 * a * (ik2 - (1 << 24));
305   /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/
306   b2 = (one48 << 8) - ((4 * a) << 24) - b1;
307   /*All of the filter parameters are Q24.*/
308   f->c[0] = (int32_t)((b1 + ((int64_t)1 << 31)) >> 32);
309   f->c[1] = (int32_t)((b2 + ((int64_t)1 << 31)) >> 32);
310   f->g = (int32_t)((a + 128) >> 8);
311 }
312 
313 /*Initialize a 2nd order low-pass Bessel filter with the corresponding delay
314    and initial value.
315   value is Q24.*/
od_iir_bessel2_init(od_iir_bessel2 * f,int delay,int32_t value)316 static void od_iir_bessel2_init(od_iir_bessel2 *f, int delay, int32_t value) {
317   od_iir_bessel2_reinit(f, delay);
318   f->y[1] = f->y[0] = f->x[1] = f->x[0] = value;
319 }
320 
/*Feeds one new sample through the filter and advances its history.
  x: The new input sample.
  Return: The new filter output, which is also stored (truncated to 32 bits)
   in f->y[0].*/
static int64_t od_iir_bessel2_update(od_iir_bessel2 *f, int32_t x) {
  /*Snapshot the history in 64 bits so the products below cannot overflow
     32-bit arithmetic.*/
  const int64_t prev_x0 = f->x[0];
  const int64_t prev_x1 = f->x[1];
  const int64_t prev_y0 = f->y[0];
  const int64_t prev_y1 = f->y[1];
  const int64_t gain = f->g;
  const int64_t coef0 = f->c[0];
  const int64_t coef1 = f->c[1];
  int64_t out;
  /*Weighted inputs plus feedback, rounded while dropping 24 fractional
     bits.*/
  out = ((x + prev_x0 * 2 + prev_x1) * gain + prev_y0 * coef0 +
         prev_y1 * coef1 + (1 << 23)) >>
        24;
  /*Shift the input and output histories by one sample.*/
  f->x[1] = (int32_t)prev_x0;
  f->x[0] = x;
  f->y[1] = (int32_t)prev_y0;
  f->y[0] = (int32_t)out;
  return out;
}
344 
/*Resets the rate-control state derived from rc's target bitrate, framerate
   and frame size: the bit reservoir, the per-frame-type bit models, the
   frame counters, the drop tracking and the scale/drop followers.*/
static void od_enc_rc_reset(od_rc_state *rc) {
  /*The frame subtypes that carry drop tracking and a drop follower.*/
  const int tracked[4] = { OD_I_FRAME, OD_P_FRAME, OD_GOLDEN_P_FRAME,
                           OD_ALTREF_P_FRAME };
  const int64_t coeff_shift_q57 = OD_Q57(OD_COEFF_SHIFT);
  int64_t npixels;
  int64_t ibpp;
  int i_exp;
  int i_scale;
  int p_exp;
  int p_scale;
  int t;
  rc->bits_per_frame = (int64_t)(rc->target_bitrate / rc->framerate);
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away.*/
  if (rc->bits_per_frame > 0x400000000000LL) {
    rc->bits_per_frame = (int64_t)0x400000000000LL;
  } else if (rc->bits_per_frame < 32) {
    rc->bits_per_frame = 32;
  }
  rc->reservoir_frame_delay = OD_MAXI(rc->reservoir_frame_delay, 12);
  rc->reservoir_max = rc->bits_per_frame * rc->reservoir_frame_delay;
  /*Start with a buffer fullness and fullness target of 50% */
  rc->reservoir_target = (rc->reservoir_max + 1) >> 1;
  rc->reservoir_fullness = rc->reservoir_target;
  /*Pick exponents and initial scales for quantizer selection.*/
  npixels = rc->frame_width * (int64_t)rc->frame_height;
  rc->log_npixels = od_blog64(npixels);
  /*ibpp is the (truncated) number of pixels per target bit.*/
  ibpp = npixels / rc->bits_per_frame;
  /*All of these initial scale/exp values are from Theora, and have not yet
     been adapted to Daala, so they're certainly wrong.
    The B-frame values especially are simply copies of the P-frame values.*/
  if (ibpp < 1) {
    i_exp = 59;
    i_scale = 1997;
  } else if (ibpp < 2) {
    i_exp = 55;
    i_scale = 1604;
  } else {
    i_exp = 48;
    i_scale = 834;
  }
  rc->exp[OD_I_FRAME] = i_exp;
  rc->log_scale[OD_I_FRAME] = od_blog64(i_scale) - coeff_shift_q57;
  if (ibpp < 4) {
    p_exp = 100;
    p_scale = 2249;
  } else if (ibpp < 8) {
    p_exp = 95;
    p_scale = 1751;
  } else {
    p_exp = 73;
    p_scale = 1260;
  }
  rc->exp[OD_P_FRAME] = p_exp;
  rc->log_scale[OD_P_FRAME] = od_blog64(p_scale) - coeff_shift_q57;
  /*Golden and altref P-frames use the same log_scale and exp modeling values
     as regular P-frames and the same scale follower.
    For convenience in the rate calculation code, we maintain copies of the
     scale and exp values under their own subtypes.*/
  rc->exp[OD_GOLDEN_P_FRAME] = rc->exp[OD_P_FRAME];
  rc->log_scale[OD_GOLDEN_P_FRAME] = rc->log_scale[OD_P_FRAME];
  rc->exp[OD_ALTREF_P_FRAME] = rc->exp[OD_P_FRAME];
  rc->log_scale[OD_ALTREF_P_FRAME] = rc->log_scale[OD_P_FRAME];
  /*The P-frame follower starts with a delay of 10 so that later incrementing
     the delay works as designed.
    10 is not an exact choice, but rather a good working trade-off.*/
  rc->inter_p_delay = 10;
  rc->inter_delay_target = rc->reservoir_frame_delay >> 1;
  memset(rc->frame_count, 0, sizeof(rc->frame_count));
  /*Drop-frame tracking is concerned with more than just the basic frame
     types: it also tracks the golden and altref P-frame subtypes.*/
  for (t = 0; t < 4; t++) {
    rc->prev_drop_count[tracked[t]] = 0;
    rc->log_drop_scale[tracked[t]] = OD_Q57(0);
  }
  /*Set up second order followers, initialized according to corresponding
     time constants.*/
  od_iir_bessel2_init(&rc->scalefilter[OD_I_FRAME], 4,
                      od_q57_to_q24(rc->log_scale[OD_I_FRAME]));
  od_iir_bessel2_init(&rc->scalefilter[OD_P_FRAME], rc->inter_p_delay,
                      od_q57_to_q24(rc->log_scale[OD_P_FRAME]));
  for (t = 0; t < 4; t++) {
    od_iir_bessel2_init(&rc->vfrfilter[tracked[t]], 4,
                        od_bexp64_q24(rc->log_drop_scale[tracked[t]]));
  }
}
431 
/*Adapts the rate-control buffer model to rc's current target bitrate and
   framerate.
  Before the first frame this is a full reset; afterwards only the buffer
   bounds and the P-frame follower delay are updated.
  Return: Always 0.*/
int od_enc_rc_resize(od_rc_state *rc) {
  int idt;
  /*If encoding has not yet begun, reset the buffer state.*/
  if (rc->cur_frame == 0) {
    od_enc_rc_reset(rc);
    return 0;
  }
  /*Otherwise, update the bounds on the buffer, but not the current
     fullness.*/
  rc->bits_per_frame = (int64_t)(rc->target_bitrate / rc->framerate);
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away.*/
  if (rc->bits_per_frame > 0x400000000000LL) {
    rc->bits_per_frame = (int64_t)0x400000000000LL;
  } else if (rc->bits_per_frame < 32) {
    rc->bits_per_frame = 32;
  }
  rc->reservoir_frame_delay = OD_MAXI(rc->reservoir_frame_delay, 12);
  rc->reservoir_max = rc->bits_per_frame * rc->reservoir_frame_delay;
  /*Half the reservoir, plus a quarter frame's worth of bits for each frame
     in the shorter of the keyframe interval and the reservoir delay.*/
  rc->reservoir_target =
      ((rc->reservoir_max + 1) >> 1) +
      ((rc->bits_per_frame + 2) >> 2) *
          OD_MINI(rc->keyframe_rate, rc->reservoir_frame_delay);
  /*Update the INTER-frame scale filter delay.
    We jump to it immediately if we've already seen enough frames; otherwise
     it is simply set as the new target.*/
  idt = OD_MAXI(rc->reservoir_frame_delay >> 1, 10);
  rc->inter_delay_target = idt;
  if (idt < OD_MINI(rc->inter_p_delay, rc->frame_count[OD_P_FRAME])) {
    od_iir_bessel2_init(&rc->scalefilter[OD_P_FRAME], idt,
                        rc->scalefilter[OD_P_FRAME].y[0]);
    rc->inter_p_delay = idt;
  }
  return 0;
}
468 
/*Initializes rate control for the given target bitrate, or retargets it if
   it is already running.
  rc: The rate-control state; rc->framerate must already be set.
  bitrate: The new target bitrate; values <= 0 leave the buffer model
   unconfigured.
  delay_ms: The buffering delay in milliseconds.
  Return: 1 if rc->framerate is not positive, otherwise 0 (or whatever
   od_enc_rc_resize() returns on the retargeting path).*/
int od_enc_rc_init(od_rc_state *rc, int64_t bitrate, int delay_ms) {
  if (rc->framerate <= 0) return 1;
  if (rc->target_bitrate > 0) {
    /*State has already been initialized; rather than reinitialize,
      adjust the buffering for the new target rate. */
    rc->target_bitrate = bitrate;
    return od_enc_rc_resize(rc);
  }
  rc->target_quantizer = 0;
  rc->target_bitrate = bitrate;
  rc->rate_bias = 0;
  if (bitrate <= 0) return 0;
  /*The buffer size is clamped to [12, 256] frames: the upper bound is applied
     here and the lower bound by od_enc_rc_reset().
    This interval is short enough to allow reaction, but long enough to allow
     looking into the next GOP (avoiding the case where the last frames before
     an I-frame get starved).
    The 12 frame minimum gives us some chance to distribute bit estimation
     errors in the worst case.
    The 256 frame maximum means we'll require 8-10 seconds of pre-buffering at
     24-30 fps, which is not unreasonable.*/
  rc->reservoir_frame_delay =
      (int)OD_MINI((delay_ms / 1000) * rc->framerate, 256);
  rc->drop_frames = 1;
  rc->cap_overflow = 1;
  rc->cap_underflow = 0;
  rc->twopass_state = 0;
  od_enc_rc_reset(rc);
  return 0;
}
500 
501 /*Scale the number of frames by the number of expected drops/duplicates.*/
od_rc_scale_drop(od_rc_state * rc,int frame_type,int nframes)502 static int od_rc_scale_drop(od_rc_state *rc, int frame_type, int nframes) {
503   if (rc->prev_drop_count[frame_type] > 0 ||
504       rc->log_drop_scale[frame_type] > OD_Q57(0)) {
505     int64_t dup_scale;
506     dup_scale = od_bexp64(((rc->log_drop_scale[frame_type] +
507                             od_blog64(rc->prev_drop_count[frame_type] + 1)) >>
508                            1) +
509                           OD_Q57(8));
510     if (dup_scale < nframes << 8) {
511       int dup_scalei;
512       dup_scalei = (int)dup_scale;
513       if (dup_scalei > 0) {
514         nframes = ((nframes << 8) + dup_scalei - 1) / dup_scalei;
515       }
516     } else {
517       nframes = !!nframes;
518     }
519   }
520   return nframes;
521 }
522 
523 /*Closed form version of frame determination code.
524   Used by rate control to predict frame types and subtypes into the future.
525   No side effects, may be called any number of times.
526   Note that it ignores end-of-file conditions; one-pass planning *should*
527    ignore end-of-file. */
od_frame_type(od_rc_state * rc,int64_t coding_frame_count,int * is_golden,int * is_altref,int64_t * ip_count)528 int od_frame_type(od_rc_state *rc, int64_t coding_frame_count, int *is_golden,
529                   int *is_altref, int64_t *ip_count) {
530   int frame_type;
531   if (coding_frame_count == 0) {
532     *is_golden = 1;
533     *is_altref = 1;
534     *ip_count = 0;
535     frame_type = OD_I_FRAME;
536   } else {
537     int keyrate = rc->keyframe_rate;
538     if (rc->closed_gop) {
539       int ip_per_gop;
540       int gop_n;
541       int gop_i;
542       ip_per_gop = (keyrate - 1) / 2;
543       gop_n = coding_frame_count / keyrate;
544       gop_i = coding_frame_count - gop_n * keyrate;
545       *ip_count = gop_n * ip_per_gop + (gop_i > 0) + (gop_i - 1);
546       frame_type = gop_i == 0 ? OD_I_FRAME : OD_P_FRAME;
547     } else {
548       int ip_per_gop;
549       int gop_n;
550       int gop_i;
551       ip_per_gop = (keyrate);
552       gop_n = (coding_frame_count - 1) / keyrate;
553       gop_i = coding_frame_count - gop_n * keyrate - 1;
554       *ip_count = (coding_frame_count > 0) + gop_n * ip_per_gop + (gop_i);
555       frame_type = gop_i / 1 < ip_per_gop - 1 ? OD_P_FRAME : OD_I_FRAME;
556     }
557   }
558   *is_golden =
559       (*ip_count % rc->goldenframe_rate) == 0 || frame_type == OD_I_FRAME;
560   *is_altref = (*ip_count % rc->altref_rate) == 0 || frame_type == OD_I_FRAME;
561   return frame_type;
562 }
563 
564 /*Count frames types forward from the current frame up to but not including
565    the last I-frame in reservoir_frame_delay.
566   If reservoir_frame_delay contains no I-frames (or the current frame is the
567    only I-frame), count all reservoir_frame_delay frames.
568   Returns the number of frames counted.
569   Right now, this implementation is simple, brute-force, and expensive.
570   It is also easy to understand and debug.
571   TODO: replace with a virtual FIFO that keeps running totals as
572    repeating the counting over-and-over will have a performance impact on
573    whole-file 2pass usage.*/
frame_type_count(od_rc_state * rc,int nframes[OD_FRAME_NSUBTYPES])574 static int frame_type_count(od_rc_state *rc, int nframes[OD_FRAME_NSUBTYPES]) {
575   int i;
576   int j;
577   int acc[OD_FRAME_NSUBTYPES];
578   int count;
579   int reservoir_frames;
580   int reservoir_frame_delay;
581   memset(nframes, 0, OD_FRAME_NSUBTYPES * sizeof(*nframes));
582   memset(acc, 0, sizeof(acc));
583   count = 0;
584   reservoir_frames = 0;
585 #if 1
586   /*Go ahead and count past end-of-stream.
587     We won't nail the exact bitrate on short files that end with a partial
588      GOP, but we also won't [potentially] destroy the quality of the last few
589      frames in that same case when we suddenly find out the stream is ending
590      before the original planning horizon.*/
591   reservoir_frame_delay = rc->reservoir_frame_delay;
592 #else
593   /*Don't count past the end of the stream (once we know where end-of-stream
594      is).*/
595   reservoir_frame_delay =
596       rc->end_of_input ? rc->input_size + 1 : rc->reservoir_frame_delay;
597 #endif
598   for (i = 0; i < reservoir_frame_delay; i++) {
599     int frame_type;
600     int is_golden;
601     int is_altref;
602     int64_t dummy;
603     frame_type =
604         od_frame_type(rc, rc->cur_frame + i, &is_golden, &is_altref, &dummy);
605     switch (frame_type) {
606       case OD_I_FRAME: {
607         for (j = 0; j < OD_FRAME_NSUBTYPES; j++) nframes[j] += acc[j];
608         reservoir_frames += count;
609         memset(acc, 0, sizeof(acc));
610         acc[OD_I_FRAME] = 1;
611         count = 1;
612         break;
613       }
614       case OD_P_FRAME: {
615         if (is_golden) {
616           ++acc[OD_GOLDEN_P_FRAME];
617           ++count;
618         } else if (is_altref) {
619           ++acc[OD_ALTREF_P_FRAME];
620           ++count;
621         } else {
622           ++acc[OD_P_FRAME];
623           ++count;
624         }
625         break;
626       }
627     }
628   }
629   /*If there were no I-frames at all, or only the first frame was an I-frame,
630      the accumulators never flushed and still contain the counts for the
631      entire buffer.
632     In both these cases, we return these counts.
633     Otherwise, we discard what remains in the accumulators as they contain
634      the counts from and past the last I-frame.*/
635   if (reservoir_frames == 0) {
636     for (i = 0; i < OD_FRAME_NSUBTYPES; i++) nframes[i] = acc[i];
637     reservoir_frames += count;
638   }
639   return reservoir_frames;
640 }
641 
/*Passes q and bit_depth to av1_convert_qindex_to_q() and returns the result
   rounded to an integer with lrint().*/
static int convert_to_ac_quant(int q, int bit_depth) {
  const double ac_q = av1_convert_qindex_to_q(q, bit_depth);
  return (int)lrint(ac_q);
}
645 
od_enc_rc_select_quantizers_and_lambdas(od_rc_state * rc,int is_golden_frame,int is_altref_frame,int frame_type,int * bottom_idx,int * top_idx)646 int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
647                                             int is_golden_frame,
648                                             int is_altref_frame, int frame_type,
649                                             int *bottom_idx, int *top_idx) {
650   int frame_subtype;
651   int64_t log_cur_scale;
652   int lossy_quantizer_min;
653   int lossy_quantizer_max;
654   double mqp_i = OD_MQP_I;
655   double mqp_p = OD_MQP_P;
656   double mqp_gp = OD_MQP_GP;
657   double mqp_ap = OD_MQP_AP;
658   int reservoir_frames;
659   int nframes[OD_FRAME_NSUBTYPES];
660   int32_t mqp_Q12[OD_FRAME_NSUBTYPES];
661   int64_t dqp_Q45[OD_FRAME_NSUBTYPES];
662   /*Verify the closed-form frame type determination code matches what the
663      input queue set.*/
664   /*One pseudo-non-closed-form caveat:
665     Once we've seen end-of-input, the batched frame determination code
666      suppresses the last open-GOP's I-frame (since it would only be
667      useful for the next GOP, which doesn't exist).
668      Thus, don't check one the input queue is drained.*/
669   if (!rc->end_of_input) {
670     int closed_form_type;
671     int closed_form_golden;
672     int closed_form_altref;
673     int64_t closed_form_cur_frame;
674     closed_form_type =
675         od_frame_type(rc, rc->cur_frame, &closed_form_golden,
676                       &closed_form_altref, &closed_form_cur_frame);
677     OD_UNUSED(closed_form_type);
678     OD_UNUSED(is_altref_frame);
679     assert(closed_form_type == frame_type);
680     assert(closed_form_cur_frame == rc->cur_frame);
681     assert(closed_form_altref == is_altref_frame);
682     assert(closed_form_golden == is_golden_frame);
683   }
684 
685   log_cur_scale = (int64_t)rc->scalefilter[frame_type].y[0] << 33;
686 
687   /*Count the various types and classes of frames.*/
688   reservoir_frames = frame_type_count(rc, nframes);
689   nframes[OD_I_FRAME] = od_rc_scale_drop(rc, OD_I_FRAME, nframes[OD_I_FRAME]);
690   nframes[OD_P_FRAME] = od_rc_scale_drop(rc, OD_P_FRAME, nframes[OD_P_FRAME]);
691   nframes[OD_GOLDEN_P_FRAME] =
692       od_rc_scale_drop(rc, OD_GOLDEN_P_FRAME, nframes[OD_GOLDEN_P_FRAME]);
693   nframes[OD_ALTREF_P_FRAME] =
694       od_rc_scale_drop(rc, OD_ALTREF_P_FRAME, nframes[OD_ALTREF_P_FRAME]);
695 
696   switch (rc->twopass_state) {
697     default: break;
698     case 1: {
699       /*Pass 1 mode: use a fixed qi value.*/
700       return rc->firstpass_quant;
701     } break;
702     case 2: {
703       int i;
704       int64_t scale_sum[OD_FRAME_NSUBTYPES];
705       int qti;
706       /*Pass 2 mode: we know exactly how much of each frame type there is in
707          the current buffer window, and have estimates for the scales.*/
708       for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
709         nframes[i] = rc->nframes[i];
710         nframes[i] = rc->nframes[i];
711         scale_sum[i] = rc->scale_sum[i];
712       }
713       /*If we're not using the same frame type as in pass 1 (because someone
714          changed the keyframe interval), remove that scale estimate.
715         We'll add in a replacement for the correct frame type below.*/
716       qti = rc->cur_metrics.frame_type;
717       if (qti != frame_type) {
718         nframes[qti]--;
719         scale_sum[qti] -= od_bexp64_q24(rc->cur_metrics.log_scale);
720       }
721       /*Compute log_scale estimates for each frame type from the pass-1 scales
722          we measured in the current window.*/
723       for (qti = 0; qti < OD_FRAME_NSUBTYPES; qti++) {
724         rc->log_scale[qti] = nframes[qti] > 0
725                                  ? od_blog64(scale_sum[qti]) -
726                                        od_blog64(nframes[qti]) - OD_Q57(24)
727                                  : -rc->log_npixels;
728       }
729       /*If we're not using the same frame type as in pass 1, add a scale
730          estimate for the corresponding frame using the current low-pass
731          filter value.
732         This is mostly to ensure we have a valid estimate even when pass 1 had
733          no frames of this type in the buffer window.
734         TODO: We could also plan ahead and figure out how many keyframes we'll
735          be forced to add in the current buffer window.*/
736       qti = rc->cur_metrics.frame_type;
737       if (qti != frame_type) {
738         int64_t scale;
739         scale = rc->log_scale[frame_type] < OD_Q57(23)
740                     ? od_bexp64(rc->log_scale[frame_type] + OD_Q57(24))
741                     : 0x7FFFFFFFFFFFLL;
742         scale *= nframes[frame_type];
743         nframes[frame_type]++;
744         scale += od_bexp64_q24(log_cur_scale >> 33);
745         rc->log_scale[frame_type] =
746             od_blog64(scale) - od_blog64(nframes[qti]) - OD_Q57(24);
747       } else {
748         log_cur_scale = (int64_t)rc->cur_metrics.log_scale << 33;
749       }
750     } break;
751   }
752 
753   /*Quantizer selection sticks to the codable, lossy portion of the quantizer
754     range.*/
755   lossy_quantizer_min = convert_to_ac_quant(rc->minq, rc->bit_depth);
756   lossy_quantizer_max = convert_to_ac_quant(rc->maxq, rc->bit_depth);
757   frame_subtype = frame_type;
758   /*Stash quantizer modulation by frame type.*/
759   mqp_Q12[OD_I_FRAME] = OD_F_Q12(mqp_i);
760   mqp_Q12[OD_P_FRAME] = OD_F_Q12(mqp_p);
761   mqp_Q12[OD_GOLDEN_P_FRAME] = OD_F_Q12(mqp_gp);
762   mqp_Q12[OD_ALTREF_P_FRAME] = OD_F_Q12(mqp_ap);
763   dqp_Q45[OD_I_FRAME] = OD_F_Q45(OD_DQP_I);
764   dqp_Q45[OD_P_FRAME] = OD_F_Q45(OD_DQP_P);
765   dqp_Q45[OD_GOLDEN_P_FRAME] = OD_F_Q45(OD_DQP_GP);
766   dqp_Q45[OD_ALTREF_P_FRAME] = OD_F_Q45(OD_DQP_AP);
767   /*Is rate control active?*/
768   if (rc->target_bitrate <= 0) {
769     /*Rate control is not active; derive quantizer directly from
770       quality parameter and frame type. */
771     /*Can't use the OD_LOSSLESS macro, as it uses state.quantizer to intuit,
772       and we've not set it yet.*/
773     if (rc->quality == 0) {
774       /*Lossless coding requested.*/
775       rc->base_quantizer = 0;
776       rc->target_quantizer = 0;
777     } else {
778       int64_t log_quantizer;
779 
780       /* Adjust the modulation constants using the last frame's quantizer. */
781       double mqp_delta = (255 - rc->target_quantizer) / 2000.0f;
782       mqp_i -= mqp_delta;
783       mqp_p += mqp_delta;
784       mqp_gp -= mqp_delta;
785       mqp_Q12[OD_I_FRAME] = OD_F_Q12(mqp_i);
786       mqp_Q12[OD_P_FRAME] = OD_F_Q12(mqp_p);
787       mqp_Q12[OD_GOLDEN_P_FRAME] = OD_F_Q12(mqp_gp);
788       mqp_Q12[OD_ALTREF_P_FRAME] = OD_F_Q12(mqp_ap);
789 
790       if (rc->quality == -1) {
791         /*A quality of -1 means quality was unset; use a default.*/
792         rc->base_quantizer = convert_to_ac_quant(10, rc->bit_depth);
793       } else {
794         rc->base_quantizer = convert_to_ac_quant(rc->quality, rc->bit_depth);
795       }
796 
797       if (rc->periodic_boosts && !is_golden_frame) {
798         int pattern_rate = (rc->goldenframe_rate >> 1);
799         int dist_to_golden = rc->cur_frame % pattern_rate;
800         int dist_away_golden = pattern_rate - dist_to_golden;
801         int boost = dist_to_golden;
802         if (dist_away_golden > dist_to_golden) boost = dist_away_golden;
803         boost -= pattern_rate;
804         boost *= (rc->base_quantizer) / OD_PERIODIC_BOOST_DIV;
805         rc->base_quantizer = rc->base_quantizer + boost;
806       }
807 
808       /*As originally written, qp modulation is applied to the coded quantizer.
809         Because we now have and use a more precise target quantizer for various
810         calculation, that needs to be modulated as well.
811         Calculate what is, effectively, a fractional coded quantizer. */
812       /*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
813       log_quantizer = od_blog64(rc->base_quantizer) - OD_Q57(OD_COEFF_SHIFT);
814       /*log_quantizer to Q21.*/
815       log_quantizer >>= 36;
816       /*scale log quantizer, result is Q33.*/
817       log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
818       /*Add Q33 offset to Q33 log_quantizer.*/
819       log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
820       /*Modulate quantizer according to frame type; result is Q45.*/
821       log_quantizer *= mqp_Q12[frame_subtype];
822       /*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
823       log_quantizer += dqp_Q45[frame_subtype];
824       /*Back to log2 quantizer in Q57.*/
825       log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
826                           OD_LOG_QUANTIZER_EXP_Q12 +
827                       OD_Q57(OD_COEFF_SHIFT);
828       /*Convert Q57 log2 quantizer to unclamped linear target quantizer value.*/
829       rc->target_quantizer = od_bexp64(log_quantizer);
830     }
831   } else {
832     int clamp;
833     int64_t rate_bias;
834     int64_t rate_total;
835     int base_quantizer;
836     int64_t log_quantizer;
837     int qlo;
838     int qhi;
839     int i;
840     /*We clamp the allowed amount of qi change (after initialization).*/
841     clamp = rc->cur_frame > 0;
842     /*Figure out how to re-distribute bits so that we hit our fullness target
843        before the last keyframe in our current buffer window (after the current
844        frame), or the end of the buffer window, whichever comes first.*/
845     /*Single pass only right now.*/
846     /*If we've been missing our target, add a penalty term.*/
847     rate_bias = (rc->rate_bias / (rc->cur_frame + 1000)) * reservoir_frames;
848     /*rate_total is the total bits available over the next
849        reservoir_frames frames.*/
850     rate_total = rc->reservoir_fullness - rc->reservoir_target + rate_bias +
851                  reservoir_frames * rc->bits_per_frame;
852     /*Find a target quantizer that meets our rate target for the specific mix
853        of frame types we'll have over the next frame_delay frames.
854       We model the rate<->quantizer relationship as:
855        rate = scale*(quantizer**-exp)
856       In this case, we have our desired rate, an exponent selected in setup,
857        and a scale that's been measured over our frame history, so we're
858        solving for the quantizer.
859       Exponentiation with arbitrary exponents is expensive, so we work in
860        the binary log domain (binary exp and log aren't too bad):
861        rate = e2(log2_scale - log2_quantizer * exp)
862       There's no easy closed form solution, so we bisection search for it.*/
863     /*We do not currently allow rate control to select lossless encoding.*/
864     qlo = 1;
865     /*If there's a quality specified, it's used to select the
866        coarsest base quantizer we can select.
867       Otherwise we can use up to and including the coarsest codable
868        quantizer.*/
869     if (rc->quality > 0)
870       qhi = convert_to_ac_quant(rc->quality, rc->bit_depth);
871     else
872       qhi = lossy_quantizer_max;
873     base_quantizer = (qlo + qhi) >> 1;
874     while (qlo < qhi) {
875       volatile int64_t log_base_quantizer;
876       int64_t diff;
877       int64_t bits;
878       /*Count bits contributed by each frame type using the model.*/
879       bits = 0;
880       log_base_quantizer = od_blog64(base_quantizer);
881       for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
882         /*Modulate base quantizer by frame type.*/
883         /*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
884         log_quantizer = log_base_quantizer - OD_Q57(OD_COEFF_SHIFT);
885         /*log_quantizer to Q21.*/
886         log_quantizer >>= 36;
887         /*scale log quantizer, result is Q33.*/
888         log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
889         /*Add Q33 offset to Q33 log_quantizer.*/
890         log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
891         /*Modulate quantizer according to frame type; result is Q45.*/
892         log_quantizer *= mqp_Q12[i];
893         /*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
894         log_quantizer += dqp_Q45[i];
895         /*Back to log2 quantizer in Q57.*/
896         log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
897                             OD_LOG_QUANTIZER_EXP_Q12 +
898                         OD_Q57(OD_COEFF_SHIFT);
899         /*Clamp modulated quantizer values.*/
900         log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
901                                   od_blog64(lossy_quantizer_max));
902         /* All the fields here are Q57 except for the exponent which is Q6.*/
903         bits += nframes[i] * od_bexp64(rc->log_scale[i] + rc->log_npixels -
904                                        (log_quantizer >> 6) * rc->exp[i]);
905       }
906       diff = bits - rate_total;
907       if (diff > 0) {
908         qlo = base_quantizer + 1;
909       } else if (diff < 0) {
910         qhi = base_quantizer - 1;
911       } else {
912         break;
913       }
914       base_quantizer = (qlo + qhi) >> 1;
915     }
916     /*If this was not one of the initial frames, limit the change in base
917        quantizer to within [0.8*Q,1.2*Q], where Q is the previous frame's
918        base quantizer.*/
919     if (clamp) {
920       base_quantizer = OD_CLAMPI((rc->base_quantizer * 0x0CCCD + 0x8000) >> 16,
921                                  base_quantizer,
922                                  (rc->base_quantizer * 0x13333 + 0x8000) >> 16);
923     }
924     /*Modulate chosen base quantizer to produce target quantizer.*/
925     log_quantizer = od_blog64(base_quantizer);
926     /*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
927     log_quantizer -= OD_Q57(OD_COEFF_SHIFT);
928     /*log_quantizer to Q21.*/
929     log_quantizer >>= 36;
930     /*scale log quantizer, result is Q33.*/
931     log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
932     /*Add Q33 offset to Q33 log_quantizer.*/
933     log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
934     /*Modulate quantizer according to frame type; result is Q45.*/
935     log_quantizer *= mqp_Q12[frame_subtype];
936     /*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
937     log_quantizer += dqp_Q45[frame_subtype];
938     /*Back to log2 quantizer in Q57.*/
939     log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
940                         OD_LOG_QUANTIZER_EXP_Q12 +
941                     OD_Q57(OD_COEFF_SHIFT);
942     /*Clamp modulated quantizer values.*/
943     log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
944                               od_blog64(lossy_quantizer_max));
945     /*The above allocation looks only at the total rate we'll accumulate in
946        the next reservoir_frame_delay frames.
947       However we could overflow the bit reservoir on the very next frame, so
948        check for that here if we're not using a soft target.*/
949     if (rc->cap_overflow) {
950       int64_t margin;
951       int64_t soft_limit;
952       int64_t log_soft_limit;
953       int64_t log_scale_pixels;
954       int64_t exp;
955       int64_t log_qexp;
956       /*Allow 3% of the buffer for prediction error.
957         This should be plenty, and we don't mind if we go a bit over; we only
958          want to keep these bits from being completely wasted.*/
959       margin = (rc->reservoir_max + 31) >> 5;
960       /*We want to use at least this many bits next frame.*/
961       soft_limit = rc->reservoir_fullness + rc->bits_per_frame -
962                    (rc->reservoir_max - margin);
963       log_soft_limit = od_blog64(soft_limit);
964       /*If we're predicting we won't use that many bits...*/
965       log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
966       exp = rc->exp[frame_subtype];
967       log_qexp = (log_quantizer >> 6) * exp;
968       if (log_scale_pixels - log_qexp < log_soft_limit) {
969         /*Scale the adjustment based on how far into the margin we are.*/
970         log_qexp += ((log_scale_pixels - log_soft_limit - log_qexp) >> 32) *
971                     (OD_MINI(margin, soft_limit) << 32) / margin;
972         log_quantizer = (((log_qexp + (exp >> 1)) / exp) << 6);
973       }
974     }
975     /*We just checked we don't overflow the reservoir next frame, now check
976        we don't underflow and bust the budget (when not using a soft target).
977       Disabled when a quality bound is set; if we saturate quantizer to the
978        maximum possible size when we have a limiting max quality, the
979        resulting lambda can cause strange behavior.*/
980     if (rc->quality == -1) {
981       int64_t exp;
982       int64_t log_qexp;
983       int64_t log_scale_pixels;
984       int64_t log_hard_limit;
985       /*Compute the maximum number of bits we can use in the next frame.
986         Allow 50% of the rate for a single frame for prediction error.
987         This may not be enough for keyframes or sudden changes in
988          complexity.*/
989       log_hard_limit =
990           od_blog64(rc->reservoir_fullness + (rc->bits_per_frame >> 1));
991       /*If we're predicting we'll use more than this...*/
992       log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
993       exp = rc->exp[frame_subtype];
994       log_qexp = (log_quantizer >> 6) * exp;
995       if (log_scale_pixels - log_qexp > log_hard_limit) {
996         /*Force the target to hit our limit exactly.*/
997         log_qexp = log_scale_pixels - log_hard_limit;
998         log_quantizer = (log_qexp + (exp >> 1)) / exp << 6;
999         /*If that target is unreasonable, oh well; we'll have to drop.*/
1000         log_quantizer = OD_MAXI(log_quantizer, od_blog64(lossy_quantizer_max));
1001       }
1002     }
1003     /*Compute a final estimate of the number of bits we plan to use, update
1004        the running rate bias measurement.*/
1005     {
1006       int64_t log_qexp;
1007       int64_t log_scale_pixels;
1008       log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
1009       log_qexp = (log_quantizer >> 6) * rc->exp[frame_subtype];
1010       rc->rate_bias += od_bexp64(log_scale_pixels - log_qexp);
1011     }
1012     rc->target_quantizer = od_bexp64(log_quantizer);
1013     /*The various cappings and adjustments may have altered the log_quantizer
1014        target significantly.
1015       We can either update the base quantizer to be consistent with the
1016        target or let it track separately.
1017       Theora behavior effectively keeps them consistent, as it regenerates
1018        the effective base quantizer from the target each frame rather than
1019        saving both.
1020       For Daala, it's easier to allow them to track separately.
1021       For now, allow them to track separately and see how it behaves.*/
1022     rc->base_quantizer = base_quantizer;
1023   }
1024   *bottom_idx = lossy_quantizer_min;
1025   *top_idx = lossy_quantizer_max;
1026   rc->target_quantizer = av1_qindex_from_ac(
1027       OD_CLAMPI(lossy_quantizer_min, rc->target_quantizer, lossy_quantizer_max),
1028       rc->bit_depth);
1029   return rc->target_quantizer;
1030 }
1031 
/*Updates the rate control state after a frame has been coded.
  Nothing is updated unless rate control is active (target_bitrate > 0).
  rc: The rate control state to update.
  bits: The number of bits spent on the frame.
        Values <= 0 are treated as "no blocks coded" and count as a drop for
         the frame's subtype.
  is_golden_frame: Nonzero if the frame is a golden frame; only affects the
                    subtype used for drop tracking of P frames.
  is_altref_frame: Nonzero if the frame is an altref frame; only affects the
                    subtype used for drop tracking of P frames.
  frame_type: The frame type; indexes the per-type exponent, frame count and
               scale filter.
  droppable: Nonzero if the frame may be dropped when it busts the budget.
  Return: 1 if the frame must be dropped (it is droppable and used more bits
           than reservoir_fullness + bits_per_frame), 0 otherwise.*/
int od_enc_rc_update_state(od_rc_state *rc, int64_t bits, int is_golden_frame,
                           int is_altref_frame, int frame_type, int droppable) {
  int dropped;
  dropped = 0;
  /*Update rate control only if rate control is active.*/
  if (rc->target_bitrate > 0) {
    int64_t log_scale;
    int frame_subtype;
    frame_subtype = frame_type;
    /*Track non-golden and golden P frame drops separately.*/
    if (is_golden_frame && frame_type == OD_P_FRAME)
      frame_subtype = OD_GOLDEN_P_FRAME;
    else if (is_altref_frame && frame_type == OD_P_FRAME)
      frame_subtype = OD_ALTREF_P_FRAME;
    if (bits <= 0) {
      /*We didn't code any blocks in this frame.*/
      log_scale = OD_Q57(-64);
      bits = 0;
      ++rc->prev_drop_count[frame_subtype];
    } else {
      int64_t log_bits;
      int64_t log_qexp;
      /*Compute the estimated scale factor for this frame type.
        This inverts the rate model in the log domain:
         log_scale = log_bits - log_npixels + log_quantizer*exp,
         capped at 16 (in Q57).*/
      log_bits = od_blog64(bits);
      log_qexp = od_blog64(rc->target_quantizer);
      log_qexp = (log_qexp >> 6) * (rc->exp[frame_type]);
      log_scale = OD_MINI(log_bits - rc->log_npixels + log_qexp, OD_Q57(16));
    }

    switch (rc->twopass_state) {
      case 1: {
        /*Only the returned frame type is used; the golden/altref/ipc outputs
           of od_frame_type() are discarded.*/
        int golden, altref;
        int64_t ipc;
        rc->cur_metrics.frame_type =
            od_frame_type(rc, rc->cur_frame, &golden, &altref, &ipc);
        /*Pass 1 mode: save the metrics for this frame.*/
        rc->cur_metrics.log_scale = od_q57_to_q24(log_scale);
      } break;
      case 2: {
        /*Pass 2 mode: remove this frame's pass-1 contribution from the
           remaining per-type frame count and scale sum.*/
        int m_frame_type = rc->cur_metrics.frame_type;
        rc->nframes[m_frame_type]--;
        rc->scale_sum[m_frame_type] -= od_bexp64_q24(rc->cur_metrics.log_scale);
      } break;
      default: break;
    }

    if (bits > 0) {
      od_iir_bessel2 *f;
      /*If this is the first example of the given frame type we've
         seen, we immediately replace the default scale factor guess
         with the estimate we just computed using the first frame.*/
      if (rc->frame_count[frame_type] == 0) {
        f = rc->scalefilter + frame_type;
        f->y[1] = f->y[0] = f->x[1] = f->x[0] = od_q57_to_q24(log_scale);
        rc->log_scale[frame_type] = log_scale;
      } else {
        /*Lengthen the time constant for the inter filters as we collect more
           frame statistics, until we reach our target.*/
        if (frame_type != OD_I_FRAME &&
            rc->inter_p_delay < rc->inter_delay_target &&
            rc->frame_count[frame_type] >= rc->inter_p_delay) {
          od_iir_bessel2_reinit(&rc->scalefilter[frame_type],
                                ++rc->inter_p_delay);
        }
        /*Update the low-pass scale filter for this frame type
           regardless of whether or not we drop this frame.
          The filter output is converted back from Q24 to Q57 with a multiply
           by 2**33 instead of "<< 33": the filtered log scale can be
           negative, and left-shifting a negative value is undefined
           behavior in C.*/
        rc->log_scale[frame_type] =
            (int64_t)od_iir_bessel2_update(rc->scalefilter + frame_type,
                                           od_q57_to_q24(log_scale)) *
            ((int64_t)1 << 33);
      }
      /*If this frame busts our budget, it must be dropped.*/
      if (droppable && rc->reservoir_fullness + rc->bits_per_frame < bits) {
        ++rc->prev_drop_count[frame_subtype];
        bits = 0;
        dropped = 1;
      } else {
        uint32_t drop_count;
        /*Update a low-pass filter to estimate the "real" frame rate taking
           drops into account.
          This is only done if the frame is coded, as it needs the final
           count of dropped frames.*/
        drop_count = rc->prev_drop_count[frame_subtype] + 1;
        /*Saturate rather than overflow when converting the count to Q24.*/
        if (drop_count > 0x7F) {
          drop_count = 0x7FFFFFFF;
        } else {
          drop_count <<= 24;
        }
        rc->log_drop_scale[frame_subtype] =
            od_blog64(od_iir_bessel2_update(rc->vfrfilter + frame_subtype,
                                            drop_count)) -
            OD_Q57(24);
        /*Zero the drop count for this frame.
          It will be increased if we drop frames.*/
        rc->prev_drop_count[frame_subtype] = 0;
      }
      /*Increment the frame count for filter adaptation purposes.
        This is skipped in either two-pass mode, where frame_count is
         maintained by the two-pass stats routines instead.*/
      if (!rc->twopass_state) rc->frame_count[frame_type]++;
    }
    rc->reservoir_fullness += rc->bits_per_frame - bits;
    /*If we're too quick filling the buffer and overflow is capped,
      that rate is lost forever.*/
    if (rc->cap_overflow && rc->reservoir_fullness > rc->reservoir_max) {
      rc->reservoir_fullness = rc->reservoir_max;
    }
    /*If we're too quick draining the buffer and underflow is capped,
      don't try to make up that rate later.*/
    if (rc->cap_underflow && rc->reservoir_fullness < 0) {
      rc->reservoir_fullness = 0;
    }
    /*Adjust the bias for the real bits we've used.*/
    rc->rate_bias -= bits;
  }
  return dropped;
}
1147 
od_rc_buffer_val(od_rc_state * rc,int64_t val,int bytes)1148 static INLINE void od_rc_buffer_val(od_rc_state *rc, int64_t val, int bytes) {
1149   while (bytes-- > 0) {
1150     rc->twopass_buffer[rc->twopass_buffer_bytes++] = (uint8_t)(val & 0xFF);
1151     val >>= 8;
1152   }
1153 }
1154 
od_rc_unbuffer_val(od_rc_state * rc,int bytes)1155 static INLINE int64_t od_rc_unbuffer_val(od_rc_state *rc, int bytes) {
1156   int64_t ret = 0;
1157   int shift = 0;
1158   while (bytes-- > 0) {
1159     ret |= ((int64_t)rc->twopass_buffer[rc->twopass_buffer_bytes++]) << shift;
1160     shift += 8;
1161   }
1162   return ret;
1163 }
1164 
/*Emits one pass-1 statistics packet and adds it to pkt_list.
  The packet is serialized into rc->firstpass_buffer, starting from offset 0
   on every call, and the packet handed to aom_codec_pkt_list_add() points at
   that buffer.
  On the first call (twopass_state == 0) the state is switched to pass 1 mode
   and the per-type frame counts, exponents and scale sums are cleared.
  rc: The rate control state.
  pkt_list: The packet list that receives the stats packet.
  summary: If nonzero, write the summary packet: a 4-byte magic, a 1-byte
            version, then for each frame subtype the frame count (4 bytes),
            exponent (4 bytes) and scale sum (8 bytes).
           If zero, fold the current frame's metrics into the running
            per-type totals and write its frame type (1 byte) and log scale
            (4 bytes).
  Return: Always 0.*/
int od_enc_rc_2pass_out(od_rc_state *rc, struct aom_codec_pkt_list *pkt_list,
                        int summary) {
  struct aom_codec_cx_pkt stats_pkt;
  int qti;
  /*Each packet is assembled from the start of the first-pass buffer.*/
  rc->twopass_buffer_bytes = 0;
  rc->twopass_buffer = rc->firstpass_buffer;
  if (rc->twopass_state == 0) {
    /*First call: clear the accumulators and enter pass 1 mode.*/
    for (qti = 0; qti < OD_FRAME_NSUBTYPES; qti++) {
      rc->scale_sum[qti] = 0;
      rc->exp[qti] = 0;
      rc->frame_count[qti] = 0;
    }
    rc->twopass_state = 1;
  }
  if (!summary) {
    /*Per-frame packet: accumulate the totals the summary will report, then
       serialize this frame's metrics.*/
    int cur_type = rc->cur_metrics.frame_type;
    rc->frame_count[cur_type]++;
    rc->scale_sum[cur_type] += od_bexp64_q24(rc->cur_metrics.log_scale);
    od_rc_buffer_val(rc, rc->cur_metrics.frame_type, 1);
    od_rc_buffer_val(rc, rc->cur_metrics.log_scale, 4);
  } else {
    /*Summary packet: header followed by one record per frame subtype.*/
    od_rc_buffer_val(rc, OD_RC_2PASS_MAGIC, 4);
    od_rc_buffer_val(rc, OD_RC_2PASS_VERSION, 1);
    for (qti = 0; qti < OD_FRAME_NSUBTYPES; qti++) {
      od_rc_buffer_val(rc, rc->frame_count[qti], 4);
      od_rc_buffer_val(rc, rc->exp[qti], 4);
      od_rc_buffer_val(rc, rc->scale_sum[qti], 8);
    }
  }
  stats_pkt.kind = AOM_CODEC_STATS_PKT;
  stats_pkt.data.twopass_stats.sz = rc->twopass_buffer_bytes;
  stats_pkt.data.twopass_stats.buf = rc->firstpass_buffer;
  aom_codec_pkt_list_add(pkt_list, &stats_pkt);
  return 0;
}
1200 
/*Loads the pass-1 metrics for the next frame from the two-pass stats buffer
   into rc->cur_metrics.
  On the first call (twopass_state == 0) this also parses the summary packet
   stored in the last OD_RC_2PASS_SUMMARY_SZ bytes of twopass_allframes_buf
   (checking its magic and version), loads the per-type frame counts,
   exponents and scale sums, calls od_enc_rc_reset(), and switches the state
   to pass 2 mode.
  The stats buffer is external input, so sizes, counts and the frame type read
   from it are validated before they are used.
  rc: The rate control state.
  Return: 0 on success.
          -1 if the stats buffer is missing, too small or malformed, if there
           is no complete frame packet left in front of the summary packet, or
           if the packet's frame type is out of range; cur_metrics is left
           unchanged in these cases.*/
int od_enc_rc_2pass_in(od_rc_state *rc) {
  uint64_t buf_size;
  int64_t frame_type;
  int64_t log_scale;
  /* Enable pass 2 mode if this is the first call. */
  if (rc->twopass_state == 0) {
    uint32_t i;
    uint64_t total_frames = 0;

    if (!rc->twopass_allframes_buf ||
        rc->twopass_allframes_buf_size < OD_RC_2PASS_MIN)
      return -1;
    /*The summary lookup below needs a whole summary packet to be present.*/
    buf_size = (uint64_t)rc->twopass_allframes_buf_size;
    if (buf_size < OD_RC_2PASS_SUMMARY_SZ) return -1;

    /* Find summary packet at the end */
    rc->twopass_buffer = rc->twopass_allframes_buf;
    rc->twopass_buffer +=
        rc->twopass_allframes_buf_size - OD_RC_2PASS_SUMMARY_SZ;
    rc->twopass_buffer_bytes = 0;

    if (od_rc_unbuffer_val(rc, 4) != OD_RC_2PASS_MAGIC) return -1;
    if (od_rc_unbuffer_val(rc, 1) != OD_RC_2PASS_VERSION) return -1;

    for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
      const int64_t count = od_rc_unbuffer_val(rc, 4);
      rc->frame_count[i] = count;
      rc->exp[i] = od_rc_unbuffer_val(rc, 4);
      rc->scale_sum[i] = od_rc_unbuffer_val(rc, 8);
      rc->nframes[i] = rc->frame_count[i];
      /*Summed in 64 bits so forged counts cannot wrap the total.*/
      total_frames += (uint64_t)count;
    }

    if (total_frames < 1) return -1;

    /*Every frame packet must fit in front of the summary packet.
      The product is formed in 64 bits so a forged frame count cannot wrap it
       around and slip past this check.*/
    if (total_frames * OD_RC_2PASS_PACKET_SZ >
        buf_size - OD_RC_2PASS_SUMMARY_SZ)
      return -1;

    od_enc_rc_reset(rc);

    /* Everything looks ok */
    rc->twopass_buffer = rc->twopass_allframes_buf;
    rc->twopass_state = 2;
    rc->twopass_buffer_bytes = 0;
  }

  /*Refuse to read a frame packet that would run into the summary packet or
     past the end of the stats buffer.*/
  buf_size = (uint64_t)rc->twopass_allframes_buf_size;
  if (buf_size < (uint64_t)OD_RC_2PASS_SUMMARY_SZ + OD_RC_2PASS_PACKET_SZ ||
      (uint64_t)rc->twopass_buffer_bytes >
          buf_size - OD_RC_2PASS_SUMMARY_SZ - OD_RC_2PASS_PACKET_SZ)
    return -1;

  frame_type = od_rc_unbuffer_val(rc, 1);
  log_scale = od_rc_unbuffer_val(rc, 4);
  /*The frame type is used to index per-type arrays, so an out-of-range value
     must never reach cur_metrics.
    The malformed packet has already been consumed at this point.*/
  if (frame_type >= OD_FRAME_NSUBTYPES) return -1;
  rc->cur_metrics.frame_type = frame_type;
  rc->cur_metrics.log_scale = log_scale;

  return 0;
}
1245