1 /*
2 * Copyright (c) 2001-2017, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <limits.h>
16 #include "av1/common/odintrin.h"
17 #include "av1/encoder/ratectrl_xiph.h"
18
/*Fixed-point conversion helpers.*/
/*Converts the integer v to Q57.
  The shift is carried out on an unsigned value and the result is cast back to
   a signed 64-bit integer.*/
#define OD_Q57(v) ((int64_t)((uint64_t)(v) << 57))
/*Converts the scalar v to Q45: v times 2**45, cast to int64_t.*/
#define OD_F_Q45(v) ((int64_t)((v) * ((int64_t)1 << 45)))
/*Converts the scalar v to Q12: v times 2**12, cast to int32_t.*/
#define OD_F_Q12(v) ((int32_t)((v) * ((int32_t)1 << 12)))
22
/*A rough lookup table for tan(x), 0 <= x < pi/2.
  The values are Q12 fixed-point and spaced at 5 degree intervals.
  These decisions are somewhat arbitrary, but sufficient for the 2nd order
   Bessel follower below.
  Values of x larger than 85 degrees are extrapolated from the last interval,
   which is way off, but "good enough".
  The table is only ever read, so it is declared const.*/
static const uint16_t OD_ROUGH_TAN_LOOKUP[18] = {
  0,    358,  722,  1098, 1491,  1910,  2365,  2868,  3437,
  4096, 4881, 5850, 7094, 8784,  11254, 15286, 23230, 46817
};
33
34 /*alpha is Q24 in the range [0,0.5).
35 The return values is 5.12.*/
od_warp_alpha(int alpha)36 static int od_warp_alpha(int alpha) {
37 int i;
38 int d;
39 int t0;
40 int t1;
41 i = alpha * 36 >> 24;
42 if (i >= 17) i = 16;
43 t0 = OD_ROUGH_TAN_LOOKUP[i];
44 t1 = OD_ROUGH_TAN_LOOKUP[i + 1];
45 d = alpha * 36 - (i << 24);
46 return (int)((((int64_t)t0 << 32) + ((t1 - t0) << 8) * (int64_t)d) >> 32);
47 }
48
/*Angle table for the CORDIC iterations in od_bexp64() and od_blog64().
  Entry i appears to be atanh(2**-(i + 1))/log(2) scaled by 2**(62 + i)
   (checked numerically against the first few entries -- TODO confirm against
   the original derivation).
  With that scaling the entries converge to 2**61/log(2); the last two entries
   are identical, and iterations past index 31 simply reuse entry 31.*/
static const int64_t OD_ATANH_LOG2[32] = {
  0x32B803473F7AD0F4LL, 0x2F2A71BD4E25E916LL, 0x2E68B244BB93BA06LL,
  0x2E39FB9198CE62E4LL, 0x2E2E683F68565C8FLL, 0x2E2B850BE2077FC1LL,
  0x2E2ACC58FE7B78DBLL, 0x2E2A9E2DE52FD5F2LL, 0x2E2A92A338D53EECLL,
  0x2E2A8FC08F5E19B6LL, 0x2E2A8F07E51A485ELL, 0x2E2A8ED9BA8AF388LL,
  0x2E2A8ECE2FE7384ALL, 0x2E2A8ECB4D3E4B1ALL, 0x2E2A8ECA94940FE8LL,
  0x2E2A8ECA6669811DLL, 0x2E2A8ECA5ADEDD6ALL, 0x2E2A8ECA57FC347ELL,
  0x2E2A8ECA57438A43LL, 0x2E2A8ECA57155FB4LL, 0x2E2A8ECA5709D510LL,
  0x2E2A8ECA5706F267LL, 0x2E2A8ECA570639BDLL, 0x2E2A8ECA57060B92LL,
  0x2E2A8ECA57060008LL, 0x2E2A8ECA5705FD25LL, 0x2E2A8ECA5705FC6CLL,
  0x2E2A8ECA5705FC3ELL, 0x2E2A8ECA5705FC33LL, 0x2E2A8ECA5705FC30LL,
  0x2E2A8ECA5705FC2FLL, 0x2E2A8ECA5705FC2FLL
};
62
/*Integer binary logarithm.
  v: The value to measure; the caller in this file only passes positive
      values.
  Return: The 1-based position of the highest set bit of v (i.e., the number
           of bits needed to represent it), or 0 if v is 0.*/
static int od_ilog64(int64_t v) {
  static const unsigned char OD_DEBRUIJN_IDX64[64] = {
    0,  1,  2,  7,  3,  13, 8,  19, 4,  25, 14, 28, 9,  34, 20, 40,
    5,  17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
    63, 6,  12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
    62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58
  };
  int shift;
  int nbits;
  /*Smear the highest set bit into every lower bit position.*/
  for (shift = 1; shift < 64; shift <<= 1) v |= v >> shift;
  /*After smearing, the low bit is clear only when the input was zero.*/
  nbits = (int)v & 1;
  /*Reduce the smeared value to a single power of two, then use the de Bruijn
     multiply to turn that power of two into its bit index.*/
  v = (v >> 1) + 1;
  nbits += OD_DEBRUIJN_IDX64[v * UINT64_C(0x218A392CD3D5DBF) >> 58 & 0x3F];
  return nbits;
}
82
83 /*Computes the binary exponential of logq57.
84 input: a log base 2 in Q57 format
85 output: a 64 bit integer in Q0 (no fraction) */
od_bexp64(int64_t logq57)86 static int64_t od_bexp64(int64_t logq57) {
87 int64_t w;
88 int64_t z;
89 int ipart;
90 ipart = (int)(logq57 >> 57);
91 if (ipart < 0) return 0;
92 if (ipart >= 63) return 0x7FFFFFFFFFFFFFFFLL;
93 z = logq57 - OD_Q57(ipart);
94 if (z) {
95 int64_t mask;
96 int64_t wlo;
97 int i;
98 /*C doesn't give us 64x64->128 muls, so we use CORDIC.
99 This is not particularly fast, but it's not being used in time-critical
100 code; it is very accurate.*/
101 /*z is the fractional part of the log in Q62 format.
102 We need 1 bit of headroom since the magnitude can get larger than 1
103 during the iteration, and a sign bit.*/
104 z <<= 5;
105 /*w is the exponential in Q61 format (since it also needs headroom and can
106 get as large as 2.0); we could get another bit if we dropped the sign,
107 but we'll recover that bit later anyway.
108 Ideally this should start out as
109 \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
110 but in order to guarantee convergence we have to repeat iterations 4,
111 13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/
112 w = 0x26A3D0E401DD846DLL;
113 for (i = 0;; i++) {
114 mask = -(z < 0);
115 w += ((w >> (i + 1)) + mask) ^ mask;
116 z -= (OD_ATANH_LOG2[i] + mask) ^ mask;
117 /*Repeat iteration 4.*/
118 if (i >= 3) break;
119 z *= 2;
120 }
121 for (;; i++) {
122 mask = -(z < 0);
123 w += ((w >> (i + 1)) + mask) ^ mask;
124 z -= (OD_ATANH_LOG2[i] + mask) ^ mask;
125 /*Repeat iteration 13.*/
126 if (i >= 12) break;
127 z *= 2;
128 }
129 for (; i < 32; i++) {
130 mask = -(z < 0);
131 w += ((w >> (i + 1)) + mask) ^ mask;
132 z = (z - ((OD_ATANH_LOG2[i] + mask) ^ mask)) * 2;
133 }
134 wlo = 0;
135 /*Skip the remaining iterations unless we really require that much
136 precision.
137 We could have bailed out earlier for smaller iparts, but that would
138 require initializing w from a table, as the limit doesn't converge to
139 61-bit precision until n=30.*/
140 if (ipart > 30) {
141 /*For these iterations, we just update the low bits, as the high bits
142 can't possibly be affected.
143 OD_ATANH_LOG2 has also converged (it actually did so one iteration
144 earlier, but that's no reason for an extra special case).*/
145 for (;; i++) {
146 mask = -(z < 0);
147 wlo += ((w >> i) + mask) ^ mask;
148 z -= (OD_ATANH_LOG2[31] + mask) ^ mask;
149 /*Repeat iteration 40.*/
150 if (i >= 39) break;
151 z <<= 1;
152 }
153 for (; i < 61; i++) {
154 mask = -(z < 0);
155 wlo += ((w >> i) + mask) ^ mask;
156 z = (z - ((OD_ATANH_LOG2[31] + mask) ^ mask)) << 1;
157 }
158 }
159 w = (w << 1) + wlo;
160 } else {
161 w = (int64_t)1 << 62;
162 }
163 if (ipart < 62) {
164 w = ((w >> (61 - ipart)) + 1) >> 1;
165 }
166 return w;
167 }
168
169 /*Computes the binary log of w
170 input: a 64-bit integer in Q0 (no fraction)
171 output: a 64-bit log in Q57 */
od_blog64(int64_t w)172 static int64_t od_blog64(int64_t w) {
173 int64_t z;
174 int ipart;
175 if (w <= 0) return -1;
176 ipart = od_ilog64(w) - 1;
177 if (ipart > 61) {
178 w >>= ipart - 61;
179 } else {
180 w <<= 61 - ipart;
181 }
182 z = 0;
183 if (w & (w - 1)) {
184 int64_t x;
185 int64_t y;
186 int64_t u;
187 int64_t mask;
188 int i;
189 /*C doesn't give us 64x64->128 muls, so we use CORDIC.
190 This is not particularly fast, but it's not being used in time-critical
191 code; it is very accurate.*/
192 /*z is the fractional part of the log in Q61 format.*/
193 /*x and y are the cosh() and sinh(), respectively, in Q61 format.
194 We are computing z = 2*atanh(y/x) = 2*atanh((w - 1)/(w + 1)).*/
195 x = w + ((int64_t)1 << 61);
196 y = w - ((int64_t)1 << 61);
197 for (i = 0; i < 4; i++) {
198 mask = -(y < 0);
199 z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
200 u = x >> (i + 1);
201 x -= ((y >> (i + 1)) + mask) ^ mask;
202 y -= (u + mask) ^ mask;
203 }
204 /*Repeat iteration 4.*/
205 for (i--; i < 13; i++) {
206 mask = -(y < 0);
207 z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
208 u = x >> (i + 1);
209 x -= ((y >> (i + 1)) + mask) ^ mask;
210 y -= (u + mask) ^ mask;
211 }
212 /*Repeat iteration 13.*/
213 for (i--; i < 32; i++) {
214 mask = -(y < 0);
215 z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
216 u = x >> (i + 1);
217 x -= ((y >> (i + 1)) + mask) ^ mask;
218 y -= (u + mask) ^ mask;
219 }
220 /*OD_ATANH_LOG2 has converged.*/
221 for (; i < 40; i++) {
222 mask = -(y < 0);
223 z += ((OD_ATANH_LOG2[31] >> i) + mask) ^ mask;
224 u = x >> (i + 1);
225 x -= ((y >> (i + 1)) + mask) ^ mask;
226 y -= (u + mask) ^ mask;
227 }
228 /*Repeat iteration 40.*/
229 for (i--; i < 62; i++) {
230 mask = -(y < 0);
231 z += ((OD_ATANH_LOG2[31] >> i) + mask) ^ mask;
232 u = x >> (i + 1);
233 x -= ((y >> (i + 1)) + mask) ^ mask;
234 y -= (u + mask) ^ mask;
235 }
236 z = (z + 8) >> 4;
237 }
238 return OD_Q57(ipart) + z;
239 }
240
/*Converts a Q57 value to a 32-bit Q24 value, rounding to nearest and
   saturating to the int32_t range.
  in: Input in Q57 format.
  Return: The same number in Q24.*/
static int32_t od_q57_to_q24(int64_t in) {
  /*Drop the 33 extra fractional bits, adding half an LSB first to round.*/
  const int64_t rounded = (in + ((int64_t)1 << 32)) >> 33;
  /*0x80000000 is automatically converted to unsigned on 32-bit systems.
    -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to
     unsigned.*/
  return (int32_t)OD_CLAMPI(-0x7FFFFFFF - 1, rounded, 0x7FFFFFFF);
}
252
/*Binary exponential of log_scale with 24-bit fractional precision and
   saturation.
  log_scale: A binary logarithm in Q57 format.
  Return: The binary exponential in Q24 format, saturated to 2**31-1 if
           log_scale was too large.*/
static int32_t od_bexp64_q24(int64_t log_scale) {
  int64_t scale;
  /*A log of 8 or more saturates without needing the exponential.*/
  if (log_scale >= OD_Q57(8)) return 0x7FFFFFFF;
  /*Adding 24 to the log gives the exponential 24 fractional bits.*/
  scale = od_bexp64(log_scale + OD_Q57(24));
  if (scale >= 0x7FFFFFFF) return 0x7FFFFFFF;
  return (int32_t)scale;
}
266
267 /*Re-initialize Bessel filter coefficients with the specified delay.
268 This does not alter the x/y state, but changes the reaction time of the
269 filter.
270 Altering the time constant of a reactive filter without alterning internal
271 state is something that has to be done carefuly, but our design operates at
272 high enough delays and with small enough time constant changes to make it
273 safe.*/
od_iir_bessel2_reinit(od_iir_bessel2 * f,int delay)274 static void od_iir_bessel2_reinit(od_iir_bessel2 *f, int delay) {
275 int alpha;
276 int64_t one48;
277 int64_t warp;
278 int64_t k1;
279 int64_t k2;
280 int64_t d;
281 int64_t a;
282 int64_t ik2;
283 int64_t b1;
284 int64_t b2;
285 /*This borrows some code from an unreleased version of Postfish.
286 See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
287 on deriving the filter coefficients.*/
288 /*alpha is Q24*/
289 alpha = (1 << 24) / delay;
290 one48 = (int64_t)1 << 48;
291 /*warp is 7.12*/
292 warp = OD_MAXI(od_warp_alpha(alpha), 1);
293 /*k1 is 9.12*/
294 k1 = 3 * warp;
295 /*k2 is 16.24.*/
296 k2 = k1 * warp;
297 /*d is 16.15.*/
298 d = ((((1 << 12) + k1) << 12) + k2 + 256) >> 9;
299 /*a is 0.32, since d is larger than both 1.0 and k2.*/
300 a = (k2 << 23) / d;
301 /*ik2 is 25.24.*/
302 ik2 = one48 / k2;
303 /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/
304 b1 = 2 * a * (ik2 - (1 << 24));
305 /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/
306 b2 = (one48 << 8) - ((4 * a) << 24) - b1;
307 /*All of the filter parameters are Q24.*/
308 f->c[0] = (int32_t)((b1 + ((int64_t)1 << 31)) >> 32);
309 f->c[1] = (int32_t)((b2 + ((int64_t)1 << 31)) >> 32);
310 f->g = (int32_t)((a + 128) >> 8);
311 }
312
313 /*Initialize a 2nd order low-pass Bessel filter with the corresponding delay
314 and initial value.
315 value is Q24.*/
od_iir_bessel2_init(od_iir_bessel2 * f,int delay,int32_t value)316 static void od_iir_bessel2_init(od_iir_bessel2 *f, int delay, int32_t value) {
317 od_iir_bessel2_reinit(f, delay);
318 f->y[1] = f->y[0] = f->x[1] = f->x[0] = value;
319 }
320
/*Feeds one new input sample through the filter and advances its history.
  f: The filter state; x[] and y[] are shifted and updated.
  x: The new input sample.
  Return: The new filter output, before it is narrowed to 32 bits for storage
           in f->y[0].*/
static int64_t od_iir_bessel2_update(od_iir_bessel2 *f, int32_t x) {
  const int64_t gain = f->g;
  const int64_t coef0 = f->c[0];
  const int64_t coef1 = f->c[1];
  const int64_t prev_x0 = f->x[0];
  const int64_t prev_x1 = f->x[1];
  const int64_t prev_y0 = f->y[0];
  const int64_t prev_y1 = f->y[1];
  int64_t acc;
  /*Feed-forward term over the current and the two previous inputs.*/
  acc = (x + prev_x0 * 2 + prev_x1) * gain;
  /*Feedback terms over the two previous outputs.*/
  acc += prev_y0 * coef0;
  acc += prev_y1 * coef1;
  /*Remove the 24 fractional bits of the coefficients, rounding to nearest.*/
  acc += 1 << 23;
  acc >>= 24;
  f->x[1] = (int32_t)prev_x0;
  f->x[0] = x;
  f->y[1] = (int32_t)prev_y0;
  f->y[0] = (int32_t)acc;
  return acc;
}
344
/*Resets the rate control state to its initial condition.
  Derives the per-frame bit budget and reservoir bounds, seeds the rate model
   (exponent and log scale) for every frame subtype, clears the frame and
   drop counters, and re-initializes the scale and drop followers.
  rc: The rate control state; target_bitrate, framerate, frame_width,
       frame_height and reservoir_frame_delay are read as inputs.*/
static void od_enc_rc_reset(od_rc_state *rc) {
  static const int OD_DROP_SUBTYPES[4] = { OD_I_FRAME, OD_P_FRAME,
                                           OD_GOLDEN_P_FRAME,
                                           OD_ALTREF_P_FRAME };
  int64_t npixels;
  int64_t ibpp;
  int64_t i_model;
  int64_t p_model;
  int i_exp;
  int p_exp;
  int k;
  rc->bits_per_frame = (int64_t)(rc->target_bitrate / rc->framerate);
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away.*/
  if (rc->bits_per_frame > 0x400000000000LL) {
    rc->bits_per_frame = (int64_t)0x400000000000LL;
  } else if (rc->bits_per_frame < 32) {
    rc->bits_per_frame = 32;
  }
  rc->reservoir_frame_delay = OD_MAXI(rc->reservoir_frame_delay, 12);
  rc->reservoir_max = rc->bits_per_frame * rc->reservoir_frame_delay;
  /*Start with a buffer fullness and fullness target of 50% */
  rc->reservoir_target = (rc->reservoir_max + 1) >> 1;
  rc->reservoir_fullness = rc->reservoir_target;
  /*Pick exponents and initial scales for quantizer selection.*/
  npixels = rc->frame_width * (int64_t)rc->frame_height;
  rc->log_npixels = od_blog64(npixels);
  /*ibpp is the number of pixels per bit of the per-frame budget.*/
  ibpp = npixels / rc->bits_per_frame;
  /*All of these initial scale/exp values are from Theora, and have not yet
     been adapted to Daala, so they're certainly wrong.
    The B-frame values especially are simply copies of the P-frame values.*/
  if (ibpp < 1) {
    i_exp = 59;
    i_model = 1997;
  } else if (ibpp < 2) {
    i_exp = 55;
    i_model = 1604;
  } else {
    i_exp = 48;
    i_model = 834;
  }
  rc->exp[OD_I_FRAME] = i_exp;
  rc->log_scale[OD_I_FRAME] = od_blog64(i_model) - OD_Q57(OD_COEFF_SHIFT);
  if (ibpp < 4) {
    p_exp = 100;
    p_model = 2249;
  } else if (ibpp < 8) {
    p_exp = 95;
    p_model = 1751;
  } else {
    p_exp = 73;
    p_model = 1260;
  }
  rc->exp[OD_P_FRAME] = p_exp;
  rc->log_scale[OD_P_FRAME] = od_blog64(p_model) - OD_Q57(OD_COEFF_SHIFT);
  /*Golden and altref P-frames use the same log_scale and exp modeling values
     as regular P-frames and the same scale follower.
    For convenience in the rate calculation code, we maintain a copy of the
     scale and exp values in OD_GOLDEN_P_FRAME and OD_ALTREF_P_FRAME.*/
  rc->exp[OD_GOLDEN_P_FRAME] = rc->exp[OD_P_FRAME];
  rc->log_scale[OD_GOLDEN_P_FRAME] = rc->log_scale[OD_P_FRAME];
  rc->exp[OD_ALTREF_P_FRAME] = rc->exp[OD_P_FRAME];
  rc->log_scale[OD_ALTREF_P_FRAME] = rc->log_scale[OD_P_FRAME];
  /*The inter-frame filter delay starts at 10 to work within the range of
     values where later incrementing the delay works as designed.
    10 is not an exact choice, but rather a good working trade-off.*/
  rc->inter_p_delay = 10;
  rc->inter_delay_target = rc->reservoir_frame_delay >> 1;
  memset(rc->frame_count, 0, sizeof(rc->frame_count));
  /*Set up the second order scale followers, initialized according to their
     corresponding time constants.*/
  od_iir_bessel2_init(&rc->scalefilter[OD_I_FRAME], 4,
                      od_q57_to_q24(rc->log_scale[OD_I_FRAME]));
  od_iir_bessel2_init(&rc->scalefilter[OD_P_FRAME], rc->inter_p_delay,
                      od_q57_to_q24(rc->log_scale[OD_P_FRAME]));
  /*Drop-frame tracking is concerned with more than just the basic frame
     types: it also tracks the boosted subtypes, so every entry of
     OD_DROP_SUBTYPES gets its own counter, drop scale and follower.*/
  for (k = 0; k < 4; k++) {
    const int subtype = OD_DROP_SUBTYPES[k];
    rc->prev_drop_count[subtype] = 0;
    rc->log_drop_scale[subtype] = OD_Q57(0);
    od_iir_bessel2_init(&rc->vfrfilter[subtype], 4,
                        od_bexp64_q24(rc->log_drop_scale[subtype]));
  }
}
431
/*Adapts the rate control state to a changed target rate or buffer size.
  Before the first frame this is a full reset; afterwards only the buffer
   bounds and the inter-frame filter delay are updated, and the current
   buffer fullness is left alone.
  rc: The rate control state.
  Return: Always 0.*/
int od_enc_rc_resize(od_rc_state *rc) {
  int idt;
  /*If encoding has not yet begun, reset the buffer state.*/
  if (rc->cur_frame == 0) {
    od_enc_rc_reset(rc);
    return 0;
  }
  /*Otherwise, update the bounds on the buffer, but not the current
     fullness.*/
  rc->bits_per_frame = (int64_t)(rc->target_bitrate / rc->framerate);
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away.*/
  if (rc->bits_per_frame > 0x400000000000LL) {
    rc->bits_per_frame = (int64_t)0x400000000000LL;
  } else if (rc->bits_per_frame < 32) {
    rc->bits_per_frame = 32;
  }
  rc->reservoir_frame_delay = OD_MAXI(rc->reservoir_frame_delay, 12);
  rc->reservoir_max = rc->bits_per_frame * rc->reservoir_frame_delay;
  /*The target is half the reservoir plus a quarter of a frame's budget for
     each frame in the shorter of the keyframe interval and the reservoir
     delay.*/
  rc->reservoir_target =
      ((rc->reservoir_max + 1) >> 1) +
      ((rc->bits_per_frame + 2) >> 2) *
          OD_MINI(rc->keyframe_rate, rc->reservoir_frame_delay);
  /*Update the INTER-frame scale filter delay.
    We jump to it immediately if we've already seen enough frames; otherwise
     it is simply set as the new target.*/
  idt = OD_MAXI(rc->reservoir_frame_delay >> 1, 10);
  rc->inter_delay_target = idt;
  if (idt < OD_MINI(rc->inter_p_delay, rc->frame_count[OD_P_FRAME])) {
    od_iir_bessel2_init(&rc->scalefilter[OD_P_FRAME], idt,
                        rc->scalefilter[OD_P_FRAME].y[0]);
    rc->inter_p_delay = idt;
  }
  return 0;
}
468
/*Initializes rate control, or retargets it if it is already running.
  rc: The rate control state; rc->framerate must already be set.
  bitrate: The target bitrate; a value <= 0 leaves the reservoir
            uninitialized.
  delay_ms: The requested buffer delay in milliseconds.
  Return: 1 if rc->framerate is not positive, otherwise 0 (or the result of
           od_enc_rc_resize() when the state was already initialized).*/
int od_enc_rc_init(od_rc_state *rc, int64_t bitrate, int delay_ms) {
  if (rc->framerate <= 0) return 1;
  if (rc->target_bitrate > 0) {
    /*State has already been initialized; rather than reinitialize,
       adjust the buffering for the new target rate. */
    rc->target_bitrate = bitrate;
    return od_enc_rc_resize(rc);
  }
  rc->target_quantizer = 0;
  rc->target_bitrate = bitrate;
  rc->rate_bias = 0;
  if (bitrate > 0) {
    /*The buffer size is clamped to [12, 256] frames; this interval is short
       enough to allow reaction, but long enough to allow looking into the
       next GOP (avoiding the case where the last frames before an I-frame
       get starved).
      The 12 frame minimum (applied in od_enc_rc_reset()) gives us some chance
       to distribute bit estimation errors in the worst case.
      The 256 frame maximum means we'll require 8-10 seconds of pre-buffering
       at 24-30 fps, which is not unreasonable.
      The conversion multiplies before dividing by 1000: dividing first would
       be an integer division that throws away the sub-second part of
       delay_ms.*/
    rc->reservoir_frame_delay =
        (int)OD_MINI((double)delay_ms * rc->framerate / 1000, 256);
    rc->drop_frames = 1;
    rc->cap_overflow = 1;
    rc->cap_underflow = 0;
    rc->twopass_state = 0;
    od_enc_rc_reset(rc);
  }
  return 0;
}
500
501 /*Scale the number of frames by the number of expected drops/duplicates.*/
od_rc_scale_drop(od_rc_state * rc,int frame_type,int nframes)502 static int od_rc_scale_drop(od_rc_state *rc, int frame_type, int nframes) {
503 if (rc->prev_drop_count[frame_type] > 0 ||
504 rc->log_drop_scale[frame_type] > OD_Q57(0)) {
505 int64_t dup_scale;
506 dup_scale = od_bexp64(((rc->log_drop_scale[frame_type] +
507 od_blog64(rc->prev_drop_count[frame_type] + 1)) >>
508 1) +
509 OD_Q57(8));
510 if (dup_scale < nframes << 8) {
511 int dup_scalei;
512 dup_scalei = (int)dup_scale;
513 if (dup_scalei > 0) {
514 nframes = ((nframes << 8) + dup_scalei - 1) / dup_scalei;
515 }
516 } else {
517 nframes = !!nframes;
518 }
519 }
520 return nframes;
521 }
522
523 /*Closed form version of frame determination code.
524 Used by rate control to predict frame types and subtypes into the future.
525 No side effects, may be called any number of times.
526 Note that it ignores end-of-file conditions; one-pass planning *should*
527 ignore end-of-file. */
od_frame_type(od_rc_state * rc,int64_t coding_frame_count,int * is_golden,int * is_altref,int64_t * ip_count)528 int od_frame_type(od_rc_state *rc, int64_t coding_frame_count, int *is_golden,
529 int *is_altref, int64_t *ip_count) {
530 int frame_type;
531 if (coding_frame_count == 0) {
532 *is_golden = 1;
533 *is_altref = 1;
534 *ip_count = 0;
535 frame_type = OD_I_FRAME;
536 } else {
537 int keyrate = rc->keyframe_rate;
538 if (rc->closed_gop) {
539 int ip_per_gop;
540 int gop_n;
541 int gop_i;
542 ip_per_gop = (keyrate - 1) / 2;
543 gop_n = coding_frame_count / keyrate;
544 gop_i = coding_frame_count - gop_n * keyrate;
545 *ip_count = gop_n * ip_per_gop + (gop_i > 0) + (gop_i - 1);
546 frame_type = gop_i == 0 ? OD_I_FRAME : OD_P_FRAME;
547 } else {
548 int ip_per_gop;
549 int gop_n;
550 int gop_i;
551 ip_per_gop = (keyrate);
552 gop_n = (coding_frame_count - 1) / keyrate;
553 gop_i = coding_frame_count - gop_n * keyrate - 1;
554 *ip_count = (coding_frame_count > 0) + gop_n * ip_per_gop + (gop_i);
555 frame_type = gop_i / 1 < ip_per_gop - 1 ? OD_P_FRAME : OD_I_FRAME;
556 }
557 }
558 *is_golden =
559 (*ip_count % rc->goldenframe_rate) == 0 || frame_type == OD_I_FRAME;
560 *is_altref = (*ip_count % rc->altref_rate) == 0 || frame_type == OD_I_FRAME;
561 return frame_type;
562 }
563
564 /*Count frames types forward from the current frame up to but not including
565 the last I-frame in reservoir_frame_delay.
566 If reservoir_frame_delay contains no I-frames (or the current frame is the
567 only I-frame), count all reservoir_frame_delay frames.
568 Returns the number of frames counted.
569 Right now, this implementation is simple, brute-force, and expensive.
570 It is also easy to understand and debug.
571 TODO: replace with a virtual FIFO that keeps running totals as
572 repeating the counting over-and-over will have a performance impact on
573 whole-file 2pass usage.*/
frame_type_count(od_rc_state * rc,int nframes[OD_FRAME_NSUBTYPES])574 static int frame_type_count(od_rc_state *rc, int nframes[OD_FRAME_NSUBTYPES]) {
575 int i;
576 int j;
577 int acc[OD_FRAME_NSUBTYPES];
578 int count;
579 int reservoir_frames;
580 int reservoir_frame_delay;
581 memset(nframes, 0, OD_FRAME_NSUBTYPES * sizeof(*nframes));
582 memset(acc, 0, sizeof(acc));
583 count = 0;
584 reservoir_frames = 0;
585 #if 1
586 /*Go ahead and count past end-of-stream.
587 We won't nail the exact bitrate on short files that end with a partial
588 GOP, but we also won't [potentially] destroy the quality of the last few
589 frames in that same case when we suddenly find out the stream is ending
590 before the original planning horizon.*/
591 reservoir_frame_delay = rc->reservoir_frame_delay;
592 #else
593 /*Don't count past the end of the stream (once we know where end-of-stream
594 is).*/
595 reservoir_frame_delay =
596 rc->end_of_input ? rc->input_size + 1 : rc->reservoir_frame_delay;
597 #endif
598 for (i = 0; i < reservoir_frame_delay; i++) {
599 int frame_type;
600 int is_golden;
601 int is_altref;
602 int64_t dummy;
603 frame_type =
604 od_frame_type(rc, rc->cur_frame + i, &is_golden, &is_altref, &dummy);
605 switch (frame_type) {
606 case OD_I_FRAME: {
607 for (j = 0; j < OD_FRAME_NSUBTYPES; j++) nframes[j] += acc[j];
608 reservoir_frames += count;
609 memset(acc, 0, sizeof(acc));
610 acc[OD_I_FRAME] = 1;
611 count = 1;
612 break;
613 }
614 case OD_P_FRAME: {
615 if (is_golden) {
616 ++acc[OD_GOLDEN_P_FRAME];
617 ++count;
618 } else if (is_altref) {
619 ++acc[OD_ALTREF_P_FRAME];
620 ++count;
621 } else {
622 ++acc[OD_P_FRAME];
623 ++count;
624 }
625 break;
626 }
627 }
628 }
629 /*If there were no I-frames at all, or only the first frame was an I-frame,
630 the accumulators never flushed and still contain the counts for the
631 entire buffer.
632 In both these cases, we return these counts.
633 Otherwise, we discard what remains in the accumulators as they contain
634 the counts from and past the last I-frame.*/
635 if (reservoir_frames == 0) {
636 for (i = 0; i < OD_FRAME_NSUBTYPES; i++) nframes[i] = acc[i];
637 reservoir_frames += count;
638 }
639 return reservoir_frames;
640 }
641
/*Maps a quantizer index to an integer quantizer value.
  q: The quantizer index handed to av1_convert_qindex_to_q().
  bit_depth: The bit depth handed to av1_convert_qindex_to_q().
  Return: The result of av1_convert_qindex_to_q(), rounded to an integer with
           lrint().*/
static int convert_to_ac_quant(int q, int bit_depth) {
  const double ac_q = av1_convert_qindex_to_q(q, bit_depth);
  return (int)lrint(ac_q);
}
645
od_enc_rc_select_quantizers_and_lambdas(od_rc_state * rc,int is_golden_frame,int is_altref_frame,int frame_type,int * bottom_idx,int * top_idx)646 int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
647 int is_golden_frame,
648 int is_altref_frame, int frame_type,
649 int *bottom_idx, int *top_idx) {
650 int frame_subtype;
651 int64_t log_cur_scale;
652 int lossy_quantizer_min;
653 int lossy_quantizer_max;
654 double mqp_i = OD_MQP_I;
655 double mqp_p = OD_MQP_P;
656 double mqp_gp = OD_MQP_GP;
657 double mqp_ap = OD_MQP_AP;
658 int reservoir_frames;
659 int nframes[OD_FRAME_NSUBTYPES];
660 int32_t mqp_Q12[OD_FRAME_NSUBTYPES];
661 int64_t dqp_Q45[OD_FRAME_NSUBTYPES];
662 /*Verify the closed-form frame type determination code matches what the
663 input queue set.*/
664 /*One pseudo-non-closed-form caveat:
665 Once we've seen end-of-input, the batched frame determination code
666 suppresses the last open-GOP's I-frame (since it would only be
667 useful for the next GOP, which doesn't exist).
    Thus, don't check once the input queue is drained.*/
669 if (!rc->end_of_input) {
670 int closed_form_type;
671 int closed_form_golden;
672 int closed_form_altref;
673 int64_t closed_form_cur_frame;
674 closed_form_type =
675 od_frame_type(rc, rc->cur_frame, &closed_form_golden,
676 &closed_form_altref, &closed_form_cur_frame);
677 OD_UNUSED(closed_form_type);
678 OD_UNUSED(is_altref_frame);
679 assert(closed_form_type == frame_type);
680 assert(closed_form_cur_frame == rc->cur_frame);
681 assert(closed_form_altref == is_altref_frame);
682 assert(closed_form_golden == is_golden_frame);
683 }
684
685 log_cur_scale = (int64_t)rc->scalefilter[frame_type].y[0] << 33;
686
687 /*Count the various types and classes of frames.*/
688 reservoir_frames = frame_type_count(rc, nframes);
689 nframes[OD_I_FRAME] = od_rc_scale_drop(rc, OD_I_FRAME, nframes[OD_I_FRAME]);
690 nframes[OD_P_FRAME] = od_rc_scale_drop(rc, OD_P_FRAME, nframes[OD_P_FRAME]);
691 nframes[OD_GOLDEN_P_FRAME] =
692 od_rc_scale_drop(rc, OD_GOLDEN_P_FRAME, nframes[OD_GOLDEN_P_FRAME]);
693 nframes[OD_ALTREF_P_FRAME] =
694 od_rc_scale_drop(rc, OD_ALTREF_P_FRAME, nframes[OD_ALTREF_P_FRAME]);
695
696 switch (rc->twopass_state) {
697 default: break;
698 case 1: {
699 /*Pass 1 mode: use a fixed qi value.*/
700 return rc->firstpass_quant;
701 } break;
702 case 2: {
703 int i;
704 int64_t scale_sum[OD_FRAME_NSUBTYPES];
705 int qti;
706 /*Pass 2 mode: we know exactly how much of each frame type there is in
707 the current buffer window, and have estimates for the scales.*/
708 for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
709 nframes[i] = rc->nframes[i];
710 nframes[i] = rc->nframes[i];
711 scale_sum[i] = rc->scale_sum[i];
712 }
713 /*If we're not using the same frame type as in pass 1 (because someone
714 changed the keyframe interval), remove that scale estimate.
715 We'll add in a replacement for the correct frame type below.*/
716 qti = rc->cur_metrics.frame_type;
717 if (qti != frame_type) {
718 nframes[qti]--;
719 scale_sum[qti] -= od_bexp64_q24(rc->cur_metrics.log_scale);
720 }
721 /*Compute log_scale estimates for each frame type from the pass-1 scales
722 we measured in the current window.*/
723 for (qti = 0; qti < OD_FRAME_NSUBTYPES; qti++) {
724 rc->log_scale[qti] = nframes[qti] > 0
725 ? od_blog64(scale_sum[qti]) -
726 od_blog64(nframes[qti]) - OD_Q57(24)
727 : -rc->log_npixels;
728 }
729 /*If we're not using the same frame type as in pass 1, add a scale
730 estimate for the corresponding frame using the current low-pass
731 filter value.
732 This is mostly to ensure we have a valid estimate even when pass 1 had
733 no frames of this type in the buffer window.
734 TODO: We could also plan ahead and figure out how many keyframes we'll
735 be forced to add in the current buffer window.*/
736 qti = rc->cur_metrics.frame_type;
737 if (qti != frame_type) {
738 int64_t scale;
739 scale = rc->log_scale[frame_type] < OD_Q57(23)
740 ? od_bexp64(rc->log_scale[frame_type] + OD_Q57(24))
741 : 0x7FFFFFFFFFFFLL;
742 scale *= nframes[frame_type];
743 nframes[frame_type]++;
744 scale += od_bexp64_q24(log_cur_scale >> 33);
745 rc->log_scale[frame_type] =
746 od_blog64(scale) - od_blog64(nframes[qti]) - OD_Q57(24);
747 } else {
748 log_cur_scale = (int64_t)rc->cur_metrics.log_scale << 33;
749 }
750 } break;
751 }
752
753 /*Quantizer selection sticks to the codable, lossy portion of the quantizer
754 range.*/
755 lossy_quantizer_min = convert_to_ac_quant(rc->minq, rc->bit_depth);
756 lossy_quantizer_max = convert_to_ac_quant(rc->maxq, rc->bit_depth);
757 frame_subtype = frame_type;
758 /*Stash quantizer modulation by frame type.*/
759 mqp_Q12[OD_I_FRAME] = OD_F_Q12(mqp_i);
760 mqp_Q12[OD_P_FRAME] = OD_F_Q12(mqp_p);
761 mqp_Q12[OD_GOLDEN_P_FRAME] = OD_F_Q12(mqp_gp);
762 mqp_Q12[OD_ALTREF_P_FRAME] = OD_F_Q12(mqp_ap);
763 dqp_Q45[OD_I_FRAME] = OD_F_Q45(OD_DQP_I);
764 dqp_Q45[OD_P_FRAME] = OD_F_Q45(OD_DQP_P);
765 dqp_Q45[OD_GOLDEN_P_FRAME] = OD_F_Q45(OD_DQP_GP);
766 dqp_Q45[OD_ALTREF_P_FRAME] = OD_F_Q45(OD_DQP_AP);
767 /*Is rate control active?*/
768 if (rc->target_bitrate <= 0) {
769 /*Rate control is not active; derive quantizer directly from
770 quality parameter and frame type. */
771 /*Can't use the OD_LOSSLESS macro, as it uses state.quantizer to intuit,
772 and we've not set it yet.*/
773 if (rc->quality == 0) {
774 /*Lossless coding requested.*/
775 rc->base_quantizer = 0;
776 rc->target_quantizer = 0;
777 } else {
778 int64_t log_quantizer;
779
780 /* Adjust the modulation constants using the last frame's quantizer. */
781 double mqp_delta = (255 - rc->target_quantizer) / 2000.0f;
782 mqp_i -= mqp_delta;
783 mqp_p += mqp_delta;
784 mqp_gp -= mqp_delta;
785 mqp_Q12[OD_I_FRAME] = OD_F_Q12(mqp_i);
786 mqp_Q12[OD_P_FRAME] = OD_F_Q12(mqp_p);
787 mqp_Q12[OD_GOLDEN_P_FRAME] = OD_F_Q12(mqp_gp);
788 mqp_Q12[OD_ALTREF_P_FRAME] = OD_F_Q12(mqp_ap);
789
790 if (rc->quality == -1) {
791 /*A quality of -1 means quality was unset; use a default.*/
792 rc->base_quantizer = convert_to_ac_quant(10, rc->bit_depth);
793 } else {
794 rc->base_quantizer = convert_to_ac_quant(rc->quality, rc->bit_depth);
795 }
796
797 if (rc->periodic_boosts && !is_golden_frame) {
798 int pattern_rate = (rc->goldenframe_rate >> 1);
799 int dist_to_golden = rc->cur_frame % pattern_rate;
800 int dist_away_golden = pattern_rate - dist_to_golden;
801 int boost = dist_to_golden;
802 if (dist_away_golden > dist_to_golden) boost = dist_away_golden;
803 boost -= pattern_rate;
804 boost *= (rc->base_quantizer) / OD_PERIODIC_BOOST_DIV;
805 rc->base_quantizer = rc->base_quantizer + boost;
806 }
807
808 /*As originally written, qp modulation is applied to the coded quantizer.
809 Because we now have and use a more precise target quantizer for various
810 calculation, that needs to be modulated as well.
811 Calculate what is, effectively, a fractional coded quantizer. */
812 /*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
813 log_quantizer = od_blog64(rc->base_quantizer) - OD_Q57(OD_COEFF_SHIFT);
814 /*log_quantizer to Q21.*/
815 log_quantizer >>= 36;
816 /*scale log quantizer, result is Q33.*/
817 log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
818 /*Add Q33 offset to Q33 log_quantizer.*/
819 log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
820 /*Modulate quantizer according to frame type; result is Q45.*/
821 log_quantizer *= mqp_Q12[frame_subtype];
822 /*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
823 log_quantizer += dqp_Q45[frame_subtype];
824 /*Back to log2 quantizer in Q57.*/
825 log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
826 OD_LOG_QUANTIZER_EXP_Q12 +
827 OD_Q57(OD_COEFF_SHIFT);
828 /*Convert Q57 log2 quantizer to unclamped linear target quantizer value.*/
829 rc->target_quantizer = od_bexp64(log_quantizer);
830 }
831 } else {
832 int clamp;
833 int64_t rate_bias;
834 int64_t rate_total;
835 int base_quantizer;
836 int64_t log_quantizer;
837 int qlo;
838 int qhi;
839 int i;
840 /*We clamp the allowed amount of qi change (after initialization).*/
841 clamp = rc->cur_frame > 0;
842 /*Figure out how to re-distribute bits so that we hit our fullness target
843 before the last keyframe in our current buffer window (after the current
844 frame), or the end of the buffer window, whichever comes first.*/
845 /*Single pass only right now.*/
846 /*If we've been missing our target, add a penalty term.*/
847 rate_bias = (rc->rate_bias / (rc->cur_frame + 1000)) * reservoir_frames;
848 /*rate_total is the total bits available over the next
849 reservoir_frames frames.*/
850 rate_total = rc->reservoir_fullness - rc->reservoir_target + rate_bias +
851 reservoir_frames * rc->bits_per_frame;
852 /*Find a target quantizer that meets our rate target for the specific mix
853 of frame types we'll have over the next frame_delay frames.
854 We model the rate<->quantizer relationship as:
855 rate = scale*(quantizer**-exp)
856 In this case, we have our desired rate, an exponent selected in setup,
857 and a scale that's been measured over our frame history, so we're
858 solving for the quantizer.
859 Exponentiation with arbitrary exponents is expensive, so we work in
860 the binary log domain (binary exp and log aren't too bad):
861 rate = e2(log2_scale - log2_quantizer * exp)
862 There's no easy closed form solution, so we bisection search for it.*/
863 /*We do not currently allow rate control to select lossless encoding.*/
864 qlo = 1;
865 /*If there's a quality specified, it's used to select the
866 coarsest base quantizer we can select.
867 Otherwise we can use up to and including the coarsest codable
868 quantizer.*/
869 if (rc->quality > 0)
870 qhi = convert_to_ac_quant(rc->quality, rc->bit_depth);
871 else
872 qhi = lossy_quantizer_max;
873 base_quantizer = (qlo + qhi) >> 1;
874 while (qlo < qhi) {
875 volatile int64_t log_base_quantizer;
876 int64_t diff;
877 int64_t bits;
878 /*Count bits contributed by each frame type using the model.*/
879 bits = 0;
880 log_base_quantizer = od_blog64(base_quantizer);
881 for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
882 /*Modulate base quantizer by frame type.*/
883 /*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
884 log_quantizer = log_base_quantizer - OD_Q57(OD_COEFF_SHIFT);
885 /*log_quantizer to Q21.*/
886 log_quantizer >>= 36;
887 /*scale log quantizer, result is Q33.*/
888 log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
889 /*Add Q33 offset to Q33 log_quantizer.*/
890 log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
891 /*Modulate quantizer according to frame type; result is Q45.*/
892 log_quantizer *= mqp_Q12[i];
893 /*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
894 log_quantizer += dqp_Q45[i];
895 /*Back to log2 quantizer in Q57.*/
896 log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
897 OD_LOG_QUANTIZER_EXP_Q12 +
898 OD_Q57(OD_COEFF_SHIFT);
899 /*Clamp modulated quantizer values.*/
900 log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
901 od_blog64(lossy_quantizer_max));
902 /* All the fields here are Q57 except for the exponent which is Q6.*/
903 bits += nframes[i] * od_bexp64(rc->log_scale[i] + rc->log_npixels -
904 (log_quantizer >> 6) * rc->exp[i]);
905 }
906 diff = bits - rate_total;
907 if (diff > 0) {
908 qlo = base_quantizer + 1;
909 } else if (diff < 0) {
910 qhi = base_quantizer - 1;
911 } else {
912 break;
913 }
914 base_quantizer = (qlo + qhi) >> 1;
915 }
916 /*If this was not one of the initial frames, limit the change in base
917 quantizer to within [0.8*Q,1.2*Q], where Q is the previous frame's
918 base quantizer.*/
919 if (clamp) {
920 base_quantizer = OD_CLAMPI((rc->base_quantizer * 0x0CCCD + 0x8000) >> 16,
921 base_quantizer,
922 (rc->base_quantizer * 0x13333 + 0x8000) >> 16);
923 }
924 /*Modulate chosen base quantizer to produce target quantizer.*/
925 log_quantizer = od_blog64(base_quantizer);
926 /*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
927 log_quantizer -= OD_Q57(OD_COEFF_SHIFT);
928 /*log_quantizer to Q21.*/
929 log_quantizer >>= 36;
930 /*scale log quantizer, result is Q33.*/
931 log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
932 /*Add Q33 offset to Q33 log_quantizer.*/
933 log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
934 /*Modulate quantizer according to frame type; result is Q45.*/
935 log_quantizer *= mqp_Q12[frame_subtype];
936 /*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
937 log_quantizer += dqp_Q45[frame_subtype];
938 /*Back to log2 quantizer in Q57.*/
939 log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
940 OD_LOG_QUANTIZER_EXP_Q12 +
941 OD_Q57(OD_COEFF_SHIFT);
942 /*Clamp modulated quantizer values.*/
943 log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
944 od_blog64(lossy_quantizer_max));
945 /*The above allocation looks only at the total rate we'll accumulate in
946 the next reservoir_frame_delay frames.
947 However we could overflow the bit reservoir on the very next frame, so
948 check for that here if we're not using a soft target.*/
949 if (rc->cap_overflow) {
950 int64_t margin;
951 int64_t soft_limit;
952 int64_t log_soft_limit;
953 int64_t log_scale_pixels;
954 int64_t exp;
955 int64_t log_qexp;
956 /*Allow 3% of the buffer for prediction error.
957 This should be plenty, and we don't mind if we go a bit over; we only
958 want to keep these bits from being completely wasted.*/
959 margin = (rc->reservoir_max + 31) >> 5;
960 /*We want to use at least this many bits next frame.*/
961 soft_limit = rc->reservoir_fullness + rc->bits_per_frame -
962 (rc->reservoir_max - margin);
963 log_soft_limit = od_blog64(soft_limit);
964 /*If we're predicting we won't use that many bits...*/
965 log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
966 exp = rc->exp[frame_subtype];
967 log_qexp = (log_quantizer >> 6) * exp;
968 if (log_scale_pixels - log_qexp < log_soft_limit) {
969 /*Scale the adjustment based on how far into the margin we are.*/
970 log_qexp += ((log_scale_pixels - log_soft_limit - log_qexp) >> 32) *
971 (OD_MINI(margin, soft_limit) << 32) / margin;
972 log_quantizer = (((log_qexp + (exp >> 1)) / exp) << 6);
973 }
974 }
975 /*We just checked we don't overflow the reservoir next frame, now check
976 we don't underflow and bust the budget (when not using a soft target).
977 Disabled when a quality bound is set; if we saturate quantizer to the
978 maximum possible size when we have a limiting max quality, the
979 resulting lambda can cause strange behavior.*/
980 if (rc->quality == -1) {
981 int64_t exp;
982 int64_t log_qexp;
983 int64_t log_scale_pixels;
984 int64_t log_hard_limit;
985 /*Compute the maximum number of bits we can use in the next frame.
986 Allow 50% of the rate for a single frame for prediction error.
987 This may not be enough for keyframes or sudden changes in
988 complexity.*/
989 log_hard_limit =
990 od_blog64(rc->reservoir_fullness + (rc->bits_per_frame >> 1));
991 /*If we're predicting we'll use more than this...*/
992 log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
993 exp = rc->exp[frame_subtype];
994 log_qexp = (log_quantizer >> 6) * exp;
995 if (log_scale_pixels - log_qexp > log_hard_limit) {
996 /*Force the target to hit our limit exactly.*/
997 log_qexp = log_scale_pixels - log_hard_limit;
998 log_quantizer = (log_qexp + (exp >> 1)) / exp << 6;
999 /*If that target is unreasonable, oh well; we'll have to drop.*/
1000 log_quantizer = OD_MAXI(log_quantizer, od_blog64(lossy_quantizer_max));
1001 }
1002 }
1003 /*Compute a final estimate of the number of bits we plan to use, update
1004 the running rate bias measurement.*/
1005 {
1006 int64_t log_qexp;
1007 int64_t log_scale_pixels;
1008 log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
1009 log_qexp = (log_quantizer >> 6) * rc->exp[frame_subtype];
1010 rc->rate_bias += od_bexp64(log_scale_pixels - log_qexp);
1011 }
1012 rc->target_quantizer = od_bexp64(log_quantizer);
1013 /*The various cappings and adjustments may have altered the log_quantizer
1014 target significantly.
1015 We can either update the base quantizer to be consistent with the
1016 target or let it track separately.
1017 Theora behavior effectively keeps them consistent, as it regenerates
1018 the effective base quantizer from the target each frame rather than
1019 saving both.
1020 For Daala, it's easier to allow them to track separately.
1021 For now, allow them to track separately and see how it behaves.*/
1022 rc->base_quantizer = base_quantizer;
1023 }
1024 *bottom_idx = lossy_quantizer_min;
1025 *top_idx = lossy_quantizer_max;
1026 rc->target_quantizer = av1_qindex_from_ac(
1027 OD_CLAMPI(lossy_quantizer_min, rc->target_quantizer, lossy_quantizer_max),
1028 rc->bit_depth);
1029 return rc->target_quantizer;
1030 }
1031
/*Folds the outcome of one encoded frame back into the rate control state.
  rc: The rate control state to update.
  bits: The number of bits produced for the frame; anything <= 0 is handled
   as a frame in which nothing was coded.
  is_golden_frame: Nonzero to track an OD_P_FRAME under the
   OD_GOLDEN_P_FRAME subtype.
  is_altref_frame: Nonzero to track an OD_P_FRAME under the
   OD_ALTREF_P_FRAME subtype (only consulted when is_golden_frame is zero).
  frame_type: The frame type, used directly to index the scale model.
  droppable: Nonzero if the frame may be dropped when it busts the budget.
  Return: 1 if the frame has to be dropped, 0 otherwise (always 0 while rate
   control is inactive).*/
int od_enc_rc_update_state(od_rc_state *rc, int64_t bits, int is_golden_frame,
                           int is_altref_frame, int frame_type, int droppable) {
  int64_t log_scale;
  int frame_subtype;
  int dropped = 0;
  /*There is nothing to track unless rate control is active.*/
  if (!(rc->target_bitrate > 0)) return 0;
  /*Golden and alt-ref P frames have their drops tracked separately from
     plain P frames; golden wins if both flags are set.*/
  frame_subtype = frame_type;
  if (frame_type == OD_P_FRAME) {
    if (is_golden_frame) {
      frame_subtype = OD_GOLDEN_P_FRAME;
    } else if (is_altref_frame) {
      frame_subtype = OD_ALTREF_P_FRAME;
    }
  }
  /*NOTE(review): drop tracking below is indexed by frame_subtype, while the
     scale model (exp, scalefilter, log_scale, frame_count) is indexed by
     frame_type; confirm that this split is intended.*/
  if (bits > 0) {
    /*Estimate the scale factor for this frame from the bits it used and the
       quantizer it was coded with; the estimate is capped at OD_Q57(16).*/
    int64_t log_bits;
    int64_t log_quant;
    int64_t log_qexp;
    log_bits = od_blog64(bits);
    log_quant = od_blog64(rc->target_quantizer);
    log_qexp = (log_quant >> 6) * (rc->exp[frame_type]);
    log_scale = OD_MINI(log_bits - rc->log_npixels + log_qexp, OD_Q57(16));
  } else {
    /*No blocks were coded in this frame: count it as a drop.*/
    log_scale = OD_Q57(-64);
    bits = 0;
    ++rc->prev_drop_count[frame_subtype];
  }

  if (rc->twopass_state == 1) {
    /*Pass 1: record the metrics for this frame.*/
    int golden;
    int altref;
    int64_t ipc;
    rc->cur_metrics.frame_type =
        od_frame_type(rc, rc->cur_frame, &golden, &altref, &ipc);
    rc->cur_metrics.log_scale = od_q57_to_q24(log_scale);
  } else if (rc->twopass_state == 2) {
    /*Pass 2: remove this frame from the totals of what is still to come.*/
    const int metrics_type = rc->cur_metrics.frame_type;
    --rc->nframes[metrics_type];
    rc->scale_sum[metrics_type] -= od_bexp64_q24(rc->cur_metrics.log_scale);
  }

  if (bits > 0) {
    od_iir_bessel2 *scale_filter;
    scale_filter = rc->scalefilter + frame_type;
    if (rc->frame_count[frame_type] == 0) {
      /*First frame of this type: throw away the default scale guess and
         seed the whole filter history with the estimate just computed.
        Each tap is copied from the previous one, in this order.*/
      scale_filter->x[0] = od_q57_to_q24(log_scale);
      scale_filter->x[1] = scale_filter->x[0];
      scale_filter->y[0] = scale_filter->x[1];
      scale_filter->y[1] = scale_filter->y[0];
      rc->log_scale[frame_type] = log_scale;
    } else {
      /*For inter frames, stretch the filter's time constant as statistics
         accumulate, until the target delay has been reached.*/
      if (frame_type != OD_I_FRAME &&
          rc->inter_p_delay < rc->inter_delay_target &&
          rc->frame_count[frame_type] >= rc->inter_p_delay) {
        od_iir_bessel2_reinit(scale_filter, ++rc->inter_p_delay);
      }
      /*The low-pass scale filter is updated whether or not the frame ends
         up being dropped.*/
      rc->log_scale[frame_type] =
          od_iir_bessel2_update(scale_filter, od_q57_to_q24(log_scale)) << 33;
    }
    if (droppable && rc->reservoir_fullness + rc->bits_per_frame < bits) {
      /*The frame busts the budget, so it must be dropped.*/
      ++rc->prev_drop_count[frame_subtype];
      bits = 0;
      dropped = 1;
    } else {
      /*The frame is kept, so the final count of preceding drops is known:
         feed it to the low-pass filter estimating the "real" frame rate.
        Counts above 0x7F saturate instead of being shifted up by 24 bits.*/
      uint32_t drop_count;
      drop_count = rc->prev_drop_count[frame_subtype] + 1;
      drop_count = drop_count > 0x7F ? 0x7FFFFFFF : drop_count << 24;
      rc->log_drop_scale[frame_subtype] =
          od_blog64(od_iir_bessel2_update(rc->vfrfilter + frame_subtype,
                                          drop_count)) -
          OD_Q57(24);
      /*Start counting drops afresh for this subtype.*/
      rc->prev_drop_count[frame_subtype] = 0;
    }
    /*Frame counts drive filter adaptation; they are only advanced here when
       two-pass mode is off.*/
    if (rc->twopass_state == 0) ++rc->frame_count[frame_type];
  }
  rc->reservoir_fullness += rc->bits_per_frame - bits;
  /*With overflow capped, rate that arrives while the buffer is full is lost
     forever.*/
  if (rc->cap_overflow && rc->reservoir_fullness > rc->reservoir_max) {
    rc->reservoir_fullness = rc->reservoir_max;
  }
  /*With underflow capped, no attempt is made to pay back an overdraft
     later.*/
  if (rc->cap_underflow && rc->reservoir_fullness < 0) {
    rc->reservoir_fullness = 0;
  }
  /*Account for the bits actually spent in the running rate bias.*/
  rc->rate_bias -= bits;
  return dropped;
}
1147
od_rc_buffer_val(od_rc_state * rc,int64_t val,int bytes)1148 static INLINE void od_rc_buffer_val(od_rc_state *rc, int64_t val, int bytes) {
1149 while (bytes-- > 0) {
1150 rc->twopass_buffer[rc->twopass_buffer_bytes++] = (uint8_t)(val & 0xFF);
1151 val >>= 8;
1152 }
1153 }
1154
od_rc_unbuffer_val(od_rc_state * rc,int bytes)1155 static INLINE int64_t od_rc_unbuffer_val(od_rc_state *rc, int bytes) {
1156 int64_t ret = 0;
1157 int shift = 0;
1158 while (bytes-- > 0) {
1159 ret |= ((int64_t)rc->twopass_buffer[rc->twopass_buffer_bytes++]) << shift;
1160 shift += 8;
1161 }
1162 return ret;
1163 }
1164
/*Serializes one first-pass statistics packet and appends it to pkt_list.
  On the first call (two-pass state still 0) the state is switched to pass 1
   and the per-subtype frame counts, exponents and scale sums are zeroed.
  rc: The rate control state; rc->firstpass_buffer is used as the scratch
   space for the serialized packet.
  pkt_list: The packet list that receives an AOM_CODEC_STATS_PKT entry.
  summary: Nonzero to emit the summary packet (magic, version, then frame
   count, exponent and scale sum for each subtype); zero to accumulate the
   current frame's metrics and emit them as a per-frame packet.
  Return: Always 0.*/
int od_enc_rc_2pass_out(od_rc_state *rc, struct aom_codec_pkt_list *pkt_list,
                        int summary) {
  struct aom_codec_cx_pkt stats_pkt;
  int subtype;
  /*Every packet is written from the start of the first-pass buffer.*/
  rc->twopass_buffer = rc->firstpass_buffer;
  rc->twopass_buffer_bytes = 0;
  if (rc->twopass_state == 0) {
    rc->twopass_state = 1;
    for (subtype = 0; subtype < OD_FRAME_NSUBTYPES; subtype++) {
      rc->frame_count[subtype] = 0;
      rc->exp[subtype] = 0;
      rc->scale_sum[subtype] = 0;
    }
  }
  if (!summary) {
    /*Per-frame packet: fold the frame into the running totals, then write
       its type (1 byte) and log scale (4 bytes).*/
    int metrics_type = rc->cur_metrics.frame_type;
    rc->scale_sum[metrics_type] += od_bexp64_q24(rc->cur_metrics.log_scale);
    rc->frame_count[metrics_type]++;
    od_rc_buffer_val(rc, rc->cur_metrics.frame_type, 1);
    od_rc_buffer_val(rc, rc->cur_metrics.log_scale, 4);
  } else {
    /*Summary packet: a 4-byte magic and 1-byte version, followed by the
       accumulated totals of every frame subtype.*/
    od_rc_buffer_val(rc, OD_RC_2PASS_MAGIC, 4);
    od_rc_buffer_val(rc, OD_RC_2PASS_VERSION, 1);
    for (subtype = 0; subtype < OD_FRAME_NSUBTYPES; subtype++) {
      od_rc_buffer_val(rc, rc->frame_count[subtype], 4);
      od_rc_buffer_val(rc, rc->exp[subtype], 4);
      od_rc_buffer_val(rc, rc->scale_sum[subtype], 8);
    }
  }
  /*Hand the serialized bytes over as a stats packet.*/
  stats_pkt.kind = AOM_CODEC_STATS_PKT;
  stats_pkt.data.twopass_stats.buf = rc->firstpass_buffer;
  stats_pkt.data.twopass_stats.sz = rc->twopass_buffer_bytes;
  aom_codec_pkt_list_add(pkt_list, &stats_pkt);
  return 0;
}
1200
/*Loads the first-pass metrics for the next frame into rc->cur_metrics.
  On the first call (two-pass state still 0) the summary packet at the end of
   rc->twopass_allframes_buf is validated and loaded, the rate control state
   is reset, and pass 2 mode is entered with the read position at the start
   of the buffer.
  Every successful call then consumes one per-frame record: a 1-byte frame
   type followed by a 4-byte log scale.
  rc: The rate control state holding the first-pass statistics buffer.
  Return: 0 on success, or -1 if the statistics buffer is missing, too small,
   carries the wrong magic or version, describes no frames, or describes more
   frame records than fit in the buffer.*/
int od_enc_rc_2pass_in(od_rc_state *rc) {
  /* Enable pass 2 mode if this is the first call. */
  if (rc->twopass_state == 0) {
    uint32_t i;
    /*The frame total comes from the stats buffer, so it is summed and scaled
       in 64 bits: hostile or corrupt counts must not be able to wrap a
       32-bit sum or product and slip past the size check below.*/
    uint64_t total_frames = 0;

    if (!rc->twopass_allframes_buf ||
        rc->twopass_allframes_buf_size < OD_RC_2PASS_MIN)
      return -1;

    /* Find summary packet at the end */
    rc->twopass_buffer = rc->twopass_allframes_buf;
    rc->twopass_buffer +=
        rc->twopass_allframes_buf_size - OD_RC_2PASS_SUMMARY_SZ;
    rc->twopass_buffer_bytes = 0;

    if (od_rc_unbuffer_val(rc, 4) != OD_RC_2PASS_MAGIC) return -1;
    if (od_rc_unbuffer_val(rc, 1) != OD_RC_2PASS_VERSION) return -1;

    for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
      /*A 4-byte read yields a zero-extended value in [0, 2^32).*/
      const int64_t count = od_rc_unbuffer_val(rc, 4);
      rc->frame_count[i] = count;
      rc->exp[i] = od_rc_unbuffer_val(rc, 4);
      rc->scale_sum[i] = od_rc_unbuffer_val(rc, 8);
      rc->nframes[i] = rc->frame_count[i];
      total_frames += (uint64_t)count;
    }

    if (total_frames < 1) return -1;

    /*All the per-frame records promised by the summary must fit in the
       buffer.*/
    if (total_frames * OD_RC_2PASS_PACKET_SZ >
        (uint64_t)rc->twopass_allframes_buf_size)
      return -1;

    od_enc_rc_reset(rc);

    /* Everything looks ok */
    rc->twopass_buffer = rc->twopass_allframes_buf;
    rc->twopass_state = 2;
    rc->twopass_buffer_bytes = 0;
  }

  /*Read the next per-frame record.
    NOTE(review): no bounds check is made here against
     twopass_allframes_buf_size; this presumably relies on the caller not
     requesting more frames than the summary announced. Confirm.*/
  rc->cur_metrics.frame_type = od_rc_unbuffer_val(rc, 1);
  rc->cur_metrics.log_scale = od_rc_unbuffer_val(rc, 4);

  return 0;
}
1245