1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
10 * *
11 ********************************************************************
12
13 function:
14 last mod: $Id$
15
16 ********************************************************************/
17 #include <stdlib.h>
18 #include <string.h>
19 #include "encint.h"
20
/*A rough lookup table for tan(x), 0<=x<pi/2.
  The values are Q12 fixed-point and spaced at 5 degree intervals.
  These decisions are somewhat arbitrary, but sufficient for the 2nd order
   Bessel follower below.
  Values of x larger than 85 degrees are extrapolated from the last interval,
   which is way off, but "good enough".
  The table is only ever read, so it is declared const.*/
static const unsigned short OC_ROUGH_TAN_LOOKUP[18]={
      0,  358,  722, 1098, 1491, 1910,
   2365, 2868, 3437, 4096, 4881, 5850,
   7094, 8784,11254,15286,23230,46817
};
32
33 /*_alpha is Q24 in the range [0,0.5).
34 The return values is 5.12.*/
oc_warp_alpha(int _alpha)35 static int oc_warp_alpha(int _alpha){
36 int i;
37 int d;
38 int t0;
39 int t1;
40 i=_alpha*36>>24;
41 if(i>=17)i=16;
42 t0=OC_ROUGH_TAN_LOOKUP[i];
43 t1=OC_ROUGH_TAN_LOOKUP[i+1];
44 d=_alpha*36-(i<<24);
45 return (int)(((ogg_int64_t)t0<<32)+(t1-t0<<8)*(ogg_int64_t)d>>32);
46 }
47
48 /*Re-initialize the Bessel filter coefficients with the specified delay.
49 This does not alter the x/y state, but changes the reaction time of the
50 filter.
51 Altering the time constant of a reactive filter without alterning internal
52 state is something that has to be done carefuly, but our design operates at
53 high enough delays and with small enough time constant changes to make it
54 safe.*/
oc_iir_filter_reinit(oc_iir_filter * _f,int _delay)55 static void oc_iir_filter_reinit(oc_iir_filter *_f,int _delay){
56 int alpha;
57 ogg_int64_t one48;
58 ogg_int64_t warp;
59 ogg_int64_t k1;
60 ogg_int64_t k2;
61 ogg_int64_t d;
62 ogg_int64_t a;
63 ogg_int64_t ik2;
64 ogg_int64_t b1;
65 ogg_int64_t b2;
66 /*This borrows some code from an unreleased version of Postfish.
67 See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
68 on deriving the filter coefficients.*/
69 /*alpha is Q24*/
70 alpha=(1<<24)/_delay;
71 one48=(ogg_int64_t)1<<48;
72 /*warp is 7.12*/
73 warp=OC_MAXI(oc_warp_alpha(alpha),1);
74 /*k1 is 9.12*/
75 k1=3*warp;
76 /*k2 is 16.24.*/
77 k2=k1*warp;
78 /*d is 16.15.*/
79 d=((1<<12)+k1<<12)+k2+256>>9;
80 /*a is 0.32, since d is larger than both 1.0 and k2.*/
81 a=(k2<<23)/d;
82 /*ik2 is 25.24.*/
83 ik2=one48/k2;
84 /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/
85 b1=2*a*(ik2-(1<<24));
86 /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/
87 b2=(one48<<8)-(4*a<<24)-b1;
88 /*All of the filter parameters are Q24.*/
89 _f->c[0]=(ogg_int32_t)(b1+((ogg_int64_t)1<<31)>>32);
90 _f->c[1]=(ogg_int32_t)(b2+((ogg_int64_t)1<<31)>>32);
91 _f->g=(ogg_int32_t)(a+128>>8);
92 }
93
94 /*Initialize a 2nd order low-pass Bessel filter with the corresponding delay
95 and initial value.
96 _value is Q24.*/
oc_iir_filter_init(oc_iir_filter * _f,int _delay,ogg_int32_t _value)97 static void oc_iir_filter_init(oc_iir_filter *_f,int _delay,ogg_int32_t _value){
98 oc_iir_filter_reinit(_f,_delay);
99 _f->y[1]=_f->y[0]=_f->x[1]=_f->x[0]=_value;
100 }
101
oc_iir_filter_update(oc_iir_filter * _f,ogg_int32_t _x)102 static ogg_int64_t oc_iir_filter_update(oc_iir_filter *_f,ogg_int32_t _x){
103 ogg_int64_t c0;
104 ogg_int64_t c1;
105 ogg_int64_t g;
106 ogg_int64_t x0;
107 ogg_int64_t x1;
108 ogg_int64_t y0;
109 ogg_int64_t y1;
110 ogg_int64_t ya;
111 c0=_f->c[0];
112 c1=_f->c[1];
113 g=_f->g;
114 x0=_f->x[0];
115 x1=_f->x[1];
116 y0=_f->y[0];
117 y1=_f->y[1];
118 ya=(_x+x0*2+x1)*g+y0*c0+y1*c1+(1<<23)>>24;
119 _f->x[1]=(ogg_int32_t)x0;
120 _f->x[0]=_x;
121 _f->y[1]=(ogg_int32_t)y0;
122 _f->y[0]=(ogg_int32_t)ya;
123 return ya;
124 }
125
126
127
128 /*Search for the quantizer that matches the target most closely.
129 We don't assume a linear ordering, but when there are ties we pick the
130 quantizer closest to the old one.*/
oc_enc_find_qi_for_target(oc_enc_ctx * _enc,int _qti,int _qi_old,int _qi_min,ogg_int64_t _log_qtarget)131 static int oc_enc_find_qi_for_target(oc_enc_ctx *_enc,int _qti,int _qi_old,
132 int _qi_min,ogg_int64_t _log_qtarget){
133 ogg_int64_t best_qdiff;
134 int best_qi;
135 int qi;
136 best_qi=_qi_min;
137 best_qdiff=_enc->log_qavg[_qti][best_qi]-_log_qtarget;
138 best_qdiff=best_qdiff+OC_SIGNMASK(best_qdiff)^OC_SIGNMASK(best_qdiff);
139 for(qi=_qi_min+1;qi<64;qi++){
140 ogg_int64_t qdiff;
141 qdiff=_enc->log_qavg[_qti][qi]-_log_qtarget;
142 qdiff=qdiff+OC_SIGNMASK(qdiff)^OC_SIGNMASK(qdiff);
143 if(qdiff<best_qdiff||
144 qdiff==best_qdiff&&abs(qi-_qi_old)<abs(best_qi-_qi_old)){
145 best_qi=qi;
146 best_qdiff=qdiff;
147 }
148 }
149 return best_qi;
150 }
151
/*Compute the R-D lambda for the current frame and choose the additional
   quantizer indices available for adaptive quantization.
  The results are stored in _enc->lambda, _enc->state.qis[1..], and
   _enc->state.nqis; _enc->state.qis[0] is only read.
  _enc: The encoder context.
  _qti: The frame type, used to index the log_qavg table.*/
void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _qti){
  ogg_int64_t log_q;
  int         base_qi;
  int         alt_qi;
  int         count;
  /*For now, lambda is fixed depending on the qi value and frame type:
      lambda=qscale*(qavg[qti][qi]**2),
     where qscale=0.2125.
    This was derived by exhaustively searching for the optimal quantizer for
     the AC coefficients in each block from a number of test sequences for a
     number of fixed lambda values and fitting the peaks of the resulting
     histograms (on the log(qavg) scale).
    The same model applies to both inter and intra frames.
    A more adaptive scheme might perform better.*/
  base_qi=_enc->state.qis[0];
  /*If rate control is active, use the lambda for the _target_ quantizer.
    This allows us to scale to rates slightly lower than we'd normally be able
     to reach, and gives the rate control a semblance of "fractional qi"
     precision.
    TODO: Add API for changing QI, and allow extra precision.*/
  if(_enc->state.info.target_bitrate>0){
    log_q=_enc->rc.log_qtarget;
  }
  else{
    log_q=_enc->log_qavg[_qti][base_qi];
  }
  /*The resulting lambda value is less than 0x500000.*/
  _enc->lambda=(int)oc_bexp64(2*log_q-0x4780BD468D6B62BLL);
  /*Select additional quantizers.
    The R-D optimal block AC quantizer statistics suggest that the distribution
     is roughly Gaussian-like with a slight positive skew.
    K-means clustering on log_qavg to select 3 quantizers produces cluster
     centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}.
    Experiments confirm these are relatively good choices.

    Although we do greedy R-D optimization of the qii flags to avoid switching
     too frequently, this becomes ineffective at low rates, either because we
     do a poor job of predicting the actual R-D cost, or the greedy
     optimization is not sufficient.
    Therefore adaptive quantization is disabled above an (experimentally
     suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or
     INTER qi=20 with current matrices).
    This may need to be revised if the R-D cost estimation or qii flag
     optimization strategies change.*/
  count=1;
  if(!_enc->vp3_compatible&&log_q<(OC_Q57(56)>>3)){
    /*The candidate near log_q+0.7.*/
    alt_qi=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(base_qi-1,0),0,
     log_q+(OC_Q57(7)+5)/10);
    if(alt_qi!=base_qi){
      _enc->state.qis[count]=alt_qi;
      count++;
    }
    /*The candidate near log_q-0.6; skip it if it duplicates an entry we
       already have.*/
    alt_qi=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(base_qi+1,63),0,
     log_q-(OC_Q57(6)+5)/10);
    if(alt_qi!=base_qi&&alt_qi!=_enc->state.qis[count-1]){
      _enc->state.qis[count]=alt_qi;
      count++;
    }
  }
  _enc->state.nqis=count;
}
203
204 /*Binary exponential of _log_scale with 24-bit fractional precision and
205 saturation.
206 _log_scale: A binary logarithm in Q24 format.
207 Return: The binary exponential in Q24 format, saturated to 2**47-1 if
208 _log_scale was too large.*/
oc_bexp_q24(ogg_int32_t _log_scale)209 static ogg_int64_t oc_bexp_q24(ogg_int32_t _log_scale){
210 if(_log_scale<(ogg_int32_t)23<<24){
211 ogg_int64_t ret;
212 ret=oc_bexp64(((ogg_int64_t)_log_scale<<33)+OC_Q57(24));
213 return ret<0x7FFFFFFFFFFFLL?ret:0x7FFFFFFFFFFFLL;
214 }
215 return 0x7FFFFFFFFFFFLL;
216 }
217
218 /*Convenience function converts Q57 value to a clamped 32-bit Q24 value
219 _in: input in Q57 format.
220 Return: same number in Q24 */
oc_q57_to_q24(ogg_int64_t _in)221 static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){
222 ogg_int64_t ret;
223 ret=_in+((ogg_int64_t)1<<32)>>33;
224 /*0x80000000 is automatically converted to unsigned on 32-bit systems.
225 -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to
226 unsigned.*/
227 return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,ret,0x7FFFFFFF);
228 }
229
230 /*Binary exponential of _log_scale with 24-bit fractional precision and
231 saturation.
232 _log_scale: A binary logarithm in Q57 format.
233 Return: The binary exponential in Q24 format, saturated to 2**31-1 if
234 _log_scale was too large.*/
oc_bexp64_q24(ogg_int64_t _log_scale)235 static ogg_int32_t oc_bexp64_q24(ogg_int64_t _log_scale){
236 if(_log_scale<OC_Q57(8)){
237 ogg_int64_t ret;
238 ret=oc_bexp64(_log_scale+OC_Q57(24));
239 return ret<0x7FFFFFFF?(ogg_int32_t)ret:0x7FFFFFFF;
240 }
241 return 0x7FFFFFFF;
242 }
243
244
/*Reset the rate control buffer model and scale estimators to their initial
   state.
  The per-frame bit budget, buffer bounds, and initial fullness are derived
   from the target bitrate, frame rate, and buffer delay, and the rate model
   exponents and starting scales are picked from the number of pixels per bit.
  _enc: The encoder context; only _enc->rc is modified.*/
static void oc_enc_rc_reset(oc_enc_ctx *_enc){
  oc_rc_state *rc;
  ogg_int64_t  npixels;
  ogg_int64_t  ibpp;
  int          inter_delay;
  rc=&_enc->rc;
  /*TODO: These parameters should be exposed in a th_encode_ctl() API.*/
  rc->bits_per_frame=(_enc->state.info.target_bitrate*
   (ogg_int64_t)_enc->state.info.fps_denominator)/
   _enc->state.info.fps_numerator;
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away: keep the budget within [32,2**46] bits.*/
  if(rc->bits_per_frame>0x400000000000LL){
    rc->bits_per_frame=(ogg_int64_t)0x400000000000LL;
  }
  else if(rc->bits_per_frame<32){
    rc->bits_per_frame=32;
  }
  rc->buf_delay=OC_MAXI(rc->buf_delay,12);
  rc->max=rc->bits_per_frame*rc->buf_delay;
  /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend
     on a single keyframe interval.
    We can require fully half the bits in an interval for a keyframe, so this
     initial level gives us maximum flexibility for over/under-shooting in
     subsequent frames.*/
  rc->target=((rc->max+1)>>1)+((rc->bits_per_frame+2)>>2)*
   OC_MINI(_enc->keyframe_frequency_force,rc->buf_delay);
  rc->fullness=rc->target;
  /*Pick exponents and initial scales for quantizer selection, based on the
     number of pixels available per bit.*/
  npixels=_enc->state.info.frame_width*
   (ogg_int64_t)_enc->state.info.frame_height;
  rc->log_npixels=oc_blog64(npixels);
  ibpp=npixels/rc->bits_per_frame;
  /*Frame type 0.*/
  if(ibpp>=2){
    rc->exp[0]=48;
    rc->log_scale[0]=oc_blog64(834)-OC_Q57(8);
  }
  else if(ibpp>=1){
    rc->exp[0]=55;
    rc->log_scale[0]=oc_blog64(1604)-OC_Q57(8);
  }
  else{
    rc->exp[0]=59;
    rc->log_scale[0]=oc_blog64(1997)-OC_Q57(8);
  }
  /*Frame type 1.*/
  if(ibpp>=8){
    rc->exp[1]=73;
    rc->log_scale[1]=oc_blog64(1260)-OC_Q57(8);
  }
  else if(ibpp>=4){
    rc->exp[1]=95;
    rc->log_scale[1]=oc_blog64(1751)-OC_Q57(8);
  }
  else{
    rc->exp[1]=100;
    rc->log_scale[1]=oc_blog64(2249)-OC_Q57(8);
  }
  rc->prev_drop_count=0;
  rc->log_drop_scale=OC_Q57(0);
  /*Set up second order followers, initialized according to corresponding
     time constants.*/
  oc_iir_filter_init(&rc->scalefilter[0],4,oc_q57_to_q24(rc->log_scale[0]));
  inter_delay=(rc->twopass?
   OC_MAXI(_enc->keyframe_frequency_force,12):rc->buf_delay)>>1;
  rc->inter_count=0;
  /*The filter for frame type 1 always starts with a delay of 10, which keeps
     it within the range of values where later incrementing the delay works as
     designed.
    10 is not an exact choice, but rather a good working trade-off.
    The delay derived above is only recorded as the target.*/
  rc->inter_delay=10;
  rc->inter_delay_target=inter_delay;
  oc_iir_filter_init(&rc->scalefilter[1],rc->inter_delay,
   oc_q57_to_q24(rc->log_scale[1]));
  oc_iir_filter_init(&rc->vfrfilter,4,oc_bexp64_q24(rc->log_drop_scale));
}
317
/*Initialize the rate control state.
  The two-pass fields, the frame metrics pointer, and the rate bias are always
   cleared; the buffer model is only set up when a target bitrate is set.
  _rc:  The rate control state to initialize.
  _enc: The encoder context supplying the bitrate and keyframe interval; it is
         also what oc_enc_rc_reset() operates on.*/
void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc){
  _rc->twopass=0;
  _rc->twopass_buffer_bytes=0;
  _rc->twopass_force_kf=0;
  _rc->frame_metrics=NULL;
  _rc->rate_bias=0;
  /*Without a target bitrate there is no buffer model to set up.*/
  if(_enc->state.info.target_bitrate<=0)return;
  /*The buffer size is set equal to the keyframe interval, clamped to the
     range [12,256] frames.
    The 12 frame minimum gives us some chance to distribute bit estimation
     errors.
    The 256 frame maximum means we'll require 8-10 seconds of pre-buffering
     at 24-30 fps, which is not unreasonable.
    Only the upper limit is applied here; oc_enc_rc_reset() applies the lower
     one.*/
  if(_enc->keyframe_frequency_force>256)_rc->buf_delay=256;
  else _rc->buf_delay=_enc->keyframe_frequency_force;
  /*By default, enforce all buffer constraints.*/
  _rc->drop_frames=1;
  _rc->cap_overflow=1;
  _rc->cap_underflow=0;
  oc_enc_rc_reset(_enc);
}
340
/*Release the storage owned by the rate control state.
  The pointer is cleared afterwards, so that clearing the same state twice, or
   testing frame_metrics against NULL later on, never touches freed memory.
  _rc: The rate control state whose frame_metrics buffer is freed.*/
void oc_rc_state_clear(oc_rc_state *_rc){
  _ogg_free(_rc->frame_metrics);
  _rc->frame_metrics=NULL;
}
344
/*Recompute the rate control buffer bounds from the current bitrate, frame
   rate, and buffer delay.
  Before the first frame this is a full reset; afterwards the bounds and the
   target are updated, but the current fullness is kept.
  In pass-2 mode, the frame metrics circular buffer is also grown, if needed,
   to hold statistics for a full buffer window.
  If that allocation fails, the buffer delay reverts to the last size that
   worked (or to whole-file buffering) and the bounds are recomputed.
  _enc: The encoder context; only _enc->rc is modified.*/
void oc_enc_rc_resize(oc_enc_ctx *_enc){
  /*If encoding has not yet begun, reset the buffer state.*/
  if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc);
  else{
    int idt;
    /*Otherwise, update the bounds on the buffer, but not the current
       fullness.*/
    _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
     (ogg_int64_t)_enc->state.info.fps_denominator)/
     _enc->state.info.fps_numerator;
    /*Insane framerates or frame sizes mean insane bitrates.
      Let's not get carried away.*/
    if(_enc->rc.bits_per_frame>0x400000000000LL){
      _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
    }
    else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
    _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
    _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
    _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
     OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
    /*Update the INTER-frame scale filter delay.
      We jump to it immediately if we've already seen enough frames; otherwise
       it is simply set as the new target.*/
    _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10);
    if(idt<OC_MINI(_enc->rc.inter_delay,_enc->rc.inter_count)){
      oc_iir_filter_init(&_enc->rc.scalefilter[1],idt,
       _enc->rc.scalefilter[1].y[0]);
      _enc->rc.inter_delay=idt;
    }
  }
  /*If we're in pass-2 mode, make sure the frame metrics array is big enough
     to hold frame statistics for the full buffer.*/
  if(_enc->rc.twopass==2){
    int cfm;
    int buf_delay;
    int reset_window;
    buf_delay=_enc->rc.buf_delay;
    reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0||
     buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
     +_enc->rc.frames_total[2]);
    cfm=_enc->rc.cframe_metrics;
    /*Only try to resize the frame metrics buffer if a) it's too small and
       b) we were using a finite buffer, or are about to start.*/
    if(cfm<buf_delay&&(_enc->rc.frame_metrics!=NULL||reset_window)){
      oc_frame_metrics *fm;
      int               nfm;
      int               fmh;
      /*The result goes into a temporary so that the old buffer is not lost if
         the allocation fails.*/
      fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics,
       buf_delay*sizeof(*_enc->rc.frame_metrics));
      if(fm==NULL){
        /*We failed to allocate a finite buffer.*/
        /*If we don't have a valid 2-pass header yet, just return; we'll reset
           the buffer size when we read the header.*/
        if(_enc->rc.frames_total[0]==0)return;
        /*Otherwise revert to the largest finite buffer previously set, or to
           whole-file buffering if we were still using that.*/
        _enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL?
         cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
         +_enc->rc.frames_total[2];
        oc_enc_rc_resize(_enc);
        return;
      }
      _enc->rc.frame_metrics=fm;
      _enc->rc.cframe_metrics=buf_delay;
      /*Re-organize the circular buffer.*/
      fmh=_enc->rc.frame_metrics_head;
      nfm=_enc->rc.nframe_metrics;
      if(fmh+nfm>cfm){
        int shift;
        /*The entries that had wrapped around to the front of the old buffer
           are moved into the newly added space at the end, as far as they
           fit.*/
        shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm);
        memcpy(fm+cfm,fm,shift*sizeof(*fm));
        /*Any wrapped entries that did not fit slide down to the front.
          The number left over is counted in entries, so it has to be scaled
           by the entry size to get the byte count memmove() expects.*/
        if(fmh+nfm>buf_delay){
          memmove(fm,fm+shift,(fmh+nfm-buf_delay)*sizeof(*fm));
        }
      }
    }
    /*We were using whole-file buffering; now we're not.*/
    if(reset_window){
      _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0;
      _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
      _enc->rc.scale_window_end=_enc->rc.scale_window0=
       _enc->state.curframe_num+_enc->prev_dup_count+1;
      if(_enc->rc.twopass_buffer_bytes){
        int qti;
        /*We already read the metrics for the first frame in the window.*/
        *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics;
        _enc->rc.nframe_metrics++;
        qti=_enc->rc.cur_metrics.frame_type;
        _enc->rc.nframes[qti]++;
        _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count;
        _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
        _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1;
        if(_enc->rc.scale_window_end-_enc->rc.scale_window0<buf_delay){
          /*We need more frame data.*/
          _enc->rc.twopass_buffer_bytes=0;
        }
      }
    }
    /*Otherwise, we could shrink the size of the current window, if necessary,
       but leaving it like it is lets us adapt to the new buffer size more
       gracefully.*/
  }
}
446
447 /*Scale the number of frames by the number of expected drops/duplicates.*/
oc_rc_scale_drop(oc_rc_state * _rc,int _nframes)448 static int oc_rc_scale_drop(oc_rc_state *_rc,int _nframes){
449 if(_rc->prev_drop_count>0||_rc->log_drop_scale>OC_Q57(0)){
450 ogg_int64_t dup_scale;
451 dup_scale=oc_bexp64((_rc->log_drop_scale
452 +oc_blog64(_rc->prev_drop_count+1)>>1)+OC_Q57(8));
453 if(dup_scale<_nframes<<8){
454 int dup_scalei;
455 dup_scalei=(int)dup_scale;
456 if(dup_scalei>0)_nframes=((_nframes<<8)+dup_scalei-1)/dup_scalei;
457 }
458 else _nframes=!!_nframes;
459 }
460 return _nframes;
461 }
462
/*Select the quantizer index for the next frame under rate control.
  The bits available over the current buffer window are distributed between
   the frame types using the current scale estimates, which yields a log
   quantizer target for this frame.
  That target is then adjusted to avoid overflowing the buffer (if
   cap_overflow is set), limited in how fast it may change (if _clamp is set),
   and adjusted again to avoid busting the budget on this frame (when the
   minimum quality is 0).
  In pass 1 of two-pass mode the current qi is returned unchanged.
  Besides the return value, this updates _enc->rc.log_qtarget and
   _enc->rc.rate_bias, and in pass 2 also _enc->rc.log_scale[].
  _enc:   The encoder context.
  _qti:   The type of the frame about to be coded (0 for a keyframe, 1 for a
           delta frame).
  _clamp: If non-zero, limit the change in the quantizer target relative to
           the current quantizer.
  Return: The selected quantizer index.*/
int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){
  ogg_int64_t  rate_total;
  ogg_int64_t  rate_bias;
  int          nframes[2];
  int          buf_delay;
  int          buf_pad;
  ogg_int64_t  log_qtarget;
  ogg_int64_t  log_scale0;
  ogg_int64_t  log_cur_scale;
  ogg_int64_t  log_qexp;
  int          exp0;
  int          old_qi;
  int          qi;
  /*Figure out how to re-distribute bits so that we hit our fullness target
     before the last keyframe in our current buffer window (after the current
     frame), or the end of the buffer window, whichever comes first.*/
  log_cur_scale=(ogg_int64_t)_enc->rc.scalefilter[_qti].y[0]<<33;
  buf_pad=0;
  switch(_enc->rc.twopass){
    default:{
      ogg_uint32_t next_key_frame;
      /*Single pass mode: assume only forced keyframes and attempt to estimate
         the drop count for VFR content.*/
      next_key_frame=_qti?_enc->keyframe_frequency_force
       -(_enc->state.curframe_num-_enc->state.keyframe_num):0;
      nframes[0]=(_enc->rc.buf_delay-OC_MINI(next_key_frame,_enc->rc.buf_delay)
       +_enc->keyframe_frequency_force-1)/_enc->keyframe_frequency_force;
      if(nframes[0]+_qti>1){
        nframes[0]--;
        buf_delay=next_key_frame+nframes[0]*_enc->keyframe_frequency_force;
      }
      else buf_delay=_enc->rc.buf_delay;
      nframes[1]=buf_delay-nframes[0];
      /*Downgrade the delta frame rate to correspond to the recent drop count
         history.*/
      nframes[1]=oc_rc_scale_drop(&_enc->rc,nframes[1]);
    }break;
    case 1:{
      /*Pass 1 mode: use a fixed qi value.*/
      qi=_enc->state.qis[0];
      _enc->rc.log_qtarget=_enc->log_qavg[_qti][qi];
      return qi;
    }break;
    case 2:{
      ogg_int64_t scale_sum[2];
      int         qti;
      /*Pass 2 mode: we know exactly how much of each frame type there is in
         the current buffer window, and have estimates for the scales.*/
      nframes[0]=_enc->rc.nframes[0];
      nframes[1]=_enc->rc.nframes[1];
      scale_sum[0]=_enc->rc.scale_sum[0];
      scale_sum[1]=_enc->rc.scale_sum[1];
      /*The window size can be slightly larger than the buffer window for VFR
         content; clamp it down, if appropriate (the excess will all be dup
         frames).*/
      buf_delay=OC_MINI(_enc->rc.scale_window_end-_enc->rc.scale_window0,
       _enc->rc.buf_delay);
      /*If we're approaching the end of the file, add some slack to keep us
         from slamming into a rail.
        Our rate accuracy goes down, but it keeps the result sensible.
        We position the target where the first forced keyframe beyond the end
         of the file would be (for consistency with 1-pass mode).*/
      buf_pad=OC_MINI(_enc->rc.buf_delay,_enc->state.keyframe_num
       +_enc->keyframe_frequency_force-_enc->rc.scale_window0);
      if(buf_delay<buf_pad)buf_pad-=buf_delay;
      else{
        /*Otherwise, search for the last keyframe in the buffer window and
           target that.*/
        buf_pad=0;
        /*TODO: Currently we only do this when using a finite buffer; we could
           save the position of the last keyframe in the summary data and do it
           with a whole-file buffer as well, but it isn't likely to make a
           difference.*/
        if(_enc->rc.frame_metrics!=NULL){
          int fmi;
          int fm_tail;
          fm_tail=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics;
          if(fm_tail>=_enc->rc.cframe_metrics)fm_tail-=_enc->rc.cframe_metrics;
          for(fmi=fm_tail;;){
            oc_frame_metrics *m;
            fmi--;
            if(fmi<0)fmi+=_enc->rc.cframe_metrics;
            /*Stop before we remove the first frame.*/
            if(fmi==_enc->rc.frame_metrics_head)break;
            m=_enc->rc.frame_metrics+fmi;
            /*If we find a keyframe, remove it and everything past it.*/
            if(m->frame_type==OC_INTRA_FRAME){
              do{
                qti=m->frame_type;
                nframes[qti]--;
                scale_sum[qti]-=oc_bexp_q24(m->log_scale);
                buf_delay-=m->dup_count+1;
                fmi++;
                if(fmi>=_enc->rc.cframe_metrics)fmi=0;
                m=_enc->rc.frame_metrics+fmi;
              }
              while(fmi!=fm_tail);
              /*And stop scanning backwards.*/
              break;
            }
          }
        }
      }
      /*If we're not using the same frame type as in pass 1 (because someone
         changed the keyframe interval), remove that scale estimate.
        We'll add in a replacement for the correct frame type below.*/
      qti=_enc->rc.cur_metrics.frame_type;
      if(qti!=_qti){
        nframes[qti]--;
        scale_sum[qti]-=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
      }
      /*Compute log_scale estimates for each frame type from the pass-1 scales
         we measured in the current window.*/
      for(qti=0;qti<2;qti++){
        _enc->rc.log_scale[qti]=nframes[qti]>0?
         oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24):
         -_enc->rc.log_npixels;
      }
      /*If we're not using the same frame type as in pass 1, add a scale
         estimate for the corresponding frame using the current low-pass
         filter value.
        This is mostly to ensure we have a valid estimate even when pass 1 had
         no frames of this type in the buffer window.
        TODO: We could also plan ahead and figure out how many keyframes we'll
         be forced to add in the current buffer window.*/
      qti=_enc->rc.cur_metrics.frame_type;
      if(qti!=_qti){
        ogg_int64_t scale;
        scale=_enc->rc.log_scale[_qti]<OC_Q57(23)?
         oc_bexp64(_enc->rc.log_scale[_qti]+OC_Q57(24)):0x7FFFFFFFFFFFLL;
        scale*=nframes[_qti];
        nframes[_qti]++;
        scale+=oc_bexp_q24(log_cur_scale>>33);
        /*scale is now the sum over the frames of type _qti, so the average
           must divide by the count for that same type, not by the count for
           the pass-1 frame type.*/
        _enc->rc.log_scale[_qti]=oc_blog64(scale)
         -oc_blog64(nframes[_qti])-OC_Q57(24);
      }
      else log_cur_scale=(ogg_int64_t)_enc->rc.cur_metrics.log_scale<<33;
      /*Add the padding from above.
        This basically reverts to 1-pass estimations in the last keyframe
         interval.*/
      if(buf_pad>0){
        ogg_int64_t scale;
        int         nextra_frames;
        /*Extend the buffer.*/
        buf_delay+=buf_pad;
        /*Add virtual delta frames according to the estimated drop count.*/
        nextra_frames=oc_rc_scale_drop(&_enc->rc,buf_pad);
        /*And blend in the low-pass filtered scale according to how many frames
           we added.*/
        scale=
         oc_bexp64(_enc->rc.log_scale[1]+OC_Q57(24))*(ogg_int64_t)nframes[1]
         +oc_bexp_q24(_enc->rc.scalefilter[1].y[0])*(ogg_int64_t)nextra_frames;
        nframes[1]+=nextra_frames;
        _enc->rc.log_scale[1]=oc_blog64(scale)-oc_blog64(nframes[1])-OC_Q57(24);
      }
    }break;
  }
  /*If we've been missing our target, add a penalty term.*/
  rate_bias=(_enc->rc.rate_bias/(_enc->state.curframe_num+1000))*
   (buf_delay-buf_pad);
  /*rate_total is the total bits available over the next buf_delay frames.*/
  rate_total=_enc->rc.fullness-_enc->rc.target+rate_bias
   +buf_delay*_enc->rc.bits_per_frame;
  log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels;
  /*If there aren't enough bits to achieve our desired fullness level, use the
     minimum quality permitted.*/
  if(rate_total<=buf_delay)log_qtarget=OC_QUANT_MAX_LOG;
  else{
    static const ogg_int64_t LOG_KEY_RATIO=0x0137222BB70747BALL;
    ogg_int64_t log_scale1;
    ogg_int64_t rlo;
    ogg_int64_t rhi;
    log_scale1=_enc->rc.log_scale[1-_qti]+_enc->rc.log_npixels;
    /*Bisect on the number of bits for a frame of the current type until the
       total over both frame types matches rate_total.*/
    rlo=0;
    rhi=(rate_total+nframes[_qti]-1)/nframes[_qti];
    while(rlo<rhi){
      ogg_int64_t curr;
      ogg_int64_t rdiff;
      ogg_int64_t log_rpow;
      ogg_int64_t rscale;
      curr=rlo+rhi>>1;
      log_rpow=oc_blog64(curr)-log_scale0;
      log_rpow=(log_rpow+(_enc->rc.exp[_qti]>>1))/_enc->rc.exp[_qti];
      if(_qti)log_rpow+=LOG_KEY_RATIO>>6;
      else log_rpow-=LOG_KEY_RATIO>>6;
      log_rpow*=_enc->rc.exp[1-_qti];
      rscale=nframes[1-_qti]*oc_bexp64(log_scale1+log_rpow);
      rdiff=nframes[_qti]*curr+rscale-rate_total;
      if(rdiff<0)rlo=curr+1;
      else if(rdiff>0)rhi=curr-1;
      else break;
    }
    log_qtarget=OC_Q57(2)-((oc_blog64(rlo)-log_scale0+(_enc->rc.exp[_qti]>>1))/
     _enc->rc.exp[_qti]<<6);
    log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
  }
  /*The above allocation looks only at the total rate we'll accumulate in the
     next buf_delay frames.
    However, we could overflow the buffer on the very next frame, so check for
     that here, if we're not using a soft target.*/
  exp0=_enc->rc.exp[_qti];
  if(_enc->rc.cap_overflow){
    ogg_int64_t margin;
    ogg_int64_t soft_limit;
    ogg_int64_t log_soft_limit;
    /*Allow 3% of the buffer for prediction error.
      This should be plenty, and we don't mind if we go a bit over; we only
       want to keep these bits from being completely wasted.*/
    margin=_enc->rc.max+31>>5;
    /*We want to use at least this many bits next frame.*/
    soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin);
    log_soft_limit=oc_blog64(soft_limit);
    /*If we're predicting we won't use that many...*/
    log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
    if(log_scale0-log_qexp<log_soft_limit){
      /*Scale the adjustment based on how far into the margin we are.*/
      log_qexp+=(log_scale0-log_soft_limit-log_qexp>>32)*
       ((OC_MINI(margin,soft_limit)<<32)/margin);
      log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
    }
  }
  /*If this was not one of the initial frames, limit the change in quality.*/
  old_qi=_enc->state.qis[0];
  if(_clamp){
    ogg_int64_t log_qmin;
    ogg_int64_t log_qmax;
    /*Clamp the target quantizer to within [0.8*Q,1.2*Q], where Q is the
       current quantizer.
      TODO: With user-specified quant matrices, we need to enlarge these limits
       if they don't actually let us change qi values.*/
    log_qmin=_enc->log_qavg[_qti][old_qi]-0x00A4D3C25E68DC58LL;
    log_qmax=_enc->log_qavg[_qti][old_qi]+0x00A4D3C25E68DC58LL;
    log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax);
  }
  /*The above allocation looks only at the total rate we'll accumulate in the
     next buf_delay frames.
    However, we could bust the budget on the very next frame, so check for that
     here, if we're not using a soft target.*/
  /*Disabled when our minimum qi > 0; if we saturate log_qtarget to the
     maximum possible size when we have a minimum qi, the resulting lambda
     will interact very strangely with SKIP.
    The resulting artifacts look like waterfalls.*/
  if(_enc->state.info.quality==0){
    ogg_int64_t log_hard_limit;
    /*Compute the maximum number of bits we can use in the next frame.
      Allow 50% of the rate for a single frame for prediction error.
      This may not be enough for keyframes or sudden changes in complexity.*/
    log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1));
    /*If we're predicting we'll use more than this...*/
    log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
    if(log_scale0-log_qexp>log_hard_limit){
      /*Force the target to hit our limit exactly.*/
      log_qexp=log_scale0-log_hard_limit;
      log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
      /*If that target is unreasonable, oh well; we'll have to drop.*/
      log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
    }
  }
  /*Compute a final estimate of the number of bits we plan to use.*/
  log_qexp=(log_qtarget-OC_Q57(2)>>6)*_enc->rc.exp[_qti];
  _enc->rc.rate_bias+=oc_bexp64(log_cur_scale+_enc->rc.log_npixels-log_qexp);
  qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi,
   _enc->state.info.quality,log_qtarget);
  /*Save the quantizer target for lambda calculations.*/
  _enc->rc.log_qtarget=log_qtarget;
  return qi;
}
730
oc_enc_update_rc_state(oc_enc_ctx * _enc,long _bits,int _qti,int _qi,int _trial,int _droppable)731 int oc_enc_update_rc_state(oc_enc_ctx *_enc,
732 long _bits,int _qti,int _qi,int _trial,int _droppable){
733 ogg_int64_t buf_delta;
734 ogg_int64_t log_scale;
735 int dropped;
736 dropped=0;
737 /* Drop frames also disabled for now in the case of infinite-buffer
738 two-pass mode */
739 if(!_enc->rc.drop_frames||_enc->rc.twopass&&_enc->rc.frame_metrics==NULL){
740 _droppable=0;
741 }
742 buf_delta=_enc->rc.bits_per_frame*(1+_enc->dup_count);
743 if(_bits<=0){
744 /*We didn't code any blocks in this frame.*/
745 log_scale=OC_Q57(-64);
746 _bits=0;
747 }
748 else{
749 ogg_int64_t log_bits;
750 ogg_int64_t log_qexp;
751 /*Compute the estimated scale factor for this frame type.*/
752 log_bits=oc_blog64(_bits);
753 log_qexp=_enc->rc.log_qtarget-OC_Q57(2);
754 log_qexp=(log_qexp>>6)*(_enc->rc.exp[_qti]);
755 log_scale=OC_MINI(log_bits-_enc->rc.log_npixels+log_qexp,OC_Q57(16));
756 }
757 /*Special two-pass processing.*/
758 switch(_enc->rc.twopass){
759 case 1:{
760 /*Pass 1 mode: save the metrics for this frame.*/
761 _enc->rc.cur_metrics.log_scale=oc_q57_to_q24(log_scale);
762 _enc->rc.cur_metrics.dup_count=_enc->dup_count;
763 _enc->rc.cur_metrics.frame_type=_enc->state.frame_type;
764 _enc->rc.twopass_buffer_bytes=0;
765 }break;
766 case 2:{
767 /*Pass 2 mode:*/
768 if(!_trial){
769 ogg_int64_t next_frame_num;
770 int qti;
771 /*Move the current metrics back one frame.*/
772 *&_enc->rc.prev_metrics=*&_enc->rc.cur_metrics;
773 next_frame_num=_enc->state.curframe_num+_enc->dup_count+1;
774 /*Back out the last frame's statistics from the sliding window.*/
775 qti=_enc->rc.prev_metrics.frame_type;
776 _enc->rc.frames_left[qti]--;
777 _enc->rc.frames_left[2]-=_enc->rc.prev_metrics.dup_count;
778 _enc->rc.nframes[qti]--;
779 _enc->rc.nframes[2]-=_enc->rc.prev_metrics.dup_count;
780 _enc->rc.scale_sum[qti]-=oc_bexp_q24(_enc->rc.prev_metrics.log_scale);
781 _enc->rc.scale_window0=(int)next_frame_num;
782 /*Free the corresponding entry in the circular buffer.*/
783 if(_enc->rc.frame_metrics!=NULL){
784 _enc->rc.nframe_metrics--;
785 _enc->rc.frame_metrics_head++;
786 if(_enc->rc.frame_metrics_head>=_enc->rc.cframe_metrics){
787 _enc->rc.frame_metrics_head=0;
788 }
789 }
790 /*Mark us ready for the next 2-pass packet.*/
791 _enc->rc.twopass_buffer_bytes=0;
792 /*Update state, so the user doesn't have to keep calling 2pass_in after
793 they've fed in all the data when we're using a finite buffer.*/
794 _enc->prev_dup_count=_enc->dup_count;
795 oc_enc_rc_2pass_in(_enc,NULL,0);
796 }
797 }break;
798 }
799 /*Common to all passes:*/
800 if(_bits>0){
801 if(_trial){
802 oc_iir_filter *f;
803 /*Use the estimated scale factor directly if this was a trial.*/
804 f=_enc->rc.scalefilter+_qti;
805 f->y[1]=f->y[0]=f->x[1]=f->x[0]=oc_q57_to_q24(log_scale);
806 _enc->rc.log_scale[_qti]=log_scale;
807 }
808 else{
809 /*Lengthen the time constant for the INTER filter as we collect more
810 frame statistics, until we reach our target.*/
811 if(_enc->rc.inter_delay<_enc->rc.inter_delay_target&&
812 _enc->rc.inter_count>=_enc->rc.inter_delay&&_qti==OC_INTER_FRAME){
813 oc_iir_filter_reinit(&_enc->rc.scalefilter[1],++_enc->rc.inter_delay);
814 }
815 /*Otherwise update the low-pass scale filter for this frame type,
816 regardless of whether or not we dropped this frame.*/
817 _enc->rc.log_scale[_qti]=oc_iir_filter_update(
818 _enc->rc.scalefilter+_qti,oc_q57_to_q24(log_scale))<<33;
819 /*If this frame busts our budget, it must be dropped.*/
820 if(_droppable&&_enc->rc.fullness+buf_delta<_bits){
821 _enc->rc.prev_drop_count+=1+_enc->dup_count;
822 _bits=0;
823 dropped=1;
824 }
825 else{
826 ogg_uint32_t drop_count;
827 /*Update a low-pass filter to estimate the "real" frame rate taking
828 drops and duplicates into account.
829 This is only done if the frame is coded, as it needs the final
830 count of dropped frames.*/
831 drop_count=_enc->rc.prev_drop_count+1;
832 if(drop_count>0x7F)drop_count=0x7FFFFFFF;
833 else drop_count<<=24;
834 _enc->rc.log_drop_scale=oc_blog64(oc_iir_filter_update(
835 &_enc->rc.vfrfilter,drop_count))-OC_Q57(24);
836 /*Initialize the drop count for this frame to the user-requested dup
837 count.
838 It will be increased if we drop more frames.*/
839 _enc->rc.prev_drop_count=_enc->dup_count;
840 }
841 }
842 /*Increment the INTER frame count, for filter adaptation purposes.*/
843 if(_enc->rc.inter_count<INT_MAX)_enc->rc.inter_count+=_qti;
844 }
845 /*Increase the drop count.*/
846 else _enc->rc.prev_drop_count+=1+_enc->dup_count;
847 /*And update the buffer fullness level.*/
848 if(!_trial){
849 _enc->rc.fullness+=buf_delta-_bits;
850 /*If we're too quick filling the buffer and overflow is capped,
851 that rate is lost forever.*/
852 if(_enc->rc.cap_overflow&&_enc->rc.fullness>_enc->rc.max){
853 _enc->rc.fullness=_enc->rc.max;
854 }
855 /*If we're too quick draining the buffer and underflow is capped,
856 don't try to make up that rate later.*/
857 if(_enc->rc.cap_underflow&&_enc->rc.fullness<0){
858 _enc->rc.fullness=0;
859 }
860 /*Adjust the bias for the real bits we've used.*/
861 _enc->rc.rate_bias-=_bits;
862 }
863 return dropped;
864 }
865
/*The version number stored right after the magic value in the two-pass
   summary header.
  oc_enc_rc_2pass_in() rejects headers whose version field differs.*/
#define OC_RC_2PASS_VERSION (1)
/*The size of the two-pass summary header, in bytes, as written by
   oc_enc_rc_2pass_out():
   4 (magic) + 4 (version) + 3*4 (frame totals) + 2*1 (exponents) +
   2*8 (scale sums).*/
#define OC_RC_2PASS_HDR_SZ (38)
/*The size of one per-frame two-pass packet, in bytes:
   4 (dup count, with the frame type in the top bit) + 4 (log scale).*/
#define OC_RC_2PASS_PACKET_SZ (8)
869
/*Appends the low _bytes bytes of _val to the two-pass buffer, least
   significant byte first.
  _rc->twopass_buffer_bytes is advanced past every byte written.
  Nothing is written when _bytes is not positive.
  No bounds checking is done; the caller must ensure the buffer has room.*/
static void oc_rc_buffer_val(oc_rc_state *_rc,ogg_int64_t _val,int _bytes){
  int bi;
  for(bi=0;bi<_bytes;bi++){
    _rc->twopass_buffer[_rc->twopass_buffer_bytes]=(unsigned char)(_val&0xFF);
    _rc->twopass_buffer_bytes++;
    /*Move the next more significant byte into position.*/
    _val>>=8;
  }
}
876
/*Produces the next chunk of pass 1 statistics.
  Three kinds of output are generated, depending on the encoder state:
   - On the very first call (rc.twopass==0), the encoder is switched into
      pass 1 mode and a placeholder summary header (magic, version, and
      zeros) is emitted.
   - When the buffer is empty in pass 1 mode, an 8-byte per-frame packet is
      built from rc.cur_metrics and that frame is added to the running
      totals.
   - When the buffer is non-empty, the packet state is OC_PACKET_DONE, and the
      buffer does not already hold a header, the final summary header is
      emitted with the real totals.
  _enc: The encoder context.
  _buf: Returns a pointer to the encoder's internal two-pass buffer, or NULL
         if there is no new data.
  Return: The number of valid bytes in *_buf, or 0 if the data for this frame
           has already been retrieved.*/
int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf){
  if(_enc->rc.twopass_buffer_bytes==0){
    if(_enc->rc.twopass==0){
      int qi;
      /*Pick first-pass qi for scale calculations.*/
      qi=oc_enc_select_qi(_enc,0,0);
      _enc->state.nqis=1;
      _enc->state.qis[0]=qi;
      _enc->rc.twopass=1;
      _enc->rc.frames_total[0]=_enc->rc.frames_total[1]=
       _enc->rc.frames_total[2]=0;
      _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
      /*Fill in dummy summary values.
        Only the magic value and version are meaningful; the rest of the
         header is zeros, so the frame counts read back as 0.*/
      oc_rc_buffer_val(&_enc->rc,0x5032544F,4);
      oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4);
      oc_rc_buffer_val(&_enc->rc,0,OC_RC_2PASS_HDR_SZ-8);
    }
    else{
      ogg_uint32_t packed;
      int          qti;
      qti=_enc->rc.cur_metrics.frame_type;
      /*Accumulate this frame into the totals for the final summary header.*/
      _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
      _enc->rc.frames_total[qti]++;
      _enc->rc.frames_total[2]+=_enc->rc.cur_metrics.dup_count;
      /*Pack the frame type into bit 31 above the dup count.
        The shift is done in an unsigned 32-bit type: if frame_type is an int
         or promotes to one (its declaration is not visible here),
         frame_type<<31 would shift a 1 into the sign bit, which C leaves
         undefined.
        The four bytes written are the same either way.*/
      packed=(ogg_uint32_t)_enc->rc.cur_metrics.dup_count
       |((ogg_uint32_t)qti<<31);
      oc_rc_buffer_val(&_enc->rc,packed,4);
      oc_rc_buffer_val(&_enc->rc,_enc->rc.cur_metrics.log_scale,4);
    }
  }
  else if(_enc->packet_state==OC_PACKET_DONE&&
   _enc->rc.twopass_buffer_bytes!=OC_RC_2PASS_HDR_SZ){
    /*Encoding is done and the buffer does not yet hold a header: replace its
       contents with the real summary header.
      Afterwards the buffer holds exactly OC_RC_2PASS_HDR_SZ bytes, so later
       calls fall through to the "already retrieved" case below.*/
    _enc->rc.twopass_buffer_bytes=0;
    oc_rc_buffer_val(&_enc->rc,0x5032544F,4);
    oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4);
    oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[0],4);
    oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[1],4);
    oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[2],4);
    oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[0],1);
    oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[1],1);
    oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[0],8);
    oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[1],8);
  }
  else{
    /*The data for this frame has already been retrieved.*/
    *_buf=NULL;
    return 0;
  }
  *_buf=_enc->rc.twopass_buffer;
  return _enc->rc.twopass_buffer_bytes;
}
926
/*Copies input bytes into the two-pass buffer until it holds _goal bytes or
   the input runs out, whichever comes first.
  _rc:       The rate control state that owns the buffer.
  _buf:      The caller's input data.
  _bytes:    The total number of bytes in _buf.
  _consumed: The number of bytes of _buf that have already been used.
  _goal:     The desired value of _rc->twopass_buffer_fill.
  Return: The updated number of bytes of _buf that have been used.*/
static size_t oc_rc_buffer_fill(oc_rc_state *_rc,
 unsigned char *_buf,size_t _bytes,size_t _consumed,size_t _goal){
  size_t have;
  size_t want;
  size_t avail;
  size_t ncopy;
  have=_rc->twopass_buffer_fill;
  /*Nothing to do if the goal is already met or the input is exhausted.*/
  if(have>=_goal||_consumed>=_bytes)return _consumed;
  want=_goal-have;
  avail=_bytes-_consumed;
  ncopy=want<avail?want:avail;
  memcpy(_rc->twopass_buffer+have,_buf+_consumed,ncopy);
  _rc->twopass_buffer_fill+=ncopy;
  return _consumed+ncopy;
}
934
oc_rc_unbuffer_val(oc_rc_state * _rc,int _bytes)935 static ogg_int64_t oc_rc_unbuffer_val(oc_rc_state *_rc,int _bytes){
936 ogg_int64_t ret;
937 int shift;
938 ret=0;
939 shift=0;
940 while(_bytes-->0){
941 ret|=((ogg_int64_t)_rc->twopass_buffer[_rc->twopass_buffer_bytes++])<<shift;
942 shift+=8;
943 }
944 return ret;
945 }
946
/*Feeds pass 1 statistics to the encoder for use in pass 2, or, when _buf is
   NULL, reports how many more bytes are wanted.
  The first call (rc.twopass==0) switches the encoder into pass 2 mode and
   resets the input buffer and sliding-window bookkeeping.
  Input is accumulated in rc.twopass_buffer: first the summary header
   (OC_RC_2PASS_HDR_SZ bytes), then per-frame packets
   (OC_RC_2PASS_PACKET_SZ bytes each).
  A non-zero rc.twopass_buffer_bytes on exit from the packet stage means the
   metrics for the next frame are loaded into rc.cur_metrics.
  _enc:   The encoder context.
  _buf:   The input data, or NULL to query the number of bytes needed.
  _bytes: The number of bytes available in _buf.
  Return: The number of bytes of _buf consumed.
          When _buf is NULL, the number of bytes still needed at the current
           stage (0 if no more data is wanted).
          TH_ENOTFORMAT if the header's magic value or version is wrong.
          TH_EBADHEADER if the header claims no frames of the first type or
           its frame counts fail the overflow check.
          TH_EINVAL if the dup count of the frame just encoded does not match
           the pass 1 metrics.*/
int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes){
  size_t consumed;
  consumed=0;
  /*Enable pass 2 mode if this is the first call.*/
  if(_enc->rc.twopass==0){
    _enc->rc.twopass=2;
    _enc->rc.twopass_buffer_fill=0;
    _enc->rc.frames_total[0]=0;
    _enc->rc.nframe_metrics=0;
    _enc->rc.cframe_metrics=0;
    _enc->rc.frame_metrics_head=0;
    _enc->rc.scale_window0=0;
    _enc->rc.scale_window_end=0;
  }
  /*If we haven't got a valid summary header yet, try to parse one.
    frames_total[0]==0 doubles as the "no header yet" flag.*/
  if(_enc->rc.frames_total[0]==0){
    if(!_buf){
      int frames_needed;
      /*If we're using a whole-file buffer, we just need the first frame.
        Otherwise, we may need as many as one per buffer slot.*/
      frames_needed=_enc->rc.frame_metrics==NULL?1:_enc->rc.buf_delay;
      return OC_RC_2PASS_HDR_SZ+frames_needed*OC_RC_2PASS_PACKET_SZ
       -_enc->rc.twopass_buffer_fill;
    }
    consumed=oc_rc_buffer_fill(&_enc->rc,
     _buf,_bytes,consumed,OC_RC_2PASS_HDR_SZ);
    if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_HDR_SZ){
      ogg_int64_t scale_sum[2];
      int         exp[2];
      int         buf_delay;
      /*Read the summary header data.*/
      /*Check the magic value and version number.
        On failure only the read position is rewound; twopass_buffer_fill is
         left as is.*/
      if(oc_rc_unbuffer_val(&_enc->rc,4)!=0x5032544F||
       oc_rc_unbuffer_val(&_enc->rc,4)!=OC_RC_2PASS_VERSION){
        _enc->rc.twopass_buffer_bytes=0;
        return TH_ENOTFORMAT;
      }
      _enc->rc.frames_total[0]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
      _enc->rc.frames_total[1]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
      _enc->rc.frames_total[2]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
      exp[0]=(int)oc_rc_unbuffer_val(&_enc->rc,1);
      exp[1]=(int)oc_rc_unbuffer_val(&_enc->rc,1);
      scale_sum[0]=oc_rc_unbuffer_val(&_enc->rc,8);
      scale_sum[1]=oc_rc_unbuffer_val(&_enc->rc,8);
      /*Make sure the file claims to have at least one frame.
        Otherwise we probably got the placeholder data from an aborted pass 1.
        Also make sure the total frame count doesn't overflow an integer.*/
      buf_delay=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
       +_enc->rc.frames_total[2];
      if(_enc->rc.frames_total[0]==0||buf_delay<0||
       (ogg_uint32_t)buf_delay<_enc->rc.frames_total[0]||
       (ogg_uint32_t)buf_delay<_enc->rc.frames_total[1]){
        /*Restore the "no header yet" flag so the header is not treated as
           valid.*/
        _enc->rc.frames_total[0]=0;
        _enc->rc.twopass_buffer_bytes=0;
        return TH_EBADHEADER;
      }
      /*Got a valid header; set up pass 2.*/
      _enc->rc.frames_left[0]=_enc->rc.frames_total[0];
      _enc->rc.frames_left[1]=_enc->rc.frames_total[1];
      _enc->rc.frames_left[2]=_enc->rc.frames_total[2];
      /*If the user hasn't specified a buffer size, use the whole file.*/
      if(_enc->rc.frame_metrics==NULL){
        _enc->rc.buf_delay=buf_delay;
        _enc->rc.nframes[0]=_enc->rc.frames_total[0];
        _enc->rc.nframes[1]=_enc->rc.frames_total[1];
        _enc->rc.nframes[2]=_enc->rc.frames_total[2];
        _enc->rc.scale_sum[0]=scale_sum[0];
        _enc->rc.scale_sum[1]=scale_sum[1];
        _enc->rc.scale_window_end=buf_delay;
        oc_enc_rc_reset(_enc);
      }
      _enc->rc.exp[0]=exp[0];
      _enc->rc.exp[1]=exp[1];
      /*Clear the header data from the buffer to make room for packet data.*/
      _enc->rc.twopass_buffer_fill=0;
      _enc->rc.twopass_buffer_bytes=0;
    }
  }
  /*Once a valid header is in place, load per-frame packets.*/
  if(_enc->rc.frames_total[0]!=0){
    ogg_int64_t curframe_num;
    int         nframes_total;
    curframe_num=_enc->state.curframe_num;
    if(curframe_num>=0){
      /*We just encoded a frame; make sure things matched.*/
      if(_enc->rc.prev_metrics.dup_count!=_enc->prev_dup_count){
        _enc->rc.twopass_buffer_bytes=0;
        return TH_EINVAL;
      }
    }
    /*Advance to the number of the next frame to be encoded.*/
    curframe_num+=_enc->prev_dup_count+1;
    nframes_total=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
     +_enc->rc.frames_total[2];
    if(curframe_num>=nframes_total){
      /*We don't want any more data after the last frame, and we don't want to
         allow any more frames to be encoded.*/
      _enc->rc.twopass_buffer_bytes=0;
    }
    /*Only load new metrics if none are pending for the next frame.*/
    else if(_enc->rc.twopass_buffer_bytes==0){
      if(_enc->rc.frame_metrics==NULL){
        /*We're using a whole-file buffer:*/
        if(!_buf)return OC_RC_2PASS_PACKET_SZ-_enc->rc.twopass_buffer_fill;
        consumed=oc_rc_buffer_fill(&_enc->rc,
         _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ);
        if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){
          ogg_uint32_t dup_count;
          ogg_int32_t  log_scale;
          int          qti;
          int          arg;
          /*Read the metrics for the next frame.
            Reading also advances twopass_buffer_bytes to a non-zero value,
             which marks the metrics as loaded.*/
          dup_count=oc_rc_unbuffer_val(&_enc->rc,4);
          log_scale=oc_rc_unbuffer_val(&_enc->rc,4);
          _enc->rc.cur_metrics.log_scale=log_scale;
          /*The frame type is stored in the top bit of the dup count word.*/
          qti=(dup_count&0x80000000)>>31;
          _enc->rc.cur_metrics.dup_count=dup_count&0x7FFFFFFF;
          _enc->rc.cur_metrics.frame_type=qti;
          _enc->rc.twopass_force_kf=qti==OC_INTRA_FRAME;
          /*"Helpfully" set the dup count back to what it was in pass 1.*/
          arg=_enc->rc.cur_metrics.dup_count;
          th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg));
          /*Clear the buffer for the next frame.*/
          _enc->rc.twopass_buffer_fill=0;
        }
      }
      else{
        int frames_needed;
        /*We're using a finite buffer:*/
        /*Compute an upper bound on the number of packets still needed to
           fill the current window.*/
        frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay
         -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
         _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
         -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
        while(frames_needed>0){
          if(!_buf){
            return OC_RC_2PASS_PACKET_SZ*frames_needed
             -_enc->rc.twopass_buffer_fill;
          }
          consumed=oc_rc_buffer_fill(&_enc->rc,
           _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ);
          if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){
            oc_frame_metrics *m;
            int               fmi;
            ogg_uint32_t      dup_count;
            ogg_int32_t       log_scale;
            int               qti;
            /*Read the metrics for the next frame.*/
            dup_count=oc_rc_unbuffer_val(&_enc->rc,4);
            log_scale=oc_rc_unbuffer_val(&_enc->rc,4);
            /*Add them to the circular buffer.
              The new slot is nframe_metrics entries past the head, wrapping
               around at cframe_metrics.*/
            fmi=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics++;
            if(fmi>=_enc->rc.cframe_metrics)fmi-=_enc->rc.cframe_metrics;
            m=_enc->rc.frame_metrics+fmi;
            m->log_scale=log_scale;
            /*The frame type is stored in the top bit of the dup count word.*/
            qti=(dup_count&0x80000000)>>31;
            m->dup_count=dup_count&0x7FFFFFFF;
            m->frame_type=qti;
            /*And accumulate the statistics over the window.*/
            _enc->rc.nframes[qti]++;
            _enc->rc.nframes[2]+=m->dup_count;
            _enc->rc.scale_sum[qti]+=oc_bexp_q24(m->log_scale);
            _enc->rc.scale_window_end+=m->dup_count+1;
            /*Compute an upper bound on the number of remaining packets needed
               for the current window.*/
            frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay
             -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
             _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
             -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
            /*Clear the buffer for the next frame.*/
            _enc->rc.twopass_buffer_fill=0;
            _enc->rc.twopass_buffer_bytes=0;
          }
          /*Go back for more data.*/
          else break;
        }
        /*If we've got all the frames we need, fill in the current metrics.
          We're ready to go.*/
        if(frames_needed<=0){
          int arg;
          /*The frame to encode next is the one at the head of the circular
             buffer.*/
          *&_enc->rc.cur_metrics=
           *(_enc->rc.frame_metrics+_enc->rc.frame_metrics_head);
          _enc->rc.twopass_force_kf=
           _enc->rc.cur_metrics.frame_type==OC_INTRA_FRAME;
          /*"Helpfully" set the dup count back to what it was in pass 1.*/
          arg=_enc->rc.cur_metrics.dup_count;
          th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg));
          /*Mark us ready for the next frame.
            Any non-zero value works here; it is only tested against 0.*/
          _enc->rc.twopass_buffer_bytes=1;
        }
      }
    }
  }
  return (int)consumed;
}
1138