1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9  * by the Xiph.Org Foundation http://www.xiph.org/                  *
10  *                                                                  *
11  ********************************************************************
12 
13   function:
14   last mod: $Id: rate.c 16503 2009-08-22 18:14:02Z giles $
15 
16  ********************************************************************/
17 #include <stdlib.h>
18 #include <string.h>
19 #include "encint.h"
20 
/*A rough lookup table for tan(x), 0<=x<pi/2.
  The values are Q12 fixed-point and spaced at 5 degree intervals.
  These decisions are somewhat arbitrary, but sufficient for the 2nd order
   Bessel follower below.
  Values of x larger than 85 degrees are extrapolated from the last interval,
   which is way off, but "good enough".
  The table is only ever read, so it is declared const.*/
static const unsigned short OC_ROUGH_TAN_LOOKUP[18]={
      0,  358,  722, 1098, 1491, 1910,
   2365, 2868, 3437, 4096, 4881, 5850,
   7094, 8784,11254,15286,23230,46817
};
32 
33 /*_alpha is Q24 in the range [0,0.5).
34   The return values is 5.12.*/
oc_warp_alpha(int _alpha)35 static int oc_warp_alpha(int _alpha){
36   int i;
37   int d;
38   int t0;
39   int t1;
40   i=_alpha*36>>24;
41   if(i>=17)i=16;
42   t0=OC_ROUGH_TAN_LOOKUP[i];
43   t1=OC_ROUGH_TAN_LOOKUP[i+1];
44   d=_alpha*36-(i<<24);
45   return (int)(((ogg_int64_t)t0<<32)+(t1-t0<<8)*(ogg_int64_t)d>>32);
46 }
47 
48 /*Re-initialize the Bessel filter coefficients with the specified delay.
49   This does not alter the x/y state, but changes the reaction time of the
50    filter.
51   Altering the time constant of a reactive filter without alterning internal
52    state is something that has to be done carefuly, but our design operates at
53    high enough delays and with small enough time constant changes to make it
54    safe.*/
oc_iir_filter_reinit(oc_iir_filter * _f,int _delay)55 static void oc_iir_filter_reinit(oc_iir_filter *_f,int _delay){
56   int         alpha;
57   ogg_int64_t one48;
58   ogg_int64_t warp;
59   ogg_int64_t k1;
60   ogg_int64_t k2;
61   ogg_int64_t d;
62   ogg_int64_t a;
63   ogg_int64_t ik2;
64   ogg_int64_t b1;
65   ogg_int64_t b2;
66   /*This borrows some code from an unreleased version of Postfish.
67     See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
68      on deriving the filter coefficients.*/
69   /*alpha is Q24*/
70   alpha=(1<<24)/_delay;
71   one48=(ogg_int64_t)1<<48;
72   /*warp is 7.12*/
73   warp=OC_MAXI(oc_warp_alpha(alpha),1);
74   /*k1 is 9.12*/
75   k1=3*warp;
76   /*k2 is 16.24.*/
77   k2=k1*warp;
78   /*d is 16.15.*/
79   d=((1<<12)+k1<<12)+k2+256>>9;
80   /*a is 0.32, since d is larger than both 1.0 and k2.*/
81   a=(k2<<23)/d;
82   /*ik2 is 25.24.*/
83   ik2=one48/k2;
84   /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/
85   b1=2*a*(ik2-(1<<24));
86   /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/
87   b2=(one48<<8)-(4*a<<24)-b1;
88   /*All of the filter parameters are Q24.*/
89   _f->c[0]=(ogg_int32_t)(b1+((ogg_int64_t)1<<31)>>32);
90   _f->c[1]=(ogg_int32_t)(b2+((ogg_int64_t)1<<31)>>32);
91   _f->g=(ogg_int32_t)(a+128>>8);
92 }
93 
94 /*Initialize a 2nd order low-pass Bessel filter with the corresponding delay
95    and initial value.
96   _value is Q24.*/
oc_iir_filter_init(oc_iir_filter * _f,int _delay,ogg_int32_t _value)97 static void oc_iir_filter_init(oc_iir_filter *_f,int _delay,ogg_int32_t _value){
98   oc_iir_filter_reinit(_f,_delay);
99   _f->y[1]=_f->y[0]=_f->x[1]=_f->x[0]=_value;
100 }
101 
oc_iir_filter_update(oc_iir_filter * _f,ogg_int32_t _x)102 static ogg_int64_t oc_iir_filter_update(oc_iir_filter *_f,ogg_int32_t _x){
103   ogg_int64_t c0;
104   ogg_int64_t c1;
105   ogg_int64_t g;
106   ogg_int64_t x0;
107   ogg_int64_t x1;
108   ogg_int64_t y0;
109   ogg_int64_t y1;
110   ogg_int64_t ya;
111   c0=_f->c[0];
112   c1=_f->c[1];
113   g=_f->g;
114   x0=_f->x[0];
115   x1=_f->x[1];
116   y0=_f->y[0];
117   y1=_f->y[1];
118   ya=(_x+x0*2+x1)*g+y0*c0+y1*c1+(1<<23)>>24;
119   _f->x[1]=(ogg_int32_t)x0;
120   _f->x[0]=_x;
121   _f->y[1]=(ogg_int32_t)y0;
122   _f->y[0]=(ogg_int32_t)ya;
123   return ya;
124 }
125 
126 
127 
128 /*Search for the quantizer that matches the target most closely.
129   We don't assume a linear ordering, but when there are ties we pick the
130    quantizer closest to the old one.*/
oc_enc_find_qi_for_target(oc_enc_ctx * _enc,int _qti,int _qi_old,int _qi_min,ogg_int64_t _log_qtarget)131 static int oc_enc_find_qi_for_target(oc_enc_ctx *_enc,int _qti,int _qi_old,
132  int _qi_min,ogg_int64_t _log_qtarget){
133   ogg_int64_t best_qdiff;
134   int         best_qi;
135   int         qi;
136   best_qi=_qi_min;
137   best_qdiff=_enc->log_qavg[_qti][best_qi]-_log_qtarget;
138   best_qdiff=best_qdiff+OC_SIGNMASK(best_qdiff)^OC_SIGNMASK(best_qdiff);
139   for(qi=_qi_min+1;qi<64;qi++){
140     ogg_int64_t qdiff;
141     qdiff=_enc->log_qavg[_qti][qi]-_log_qtarget;
142     qdiff=qdiff+OC_SIGNMASK(qdiff)^OC_SIGNMASK(qdiff);
143     if(qdiff<best_qdiff||
144      qdiff==best_qdiff&&abs(qi-_qi_old)<abs(best_qi-_qi_old)){
145       best_qi=qi;
146       best_qdiff=qdiff;
147     }
148   }
149   return best_qi;
150 }
151 
/*Computes the R-D lambda for the current frame and selects the additional
   quantizers made available for adaptive quantization.
  _enc->lambda, _enc->state.nqis and the entries of _enc->state.qis[] past the
   first are written; _enc->state.qis[0] is only read.
  _enc: The encoder context.
  _qti: The frame type.*/
void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _qti){
  ogg_int64_t log_q;
  int         base_qi;
  int         count;
  /*For now, lambda is fixed depending on the qi value and frame type:
      lambda=qscale*(qavg[qti][qi]**2),
     where qscale=0.2125.
    This was derived by exhaustively searching for the optimal quantizer for
     the AC coefficients in each block from a number of test sequences for a
     number of fixed lambda values and fitting the peaks of the resulting
     histograms (on the log(qavg) scale).
    The same model applies to both inter and intra frames.
    A more adaptive scheme might perform better.*/
  base_qi=_enc->state.qis[0];
  /*If rate control is active, use the lambda for the _target_ quantizer.
    This allows us to scale to rates slightly lower than we'd normally be able
     to reach, and gives the rate control a semblance of "fractional qi"
     precision.
    TODO: Add API for changing QI, and allow extra precision.*/
  log_q=_enc->state.info.target_bitrate>0?
   _enc->rc.log_qtarget:_enc->log_qavg[_qti][base_qi];
  /*The resulting lambda value is less than 0x500000.*/
  _enc->lambda=(int)oc_bexp64(2*log_q-0x4780BD468D6B62BLL);
  /*Select additional quantizers.
    The R-D optimal block AC quantizer statistics suggest that the distribution
     is roughly Gaussian-like with a slight positive skew.
    K-means clustering on log_qavg to select 3 quantizers produces cluster
     centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}.
    Experiments confirm these are relatively good choices.

    Although we do greedy R-D optimization of the qii flags to avoid switching
     too frequently, this becomes ineffective at low rates, either because we
     do a poor job of predicting the actual R-D cost, or the greedy
     optimization is not sufficient.
    Therefore adaptive quantization is disabled above an (experimentally
     suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or
     INTER qi=20 with current matrices).
    This may need to be revised if the R-D cost estimation or qii flag
     optimization strategies change.*/
  count=1;
  if(!_enc->vp3_compatible&&log_q<(OC_Q57(56)>>3)){
    int qi_hi;
    int qi_lo;
    /*The quantizer nearest log_q+0.7, searched starting one index down.*/
    qi_hi=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(base_qi-1,0),0,
     log_q+(OC_Q57(7)+5)/10);
    if(qi_hi!=base_qi)_enc->state.qis[count++]=qi_hi;
    /*The quantizer nearest log_q-0.6, searched starting one index up.*/
    qi_lo=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(base_qi+1,63),0,
     log_q-(OC_Q57(6)+5)/10);
    /*Skip it if it duplicates the base quantizer or the entry most recently
       stored.*/
    if(qi_lo!=base_qi&&qi_lo!=_enc->state.qis[count-1]){
      _enc->state.qis[count++]=qi_lo;
    }
  }
  _enc->state.nqis=count;
}
203 
204 /*Binary exponential of _log_scale with 24-bit fractional precision and
205    saturation.
206   _log_scale: A binary logarithm in Q24 format.
207   Return: The binary exponential in Q24 format, saturated to 2**47-1 if
208    _log_scale was too large.*/
oc_bexp_q24(ogg_int32_t _log_scale)209 static ogg_int64_t oc_bexp_q24(ogg_int32_t _log_scale){
210   if(_log_scale<(ogg_int32_t)23<<24){
211     ogg_int64_t ret;
212     ret=oc_bexp64(((ogg_int64_t)_log_scale<<33)+OC_Q57(24));
213     return ret<0x7FFFFFFFFFFFLL?ret:0x7FFFFFFFFFFFLL;
214   }
215   return 0x7FFFFFFFFFFFLL;
216 }
217 
218 /*Convenience function converts Q57 value to a clamped 32-bit Q24 value
219   _in: input in Q57 format.
220   Return: same number in Q24 */
oc_q57_to_q24(ogg_int64_t _in)221 static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){
222   ogg_int64_t ret;
223   ret=_in+((ogg_int64_t)1<<32)>>33;
224   /*0x80000000 is automatically converted to unsigned on 32-bit systems.
225     -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to
226     unsigned.*/
227   return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,ret,0x7FFFFFFF);
228 }
229 
230 /*Binary exponential of _log_scale with 24-bit fractional precision and
231    saturation.
232   _log_scale: A binary logarithm in Q57 format.
233   Return: The binary exponential in Q24 format, saturated to 2**31-1 if
234    _log_scale was too large.*/
oc_bexp64_q24(ogg_int64_t _log_scale)235 static ogg_int32_t oc_bexp64_q24(ogg_int64_t _log_scale){
236   if(_log_scale<OC_Q57(8)){
237     ogg_int64_t ret;
238     ret=oc_bexp64(_log_scale+OC_Q57(24));
239     return ret<0x7FFFFFFF?(ogg_int32_t)ret:0x7FFFFFFF;
240   }
241   return 0x7FFFFFFF;
242 }
243 
244 
/*Resets the rate control state from the current encoder settings: the buffer
   bounds and fullness, the per-frame-type model exponents and scales, the
   drop statistics, and the low-pass filters that track them.
  The existing rc.buf_delay and rc.twopass values are used as inputs.
  _enc: The encoder context; only fields of _enc->rc are written.*/
static void oc_enc_rc_reset(oc_enc_ctx *_enc){
  oc_rc_state *rc;
  ogg_int64_t  npixels;
  ogg_int64_t  ibpp;
  int          inter_delay;
  int          intra_exp;
  int          intra_scale;
  int          inter_exp;
  int          inter_scale;
  rc=&_enc->rc;
  /*TODO: These parameters should be exposed in a th_encode_ctl() API.*/
  rc->bits_per_frame=(_enc->state.info.target_bitrate*
   (ogg_int64_t)_enc->state.info.fps_denominator)/
   _enc->state.info.fps_numerator;
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away.*/
  if(rc->bits_per_frame>0x400000000000LL){
    rc->bits_per_frame=(ogg_int64_t)0x400000000000LL;
  }
  else if(rc->bits_per_frame<32)rc->bits_per_frame=32;
  rc->buf_delay=OC_MAXI(rc->buf_delay,12);
  rc->max=rc->bits_per_frame*rc->buf_delay;
  /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend
     on a single keyframe interval.
    We can require fully half the bits in an interval for a keyframe, so this
     initial level gives us maximum flexibility for over/under-shooting in
     subsequent frames.*/
  rc->target=((rc->max+1)>>1)+((rc->bits_per_frame+2)>>2)*
   OC_MINI(_enc->keyframe_frequency_force,rc->buf_delay);
  rc->fullness=rc->target;
  /*Pick exponents and initial scales for quantizer selection, based on the
     number of pixels available per bit.*/
  npixels=_enc->state.info.frame_width*
   (ogg_int64_t)_enc->state.info.frame_height;
  rc->log_npixels=oc_blog64(npixels);
  ibpp=npixels/rc->bits_per_frame;
  /*Model parameters for frame type 0.*/
  if(ibpp>=2){
    intra_exp=48;
    intra_scale=834;
  }
  else if(ibpp>=1){
    intra_exp=55;
    intra_scale=1604;
  }
  else{
    intra_exp=59;
    intra_scale=1997;
  }
  /*Model parameters for frame type 1.*/
  if(ibpp>=8){
    inter_exp=73;
    inter_scale=1260;
  }
  else if(ibpp>=4){
    inter_exp=95;
    inter_scale=1751;
  }
  else{
    inter_exp=100;
    inter_scale=2249;
  }
  rc->exp[0]=intra_exp;
  rc->log_scale[0]=oc_blog64(intra_scale)-OC_Q57(8);
  rc->exp[1]=inter_exp;
  rc->log_scale[1]=oc_blog64(inter_scale)-OC_Q57(8);
  rc->prev_drop_count=0;
  rc->log_drop_scale=OC_Q57(0);
  /*Set up second order followers, initialized according to corresponding
     time constants.*/
  oc_iir_filter_init(&rc->scalefilter[0],4,oc_q57_to_q24(rc->log_scale[0]));
  inter_delay=(rc->twopass?
   OC_MAXI(_enc->keyframe_frequency_force,12):rc->buf_delay)>>1;
  rc->inter_count=0;
  /*The actual inter_delay starts at 10, to work within the range of values
     where later incrementing the delay works as designed; the computed delay
     is only recorded as the target.
    10 is not an exact choice, but rather a good working trade-off.*/
  rc->inter_delay=10;
  rc->inter_delay_target=inter_delay;
  oc_iir_filter_init(&rc->scalefilter[1],rc->inter_delay,
   oc_q57_to_q24(rc->log_scale[1]));
  oc_iir_filter_init(&rc->vfrfilter,4,oc_bexp64_q24(rc->log_drop_scale));
}
317 
/*Puts a rate control state into its initial configuration.
  The two-pass fields, the frame metrics pointer and the rate bias are always
   cleared; the remaining fields are only set up when a target bitrate has
   been requested.
  _rc:  The rate control state to initialize.
  _enc: The encoder context the state belongs to.
        NOTE(review): the reset at the end operates on _enc->rc, so _rc is
         presumably that same object; confirm against the callers.*/
void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc){
  _rc->frame_metrics=NULL;
  _rc->rate_bias=0;
  _rc->twopass=0;
  _rc->twopass_buffer_bytes=0;
  _rc->twopass_force_kf=0;
  /*Without a target bitrate there is nothing further to configure.*/
  if(_enc->state.info.target_bitrate<=0)return;
  /*The buffer size is set equal to the keyframe interval, clamped to the
     range [12,256] frames.
    Only the upper bound is applied here; oc_enc_rc_reset() raises the delay
     to the 12 frame minimum.
    The 12 frame minimum gives us some chance to distribute bit estimation
     errors.
    The 256 frame maximum means we'll require 8-10 seconds of pre-buffering
     at 24-30 fps, which is not unreasonable.*/
  if(_enc->keyframe_frequency_force>256)_rc->buf_delay=256;
  else _rc->buf_delay=_enc->keyframe_frequency_force;
  /*By default, frames may be dropped and buffer overflow is capped, while
     buffer underflow is not capped.*/
  _rc->drop_frames=1;
  _rc->cap_overflow=1;
  _rc->cap_underflow=0;
  oc_enc_rc_reset(_enc);
}
340 
/*Releases the storage owned by a rate control state.
  The frame metrics buffer is freed and its pointer reset to NULL, so the
   state no longer holds a dangling pointer once it has been cleared.
  _rc: The rate control state to clear.*/
void oc_rc_state_clear(oc_rc_state *_rc){
  _ogg_free(_rc->frame_metrics);
  _rc->frame_metrics=NULL;
}
344 
/*Re-derives the rate control buffer bounds from the current encoder settings.
  Before the first frame is coded this is a full reset; afterwards only the
   bounds, the target level and the INTER scale filter delay are updated, and
   the current fullness is left alone.
  In pass-2 mode the circular frame metrics buffer is also grown to cover the
   buffer delay, and the scale window is restarted when switching from
   whole-file buffering to a finite buffer.
  If the metrics buffer cannot be allocated, the function either returns
   early (no 2-pass header read yet) or falls back to the previous buffer
   delay and re-runs itself.
  _enc: The encoder context.*/
void oc_enc_rc_resize(oc_enc_ctx *_enc){
  /*If encoding has not yet begun, reset the buffer state.*/
  if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc);
  else{
    int idt;
    /*Otherwise, update the bounds on the buffer, but not the current
       fullness.*/
    _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
     (ogg_int64_t)_enc->state.info.fps_denominator)/
     _enc->state.info.fps_numerator;
    /*Insane framerates or frame sizes mean insane bitrates.
      Let's not get carried away.*/
    if(_enc->rc.bits_per_frame>0x400000000000LL){
      _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
    }
    else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
    _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
    _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
    _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
     OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
    /*Update the INTER-frame scale filter delay.
      We jump to it immediately if we've already seen enough frames; otherwise
       it is simply set as the new target.*/
    _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10);
    if(idt<OC_MINI(_enc->rc.inter_delay,_enc->rc.inter_count)){
      oc_iir_filter_init(&_enc->rc.scalefilter[1],idt,
       _enc->rc.scalefilter[1].y[0]);
      _enc->rc.inter_delay=idt;
    }
  }
  /*If we're in pass-2 mode, make sure the frame metrics array is big enough
     to hold frame statistics for the full buffer.*/
  if(_enc->rc.twopass==2){
    int cfm;
    int buf_delay;
    int reset_window;
    buf_delay=_enc->rc.buf_delay;
    /*The window is restarted when no metrics buffer exists yet and either no
       2-pass header has been read or the new delay is shorter than the whole
       file.*/
    reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0||
     buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
     +_enc->rc.frames_total[2]);
    cfm=_enc->rc.cframe_metrics;
    /*Only try to resize the frame metrics buffer if a) it's too small and
       b) we were using a finite buffer, or are about to start.*/
    if(cfm<buf_delay&&(_enc->rc.frame_metrics!=NULL||reset_window)){
      oc_frame_metrics *fm;
      int               nfm;
      int               fmh;
      /*The result goes into a temporary, so that the old buffer remains
         valid if the allocation fails.*/
      fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics,
       buf_delay*sizeof(*_enc->rc.frame_metrics));
      if(fm==NULL){
        /*We failed to allocate a finite buffer.*/
        /*If we don't have a valid 2-pass header yet, just return; we'll reset
           the buffer size when we read the header.*/
        if(_enc->rc.frames_total[0]==0)return;
        /*Otherwise revert to the largest finite buffer previously set, or to
           whole-file buffering if we were still using that.*/
        _enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL?
         cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
         +_enc->rc.frames_total[2];
        oc_enc_rc_resize(_enc);
        return;
      }
      _enc->rc.frame_metrics=fm;
      _enc->rc.cframe_metrics=buf_delay;
      /*Re-organize the circular buffer.*/
      fmh=_enc->rc.frame_metrics_head;
      nfm=_enc->rc.nframe_metrics;
      if(fmh+nfm>cfm){
        int shift;
        /*The window wrapped around the end of the old buffer.
          Move as many of the wrapped entries as fit into the newly added
           space at the end...*/
        shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm);
        memcpy(fm+cfm,fm,shift*sizeof(*fm));
        /*...and slide whatever is left down to the start of the buffer.
          The length must be given in bytes: passing only the entry count
           would move just a fraction of the remaining entries.*/
        if(fmh+nfm>buf_delay){
          memmove(fm,fm+shift,(fmh+nfm-buf_delay)*sizeof(*fm));
        }
      }
    }
    /*We were using whole-file buffering; now we're not.*/
    if(reset_window){
      _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0;
      _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
      _enc->rc.scale_window_end=_enc->rc.scale_window0=
       _enc->state.curframe_num+_enc->prev_dup_count+1;
      if(_enc->rc.twopass_buffer_bytes){
        int qti;
        /*We already read the metrics for the first frame in the window.*/
        *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics;
        _enc->rc.nframe_metrics++;
        qti=_enc->rc.cur_metrics.frame_type;
        _enc->rc.nframes[qti]++;
        _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count;
        _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
        _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1;
        if(_enc->rc.scale_window_end-_enc->rc.scale_window0<buf_delay){
          /*We need more frame data.*/
          _enc->rc.twopass_buffer_bytes=0;
        }
      }
    }
    /*Otherwise, we could shrink the size of the current window, if necessary,
       but leaving it like it is lets us adapt to the new buffer size more
       gracefully.*/
  }
}
446 
447 /*Scale the number of frames by the number of expected drops/duplicates.*/
oc_rc_scale_drop(oc_rc_state * _rc,int _nframes)448 static int oc_rc_scale_drop(oc_rc_state *_rc,int _nframes){
449   if(_rc->prev_drop_count>0||_rc->log_drop_scale>OC_Q57(0)){
450     ogg_int64_t dup_scale;
451     dup_scale=oc_bexp64((_rc->log_drop_scale
452      +oc_blog64(_rc->prev_drop_count+1)>>1)+OC_Q57(8));
453     if(dup_scale<_nframes<<8){
454       int dup_scalei;
455       dup_scalei=(int)dup_scale;
456       if(dup_scalei>0)_nframes=((_nframes<<8)+dup_scalei-1)/dup_scalei;
457     }
458     else _nframes=!!_nframes;
459   }
460   return _nframes;
461 }
462 
/*Selects the quantizer index to use for the next frame.
  The frames expected in the buffer window are counted by type, the bits
   available over that window are split between the two frame types using the
   per-type rate models, and the resulting log quantizer target is then
   adjusted by the optional overflow, clamp and hard-limit checks before being
   mapped to the nearest available qi.
  In pass 1 of two-pass mode the current qi is returned unchanged.
  _enc:   The encoder context.
          rc.log_qtarget is always updated; outside of pass 1, rc.rate_bias
           is updated as well, and in pass 2 so is rc.log_scale[].
  _qti:   The frame type being coded; it indexes the two-entry per-type model
           state, so it must be 0 or 1.
  _clamp: If non-zero, the target is limited to a fixed distance (in the log
           domain) from the quantizer of the current qi.
  Return: The selected quantizer index.*/
int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){
  ogg_int64_t  rate_total;
  ogg_int64_t  rate_bias;
  int          nframes[2];
  int          buf_delay;
  int          buf_pad;
  ogg_int64_t  log_qtarget;
  ogg_int64_t  log_scale0;
  ogg_int64_t  log_cur_scale;
  ogg_int64_t  log_qexp;
  int          exp0;
  int          old_qi;
  int          qi;
  /*Figure out how to re-distribute bits so that we hit our fullness target
     before the last keyframe in our current buffer window (after the current
     frame), or the end of the buffer window, whichever comes first.*/
  log_cur_scale=(ogg_int64_t)_enc->rc.scalefilter[_qti].y[0]<<33;
  buf_pad=0;
  switch(_enc->rc.twopass){
    default:{
      ogg_uint32_t next_key_frame;
      /*Single pass mode: assume only forced keyframes and attempt to estimate
         the drop count for VFR content.*/
      next_key_frame=_qti?_enc->keyframe_frequency_force
       -(_enc->state.curframe_num-_enc->state.keyframe_num):0;
      nframes[0]=(_enc->rc.buf_delay-OC_MINI(next_key_frame,_enc->rc.buf_delay)
       +_enc->keyframe_frequency_force-1)/_enc->keyframe_frequency_force;
      if(nframes[0]+_qti>1){
        nframes[0]--;
        buf_delay=next_key_frame+nframes[0]*_enc->keyframe_frequency_force;
      }
      else buf_delay=_enc->rc.buf_delay;
      nframes[1]=buf_delay-nframes[0];
      /*Downgrade the delta frame rate to correspond to the recent drop count
         history.*/
      nframes[1]=oc_rc_scale_drop(&_enc->rc,nframes[1]);
    }break;
    case 1:{
      /*Pass 1 mode: use a fixed qi value.*/
      qi=_enc->state.qis[0];
      _enc->rc.log_qtarget=_enc->log_qavg[_qti][qi];
      return qi;
    }break;
    case 2:{
      ogg_int64_t scale_sum[2];
      int         qti;
      /*Pass 2 mode: we know exactly how much of each frame type there is in
         the current buffer window, and have estimates for the scales.*/
      nframes[0]=_enc->rc.nframes[0];
      nframes[1]=_enc->rc.nframes[1];
      scale_sum[0]=_enc->rc.scale_sum[0];
      scale_sum[1]=_enc->rc.scale_sum[1];
      /*The window size can be slightly larger than the buffer window for VFR
         content; clamp it down, if appropriate (the excess will all be dup
         frames).*/
      buf_delay=OC_MINI(_enc->rc.scale_window_end-_enc->rc.scale_window0,
       _enc->rc.buf_delay);
      /*If we're approaching the end of the file, add some slack to keep us
         from slamming into a rail.
        Our rate accuracy goes down, but it keeps the result sensible.
        We position the target where the first forced keyframe beyond the end
         of the file would be (for consistency with 1-pass mode).*/
      buf_pad=OC_MINI(_enc->rc.buf_delay,_enc->state.keyframe_num
       +_enc->keyframe_frequency_force-_enc->rc.scale_window0);
      if(buf_delay<buf_pad)buf_pad-=buf_delay;
      else{
        /*Otherwise, search for the last keyframe in the buffer window and
           target that.*/
        buf_pad=0;
        /*TODO: Currently we only do this when using a finite buffer; we could
           save the position of the last keyframe in the summary data and do it
           with a whole-file buffer as well, but it isn't likely to make a
           difference.*/
        if(_enc->rc.frame_metrics!=NULL){
          int fmi;
          int fm_tail;
          fm_tail=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics;
          if(fm_tail>=_enc->rc.cframe_metrics)fm_tail-=_enc->rc.cframe_metrics;
          /*Walk the circular buffer backwards from the tail.*/
          for(fmi=fm_tail;;){
            oc_frame_metrics *m;
            fmi--;
            if(fmi<0)fmi+=_enc->rc.cframe_metrics;
            /*Stop before we remove the first frame.*/
            if(fmi==_enc->rc.frame_metrics_head)break;
            m=_enc->rc.frame_metrics+fmi;
            /*If we find a keyframe, remove it and everything past it.*/
            if(m->frame_type==OC_INTRA_FRAME){
              do{
                qti=m->frame_type;
                nframes[qti]--;
                scale_sum[qti]-=oc_bexp_q24(m->log_scale);
                buf_delay-=m->dup_count+1;
                fmi++;
                if(fmi>=_enc->rc.cframe_metrics)fmi=0;
                m=_enc->rc.frame_metrics+fmi;
              }
              while(fmi!=fm_tail);
              /*And stop scanning backwards.*/
              break;
            }
          }
        }
      }
      /*If we're not using the same frame type as in pass 1 (because someone
         changed the keyframe interval), remove that scale estimate.
        We'll add in a replacement for the correct frame type below.*/
      qti=_enc->rc.cur_metrics.frame_type;
      if(qti!=_qti){
        nframes[qti]--;
        scale_sum[qti]-=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
      }
      /*Compute log_scale estimates for each frame type from the pass-1 scales
         we measured in the current window.*/
      for(qti=0;qti<2;qti++){
        _enc->rc.log_scale[qti]=nframes[qti]>0?
         oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24):
         -_enc->rc.log_npixels;
      }
      /*If we're not using the same frame type as in pass 1, add a scale
         estimate for the corresponding frame using the current low-pass
         filter value.
        This is mostly to ensure we have a valid estimate even when pass 1 had
         no frames of this type in the buffer window.
        TODO: We could also plan ahead and figure out how many keyframes we'll
         be forced to add in the current buffer window.*/
      qti=_enc->rc.cur_metrics.frame_type;
      if(qti!=_qti){
        ogg_int64_t scale;
        /*Turn the average for the type being coded back into a sum, add the
           filtered estimate as one more frame, and re-average.*/
        scale=_enc->rc.log_scale[_qti]<OC_Q57(23)?
         oc_bexp64(_enc->rc.log_scale[_qti]+OC_Q57(24)):0x7FFFFFFFFFFFLL;
        scale*=nframes[_qti];
        nframes[_qti]++;
        scale+=oc_bexp_q24(log_cur_scale>>33);
        /*The sum covers nframes[_qti] frames, so that is the count to
           divide by (not the count for the pass-1 frame type).*/
        _enc->rc.log_scale[_qti]=oc_blog64(scale)
         -oc_blog64(nframes[_qti])-OC_Q57(24);
      }
      else log_cur_scale=(ogg_int64_t)_enc->rc.cur_metrics.log_scale<<33;
      /*Add the padding from above.
        This basically reverts to 1-pass estimations in the last keyframe
         interval.*/
      if(buf_pad>0){
        ogg_int64_t scale;
        int         nextra_frames;
        /*Extend the buffer.*/
        buf_delay+=buf_pad;
        /*Add virtual delta frames according to the estimated drop count.*/
        nextra_frames=oc_rc_scale_drop(&_enc->rc,buf_pad);
        /*And blend in the low-pass filtered scale according to how many frames
           we added.*/
        scale=
         oc_bexp64(_enc->rc.log_scale[1]+OC_Q57(24))*(ogg_int64_t)nframes[1]
         +oc_bexp_q24(_enc->rc.scalefilter[1].y[0])*(ogg_int64_t)nextra_frames;
        nframes[1]+=nextra_frames;
        _enc->rc.log_scale[1]=oc_blog64(scale)-oc_blog64(nframes[1])-OC_Q57(24);
      }
    }break;
  }
  /*If we've been missing our target, add a penalty term.*/
  rate_bias=(_enc->rc.rate_bias/(_enc->state.curframe_num+1000))*
   (buf_delay-buf_pad);
  /*rate_total is the total bits available over the next buf_delay frames.*/
  rate_total=_enc->rc.fullness-_enc->rc.target+rate_bias
   +buf_delay*_enc->rc.bits_per_frame;
  log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels;
  /*If there aren't enough bits to achieve our desired fullness level, use the
     minimum quality permitted.*/
  if(rate_total<=buf_delay)log_qtarget=OC_QUANT_MAX_LOG;
  else{
    static const ogg_int64_t LOG_KEY_RATIO=0x0137222BB70747BALL;
    ogg_int64_t log_scale1;
    ogg_int64_t rlo;
    ogg_int64_t rhi;
    log_scale1=_enc->rc.log_scale[1-_qti]+_enc->rc.log_npixels;
    /*Bisect on the per-frame rate for the current frame type, predicting the
       rate of the other type from it, until the two together account for
       rate_total.*/
    rlo=0;
    rhi=(rate_total+nframes[_qti]-1)/nframes[_qti];
    while(rlo<rhi){
      ogg_int64_t curr;
      ogg_int64_t rdiff;
      ogg_int64_t log_rpow;
      ogg_int64_t rscale;
      curr=rlo+rhi>>1;
      log_rpow=oc_blog64(curr)-log_scale0;
      log_rpow=(log_rpow+(_enc->rc.exp[_qti]>>1))/_enc->rc.exp[_qti];
      if(_qti)log_rpow+=LOG_KEY_RATIO>>6;
      else log_rpow-=LOG_KEY_RATIO>>6;
      log_rpow*=_enc->rc.exp[1-_qti];
      rscale=nframes[1-_qti]*oc_bexp64(log_scale1+log_rpow);
      rdiff=nframes[_qti]*curr+rscale-rate_total;
      if(rdiff<0)rlo=curr+1;
      else if(rdiff>0)rhi=curr-1;
      else break;
    }
    log_qtarget=OC_Q57(2)-((oc_blog64(rlo)-log_scale0+(_enc->rc.exp[_qti]>>1))/
     _enc->rc.exp[_qti]<<6);
    log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
  }
  /*The above allocation looks only at the total rate we'll accumulate in the
     next buf_delay frames.
    However, we could overflow the buffer on the very next frame, so check for
     that here, if we're not using a soft target.*/
  exp0=_enc->rc.exp[_qti];
  if(_enc->rc.cap_overflow){
    ogg_int64_t margin;
    ogg_int64_t soft_limit;
    ogg_int64_t log_soft_limit;
    /*Allow 3% of the buffer for prediction error.
      This should be plenty, and we don't mind if we go a bit over; we only
       want to keep these bits from being completely wasted.*/
    margin=_enc->rc.max+31>>5;
    /*We want to use at least this many bits next frame.*/
    soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin);
    log_soft_limit=oc_blog64(soft_limit);
    /*If we're predicting we won't use that many...*/
    log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
    if(log_scale0-log_qexp<log_soft_limit){
      /*Scale the adjustment based on how far into the margin we are.*/
      log_qexp+=(log_scale0-log_soft_limit-log_qexp>>32)*
       ((OC_MINI(margin,soft_limit)<<32)/margin);
      log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
    }
  }
  /*If this was not one of the initial frames, limit the change in quality.*/
  old_qi=_enc->state.qis[0];
  if(_clamp){
    ogg_int64_t log_qmin;
    ogg_int64_t log_qmax;
    /*Clamp the target quantizer to within [0.8*Q,1.2*Q], where Q is the
       current quantizer.
      TODO: With user-specified quant matrices, we need to enlarge these limits
       if they don't actually let us change qi values.*/
    log_qmin=_enc->log_qavg[_qti][old_qi]-0x00A4D3C25E68DC58LL;
    log_qmax=_enc->log_qavg[_qti][old_qi]+0x00A4D3C25E68DC58LL;
    log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax);
  }
  /*The above allocation looks only at the total rate we'll accumulate in the
     next buf_delay frames.
    However, we could bust the budget on the very next frame, so check for that
     here, if we're not using a soft target.*/
  /*Disabled when our minimum qi > 0; if we saturate log_qtarget to the
     maximum possible size when we have a minimum qi, the resulting lambda
     will interact very strangely with SKIP.
    The resulting artifacts look like waterfalls.*/
  if(_enc->state.info.quality==0){
    ogg_int64_t log_hard_limit;
    /*Compute the maximum number of bits we can use in the next frame.
      Allow 50% of the rate for a single frame for prediction error.
      This may not be enough for keyframes or sudden changes in complexity.*/
    log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1));
    /*If we're predicting we'll use more than this...*/
    log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
    if(log_scale0-log_qexp>log_hard_limit){
      /*Force the target to hit our limit exactly.*/
      log_qexp=log_scale0-log_hard_limit;
      log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
      /*If that target is unreasonable, oh well; we'll have to drop.*/
      log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
    }
  }
  /*Compute a final estimate of the number of bits we plan to use.*/
  log_qexp=(log_qtarget-OC_Q57(2)>>6)*_enc->rc.exp[_qti];
  _enc->rc.rate_bias+=oc_bexp64(log_cur_scale+_enc->rc.log_npixels-log_qexp);
  /*The configured quality serves as the lowest qi the search may return.*/
  qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi,
   _enc->state.info.quality,log_qtarget);
  /*Save the quantizer target for lambda calculations.*/
  _enc->rc.log_qtarget=log_qtarget;
  return qi;
}
730 
/*Updates the rate control state once the size of a frame is known.
  _enc:       The encoder context.
  _bits:      The number of bits the frame used.
              Values less than or equal to zero are treated as a frame in
               which no blocks were coded.
  _qti:       The frame type index used to select the per-type scale filter,
               exponent, and log-scale estimate.
  _qi:        Not used by this function.
  _trial:     Non-zero if this was a trial encode.
              In that case the scale estimate is loaded directly into the
               filter state, and neither the two-pass window nor the buffer
               fullness is updated.
  _droppable: Non-zero if the caller permits this frame to be dropped.
  Return: 1 if the frame busted the budget and must be dropped, 0 otherwise.*/
int oc_enc_update_rc_state(oc_enc_ctx *_enc,
 long _bits,int _qti,int _qi,int _trial,int _droppable){
  ogg_int64_t refill;
  ogg_int64_t frame_log_scale;
  int         was_dropped;
  was_dropped=0;
  /*Frames may only be dropped if the user enabled it.
    Dropping is also disabled for now in infinite-buffer two-pass mode.*/
  if(!_enc->rc.drop_frames)_droppable=0;
  else if(_enc->rc.twopass&&_enc->rc.frame_metrics==NULL)_droppable=0;
  /*The number of bits added to the buffer for this frame and its dups.*/
  refill=_enc->rc.bits_per_frame*(1+_enc->dup_count);
  if(_bits>0){
    ogg_int64_t log_nbits;
    ogg_int64_t log_qscaled;
    /*Compute the estimated scale factor for this frame type.*/
    log_nbits=oc_blog64(_bits);
    log_qscaled=_enc->rc.log_qtarget-OC_Q57(2);
    log_qscaled=(log_qscaled>>6)*(_enc->rc.exp[_qti]);
    frame_log_scale=OC_MINI(log_nbits-_enc->rc.log_npixels+log_qscaled,
     OC_Q57(16));
  }
  else{
    /*We didn't code any blocks in this frame.*/
    frame_log_scale=OC_Q57(-64);
    _bits=0;
  }
  /*Special two-pass processing.*/
  if(_enc->rc.twopass==1){
    /*Pass 1 mode: save the metrics for this frame.*/
    _enc->rc.cur_metrics.log_scale=oc_q57_to_q24(frame_log_scale);
    _enc->rc.cur_metrics.dup_count=_enc->dup_count;
    _enc->rc.cur_metrics.frame_type=_enc->state.frame_type;
    _enc->rc.twopass_buffer_bytes=0;
  }
  else if(_enc->rc.twopass==2&&!_trial){
    ogg_int64_t window_start;
    int         prev_qti;
    /*Pass 2 mode: move the current metrics back one frame.*/
    _enc->rc.prev_metrics=_enc->rc.cur_metrics;
    window_start=_enc->state.curframe_num+_enc->dup_count+1;
    /*Back out the last frame's statistics from the sliding window.*/
    prev_qti=_enc->rc.prev_metrics.frame_type;
    _enc->rc.frames_left[prev_qti]--;
    _enc->rc.frames_left[2]-=_enc->rc.prev_metrics.dup_count;
    _enc->rc.nframes[prev_qti]--;
    _enc->rc.nframes[2]-=_enc->rc.prev_metrics.dup_count;
    _enc->rc.scale_sum[prev_qti]-=
     oc_bexp_q24(_enc->rc.prev_metrics.log_scale);
    _enc->rc.scale_window0=(int)window_start;
    /*Free the corresponding entry in the circular buffer, if there is one.*/
    if(_enc->rc.frame_metrics!=NULL){
      _enc->rc.nframe_metrics--;
      if(++_enc->rc.frame_metrics_head>=_enc->rc.cframe_metrics){
        _enc->rc.frame_metrics_head=0;
      }
    }
    /*Mark us ready for the next 2-pass packet.*/
    _enc->rc.twopass_buffer_bytes=0;
    /*Update state, so the user doesn't have to keep calling 2pass_in after
       they've fed in all the data when we're using a finite buffer.*/
    _enc->prev_dup_count=_enc->dup_count;
    oc_enc_rc_2pass_in(_enc,NULL,0);
  }
  /*Common to all passes:*/
  if(_bits<=0){
    /*Nothing was coded: just increase the drop count.*/
    _enc->rc.prev_drop_count+=1+_enc->dup_count;
  }
  else{
    if(_trial){
      /*Use the estimated scale factor directly if this was a trial.*/
      _enc->rc.scalefilter[_qti].y[1]=_enc->rc.scalefilter[_qti].y[0]=
       _enc->rc.scalefilter[_qti].x[1]=_enc->rc.scalefilter[_qti].x[0]=
       oc_q57_to_q24(frame_log_scale);
      _enc->rc.log_scale[_qti]=frame_log_scale;
    }
    else{
      /*Lengthen the time constant for the INTER filter as we collect more
         frame statistics, until we reach our target.*/
      if(_enc->rc.inter_delay<_enc->rc.inter_delay_target&&
       _enc->rc.inter_count>=_enc->rc.inter_delay&&_qti==OC_INTER_FRAME){
        _enc->rc.inter_delay++;
        oc_iir_filter_reinit(&_enc->rc.scalefilter[1],_enc->rc.inter_delay);
      }
      /*Update the low-pass scale filter for this frame type, regardless of
         whether or not we end up dropping this frame.*/
      _enc->rc.log_scale[_qti]=oc_iir_filter_update(
       &_enc->rc.scalefilter[_qti],oc_q57_to_q24(frame_log_scale))<<33;
      if(_droppable&&_enc->rc.fullness+refill<_bits){
        /*This frame busts our budget, so it must be dropped.*/
        _enc->rc.prev_drop_count+=1+_enc->dup_count;
        _bits=0;
        was_dropped=1;
      }
      else{
        ogg_uint32_t ndrops;
        /*Update a low-pass filter to estimate the "real" frame rate taking
           drops and duplicates into account.
          This is only done if the frame is coded, as it needs the final
           count of dropped frames.*/
        ndrops=_enc->rc.prev_drop_count+1;
        ndrops=ndrops>0x7F?0x7FFFFFFF:ndrops<<24;
        _enc->rc.log_drop_scale=oc_blog64(oc_iir_filter_update(
         &_enc->rc.vfrfilter,ndrops))-OC_Q57(24);
        /*Initialize the drop count for this frame to the user-requested dup
           count.
          It will be increased if we drop more frames.*/
        _enc->rc.prev_drop_count=_enc->dup_count;
      }
    }
    /*Increment the INTER frame count, for filter adaptation purposes.*/
    if(_enc->rc.inter_count<INT_MAX)_enc->rc.inter_count+=_qti;
  }
  /*Trial encodes leave the buffer fullness level alone.*/
  if(_trial)return was_dropped;
  /*Otherwise update the buffer fullness level.*/
  _enc->rc.fullness+=refill-_bits;
  /*If we're too quick filling the buffer and overflow is capped,
    that rate is lost forever.*/
  if(_enc->rc.cap_overflow&&_enc->rc.fullness>_enc->rc.max){
    _enc->rc.fullness=_enc->rc.max;
  }
  /*If we're too quick draining the buffer and underflow is capped,
    don't try to make up that rate later.*/
  if(_enc->rc.cap_underflow&&_enc->rc.fullness<0)_enc->rc.fullness=0;
  /*Adjust the bias for the real bits we've used.*/
  _enc->rc.rate_bias-=_bits;
  return was_dropped;
}
865 
/*The version number stored in (and required of) the two-pass summary
   header.*/
#define OC_RC_2PASS_VERSION (1)
/*The size of the two-pass summary header in bytes: a 4-byte magic value, a
   4-byte version, three 4-byte frame counts, two 1-byte exponents, and two
   8-byte scale sums.*/
#define OC_RC_2PASS_HDR_SZ (38)
/*The size of a per-frame two-pass packet in bytes: a 4-byte word holding the
   dup count and frame type, followed by a 4-byte log scale.*/
#define OC_RC_2PASS_PACKET_SZ (8)
869 
/*Appends the low _bytes bytes of _val to the two-pass buffer, least
   significant byte first.
  _rc:    The rate control state that owns the buffer.
  _val:   The value to store.
  _bytes: The number of bytes to write; nothing is written if it is not
           positive.*/
static void oc_rc_buffer_val(oc_rc_state *_rc,ogg_int64_t _val,int _bytes){
  int i;
  for(i=0;i<_bytes;i++){
    _rc->twopass_buffer[_rc->twopass_buffer_bytes]=(unsigned char)(_val&0xFF);
    _rc->twopass_buffer_bytes++;
    _val>>=8;
  }
}
876 
/*Produces the next chunk of pass 1 statistics.
  The first call switches the encoder into pass 1 mode and emits a
   placeholder summary header; later calls emit one packet per frame, and a
   final summary header once the encoder has reached OC_PACKET_DONE.
  _enc: The encoder context.
  _buf: Returns a pointer to the buffered data, or NULL if there is none.
  Return: The number of bytes available in *_buf, or 0 if the data for this
           frame has already been retrieved.*/
int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf){
  int i;
  if(_enc->rc.twopass_buffer_bytes!=0){
    /*Something is already buffered.
      Unless encoding is done and what is buffered is not the summary header,
       the data for this frame has already been retrieved.*/
    if(_enc->packet_state!=OC_PACKET_DONE||
     _enc->rc.twopass_buffer_bytes==OC_RC_2PASS_HDR_SZ){
      *_buf=NULL;
      return 0;
    }
    /*Emit the real summary header.*/
    _enc->rc.twopass_buffer_bytes=0;
    oc_rc_buffer_val(&_enc->rc,0x5032544F,4);
    oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4);
    for(i=0;i<3;i++)oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[i],4);
    for(i=0;i<2;i++)oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[i],1);
    for(i=0;i<2;i++)oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[i],8);
  }
  else if(_enc->rc.twopass==0){
    int first_qi;
    /*Pick first-pass qi for scale calculations.*/
    first_qi=oc_enc_select_qi(_enc,0,0);
    _enc->state.nqis=1;
    _enc->state.qis[0]=first_qi;
    _enc->rc.twopass=1;
    for(i=0;i<3;i++)_enc->rc.frames_total[i]=0;
    for(i=0;i<2;i++)_enc->rc.scale_sum[i]=0;
    /*Fill in dummy summary values.*/
    oc_rc_buffer_val(&_enc->rc,0x5032544F,4);
    oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4);
    oc_rc_buffer_val(&_enc->rc,0,OC_RC_2PASS_HDR_SZ-8);
  }
  else{
    int frame_qti;
    /*Accumulate this frame into the summary totals and emit its packet.*/
    frame_qti=_enc->rc.cur_metrics.frame_type;
    _enc->rc.scale_sum[frame_qti]+=
     oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
    _enc->rc.frames_total[frame_qti]++;
    _enc->rc.frames_total[2]+=_enc->rc.cur_metrics.dup_count;
    /*The frame type is stored in the top bit of the dup count word.*/
    oc_rc_buffer_val(&_enc->rc,
     _enc->rc.cur_metrics.dup_count|(_enc->rc.cur_metrics.frame_type<<31),4);
    oc_rc_buffer_val(&_enc->rc,_enc->rc.cur_metrics.log_scale,4);
  }
  *_buf=_enc->rc.twopass_buffer;
  return _enc->rc.twopass_buffer_bytes;
}
926 
/*Copies input bytes into the two-pass buffer until it holds _goal bytes or
   the input is exhausted.
  _rc:       The rate control state that owns the buffer.
  _buf:      The input data.
  _bytes:    The total number of bytes in _buf.
  _consumed: The number of bytes of _buf that have already been used.
  _goal:     The desired buffer fill level.
  Return: The updated count of bytes of _buf that have been used.*/
static size_t oc_rc_buffer_fill(oc_rc_state *_rc,
 unsigned char *_buf,size_t _bytes,size_t _consumed,size_t _goal){
  for(;_consumed<_bytes;_consumed++){
    if(_rc->twopass_buffer_fill>=_goal)break;
    _rc->twopass_buffer[_rc->twopass_buffer_fill]=_buf[_consumed];
    _rc->twopass_buffer_fill++;
  }
  return _consumed;
}
934 
oc_rc_unbuffer_val(oc_rc_state * _rc,int _bytes)935 static ogg_int64_t oc_rc_unbuffer_val(oc_rc_state *_rc,int _bytes){
936   ogg_int64_t ret;
937   int         shift;
938   ret=0;
939   shift=0;
940   while(_bytes-->0){
941     ret|=((ogg_int64_t)_rc->twopass_buffer[_rc->twopass_buffer_bytes++])<<shift;
942     shift+=8;
943   }
944   return ret;
945 }
946 
/*Feeds pass 1 statistics to the encoder for pass 2 rate control.
  The first call switches the encoder into pass 2 mode.
  The summary header is parsed first, followed by the per-frame packets needed
   before the next frame can be encoded.
  _enc:   The encoder context.
  _buf:   The statistics data, or NULL to ask how many bytes are wanted next.
  _bytes: The number of bytes available in _buf.
  Return: If _buf is NULL, the number of bytes still wanted (0 if none).
          Otherwise the number of bytes of _buf that were consumed, or
           TH_ENOTFORMAT if the header magic value or version is wrong,
           TH_EBADHEADER if the header frame counts are invalid, or
           TH_EINVAL if the dup count of the frame just encoded does not match
           the one recorded in pass 1.*/
int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes){
  ogg_int64_t next_frame_num;
  size_t      nread;
  int         total_frames;
  nread=0;
  /*Enable pass 2 mode if this is the first call.*/
  if(_enc->rc.twopass==0){
    _enc->rc.twopass=2;
    _enc->rc.twopass_buffer_fill=0;
    _enc->rc.frames_total[0]=0;
    _enc->rc.nframe_metrics=0;
    _enc->rc.cframe_metrics=0;
    _enc->rc.frame_metrics_head=0;
    _enc->rc.scale_window0=0;
    _enc->rc.scale_window_end=0;
  }
  /*If we haven't got a valid summary header yet, try to parse one.*/
  if(_enc->rc.frames_total[0]==0){
    if(_buf==NULL){
      int npackets;
      /*If we're using a whole-file buffer, we just need the first frame.
        Otherwise, we may need as many as one per buffer slot.*/
      npackets=_enc->rc.frame_metrics==NULL?1:_enc->rc.buf_delay;
      return OC_RC_2PASS_HDR_SZ+npackets*OC_RC_2PASS_PACKET_SZ
       -_enc->rc.twopass_buffer_fill;
    }
    nread=oc_rc_buffer_fill(&_enc->rc,_buf,_bytes,nread,OC_RC_2PASS_HDR_SZ);
    if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_HDR_SZ){
      ogg_int64_t hdr_scale_sum[2];
      int         hdr_exp[2];
      int         hdr_nframes;
      int         i;
      /*Check the magic value and version number.*/
      if(oc_rc_unbuffer_val(&_enc->rc,4)!=0x5032544F||
       oc_rc_unbuffer_val(&_enc->rc,4)!=OC_RC_2PASS_VERSION){
        _enc->rc.twopass_buffer_bytes=0;
        return TH_ENOTFORMAT;
      }
      /*Read the rest of the summary header, in the order it was written.*/
      for(i=0;i<3;i++){
        _enc->rc.frames_total[i]=
         (ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
      }
      for(i=0;i<2;i++)hdr_exp[i]=(int)oc_rc_unbuffer_val(&_enc->rc,1);
      for(i=0;i<2;i++)hdr_scale_sum[i]=oc_rc_unbuffer_val(&_enc->rc,8);
      /*Make sure the file claims to have at least one frame.
        Otherwise we probably got the placeholder data from an aborted pass 1.
        Also make sure the total frame count doesn't overflow an integer.*/
      hdr_nframes=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
       +_enc->rc.frames_total[2];
      if(_enc->rc.frames_total[0]==0||hdr_nframes<0||
       (ogg_uint32_t)hdr_nframes<_enc->rc.frames_total[0]||
       (ogg_uint32_t)hdr_nframes<_enc->rc.frames_total[1]){
        _enc->rc.frames_total[0]=0;
        _enc->rc.twopass_buffer_bytes=0;
        return TH_EBADHEADER;
      }
      /*Got a valid header; set up pass 2.*/
      for(i=0;i<3;i++)_enc->rc.frames_left[i]=_enc->rc.frames_total[i];
      /*If the user hasn't specified a buffer size, use the whole file.*/
      if(_enc->rc.frame_metrics==NULL){
        _enc->rc.buf_delay=hdr_nframes;
        for(i=0;i<3;i++)_enc->rc.nframes[i]=_enc->rc.frames_total[i];
        for(i=0;i<2;i++)_enc->rc.scale_sum[i]=hdr_scale_sum[i];
        _enc->rc.scale_window_end=hdr_nframes;
        oc_enc_rc_reset(_enc);
      }
      /*Install the header's exponents only after the reset above.*/
      for(i=0;i<2;i++)_enc->rc.exp[i]=hdr_exp[i];
      /*Clear the header data from the buffer to make room for packet data.*/
      _enc->rc.twopass_buffer_fill=0;
      _enc->rc.twopass_buffer_bytes=0;
    }
  }
  /*Without a valid header there is nothing more we can do yet.*/
  if(_enc->rc.frames_total[0]==0)return (int)nread;
  next_frame_num=_enc->state.curframe_num;
  /*If we just encoded a frame, make sure things matched.*/
  if(next_frame_num>=0&&
   _enc->rc.prev_metrics.dup_count!=_enc->prev_dup_count){
    _enc->rc.twopass_buffer_bytes=0;
    return TH_EINVAL;
  }
  next_frame_num+=_enc->prev_dup_count+1;
  total_frames=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
   +_enc->rc.frames_total[2];
  if(next_frame_num>=total_frames){
    /*We don't want any more data after the last frame, and we don't want to
       allow any more frames to be encoded.*/
    _enc->rc.twopass_buffer_bytes=0;
    return (int)nread;
  }
  /*A non-zero byte count means the next frame's metrics are already loaded.*/
  if(_enc->rc.twopass_buffer_bytes!=0)return (int)nread;
  if(_enc->rc.frame_metrics==NULL){
    /*We're using a whole-file buffer:*/
    if(_buf==NULL)return OC_RC_2PASS_PACKET_SZ-_enc->rc.twopass_buffer_fill;
    nread=oc_rc_buffer_fill(&_enc->rc,
     _buf,_bytes,nread,OC_RC_2PASS_PACKET_SZ);
    if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){
      ogg_uint32_t packed_dups;
      ogg_int32_t  packed_scale;
      int          frame_qti;
      int          dups;
      /*Read the metrics for the next frame.*/
      packed_dups=oc_rc_unbuffer_val(&_enc->rc,4);
      packed_scale=oc_rc_unbuffer_val(&_enc->rc,4);
      _enc->rc.cur_metrics.log_scale=packed_scale;
      /*The frame type lives in the top bit of the dup count word.*/
      frame_qti=packed_dups>>31;
      _enc->rc.cur_metrics.dup_count=packed_dups&0x7FFFFFFF;
      _enc->rc.cur_metrics.frame_type=frame_qti;
      _enc->rc.twopass_force_kf=frame_qti==OC_INTRA_FRAME;
      /*"Helpfully" set the dup count back to what it was in pass 1.*/
      dups=_enc->rc.cur_metrics.dup_count;
      th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&dups,sizeof(dups));
      /*Clear the buffer for the next frame.*/
      _enc->rc.twopass_buffer_fill=0;
    }
  }
  else{
    int npackets;
    /*We're using a finite buffer:*/
    npackets=OC_CLAMPI(0,_enc->rc.buf_delay
     -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
     _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
     -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
    while(npackets>0){
      oc_frame_metrics *slot;
      int               slot_idx;
      ogg_uint32_t      packed_dups;
      ogg_int32_t       packed_scale;
      int               frame_qti;
      if(_buf==NULL){
        return OC_RC_2PASS_PACKET_SZ*npackets-_enc->rc.twopass_buffer_fill;
      }
      nread=oc_rc_buffer_fill(&_enc->rc,
       _buf,_bytes,nread,OC_RC_2PASS_PACKET_SZ);
      /*If we don't have a whole packet yet, go back for more data.*/
      if(_enc->rc.twopass_buffer_fill<OC_RC_2PASS_PACKET_SZ)break;
      /*Read the metrics for the next frame.*/
      packed_dups=oc_rc_unbuffer_val(&_enc->rc,4);
      packed_scale=oc_rc_unbuffer_val(&_enc->rc,4);
      /*Add them to the circular buffer.*/
      slot_idx=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics;
      _enc->rc.nframe_metrics++;
      if(slot_idx>=_enc->rc.cframe_metrics){
        slot_idx-=_enc->rc.cframe_metrics;
      }
      slot=&_enc->rc.frame_metrics[slot_idx];
      slot->log_scale=packed_scale;
      frame_qti=packed_dups>>31;
      slot->dup_count=packed_dups&0x7FFFFFFF;
      slot->frame_type=frame_qti;
      /*And accumulate the statistics over the window.*/
      _enc->rc.nframes[frame_qti]++;
      _enc->rc.nframes[2]+=slot->dup_count;
      _enc->rc.scale_sum[frame_qti]+=oc_bexp_q24(slot->log_scale);
      _enc->rc.scale_window_end+=slot->dup_count+1;
      /*Compute an upper bound on the number of remaining packets needed
         for the current window.*/
      npackets=OC_CLAMPI(0,_enc->rc.buf_delay
       -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
       _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
       -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
      /*Clear the buffer for the next frame.*/
      _enc->rc.twopass_buffer_fill=0;
      _enc->rc.twopass_buffer_bytes=0;
    }
    /*If we've got all the frames we need, fill in the current metrics.
      We're ready to go.*/
    if(npackets<=0){
      int dups;
      _enc->rc.cur_metrics=_enc->rc.frame_metrics[_enc->rc.frame_metrics_head];
      _enc->rc.twopass_force_kf=
       _enc->rc.cur_metrics.frame_type==OC_INTRA_FRAME;
      /*"Helpfully" set the dup count back to what it was in pass 1.*/
      dups=_enc->rc.cur_metrics.dup_count;
      th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&dups,sizeof(dups));
      /*Mark us ready for the next frame.*/
      _enc->rc.twopass_buffer_bytes=1;
    }
  }
  return (int)nread;
}
1138