1 /*
2  * Copyright (C) 2005-2006 iptelorg GmbH
3  *
4  * This file is part of SEMS, a free SIP media server.
5  *
6  * SEMS is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version. This program is released under
10  * the GPL with the additional exemption that compiling, linking,
11  * and/or using OpenSSL is allowed.
12  *
13  * For a license to use the SEMS software under conditions
14  * other than those described here, or to purchase support for this
15  * software, please contact iptel.org by e-mail at the following addresses:
16  *    info@iptel.org
17  *
18  * SEMS is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26  */
27 
28 #include "AmPlayoutBuffer.h"
29 #include "AmAudio.h"
30 #include "AmRtpAudio.h"
31 
// NOTE(review): SEARCH_OFFSET is not referenced in the visible part of
// this file -- confirm it is still needed elsewhere before removing it.
#define SEARCH_OFFSET  140

// width (in samples) of the window that time_scale() searches for the
// segment correlating best with the template
#define SEARCH_REGION  110
// margin (in samples) kept between the template and the search window
#define DELTA          5

// hard limits of the time scaling factor
#define TSM_MAX_SCALE  2.0
#define TSM_MIN_SCALE  0.5

// dead zone around 1.0: time_scale() returns without scaling while
// |f - 1.0| <= SCALE_FACTOR_START, i.e. it only scales if f < 0.9 or f > 1.1
#define SCALE_FACTOR_START 0.1

#define PI 3.14159265358979323846

// expands to an expression using a variable named sample_rate, which has
// to be in scope wherever the macro is used; parenthesized so that it
// behaves as a single operand
#define MAX_DELAY (sample_rate*1) /* 1 second */
46 
AmPlayoutBuffer(AmPLCBuffer * plcbuffer,unsigned int sample_rate)47 AmPlayoutBuffer::AmPlayoutBuffer(AmPLCBuffer *plcbuffer, unsigned int sample_rate)
48   : r_ts(0),w_ts(0), m_plcbuffer(plcbuffer),
49     last_ts_i(false), sample_rate(sample_rate),
50     recv_offset_i(false)
51 {
52   buffer.clear_all();
53 }
54 
direct_write_buffer(unsigned int ts,ShortSample * buf,unsigned int len)55 void AmPlayoutBuffer::direct_write_buffer(unsigned int ts, ShortSample* buf, unsigned int len)
56 {
57   buffer_put(w_ts,buf,len);
58 }
59 
write(u_int32_t ref_ts,u_int32_t rtp_ts,int16_t * buf,u_int32_t len,bool begin_talk)60 void AmPlayoutBuffer::write(u_int32_t ref_ts, u_int32_t rtp_ts,
61 			    int16_t* buf, u_int32_t len, bool begin_talk)
62 {
63   unsigned int mapped_ts;
64   if(!recv_offset_i)
65     {
66       recv_offset = rtp_ts - ref_ts;
67       recv_offset_i = true;
68       DBG("initialized recv_offset with %u (%u - %u)\n",
69 	  recv_offset, ref_ts, rtp_ts);
70       mapped_ts = r_ts = w_ts = ref_ts;
71     }
72   else {
73     mapped_ts = rtp_ts - recv_offset;
74 
75     // resync
76     if( ts_less()(mapped_ts, ref_ts - MAX_DELAY/2) ||
77 	!ts_less()(mapped_ts, ref_ts + MAX_DELAY) ){
78 
79       DBG("resync needed: reference ts = %u; write ts = %u\n",
80 	  ref_ts, mapped_ts);
81       recv_offset = rtp_ts - ref_ts;
82       mapped_ts = r_ts = w_ts = ref_ts;
83     }
84   }
85 
86   if(!last_ts_i)
87     {
88       last_ts = mapped_ts;
89       last_ts_i = true;
90     }
91 
92   if(ts_less()(last_ts, mapped_ts) && !begin_talk
93      && (mapped_ts - last_ts <= PLC_MAX_SAMPLES))
94     {
95       unsigned char tmp[AUDIO_BUFFER_SIZE * 2];
96       int l_size = m_plcbuffer->conceal_loss(mapped_ts - last_ts, tmp);
97       if (l_size>0)
98         {
99 	  direct_write_buffer(last_ts, (ShortSample*)tmp, PCM16_B2S(l_size));
100         }
101     }
102   m_plcbuffer->add_to_history(buf, PCM16_S2B(len));
103 
104   write_buffer(ref_ts, mapped_ts, buf, len);
105 
106   // update last_ts to end of received packet
107   // if not out-of-sequence
108   if (ts_less()(last_ts, mapped_ts) || last_ts == mapped_ts)
109     last_ts = mapped_ts + len;
110 }
111 
112 
write_buffer(u_int32_t ref_ts,u_int32_t ts,int16_t * buf,u_int32_t len)113 void AmPlayoutBuffer::write_buffer(u_int32_t ref_ts, u_int32_t ts, int16_t* buf, u_int32_t len)
114 {
115   buffer_put(w_ts,buf,len);
116 }
117 
read(u_int32_t ts,int16_t * buf,u_int32_t len)118 u_int32_t AmPlayoutBuffer::read(u_int32_t ts, int16_t* buf, u_int32_t len)
119 {
120   if(ts_less()(r_ts,w_ts)){
121 
122     u_int32_t rlen=0;
123     if(ts_less()(r_ts+PCM16_B2S(AUDIO_BUFFER_SIZE),w_ts))
124       rlen = PCM16_B2S(AUDIO_BUFFER_SIZE);
125     else
126       rlen = w_ts - r_ts;
127 
128     buffer_get(r_ts,buf,rlen);
129     return rlen;
130   }
131 
132   return 0;
133 }
134 
135 
buffer_put(unsigned int ts,ShortSample * buf,unsigned int len)136 void AmPlayoutBuffer::buffer_put(unsigned int ts, ShortSample* buf, unsigned int len)
137 {
138   buffer.put(ts,buf,len);
139 
140   if(ts_less()(w_ts,ts+len))
141     w_ts = ts + len;
142 }
143 
buffer_get(unsigned int ts,ShortSample * buf,unsigned int len)144 void AmPlayoutBuffer::buffer_get(unsigned int ts, ShortSample* buf, unsigned int len)
145 {
146   buffer.get(ts,buf,len);
147 
148   if(ts_less()(r_ts,ts+len))
149     r_ts = ts + len;
150 }
151 
152 //
153 // See: Y. J. Liang, N. Farber, and B. Girod. Adaptive playout scheduling
154 // and loss concealment for voice communication over IP networks. Submitted
155 // to IEEE Transactions on Multimedia, Feb. 2001.
156 // Online at:
157 //  http://www-ise.stanford.edu/yiliang/publications/
158 //  http://citeseer.ist.psu.edu/liang02adaptive.html
159 //
AmAdaptivePlayout(AmPLCBuffer * plcbuffer,unsigned int sample_rate)160 AmAdaptivePlayout::AmAdaptivePlayout(AmPLCBuffer *plcbuffer, unsigned int sample_rate)
161   : AmPlayoutBuffer(plcbuffer, sample_rate),
162     idx(0),
163     loss_rate(ORDER_STAT_LOSS_RATE),
164     wsola_off(WSOLA_START_OFF),
165     shr_threshold(SHR_THRESHOLD),
166     short_scaled(WSOLA_SCALED_WIN),
167     plc_cnt(0),
168     fec(sample_rate)
169 {
170   memset(n_stat,0,sizeof(int32_t)*ORDER_STAT_WIN_SIZE);
171 }
172 
next_delay(u_int32_t ref_ts,u_int32_t ts)173 u_int32_t AmAdaptivePlayout::next_delay(u_int32_t ref_ts, u_int32_t ts)
174 {
175   int32_t n = (int32_t)(ref_ts - ts);
176 
177   multiset<int32_t>::iterator it = o_stat.find(n_stat[idx]);
178   if(it != o_stat.end())
179     o_stat.erase(it);
180 
181   n_stat[idx] = n;
182   o_stat.insert(n);
183 
184 
185   int32_t D_r=0,D_r1=0;
186   int r = int((double(o_stat.size()) + 1.0)*(1.0 - loss_rate));
187 
188   if((r == 0) || (r >= (int)o_stat.size())){
189 
190     StddevValue n_std;
191     for(int i=0; i<ORDER_STAT_WIN_SIZE; i++){
192       n_std.push(double(n_stat[i]));
193     }
194 
195     if(r == 0){
196       D_r = (*o_stat.begin()) - (int32_t)(2.0*n_std.stddev());
197       D_r1 = (*o_stat.begin());
198     }
199     else {
200       D_r = (*o_stat.rbegin());
201       D_r1 = (*o_stat.rbegin()) + (int32_t)(2.0*n_std.stddev());
202     }
203 
204 
205   }
206   else {
207     int i=0;
208     for(it = o_stat.begin(); it != o_stat.end(); it++){
209 
210       if(++i == r){
211 	D_r = (*it);
212 	++it;
213 	D_r1 = (*it);
214 	break;
215       }
216     }
217   }
218 
219   int32_t D =
220     int32_t(D_r + double(D_r1 - D_r)
221 	    * ( (double(o_stat.size()) + 1.0)
222 		*(1.0-loss_rate) - double(r)));
223 
224   if(++idx >= ORDER_STAT_WIN_SIZE)
225     idx = 0;
226 
227   return D;
228 }
229 
write_buffer(u_int32_t ref_ts,u_int32_t ts,int16_t * buf,u_int32_t len)230 void AmAdaptivePlayout::write_buffer(u_int32_t ref_ts, u_int32_t ts,
231 				     int16_t* buf, u_int32_t len)
232 {
233   // predict next delay
234   u_int32_t p_delay = next_delay(ref_ts,ts);
235 
236   u_int32_t old_off = wsola_off;
237   ts += old_off;
238 
239   if(short_scaled.mean() > 2.0){
240     if(shr_threshold < 3000)
241       shr_threshold += 10;
242   }
243   else if(short_scaled.mean() < 1.0){
244     if(shr_threshold > 100)
245       shr_threshold -= 2;
246   }
247 
248   // need to scale?
249   if( ts_less()(wsola_off+EXP_THRESHOLD,p_delay) ||   // expand packet
250       ts_less()(p_delay+shr_threshold,wsola_off) )  { // shrink packet
251 
252     wsola_off = p_delay;
253   }
254   else {
255     if(ts_less()(r_ts,ts+len)){
256       plc_cnt = 0;
257       buffer_put(ts,buf,len);
258     }
259     else {
260       // lost
261     }
262 
263     // statistics
264     short_scaled.push(0.0);
265 
266     return;
267   }
268 
269   int32_t n_len = len + wsola_off - old_off;
270   if(n_len < 0)
271     n_len = 1;
272 
273   float f = float(n_len) / float(len);
274   if(f > TSM_MAX_SCALE)
275     f = TSM_MAX_SCALE;
276 
277   n_len = (int32_t)(float(len) * f);
278   if(ts_less()(ts+n_len,r_ts)){
279 
280     // statistics
281     short_scaled.push(0.0);
282     return;
283   }
284 
285   u_int32_t old_wts = w_ts;
286   buffer_put(ts,buf,len);
287 
288   n_len = time_scale(ts,f,len);
289   wsola_off = old_off + n_len - len;
290 
291   // if we have shrinked the voice, set back w_ts
292   // in order to have correct start point for possible
293   // PLC
294   if (n_len < (int32_t) len)
295     w_ts += n_len - len;
296 
297   if(w_ts != old_wts)
298     plc_cnt = 0;
299 
300   // statistics
301   short_scaled.push(100.0);
302 }
303 
read(u_int32_t ts,int16_t * buf,u_int32_t len)304 u_int32_t AmAdaptivePlayout::read(u_int32_t ts, int16_t* buf, u_int32_t len)
305 {
306   bool do_plc=false;
307 
308   if(ts_less()(w_ts,ts+len) && (plc_cnt < 6)){
309 
310     if(!plc_cnt){
311       int nlen = time_scale(w_ts-len,2.0, len);
312       wsola_off += nlen-len;
313     }
314     else {
315       do_plc = true;
316     }
317     plc_cnt++;
318   }
319 
320   if(do_plc){
321 
322     short plc_buf[FRAMESZ];
323 
324     for(unsigned int i=0; i<len/FRAMESZ; i++){
325 
326       fec.dofe(plc_buf);
327 
328       buffer_put(w_ts,plc_buf,FRAMESZ);
329     }
330 
331     buffer_get(ts,buf,len);
332   }
333   else {
334 
335     buffer_get(ts,buf,len);
336 
337     for(unsigned int i=0; i<len/FRAMESZ; i++)
338       fec.addtohistory(buf + i*FRAMESZ);
339   }
340 
341   return len;
342 }
343 
direct_write_buffer(unsigned int ts,ShortSample * buf,unsigned int len)344 void AmAdaptivePlayout::direct_write_buffer(unsigned int ts, ShortSample* buf, unsigned int len)
345 {
346   buffer_put(ts+wsola_off,buf,len);
347 }
348 
349 /**
350  * find best cross correlation of a TEMPLATE_SEG samples
351  * long frame
352  *  * starting between sr_beg ... sr_end
353  *  * to TEMPLATE_SEG samples frame starting from ts
354  *
355  */
find_best_corr(short * ts,short * sr_beg,short * sr_end,unsigned int sample_rate)356 short* find_best_corr(short *ts, short *sr_beg, short* sr_end, unsigned int sample_rate)
357 {
358   // find best correlation
359   float corr=0.f,best_corr=0.f;
360   short *best_sr=ts;
361   short *sr;
362 
363   for(sr = sr_beg; sr != sr_end; sr++){
364 
365     corr=0.f;
366     for(unsigned int i=0; i<TEMPLATE_SEG; i++)
367       corr += float(sr[i]) * float(ts[i]);
368 
369     if((best_sr == 0) || (corr > best_corr)){
370       best_corr = corr;
371       best_sr = sr;
372     }
373   }
374 
375   return best_sr;
376 }
377 
time_scale(u_int32_t ts,float factor,u_int32_t packet_len)378 u_int32_t AmAdaptivePlayout::time_scale(u_int32_t ts, float factor,
379 					u_int32_t packet_len)
380 {
381   // current position in strech buffer
382   short *tmpl      = p_buf + packet_len;
383   // begin and end of strech buffer
384   short *p_buf_beg = p_buf;
385   short *p_buf_end;
386 
387   // initially size is packet_len
388   unsigned int s     = packet_len;
389 
390   // we start from beginning of frame
391   unsigned int cur_ts   = ts;
392 
393   // safety
394   if (packet_len > MAX_PACKET_SAMPLES)
395     return s;
396 
397   // not possible to stretch packets shorter than 10ms
398   if (packet_len < TEMPLATE_SEG)
399       return s;
400 
401   if (fabs(factor - 1.0) <= SCALE_FACTOR_START) {
402 #ifdef DEBUG_PLAYOUTBUF
403     DBG("not scaling - too little f difference \n");
404 #endif
405     return s;
406   }
407 
408   // boundaries of scaling
409   if(factor > TSM_MAX_SCALE)
410     factor = TSM_MAX_SCALE;
411   else if(factor < TSM_MIN_SCALE)
412     factor = TSM_MIN_SCALE;
413 
414   short *srch_beg, *srch_end, *srch;
415 
416   while(true){
417     // get previous packet_len frame + scaled frame
418     // (with size s) into p_buf
419     buffer_get(ts - packet_len, p_buf_beg, s + packet_len);
420     p_buf_end = p_buf_beg + s + packet_len;
421 
422     // determine search region for template seg
423     // as srch_beg ... srch_end
424     if (factor > 1.0){
425       // expansion
426       srch_beg = tmpl - (int)((float)TEMPLATE_SEG * (factor - 1.0)) - SEARCH_REGION/2;
427       srch_end = srch_beg + SEARCH_REGION;
428 
429       if(srch_beg < p_buf_beg)
430 	srch_beg = p_buf_beg;
431 
432       if(srch_end + DELTA >= tmpl)
433 	srch_end = tmpl - DELTA;
434     }
435     else {
436       // compression
437       srch_end = tmpl + (int)((float)TEMPLATE_SEG * (1.0 - factor)) + SEARCH_REGION/2;
438       srch_beg = srch_end - SEARCH_REGION;
439 
440       if(srch_end + TEMPLATE_SEG > p_buf_end)
441 	srch_end = p_buf_end - TEMPLATE_SEG;
442 
443       if(srch_beg - DELTA < tmpl)
444 	srch_beg = tmpl + DELTA;
445     }
446     // stop if search region size < 0
447     if (srch_beg >= srch_end)
448       break;
449 
450     // find best correlation to tmpl in srch_beg..srch_end
451     srch = find_best_corr(tmpl,srch_beg,srch_end,sample_rate);
452 
453     // merge original segment (starting from tmpl) and
454     // best correlation (starting from srch) into merge_buf
455     float f = 0.5,v = 0.5;
456     for(unsigned int k=0; k<TEMPLATE_SEG; k++){
457 
458       f = 0.5 - 0.5 * cos( PI*float(k) / float(TEMPLATE_SEG) );
459       v = (float)srch[k] * f + (float)tmpl[k] * (1.0 - f);
460 
461       if(v > 32767.)
462 	v = 32767.;
463       else if(v < -32768.)
464 	v = -32768.;
465 
466       merge_buf[k] = (short)v;
467     }
468 
469     // put merged segment into buffer
470     buffer_put( cur_ts, merge_buf, TEMPLATE_SEG);
471     if (p_buf_end - srch - TEMPLATE_SEG < 0) {
472       ERROR("audio after merged segment spills over\n");
473       break;
474     }
475     // add after merged segment audio from after srch
476     buffer_put( cur_ts + TEMPLATE_SEG, srch + TEMPLATE_SEG,
477 		p_buf_end - srch - TEMPLATE_SEG );
478     // size s has changed
479     s      += tmpl - srch;
480 
481     // go to next segment
482     cur_ts += TEMPLATE_SEG/2;
483     tmpl   += TEMPLATE_SEG/2;
484 
485     // calculate current factor
486     float act_fact = s / (float)packet_len;
487 
488 #ifdef DEBUG_PLAYOUTBUF
489     DBG("at ts %u: new size = %u, ratio = %f, requested = %f (wsola_off = %ld)\n",
490 	ts, s, act_fact, factor, (long)wsola_off);
491 #endif
492     // break condition: coming to the end of the frame (with safety margin)
493     if((unsigned int)(p_buf_end - tmpl) < TEMPLATE_SEG + DELTA)
494       break;
495 
496     // streched enough?
497     if((factor > 1.0) && (act_fact >= factor))
498       break;
499     else if((factor < 1.0) && (act_fact <= factor))
500       break;
501 
502     // streched over maximum already?
503     else if(act_fact >= TSM_MAX_SCALE || f <= TSM_MIN_SCALE)
504       break;
505 
506   }
507 
508   return s;
509 }
510 
511 /*****************************************************************
512  *
513  *  AmJbPlayout class methods
514  *
515  *****************************************************************/
516 
AmJbPlayout(AmPLCBuffer * plcbuffer,unsigned int sample_rate)517 AmJbPlayout::AmJbPlayout(AmPLCBuffer *plcbuffer, unsigned int sample_rate)
518   : AmPlayoutBuffer(plcbuffer, sample_rate)
519 {
520 }
521 
read(u_int32_t ts,int16_t * buf,u_int32_t len)522 u_int32_t AmJbPlayout::read(u_int32_t ts, int16_t* buf, u_int32_t len)
523 {
524   prepare_buffer(ts, len);
525   buffer_get(ts, buf, len);
526   return len;
527 }
528 
direct_write_buffer(unsigned int ts,ShortSample * buf,unsigned int len)529 void AmJbPlayout::direct_write_buffer(unsigned int ts, ShortSample* buf, unsigned int len)
530 {
531   buffer_put(ts, buf, len);
532 }
533 
prepare_buffer(unsigned int audio_buffer_ts,unsigned int ms)534 void AmJbPlayout::prepare_buffer(unsigned int audio_buffer_ts, unsigned int ms)
535 {
536   ShortSample buf[AUDIO_BUFFER_SIZE * 10];
537   unsigned int ts;
538   unsigned int nb_samples;
539   /**
540    * Get all RTP packets that correspond to the required interval,
541    * decode them and put into playout buffer.
542    */
543   while (m_jb.get(audio_buffer_ts, ms, buf, &nb_samples, &ts))
544     {
545       direct_write_buffer(ts, buf, nb_samples);
546       m_plcbuffer->add_to_history(buf, PCM16_S2B(nb_samples));
547       /* Conceal the gap between previous and current RTP packets */
548       if (last_ts_i && ts_less()(m_last_rtp_endts, ts))
549        	{
550 	  int concealed_size = m_plcbuffer->conceal_loss(ts - m_last_rtp_endts, (unsigned char *)buf);
551 	  if (concealed_size > 0)
552 	    direct_write_buffer(m_last_rtp_endts, buf, PCM16_B2S(concealed_size));
553 	}
554       m_last_rtp_endts = ts + nb_samples;
555       last_ts_i = true;
556     }
557   if (!last_ts_i) {
558     return;
559   }
560   if (ts_less()(m_last_rtp_endts, audio_buffer_ts + ms))
561     {
562       /* Last packets have been lost. Conceal them */
563       int concealed_size = m_plcbuffer->conceal_loss(audio_buffer_ts + ms - m_last_rtp_endts, (unsigned char *)buf);
564       if (concealed_size > 0)
565 	direct_write_buffer(m_last_rtp_endts, buf, PCM16_B2S(concealed_size));
566       m_last_rtp_endts = audio_buffer_ts + ms;
567     }
568 }
569 
write(u_int32_t ref_ts,u_int32_t rtp_ts,int16_t * buf,u_int32_t len,bool begin_talk)570 void AmJbPlayout::write(u_int32_t ref_ts, u_int32_t rtp_ts, int16_t* buf, u_int32_t len, bool begin_talk)
571 {
572   m_jb.put(buf, len, rtp_ts, begin_talk);
573 }
574