1 /* $Id$ */
2 /*
3  * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
4  * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  */
20 #include <pjmedia/silencedet.h>
21 #include <pjmedia/alaw_ulaw.h>
22 #include <pjmedia/errno.h>
23 #include <pj/assert.h>
24 #include <pj/log.h>
25 #include <pj/pool.h>
26 #include <pj/string.h>
27 
/* Sender name passed as the first argument of the trace log calls below. */
#define THIS_FILE   "silencedet.c"

/*
 * Tracing helper. With the condition set to 1 every TRACE_() call expands
 * to PJ_LOG() at level 5; set it to 0 to compile the trace calls out.
 */
#if 1
#   define TRACE_(x)    PJ_LOG(5,x)
#else
#   define TRACE_(x)
#endif
35 
/**
 * Operation mode of the silence detector. The value is stored in the
 * "mode" member of the detector structure.
 */
typedef enum pjmedia_silence_det_mode {
    VAD_MODE_NONE     = 0,  /**< Detection disabled.                        */
    VAD_MODE_FIXED    = 1,  /**< Level is compared to a fixed threshold.    */
    VAD_MODE_ADAPTIVE = 2   /**< Threshold is re-adjusted while running.    */
} pjmedia_silence_det_mode;
44 
/**
 * Default settings, applied when a caller passes a negative value to the
 * corresponding setter.
 */

/* Time to recalculate threshold in voiced condition, in ms. */
#define DEF_RECALC_ON_VOICED        4000

/* Time to recalculate threshold in silence condition, in ms. */
#define DEF_RECALC_ON_SILENCE       2000

/* Silence time before really changing state into SILENCE, in ms. */
#define DEF_BEFORE_SILENCE          400

/* Default threshold. */
#define DEF_THRESHOLD               1000
55 
/**
 * States of the silence detector when running in adaptive mode.
 */
enum pjmedia_silence_det_state {
    STATE_SILENCE       = 0,    /**< Signal is considered silent.           */
    STATE_START_SILENCE = 1,    /**< Level dropped, but not yet for long
                                     enough to be declared silence.         */
    STATE_VOICED        = 2     /**< Signal is considered voiced.           */
};
64 
65 /**
66  * This structure holds the silence detector state.
67  */
68 struct pjmedia_silence_det
69 {
70     char      objname[PJ_MAX_OBJ_NAME]; /**< VAD name.			    */
71 
72     int	      mode;			/**< VAD mode.			    */
73     unsigned  ptime;			/**< Frame time, in msec.	    */
74 
75     unsigned  threshold;		/**< Current threshold level.	    */
76     unsigned  sum_level;		/**< Total sum of recent level.	    */
77     unsigned  sum_cnt;			/**< Number of level summed.	    */
78     unsigned  silence_timer;		/**< Silence condition timer.	    */
79     unsigned  voiced_timer;		/**< Voiced condition timer.	    */
80 
81     enum pjmedia_silence_det_state state;/**< Silence detector state.	    */
82     unsigned  recalc_on_voiced;		/**< Setting of time to recalc
83 					     threshold in voiced condition. */
84     unsigned  recalc_on_silence;	/**< Setting of time to recalc
85 					     threshold in silence condition.*/
86     unsigned  before_silence;		/**< Setting of silence time before
87 					     really changing state into SILENCE,
88 					     in ms.			    */
89 };
90 
91 
92 
pjmedia_silence_det_create(pj_pool_t * pool,unsigned clock_rate,unsigned samples_per_frame,pjmedia_silence_det ** p_sd)93 PJ_DEF(pj_status_t) pjmedia_silence_det_create( pj_pool_t *pool,
94 						unsigned clock_rate,
95 						unsigned samples_per_frame,
96 						pjmedia_silence_det **p_sd)
97 {
98     pjmedia_silence_det *sd;
99 
100     PJ_ASSERT_RETURN(pool && p_sd, PJ_EINVAL);
101 
102     sd = PJ_POOL_ZALLOC_T(pool, pjmedia_silence_det);
103 
104     pj_ansi_snprintf(sd->objname, PJ_MAX_OBJ_NAME, "sd%p", sd);
105     sd->objname[PJ_MAX_OBJ_NAME-1] = '\0';
106 
107     sd->ptime = samples_per_frame * 1000 / clock_rate;
108 
109     /* Default settings */
110     pjmedia_silence_det_set_params(sd, -1, -1, -1);
111 
112     /* Restart in adaptive, silent mode */
113     pjmedia_silence_det_set_adaptive( sd, -1 );
114 
115     *p_sd = sd;
116     return PJ_SUCCESS;
117 }
118 
119 
pjmedia_silence_det_set_name(pjmedia_silence_det * sd,const char * name)120 PJ_DEF(pj_status_t) pjmedia_silence_det_set_name( pjmedia_silence_det *sd,
121 						  const char *name)
122 {
123     PJ_ASSERT_RETURN(sd && name, PJ_EINVAL);
124 
125     pj_ansi_snprintf(sd->objname, PJ_MAX_OBJ_NAME, name, sd);
126     sd->objname[PJ_MAX_OBJ_NAME-1] = '\0';
127     return PJ_SUCCESS;
128 }
129 
pjmedia_silence_det_set_adaptive(pjmedia_silence_det * sd,int threshold)130 PJ_DEF(pj_status_t) pjmedia_silence_det_set_adaptive(pjmedia_silence_det *sd,
131 						     int threshold)
132 {
133     PJ_ASSERT_RETURN(sd, PJ_EINVAL);
134 
135     if (threshold < 0)
136 	threshold = DEF_THRESHOLD;
137 
138     sd->mode = VAD_MODE_ADAPTIVE;
139     sd->threshold = threshold;
140 
141     return PJ_SUCCESS;
142 }
143 
pjmedia_silence_det_set_fixed(pjmedia_silence_det * sd,int threshold)144 PJ_DEF(pj_status_t) pjmedia_silence_det_set_fixed( pjmedia_silence_det *sd,
145 						   int threshold )
146 {
147     PJ_ASSERT_RETURN(sd, PJ_EINVAL);
148 
149     if (threshold < 0)
150 	threshold = DEF_THRESHOLD;
151 
152     sd->mode = VAD_MODE_FIXED;
153     sd->threshold = threshold;
154 
155     return PJ_SUCCESS;
156 }
157 
pjmedia_silence_det_set_params(pjmedia_silence_det * sd,int before_silence,int recalc_time1,int recalc_time2)158 PJ_DEF(pj_status_t) pjmedia_silence_det_set_params( pjmedia_silence_det *sd,
159 						    int before_silence,
160 						    int recalc_time1,
161 						    int recalc_time2)
162 {
163     PJ_ASSERT_RETURN(sd, PJ_EINVAL);
164 
165     if (recalc_time1 < 0)
166 	recalc_time1 = DEF_RECALC_ON_VOICED;
167     if (recalc_time2 < 0)
168 	recalc_time2 = DEF_RECALC_ON_SILENCE;
169     if (before_silence < 0)
170 	before_silence = DEF_BEFORE_SILENCE;
171 
172     sd->recalc_on_voiced = recalc_time1;
173     sd->recalc_on_silence = recalc_time2;
174     sd->before_silence  = before_silence;
175 
176     return PJ_SUCCESS;
177 }
178 
179 
pjmedia_silence_det_disable(pjmedia_silence_det * sd)180 PJ_DEF(pj_status_t) pjmedia_silence_det_disable( pjmedia_silence_det *sd )
181 {
182     PJ_ASSERT_RETURN(sd, PJ_EINVAL);
183 
184     sd->mode = VAD_MODE_NONE;
185 
186     return PJ_SUCCESS;
187 }
188 
189 
pjmedia_calc_avg_signal(const pj_int16_t samples[],pj_size_t count)190 PJ_DEF(pj_int32_t) pjmedia_calc_avg_signal( const pj_int16_t samples[],
191 					    pj_size_t count)
192 {
193     pj_uint32_t sum = 0;
194 
195     const pj_int16_t * pcm = samples;
196     const pj_int16_t * end = samples + count;
197 
198     if (count==0)
199 	return 0;
200 
201     while (pcm != end) {
202 	if (*pcm < 0)
203 	    sum -= *pcm++;
204 	else
205 	    sum += *pcm++;
206     }
207 
208     return (pj_int32_t)(sum / count);
209 }
210 
pjmedia_silence_det_apply(pjmedia_silence_det * sd,pj_uint32_t level)211 PJ_DEF(pj_bool_t) pjmedia_silence_det_apply( pjmedia_silence_det *sd,
212 					     pj_uint32_t level)
213 {
214     int avg_recent_level;
215 
216     if (sd->mode == VAD_MODE_NONE)
217 	return PJ_FALSE;
218 
219     if (sd->mode == VAD_MODE_FIXED)
220 	return (level < sd->threshold);
221 
222     /* Calculating recent level */
223     sd->sum_level += level;
224     ++sd->sum_cnt;
225     avg_recent_level = (sd->sum_level / sd->sum_cnt);
226 
227     if (level > sd->threshold ||
228 	level >= PJMEDIA_SILENCE_DET_MAX_THRESHOLD)
229     {
230 	sd->silence_timer = 0;
231 	sd->voiced_timer += sd->ptime;
232 
233 	switch(sd->state) {
234 	    case STATE_VOICED:
235 		if (sd->voiced_timer > sd->recalc_on_voiced) {
236 		    /* Voiced for long time (>recalc_on_voiced), current
237 		     * threshold seems to be too low.
238 		     */
239 		    sd->threshold = (avg_recent_level + sd->threshold) >> 1;
240 		    TRACE_((THIS_FILE,"Re-adjust threshold (in talk burst)"
241 			    "to %d", sd->threshold));
242 
243 		    sd->voiced_timer = 0;
244 
245 		    /* Reset sig_level */
246 		    sd->sum_level = avg_recent_level;
247 		    sd->sum_cnt = 1;
248 		}
249 		break;
250 
251 	    case STATE_SILENCE:
252 		TRACE_((THIS_FILE,"Starting talk burst (level=%d threshold=%d)",
253 			level, sd->threshold));
254 
255 	    case STATE_START_SILENCE:
256 		sd->state = STATE_VOICED;
257 
258 		/* Reset sig_level */
259 		sd->sum_level = level;
260 		sd->sum_cnt = 1;
261 
262 		break;
263 
264 	    default:
265 		pj_assert(0);
266 		break;
267 	}
268     } else {
269 	sd->voiced_timer = 0;
270 	sd->silence_timer += sd->ptime;
271 
272 	switch(sd->state) {
273 	    case STATE_SILENCE:
274 		if (sd->silence_timer >= sd->recalc_on_silence) {
275 		    sd->threshold = avg_recent_level << 1;
276 		    TRACE_((THIS_FILE,"Re-adjust threshold (in silence)"
277 			    "to %d", sd->threshold));
278 
279 		    sd->silence_timer = 0;
280 
281 		    /* Reset sig_level */
282 		    sd->sum_level = avg_recent_level;
283 		    sd->sum_cnt = 1;
284 		}
285 		break;
286 
287 	    case STATE_VOICED:
288 		sd->state = STATE_START_SILENCE;
289 
290 		/* Reset sig_level */
291 		sd->sum_level = level;
292 		sd->sum_cnt = 1;
293 
294 	    case STATE_START_SILENCE:
295 		if (sd->silence_timer >= sd->before_silence) {
296 		    sd->state = STATE_SILENCE;
297 		    sd->threshold = avg_recent_level << 1;
298 		    TRACE_((THIS_FILE,"Starting silence (level=%d "
299 			    "threshold=%d)", level, sd->threshold));
300 
301 		    /* Reset sig_level */
302 		    sd->sum_level = avg_recent_level;
303 		    sd->sum_cnt = 1;
304 		}
305 		break;
306 
307 	    default:
308 		pj_assert(0);
309 		break;
310 	}
311     }
312 
313     return (sd->state == STATE_SILENCE);
314 }
315 
316 
pjmedia_silence_det_detect(pjmedia_silence_det * sd,const pj_int16_t samples[],pj_size_t count,pj_int32_t * p_level)317 PJ_DEF(pj_bool_t) pjmedia_silence_det_detect( pjmedia_silence_det *sd,
318 					      const pj_int16_t samples[],
319 					      pj_size_t count,
320 					      pj_int32_t *p_level)
321 {
322     pj_uint32_t level;
323 
324     /* Calculate average signal level. */
325     level = pjmedia_calc_avg_signal(samples, count);
326 
327     /* Report to caller, if required. */
328     if (p_level)
329 	*p_level = level;
330 
331     return pjmedia_silence_det_apply(sd, level);
332 }
333 
334