1 #include <switch.h>
2 
/*
 * Argument count and usage string.
 * NOTE(review): neither macro is referenced in the visible part of this
 * file -- confirm whether they are still needed.
 */
#define AMD_PARAMS (2)
#define AMD_SYNTAX "<uuid> <command>"

/*
 * Forward declarations for the module entry points and the "amd" dialplan
 * application, followed by the module definition that ties the load and
 * shutdown hooks to the module name.  The last argument of the module
 * definition is NULL (presumably the optional runtime hook -- TODO confirm).
 */
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_amd_shutdown);
SWITCH_MODULE_LOAD_FUNCTION(mod_amd_load);
SWITCH_MODULE_DEFINITION(mod_amd, mod_amd_load, mod_amd_shutdown, NULL);
SWITCH_STANDARD_APP(amd_start_function);
10 
/*
 * Module-wide AMD tuning parameters.
 *
 * Every duration below is compared against counters that are accumulated
 * from the per-frame length in milliseconds, so they are all expressed in
 * milliseconds.
 */
static struct {
	/* Overall limits */
	uint32_t total_analysis_time;		/* analysis budget; 0 disables the timeout */
	uint32_t silence_threshold;		/* frame score at or above this is VOICED */

	/* Silence-related durations */
	uint32_t initial_silence;		/* silence before any speech => MACHINE */
	uint32_t after_greeting_silence;	/* silence after speech started => HUMAN */
	uint32_t between_words_silence;		/* silence that ends the current word */

	/* Speech-related limits */
	uint32_t greeting;			/* voiced run this long during greeting => MACHINE */
	uint32_t minimum_word_length;		/* voiced run needed to count as a word */
	uint32_t maximum_word_length;		/* voiced run this long => MACHINE */
	uint32_t maximum_number_of_words;	/* word count at or above this => MACHINE */
} globals;
22 
23 static switch_xml_config_item_t instructions[] = {
24 	SWITCH_CONFIG_ITEM(
25 		"initial_silence",
26 		SWITCH_CONFIG_INT,
27 		CONFIG_RELOADABLE,
28 		&globals.initial_silence,
29 		(void *) 2500,
30 		NULL, NULL, NULL),
31 
32 	SWITCH_CONFIG_ITEM(
33 		"greeting",
34 		SWITCH_CONFIG_INT,
35 		CONFIG_RELOADABLE,
36 		&globals.greeting,
37 		(void *) 1500,
38 		NULL, NULL, NULL),
39 
40 	SWITCH_CONFIG_ITEM(
41 		"after_greeting_silence",
42 		SWITCH_CONFIG_INT,
43 		CONFIG_RELOADABLE,
44 		&globals.after_greeting_silence,
45 		(void *) 800,
46 		NULL, NULL, NULL),
47 
48 	SWITCH_CONFIG_ITEM(
49 		"total_analysis_time",
50 		SWITCH_CONFIG_INT,
51 		CONFIG_RELOADABLE,
52 		&globals.total_analysis_time,
53 		(void *) 5000,
54 		NULL, NULL, NULL),
55 
56 	SWITCH_CONFIG_ITEM(
57 		"min_word_length",
58 		SWITCH_CONFIG_INT,
59 		CONFIG_RELOADABLE,
60 		&globals.minimum_word_length,
61 		(void *) 100,
62 		NULL, NULL, NULL),
63 
64 	SWITCH_CONFIG_ITEM(
65 		"between_words_silence",
66 		SWITCH_CONFIG_INT,
67 		CONFIG_RELOADABLE,
68 		&globals.between_words_silence,
69 		(void *) 50,
70 		NULL, NULL, NULL),
71 
72 	SWITCH_CONFIG_ITEM(
73 		"maximum_number_of_words",
74 		SWITCH_CONFIG_INT,
75 		CONFIG_RELOADABLE,
76 		&globals.maximum_number_of_words,
77 		(void *) 3,
78 		NULL, NULL, NULL),
79 
80 	SWITCH_CONFIG_ITEM(
81 		"maximum_word_length",
82 		SWITCH_CONFIG_INT,
83 		CONFIG_RELOADABLE,
84 		&globals.maximum_word_length,
85 		(void *)5000,
86 		NULL, NULL, NULL),
87 
88 	SWITCH_CONFIG_ITEM(
89 		"silence_threshold",
90 		SWITCH_CONFIG_INT,
91 		CONFIG_RELOADABLE,
92 		&globals.silence_threshold,
93 		(void *) 256,
94 		NULL, NULL, NULL),
95 
96 	SWITCH_CONFIG_ITEM_END()
97 };
98 
do_config(switch_bool_t reload)99 static switch_status_t do_config(switch_bool_t reload)
100 {
101 	memset(&globals, 0, sizeof(globals));
102 
103 	if (switch_xml_config_parse_module_settings("amd.conf", reload, instructions) != SWITCH_STATUS_SUCCESS) {
104 		return SWITCH_STATUS_FALSE;
105 	}
106 
107 	return SWITCH_STATUS_SUCCESS;
108 }
109 
SWITCH_MODULE_LOAD_FUNCTION(mod_amd_load)110 SWITCH_MODULE_LOAD_FUNCTION(mod_amd_load)
111 {
112 	switch_application_interface_t *app_interface;
113 
114 	*module_interface = switch_loadable_module_create_module_interface(pool, modname);
115 
116 	do_config(SWITCH_FALSE);
117 
118 	SWITCH_ADD_APP(
119 		app_interface,
120 		"amd",
121 		"Voice activity detection (blocking)",
122 		"Asterisk's AMD (Blocking)",
123 		amd_start_function,
124 		NULL,
125 		SAF_NONE);
126 
127 	return SWITCH_STATUS_SUCCESS;
128 }
129 
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_amd_shutdown)130 SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_amd_shutdown)
131 {
132 	switch_xml_config_cleanup(instructions);
133 
134 	return SWITCH_STATUS_SUCCESS;
135 }
136 
/* Result of classifying a single audio frame by its energy score. */
typedef enum {
	SILENCE = 0,	/* score below the configured silence threshold */
	VOICED = 1	/* score at or above the configured silence threshold */
} amd_frame_classifier;
141 
/* Coarse state of the detector: inside a word or inside a pause. */
typedef enum {
	VAD_STATE_IN_WORD = 0,		/* initial state; also entered when a word is counted */
	VAD_STATE_IN_SILENCE = 1	/* entered once silence reaches between_words_silence */
} amd_vad_state_t;
146 
147 typedef struct {
148 	const switch_core_session_t *session;
149 	switch_channel_t *channel;
150 	amd_vad_state_t state;
151 	uint32_t frame_ms;
152 
153 	uint32_t silence_duration;
154 	uint32_t voice_duration;
155 	uint32_t words;
156 
157 	uint32_t in_initial_silence:1;
158 	uint32_t in_greeting:1;
159 } amd_vad_t;
160 
classify_frame(const switch_frame_t * f,const switch_codec_implementation_t * codec)161 static amd_frame_classifier classify_frame(const switch_frame_t *f, const switch_codec_implementation_t *codec)
162 {
163 	int16_t *audio = f->data;
164 	uint32_t score, count, j;
165 	double energy;
166 	int divisor;
167 
168 	divisor = codec->actual_samples_per_second / 8000;
169 
170 	for (energy = 0, j = 0, count = 0; count < f->samples; count++) {
171 		energy += abs(audio[j++]);
172 		j += codec->number_of_channels;
173 	}
174 
175 	score = (uint32_t) (energy / (f->samples / divisor));
176 
177 	if (score >= globals.silence_threshold) {
178 		return VOICED;
179 	}
180 
181 	return SILENCE;
182 }
183 
amd_handle_silence_frame(amd_vad_t * vad,const switch_frame_t * f)184 static switch_bool_t amd_handle_silence_frame(amd_vad_t *vad, const switch_frame_t *f)
185 {
186 	vad->silence_duration += vad->frame_ms;
187 
188 	if (vad->silence_duration >= globals.between_words_silence) {
189 		if (vad->state != VAD_STATE_IN_SILENCE) {
190 			switch_log_printf(
191 				SWITCH_CHANNEL_SESSION_LOG(vad->session),
192 				SWITCH_LOG_DEBUG,
193 				"AMD: Changed state to VAD_STATE_IN_SILENCE\n");
194 		}
195 
196 		vad->state = VAD_STATE_IN_SILENCE;
197 		vad->voice_duration = 0;
198 	}
199 
200 	if (vad->in_initial_silence && vad->silence_duration >= globals.initial_silence) {
201 		switch_log_printf(
202 			SWITCH_CHANNEL_SESSION_LOG(vad->session),
203 			SWITCH_LOG_DEBUG,
204 			"AMD: MACHINE (silence_duration: %d, initial_silence: %d)\n",
205 			vad->silence_duration,
206 			globals.initial_silence);
207 
208 		switch_channel_set_variable(vad->channel, "amd_result", "MACHINE");
209 		switch_channel_set_variable(vad->channel, "amd_cause", "INITIALSILENCE");
210 		return SWITCH_TRUE;
211 	}
212 
213 	if (vad->silence_duration >= globals.after_greeting_silence && vad->in_greeting) {
214 		switch_log_printf(
215 			SWITCH_CHANNEL_SESSION_LOG(vad->session),
216 			SWITCH_LOG_DEBUG,
217 			"AMD: HUMAN (silence_duration: %d, after_greeting_silence: %d)\n",
218 			vad->silence_duration,
219 			globals.after_greeting_silence);
220 
221 		switch_channel_set_variable(vad->channel, "amd_result", "HUMAN");
222 		switch_channel_set_variable(vad->channel, "amd_cause", "HUMAN");
223 		return SWITCH_TRUE;
224 	}
225 
226 	return SWITCH_FALSE;
227 }
228 
amd_handle_voiced_frame(amd_vad_t * vad,const switch_frame_t * f)229 static switch_bool_t amd_handle_voiced_frame(amd_vad_t *vad, const switch_frame_t *f)
230 {
231 	vad->voice_duration += vad->frame_ms;
232 
233 	if (vad->voice_duration >= globals.minimum_word_length && vad->state == VAD_STATE_IN_SILENCE) {
234 		vad->words++;
235 
236 		switch_log_printf(
237 			SWITCH_CHANNEL_SESSION_LOG(vad->session),
238 			SWITCH_LOG_DEBUG,
239 			"AMD: Word detected (words: %d)\n",
240 			vad->words);
241 
242 		vad->state = VAD_STATE_IN_WORD;
243 	}
244 
245 	if (vad->voice_duration >= globals.maximum_word_length) {
246 		switch_log_printf(
247 			SWITCH_CHANNEL_SESSION_LOG(vad->session),
248 			SWITCH_LOG_DEBUG,
249 			"AMD: MACHINE (voice_duration: %d, maximum_word_length: %d)\n",
250 			vad->voice_duration,
251 			globals.maximum_word_length);
252 
253 		switch_channel_set_variable(vad->channel, "amd_result", "MACHINE");
254 		switch_channel_set_variable(vad->channel, "amd_cause", "MAXWORDLENGTH");
255 		return SWITCH_TRUE;
256 	}
257 
258 	if (vad->words >= globals.maximum_number_of_words) {
259 		switch_log_printf(
260 			SWITCH_CHANNEL_SESSION_LOG(vad->session),
261 			SWITCH_LOG_DEBUG,
262 			"AMD: MACHINE (words: %d, maximum_number_of_words: %d)\n",
263 			vad->words,
264 			globals.maximum_number_of_words);
265 
266 		switch_channel_set_variable(vad->channel, "amd_result", "MACHINE");
267 		switch_channel_set_variable(vad->channel, "amd_cause", "MAXWORDS");
268 		return SWITCH_TRUE;
269 	}
270 
271 	if (vad->in_greeting && vad->voice_duration >= globals.greeting) {
272 		switch_log_printf(
273 			SWITCH_CHANNEL_SESSION_LOG(vad->session),
274 			SWITCH_LOG_DEBUG,
275 			"AMD: MACHINE (voice_duration: %d, greeting: %d)\n",
276 			vad->voice_duration,
277 			globals.greeting);
278 
279 		switch_channel_set_variable(vad->channel, "amd_result", "MACHINE");
280 		switch_channel_set_variable(vad->channel, "amd_cause", "LONGGREETING");
281 		return SWITCH_TRUE;
282 	}
283 
284 	if (vad->voice_duration >= globals.minimum_word_length) {
285 		if (vad->silence_duration) {
286 			switch_log_printf(
287 				SWITCH_CHANNEL_SESSION_LOG(vad->session),
288 				SWITCH_LOG_DEBUG,
289 				"AMD: Detected Talk, previous silence duration: %dms\n",
290 				vad->silence_duration);
291 		}
292 
293 		vad->silence_duration = 0;
294 	}
295 
296 	if (vad->voice_duration >= globals.minimum_word_length && !vad->in_greeting) {
297 		if (vad->silence_duration) {
298 			switch_log_printf(
299 				SWITCH_CHANNEL_SESSION_LOG(vad->session),
300 				SWITCH_LOG_DEBUG,
301 				"AMD: Before Greeting Time (silence_duration: %d, voice_duration: %d)\n",
302 				vad->silence_duration,
303 				vad->voice_duration);
304 		}
305 
306 		vad->in_initial_silence = 0;
307 		vad->in_greeting = 1;
308 	}
309 
310 	return SWITCH_FALSE;
311 }
312 
SWITCH_STANDARD_APP(amd_start_function)313 SWITCH_STANDARD_APP(amd_start_function)
314 {
315 	switch_channel_t *channel = switch_core_session_get_channel(session);
316 	switch_codec_t raw_codec = { 0 };
317 	switch_codec_implementation_t read_impl = { 0 };
318 	switch_frame_t *read_frame;
319 	switch_status_t status;
320 	uint32_t timeout_ms = globals.total_analysis_time;
321 	int32_t sample_count_limit;
322 	switch_bool_t complete = SWITCH_FALSE;
323 
324 	amd_vad_t vad = { 0 };
325 
326 	if (!session) {
327 		return;
328 	}
329 
330 	vad.channel = channel;
331 	vad.session = session;
332 	vad.state = VAD_STATE_IN_WORD;
333 	vad.silence_duration = 0;
334 	vad.voice_duration = 0;
335 	vad.frame_ms = 0;
336 	vad.in_initial_silence = 1;
337 	vad.in_greeting = 0;
338 	vad.words = 0;
339 
340 	switch_core_session_get_read_impl(session, &read_impl);
341 
342 	if (timeout_ms) {
343 		sample_count_limit = (read_impl.actual_samples_per_second / 1000) * timeout_ms;
344 	}
345 
346 	/*
347 	 * We are creating a new L16 (raw 16-bit samples) codec for the read end
348 	 * of our channel.  We'll use this to process the audio coming off of the
349 	 * channel so that we always know what we are dealing with.
350 	 */
351 	status = switch_core_codec_init(
352 		&raw_codec,
353 		"L16",
354 		NULL,
355 		NULL,
356 		read_impl.actual_samples_per_second,
357 		read_impl.microseconds_per_packet / 1000,
358 		1,
359 		SWITCH_CODEC_FLAG_ENCODE | SWITCH_CODEC_FLAG_DECODE,
360 		NULL,
361 		switch_core_session_get_pool(session));
362 
363 	if (status != SWITCH_STATUS_SUCCESS) {
364 		switch_log_printf(
365 			SWITCH_CHANNEL_SESSION_LOG(session),
366 			SWITCH_LOG_ERROR,
367 			"Unable to initialize L16 (raw) codec.\n");
368 		return;
369 	}
370 
371 	switch_core_session_set_read_codec(session, &raw_codec);
372 
373 	while (switch_channel_ready(channel)) {
374 		status = switch_core_session_read_frame(session, &read_frame, SWITCH_IO_FLAG_NONE, 0);
375 
376 		if (!SWITCH_READ_ACCEPTABLE(status)) {
377 			break;
378 		}
379 
380 		if (read_frame->samples == 0) {
381 			continue;
382 		}
383 
384 		vad.frame_ms = 1000 / (read_impl.actual_samples_per_second / read_frame->samples);
385 
386 		if (sample_count_limit) {
387 			sample_count_limit -= raw_codec.implementation->samples_per_packet;
388 			if (sample_count_limit <= 0) {
389 				switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "AMD: Timeout\n");
390 
391 				switch_channel_set_variable(channel, "amd_result", "NOTSURE");
392 				switch_channel_set_variable(channel, "amd_cause", "TOOLONG");
393 				break;
394 			}
395 		}
396 
397 		switch (classify_frame(read_frame, &read_impl)) {
398 		case SILENCE:
399 			switch_log_printf(
400 				SWITCH_CHANNEL_SESSION_LOG(session),
401 				SWITCH_LOG_DEBUG,
402 				"AMD: Silence\n");
403 
404 			if (amd_handle_silence_frame(&vad, read_frame)) {
405 				complete = SWITCH_TRUE;
406 			}
407 			break;
408 		case VOICED:
409 		default:
410 			switch_log_printf(
411 				SWITCH_CHANNEL_SESSION_LOG(session),
412 				SWITCH_LOG_DEBUG,
413 				"AMD: Voiced\n");
414 
415 			if (amd_handle_voiced_frame(&vad, read_frame)) {
416 				complete = SWITCH_TRUE;
417 			}
418 			break;
419 		}
420 
421 		if (complete) {
422 			break;
423 		}
424 	}
425 
426 	switch_core_session_reset(session, SWITCH_FALSE, SWITCH_TRUE);
427 	switch_core_codec_destroy(&raw_codec);
428 }
429