1 /* ====================================================================
2  * Copyright (c) 1999-2010 Carnegie Mellon University.  All rights
3  * reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * This work was supported in part by funding from the Defense Advanced
18  * Research Projects Agency and the National Science Foundation of the
19  * United States of America, and the CMU Sphinx Speech Consortium.
20  *
21  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
22  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
25  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * ====================================================================
34  *
35  */
36 
37 #ifndef __GST_VADER_H__
38 #define __GST_VADER_H__
39 
40 
#include <stdio.h>

#include <gst/gst.h>
#include <gst/gstevent.h>
43 
44 #ifdef __cplusplus
45 extern "C" {
46 #endif /* __cplusplus */
47 #if 0
48 }
49 #endif
50 
/*
 * GObject type-system convenience macros for the vader element.
 */

/** GType of the element, obtained by calling gst_vader_get_type(). */
#define GST_TYPE_VADER (gst_vader_get_type())

/** Type-checked cast of an instance pointer to GstVader. */
#define GST_VADER(obj) \
    (G_TYPE_CHECK_INSTANCE_CAST((obj), GST_TYPE_VADER, GstVader))

/** Type-checked cast of a class pointer to GstVaderClass. */
#define GST_VADER_CLASS(klass) \
    (G_TYPE_CHECK_CLASS_CAST((klass), GST_TYPE_VADER, GstVaderClass))

/** Tests whether an instance is of type GST_TYPE_VADER. */
#define GST_IS_VADER(obj) \
    (G_TYPE_CHECK_INSTANCE_TYPE((obj), GST_TYPE_VADER))

/** Tests whether a class structure is of type GST_TYPE_VADER. */
#define GST_IS_VADER_CLASS(klass) \
    (G_TYPE_CHECK_CLASS_TYPE((klass), GST_TYPE_VADER))
61 
/*
 * Custom events inserted in the stream at start and stop of cuts.
 *
 * Both are built with GST_EVENT_MAKE_TYPE, using event number 146 for the
 * start of a cut and 147 for its end, and both carry the downstream and
 * serialized type flags.
 */
#define GST_EVENT_VADER_START \
    (GST_EVENT_MAKE_TYPE(146, \
                         GST_EVENT_TYPE_DOWNSTREAM | GST_EVENT_TYPE_SERIALIZED))
#define GST_EVENT_VADER_STOP \
    (GST_EVENT_MAKE_TYPE(147, \
                         GST_EVENT_TYPE_DOWNSTREAM | GST_EVENT_TYPE_SERIALIZED))
67 
/* Forward declarations of the class and instance structure types. */
typedef struct _GstVaderClass GstVaderClass;
typedef struct _GstVader GstVader;
70 
/** Maximum frame size over which VAD is calculated. */
#define VADER_FRAME  512

/**
 * Number of frames over which to vote on the speech/non-speech decision;
 * also the length of the voting window array in the instance structure.
 */
#define VADER_WINDOW 5
75 
76 struct _GstVader
77 {
78     GstElement element;
79 
80     GstPad *sinkpad, *srcpad;
81 
82     GStaticRecMutex mtx;          /**< Lock used in setting parameters. */
83 
84     gboolean window[VADER_WINDOW];/**< Voting window of speech/silence decisions. */
85     gboolean silent;		  /**< Current state of the filter. */
86     gboolean silent_prev;	  /**< Previous state of the filter. */
87     GList *pre_buffer;            /**< list of GstBuffers in pre-record buffer */
88     guint64 silent_run_length;    /**< How much silence have we endured so far? */
89     guint64 pre_run_length;       /**< How much pre-silence have we endured so far? */
90 
91     gint threshold_level;         /**< Silence threshold level (Q15, adaptive). */
92     gint prior_sample;		  /**< Prior sample for pre-emphasis filter. */
93     guint64 threshold_length;     /**< Minimum silence for cutting, in nanoseconds. */
94     guint64 pre_length;           /**< Pre-buffer to add on silence->speech transition. */
95 
96     gboolean auto_threshold;      /**< Set threshold automatically. */
97     gint silence_mean;            /**< Mean RMS power of silence frames. */
98     gint silence_stddev;          /**< Variance in RMS power of silence frames. */
99     gint silence_frames;          /**< Number of frames used in estimating mean/variance */
100 
101     gchar *dumpdir;               /**< Directory to dump audio to (for debugging). */
102     FILE *dumpfile;		  /**< Current audio dump file. */
103     gint dumpidx;                 /**< Dump file index. */
104 };
105 
106 struct _GstVaderClass
107 {
108     GstElementClass parent_class;
109     void (*vader_start) (GstVader* filter);
110     void (*vader_stop) (GstVader* filter);
111 };
112 
113 GType gst_vader_get_type (void);
114 
115 #ifdef __cplusplus
116 }
117 #endif /* __cplusplus */
118 
119 
120 #endif /* __GST_VADER_H__ */
121