1 /* ==================================================================== 2 * Copyright (c) 1999-2010 Carnegie Mellon University. All rights 3 * reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * 17 * This work was supported in part by funding from the Defense Advanced 18 * Research Projects Agency and the National Science Foundation of the 19 * United States of America, and the CMU Sphinx Speech Consortium. 20 * 21 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 22 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 25 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 * 33 * ==================================================================== 34 * 35 */ 36 37 #ifndef __GST_VADER_H__ 38 #define __GST_VADER_H__ 39 40 41 #include <gst/gst.h> 42 #include <gst/gstevent.h> 43 44 #ifdef __cplusplus 45 extern "C" { 46 #endif /* __cplusplus */ 47 #if 0 48 } 49 #endif 50 51 #define GST_TYPE_VADER \ 52 (gst_vader_get_type()) 53 #define GST_VADER(obj) \ 54 (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_VADER,GstVader)) 55 #define GST_VADER_CLASS(klass) \ 56 (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_VADER,GstVaderClass)) 57 #define GST_IS_VADER(obj) \ 58 (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_VADER)) 59 #define GST_IS_VADER_CLASS(klass) \ 60 (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_VADER)) 61 62 /* Custom events inserted in the stream at start and stop of cuts. */ 63 #define GST_EVENT_VADER_START \ 64 GST_EVENT_MAKE_TYPE(146, GST_EVENT_TYPE_DOWNSTREAM | GST_EVENT_TYPE_SERIALIZED) 65 #define GST_EVENT_VADER_STOP \ 66 GST_EVENT_MAKE_TYPE(147, GST_EVENT_TYPE_DOWNSTREAM | GST_EVENT_TYPE_SERIALIZED) 67 68 typedef struct _GstVader GstVader; 69 typedef struct _GstVaderClass GstVaderClass; 70 71 /* Maximum frame size over which VAD is calculated. */ 72 #define VADER_FRAME 512 73 /* Number of frames over which to vote on speech/non-speech decision. */ 74 #define VADER_WINDOW 5 75 76 struct _GstVader 77 { 78 GstElement element; 79 80 GstPad *sinkpad, *srcpad; 81 82 GStaticRecMutex mtx; /**< Lock used in setting parameters. */ 83 84 gboolean window[VADER_WINDOW];/**< Voting window of speech/silence decisions. */ 85 gboolean silent; /**< Current state of the filter. */ 86 gboolean silent_prev; /**< Previous state of the filter. */ 87 GList *pre_buffer; /**< list of GstBuffers in pre-record buffer */ 88 guint64 silent_run_length; /**< How much silence have we endured so far? */ 89 guint64 pre_run_length; /**< How much pre-silence have we endured so far? */ 90 91 gint threshold_level; /**< Silence threshold level (Q15, adaptive). */ 92 gint prior_sample; /**< Prior sample for pre-emphasis filter. */ 93 guint64 threshold_length; /**< Minimum silence for cutting, in nanoseconds. */ 94 guint64 pre_length; /**< Pre-buffer to add on silence->speech transition. */ 95 96 gboolean auto_threshold; /**< Set threshold automatically. */ 97 gint silence_mean; /**< Mean RMS power of silence frames. */ 98 gint silence_stddev; /**< Variance in RMS power of silence frames. */ 99 gint silence_frames; /**< Number of frames used in estimating mean/variance */ 100 101 gchar *dumpdir; /**< Directory to dump audio to (for debugging). */ 102 FILE *dumpfile; /**< Current audio dump file. */ 103 gint dumpidx; /**< Dump file index. */ 104 }; 105 106 struct _GstVaderClass 107 { 108 GstElementClass parent_class; 109 void (*vader_start) (GstVader* filter); 110 void (*vader_stop) (GstVader* filter); 111 }; 112 113 GType gst_vader_get_type (void); 114 115 #ifdef __cplusplus 116 } 117 #endif /* __cplusplus */ 118 119 120 #endif /* __GST_VADER_H__ */ 121