1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2007 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
37  */
38 
39 #ifdef HAVE_CONFIG_H
40 #  include <config.h>
41 #endif
42 
43 #include <string.h>
44 #include <gst/gst.h>
45 
46 #include <sphinxbase/strfuncs.h>
47 
48 #include "gstpocketsphinx.h"
49 #include "gstvader.h"
50 #include "psmarshal.h"
51 
52 GST_DEBUG_CATEGORY_STATIC(pocketsphinx_debug);
53 #define GST_CAT_DEFAULT pocketsphinx_debug
54 
55 /*
56  * Forward declarations.
57  */
58 
59 static void gst_pocketsphinx_set_property(GObject * object, guint prop_id,
60                                           const GValue * value, GParamSpec * pspec);
61 static void gst_pocketsphinx_get_property(GObject * object, guint prop_id,
62                                           GValue * value, GParamSpec * pspec);
63 static GstFlowReturn gst_pocketsphinx_chain(GstPad * pad, GstBuffer * buffer);
64 static gboolean gst_pocketsphinx_event(GstPad *pad, GstEvent *event);
65 
/* Indices into gst_pocketsphinx_signals[], one per signal the element emits. */
enum {
    SIGNAL_PARTIAL_RESULT = 0,  /* "partial_result" signal */
    SIGNAL_RESULT,              /* "result" signal */
    LAST_SIGNAL                 /* number of signals; sizes the id table */
};
72 
/*
 * GObject property identifiers, used when the properties are installed in
 * class_init and dispatched on in the set_property/get_property handlers.
 * PROP_0 is a placeholder so that the first installed property has id 1.
 */
enum {
    PROP_0 = 0,
    PROP_HMM_DIR,
    PROP_LM_FILE,
    PROP_LMCTL_FILE,
    PROP_LM_NAME,
    PROP_DICT_FILE,
    PROP_MLLR_FILE,
    PROP_FSG_FILE,
    PROP_FSG_MODEL,
    PROP_FWDFLAT,
    PROP_BESTPATH,
    PROP_MAXHMMPF,
    PROP_MAXWPF,
    PROP_BEAM,
    PROP_WBEAM,
    PROP_PBEAM,
    PROP_DSRATIO,
    PROP_LATDIR,
    PROP_LATTICE,
    PROP_NBEST,
    PROP_NBEST_SIZE,
    PROP_DECODER,
    PROP_CONFIGURED
};
99 
100 /*
101  * Static data.
102  */
103 
/*
 * Built-in decoder arguments, laid out like a command line: the program
 * name followed by option/value pairs.  They are parsed into the element's
 * configuration when an instance is created.  (Will go away soon and be
 * constructed using properties.)
 */
static char *default_argv[] = {
    "gst-pocketsphinx",
    "-samprate",    "8000",
    "-cmn",         "prior",
    "-fwdflat",     "no",
    "-bestpath",    "no",
    "-maxhmmpf",    "2000",
    "-maxwpf",      "20"
};
/* Number of entries in default_argv. */
static const int default_argc = sizeof default_argv / sizeof default_argv[0];
115 
116 static GstStaticPadTemplate sink_factory =
117     GST_STATIC_PAD_TEMPLATE("sink",
118                             GST_PAD_SINK,
119                             GST_PAD_ALWAYS,
120                             GST_STATIC_CAPS("audio/x-raw-int, "
121                                             "width = (int) 16, "
122                                             "depth = (int) 16, "
123                                             "signed = (boolean) true, "
124                                             "endianness = (int) BYTE_ORDER, "
125                                             "channels = (int) 1, "
126                                             "rate = (int) 8000")
127         );
128 
129 static GstStaticPadTemplate src_factory =
130     GST_STATIC_PAD_TEMPLATE("src",
131                             GST_PAD_SRC,
132                             GST_PAD_ALWAYS,
133                             GST_STATIC_CAPS("text/plain")
134         );
135 static guint gst_pocketsphinx_signals[LAST_SIGNAL];
136 
137 /*
138  * Boxing of ps_lattice_t.
139  */
140 
141 GType
ps_lattice_get_type(void)142 ps_lattice_get_type(void)
143 {
144     static GType ps_lattice_type = 0;
145 
146     if (G_UNLIKELY(ps_lattice_type == 0)) {
147         ps_lattice_type = g_boxed_type_register_static
148             ("PSLattice",
149              /* Conveniently, these should just work. */
150              (GBoxedCopyFunc) ps_lattice_retain,
151              (GBoxedFreeFunc) ps_lattice_free);
152     }
153 
154     return ps_lattice_type;
155 }
156 
157 /*
158  * Boxing of ps_decoder_t.
159  */
160 
161 GType
ps_decoder_get_type(void)162 ps_decoder_get_type(void)
163 {
164     static GType ps_decoder_type = 0;
165 
166     if (G_UNLIKELY(ps_decoder_type == 0)) {
167         ps_decoder_type = g_boxed_type_register_static
168             ("PSDecoder",
169              /* Conveniently, these should just work. */
170              (GBoxedCopyFunc) ps_retain,
171              (GBoxedFreeFunc) ps_free);
172     }
173 
174     return ps_decoder_type;
175 }
176 
177 
178 /*
179  * gst_pocketsphinx element.
180  */
181 GST_BOILERPLATE (GstPocketSphinx, gst_pocketsphinx, GstElement, GST_TYPE_ELEMENT);
182 
183 static void
gst_pocketsphinx_base_init(gpointer gclass)184 gst_pocketsphinx_base_init(gpointer gclass)
185 {
186     static const GstElementDetails element_details = {
187         "PocketSphinx",
188         "Filter/Audio",
189         "Convert speech to text",
190         "David Huggins-Daines <dhuggins@cs.cmu.edu>"
191     };
192     GstElementClass *element_class = GST_ELEMENT_CLASS(gclass);
193 
194     gst_element_class_add_pad_template(element_class,
195                                        gst_static_pad_template_get(&sink_factory));
196     gst_element_class_add_pad_template(element_class,
197                                        gst_static_pad_template_get(&src_factory));
198     gst_element_class_set_details(element_class, &element_details);
199 }
200 
201 static void
string_disposal(gpointer key,gpointer value,gpointer user_data)202 string_disposal(gpointer key, gpointer value, gpointer user_data)
203 {
204     g_free(value);
205 }
206 
207 static void
gst_pocketsphinx_finalize(GObject * gobject)208 gst_pocketsphinx_finalize(GObject * gobject)
209 {
210     GstPocketSphinx *ps = GST_POCKETSPHINX(gobject);
211 
212     g_hash_table_foreach(ps->arghash, string_disposal, NULL);
213     g_hash_table_destroy(ps->arghash);
214     g_free(ps->last_result);
215     ps_free(ps->ps);
216     cmd_ln_free_r(ps->config);
217     GST_CALL_PARENT(G_OBJECT_CLASS, finalize,(gobject));
218 }
219 
220 static void
gst_pocketsphinx_class_init(GstPocketSphinxClass * klass)221 gst_pocketsphinx_class_init(GstPocketSphinxClass * klass)
222 {
223     GObjectClass *gobject_class;
224 
225     gobject_class =(GObjectClass *) klass;
226 
227     gobject_class->set_property = gst_pocketsphinx_set_property;
228     gobject_class->get_property = gst_pocketsphinx_get_property;
229     gobject_class->finalize = GST_DEBUG_FUNCPTR(gst_pocketsphinx_finalize);
230 
231     /* TODO: We will bridge cmd_ln.h properties to GObject
232      * properties here somehow eventually. */
233     g_object_class_install_property
234         (gobject_class, PROP_HMM_DIR,
235          g_param_spec_string("hmm", "HMM Directory",
236                              "Directory containing acoustic model parameters",
237                              NULL,
238                              G_PARAM_READWRITE));
239     g_object_class_install_property
240         (gobject_class, PROP_LM_FILE,
241          g_param_spec_string("lm", "LM File",
242                              "Language model file",
243                              NULL,
244                              G_PARAM_READWRITE));
245     g_object_class_install_property
246         (gobject_class, PROP_LMCTL_FILE,
247          g_param_spec_string("lmctl", "LM Control File",
248                              "Language model control file (for class LMs)",
249                              NULL,
250                              G_PARAM_READWRITE));
251     g_object_class_install_property
252         (gobject_class, PROP_LM_NAME,
253          g_param_spec_string("lmname", "LM Name",
254                              "Language model name (to select LMs from lmctl)",
255                              NULL,
256                              G_PARAM_READWRITE));
257     g_object_class_install_property
258         (gobject_class, PROP_FSG_FILE,
259          g_param_spec_string("fsg", "FSG File",
260                              "Finite state grammar file",
261                              NULL,
262                              G_PARAM_READWRITE));
263     g_object_class_install_property
264         (gobject_class, PROP_FSG_MODEL,
265          g_param_spec_pointer("fsg_model", "FSG Model",
266                               "Finite state grammar object (fsg_model_t *)",
267                               G_PARAM_WRITABLE));
268     g_object_class_install_property
269         (gobject_class, PROP_DICT_FILE,
270          g_param_spec_string("dict", "Dictionary File",
271                              "Dictionary File",
272                              NULL,
273                              G_PARAM_READWRITE));
274     g_object_class_install_property
275         (gobject_class, PROP_MLLR_FILE,
276          g_param_spec_string("mllr", "MLLR file",
277                              "MLLR file",
278                              NULL,
279                              G_PARAM_READWRITE));
280     g_object_class_install_property
281         (gobject_class, PROP_FWDFLAT,
282          g_param_spec_boolean("fwdflat", "Flat Lexicon Search",
283                               "Enable Flat Lexicon Search",
284                               FALSE,
285                               G_PARAM_READWRITE));
286     g_object_class_install_property
287         (gobject_class, PROP_BESTPATH,
288          g_param_spec_boolean("bestpath", "Graph Search",
289                               "Enable Graph Search",
290                               FALSE,
291                               G_PARAM_READWRITE));
292 
293     g_object_class_install_property
294         (gobject_class, PROP_LATDIR,
295          g_param_spec_string("latdir", "Lattice Directory",
296                              "Output Directory for Lattices",
297                              NULL,
298                              G_PARAM_READWRITE));
299     g_object_class_install_property
300         (gobject_class, PROP_LATTICE,
301          g_param_spec_boxed("lattice", "Word Lattice",
302                             "Word lattice object for most recent result",
303                             PS_LATTICE_TYPE,
304                             G_PARAM_READABLE));
305     g_object_class_install_property
306         (gobject_class, PROP_NBEST,
307          g_param_spec_value_array("nbest", "N-best results",
308                           "N-best results",
309                           g_param_spec_string("nbest-hyp", "N-best hyp",
310                             "N-best hyp",
311                             NULL,
312                             G_PARAM_READABLE),
313                           G_PARAM_READABLE));
314     g_object_class_install_property
315         (gobject_class, PROP_NBEST_SIZE,
316          g_param_spec_int("nbest_size", "Size of N-best list",
317                           "Number of hypothesis in the N-best list",
318                           1, 1000, 10,
319                           G_PARAM_READWRITE));
320     g_object_class_install_property
321         (gobject_class, PROP_MAXHMMPF,
322          g_param_spec_int("maxhmmpf", "Maximum HMMs per frame",
323                           "Maximum number of HMMs searched per frame",
324                           1, 100000, 1000,
325                           G_PARAM_READWRITE));
326     g_object_class_install_property
327         (gobject_class, PROP_MAXWPF,
328          g_param_spec_int("maxwpf", "Maximum words per frame",
329                           "Maximum number of words searched per frame",
330                           1, 100000, 10,
331                           G_PARAM_READWRITE));
332     g_object_class_install_property
333         (gobject_class, PROP_BEAM,
334          g_param_spec_float("beam", "Beam width applied to every frame in Viterbi search",
335                           "Beam width applied to every frame in Viterbi search",
336                           -1, 1, 1e-48,
337                           G_PARAM_READWRITE));
338     g_object_class_install_property
339         (gobject_class, PROP_PBEAM,
340          g_param_spec_float("pbeam", "Beam width applied to phone transitions",
341                           "Beam width applied to phone transitions",
342                           -1, 1, 1e-48,
343                           G_PARAM_READWRITE));
344     g_object_class_install_property
345         (gobject_class, PROP_WBEAM,
346          g_param_spec_float("wbeam", "Beam width applied to word exits",
347                           "Beam width applied to phone transitions",
348                           -1, 1, 7e-29,
349                           G_PARAM_READWRITE));
350     g_object_class_install_property
351         (gobject_class, PROP_DSRATIO,
352          g_param_spec_int("dsratio", "Frame downsampling ratio",
353                           "Evaluate acoustic model every N frames",
354                           1, 10, 1,
355                           G_PARAM_READWRITE));
356     g_object_class_install_property
357         (gobject_class, PROP_DECODER,
358          g_param_spec_boxed("decoder", "Decoder object",
359                             "The underlying decoder",
360                             PS_DECODER_TYPE,
361                             G_PARAM_READABLE));
362     g_object_class_install_property
363         (gobject_class, PROP_CONFIGURED,
364          g_param_spec_boolean("configured", "Finalize configuration",
365                               "Set this to finalize configuration",
366                               FALSE,
367                               G_PARAM_READWRITE));
368 
369     gst_pocketsphinx_signals[SIGNAL_PARTIAL_RESULT] =
370         g_signal_new("partial_result",
371                      G_TYPE_FROM_CLASS(klass),
372                      G_SIGNAL_RUN_LAST,
373                      G_STRUCT_OFFSET(GstPocketSphinxClass, partial_result),
374                      NULL, NULL,
375                      ps_marshal_VOID__STRING_STRING,
376                      G_TYPE_NONE,
377                      2, G_TYPE_STRING, G_TYPE_STRING
378             );
379 
380     gst_pocketsphinx_signals[SIGNAL_RESULT] =
381         g_signal_new("result",
382                      G_TYPE_FROM_CLASS(klass),
383                      G_SIGNAL_RUN_LAST,
384                      G_STRUCT_OFFSET(GstPocketSphinxClass, result),
385                      NULL, NULL,
386                      ps_marshal_VOID__STRING_STRING,
387                      G_TYPE_NONE,
388                      2, G_TYPE_STRING, G_TYPE_STRING
389             );
390 
391     GST_DEBUG_CATEGORY_INIT(pocketsphinx_debug, "pocketsphinx", 0,
392                             "Automatic Speech Recognition");
393 }
394 
395 static void
gst_pocketsphinx_set_string(GstPocketSphinx * ps,const gchar * key,const GValue * value)396 gst_pocketsphinx_set_string(GstPocketSphinx *ps,
397                             const gchar *key, const GValue *value)
398 {
399     gchar *oldstr, *newstr;
400 
401     if (value != NULL)
402         newstr = g_strdup(g_value_get_string(value));
403     else
404         newstr = NULL;
405     if ((oldstr = g_hash_table_lookup(ps->arghash, key)))
406         g_free(oldstr);
407     cmd_ln_set_str_r(ps->config, key, newstr);
408     g_hash_table_foreach(ps->arghash, (gpointer)key, newstr);
409 }
410 
411 static void
gst_pocketsphinx_set_int(GstPocketSphinx * ps,const gchar * key,const GValue * value)412 gst_pocketsphinx_set_int(GstPocketSphinx *ps,
413                          const gchar *key, const GValue *value)
414 {
415     cmd_ln_set_int32_r(ps->config, key, g_value_get_int(value));
416 }
417 
418 static void
gst_pocketsphinx_set_boolean(GstPocketSphinx * ps,const gchar * key,const GValue * value)419 gst_pocketsphinx_set_boolean(GstPocketSphinx *ps,
420                              const gchar *key, const GValue *value)
421 {
422     cmd_ln_set_boolean_r(ps->config, key, g_value_get_boolean(value));
423 }
424 
425 static void
gst_pocketsphinx_set_float(GstPocketSphinx * ps,const gchar * key,const GValue * value)426 gst_pocketsphinx_set_float(GstPocketSphinx *ps,
427                          const gchar *key, const GValue *value)
428 {
429     cmd_ln_set_float_r(ps->config, key, g_value_get_float(value));
430 }
431 
432 static void
gst_pocketsphinx_set_property(GObject * object,guint prop_id,const GValue * value,GParamSpec * pspec)433 gst_pocketsphinx_set_property(GObject * object, guint prop_id,
434                               const GValue * value, GParamSpec * pspec)
435 {
436     GstPocketSphinx *ps = GST_POCKETSPHINX(object);
437 
438     switch (prop_id) {
439     case PROP_CONFIGURED:
440         if (ps->ps)
441             ps_reinit(ps->ps, NULL);
442         else
443             ps->ps = ps_init(ps->config);
444         break;
445     case PROP_HMM_DIR:
446         gst_pocketsphinx_set_string(ps, "-hmm", value);
447         if (ps->ps) {
448             /* Reinitialize the decoder with the new acoustic model. */
449             ps_reinit(ps->ps, NULL);
450         }
451         break;
452     case PROP_LM_FILE:
453         /* FSG and LM are mutually exclusive. */
454         gst_pocketsphinx_set_string(ps, "-fsg", NULL);
455         gst_pocketsphinx_set_string(ps, "-lmctl", NULL);
456         gst_pocketsphinx_set_string(ps, "-lm", value);
457         if (ps->ps) {
458             ngram_model_t *lm, *lmset;
459 
460             /* Switch to this new LM. */
461             lm = ngram_model_read(ps->config,
462                                   g_value_get_string(value),
463                                   NGRAM_AUTO,
464                                   ps_get_logmath(ps->ps));
465             lmset = ps_get_lmset(ps->ps);
466             ngram_model_set_add(lmset, lm, g_value_get_string(value),
467                                 1.0, TRUE);
468             ps_update_lmset(ps->ps, lmset);
469         }
470         break;
471     case PROP_LMCTL_FILE:
472         /* FSG and LM are mutually exclusive. */
473         gst_pocketsphinx_set_string(ps, "-fsg", NULL);
474         gst_pocketsphinx_set_string(ps, "-lmctl", value);
475         gst_pocketsphinx_set_string(ps, "-lm", NULL);
476         if (ps->ps) {
477             ngram_model_t *lmset;
478             lmset = ngram_model_set_read(ps->config,
479                                          g_value_get_string(value),
480                                          ps_get_logmath(ps->ps));
481             ps_update_lmset(ps->ps, lmset);
482         }
483         break;
484     case PROP_LM_NAME:
485         gst_pocketsphinx_set_string(ps, "-fsg", NULL);
486         gst_pocketsphinx_set_string(ps, "-lmname", value);
487         if (ps->ps) {
488             ngram_model_t *lm, *lmset;
489 
490             lmset = ps_get_lmset(ps->ps);
491             lm = ngram_model_set_select(lmset, g_value_get_string(value));
492             ps_update_lmset(ps->ps, lmset);
493         }
494 
495     case PROP_DICT_FILE:
496         gst_pocketsphinx_set_string(ps, "-dict", value);
497         if (ps->ps) {
498             /* Reinitialize the decoder with the new dictionary. */
499             ps_reinit(ps->ps, NULL);
500         }
501         break;
502     case PROP_MLLR_FILE:
503         gst_pocketsphinx_set_string(ps, "-mllr", value);
504         if (ps->ps) {
505             /* Reinitialize the decoder with the new MLLR transform. */
506             ps_reinit(ps->ps, NULL);
507         }
508         break;
509     case PROP_FSG_MODEL:
510     {
511         fsg_set_t *fsgs = ps_get_fsgset(ps->ps);
512 
513         if (fsgs == NULL)
514     	    fsgs = ps_update_fsgset(ps->ps);
515 
516         if (fsgs) {
517 	    fsg_model_t *fsg = g_value_get_pointer(value);
518 
519     	    fsg_set_remove_byname(fsgs, fsg_model_name(fsg));
520     	    fsg_set_add(fsgs, fsg_model_name(fsg), fsg);
521     	    fsg_set_select(fsgs, fsg_model_name(fsg));
522     	}
523         break;
524     }
525     case PROP_FSG_FILE:
526         /* FSG and LM are mutually exclusive */
527         gst_pocketsphinx_set_string(ps, "-lm", NULL);
528         gst_pocketsphinx_set_string(ps, "-fsg", value);
529 
530         if (ps->ps) {
531             /* Switch to this new FSG. */
532             fsg_model_t *fsg;
533             fsg_set_t *fsgs = ps_get_fsgset(ps->ps);
534 
535             if (fsgs == NULL)
536     		fsgs = ps_update_fsgset(ps->ps);
537 
538             fsg = fsg_model_readfile(g_value_get_string(value),
539                                      ps_get_logmath(ps->ps),
540                                      cmd_ln_float32_r(ps->config, "-lw"));
541 
542             if (fsgs && fsg) {
543                 fsg_set_add(fsgs, fsg_model_name(fsg), fsg);
544                 fsg_set_select(fsgs, fsg_model_name(fsg));
545             }
546         }
547         break;
548     case PROP_FWDFLAT:
549         gst_pocketsphinx_set_boolean(ps, "-fwdflat", value);
550         break;
551     case PROP_BESTPATH:
552         gst_pocketsphinx_set_boolean(ps, "-bestpath", value);
553         break;
554     case PROP_LATDIR:
555         if (ps->latdir)
556             g_free(ps->latdir);
557         ps->latdir = g_strdup(g_value_get_string(value));
558         break;
559     case PROP_NBEST_SIZE:
560 	ps->n_best_size = g_value_get_int(value);
561         break;
562     case PROP_MAXHMMPF:
563         gst_pocketsphinx_set_int(ps, "-maxhmmpf", value);
564         break;
565     case PROP_MAXWPF:
566         gst_pocketsphinx_set_int(ps, "-maxwpf", value);
567         break;
568     case PROP_BEAM:
569         gst_pocketsphinx_set_float(ps, "-beam", value);
570         break;
571     case PROP_PBEAM:
572         gst_pocketsphinx_set_float(ps, "-pbeam", value);
573         break;
574     case PROP_WBEAM:
575         gst_pocketsphinx_set_float(ps, "-wbeam", value);
576         break;
577     case PROP_DSRATIO:
578         gst_pocketsphinx_set_int(ps, "-ds", value);
579         break;
580     default:
581         G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
582         return;
583     }
584 }
585 
586 static void
gst_pocketsphinx_get_property(GObject * object,guint prop_id,GValue * value,GParamSpec * pspec)587 gst_pocketsphinx_get_property(GObject * object, guint prop_id,
588                               GValue * value, GParamSpec * pspec)
589 {
590     GstPocketSphinx *ps = GST_POCKETSPHINX(object);
591 
592     switch (prop_id) {
593     case PROP_DECODER:
594         g_value_set_boxed(value, ps->ps);
595         break;
596     case PROP_CONFIGURED:
597         g_value_set_boolean(value, ps->ps != NULL);
598         break;
599     case PROP_HMM_DIR:
600         g_value_set_string(value, cmd_ln_str_r(ps->config, "-hmm"));
601         break;
602     case PROP_LM_FILE:
603         g_value_set_string(value, cmd_ln_str_r(ps->config, "-lm"));
604         break;
605     case PROP_LMCTL_FILE:
606         g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmctl"));
607         break;
608     case PROP_LM_NAME:
609         g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmname"));
610         break;
611     case PROP_DICT_FILE:
612         g_value_set_string(value, cmd_ln_str_r(ps->config, "-dict"));
613         break;
614     case PROP_MLLR_FILE:
615         g_value_set_string(value, cmd_ln_str_r(ps->config, "-mllr"));
616         break;
617     case PROP_FSG_FILE:
618         g_value_set_string(value, cmd_ln_str_r(ps->config, "-fsg"));
619         break;
620     case PROP_FWDFLAT:
621         g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-fwdflat"));
622         break;
623     case PROP_BESTPATH:
624         g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-bestpath"));
625         break;
626     case PROP_LATDIR:
627         g_value_set_string(value, ps->latdir);
628         break;
629     case PROP_LATTICE: {
630         ps_lattice_t *dag;
631 
632         if (ps->ps && (dag = ps_get_lattice(ps->ps)))
633             g_value_set_boxed(value, dag);
634         else
635             g_value_set_boxed(value, NULL);
636         break;
637     }
638     case PROP_MAXHMMPF:
639         g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxhmmpf"));
640         break;
641     case PROP_MAXWPF:
642         g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxwpf"));
643         break;
644     case PROP_BEAM:
645         g_value_set_double(value, cmd_ln_float_r(ps->config, "-beam"));
646         break;
647     case PROP_PBEAM:
648         g_value_set_double(value, cmd_ln_float_r(ps->config, "-pbeam"));
649         break;
650     case PROP_WBEAM:
651         g_value_set_double(value, cmd_ln_float_r(ps->config, "-wbeam"));
652         break;
653     case PROP_DSRATIO:
654         g_value_set_int(value, cmd_ln_int32_r(ps->config, "-ds"));
655         break;
656     case PROP_NBEST_SIZE:
657         g_value_set_int(value, ps->n_best_size);
658         break;
659     case PROP_NBEST: {
660         int i = 0, out_score = 0;
661         GValueArray *arr;
662         if (!ps->ps) {
663             break;
664         }
665 	arr = g_value_array_new(1);
666         ps_nbest_t *ps_nbest_list = ps_nbest(ps->ps, 0, -1, NULL, NULL);
667         if (ps_nbest_list) {
668             ps_nbest_list = ps_nbest_next(ps_nbest_list);
669             while ((i < ps->n_best_size) && (ps_nbest_list != NULL)) {
670                 GValue value1 = { 0 };
671                 g_value_init (&value1, G_TYPE_STRING);
672                 const char* hyp = ps_nbest_hyp(ps_nbest_list, &out_score);
673                 g_value_set_string(&value1, hyp);
674                 g_value_array_append(arr, &value1);
675                 ps_nbest_list = ps_nbest_next(ps_nbest_list);
676                 i++;
677             }
678             if (ps_nbest_list) {
679                 ps_nbest_free(ps_nbest_list);
680             }
681         }
682         g_value_set_boxed (value, arr);
683         break;
684     }
685     default:
686         G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
687         break;
688     }
689 }
690 
691 static void
gst_pocketsphinx_init(GstPocketSphinx * ps,GstPocketSphinxClass * gclass)692 gst_pocketsphinx_init(GstPocketSphinx * ps,
693                       GstPocketSphinxClass * gclass)
694 {
695     ps->sinkpad =
696         gst_pad_new_from_static_template(&sink_factory, "sink");
697     ps->srcpad =
698         gst_pad_new_from_static_template(&src_factory, "src");
699 
700     /* Create the hash table to store argument strings. */
701     ps->arghash = g_hash_table_new(g_str_hash, g_str_equal);
702 
703     /* Parse default command-line options. */
704     ps->config = cmd_ln_parse_r(NULL, ps_args(), default_argc, default_argv, FALSE);
705 
706     /* Set up pads. */
707     gst_element_add_pad(GST_ELEMENT(ps), ps->sinkpad);
708     gst_pad_set_chain_function(ps->sinkpad, gst_pocketsphinx_chain);
709     gst_pad_set_event_function(ps->sinkpad, gst_pocketsphinx_event);
710     gst_pad_use_fixed_caps(ps->sinkpad);
711 
712     gst_element_add_pad(GST_ELEMENT(ps), ps->srcpad);
713     gst_pad_use_fixed_caps(ps->srcpad);
714 
715     /* Initialize time. */
716     ps->last_result_time = 0;
717     ps->last_result = NULL;
718 
719     /* Nbest size */
720     ps->n_best_size = 10;
721 }
722 
723 static GstFlowReturn
gst_pocketsphinx_chain(GstPad * pad,GstBuffer * buffer)724 gst_pocketsphinx_chain(GstPad * pad, GstBuffer * buffer)
725 {
726     GstPocketSphinx *ps;
727 
728     ps = GST_POCKETSPHINX(GST_OBJECT_PARENT(pad));
729 
730     /* Start an utterance for the first buffer we get (i.e. we assume
731      * that the VADER is "leaky") */
732     if (!ps->listening) {
733         ps->listening = TRUE;
734         ps_start_utt(ps->ps, NULL);
735     }
736     ps_process_raw(ps->ps,
737                    (short *)GST_BUFFER_DATA(buffer),
738                    GST_BUFFER_SIZE(buffer) / sizeof(short),
739                    FALSE, FALSE);
740 
741     /* Get a partial result every now and then, see if it is different. */
742     if (ps->last_result_time == 0
743         /* Check every 100 milliseconds. */
744         || (GST_BUFFER_TIMESTAMP(buffer) - ps->last_result_time) > 100*10*1000) {
745         int32 score;
746         char const *hyp;
747         char const *uttid;
748 
749         hyp = ps_get_hyp(ps->ps, &score, &uttid);
750         ps->last_result_time = GST_BUFFER_TIMESTAMP(buffer);
751         if (hyp && strlen(hyp) > 0) {
752             if (ps->last_result == NULL || 0 != strcmp(ps->last_result, hyp)) {
753                 g_free(ps->last_result);
754                 ps->last_result = g_strdup(hyp);
755                 /* Emit a signal for applications. */
756                 g_signal_emit(ps, gst_pocketsphinx_signals[SIGNAL_PARTIAL_RESULT],
757                               0, hyp, uttid);
758             }
759         }
760     }
761     gst_buffer_unref(buffer);
762     return GST_FLOW_OK;
763 }
764 
765 static gboolean
gst_pocketsphinx_event(GstPad * pad,GstEvent * event)766 gst_pocketsphinx_event(GstPad *pad, GstEvent *event)
767 {
768     GstPocketSphinx *ps;
769 
770     ps = GST_POCKETSPHINX(GST_OBJECT_PARENT(pad));
771 
772     /* Pick out VAD events. */
773     switch (event->type) {
774     case GST_EVENT_NEWSEGMENT:
775         /* Initialize the decoder once the audio starts, if it's not
776          * there yet. */
777         if (ps->ps == NULL) {
778             ps->ps = ps_init(ps->config);
779             if (ps->ps == NULL) {
780                 GST_ELEMENT_ERROR(GST_ELEMENT(ps), LIBRARY, INIT,
781                                   ("Failed to initialize PocketSphinx"),
782                                   ("Failed to initialize PocketSphinx"));
783                 return FALSE;
784             }
785         }
786         return gst_pad_event_default(pad, event);
787     case GST_EVENT_VADER_START:
788         ps->listening = TRUE;
789         ps_start_utt(ps->ps, NULL);
790         /* Forward this event. */
791         return gst_pad_event_default(pad, event);
792     case GST_EVENT_EOS:
793     case GST_EVENT_VADER_STOP: {
794         GstBuffer *buffer;
795         int32 score;
796         char const *hyp;
797         char const *uttid;
798 
799         hyp = NULL;
800         if (ps->listening) {
801             ps->listening = FALSE;
802             ps_end_utt(ps->ps);
803             hyp = ps_get_hyp(ps->ps, &score, &uttid);
804             /* Dump the lattice if requested. */
805             if (ps->latdir) {
806                 char *latfile = string_join(ps->latdir, "/", uttid, ".lat", NULL);
807                 ps_lattice_t *dag;
808 
809                 if ((dag = ps_get_lattice(ps->ps)))
810                     ps_lattice_write(dag, latfile);
811                 ckd_free(latfile);
812             }
813         }
814         if (hyp) {
815             /* Emit a signal for applications. */
816             g_signal_emit(ps, gst_pocketsphinx_signals[SIGNAL_RESULT],
817                           0, hyp, uttid);
818             /* Forward this result in a buffer. */
819             buffer = gst_buffer_new_and_alloc(strlen(hyp) + 2);
820             strcpy((char *)GST_BUFFER_DATA(buffer), hyp);
821             GST_BUFFER_DATA(buffer)[strlen(hyp)] = '\n';
822             GST_BUFFER_DATA(buffer)[strlen(hyp)+1] = '\0';
823             GST_BUFFER_TIMESTAMP(buffer) = GST_EVENT_TIMESTAMP(event);
824             gst_buffer_set_caps(buffer, GST_PAD_CAPS(ps->srcpad));
825             gst_pad_push(ps->srcpad, buffer);
826         }
827 
828         /* Forward this event. */
829         return gst_pad_event_default(pad, event);
830     }
831     default:
832         /* Don't bother with other events. */
833         return gst_pad_event_default(pad, event);
834     }
835 }
836 
837 static gboolean
plugin_init(GstPlugin * plugin)838 plugin_init(GstPlugin * plugin)
839 {
840     if (!gst_element_register(plugin, "pocketsphinx",
841                               GST_RANK_NONE, GST_TYPE_POCKETSPHINX))
842         return FALSE;
843     if (!gst_element_register(plugin, "vader",
844                               GST_RANK_NONE, GST_TYPE_VADER))
845         return FALSE;
846     return TRUE;
847 }
848 
849 #define VERSION PACKAGE_VERSION
850 #define PACKAGE PACKAGE_NAME
851 GST_PLUGIN_DEFINE(GST_VERSION_MAJOR,
852                   GST_VERSION_MINOR,
853                   "pocketsphinx",
854                   "PocketSphinx plugin",
855                   plugin_init, VERSION,
856 #if (GST_VERSION_MINOR == 10 && GST_VERSION_MICRO < 15) /* Nokia's bogus old GStreamer */
857                   "LGPL",
858 #else
859                   "BSD",
860 #endif
861                   "PocketSphinx", "http://cmusphinx.sourceforge.net/")
862