1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 2007 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
37 */
38
39 #ifdef HAVE_CONFIG_H
40 # include <config.h>
41 #endif
42
43 #include <string.h>
44 #include <gst/gst.h>
45
46 #include <sphinxbase/strfuncs.h>
47
48 #include "gstpocketsphinx.h"
49 #include "gstvader.h"
50 #include "psmarshal.h"
51
52 GST_DEBUG_CATEGORY_STATIC(pocketsphinx_debug);
53 #define GST_CAT_DEFAULT pocketsphinx_debug
54
55 /*
56 * Forward declarations.
57 */
58
/* GObject property accessors; installed on the class in
 * gst_pocketsphinx_class_init(). */
static void gst_pocketsphinx_set_property(GObject * object, guint prop_id,
                                          const GValue * value, GParamSpec * pspec);
static void gst_pocketsphinx_get_property(GObject * object, guint prop_id,
                                          GValue * value, GParamSpec * pspec);
/* Sink pad callbacks; attached to the sink pad in gst_pocketsphinx_init(). */
static GstFlowReturn gst_pocketsphinx_chain(GstPad * pad, GstBuffer * buffer);
static gboolean gst_pocketsphinx_event(GstPad *pad, GstEvent *event);
65
/* Indices into gst_pocketsphinx_signals[]. */
enum
{
    SIGNAL_PARTIAL_RESULT,  /* "partial_result": emitted from the chain function */
    SIGNAL_RESULT,          /* "result": emitted from the event function */
    LAST_SIGNAL             /* number of signals; sizes the signal id array */
};
72
/* GObject property identifiers, dispatched on in
 * gst_pocketsphinx_set_property() and gst_pocketsphinx_get_property(). */
enum
{
    PROP_0,             /* placeholder so that real property ids start at 1 */
    PROP_HMM_DIR,
    PROP_LM_FILE,
    PROP_LMCTL_FILE,
    PROP_LM_NAME,
    PROP_DICT_FILE,
    PROP_MLLR_FILE,
    PROP_FSG_FILE,
    PROP_FSG_MODEL,
    PROP_FWDFLAT,
    PROP_BESTPATH,
    PROP_MAXHMMPF,
    PROP_MAXWPF,
    PROP_BEAM,
    PROP_WBEAM,
    PROP_PBEAM,
    PROP_DSRATIO,
    PROP_LATDIR,
    PROP_LATTICE,
    PROP_NBEST,
    PROP_NBEST_SIZE,
    PROP_DECODER,
    PROP_CONFIGURED
};
99
100 /*
101 * Static data.
102 */
103
/* Default command line. (will go away soon and be constructed using properties)
 *
 * Parsed with cmd_ln_parse_r() in gst_pocketsphinx_init() to build the
 * initial decoder configuration.  The 8000 Hz sample rate matches the
 * "rate" field of the sink pad caps. */
static char *default_argv[] = {
    "gst-pocketsphinx",
    "-samprate", "8000",
    "-cmn", "prior",
    "-fwdflat", "no",
    "-bestpath", "no",
    "-maxhmmpf", "2000",
    "-maxwpf", "20"
};
/* Number of entries in default_argv. */
static const int default_argc = sizeof(default_argv)/sizeof(default_argv[0]);
115
/* Sink pad: always present, accepts mono signed 16-bit native-endian
 * integer audio at 8000 Hz. */
static GstStaticPadTemplate sink_factory =
    GST_STATIC_PAD_TEMPLATE("sink",
                            GST_PAD_SINK,
                            GST_PAD_ALWAYS,
                            GST_STATIC_CAPS("audio/x-raw-int, "
                                            "width = (int) 16, "
                                            "depth = (int) 16, "
                                            "signed = (boolean) true, "
                                            "endianness = (int) BYTE_ORDER, "
                                            "channels = (int) 1, "
                                            "rate = (int) 8000")
        );

/* Source pad: always present, produces text/plain buffers. */
static GstStaticPadTemplate src_factory =
    GST_STATIC_PAD_TEMPLATE("src",
                            GST_PAD_SRC,
                            GST_PAD_ALWAYS,
                            GST_STATIC_CAPS("text/plain")
        );
/* Signal ids returned by g_signal_new(), indexed by the SIGNAL_* enum. */
static guint gst_pocketsphinx_signals[LAST_SIGNAL];
136
137 /*
138 * Boxing of ps_lattice_t.
139 */
140
141 GType
ps_lattice_get_type(void)142 ps_lattice_get_type(void)
143 {
144 static GType ps_lattice_type = 0;
145
146 if (G_UNLIKELY(ps_lattice_type == 0)) {
147 ps_lattice_type = g_boxed_type_register_static
148 ("PSLattice",
149 /* Conveniently, these should just work. */
150 (GBoxedCopyFunc) ps_lattice_retain,
151 (GBoxedFreeFunc) ps_lattice_free);
152 }
153
154 return ps_lattice_type;
155 }
156
157 /*
158 * Boxing of ps_decoder_t.
159 */
160
161 GType
ps_decoder_get_type(void)162 ps_decoder_get_type(void)
163 {
164 static GType ps_decoder_type = 0;
165
166 if (G_UNLIKELY(ps_decoder_type == 0)) {
167 ps_decoder_type = g_boxed_type_register_static
168 ("PSDecoder",
169 /* Conveniently, these should just work. */
170 (GBoxedCopyFunc) ps_retain,
171 (GBoxedFreeFunc) ps_free);
172 }
173
174 return ps_decoder_type;
175 }
176
177
178 /*
179 * gst_pocketsphinx element.
180 */
181 GST_BOILERPLATE (GstPocketSphinx, gst_pocketsphinx, GstElement, GST_TYPE_ELEMENT);
182
183 static void
gst_pocketsphinx_base_init(gpointer gclass)184 gst_pocketsphinx_base_init(gpointer gclass)
185 {
186 static const GstElementDetails element_details = {
187 "PocketSphinx",
188 "Filter/Audio",
189 "Convert speech to text",
190 "David Huggins-Daines <dhuggins@cs.cmu.edu>"
191 };
192 GstElementClass *element_class = GST_ELEMENT_CLASS(gclass);
193
194 gst_element_class_add_pad_template(element_class,
195 gst_static_pad_template_get(&sink_factory));
196 gst_element_class_add_pad_template(element_class,
197 gst_static_pad_template_get(&src_factory));
198 gst_element_class_set_details(element_class, &element_details);
199 }
200
201 static void
string_disposal(gpointer key,gpointer value,gpointer user_data)202 string_disposal(gpointer key, gpointer value, gpointer user_data)
203 {
204 g_free(value);
205 }
206
207 static void
gst_pocketsphinx_finalize(GObject * gobject)208 gst_pocketsphinx_finalize(GObject * gobject)
209 {
210 GstPocketSphinx *ps = GST_POCKETSPHINX(gobject);
211
212 g_hash_table_foreach(ps->arghash, string_disposal, NULL);
213 g_hash_table_destroy(ps->arghash);
214 g_free(ps->last_result);
215 ps_free(ps->ps);
216 cmd_ln_free_r(ps->config);
217 GST_CALL_PARENT(G_OBJECT_CLASS, finalize,(gobject));
218 }
219
220 static void
gst_pocketsphinx_class_init(GstPocketSphinxClass * klass)221 gst_pocketsphinx_class_init(GstPocketSphinxClass * klass)
222 {
223 GObjectClass *gobject_class;
224
225 gobject_class =(GObjectClass *) klass;
226
227 gobject_class->set_property = gst_pocketsphinx_set_property;
228 gobject_class->get_property = gst_pocketsphinx_get_property;
229 gobject_class->finalize = GST_DEBUG_FUNCPTR(gst_pocketsphinx_finalize);
230
231 /* TODO: We will bridge cmd_ln.h properties to GObject
232 * properties here somehow eventually. */
233 g_object_class_install_property
234 (gobject_class, PROP_HMM_DIR,
235 g_param_spec_string("hmm", "HMM Directory",
236 "Directory containing acoustic model parameters",
237 NULL,
238 G_PARAM_READWRITE));
239 g_object_class_install_property
240 (gobject_class, PROP_LM_FILE,
241 g_param_spec_string("lm", "LM File",
242 "Language model file",
243 NULL,
244 G_PARAM_READWRITE));
245 g_object_class_install_property
246 (gobject_class, PROP_LMCTL_FILE,
247 g_param_spec_string("lmctl", "LM Control File",
248 "Language model control file (for class LMs)",
249 NULL,
250 G_PARAM_READWRITE));
251 g_object_class_install_property
252 (gobject_class, PROP_LM_NAME,
253 g_param_spec_string("lmname", "LM Name",
254 "Language model name (to select LMs from lmctl)",
255 NULL,
256 G_PARAM_READWRITE));
257 g_object_class_install_property
258 (gobject_class, PROP_FSG_FILE,
259 g_param_spec_string("fsg", "FSG File",
260 "Finite state grammar file",
261 NULL,
262 G_PARAM_READWRITE));
263 g_object_class_install_property
264 (gobject_class, PROP_FSG_MODEL,
265 g_param_spec_pointer("fsg_model", "FSG Model",
266 "Finite state grammar object (fsg_model_t *)",
267 G_PARAM_WRITABLE));
268 g_object_class_install_property
269 (gobject_class, PROP_DICT_FILE,
270 g_param_spec_string("dict", "Dictionary File",
271 "Dictionary File",
272 NULL,
273 G_PARAM_READWRITE));
274 g_object_class_install_property
275 (gobject_class, PROP_MLLR_FILE,
276 g_param_spec_string("mllr", "MLLR file",
277 "MLLR file",
278 NULL,
279 G_PARAM_READWRITE));
280 g_object_class_install_property
281 (gobject_class, PROP_FWDFLAT,
282 g_param_spec_boolean("fwdflat", "Flat Lexicon Search",
283 "Enable Flat Lexicon Search",
284 FALSE,
285 G_PARAM_READWRITE));
286 g_object_class_install_property
287 (gobject_class, PROP_BESTPATH,
288 g_param_spec_boolean("bestpath", "Graph Search",
289 "Enable Graph Search",
290 FALSE,
291 G_PARAM_READWRITE));
292
293 g_object_class_install_property
294 (gobject_class, PROP_LATDIR,
295 g_param_spec_string("latdir", "Lattice Directory",
296 "Output Directory for Lattices",
297 NULL,
298 G_PARAM_READWRITE));
299 g_object_class_install_property
300 (gobject_class, PROP_LATTICE,
301 g_param_spec_boxed("lattice", "Word Lattice",
302 "Word lattice object for most recent result",
303 PS_LATTICE_TYPE,
304 G_PARAM_READABLE));
305 g_object_class_install_property
306 (gobject_class, PROP_NBEST,
307 g_param_spec_value_array("nbest", "N-best results",
308 "N-best results",
309 g_param_spec_string("nbest-hyp", "N-best hyp",
310 "N-best hyp",
311 NULL,
312 G_PARAM_READABLE),
313 G_PARAM_READABLE));
314 g_object_class_install_property
315 (gobject_class, PROP_NBEST_SIZE,
316 g_param_spec_int("nbest_size", "Size of N-best list",
317 "Number of hypothesis in the N-best list",
318 1, 1000, 10,
319 G_PARAM_READWRITE));
320 g_object_class_install_property
321 (gobject_class, PROP_MAXHMMPF,
322 g_param_spec_int("maxhmmpf", "Maximum HMMs per frame",
323 "Maximum number of HMMs searched per frame",
324 1, 100000, 1000,
325 G_PARAM_READWRITE));
326 g_object_class_install_property
327 (gobject_class, PROP_MAXWPF,
328 g_param_spec_int("maxwpf", "Maximum words per frame",
329 "Maximum number of words searched per frame",
330 1, 100000, 10,
331 G_PARAM_READWRITE));
332 g_object_class_install_property
333 (gobject_class, PROP_BEAM,
334 g_param_spec_float("beam", "Beam width applied to every frame in Viterbi search",
335 "Beam width applied to every frame in Viterbi search",
336 -1, 1, 1e-48,
337 G_PARAM_READWRITE));
338 g_object_class_install_property
339 (gobject_class, PROP_PBEAM,
340 g_param_spec_float("pbeam", "Beam width applied to phone transitions",
341 "Beam width applied to phone transitions",
342 -1, 1, 1e-48,
343 G_PARAM_READWRITE));
344 g_object_class_install_property
345 (gobject_class, PROP_WBEAM,
346 g_param_spec_float("wbeam", "Beam width applied to word exits",
347 "Beam width applied to phone transitions",
348 -1, 1, 7e-29,
349 G_PARAM_READWRITE));
350 g_object_class_install_property
351 (gobject_class, PROP_DSRATIO,
352 g_param_spec_int("dsratio", "Frame downsampling ratio",
353 "Evaluate acoustic model every N frames",
354 1, 10, 1,
355 G_PARAM_READWRITE));
356 g_object_class_install_property
357 (gobject_class, PROP_DECODER,
358 g_param_spec_boxed("decoder", "Decoder object",
359 "The underlying decoder",
360 PS_DECODER_TYPE,
361 G_PARAM_READABLE));
362 g_object_class_install_property
363 (gobject_class, PROP_CONFIGURED,
364 g_param_spec_boolean("configured", "Finalize configuration",
365 "Set this to finalize configuration",
366 FALSE,
367 G_PARAM_READWRITE));
368
369 gst_pocketsphinx_signals[SIGNAL_PARTIAL_RESULT] =
370 g_signal_new("partial_result",
371 G_TYPE_FROM_CLASS(klass),
372 G_SIGNAL_RUN_LAST,
373 G_STRUCT_OFFSET(GstPocketSphinxClass, partial_result),
374 NULL, NULL,
375 ps_marshal_VOID__STRING_STRING,
376 G_TYPE_NONE,
377 2, G_TYPE_STRING, G_TYPE_STRING
378 );
379
380 gst_pocketsphinx_signals[SIGNAL_RESULT] =
381 g_signal_new("result",
382 G_TYPE_FROM_CLASS(klass),
383 G_SIGNAL_RUN_LAST,
384 G_STRUCT_OFFSET(GstPocketSphinxClass, result),
385 NULL, NULL,
386 ps_marshal_VOID__STRING_STRING,
387 G_TYPE_NONE,
388 2, G_TYPE_STRING, G_TYPE_STRING
389 );
390
391 GST_DEBUG_CATEGORY_INIT(pocketsphinx_debug, "pocketsphinx", 0,
392 "Automatic Speech Recognition");
393 }
394
395 static void
gst_pocketsphinx_set_string(GstPocketSphinx * ps,const gchar * key,const GValue * value)396 gst_pocketsphinx_set_string(GstPocketSphinx *ps,
397 const gchar *key, const GValue *value)
398 {
399 gchar *oldstr, *newstr;
400
401 if (value != NULL)
402 newstr = g_strdup(g_value_get_string(value));
403 else
404 newstr = NULL;
405 if ((oldstr = g_hash_table_lookup(ps->arghash, key)))
406 g_free(oldstr);
407 cmd_ln_set_str_r(ps->config, key, newstr);
408 g_hash_table_foreach(ps->arghash, (gpointer)key, newstr);
409 }
410
411 static void
gst_pocketsphinx_set_int(GstPocketSphinx * ps,const gchar * key,const GValue * value)412 gst_pocketsphinx_set_int(GstPocketSphinx *ps,
413 const gchar *key, const GValue *value)
414 {
415 cmd_ln_set_int32_r(ps->config, key, g_value_get_int(value));
416 }
417
418 static void
gst_pocketsphinx_set_boolean(GstPocketSphinx * ps,const gchar * key,const GValue * value)419 gst_pocketsphinx_set_boolean(GstPocketSphinx *ps,
420 const gchar *key, const GValue *value)
421 {
422 cmd_ln_set_boolean_r(ps->config, key, g_value_get_boolean(value));
423 }
424
425 static void
gst_pocketsphinx_set_float(GstPocketSphinx * ps,const gchar * key,const GValue * value)426 gst_pocketsphinx_set_float(GstPocketSphinx *ps,
427 const gchar *key, const GValue *value)
428 {
429 cmd_ln_set_float_r(ps->config, key, g_value_get_float(value));
430 }
431
432 static void
gst_pocketsphinx_set_property(GObject * object,guint prop_id,const GValue * value,GParamSpec * pspec)433 gst_pocketsphinx_set_property(GObject * object, guint prop_id,
434 const GValue * value, GParamSpec * pspec)
435 {
436 GstPocketSphinx *ps = GST_POCKETSPHINX(object);
437
438 switch (prop_id) {
439 case PROP_CONFIGURED:
440 if (ps->ps)
441 ps_reinit(ps->ps, NULL);
442 else
443 ps->ps = ps_init(ps->config);
444 break;
445 case PROP_HMM_DIR:
446 gst_pocketsphinx_set_string(ps, "-hmm", value);
447 if (ps->ps) {
448 /* Reinitialize the decoder with the new acoustic model. */
449 ps_reinit(ps->ps, NULL);
450 }
451 break;
452 case PROP_LM_FILE:
453 /* FSG and LM are mutually exclusive. */
454 gst_pocketsphinx_set_string(ps, "-fsg", NULL);
455 gst_pocketsphinx_set_string(ps, "-lmctl", NULL);
456 gst_pocketsphinx_set_string(ps, "-lm", value);
457 if (ps->ps) {
458 ngram_model_t *lm, *lmset;
459
460 /* Switch to this new LM. */
461 lm = ngram_model_read(ps->config,
462 g_value_get_string(value),
463 NGRAM_AUTO,
464 ps_get_logmath(ps->ps));
465 lmset = ps_get_lmset(ps->ps);
466 ngram_model_set_add(lmset, lm, g_value_get_string(value),
467 1.0, TRUE);
468 ps_update_lmset(ps->ps, lmset);
469 }
470 break;
471 case PROP_LMCTL_FILE:
472 /* FSG and LM are mutually exclusive. */
473 gst_pocketsphinx_set_string(ps, "-fsg", NULL);
474 gst_pocketsphinx_set_string(ps, "-lmctl", value);
475 gst_pocketsphinx_set_string(ps, "-lm", NULL);
476 if (ps->ps) {
477 ngram_model_t *lmset;
478 lmset = ngram_model_set_read(ps->config,
479 g_value_get_string(value),
480 ps_get_logmath(ps->ps));
481 ps_update_lmset(ps->ps, lmset);
482 }
483 break;
484 case PROP_LM_NAME:
485 gst_pocketsphinx_set_string(ps, "-fsg", NULL);
486 gst_pocketsphinx_set_string(ps, "-lmname", value);
487 if (ps->ps) {
488 ngram_model_t *lm, *lmset;
489
490 lmset = ps_get_lmset(ps->ps);
491 lm = ngram_model_set_select(lmset, g_value_get_string(value));
492 ps_update_lmset(ps->ps, lmset);
493 }
494
495 case PROP_DICT_FILE:
496 gst_pocketsphinx_set_string(ps, "-dict", value);
497 if (ps->ps) {
498 /* Reinitialize the decoder with the new dictionary. */
499 ps_reinit(ps->ps, NULL);
500 }
501 break;
502 case PROP_MLLR_FILE:
503 gst_pocketsphinx_set_string(ps, "-mllr", value);
504 if (ps->ps) {
505 /* Reinitialize the decoder with the new MLLR transform. */
506 ps_reinit(ps->ps, NULL);
507 }
508 break;
509 case PROP_FSG_MODEL:
510 {
511 fsg_set_t *fsgs = ps_get_fsgset(ps->ps);
512
513 if (fsgs == NULL)
514 fsgs = ps_update_fsgset(ps->ps);
515
516 if (fsgs) {
517 fsg_model_t *fsg = g_value_get_pointer(value);
518
519 fsg_set_remove_byname(fsgs, fsg_model_name(fsg));
520 fsg_set_add(fsgs, fsg_model_name(fsg), fsg);
521 fsg_set_select(fsgs, fsg_model_name(fsg));
522 }
523 break;
524 }
525 case PROP_FSG_FILE:
526 /* FSG and LM are mutually exclusive */
527 gst_pocketsphinx_set_string(ps, "-lm", NULL);
528 gst_pocketsphinx_set_string(ps, "-fsg", value);
529
530 if (ps->ps) {
531 /* Switch to this new FSG. */
532 fsg_model_t *fsg;
533 fsg_set_t *fsgs = ps_get_fsgset(ps->ps);
534
535 if (fsgs == NULL)
536 fsgs = ps_update_fsgset(ps->ps);
537
538 fsg = fsg_model_readfile(g_value_get_string(value),
539 ps_get_logmath(ps->ps),
540 cmd_ln_float32_r(ps->config, "-lw"));
541
542 if (fsgs && fsg) {
543 fsg_set_add(fsgs, fsg_model_name(fsg), fsg);
544 fsg_set_select(fsgs, fsg_model_name(fsg));
545 }
546 }
547 break;
548 case PROP_FWDFLAT:
549 gst_pocketsphinx_set_boolean(ps, "-fwdflat", value);
550 break;
551 case PROP_BESTPATH:
552 gst_pocketsphinx_set_boolean(ps, "-bestpath", value);
553 break;
554 case PROP_LATDIR:
555 if (ps->latdir)
556 g_free(ps->latdir);
557 ps->latdir = g_strdup(g_value_get_string(value));
558 break;
559 case PROP_NBEST_SIZE:
560 ps->n_best_size = g_value_get_int(value);
561 break;
562 case PROP_MAXHMMPF:
563 gst_pocketsphinx_set_int(ps, "-maxhmmpf", value);
564 break;
565 case PROP_MAXWPF:
566 gst_pocketsphinx_set_int(ps, "-maxwpf", value);
567 break;
568 case PROP_BEAM:
569 gst_pocketsphinx_set_float(ps, "-beam", value);
570 break;
571 case PROP_PBEAM:
572 gst_pocketsphinx_set_float(ps, "-pbeam", value);
573 break;
574 case PROP_WBEAM:
575 gst_pocketsphinx_set_float(ps, "-wbeam", value);
576 break;
577 case PROP_DSRATIO:
578 gst_pocketsphinx_set_int(ps, "-ds", value);
579 break;
580 default:
581 G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
582 return;
583 }
584 }
585
586 static void
gst_pocketsphinx_get_property(GObject * object,guint prop_id,GValue * value,GParamSpec * pspec)587 gst_pocketsphinx_get_property(GObject * object, guint prop_id,
588 GValue * value, GParamSpec * pspec)
589 {
590 GstPocketSphinx *ps = GST_POCKETSPHINX(object);
591
592 switch (prop_id) {
593 case PROP_DECODER:
594 g_value_set_boxed(value, ps->ps);
595 break;
596 case PROP_CONFIGURED:
597 g_value_set_boolean(value, ps->ps != NULL);
598 break;
599 case PROP_HMM_DIR:
600 g_value_set_string(value, cmd_ln_str_r(ps->config, "-hmm"));
601 break;
602 case PROP_LM_FILE:
603 g_value_set_string(value, cmd_ln_str_r(ps->config, "-lm"));
604 break;
605 case PROP_LMCTL_FILE:
606 g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmctl"));
607 break;
608 case PROP_LM_NAME:
609 g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmname"));
610 break;
611 case PROP_DICT_FILE:
612 g_value_set_string(value, cmd_ln_str_r(ps->config, "-dict"));
613 break;
614 case PROP_MLLR_FILE:
615 g_value_set_string(value, cmd_ln_str_r(ps->config, "-mllr"));
616 break;
617 case PROP_FSG_FILE:
618 g_value_set_string(value, cmd_ln_str_r(ps->config, "-fsg"));
619 break;
620 case PROP_FWDFLAT:
621 g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-fwdflat"));
622 break;
623 case PROP_BESTPATH:
624 g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-bestpath"));
625 break;
626 case PROP_LATDIR:
627 g_value_set_string(value, ps->latdir);
628 break;
629 case PROP_LATTICE: {
630 ps_lattice_t *dag;
631
632 if (ps->ps && (dag = ps_get_lattice(ps->ps)))
633 g_value_set_boxed(value, dag);
634 else
635 g_value_set_boxed(value, NULL);
636 break;
637 }
638 case PROP_MAXHMMPF:
639 g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxhmmpf"));
640 break;
641 case PROP_MAXWPF:
642 g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxwpf"));
643 break;
644 case PROP_BEAM:
645 g_value_set_double(value, cmd_ln_float_r(ps->config, "-beam"));
646 break;
647 case PROP_PBEAM:
648 g_value_set_double(value, cmd_ln_float_r(ps->config, "-pbeam"));
649 break;
650 case PROP_WBEAM:
651 g_value_set_double(value, cmd_ln_float_r(ps->config, "-wbeam"));
652 break;
653 case PROP_DSRATIO:
654 g_value_set_int(value, cmd_ln_int32_r(ps->config, "-ds"));
655 break;
656 case PROP_NBEST_SIZE:
657 g_value_set_int(value, ps->n_best_size);
658 break;
659 case PROP_NBEST: {
660 int i = 0, out_score = 0;
661 GValueArray *arr;
662 if (!ps->ps) {
663 break;
664 }
665 arr = g_value_array_new(1);
666 ps_nbest_t *ps_nbest_list = ps_nbest(ps->ps, 0, -1, NULL, NULL);
667 if (ps_nbest_list) {
668 ps_nbest_list = ps_nbest_next(ps_nbest_list);
669 while ((i < ps->n_best_size) && (ps_nbest_list != NULL)) {
670 GValue value1 = { 0 };
671 g_value_init (&value1, G_TYPE_STRING);
672 const char* hyp = ps_nbest_hyp(ps_nbest_list, &out_score);
673 g_value_set_string(&value1, hyp);
674 g_value_array_append(arr, &value1);
675 ps_nbest_list = ps_nbest_next(ps_nbest_list);
676 i++;
677 }
678 if (ps_nbest_list) {
679 ps_nbest_free(ps_nbest_list);
680 }
681 }
682 g_value_set_boxed (value, arr);
683 break;
684 }
685 default:
686 G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
687 break;
688 }
689 }
690
691 static void
gst_pocketsphinx_init(GstPocketSphinx * ps,GstPocketSphinxClass * gclass)692 gst_pocketsphinx_init(GstPocketSphinx * ps,
693 GstPocketSphinxClass * gclass)
694 {
695 ps->sinkpad =
696 gst_pad_new_from_static_template(&sink_factory, "sink");
697 ps->srcpad =
698 gst_pad_new_from_static_template(&src_factory, "src");
699
700 /* Create the hash table to store argument strings. */
701 ps->arghash = g_hash_table_new(g_str_hash, g_str_equal);
702
703 /* Parse default command-line options. */
704 ps->config = cmd_ln_parse_r(NULL, ps_args(), default_argc, default_argv, FALSE);
705
706 /* Set up pads. */
707 gst_element_add_pad(GST_ELEMENT(ps), ps->sinkpad);
708 gst_pad_set_chain_function(ps->sinkpad, gst_pocketsphinx_chain);
709 gst_pad_set_event_function(ps->sinkpad, gst_pocketsphinx_event);
710 gst_pad_use_fixed_caps(ps->sinkpad);
711
712 gst_element_add_pad(GST_ELEMENT(ps), ps->srcpad);
713 gst_pad_use_fixed_caps(ps->srcpad);
714
715 /* Initialize time. */
716 ps->last_result_time = 0;
717 ps->last_result = NULL;
718
719 /* Nbest size */
720 ps->n_best_size = 10;
721 }
722
723 static GstFlowReturn
gst_pocketsphinx_chain(GstPad * pad,GstBuffer * buffer)724 gst_pocketsphinx_chain(GstPad * pad, GstBuffer * buffer)
725 {
726 GstPocketSphinx *ps;
727
728 ps = GST_POCKETSPHINX(GST_OBJECT_PARENT(pad));
729
730 /* Start an utterance for the first buffer we get (i.e. we assume
731 * that the VADER is "leaky") */
732 if (!ps->listening) {
733 ps->listening = TRUE;
734 ps_start_utt(ps->ps, NULL);
735 }
736 ps_process_raw(ps->ps,
737 (short *)GST_BUFFER_DATA(buffer),
738 GST_BUFFER_SIZE(buffer) / sizeof(short),
739 FALSE, FALSE);
740
741 /* Get a partial result every now and then, see if it is different. */
742 if (ps->last_result_time == 0
743 /* Check every 100 milliseconds. */
744 || (GST_BUFFER_TIMESTAMP(buffer) - ps->last_result_time) > 100*10*1000) {
745 int32 score;
746 char const *hyp;
747 char const *uttid;
748
749 hyp = ps_get_hyp(ps->ps, &score, &uttid);
750 ps->last_result_time = GST_BUFFER_TIMESTAMP(buffer);
751 if (hyp && strlen(hyp) > 0) {
752 if (ps->last_result == NULL || 0 != strcmp(ps->last_result, hyp)) {
753 g_free(ps->last_result);
754 ps->last_result = g_strdup(hyp);
755 /* Emit a signal for applications. */
756 g_signal_emit(ps, gst_pocketsphinx_signals[SIGNAL_PARTIAL_RESULT],
757 0, hyp, uttid);
758 }
759 }
760 }
761 gst_buffer_unref(buffer);
762 return GST_FLOW_OK;
763 }
764
765 static gboolean
gst_pocketsphinx_event(GstPad * pad,GstEvent * event)766 gst_pocketsphinx_event(GstPad *pad, GstEvent *event)
767 {
768 GstPocketSphinx *ps;
769
770 ps = GST_POCKETSPHINX(GST_OBJECT_PARENT(pad));
771
772 /* Pick out VAD events. */
773 switch (event->type) {
774 case GST_EVENT_NEWSEGMENT:
775 /* Initialize the decoder once the audio starts, if it's not
776 * there yet. */
777 if (ps->ps == NULL) {
778 ps->ps = ps_init(ps->config);
779 if (ps->ps == NULL) {
780 GST_ELEMENT_ERROR(GST_ELEMENT(ps), LIBRARY, INIT,
781 ("Failed to initialize PocketSphinx"),
782 ("Failed to initialize PocketSphinx"));
783 return FALSE;
784 }
785 }
786 return gst_pad_event_default(pad, event);
787 case GST_EVENT_VADER_START:
788 ps->listening = TRUE;
789 ps_start_utt(ps->ps, NULL);
790 /* Forward this event. */
791 return gst_pad_event_default(pad, event);
792 case GST_EVENT_EOS:
793 case GST_EVENT_VADER_STOP: {
794 GstBuffer *buffer;
795 int32 score;
796 char const *hyp;
797 char const *uttid;
798
799 hyp = NULL;
800 if (ps->listening) {
801 ps->listening = FALSE;
802 ps_end_utt(ps->ps);
803 hyp = ps_get_hyp(ps->ps, &score, &uttid);
804 /* Dump the lattice if requested. */
805 if (ps->latdir) {
806 char *latfile = string_join(ps->latdir, "/", uttid, ".lat", NULL);
807 ps_lattice_t *dag;
808
809 if ((dag = ps_get_lattice(ps->ps)))
810 ps_lattice_write(dag, latfile);
811 ckd_free(latfile);
812 }
813 }
814 if (hyp) {
815 /* Emit a signal for applications. */
816 g_signal_emit(ps, gst_pocketsphinx_signals[SIGNAL_RESULT],
817 0, hyp, uttid);
818 /* Forward this result in a buffer. */
819 buffer = gst_buffer_new_and_alloc(strlen(hyp) + 2);
820 strcpy((char *)GST_BUFFER_DATA(buffer), hyp);
821 GST_BUFFER_DATA(buffer)[strlen(hyp)] = '\n';
822 GST_BUFFER_DATA(buffer)[strlen(hyp)+1] = '\0';
823 GST_BUFFER_TIMESTAMP(buffer) = GST_EVENT_TIMESTAMP(event);
824 gst_buffer_set_caps(buffer, GST_PAD_CAPS(ps->srcpad));
825 gst_pad_push(ps->srcpad, buffer);
826 }
827
828 /* Forward this event. */
829 return gst_pad_event_default(pad, event);
830 }
831 default:
832 /* Don't bother with other events. */
833 return gst_pad_event_default(pad, event);
834 }
835 }
836
837 static gboolean
plugin_init(GstPlugin * plugin)838 plugin_init(GstPlugin * plugin)
839 {
840 if (!gst_element_register(plugin, "pocketsphinx",
841 GST_RANK_NONE, GST_TYPE_POCKETSPHINX))
842 return FALSE;
843 if (!gst_element_register(plugin, "vader",
844 GST_RANK_NONE, GST_TYPE_VADER))
845 return FALSE;
846 return TRUE;
847 }
848
849 #define VERSION PACKAGE_VERSION
850 #define PACKAGE PACKAGE_NAME
851 GST_PLUGIN_DEFINE(GST_VERSION_MAJOR,
852 GST_VERSION_MINOR,
853 "pocketsphinx",
854 "PocketSphinx plugin",
855 plugin_init, VERSION,
856 #if (GST_VERSION_MINOR == 10 && GST_VERSION_MICRO < 15) /* Nokia's bogus old GStreamer */
857 "LGPL",
858 #else
859 "BSD",
860 #endif
861 "PocketSphinx", "http://cmusphinx.sourceforge.net/")
862