1 /* -*- tab-width: 8; c-file-style: "linux" -*- */
2 /* Copyright (c) 2007 Carnegie Mellon University. All rights *
3 * reserved.
4 *
5 * You may copy, modify, and distribute this code under the same terms
6 * as Sphinx3 or Python, at your convenience, as long as this notice
7 * is not removed.
8 *
9 * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
10 *
11 * This work was partially funded by BAE Systems under contract from
12 * the Defense Advanced Research Projects Agency.
13 */
14
15 #include "Python.h"
16
17 #include "s3_decode.h"
18 #include "utt.h"
19
20 /* This is a singleton. */
21 static s3_decode_t decoder;
22 /* And this. */
23 static fe_t *fe;
24 /* And these. */
25 static char **argv;
26 static int argc;
27
28 /* Parse command line from file */
29 static PyObject *
sphinx3_parse_argfile(PyObject * self,PyObject * args)30 sphinx3_parse_argfile(PyObject *self, PyObject *args)
31 {
32 const char *filename;
33
34 if (!PyArg_ParseTuple(args, "s", &filename))
35 return NULL;
36 if (cmd_ln_parse_file(S3_DECODE_ARG_DEFS, (char *)filename, FALSE) == -1) {
37 /* Raise an IOError, the file did not exist (probably). */
38 PyErr_SetString(PyExc_IOError, "Argument file could not be read");
39 return NULL;
40 }
41 Py_INCREF(Py_None);
42 return Py_None;
43 }
44
45 /* Parse command line from Python dictionary */
46 static PyObject *
sphinx3_parse_argdict(PyObject * self,PyObject * args)47 sphinx3_parse_argdict(PyObject *self, PyObject *args)
48 {
49 PyObject *seq;
50 int i;
51
52 if (!PyArg_ParseTuple(args, "O", &seq))
53 return NULL;
54
55 if ((seq = PyMapping_Items(seq)) == NULL) {
56 return NULL;
57 }
58
59 if (argv) {
60 for (i = 0; i < argc; ++i) {
61 free(argv[i]);
62 }
63 free(argv);
64 argv = NULL;
65 }
66
67 argc = PySequence_Size(seq);
68 /* Allocate bogus initial and NULL final entries */
69 if ((argv = calloc(argc * 2 + 2, sizeof(*argv))) == NULL)
70 return PyErr_NoMemory();
71 argv[0] = strdup("sphinx3_python");
72
73 for (i = 0; i < argc; ++i) {
74 PyObject *pair, *str;
75 const char *key, *val;
76
77 if ((pair = PySequence_GetItem(seq, i)) == NULL)
78 return NULL;
79 if ((str = PyTuple_GetItem(pair, 0)) == NULL)
80 return NULL;
81 if ((str = PyObject_Str(str)) == NULL)
82 return NULL;
83 if ((key = PyString_AsString(str)) == NULL)
84 return NULL;
85 Py_DECREF(str);
86 if (key[0] != '-') {
87 argv[i*2+1] = calloc(strlen(key) + 2, 1);
88 argv[i*2+1][0] = '-';
89 strcat(argv[i*2+1], key);
90 }
91 else
92 argv[i*2+1] = strdup(key);
93
94 if ((str = PyTuple_GetItem(pair, 1)) == NULL)
95 return NULL;
96 if ((str = PyObject_Str(str)) == NULL)
97 return NULL;
98 Py_DECREF(str);
99 if ((val = PyString_AsString(str)) == NULL)
100 return NULL;
101 argv[i*2+2] = strdup(val);
102 }
103
104 argc = argc * 2 + 1;
105 if (cmd_ln_parse(S3_DECODE_ARG_DEFS, argc, argv, FALSE) == -1) {
106 /* This actually won't ever happen */
107 PyErr_SetString(PyExc_ValueError, "Arguments are invalid");
108 return NULL;
109 }
110
111 Py_DECREF(seq);
112 Py_INCREF(Py_None);
113 return Py_None;
114 }
115
116 /* Parse command line from Python array or sequence */
117 static PyObject *
sphinx3_parse_argv(PyObject * self,PyObject * args)118 sphinx3_parse_argv(PyObject *self, PyObject *args)
119 {
120 PyObject *seq;
121 int i;
122
123 if (!PyArg_ParseTuple(args, "O", &seq))
124 return NULL;
125
126 if (!PySequence_Check(seq)) {
127 PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
128 return NULL;
129 }
130
131 if (argv) {
132 for (i = 0; i < argc; ++i) {
133 free(argv[i]);
134 }
135 free(argv);
136 argv = NULL;
137 }
138
139 argc = PySequence_Size(seq);
140 if ((argv = calloc(argc + 1, sizeof(*argv))) == NULL)
141 return PyErr_NoMemory();
142
143 for (i = 0; i < argc; ++i) {
144 PyObject *str;
145 const char *arg;
146
147 if ((str = PySequence_GetItem(seq, i)) == NULL)
148 return NULL;
149 if ((str = PyObject_Str(str)) == NULL)
150 return NULL;
151 if ((arg = PyString_AsString(str)) == NULL)
152 return NULL;
153 argv[i] = strdup(arg);
154
155 Py_DECREF(str);
156 }
157
158 if (cmd_ln_parse(S3_DECODE_ARG_DEFS, argc, argv, FALSE) == -1) {
159 /* This actually won't ever happen */
160 PyErr_SetString(PyExc_ValueError, "Arguments are invalid");
161 return NULL;
162 }
163
164 Py_INCREF(Py_None);
165 return Py_None;
166 }
167
168 /* Initialize live decoder */
169 static PyObject *
sphinx3_init(PyObject * self,PyObject * args)170 sphinx3_init(PyObject *self, PyObject *args)
171 {
172 s3_decode_init(&decoder, cmd_ln_get());
173 fe = fe_init_auto();
174 Py_INCREF(Py_None);
175 return Py_None;
176 }
177
178 /* Wrap up the live decoder */
179 static PyObject *
sphinx3_close(PyObject * self,PyObject * args)180 sphinx3_close(PyObject *self, PyObject *args)
181 {
182 s3_decode_close(&decoder);
183 fe_free(fe);
184 cmd_ln_free();
185 Py_INCREF(Py_None);
186 return Py_None;
187 }
188
189 /* Get hypothesis string and segmentation. */
190 static PyObject *
sphinx3_get_hypothesis(PyObject * self,PyObject * args)191 sphinx3_get_hypothesis(PyObject *self, PyObject *args)
192 {
193 PyObject *hypstr_obj, *hypseg_obj;
194 hyp_t **hypsegs, **h;
195 char *hypstr, *uttid;
196 int nhyps, i, allphone;
197
198 s3_decode_hypothesis(&decoder, &uttid, &hypstr, &hypsegs);
199 nhyps = 0;
200 for (h = hypsegs; *h; ++h)
201 ++nhyps;
202
203 allphone = (cmd_ln_int32("-op_mode") == 1);
204
205 hypstr_obj = PyString_FromString(hypstr);
206 hypseg_obj = PyTuple_New(nhyps);
207 for (i = 0; i < nhyps; ++i) {
208 PyObject *seg_obj;
209 const char *wordstr;
210
211 /* hyp_t is BOGUS, it should have a string, then we
212 * wouldn't have to screw around like this for
213 * allphones. */
214 if (allphone) {
215 wordstr = mdef_ciphone_str(kbcore_mdef(decoder.kbcore),
216 hypsegs[i]->id);
217 }
218 else {
219 wordstr = dict_wordstr(kbcore_dict(decoder.kbcore),
220 hypsegs[i]->id);
221 }
222 seg_obj = Py_BuildValue("(siiii)",
223 wordstr,
224 hypsegs[i]->sf,
225 hypsegs[i]->ef,
226 hypsegs[i]->ascr,
227 hypsegs[i]->lscr);
228 PyTuple_SET_ITEM(hypseg_obj, i, seg_obj);
229 }
230
231 return Py_BuildValue("(OO)", hypstr_obj, hypseg_obj);
232 }
233
234 static PyObject *
sphinx3_begin_utt(PyObject * self,PyObject * args)235 sphinx3_begin_utt(PyObject *self, PyObject *args)
236 {
237 char *uttid = NULL;
238
239 if (!PyArg_ParseTuple(args, "|s", &uttid))
240 return NULL;
241
242 if (s3_decode_begin_utt(&decoder, uttid) < 0) {
243 PyErr_SetString(PyExc_RuntimeError, "s3_decode_begin_utt() failed");
244 return NULL;
245 }
246
247 Py_INCREF(Py_None);
248 return Py_None;
249 }
250
251 static PyObject *
sphinx3_end_utt(PyObject * self,PyObject * args)252 sphinx3_end_utt(PyObject *self, PyObject *args)
253 {
254 s3_decode_end_utt(&decoder);
255 Py_INCREF(Py_None);
256 return Py_None;
257 }
258
259 static PyObject *
sphinx3_process_raw(PyObject * self,PyObject * args)260 sphinx3_process_raw(PyObject *self, PyObject *args)
261 {
262 PyObject *str;
263 int16 *data;
264 int32 nsamps;
265 mfcc_t **cep_block;
266 int32 nframes;
267
268 if (!PyArg_ParseTuple(args, "O", &str))
269 return NULL;
270 if ((data = (int16 *)PyString_AsString(str)) == NULL)
271 return NULL;
272 nsamps = PyString_Size(str)/2;
273
274 if (fe_process_utt(fe, data, nsamps, &cep_block, &nframes) == -1) {
275 PyErr_SetString(PyExc_ValueError, "Problem in fe_process_utt()");
276 return NULL;
277 }
278 s3_decode_process(&decoder, cep_block, nframes);
279 ckd_free_2d((void **)cep_block);
280
281 return Py_BuildValue("i", nframes);
282 }
283
284 static PyObject *
sphinx3_read_lm(PyObject * self,PyObject * args)285 sphinx3_read_lm(PyObject *self, PyObject *args)
286 {
287 const char *lmfile, *lmname;
288
289 if (!PyArg_ParseTuple(args, "ss", &lmfile, &lmname))
290 return NULL;
291
292 s3_decode_read_lm(&decoder, lmfile, lmname);
293 Py_INCREF(Py_None);
294 return Py_None;
295 }
296
297 static PyObject *
sphinx3_set_lm(PyObject * self,PyObject * args)298 sphinx3_set_lm(PyObject *self, PyObject *args)
299 {
300 const char *lmname;
301
302 if (!PyArg_ParseTuple(args, "s", &lmname))
303 return NULL;
304
305 s3_decode_set_lm(&decoder, lmname);
306 Py_INCREF(Py_None);
307 return Py_None;
308 }
309
310 static PyObject *
sphinx3_delete_lm(PyObject * self,PyObject * args)311 sphinx3_delete_lm(PyObject *self, PyObject *args)
312 {
313 const char *lmname;
314
315 if (!PyArg_ParseTuple(args, "s", &lmname))
316 return NULL;
317
318 s3_decode_set_lm(&decoder, lmname);
319 Py_INCREF(Py_None);
320 return Py_None;
321 }
322
323 /* Decode raw waveform data */
324 static PyObject *
sphinx3_decode_raw(PyObject * self,PyObject * args)325 sphinx3_decode_raw(PyObject *self, PyObject *args)
326 {
327 PyObject *str;
328 int16 *data;
329 int32 nsamps;
330 mfcc_t **cep_block, ***feat_block;
331 int32 nframes;
332 char *uttid = NULL;
333
334 if (!PyArg_ParseTuple(args, "O|s", &str, &uttid))
335 return NULL;
336 if ((data = (int16 *)PyString_AsString(str)) == NULL)
337 return NULL;
338 nsamps = PyString_Size(str)/2;
339
340 if (fe_process_utt(fe, data, nsamps, &cep_block, &nframes) == -1) {
341 PyErr_SetString(PyExc_ValueError, "Problem in fe_process_utt()");
342 return NULL;
343 }
344 feat_block = feat_array_alloc(kbcore_fcb(decoder.kbcore),nframes);
345
346 s3_decode_begin_utt(&decoder, uttid);
347 /* In theory we should check the return from this, but it will
348 * always process the whole thing if both beginutt and endutt
349 * are TRUE. */
350 decoder.num_frames_entered
351 = feat_s2mfc2feat_live(kbcore_fcb(decoder.kbcore),
352 cep_block, &nframes, TRUE, TRUE,
353 feat_block);
354 ckd_free_2d((void **)cep_block);
355 if (nframes == 0) {
356 PyErr_SetString(PyExc_ValueError, "Utterance too short");
357 ckd_free_2d((void **)feat_block);
358 return NULL;
359 }
360
361 /* Unfortunately we have to bypass s3_decode.c a bit here. */
362 utt_decode_block(feat_block, nframes, &decoder.num_frames_decoded, &decoder.kb);
363 feat_array_free(feat_block);
364 s3_decode_end_utt(&decoder);
365
366 /* Now get the results and return them. */
367 return sphinx3_get_hypothesis(self, args);
368 }
369
370 /* Decode a feature file */
371 static PyObject *
sphinx3_decode_cep_file(PyObject * self,PyObject * args)372 sphinx3_decode_cep_file(PyObject *self, PyObject *args)
373 {
374 const char *filename;
375 char *uttid = NULL;
376 int sf = 0;
377 int ef = -1;
378
379 if (!PyArg_ParseTuple(args, "s|iis", &filename, &sf, &ef, &uttid))
380 return NULL;
381
382 /* Unfortunately we have to bypass s3_decode.c a bit here. */
383 s3_decode_begin_utt(&decoder, uttid);
384 decoder.num_frames_entered
385 = feat_s2mfc2feat(kbcore_fcb(decoder.kbcore),
386 filename,
387 NULL, "",
388 sf, ef,
389 decoder.kb.feat, S3_MAX_FRAMES);
390 if (decoder.num_frames_entered < 0) {
391 PyErr_SetString(PyExc_IOError, "Could not read feature file");
392 return NULL;
393 }
394 else if (decoder.num_frames_entered == 0) {
395 PyErr_SetString(PyExc_ValueError, "Utterance too short");
396 return NULL;
397 }
398 utt_decode_block(decoder.kb.feat, decoder.num_frames_entered,
399 &decoder.num_frames_decoded, &decoder.kb);
400 s3_decode_end_utt(&decoder);
401
402 /* Now get the results and return them. */
403 return sphinx3_get_hypothesis(self, args);
404 }
405
406 static PyMethodDef sphinx3methods[] = {
407 { "init", sphinx3_init, METH_VARARGS,
408 "Initialize the Sphinx3 decoder.\n"
409 "You must first call one of parse_argfile, parse_argdict, parse_argv." },
410 { "close", sphinx3_close, METH_VARARGS,
411 "Shut down the Sphinx3 decoder." },
412 { "parse_argfile", sphinx3_parse_argfile, METH_VARARGS,
413 "Load Sphinx3 parameters from a file." },
414 { "parse_argv", sphinx3_parse_argv, METH_VARARGS,
415 "Set Sphinx3 parameters from an argv array (e.g. sys.argv)\n"
416 "Note that the first element of this array is IGNORED." },
417 { "parse_argdict", sphinx3_parse_argdict, METH_VARARGS,
418 "Load Sphinx3 parameters from a dictionary.\n"
419 "Keys can optionally begin with a - as in the command line." },
420 { "begin_utt", sphinx3_begin_utt, METH_VARARGS,
421 "Mark the start of the current utterance.\n" },
422 { "end_utt", sphinx3_end_utt, METH_VARARGS,
423 "Mark the end of the current utterance, doing final search if necessary.\n" },
424 { "process_raw", sphinx3_process_raw, METH_VARARGS,
425 "Process a block of raw audio.\n" },
426 /* Processing cepstra might wait for a bit until I decide if
427 * it's worth pulling in NumPy */
428 { "get_hypothesis", sphinx3_get_hypothesis, METH_VARARGS,
429 "Get current hypothesis string and segmentation.\n" },
430 { "set_lm", sphinx3_set_lm, METH_VARARGS,
431 "Set the current language model to the one named (must be previously loaded).\n" },
432 { "read_lm", sphinx3_read_lm, METH_VARARGS,
433 "Load a language model from a file and associate it with a name.\n" },
434 { "delete_lm", sphinx3_delete_lm, METH_VARARGS,
435 "Unload and free resources used by the named language model.\n" },
436 { "decode_raw", sphinx3_decode_raw, METH_VARARGS,
437 "Decode an entire utterance of raw waveform data from a Python string.\n" },
438 { "decode_cep_file", sphinx3_decode_cep_file, METH_VARARGS,
439 "Decode a Sphinx-format feature (MFCC) file.\n" },
440 { NULL, NULL, 0, NULL }
441 };
442
443 PyMODINIT_FUNC
init_sphinx3(void)444 init_sphinx3(void)
445 {
446 (void) Py_InitModule("_sphinx3", sphinx3methods);
447 }
448