1 /* -*- tab-width: 8; c-file-style: "linux" -*- */
2 /* Copyright (c) 2007 Carnegie Mellon University. All rights *
3  * reserved.
4  *
5  * You may copy, modify, and distribute this code under the same terms
6  * as Sphinx3 or Python, at your convenience, as long as this notice
7  * is not removed.
8  *
9  * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
10  *
11  * This work was partially funded by BAE Systems under contract from
12  * the Defense Advanced Research Projects Agency.
13  */
14 
15 #include "Python.h"
16 
17 #include "s3_decode.h"
18 #include "utt.h"
19 
/* This is a singleton: the one decoder instance that the decoding
 * functions in this file operate on. */
static s3_decode_t decoder;
/* And this.  The front end is assigned in sphinx3_init() and released
 * in sphinx3_close(). */
static fe_t *fe;
/* And these.  The argument vector is built by sphinx3_parse_argdict()
 * and sphinx3_parse_argv(), passed to cmd_ln_parse(), and kept at file
 * scope until the next parse call releases it. */
static char **argv;
static int argc;
27 
28 /* Parse command line from file */
29 static PyObject *
sphinx3_parse_argfile(PyObject * self,PyObject * args)30 sphinx3_parse_argfile(PyObject *self, PyObject *args)
31 {
32 	const char *filename;
33 
34 	if (!PyArg_ParseTuple(args, "s", &filename))
35 		return NULL;
36 	if (cmd_ln_parse_file(S3_DECODE_ARG_DEFS, (char *)filename, FALSE) == -1) {
37 		/* Raise an IOError, the file did not exist (probably). */
38 		PyErr_SetString(PyExc_IOError, "Argument file could not be read");
39 		return NULL;
40 	}
41 	Py_INCREF(Py_None);
42 	return Py_None;
43 }
44 
45 /* Parse command line from Python dictionary */
46 static PyObject *
sphinx3_parse_argdict(PyObject * self,PyObject * args)47 sphinx3_parse_argdict(PyObject *self, PyObject *args)
48 {
49 	PyObject *seq;
50 	int i;
51 
52 	if (!PyArg_ParseTuple(args, "O", &seq))
53 		return NULL;
54 
55 	if ((seq = PyMapping_Items(seq)) == NULL) {
56 		return NULL;
57 	}
58 
59 	if (argv) {
60 		for (i = 0; i < argc; ++i) {
61 			free(argv[i]);
62 		}
63 		free(argv);
64 		argv = NULL;
65 	}
66 
67 	argc = PySequence_Size(seq);
68 	/* Allocate bogus initial and NULL final entries */
69 	if ((argv = calloc(argc * 2 + 2, sizeof(*argv))) == NULL)
70 		return PyErr_NoMemory();
71 	argv[0] = strdup("sphinx3_python");
72 
73 	for (i = 0; i < argc; ++i) {
74 		PyObject *pair, *str;
75 		const char *key, *val;
76 
77 		if ((pair = PySequence_GetItem(seq, i)) == NULL)
78 			return NULL;
79 		if ((str = PyTuple_GetItem(pair, 0)) == NULL)
80 			return NULL;
81 		if ((str = PyObject_Str(str)) == NULL)
82 			return NULL;
83 		if ((key = PyString_AsString(str)) == NULL)
84 			return NULL;
85 		Py_DECREF(str);
86 		if (key[0] != '-') {
87 			argv[i*2+1] = calloc(strlen(key) + 2, 1);
88 			argv[i*2+1][0] = '-';
89 			strcat(argv[i*2+1], key);
90 		}
91 		else
92 			argv[i*2+1] = strdup(key);
93 
94 		if ((str = PyTuple_GetItem(pair, 1)) == NULL)
95 			return NULL;
96 		if ((str = PyObject_Str(str)) == NULL)
97 			return NULL;
98 		Py_DECREF(str);
99 		if ((val = PyString_AsString(str)) == NULL)
100 			return NULL;
101 		argv[i*2+2] = strdup(val);
102 	}
103 
104 	argc = argc * 2 + 1;
105 	if (cmd_ln_parse(S3_DECODE_ARG_DEFS, argc, argv, FALSE) == -1) {
106 		/* This actually won't ever happen */
107 		PyErr_SetString(PyExc_ValueError, "Arguments are invalid");
108 		return NULL;
109 	}
110 
111 	Py_DECREF(seq);
112 	Py_INCREF(Py_None);
113 	return Py_None;
114 }
115 
116 /* Parse command line from Python array or sequence */
117 static PyObject *
sphinx3_parse_argv(PyObject * self,PyObject * args)118 sphinx3_parse_argv(PyObject *self, PyObject *args)
119 {
120 	PyObject *seq;
121 	int i;
122 
123 	if (!PyArg_ParseTuple(args, "O", &seq))
124 		return NULL;
125 
126 	if (!PySequence_Check(seq)) {
127 		PyErr_SetString(PyExc_TypeError, "Argument is not a sequence");
128 		return NULL;
129 	}
130 
131 	if (argv) {
132 		for (i = 0; i < argc; ++i) {
133 			free(argv[i]);
134 		}
135 		free(argv);
136 		argv = NULL;
137 	}
138 
139 	argc = PySequence_Size(seq);
140 	if ((argv = calloc(argc + 1, sizeof(*argv))) == NULL)
141 		return PyErr_NoMemory();
142 
143 	for (i = 0; i < argc; ++i) {
144 		PyObject *str;
145 		const char *arg;
146 
147 		if ((str = PySequence_GetItem(seq, i)) == NULL)
148 			return NULL;
149 		if ((str = PyObject_Str(str)) == NULL)
150 			return NULL;
151 		if ((arg = PyString_AsString(str)) == NULL)
152 			return NULL;
153 		argv[i] = strdup(arg);
154 
155 		Py_DECREF(str);
156 	}
157 
158 	if (cmd_ln_parse(S3_DECODE_ARG_DEFS, argc, argv, FALSE) == -1) {
159 		/* This actually won't ever happen */
160 		PyErr_SetString(PyExc_ValueError, "Arguments are invalid");
161 		return NULL;
162 	}
163 
164 	Py_INCREF(Py_None);
165 	return Py_None;
166 }
167 
168 /* Initialize live decoder */
169 static PyObject *
sphinx3_init(PyObject * self,PyObject * args)170 sphinx3_init(PyObject *self, PyObject *args)
171 {
172 	s3_decode_init(&decoder, cmd_ln_get());
173 	fe = fe_init_auto();
174 	Py_INCREF(Py_None);
175 	return Py_None;
176 }
177 
178 /* Wrap up the live decoder */
179 static PyObject *
sphinx3_close(PyObject * self,PyObject * args)180 sphinx3_close(PyObject *self, PyObject *args)
181 {
182 	s3_decode_close(&decoder);
183 	fe_free(fe);
184 	cmd_ln_free();
185 	Py_INCREF(Py_None);
186 	return Py_None;
187 }
188 
189 /* Get hypothesis string and segmentation. */
190 static PyObject *
sphinx3_get_hypothesis(PyObject * self,PyObject * args)191 sphinx3_get_hypothesis(PyObject *self, PyObject *args)
192 {
193 	PyObject *hypstr_obj, *hypseg_obj;
194 	hyp_t **hypsegs, **h;
195 	char *hypstr, *uttid;
196 	int nhyps, i, allphone;
197 
198 	s3_decode_hypothesis(&decoder, &uttid, &hypstr, &hypsegs);
199 	nhyps = 0;
200 	for (h = hypsegs; *h; ++h)
201 		++nhyps;
202 
203 	allphone = (cmd_ln_int32("-op_mode") == 1);
204 
205 	hypstr_obj = PyString_FromString(hypstr);
206 	hypseg_obj = PyTuple_New(nhyps);
207 	for (i = 0; i < nhyps; ++i) {
208 		PyObject *seg_obj;
209 		const char *wordstr;
210 
211 		/* hyp_t is BOGUS, it should have a string, then we
212 		 * wouldn't have to screw around like this for
213 		 * allphones. */
214 		if (allphone) {
215 			wordstr = mdef_ciphone_str(kbcore_mdef(decoder.kbcore),
216 						   hypsegs[i]->id);
217 		}
218 		else {
219 			wordstr = dict_wordstr(kbcore_dict(decoder.kbcore),
220 					       hypsegs[i]->id);
221 		}
222 		seg_obj = Py_BuildValue("(siiii)",
223 					wordstr,
224 					hypsegs[i]->sf,
225 					hypsegs[i]->ef,
226 					hypsegs[i]->ascr,
227 					hypsegs[i]->lscr);
228 		PyTuple_SET_ITEM(hypseg_obj, i, seg_obj);
229 	}
230 
231 	return Py_BuildValue("(OO)", hypstr_obj, hypseg_obj);
232 }
233 
234 static PyObject *
sphinx3_begin_utt(PyObject * self,PyObject * args)235 sphinx3_begin_utt(PyObject *self, PyObject *args)
236 {
237 	char *uttid = NULL;
238 
239 	if (!PyArg_ParseTuple(args, "|s", &uttid))
240 		return NULL;
241 
242 	if (s3_decode_begin_utt(&decoder, uttid) < 0) {
243 		PyErr_SetString(PyExc_RuntimeError, "s3_decode_begin_utt() failed");
244 		return NULL;
245 	}
246 
247 	Py_INCREF(Py_None);
248 	return Py_None;
249 }
250 
251 static PyObject *
sphinx3_end_utt(PyObject * self,PyObject * args)252 sphinx3_end_utt(PyObject *self, PyObject *args)
253 {
254 	s3_decode_end_utt(&decoder);
255 	Py_INCREF(Py_None);
256 	return Py_None;
257 }
258 
259 static PyObject *
sphinx3_process_raw(PyObject * self,PyObject * args)260 sphinx3_process_raw(PyObject *self, PyObject *args)
261 {
262 	PyObject *str;
263 	int16 *data;
264 	int32 nsamps;
265 	mfcc_t **cep_block;
266 	int32 nframes;
267 
268 	if (!PyArg_ParseTuple(args, "O", &str))
269 		return NULL;
270 	if ((data = (int16 *)PyString_AsString(str)) == NULL)
271 		return NULL;
272 	nsamps = PyString_Size(str)/2;
273 
274 	if (fe_process_utt(fe, data, nsamps, &cep_block, &nframes) == -1) {
275 		PyErr_SetString(PyExc_ValueError, "Problem in fe_process_utt()");
276 		return NULL;
277 	}
278 	s3_decode_process(&decoder, cep_block, nframes);
279 	ckd_free_2d((void **)cep_block);
280 
281 	return Py_BuildValue("i", nframes);
282 }
283 
284 static PyObject *
sphinx3_read_lm(PyObject * self,PyObject * args)285 sphinx3_read_lm(PyObject *self, PyObject *args)
286 {
287 	const char *lmfile, *lmname;
288 
289 	if (!PyArg_ParseTuple(args, "ss", &lmfile, &lmname))
290 		return NULL;
291 
292 	s3_decode_read_lm(&decoder, lmfile, lmname);
293 	Py_INCREF(Py_None);
294 	return Py_None;
295 }
296 
297 static PyObject *
sphinx3_set_lm(PyObject * self,PyObject * args)298 sphinx3_set_lm(PyObject *self, PyObject *args)
299 {
300 	const char *lmname;
301 
302 	if (!PyArg_ParseTuple(args, "s", &lmname))
303 		return NULL;
304 
305 	s3_decode_set_lm(&decoder, lmname);
306 	Py_INCREF(Py_None);
307 	return Py_None;
308 }
309 
310 static PyObject *
sphinx3_delete_lm(PyObject * self,PyObject * args)311 sphinx3_delete_lm(PyObject *self, PyObject *args)
312 {
313 	const char *lmname;
314 
315 	if (!PyArg_ParseTuple(args, "s", &lmname))
316 		return NULL;
317 
318 	s3_decode_set_lm(&decoder, lmname);
319 	Py_INCREF(Py_None);
320 	return Py_None;
321 }
322 
323 /* Decode raw waveform data */
324 static PyObject *
sphinx3_decode_raw(PyObject * self,PyObject * args)325 sphinx3_decode_raw(PyObject *self, PyObject *args)
326 {
327 	PyObject *str;
328 	int16 *data;
329 	int32 nsamps;
330 	mfcc_t **cep_block, ***feat_block;
331 	int32 nframes;
332 	char *uttid = NULL;
333 
334 	if (!PyArg_ParseTuple(args, "O|s", &str, &uttid))
335 		return NULL;
336 	if ((data = (int16 *)PyString_AsString(str)) == NULL)
337 		return NULL;
338 	nsamps = PyString_Size(str)/2;
339 
340 	if (fe_process_utt(fe, data, nsamps, &cep_block, &nframes) == -1) {
341 		PyErr_SetString(PyExc_ValueError, "Problem in fe_process_utt()");
342 		return NULL;
343 	}
344 	feat_block = feat_array_alloc(kbcore_fcb(decoder.kbcore),nframes);
345 
346 	s3_decode_begin_utt(&decoder, uttid);
347 	/* In theory we should check the return from this, but it will
348 	 * always process the whole thing if both beginutt and endutt
349 	 * are TRUE. */
350 	decoder.num_frames_entered
351 		= feat_s2mfc2feat_live(kbcore_fcb(decoder.kbcore),
352 				       cep_block, &nframes, TRUE, TRUE,
353 				       feat_block);
354 	ckd_free_2d((void **)cep_block);
355 	if (nframes == 0) {
356 		PyErr_SetString(PyExc_ValueError, "Utterance too short");
357 		ckd_free_2d((void **)feat_block);
358 		return NULL;
359 	}
360 
361 	/* Unfortunately we have to bypass s3_decode.c a bit here. */
362 	utt_decode_block(feat_block, nframes, &decoder.num_frames_decoded, &decoder.kb);
363 	feat_array_free(feat_block);
364 	s3_decode_end_utt(&decoder);
365 
366 	/* Now get the results and return them. */
367 	return sphinx3_get_hypothesis(self, args);
368 }
369 
370 /* Decode a feature file */
371 static PyObject *
sphinx3_decode_cep_file(PyObject * self,PyObject * args)372 sphinx3_decode_cep_file(PyObject *self, PyObject *args)
373 {
374 	const char *filename;
375 	char *uttid = NULL;
376 	int sf = 0;
377 	int ef = -1;
378 
379 	if (!PyArg_ParseTuple(args, "s|iis", &filename, &sf, &ef, &uttid))
380 		return NULL;
381 
382 	/* Unfortunately we have to bypass s3_decode.c a bit here. */
383 	s3_decode_begin_utt(&decoder, uttid);
384 	decoder.num_frames_entered
385 		= feat_s2mfc2feat(kbcore_fcb(decoder.kbcore),
386 				  filename,
387 				  NULL, "",
388 				  sf, ef,
389 				  decoder.kb.feat, S3_MAX_FRAMES);
390 	if (decoder.num_frames_entered < 0) {
391 		PyErr_SetString(PyExc_IOError, "Could not read feature file");
392 		return NULL;
393 	}
394 	else if (decoder.num_frames_entered == 0) {
395 		PyErr_SetString(PyExc_ValueError, "Utterance too short");
396 		return NULL;
397 	}
398 	utt_decode_block(decoder.kb.feat, decoder.num_frames_entered,
399 			 &decoder.num_frames_decoded, &decoder.kb);
400 	s3_decode_end_utt(&decoder);
401 
402 	/* Now get the results and return them. */
403 	return sphinx3_get_hypothesis(self, args);
404 }
405 
/* Method table for the _sphinx3 module.  Each entry gives the
 * Python-visible name, the C function implementing it, the calling
 * convention (METH_VARARGS throughout) and the docstring.  The
 * all-NULL entry terminates the table. */
static PyMethodDef sphinx3methods[] = {
	{ "init", sphinx3_init, METH_VARARGS,
	  "Initialize the Sphinx3 decoder.\n"
	  "You must first call one of parse_argfile, parse_argdict, parse_argv." },
	{ "close", sphinx3_close, METH_VARARGS,
	  "Shut down the Sphinx3 decoder." },
	{ "parse_argfile", sphinx3_parse_argfile, METH_VARARGS,
	  "Load Sphinx3 parameters from a file." },
	{ "parse_argv", sphinx3_parse_argv, METH_VARARGS,
	  "Set Sphinx3 parameters from an argv array (e.g. sys.argv)\n"
	  "Note that the first element of this array is IGNORED." },
	{ "parse_argdict", sphinx3_parse_argdict, METH_VARARGS,
	  "Load Sphinx3 parameters from a dictionary.\n"
	  "Keys can optionally begin with a - as in the command line."	},
	{ "begin_utt", sphinx3_begin_utt, METH_VARARGS,
	  "Mark the start of the current utterance.\n" },
	{ "end_utt", sphinx3_end_utt, METH_VARARGS,
	  "Mark the end of the current utterance, doing final search if necessary.\n" },
	{ "process_raw", sphinx3_process_raw, METH_VARARGS,
	  "Process a block of raw audio.\n" },
	/* Processing cepstra might wait for a bit until I decide if
	 * it's worth pulling in NumPy */
	{ "get_hypothesis", sphinx3_get_hypothesis, METH_VARARGS,
	  "Get current hypothesis string and segmentation.\n" },
	{ "set_lm", sphinx3_set_lm, METH_VARARGS,
	  "Set the current language model to the one named (must be previously loaded).\n" },
	{ "read_lm", sphinx3_read_lm, METH_VARARGS,
	  "Load a language model from a file and associate it with a name.\n" },
	{ "delete_lm", sphinx3_delete_lm, METH_VARARGS,
	  "Unload and free resources used by the named language model.\n" },
	{ "decode_raw", sphinx3_decode_raw, METH_VARARGS,
	  "Decode an entire utterance of raw waveform data from a Python string.\n" },
	{ "decode_cep_file", sphinx3_decode_cep_file, METH_VARARGS,
	  "Decode a Sphinx-format feature (MFCC) file.\n" },
	/* Sentinel marking the end of the table. */
	{ NULL, NULL, 0, NULL }
};
442 
443 PyMODINIT_FUNC
init_sphinx3(void)444 init_sphinx3(void)
445 {
446 	(void) Py_InitModule("_sphinx3", sphinx3methods);
447 }
448