1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3 * Copyright (c) 2010 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38 /**
39 * @file ps_alignment.c Multi-level alignment structure
40 */
41
42 /* System headers. */
43
44 /* SphinxBase headers. */
45 #include <sphinxbase/ckd_alloc.h>
46
47 /* Local headers. */
48 #include "ps_alignment.h"
49
50 ps_alignment_t *
ps_alignment_init(dict2pid_t * d2p)51 ps_alignment_init(dict2pid_t *d2p)
52 {
53 ps_alignment_t *al = ckd_calloc(1, sizeof(*al));
54 al->d2p = dict2pid_retain(d2p);
55 return al;
56 }
57
58 int
ps_alignment_free(ps_alignment_t * al)59 ps_alignment_free(ps_alignment_t *al)
60 {
61 if (al == NULL)
62 return 0;
63 dict2pid_free(al->d2p);
64 ckd_free(al->word.seq);
65 ckd_free(al->sseq.seq);
66 ckd_free(al->state.seq);
67 ckd_free(al);
68 return 0;
69 }
70
71 #define VECTOR_GROW 10
72 static void *
vector_grow_one(void * ptr,uint16 * n_alloc,uint16 * n,size_t item_size)73 vector_grow_one(void *ptr, uint16 *n_alloc, uint16 *n, size_t item_size)
74 {
75 int newsize = *n + 1;
76 if (newsize < *n_alloc) {
77 *n += 1;
78 return ptr;
79 }
80 newsize += VECTOR_GROW;
81 if (newsize > 0xffff)
82 return NULL;
83 ptr = ckd_realloc(ptr, newsize * item_size);
84 *n += 1;
85 *n_alloc = newsize;
86 return ptr;
87 }
88
89 static ps_alignment_entry_t *
ps_alignment_vector_grow_one(ps_alignment_vector_t * vec)90 ps_alignment_vector_grow_one(ps_alignment_vector_t *vec)
91 {
92 void *ptr;
93 ptr = vector_grow_one(vec->seq, &vec->n_alloc,
94 &vec->n_ent, sizeof(*vec->seq));
95 if (ptr == NULL)
96 return NULL;
97 vec->seq = ptr;
98 return vec->seq + vec->n_ent - 1;
99 }
100
101 static void
ps_alignment_vector_empty(ps_alignment_vector_t * vec)102 ps_alignment_vector_empty(ps_alignment_vector_t *vec)
103 {
104 vec->n_ent = 0;
105 }
106
107 int
ps_alignment_add_word(ps_alignment_t * al,int32 wid,int duration)108 ps_alignment_add_word(ps_alignment_t *al,
109 int32 wid, int duration)
110 {
111 ps_alignment_entry_t *ent;
112
113 if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL)
114 return 0;
115 ent->id.wid = wid;
116 if (al->word.n_ent > 1)
117 ent->start = ent[-1].start + ent[-1].duration;
118 else
119 ent->start = 0;
120 ent->duration = duration;
121 ent->parent = PS_ALIGNMENT_NONE;
122 ent->child = PS_ALIGNMENT_NONE;
123
124 return al->word.n_ent;
125 }
126
127 int
ps_alignment_populate(ps_alignment_t * al)128 ps_alignment_populate(ps_alignment_t *al)
129 {
130 dict2pid_t *d2p;
131 dict_t *dict;
132 bin_mdef_t *mdef;
133 int i, lc;
134
135 /* Clear phone and state sequences. */
136 ps_alignment_vector_empty(&al->sseq);
137 ps_alignment_vector_empty(&al->state);
138
139 /* For each word, expand to phones/senone sequences. */
140 d2p = al->d2p;
141 dict = d2p->dict;
142 mdef = d2p->mdef;
143 lc = bin_mdef_silphone(mdef);
144 for (i = 0; i < al->word.n_ent; ++i) {
145 ps_alignment_entry_t *went = al->word.seq + i;
146 ps_alignment_entry_t *sent;
147 int wid = went->id.wid;
148 int len = dict_pronlen(dict, wid);
149 int j, rc;
150
151 if (i < al->word.n_ent - 1)
152 rc = dict_first_phone(dict, al->word.seq[i+1].id.wid);
153 else
154 rc = bin_mdef_silphone(mdef);
155
156 /* First phone. */
157 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
158 E_ERROR("Failed to add phone entry!\n");
159 return -1;
160 }
161 sent->id.pid.cipid = dict_first_phone(dict, wid);
162 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
163 sent->start = went->start;
164 sent->duration = went->duration;
165 sent->parent = i;
166 went->child = (uint16)(sent - al->sseq.seq);
167 if (len == 1)
168 sent->id.pid.ssid
169 = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc);
170 else
171 sent->id.pid.ssid
172 = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid,
173 dict_second_phone(dict, wid), lc);
174 assert(sent->id.pid.ssid != BAD_SSID);
175
176 /* Internal phones. */
177 for (j = 1; j < len - 1; ++j) {
178 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
179 E_ERROR("Failed to add phone entry!\n");
180 return -1;
181 }
182 sent->id.pid.cipid = dict_pron(dict, wid, j);
183 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
184 sent->id.pid.ssid = dict2pid_internal(d2p, wid, j);
185 assert(sent->id.pid.ssid != BAD_SSID);
186 sent->start = went->start;
187 sent->duration = went->duration;
188 sent->parent = i;
189 }
190
191 /* Last phone. */
192 if (j < len) {
193 xwdssid_t *rssid;
194 assert(j == len - 1);
195 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
196 E_ERROR("Failed to add phone entry!\n");
197 return -1;
198 }
199 sent->id.pid.cipid = dict_last_phone(dict, wid);
200 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
201 rssid = dict2pid_rssid(d2p, sent->id.pid.cipid,
202 dict_second_last_phone(dict, wid));
203 sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]];
204 assert(sent->id.pid.ssid != BAD_SSID);
205 sent->start = went->start;
206 sent->duration = went->duration;
207 sent->parent = i;
208 }
209 /* Update lc. Could just use sent->id.pid.cipid here but that
210 * seems needlessly obscure. */
211 lc = dict_last_phone(dict, wid);
212 }
213
214 /* For each senone sequence, expand to senones. (we could do this
215 * nested above but this makes it more clear and easier to
216 * refactor) */
217 for (i = 0; i < al->sseq.n_ent; ++i) {
218 ps_alignment_entry_t *pent = al->sseq.seq + i;
219 ps_alignment_entry_t *sent;
220 int j;
221
222 for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
223 if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
224 E_ERROR("Failed to add state entry!\n");
225 return -1;
226 }
227 sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
228 assert(sent->id.senid != BAD_SENID);
229 sent->start = pent->start;
230 sent->duration = pent->duration;
231 sent->parent = i;
232 if (j == 0)
233 pent->child = (uint16)(sent - al->state.seq);
234 }
235 }
236
237 return 0;
238 }
239
240 /* FIXME: Somewhat the same as the above function, needs refactoring */
241 int
ps_alignment_populate_ci(ps_alignment_t * al)242 ps_alignment_populate_ci(ps_alignment_t *al)
243 {
244 dict2pid_t *d2p;
245 dict_t *dict;
246 bin_mdef_t *mdef;
247 int i;
248
249 /* Clear phone and state sequences. */
250 ps_alignment_vector_empty(&al->sseq);
251 ps_alignment_vector_empty(&al->state);
252
253 /* For each word, expand to phones/senone sequences. */
254 d2p = al->d2p;
255 dict = d2p->dict;
256 mdef = d2p->mdef;
257 for (i = 0; i < al->word.n_ent; ++i) {
258 ps_alignment_entry_t *went = al->word.seq + i;
259 ps_alignment_entry_t *sent;
260 int wid = went->id.wid;
261 int len = dict_pronlen(dict, wid);
262 int j;
263
264 for (j = 0; j < len; ++j) {
265 if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
266 E_ERROR("Failed to add phone entry!\n");
267 return -1;
268 }
269 sent->id.pid.cipid = dict_pron(dict, wid, j);
270 sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
271 sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
272 assert(sent->id.pid.ssid != BAD_SSID);
273 sent->start = went->start;
274 sent->duration = went->duration;
275 sent->parent = i;
276 }
277 }
278
279 /* For each senone sequence, expand to senones. (we could do this
280 * nested above but this makes it more clear and easier to
281 * refactor) */
282 for (i = 0; i < al->sseq.n_ent; ++i) {
283 ps_alignment_entry_t *pent = al->sseq.seq + i;
284 ps_alignment_entry_t *sent;
285 int j;
286
287 for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
288 if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
289 E_ERROR("Failed to add state entry!\n");
290 return -1;
291 }
292 sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
293 assert(sent->id.senid != BAD_SENID);
294 sent->start = pent->start;
295 sent->duration = pent->duration;
296 sent->parent = i;
297 if (j == 0)
298 pent->child = (uint16)(sent - al->state.seq);
299 }
300 }
301
302 return 0;
303 }
304
305 int
ps_alignment_propagate(ps_alignment_t * al)306 ps_alignment_propagate(ps_alignment_t *al)
307 {
308 ps_alignment_entry_t *last_ent = NULL;
309 int i;
310
311 /* Propagate duration up from states to phones. */
312 for (i = 0; i < al->state.n_ent; ++i) {
313 ps_alignment_entry_t *sent = al->state.seq + i;
314 ps_alignment_entry_t *pent = al->sseq.seq + sent->parent;
315 if (pent != last_ent) {
316 pent->start = sent->start;
317 pent->duration = 0;
318 }
319 pent->duration += sent->duration;
320 last_ent = pent;
321 }
322
323 /* Propagate duration up from phones to words. */
324 last_ent = NULL;
325 for (i = 0; i < al->sseq.n_ent; ++i) {
326 ps_alignment_entry_t *pent = al->sseq.seq + i;
327 ps_alignment_entry_t *went = al->word.seq + pent->parent;
328 if (went != last_ent) {
329 went->start = pent->start;
330 went->duration = 0;
331 }
332 went->duration += pent->duration;
333 last_ent = went;
334 }
335
336 return 0;
337 }
338
339 int
ps_alignment_n_words(ps_alignment_t * al)340 ps_alignment_n_words(ps_alignment_t *al)
341 {
342 return (int)al->word.n_ent;
343 }
344
345 int
ps_alignment_n_phones(ps_alignment_t * al)346 ps_alignment_n_phones(ps_alignment_t *al)
347 {
348 return (int)al->sseq.n_ent;
349 }
350
351 int
ps_alignment_n_states(ps_alignment_t * al)352 ps_alignment_n_states(ps_alignment_t *al)
353 {
354 return (int)al->state.n_ent;
355 }
356
357 ps_alignment_iter_t *
ps_alignment_words(ps_alignment_t * al)358 ps_alignment_words(ps_alignment_t *al)
359 {
360 ps_alignment_iter_t *itor;
361
362 if (al->word.n_ent == 0)
363 return NULL;
364 itor = ckd_calloc(1, sizeof(*itor));
365 itor->al = al;
366 itor->vec = &al->word;
367 itor->pos = 0;
368 return itor;
369 }
370
371 ps_alignment_iter_t *
ps_alignment_phones(ps_alignment_t * al)372 ps_alignment_phones(ps_alignment_t *al)
373 {
374 ps_alignment_iter_t *itor;
375
376 if (al->sseq.n_ent == 0)
377 return NULL;
378 itor = ckd_calloc(1, sizeof(*itor));
379 itor->al = al;
380 itor->vec = &al->sseq;
381 itor->pos = 0;
382 return itor;
383 }
384
385 ps_alignment_iter_t *
ps_alignment_states(ps_alignment_t * al)386 ps_alignment_states(ps_alignment_t *al)
387 {
388 ps_alignment_iter_t *itor;
389
390 if (al->state.n_ent == 0)
391 return NULL;
392 itor = ckd_calloc(1, sizeof(*itor));
393 itor->al = al;
394 itor->vec = &al->state;
395 itor->pos = 0;
396 return itor;
397 }
398
399 ps_alignment_entry_t *
ps_alignment_iter_get(ps_alignment_iter_t * itor)400 ps_alignment_iter_get(ps_alignment_iter_t *itor)
401 {
402 return itor->vec->seq + itor->pos;
403 }
404
405 int
ps_alignment_iter_free(ps_alignment_iter_t * itor)406 ps_alignment_iter_free(ps_alignment_iter_t *itor)
407 {
408 ckd_free(itor);
409 return 0;
410 }
411
412 ps_alignment_iter_t *
ps_alignment_iter_goto(ps_alignment_iter_t * itor,int pos)413 ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos)
414 {
415 if (itor == NULL)
416 return NULL;
417 if (pos >= itor->vec->n_ent) {
418 ps_alignment_iter_free(itor);
419 return NULL;
420 }
421 itor->pos = pos;
422 return itor;
423 }
424
425 ps_alignment_iter_t *
ps_alignment_iter_next(ps_alignment_iter_t * itor)426 ps_alignment_iter_next(ps_alignment_iter_t *itor)
427 {
428 if (itor == NULL)
429 return NULL;
430 if (++itor->pos >= itor->vec->n_ent) {
431 ps_alignment_iter_free(itor);
432 return NULL;
433 }
434 return itor;
435 }
436
437 ps_alignment_iter_t *
ps_alignment_iter_prev(ps_alignment_iter_t * itor)438 ps_alignment_iter_prev(ps_alignment_iter_t *itor)
439 {
440 if (itor == NULL)
441 return NULL;
442 if (--itor->pos < 0) {
443 ps_alignment_iter_free(itor);
444 return NULL;
445 }
446 return itor;
447 }
448
449 ps_alignment_iter_t *
ps_alignment_iter_up(ps_alignment_iter_t * itor)450 ps_alignment_iter_up(ps_alignment_iter_t *itor)
451 {
452 ps_alignment_iter_t *itor2;
453 if (itor == NULL)
454 return NULL;
455 if (itor->vec == &itor->al->word)
456 return NULL;
457 if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE)
458 return NULL;
459 itor2 = ckd_calloc(1, sizeof(*itor2));
460 itor2->al = itor->al;
461 itor2->pos = itor->vec->seq[itor->pos].parent;
462 if (itor->vec == &itor->al->sseq)
463 itor2->vec = &itor->al->word;
464 else
465 itor2->vec = &itor->al->sseq;
466 return itor2;
467 }
468
469 ps_alignment_iter_t *
ps_alignment_iter_down(ps_alignment_iter_t * itor)470 ps_alignment_iter_down(ps_alignment_iter_t *itor)
471 {
472 ps_alignment_iter_t *itor2;
473 if (itor == NULL)
474 return NULL;
475 if (itor->vec == &itor->al->state)
476 return NULL;
477 if (itor->vec->seq[itor->pos].child == PS_ALIGNMENT_NONE)
478 return NULL;
479 itor2 = ckd_calloc(1, sizeof(*itor2));
480 itor2->al = itor->al;
481 itor2->pos = itor->vec->seq[itor->pos].child;
482 if (itor->vec == &itor->al->word)
483 itor2->vec = &itor->al->sseq;
484 else
485 itor2->vec = &itor->al->state;
486 return itor2;
487 }
488