1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2010 Carnegie Mellon University.  All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /**
39  * @file ps_alignment.c Multi-level alignment structure
40  */
41 
42 /* System headers. */
43 
44 /* SphinxBase headers. */
45 #include <sphinxbase/ckd_alloc.h>
46 
47 /* Local headers. */
48 #include "ps_alignment.h"
49 
50 ps_alignment_t *
ps_alignment_init(dict2pid_t * d2p)51 ps_alignment_init(dict2pid_t *d2p)
52 {
53     ps_alignment_t *al = ckd_calloc(1, sizeof(*al));
54     al->d2p = dict2pid_retain(d2p);
55     return al;
56 }
57 
58 int
ps_alignment_free(ps_alignment_t * al)59 ps_alignment_free(ps_alignment_t *al)
60 {
61     if (al == NULL)
62         return 0;
63     dict2pid_free(al->d2p);
64     ckd_free(al->word.seq);
65     ckd_free(al->sseq.seq);
66     ckd_free(al->state.seq);
67     ckd_free(al);
68     return 0;
69 }
70 
71 #define VECTOR_GROW 10
72 static void *
vector_grow_one(void * ptr,uint16 * n_alloc,uint16 * n,size_t item_size)73 vector_grow_one(void *ptr, uint16 *n_alloc, uint16 *n, size_t item_size)
74 {
75     int newsize = *n + 1;
76     if (newsize < *n_alloc) {
77         *n += 1;
78         return ptr;
79     }
80     newsize += VECTOR_GROW;
81     if (newsize > 0xffff)
82         return NULL;
83     ptr = ckd_realloc(ptr, newsize * item_size);
84     *n += 1;
85     *n_alloc = newsize;
86     return ptr;
87 }
88 
89 static ps_alignment_entry_t *
ps_alignment_vector_grow_one(ps_alignment_vector_t * vec)90 ps_alignment_vector_grow_one(ps_alignment_vector_t *vec)
91 {
92     void *ptr;
93     ptr = vector_grow_one(vec->seq, &vec->n_alloc,
94                           &vec->n_ent, sizeof(*vec->seq));
95     if (ptr == NULL)
96         return NULL;
97     vec->seq = ptr;
98     return vec->seq + vec->n_ent - 1;
99 }
100 
101 static void
ps_alignment_vector_empty(ps_alignment_vector_t * vec)102 ps_alignment_vector_empty(ps_alignment_vector_t *vec)
103 {
104     vec->n_ent = 0;
105 }
106 
107 int
ps_alignment_add_word(ps_alignment_t * al,int32 wid,int duration)108 ps_alignment_add_word(ps_alignment_t *al,
109                       int32 wid, int duration)
110 {
111     ps_alignment_entry_t *ent;
112 
113     if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL)
114         return 0;
115     ent->id.wid = wid;
116     if (al->word.n_ent > 1)
117         ent->start = ent[-1].start + ent[-1].duration;
118     else
119         ent->start = 0;
120     ent->duration = duration;
121     ent->parent = PS_ALIGNMENT_NONE;
122     ent->child = PS_ALIGNMENT_NONE;
123 
124     return al->word.n_ent;
125 }
126 
127 int
ps_alignment_populate(ps_alignment_t * al)128 ps_alignment_populate(ps_alignment_t *al)
129 {
130     dict2pid_t *d2p;
131     dict_t *dict;
132     bin_mdef_t *mdef;
133     int i, lc;
134 
135     /* Clear phone and state sequences. */
136     ps_alignment_vector_empty(&al->sseq);
137     ps_alignment_vector_empty(&al->state);
138 
139     /* For each word, expand to phones/senone sequences. */
140     d2p = al->d2p;
141     dict = d2p->dict;
142     mdef = d2p->mdef;
143     lc = bin_mdef_silphone(mdef);
144     for (i = 0; i < al->word.n_ent; ++i) {
145         ps_alignment_entry_t *went = al->word.seq + i;
146         ps_alignment_entry_t *sent;
147         int wid = went->id.wid;
148         int len = dict_pronlen(dict, wid);
149         int j, rc;
150 
151         if (i < al->word.n_ent - 1)
152             rc = dict_first_phone(dict, al->word.seq[i+1].id.wid);
153         else
154             rc = bin_mdef_silphone(mdef);
155 
156         /* First phone. */
157         if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
158             E_ERROR("Failed to add phone entry!\n");
159             return -1;
160         }
161         sent->id.pid.cipid = dict_first_phone(dict, wid);
162         sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
163         sent->start = went->start;
164         sent->duration = went->duration;
165         sent->parent = i;
166         went->child = (uint16)(sent - al->sseq.seq);
167         if (len == 1)
168             sent->id.pid.ssid
169                 = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc);
170         else
171             sent->id.pid.ssid
172                 = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid,
173                                     dict_second_phone(dict, wid), lc);
174         assert(sent->id.pid.ssid != BAD_SSID);
175 
176         /* Internal phones. */
177         for (j = 1; j < len - 1; ++j) {
178             if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
179                 E_ERROR("Failed to add phone entry!\n");
180                 return -1;
181             }
182             sent->id.pid.cipid = dict_pron(dict, wid, j);
183             sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
184             sent->id.pid.ssid = dict2pid_internal(d2p, wid, j);
185             assert(sent->id.pid.ssid != BAD_SSID);
186             sent->start = went->start;
187             sent->duration = went->duration;
188             sent->parent = i;
189         }
190 
191         /* Last phone. */
192         if (j < len) {
193             xwdssid_t *rssid;
194             assert(j == len - 1);
195             if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
196                 E_ERROR("Failed to add phone entry!\n");
197                 return -1;
198             }
199             sent->id.pid.cipid = dict_last_phone(dict, wid);
200             sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
201             rssid = dict2pid_rssid(d2p, sent->id.pid.cipid,
202                                    dict_second_last_phone(dict, wid));
203             sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]];
204             assert(sent->id.pid.ssid != BAD_SSID);
205             sent->start = went->start;
206             sent->duration = went->duration;
207             sent->parent = i;
208         }
209         /* Update lc.  Could just use sent->id.pid.cipid here but that
210          * seems needlessly obscure. */
211         lc = dict_last_phone(dict, wid);
212     }
213 
214     /* For each senone sequence, expand to senones.  (we could do this
215      * nested above but this makes it more clear and easier to
216      * refactor) */
217     for (i = 0; i < al->sseq.n_ent; ++i) {
218         ps_alignment_entry_t *pent = al->sseq.seq + i;
219         ps_alignment_entry_t *sent;
220         int j;
221 
222         for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
223             if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
224                 E_ERROR("Failed to add state entry!\n");
225                 return -1;
226             }
227             sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
228             assert(sent->id.senid != BAD_SENID);
229             sent->start = pent->start;
230             sent->duration = pent->duration;
231             sent->parent = i;
232             if (j == 0)
233                 pent->child = (uint16)(sent - al->state.seq);
234         }
235     }
236 
237     return 0;
238 }
239 
240 /* FIXME: Somewhat the same as the above function, needs refactoring */
241 int
ps_alignment_populate_ci(ps_alignment_t * al)242 ps_alignment_populate_ci(ps_alignment_t *al)
243 {
244     dict2pid_t *d2p;
245     dict_t *dict;
246     bin_mdef_t *mdef;
247     int i;
248 
249     /* Clear phone and state sequences. */
250     ps_alignment_vector_empty(&al->sseq);
251     ps_alignment_vector_empty(&al->state);
252 
253     /* For each word, expand to phones/senone sequences. */
254     d2p = al->d2p;
255     dict = d2p->dict;
256     mdef = d2p->mdef;
257     for (i = 0; i < al->word.n_ent; ++i) {
258         ps_alignment_entry_t *went = al->word.seq + i;
259         ps_alignment_entry_t *sent;
260         int wid = went->id.wid;
261         int len = dict_pronlen(dict, wid);
262         int j;
263 
264         for (j = 0; j < len; ++j) {
265             if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) {
266                 E_ERROR("Failed to add phone entry!\n");
267                 return -1;
268             }
269             sent->id.pid.cipid = dict_pron(dict, wid, j);
270             sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid);
271             sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid);
272             assert(sent->id.pid.ssid != BAD_SSID);
273             sent->start = went->start;
274             sent->duration = went->duration;
275             sent->parent = i;
276         }
277     }
278 
279     /* For each senone sequence, expand to senones.  (we could do this
280      * nested above but this makes it more clear and easier to
281      * refactor) */
282     for (i = 0; i < al->sseq.n_ent; ++i) {
283         ps_alignment_entry_t *pent = al->sseq.seq + i;
284         ps_alignment_entry_t *sent;
285         int j;
286 
287         for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) {
288             if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) {
289                 E_ERROR("Failed to add state entry!\n");
290                 return -1;
291             }
292             sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j);
293             assert(sent->id.senid != BAD_SENID);
294             sent->start = pent->start;
295             sent->duration = pent->duration;
296             sent->parent = i;
297             if (j == 0)
298                 pent->child = (uint16)(sent - al->state.seq);
299         }
300     }
301 
302     return 0;
303 }
304 
305 int
ps_alignment_propagate(ps_alignment_t * al)306 ps_alignment_propagate(ps_alignment_t *al)
307 {
308     ps_alignment_entry_t *last_ent = NULL;
309     int i;
310 
311     /* Propagate duration up from states to phones. */
312     for (i = 0; i < al->state.n_ent; ++i) {
313         ps_alignment_entry_t *sent = al->state.seq + i;
314         ps_alignment_entry_t *pent = al->sseq.seq + sent->parent;
315         if (pent != last_ent) {
316             pent->start = sent->start;
317             pent->duration = 0;
318         }
319         pent->duration += sent->duration;
320         last_ent = pent;
321     }
322 
323     /* Propagate duration up from phones to words. */
324     last_ent = NULL;
325     for (i = 0; i < al->sseq.n_ent; ++i) {
326         ps_alignment_entry_t *pent = al->sseq.seq + i;
327         ps_alignment_entry_t *went = al->word.seq + pent->parent;
328         if (went != last_ent) {
329             went->start = pent->start;
330             went->duration = 0;
331         }
332         went->duration += pent->duration;
333         last_ent = went;
334     }
335 
336     return 0;
337 }
338 
339 int
ps_alignment_n_words(ps_alignment_t * al)340 ps_alignment_n_words(ps_alignment_t *al)
341 {
342     return (int)al->word.n_ent;
343 }
344 
345 int
ps_alignment_n_phones(ps_alignment_t * al)346 ps_alignment_n_phones(ps_alignment_t *al)
347 {
348     return (int)al->sseq.n_ent;
349 }
350 
351 int
ps_alignment_n_states(ps_alignment_t * al)352 ps_alignment_n_states(ps_alignment_t *al)
353 {
354     return (int)al->state.n_ent;
355 }
356 
357 ps_alignment_iter_t *
ps_alignment_words(ps_alignment_t * al)358 ps_alignment_words(ps_alignment_t *al)
359 {
360     ps_alignment_iter_t *itor;
361 
362     if (al->word.n_ent == 0)
363         return NULL;
364     itor = ckd_calloc(1, sizeof(*itor));
365     itor->al = al;
366     itor->vec = &al->word;
367     itor->pos = 0;
368     return itor;
369 }
370 
371 ps_alignment_iter_t *
ps_alignment_phones(ps_alignment_t * al)372 ps_alignment_phones(ps_alignment_t *al)
373 {
374     ps_alignment_iter_t *itor;
375 
376     if (al->sseq.n_ent == 0)
377         return NULL;
378     itor = ckd_calloc(1, sizeof(*itor));
379     itor->al = al;
380     itor->vec = &al->sseq;
381     itor->pos = 0;
382     return itor;
383 }
384 
385 ps_alignment_iter_t *
ps_alignment_states(ps_alignment_t * al)386 ps_alignment_states(ps_alignment_t *al)
387 {
388     ps_alignment_iter_t *itor;
389 
390     if (al->state.n_ent == 0)
391         return NULL;
392     itor = ckd_calloc(1, sizeof(*itor));
393     itor->al = al;
394     itor->vec = &al->state;
395     itor->pos = 0;
396     return itor;
397 }
398 
399 ps_alignment_entry_t *
ps_alignment_iter_get(ps_alignment_iter_t * itor)400 ps_alignment_iter_get(ps_alignment_iter_t *itor)
401 {
402     return itor->vec->seq + itor->pos;
403 }
404 
405 int
ps_alignment_iter_free(ps_alignment_iter_t * itor)406 ps_alignment_iter_free(ps_alignment_iter_t *itor)
407 {
408     ckd_free(itor);
409     return 0;
410 }
411 
412 ps_alignment_iter_t *
ps_alignment_iter_goto(ps_alignment_iter_t * itor,int pos)413 ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos)
414 {
415     if (itor == NULL)
416         return NULL;
417     if (pos >= itor->vec->n_ent) {
418         ps_alignment_iter_free(itor);
419         return NULL;
420     }
421     itor->pos = pos;
422     return itor;
423 }
424 
425 ps_alignment_iter_t *
ps_alignment_iter_next(ps_alignment_iter_t * itor)426 ps_alignment_iter_next(ps_alignment_iter_t *itor)
427 {
428     if (itor == NULL)
429         return NULL;
430     if (++itor->pos >= itor->vec->n_ent) {
431         ps_alignment_iter_free(itor);
432         return NULL;
433     }
434     return itor;
435 }
436 
437 ps_alignment_iter_t *
ps_alignment_iter_prev(ps_alignment_iter_t * itor)438 ps_alignment_iter_prev(ps_alignment_iter_t *itor)
439 {
440     if (itor == NULL)
441         return NULL;
442     if (--itor->pos < 0) {
443         ps_alignment_iter_free(itor);
444         return NULL;
445     }
446     return itor;
447 }
448 
449 ps_alignment_iter_t *
ps_alignment_iter_up(ps_alignment_iter_t * itor)450 ps_alignment_iter_up(ps_alignment_iter_t *itor)
451 {
452     ps_alignment_iter_t *itor2;
453     if (itor == NULL)
454         return NULL;
455     if (itor->vec == &itor->al->word)
456         return NULL;
457     if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE)
458         return NULL;
459     itor2 = ckd_calloc(1, sizeof(*itor2));
460     itor2->al = itor->al;
461     itor2->pos = itor->vec->seq[itor->pos].parent;
462     if (itor->vec == &itor->al->sseq)
463         itor2->vec = &itor->al->word;
464     else
465         itor2->vec = &itor->al->sseq;
466     return itor2;
467 }
468 
469 ps_alignment_iter_t *
ps_alignment_iter_down(ps_alignment_iter_t * itor)470 ps_alignment_iter_down(ps_alignment_iter_t *itor)
471 {
472     ps_alignment_iter_t *itor2;
473     if (itor == NULL)
474         return NULL;
475     if (itor->vec == &itor->al->state)
476         return NULL;
477     if (itor->vec->seq[itor->pos].child == PS_ALIGNMENT_NONE)
478         return NULL;
479     itor2 = ckd_calloc(1, sizeof(*itor2));
480     itor2->al = itor->al;
481     itor2->pos = itor->vec->seq[itor->pos].child;
482     if (itor->vec == &itor->al->word)
483         itor2->vec = &itor->al->sseq;
484     else
485         itor2->vec = &itor->al->state;
486     return itor2;
487 }
488