1 /*************************************************************************/
2 /*                                                                       */
3 /*                  Language Technologies Institute                      */
4 /*                     Carnegie Mellon University                        */
5 /*                         Copyright (c) 2009                            */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*             Author:  Alan W Black (awb@cs.cmu.edu)                    */
34 /*               Date:  January 2009                                     */
35 /*************************************************************************/
36 /*                                                                       */
37 /*  flowm functions for flite access                                     */
38 /*                                                                       */
39 /*************************************************************************/
40 
41 #include <windows.h>
42 #include <commctrl.h>
43 #include <aygshell.h>
44 
45 #include "cst_wchar.h"
46 #include "flite.h"
47 #include "flowm.h"
48 
/* For debugging it's sometimes good to switch off the actual synthesis */
50 #define DOTTS 1
51 
52 static cst_audiodev *fl_ad = 0;
53 
54 #ifdef DOTTS
55 cst_voice *register_cmu_us_kal(const char *voxdir);
56 void unregister_cmu_us_kal(cst_voice *v);
57 cst_voice *register_cmu_us_awb(const char *voxdir);
58 void unregister_cmu_us_awb(cst_voice *v);
59 cst_voice *register_cmu_us_rms(const char *voxdir);
60 void unregister_cmu_us_rms(cst_voice *v);
61 cst_voice *register_cmu_us_slt(const char *voxdir);
62 void unregister_cmu_us_slt(cst_voice *v);
63 #endif
64 
65 cst_wave *previous_wave = NULL;
66 
67 typedef struct VoxDef_struct
68 {
69     TCHAR *name;
70     cst_voice *(*rv)(const char *voxdir);  /* register_voice */
71     void (*urv)(cst_voice *v);             /* unregister_voice */
72     int min_buffsize;                      /* for audio streaming */
73     cst_voice *v;
74 } VoxDef;
75 
76 VoxDef VoxDefs[] = {
77 #ifdef cmu_us_kal
78     { L"kal", register_cmu_us_kal, unregister_cmu_us_kal, 256, NULL },
79 #endif
80 #ifdef cmu_us_awb
81     { L"awb", register_cmu_us_awb, unregister_cmu_us_awb, 2000, NULL },
82 #endif
83 #ifdef cmu_us_rms
84     { L"rms", register_cmu_us_rms, unregister_cmu_us_rms, 2000, NULL },
85 #endif
86 #ifdef cmu_us_slt
87     { L"slt", register_cmu_us_slt, unregister_cmu_us_slt, 2000, NULL },
88 #endif
89     { NULL, NULL }
90 };
91 
92 cst_utterance *flowm_print_relation_callback(cst_utterance *u);
93 cst_utterance *flowm_utt_callback(cst_utterance *u);
94 int flowm_audio_stream_chunk(const cst_wave *w, int start, int size,
95                              int last, cst_audio_streaming_info *asi);
96 
flowm_find_file_percentage()97 float flowm_find_file_percentage()
98 {
99     if (flowm_file_size <= 0)
100         return 0.0;
101     else
102         return (flowm_file_pos*100.0)/flowm_file_size;
103 }
104 
flowm_voice_name(int i)105 TCHAR *flowm_voice_name(int i)
106 {
107     /* In order not to have flite things in flowm_main, we provide an */
108     /* interface to the voice list */
109     return VoxDefs[i].name;
110 }
111 
void flowm_init()
{
    /* Initialize flite, register every voice listed in VoxDefs and */
    /* attach the flowm callbacks to each of them.  Does nothing when */
    /* DOTTS is not defined. */
#ifdef DOTTS
    int i;
    cst_audio_streaming_info *asi;

    flite_init();

    i = 0;
    while (VoxDefs[i].name != NULL)
    {
        /* Register the voice and remember it in the table */
        VoxDefs[i].v = (VoxDefs[i].rv)(NULL);

        /* Low level audio streaming: the waveform is handed to */
        /* flowm_audio_stream_chunk while it is being synthesized, */
        /* which is necessary for the slower (CG) voices */
        asi = new_audio_streaming_info();
        asi->asc = flowm_audio_stream_chunk;
        asi->min_buffsize = VoxDefs[i].min_buffsize;
        feat_set(VoxDefs[i].v->features,
                 "streaming_info",
                 audio_streaming_info_val(asi));

        /* Per-utterance callback: reports which tokens are being */
        /* synthesized and keeps track of the position in the file */
        feat_set(VoxDefs[i].v->features,
                 "utt_user_callback",
                 uttfunc_val(flowm_utt_callback));

        /* Hook for outputting the contents of a relation */
        /* (only used in play) */
        feat_set(VoxDefs[i].v->features,
                 "post_synth_hook_func",
                 uttfunc_val(flowm_print_relation_callback));

        i++;
    }
#endif
}
150 
flowm_terminate()151 void flowm_terminate()
152 {
153 #ifdef DOTTS
154     int i;
155 
156     for (i=0; VoxDefs[i].name; i++)
157     {
158         (VoxDefs[i].urv)(VoxDefs[i].v); /* unregister voice */
159     }
160 #endif
161     if (previous_wave)
162     {
163         delete_wave(previous_wave);
164         previous_wave = NULL;
165     }
166 
167     return;
168 }
169 
int flowm_save_wave(TCHAR *filename)
{
    /* Write the last synthesized waveform to filename as a RIFF file. */
    /* Returns -1 when there is no waveform to save, otherwise the */
    /* result of cst_wave_save_riff(). */
    char *narrow_name;
    int status = -1;

    if (previous_wave != NULL)
    {
        /* The wave writer takes a char string, so convert the name */
        narrow_name = cst_wstr2cstr(filename);
        status = cst_wave_save_riff(previous_wave,narrow_name);
        cst_free(narrow_name);
    }

    return status;
}
185 
186 #ifdef DOTTS
int flowm_say_text(TCHAR *text)
{
    /* Synthesize text with the currently selected voice.  The result */
    /* is kept in previous_wave so it can be saved afterwards. */
    /* Returns the number of samples in the synthesized waveform. */
    char *s;
    int ns;
    cst_voice *v;

    /* Drop the waveform left over from the previous call */
    if (previous_wave)
    {
        delete_wave(previous_wave);
        previous_wave = NULL;
    }

    s = cst_wstr2cstr(text);               /* text to synthesize */
    v = VoxDefs[flowm_selected_voice].v;   /* voice to synthesize with */

    /* Select which relation (if any) the post synthesis hook reports */
    feat_remove(v->features,"print_info_relation");
    if (flowm_selected_relation == 1)
        feat_set_string(v->features, "print_info_relation", "Word");
    if (flowm_selected_relation == 2)
        feat_set_string(v->features, "print_info_relation", "Segment");

    /* Do the synthesis */
    previous_wave = flite_text_to_wave(s,v);

    ns = cst_wave_num_samples(previous_wave);

    cst_free(s);

    /* The device is only opened by flowm_audio_stream_chunk, so it is */
    /* still NULL if no chunk was streamed; only flush and close a */
    /* device that actually exists */
    if (fl_ad)
    {
        audio_flush(fl_ad);
        audio_close(fl_ad);
        fl_ad = NULL;
    }

    return ns;
}
220 #else
int flowm_say_text(TCHAR *text)
{
    /* Synthesis is switched off: just show the text that would have */
    /* been spoken, and report zero samples */
    (void)MessageBoxW(NULL, text, L"SayText", MB_OK);

    return 0;
}
226 #endif
227 
cst_utterance *flowm_print_relation_callback(cst_utterance *u)
{
    /* Say the details of a named relation for display. */
    /* The relation is named by the "print_info_relation" feature; the */
    /* message "<relation>: name name ..." is built from the "name" */
    /* feature of its items and converted into fl_tts_msg.  When the */
    /* feature is not set fl_tts_msg is cleared instead. */
    /* Always returns u. */
    char rst[FL_MAX_MSG_CHARS];
    const char *name;
    const char *relname;
    const char *space;
    cst_item *item;
    int len;                 /* number of chars currently in rst */

    relname = get_param_string(u->features,"print_info_relation", NULL);

    /* This must be tested before relname is given to a %s conversion */
    if (!relname)
    {
        mbstowcs(fl_tts_msg,"",FL_MAX_MSG_CHARS);
        return u;
    }

    rst[0] = '\0';
    if ((int)cst_strlen(relname)+2+4 < FL_MAX_MSG_CHARS)
        cst_sprintf(rst,"%s: ",relname);
    len = (int)cst_strlen(rst);

    space = "";
    for (item=relation_head(utt_relation(u,relname));
         item; item=item_next(item))
    {
        name = item_feat_string(item,"name");

        /* Room is needed for what is already there, a separator and */
        /* the name, always keeping 4 chars spare for a final " ..." */
        if (len+1+(int)cst_strlen(name)+4 < FL_MAX_MSG_CHARS)
        {
            /* Append at the end of rst; rst is never passed as one of */
            /* its own arguments because source and destination of */
            /* sprintf must not overlap */
            cst_sprintf(rst+len,"%s%s",space,name);
            len = (int)cst_strlen(rst);
        }
        else
        {
            /* No more room: mark the truncation once and stop */
            if (len+4 < FL_MAX_MSG_CHARS)
                cst_sprintf(rst+len," ...");
            break;
        }
        space = " ";
    }
    mbstowcs(fl_tts_msg,rst,FL_MAX_MSG_CHARS);

    return u;
}
264 
cst_utterance *flowm_utt_callback(cst_utterance *u)
{
    /* Called for each utterance before it is synthesized. */
    /* While playing (or skipping) it shows the tokens of the utterance */
    /* and the file position percentage in TTSWindow, and records the */
    /* file position of the utterance in flowm_prev_utt_pos. */
    /* Returns u to carry on, or deletes u and returns 0 to stop. */
    char rst[FL_MAX_MSG_CHARS];
    const char *tok;
    const char *prepunc;
    const char *punc;
    const char *space;
    cst_item *item;
    int len;                 /* number of chars currently in rst */

    /* In order to stop the synthesizer if the STOP button is pressed */
    /* This stops the synthesis of the next utterance */
    if ((flowm_play_status != FLOWM_PLAY) &&
        (flowm_play_status != FLOWM_SKIP))
    {
        delete_utterance(u);
        return 0;
    }

    /* Nothing to display in, so just let the synthesis continue */
    if (!TTSWindow)
        return u;

    rst[0] = '\0';
    len = 0;
    space = "";
    for (item=relation_head(utt_relation(u,"Token"));
         item; item=item_next(item))
    {
        tok = item_feat_string(item,"name");
        if (cst_streq("",space))
            /* Only do this on the first token/word */
            flowm_file_pos = item_feat_int(item,"file_pos");
        prepunc = item_feat_string(item,"prepunctuation");
        punc = item_feat_string(item,"punc");

        /* Room is needed for what is already there, a separator and */
        /* the token with its punctuation, always keeping 4 chars */
        /* spare for a final " ..." */
        if (len+1+(int)cst_strlen(prepunc)+(int)cst_strlen(tok)+
            (int)cst_strlen(punc)+4 < FL_MAX_MSG_CHARS)
        {
            /* Append at the end of rst; rst is never passed as one of */
            /* its own arguments because source and destination of */
            /* sprintf must not overlap */
            cst_sprintf(rst+len,"%s%s%s%s",space,prepunc,tok,punc);
            len = (int)cst_strlen(rst);
        }
        else
        {
            if (len+4 < FL_MAX_MSG_CHARS)
                cst_sprintf(rst+len," ...");
            break;
        }
        space = " ";
    }

    if (flowm_file_pos > flowm_prev_utt_pos[flowm_utt_pos_pos])
    {
        if ((flowm_utt_pos_pos+1) >= FLOWM_NUM_UTT_POS)
        {
            /* Filled it up, so move it down: drop the oldest entry */
            /* and keep all the others, leaving the last slot free */
            memmove(flowm_prev_utt_pos,&flowm_prev_utt_pos[1],
                    sizeof(flowm_prev_utt_pos[0])*(FLOWM_NUM_UTT_POS-1));
            flowm_utt_pos_pos = (FLOWM_NUM_UTT_POS-2);
        }
        flowm_utt_pos_pos++;
        flowm_prev_utt_pos[flowm_utt_pos_pos] = flowm_file_pos;
    }

    /* Send text to TTSWindow */
    mbstowcs(fl_tts_msg,rst,FL_MAX_MSG_CHARS);
    SetDlgItemText(TTSWindow, FL_SYNTHTEXT, fl_tts_msg);

    /* Update file pos percentage in FilePos window */
    cst_sprintf(rst,"%2.3f",flowm_find_file_percentage());
    mbstowcs(fl_fp_msg,rst,FL_MAX_MSG_CHARS);
    SetDlgItemText(TTSWindow, FL_FILEPOS, fl_fp_msg);

    SystemIdleTimerReset();  /* keep alive while synthesizing */
    if (flowm_play_status == FLOWM_SKIP)
        flowm_play_status = FLOWM_PLAY;

    return u;
}
341 
int flowm_audio_stream_chunk(const cst_wave *w, int start, int size,
                             int last, cst_audio_streaming_info *asi)
{
    /* Audio streaming callback: receives size samples of w starting */
    /* at sample start.  The parameters last and asi are not used. */
    /* Returns CST_AUDIO_STREAM_CONT to carry on synthesizing or */
    /* CST_AUDIO_STREAM_STOP to stop. */
    (void)last;
    (void)asi;

    /* The audio device is opened when the first chunk arrives */
    if (fl_ad == NULL)
    {
        fl_ad = audio_open(w->sample_rate,w->num_channels,CST_AUDIO_LINEAR16);
    }

    if (flowm_play_status == FLOWM_PLAY)
    {
        /* Without a device there is nothing to write to, so stop */
        /* rather than hand a NULL device to audio_write */
        if (fl_ad == NULL)
            return CST_AUDIO_STREAM_STOP;

        audio_write(fl_ad,&w->samples[start],size*sizeof(short));
        return CST_AUDIO_STREAM_CONT;
    }
    else if (flowm_play_status == FLOWM_BENCH)
    {   /* Do TTS but don't actually play it */
        /* How much have we played */
        flowm_duration += (size*1.0)/w->sample_rate;
        return CST_AUDIO_STREAM_CONT;
    }
    else
    {   /* for STOP, and the SKIPS (if they get here) */
        return CST_AUDIO_STREAM_STOP;
    }
}
367 
368 #ifdef DOTTS
int flowm_say_file(TCHAR *tfilename)
{
    /* Speak the file tfilename with the currently selected voice, */
    /* starting from flowm_file_pos. */
    /* Returns the result of flite_file_to_speech(). */
    int rc = 0;
    char *filename;
    cst_voice *v;

    if (previous_wave)
    {   /* This is really tidy up from Play -- but might save space */
        delete_wave(previous_wave);
        previous_wave = NULL;
    }

    /* A device left open by an earlier call is reported and closed */
    if (fl_ad)
    {
        MessageBoxW(0,L"audio fd still open",L"SayFile",0);
        audio_close(fl_ad);
        fl_ad = NULL;
    }

    v = VoxDefs[flowm_selected_voice].v;

    /* Where we want to start from */
    feat_set_int(v->features, "file_start_position", flowm_file_pos);

    /* Only do print_info in play mode */
    feat_remove(v->features,"print_info_relation");

    filename = cst_wstr2cstr(tfilename);
    rc = flite_file_to_speech(filename, v, "stream");
    cst_free(filename);

    /* The device is only opened by flowm_audio_stream_chunk, so it is */
    /* still NULL if no chunk was streamed; only flush and close a */
    /* device that actually exists */
    if (fl_ad)
    {
        audio_flush(fl_ad);
        audio_close(fl_ad);
        fl_ad = NULL;
    }

    return rc;
}
407 #else
int flowm_say_file(TCHAR *text)
{
    /* Synthesis is switched off: just show the argument that was */
    /* passed in, and report success */
    (void)MessageBoxW(NULL, text, L"SayFile", MB_OK);

    return 0;
}
413 #endif
414 
415 
416 
417 
418 
419