1 /*
2 * ibmtts.c - Speech Dispatcher backend for IBM TTS / Voxin
3 *
4 * Copyright (C) 2006, 2007 Brailcom, o.p.s.
5 * Copyright (C) 2020 Gilles Casse <gcasse@oralux.org>
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program. If not, see <https://www.gnu.org/licenses/>.
19 *
20 * @author Gary Cramblitt <garycramblitt@comcast.net> (original author)
21 *
22 * $Id: ibmtts.c,v 1.30 2008-06-30 14:34:02 gcasse Exp $
23 */
24
25 /* This output module operates with two threads:
26
27 The main thread called from Speech Dispatcher (module_*()).
28 A synthesis thread that accepts messages, parses them, and forwards
29 them to the IBM TTS via the Eloquence Command Interface (ECI).
30 This thread receives audio and index mark callbacks from
31 IBM TTS and queues them into a playback queue. See _synth().
32
33 A semaphore is used between the main thread and the synthesis thread as a
34 producer/consumer relation.
35
36 TODO:
37 - Support list_synthesis_voices()
38 - Limit amount of waveform data synthesised in advance.
39 - Use SSML mark feature of ibmtts instead of handcrafted parsing.
40 */
41
42 #ifdef HAVE_CONFIG_H
43 #include <config.h>
44 #endif
45
46 /* System includes. */
47 #include <string.h>
48 #include <glib.h>
49 #include <semaphore.h>
50 #include <ctype.h>
51
52 #ifdef VOXIN
53 /* Voxin include */
54 #include "voxin.h"
55 #else
56 /* IBM Eloquence Command Interface. */
57 #include <eci.h>
58 #endif
59
60 /* Speech Dispatcher includes. */
61 #include "spd_audio.h"
62 #include <speechd_types.h>
63 #include "module_utils.h"
64
65 #include "module_utils_speak_queue.h"
66
67 typedef enum {
68 MODULE_FATAL_ERROR = -1,
69 MODULE_OK = 0,
70 MODULE_ERROR = 1
71 } module_status;
72
73 /* TODO: These defines are in src/server/index_marking.h, but including that
74 file here causes a redefinition error on FATAL macro in speechd.h. */
75
76 #define SD_SPEAK "<speak>"
77 #define SD_ENDSPEAK "</speak>"
78
79 #define SD_MARK_HEAD_ONLY "<mark name=\""
80 #define SD_MARK_TAIL "\"/>"
81 #define SD_MARK_TAILTAIL ">"
82 #define SD_MARK_HEAD_ONLY_LEN 12
83 #define SD_MARK_TAIL_LEN 3
84
85 #ifdef VOXIN
86 #define MODULE_NAME "voxin"
87 #define DBG_MODNAME "Voxin: "
88 #else
89 #define MODULE_NAME "ibmtts"
90 #define DBG_MODNAME "Ibmtts: "
91 #endif
92 #define MODULE_VERSION "0.1"
93
94 #define DEBUG_MODULE 1
95 DECLARE_DEBUG();
96
/* Define a hash table where each entry is a double-linked list
   loaded from the config file.  Each entry in the config file
   is 3 strings: the 1st string is the hash key, the 2nd and 3rd are
   stored together as one T<name> item appended to the list kept under
   that key.  arg1 only labels the key column; the expansion does not
   use it.

   A line without a key is ignored.  The key string is duplicated only
   when it creates a new table entry: the table is created without a key
   destroy function, so re-inserting an existing key would leak the
   duplicate.  Appending to a non-empty GList never changes its head, so
   an existing entry needs no re-insertion. */
#define MOD_OPTION_3_STR_HT_DLL(name, arg1, arg2, arg3) \
	typedef struct { \
		char *arg2; \
		char *arg3; \
	} T ## name; \
	GHashTable *name; \
	\
	DOTCONF_CB(name ## _cb) \
	{ \
		T ## name *new_item; \
		GList *dll; \
		if (NULL == cmd->data.list[0]) \
			return NULL; \
		new_item = (T ## name *) g_malloc(sizeof(T ## name)); \
		new_item->arg2 = g_strdup(cmd->data.list[1]); \
		new_item->arg3 = g_strdup(cmd->data.list[2]); \
		dll = g_hash_table_lookup(name, cmd->data.list[0]); \
		if (NULL == dll) \
			g_hash_table_insert(name, \
					    g_strdup(cmd->data.list[0]), \
					    g_list_append(NULL, new_item)); \
		else \
			dll = g_list_append(dll, new_item); \
		return NULL; \
	}
128
/* Create the hash table declared by MOD_OPTION_3_STR_HT_DLL(name, ...)
   and register name_cb as the ARG_LIST config option called #name.
   Wrapped in do { } while (0) so the two statements behave as a single
   statement; invoke it with a trailing semicolon. */
#define MOD_OPTION_HT_DLL_REG(name) \
	do { \
		name = g_hash_table_new(g_str_hash, g_str_equal); \
		module_dc_options = module_add_config_option(module_dc_options, \
				&module_num_dc_options, #name, \
				ARG_LIST, name ## _cb, NULL, 0); \
	} while (0)
135
/* Define a hash table mapping a string to 7 integer values.
   Each config line is the key string followed by 7 base-10 integers,
   stored in a freshly allocated T<name> record under a copy of the key.

   A line that lacks the key or any of the 7 values is ignored.  The
   entries are checked in order and the check stops at the first NULL,
   so nothing is allocated for, and strtol() is never called on, an
   incomplete line.
   NOTE(review): this assumes the argument list is NULL-terminated right
   after the last argument read -- confirm against the dotconf version
   in use. */
#define MOD_OPTION_6_INT_HT(name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
	typedef struct { \
		int arg1; \
		int arg2; \
		int arg3; \
		int arg4; \
		int arg5; \
		int arg6; \
		int arg7; \
	} T ## name; \
	GHashTable *name; \
	\
	DOTCONF_CB(name ## _cb) \
	{ \
		T ## name *new_item; \
		char *new_key; \
		int field_idx; \
		for (field_idx = 0; field_idx <= 7; field_idx++) \
			if (NULL == cmd->data.list[field_idx]) \
				return NULL; \
		new_item = (T ## name *) g_malloc(sizeof(T ## name)); \
		new_key = g_strdup(cmd->data.list[0]); \
		new_item->arg1 = (int) strtol(cmd->data.list[1], NULL, 10); \
		new_item->arg2 = (int) strtol(cmd->data.list[2], NULL, 10); \
		new_item->arg3 = (int) strtol(cmd->data.list[3], NULL, 10); \
		new_item->arg4 = (int) strtol(cmd->data.list[4], NULL, 10); \
		new_item->arg5 = (int) strtol(cmd->data.list[5], NULL, 10); \
		new_item->arg6 = (int) strtol(cmd->data.list[6], NULL, 10); \
		new_item->arg7 = (int) strtol(cmd->data.list[7], NULL, 10); \
		g_hash_table_insert(name, new_key, new_item); \
		return NULL; \
	}
166
167 /* Thread and process control. */
168 static pthread_t synth_thread;
169
170 static sem_t synth_semaphore;
171
172 static gboolean thread_exit_requested = FALSE;
173 static gboolean pause_requested = FALSE;
174
175 /* Current message from Speech Dispatcher. */
176 static char *message;
177 static SPDMessageType message_type;
178
179 /* ECI */
180 static ECIHand eciHandle = NULL_ECI_HAND;
181 static int eci_sample_rate = 0;
182
183 /* ECI sends audio back in chunks to this buffer.
184 The smaller the buffer, the higher the overhead, but the better
185 the index mark resolution. */
186 typedef signed short int TEciAudioSamples;
187 static TEciAudioSamples *audio_chunk;
188
189 /* For some reason, these were left out of eci.h. */
190 typedef enum {
191 eciTextModeDefault = 0,
192 eciTextModeAlphaSpell = 1,
193 eciTextModeAllSpell = 2,
194 eciIRCSpell = 3
195 } ECITextMode;
196
197 /* A lookup table for index mark name given integer id. */
198 static GHashTable *index_mark_ht = NULL;
199 #define MSG_END_MARK 0
200
201 /* When a voice is set, this is the baseline pitch of the voice.
202 SSIP PITCH commands then adjust relative to this. */
203 static int voice_pitch_baseline;
/* When a voice is set, this is the default speed of the voice.
   SSIP RATE commands then adjust relative to this. */
206 static int voice_speed;
207
208 /* Expected input encoding for current language dialect. */
209 #ifdef VOXIN
210 static char *input_encoding = "utf-8";
211 #else
212 static char *input_encoding = "cp1252";
213 #endif
214
215 /* list of speechd voices */
216 static SPDVoice **speechd_voice = NULL;
217 #ifdef VOXIN
218 #define voice_index(i) i
219 #else
220 static int *speechd_voice_index = NULL;
221 #define voice_index(i) speechd_voice_index[i]
222 #endif
223
224 /* Internal function prototypes for main thread. */
225 static void update_sample_rate();
226 static void set_language(char *lang);
227 static void set_voice_type(SPDVoiceType voice_type);
228 static char *voice_enum_to_str(SPDVoiceType voice);
229 static void set_language_and_voice(char *lang, SPDVoiceType voice_type, char *name);
230 static void set_synthesis_voice(char *);
231 static void set_rate(signed int rate);
232 static void set_pitch(signed int pitch);
233 static void set_punctuation_mode(SPDPunctuation punct_mode);
234 static void set_volume(signed int pitch);
235 static void set_capital_mode(SPDCapitalLetters cap_mode);
236
237 /* locale_index_atomic stores the current index of the voices or eciLocales array.
238 The main thread writes this information, the synthesis thread reads it.
239 */
240 static gint locale_index_atomic;
241
242 /* Internal function prototypes for synthesis thread. */
243 static char *extract_mark_name(char *mark);
244 static char *next_part(char *msg, char **mark_name);
245 static int replace(char *from, char *to, GString * msg);
246 static void subst_keys_cb(gpointer data, gpointer user_data);
247 static char *subst_keys(char *key);
248 static char *search_for_sound_icon(const char *icon_name);
249 static gboolean add_sound_icon_to_playback_queue(char *filename);
250 static void load_user_dictionary();
251
252 static enum ECICallbackReturn eciCallback(ECIHand hEngine,
253 enum ECIMessage msg,
254 long lparam, void *data);
255
256 /* Internal function prototypes for playback thread. */
257 static gboolean add_audio_to_playback_queue(TEciAudioSamples *
258 audio_chunk,
259 long num_samples);
260 static void add_mark_to_playback_queue(long markId);
261 static void add_end_to_playback_queue(void);
262
263 /* Miscellaneous internal function prototypes. */
264 static void log_eci_error();
265 static gboolean alloc_voice_list();
266 static void free_voice_list();
267
268 /* The synthesis thread start routine. */
269 static void *_synth(void *);
270
271 /* Module configuration options. */
272 MOD_OPTION_1_INT(IbmttsUseSSML);
273 MOD_OPTION_1_STR(IbmttsPunctuationList);
274 MOD_OPTION_1_INT(IbmttsUseAbbreviation);
275 MOD_OPTION_1_STR(IbmttsDictionaryFolder);
276 MOD_OPTION_1_INT(IbmttsAudioChunkSize);
277 MOD_OPTION_1_STR(IbmttsSoundIconFolder);
278 MOD_OPTION_6_INT_HT(IbmttsVoiceParameters,
279 gender, breathiness, head_size, pitch_baseline,
280 pitch_fluctuation, roughness, speed);
281 MOD_OPTION_3_STR_HT_DLL(IbmttsKeySubstitution, lang, key, newkey);
282
283 #ifdef VOXIN
284 /* Array of installed voices returned by voxGetVoices() */
285 static vox_t *voices;
286 static unsigned int number_of_voices;
287 #define MAX_NB_OF_LANGUAGES number_of_voices
288 #else
289 typedef struct _eciLocale {
290 char *name;
291 char *lang;
292 char *variant;
293 enum ECILanguageDialect langID;
294 char *charset;
295 } eciLocale, *eciLocaleList;
296
297 static eciLocale eciLocales[] = {
298 {"American_English", "en-US", NULL, eciGeneralAmericanEnglish, "ISO-8859-1"},
299 {"British_English", "en-GB", NULL, eciBritishEnglish, "ISO-8859-1"},
300 {"Castilian_Spanish", "es-ES", NULL, eciCastilianSpanish, "ISO-8859-1"},
301 {"Mexican_Spanish", "es-MX", NULL, eciMexicanSpanish, "ISO-8859-1"},
302 {"French", "fr-FR", NULL, eciStandardFrench, "ISO-8859-1"},
303 {"Canadian_French", "fr-CA", NULL, eciCanadianFrench, "ISO-8859-1"},
304 {"German", "de-DE", NULL, eciStandardGerman, "ISO-8859-1"},
305 {"Italian", "it-IT", NULL, eciStandardItalian, "ISO-8859-1"},
306 {"Mandarin_Chinese UCS", "zh-CN", "UCS2", eciMandarinChineseUCS, "UCS2"},
307 {"Mandarin_Chinese", "zh-CN", NULL, eciMandarinChinese, "GBK"},
308 {"Mandarin_Chinese GB", "zh-CN", "GB", eciMandarinChineseGB, "GBK"},
309 {"Mandarin_Chinese PinYin", "zh-CN", "PinYin", eciMandarinChinesePinYin, "GBK"},
310 {"Taiwanese_Mandarin UCS", "zh-TW", "UCS", eciTaiwaneseMandarinUCS, "UCS2"},
311 {"Taiwanese_Mandarin", "zh-TW", NULL, eciTaiwaneseMandarin, "BIG5"},
312 {"Taiwanese_Mandarin Big 5", "zh-TW", "Big5", eciTaiwaneseMandarinBig5, "BIG5"},
313 {"Taiwanese_Mandarin ZhuYin", "zh-TW", "ZhuYin", eciTaiwaneseMandarinZhuYin, "BIG5"},
314 {"Taiwanese_Mandarin PinYin", "zh-TW", "PinYin", eciTaiwaneseMandarinPinYin, "BIG5"},
315 {"Brazilian_Portuguese", "pt-BR", NULL, eciBrazilianPortuguese, "ISO-8859-1"},
316 {"Japanese_UCS", "ja-JP", "UCS", eciStandardJapaneseUCS, "UCS2"},
317 {"Japanese", "ja-JP", NULL, eciStandardJapanese, "SJIS"},
318 {"Japanese_SJIS", "ja-JP", "SJIS", eciStandardJapaneseSJIS, "SJIS"},
319 {"Finnish", "fi-FI", NULL, eciStandardFinnish, "ISO-8859-1"},
320 {"Korean_UCS", "ko-KR", "UCS", eciStandardKoreanUCS, "UCS2"},
321 {"Korean", "ko-KR", NULL, eciStandardKorean, "UHC"},
322 {"Korean_UHC", "ko-KR", "UHC", eciStandardKoreanUHC, "UHC"},
323 {"Cantonese_UCS", "zh-HK", "UCS", eciStandardCantoneseUCS, "UCS2"},
324 {"Cantonese", "zh-HK", NULL, eciStandardCantonese, "GBK"},
325 {"Cantonese_GB", "zh-HK", "GB", eciStandardCantoneseGB, "GBK"},
326 {"HongKong_Cantonese UCS", "zh-HK", "UCS", eciHongKongCantoneseUCS, "UCS-2"},
327 {"HongKong_Cantonese", "zh-HK", NULL, eciHongKongCantonese, "BIG5"},
328 {"HongKong_Cantonese Big 5", "zh-HK", "BIG5", eciHongKongCantoneseBig5, "BIG5"},
329 {"Dutch", "nl-BE", NULL, eciStandardDutch, "ISO-8859-1"},
330 {"Norwegian", "no-NO", NULL, eciStandardNorwegian, "ISO-8859-1"},
331 {"Swedish", "sv-SE", NULL, eciStandardSwedish, "ISO-8859-1"},
332 {"Danish", "da-DK", NULL, eciStandardDanish, "ISO-8859-1"},
333 {"Reserved", "en-US", NULL, eciStandardReserved, "ISO-8859-1"},
334 {"Thai", "th-TH", NULL, eciStandardThai, "TIS-620"},
335 {"ThaiTIS", "th-TH", "TIS", eciStandardThaiTIS, "TIS-620"},
336 {NULL, 0, NULL}
337 };
338
339 #define MAX_NB_OF_LANGUAGES (sizeof(eciLocales)/sizeof(eciLocales[0]) - 1)
340 #endif
341
/* dictionary_filenames: each index corresponds to a value of the ECIDictVolume enumeration */
343 static char *dictionary_filenames[] = {
344 "main.dct",
345 "root.dct",
346 "abbreviation.dct",
347 "extension.dct"
348 };
349
350 #define NB_OF_DICTIONARY_FILENAMES (sizeof(dictionary_filenames)/sizeof(dictionary_filenames[0]))
351
352 /* Public functions */
353
module_load(void)354 int module_load(void)
355 {
356 INIT_SETTINGS_TABLES();
357
358 REGISTER_DEBUG();
359
360 MOD_OPTION_1_INT_REG(IbmttsUseSSML, 1);
361 MOD_OPTION_1_INT_REG(IbmttsUseAbbreviation, 1);
362 MOD_OPTION_1_STR_REG(IbmttsPunctuationList, "()?");
363 MOD_OPTION_1_STR_REG(IbmttsDictionaryFolder,
364 "/var/opt/IBM/ibmtts/dict");
365
366 MOD_OPTION_1_INT_REG(IbmttsAudioChunkSize, 20000);
367 MOD_OPTION_1_STR_REG(IbmttsSoundIconFolder,
368 "/usr/share/sounds/sound-icons/");
369
370 /* Register voices. */
371 module_register_settings_voices();
372
373 /* Register voice parameters */
374 MOD_OPTION_HT_REG(IbmttsVoiceParameters);
375
376 /* Register key substitutions. */
377 MOD_OPTION_HT_DLL_REG(IbmttsKeySubstitution);
378
379 return MODULE_OK;
380 }
381
module_init(char ** status_info)382 int module_init(char **status_info)
383 {
384 int ret;
385 char version[20];
386
387 DBG(DBG_MODNAME "Module init().");
388 INIT_INDEX_MARKING();
389
390 *status_info = NULL;
391 thread_exit_requested = FALSE;
392
393 /* Report versions. */
394 eciVersion(version);
395 DBG(DBG_MODNAME "output module version %s, engine version %s", MODULE_VERSION, version);
396
397 /* TODO: according to version, enable SSML and punct by default or not
398 */
399
400 /* Setup TTS engine. */
401 DBG(DBG_MODNAME "Creating an engine instance.");
402 eciHandle = eciNew();
403 if (NULL_ECI_HAND == eciHandle) {
404 DBG(DBG_MODNAME "Could not create an engine instance.\n");
405 *status_info = g_strdup("Could not create an engine instance. "
406 "Is the TTS engine installed?");
407 return MODULE_FATAL_ERROR;
408 }
409
410 update_sample_rate();
411
412 /* Allocate a chunk for ECI to return audio. */
413 audio_chunk =
414 (TEciAudioSamples *) g_malloc((IbmttsAudioChunkSize) *
415 sizeof(TEciAudioSamples));
416
417 DBG(DBG_MODNAME "Registering ECI callback.");
418 eciRegisterCallback(eciHandle, eciCallback, NULL);
419
420 DBG(DBG_MODNAME "Registering an ECI audio buffer.");
421 if (!eciSetOutputBuffer(eciHandle, IbmttsAudioChunkSize, audio_chunk)) {
422 DBG(DBG_MODNAME "Error registering ECI audio buffer.");
423 log_eci_error();
424 }
425
426 eciSetParam(eciHandle, eciDictionary, !IbmttsUseAbbreviation);
427
428 /* enable annotations */
429 eciSetParam(eciHandle, eciInputType, 1);
430
431 /* load possibly the ssml filter */
432 if (IbmttsUseSSML)
433 eciAddText(eciHandle, " `gfa1 ");
434
435 /* load possibly the punctuation filter */
436 eciAddText(eciHandle, " `gfa2 ");
437
438 set_punctuation_mode(msg_settings.punctuation_mode);
439
440 if (!alloc_voice_list()) {
441 DBG(DBG_MODNAME "voice list allocation failed.");
442 *status_info =
443 g_strdup
444 ("The module can't build the list of installed voices.");
445 return MODULE_FATAL_ERROR;
446 }
447
448 DBG(DBG_MODNAME "IbmttsAudioChunkSize = %d", IbmttsAudioChunkSize);
449
450 message = NULL;
451
452 DBG(DBG_MODNAME "Creating playback queue.");
453 if (module_speak_queue_init(IbmttsAudioChunkSize, status_info)) {
454 DBG(DBG_MODNAME "queue initialization failed.");
455 return MODULE_FATAL_ERROR;
456 }
457
458 DBG(DBG_MODNAME "Creating new thread for TTS synthesis.");
459 sem_init(&synth_semaphore, 0, 0);
460
461 ret = pthread_create(&synth_thread, NULL, _synth, NULL);
462 if (0 != ret) {
463 DBG(DBG_MODNAME "synthesis thread creation failed.");
464 *status_info =
465 g_strdup("The module couldn't initialize synthesis thread. "
466 "This could be either an internal problem or an "
467 "architecture problem. If you are sure your architecture "
468 "supports threads, please report a bug.");
469 return MODULE_FATAL_ERROR;
470 }
471
472 *status_info = g_strdup(DBG_MODNAME "Initialized successfully.");
473
474 return MODULE_OK;
475 }
476
module_list_voices(void)477 SPDVoice **module_list_voices(void)
478 {
479 DBG(DBG_MODNAME "ENTER %s", __func__);
480 return speechd_voice;
481 }
482
/* Accept a message from Speech Dispatcher and hand it to the synthesis
   thread.

   data/bytes is the message text; it is copied into the global
   `message` (converted from latin-1 when it is not valid UTF-8).
   msgtype is stored in `message_type`, except that TEXT becomes SPELL
   while spelling mode is on.  Pending voice/rate/volume/pitch/
   punctuation/capital settings are applied, and when SSML is disabled
   the text is stripped of SSML and converted to `input_encoding`.

   Returns TRUE after posting the synthesis semaphore, or FALSE when
   the fallback conversion to UTF-8 fails. */
int module_speak(gchar * data, size_t bytes, SPDMessageType msgtype)
{
	DBG(DBG_MODNAME "module_speak().");

	DBG(DBG_MODNAME "Type: %d, bytes: %lu, requested data: |%s|\n", msgtype,
	    (unsigned long)bytes, data);

	g_free(message);
	message = NULL;

	if (g_utf8_validate(data, bytes, NULL)) {
		message = g_strndup(data, bytes);
	} else {
		DBG(DBG_MODNAME "Input is not valid utf-8.");
		/* Actually, we should just fail here, but let's assume input is latin-1 */
		message =
		    g_convert(data, bytes, "utf-8", "iso-8859-1", NULL, NULL,
			      NULL);
		if (NULL == message) {
			DBG(DBG_MODNAME "Fallback conversion to utf-8 failed.");
			return FALSE;
		}
	}

	/* Plain text is spelled out while spelling mode is on. */
	message_type = msgtype;
	if (SPD_MSGTYPE_TEXT == msgtype
	    && SPD_SPELL_ON == msg_settings.spelling_mode)
		message_type = SPD_MSGTYPE_SPELL;

	/* Setting speech parameters. */
	UPDATE_STRING_PARAMETER(voice.language, set_language);
	UPDATE_PARAMETER(voice_type, set_voice_type);
	UPDATE_STRING_PARAMETER(voice.name, set_synthesis_voice);
	UPDATE_PARAMETER(rate, set_rate);
	UPDATE_PARAMETER(volume, set_volume);
	UPDATE_PARAMETER(pitch, set_pitch);
	UPDATE_PARAMETER(punctuation_mode, set_punctuation_mode);
	UPDATE_PARAMETER(cap_let_recogn, set_capital_mode);

	if (!IbmttsUseSSML) {
		/* Strip all SSML */
		char *with_ssml = message;
		char *converted;

		message = module_strip_ssml(with_ssml);
		g_free(with_ssml);

		/* Convert input to suitable encoding for current language
		   dialect; keep the unconverted text if that fails. */
		converted =
		    g_convert_with_fallback(message, -1,
					    input_encoding, "utf-8", "?",
					    NULL, &bytes, NULL);
		if (NULL != converted) {
			g_free(message);
			message = converted;
		}
	}

	/* Send semaphore signal to the synthesis thread */
	sem_post(&synth_semaphore);

	DBG(DBG_MODNAME "Leaving module_speak() normally.");
	return TRUE;
}
544
module_stop(void)545 int module_stop(void)
546 {
547 DBG(DBG_MODNAME "module_stop().");
548
549 module_speak_queue_stop();
550
551 return MODULE_OK;
552 }
553
module_pause(void)554 size_t module_pause(void)
555 {
556 /* The semantics of module_pause() is the same as module_stop()
557 except that processing should continue until the next index mark is
558 reached before stopping.
559 Note that although IBM TTS offers an eciPause function, we cannot
560 make use of it because Speech Dispatcher doesn't have a module_resume
561 function. Instead, Speech Dispatcher resumes by calling module_speak
562 from the last index mark reported in the text. */
563 DBG(DBG_MODNAME "module_pause().");
564
565 pause_requested = TRUE;
566 module_speak_queue_pause();
567
568 return MODULE_OK;
569 }
570
module_close(void)571 int module_close(void)
572 {
573
574 DBG(DBG_MODNAME "close().");
575
576 module_speak_queue_terminate();
577
578 DBG(DBG_MODNAME "Stopping speech");
579 module_stop();
580
581 DBG(DBG_MODNAME "De-registering ECI callback.");
582 eciRegisterCallback(eciHandle, NULL, NULL);
583
584 DBG(DBG_MODNAME "Destroying ECI instance.");
585 eciDelete(eciHandle);
586 eciHandle = NULL_ECI_HAND;
587
588 /* Free buffer for ECI audio. */
589 g_free(audio_chunk);
590
591 /* Request each thread exit and wait until it exits. */
592 DBG(DBG_MODNAME "Terminating threads");
593 thread_exit_requested = TRUE;
594 sem_post(&synth_semaphore);
595 if (0 != pthread_join(synth_thread, NULL))
596 return -1;
597
598 module_speak_queue_free();
599
600 /* Free index mark lookup table. */
601 if (index_mark_ht) {
602 g_hash_table_destroy(index_mark_ht);
603 index_mark_ht = NULL;
604 }
605
606 free_voice_list();
607 sem_destroy(&synth_semaphore);
608
609 return 0;
610 }
611
612 /* Internal functions */
613
update_sample_rate()614 static void update_sample_rate()
615 {
616 // DBG(DBG_MODNAME "ENTER %s", __func__);
617 int sample_rate;
618 /* Get ECI audio sample rate. */
619 sample_rate = eciGetParam(eciHandle, eciSampleRate);
620 switch (sample_rate) {
621 case 0:
622 eci_sample_rate = 8000;
623 break;
624 case 1:
625 eci_sample_rate = 11025;
626 break;
627 case 2:
628 eci_sample_rate = 22050;
629 break;
630 default:
631 DBG(DBG_MODNAME "Invalid audio sample rate returned by ECI = %i",
632 sample_rate);
633 }
634 DBG(DBG_MODNAME "LEAVE %s, eci_sample_rate=%d", __FUNCTION__, eci_sample_rate);
635 }
636
637 /* Given a string containing an index mark in the form
638 <mark name="some_name"/>, returns some_name. Calling routine is
639 responsible for freeing returned string. If an error occurs,
640 returns NULL. */
extract_mark_name(char * mark)641 static char *extract_mark_name(char *mark)
642 {
643 if ((SD_MARK_HEAD_ONLY_LEN + SD_MARK_TAIL_LEN + 1) > strlen(mark))
644 return NULL;
645 mark = mark + SD_MARK_HEAD_ONLY_LEN;
646 char *tail = strstr(mark, SD_MARK_TAIL);
647 if (NULL == tail)
648 return NULL;
649 return (char *)g_strndup(mark, tail - mark);
650 }
651
652 /* Returns the portion of msg up to, but not including, the next index
653 mark, or end of msg if no index mark is found. If msg begins with
654 and index mark, returns the entire index mark clause (<mark name="whatever"/>)
655 and returns the mark name. If msg does not begin with an index mark,
656 mark_name will be NULL. If msg is empty, returns a zero-length string (not NULL).
657 Caller is responsible for freeing both returned string and mark_name (if not NULL). */
658 /* TODO: This routine needs to be more tolerant of custom index marks with spaces. */
659 /* TODO: Should there be a MaxChunkLength? Delimiters? */
next_part(char * msg,char ** mark_name)660 static char *next_part(char *msg, char **mark_name)
661 {
662 char *mark_head = strstr(msg, SD_MARK_HEAD_ONLY);
663 if (NULL == mark_head)
664 return (char *)g_strdup(msg);
665 else if (mark_head == msg) {
666 *mark_name = extract_mark_name(mark_head);
667 if (NULL == *mark_name) {
668 /* ill-formed, ignore the mark */
669 DBG(DBG_MODNAME "Note: ill-formed mark %s", msg);
670 char *tail = strstr(msg + SD_MARK_HEAD_ONLY_LEN, SD_MARK_TAILTAIL);
671 if (!tail) {
672 /* Uh, not even the tail... */
673 return (char *)g_strdup(msg);
674 }
675 tail += strlen(SD_MARK_TAILTAIL);
676 char *remainder = next_part(tail, mark_name);
677 char *ret = g_strdup_printf("%.*s%s",
678 (int) (tail - msg), msg, remainder);
679 g_free(remainder);
680 return ret;
681 }
682 else
683 return (char *)g_strndup(msg,
684 SD_MARK_HEAD_ONLY_LEN +
685 strlen(*mark_name) +
686 SD_MARK_TAIL_LEN);
687 } else
688 return (char *)g_strndup(msg, mark_head - msg);
689 }
690
process_text_mark(char * part,int part_len,char * mark_name)691 static int process_text_mark(char *part, int part_len, char *mark_name)
692 {
693 /* Handle index marks. */
694 if (NULL != mark_name) {
695 /* Assign the mark name an integer number and store in lookup table. */
696 int *markId = (int *)g_malloc(sizeof(int));
697 *markId = 1 + g_hash_table_size(index_mark_ht);
698 g_hash_table_insert(index_mark_ht, markId, mark_name);
699 if (!eciInsertIndex(eciHandle, *markId)) {
700 DBG(DBG_MODNAME "Error sending index mark to synthesizer.");
701 log_eci_error();
702 /* Try to keep going. */
703 } else
704 DBG(DBG_MODNAME "Index mark |%s| (id %i) sent to synthesizer.", mark_name, *markId);
705 /* If pause is requested, skip over rest of message,
706 but synthesize what we have so far. */
707 if (pause_requested) {
708 DBG(DBG_MODNAME "Pause requested in synthesis thread.");
709 return 1;
710 }
711 return 0;
712 }
713
714 /* Handle normal text. */
715 if (part_len > 0) {
716 DBG(DBG_MODNAME "Returned %d bytes from get_part.", part_len);
717 DBG(DBG_MODNAME "Text to synthesize is |%s|", part);
718 DBG(DBG_MODNAME "Sending text to synthesizer.");
719 if (!eciAddText(eciHandle, part)) {
720 DBG(DBG_MODNAME "Error sending text.");
721 log_eci_error();
722 return 2;
723 }
724 return 0;
725 }
726
727 /* Handle end of text. */
728 DBG(DBG_MODNAME "End of data in synthesis thread.");
729 /*
730 Add index mark for end of message.
731 This also makes sure the callback gets called at least once
732 */
733 eciInsertIndex(eciHandle, MSG_END_MARK);
734 DBG(DBG_MODNAME "Trying to synthesize text.");
735 if (!eciSynthesize(eciHandle)) {
736 DBG(DBG_MODNAME "Error synthesizing.");
737 log_eci_error();
738 return 2;;
739 }
740
741 /* Audio and index marks are returned in eciCallback(). */
742 DBG(DBG_MODNAME "Waiting for synthesis to complete.");
743 if (!eciSynchronize(eciHandle)) {
744 DBG(DBG_MODNAME "Error waiting for synthesis to complete.");
745 log_eci_error();
746 return 2;
747 }
748 DBG(DBG_MODNAME "Synthesis complete.");
749 return 3;
750 }
751
752 /* Synthesis thread. */
_synth(void * nothing)753 static void *_synth(void *nothing)
754 {
755 char *pos = NULL;
756 char *part = NULL;
757 int part_skip_end, part_len;
758 int ret;
759
760 DBG(DBG_MODNAME "Synthesis thread starting.......\n");
761
762 /* Block all signals to this thread. */
763 set_speaking_thread_parameters();
764
765 /* Allocate a place for index mark names to be placed. */
766 char *mark_name = NULL;
767
768 while (!thread_exit_requested) {
769 sem_wait(&synth_semaphore);
770 if (thread_exit_requested)
771 break;
772 DBG(DBG_MODNAME "Synthesis semaphore on.");
773
774 /* This table assigns each index mark name an integer id for fast lookup when
775 ECI returns the integer index mark event. */
776 if (index_mark_ht)
777 g_hash_table_destroy(index_mark_ht);
778 index_mark_ht =
779 g_hash_table_new_full(g_int_hash, g_int_equal, g_free,
780 g_free);
781
782 pos = message;
783 load_user_dictionary();
784
785 module_speak_queue_before_synth();
786
787 switch (message_type) {
788 case SPD_MSGTYPE_TEXT:
789 eciSetParam(eciHandle, eciTextMode, eciTextModeDefault);
790 break;
791 case SPD_MSGTYPE_SOUND_ICON:
792 /* IBM TTS does not support sound icons.
793 If we can find a sound icon file, play that,
794 otherwise speak the name of the sound icon. */
795 part = search_for_sound_icon(message);
796 if (NULL != part) {
797 add_sound_icon_to_playback_queue(part);
798 continue;
799 } else
800 eciSetParam(eciHandle, eciTextMode,
801 eciTextModeDefault);
802 break;
803 case SPD_MSGTYPE_CHAR:
804 eciSetParam(eciHandle, eciTextMode,
805 eciTextModeAllSpell);
806 break;
807 case SPD_MSGTYPE_KEY:
808 /* TODO: make sure all SSIP cases are supported */
809 /* Map unspeakable keys to speakable words. */
810 DBG(DBG_MODNAME "Key from Speech Dispatcher: |%s|", pos);
811 pos = subst_keys(pos);
812 DBG(DBG_MODNAME "Key to speak: |%s|", pos);
813 g_free(message);
814 message = pos;
815 eciSetParam(eciHandle, eciTextMode, eciTextModeDefault);
816 break;
817 case SPD_MSGTYPE_SPELL:
818 if (SPD_PUNCT_NONE != msg_settings.punctuation_mode)
819 eciSetParam(eciHandle, eciTextMode,
820 eciTextModeAllSpell);
821 else
822 eciSetParam(eciHandle, eciTextMode,
823 eciTextModeAlphaSpell);
824 break;
825 }
826
827 module_speak_queue_before_play();
828
829 if (!IbmttsUseSSML)
830 {
831 process_text_mark(pos, strlen(pos), NULL);
832 process_text_mark(NULL, 0, NULL);
833 continue;
834 }
835
836 while (TRUE) {
837 if (module_speak_queue_stop_requested()) {
838 DBG(DBG_MODNAME "Stop in synthesis thread, terminating.");
839 break;
840 }
841
842 /* TODO: How to map these msg_settings to ibm tts?
843 ESpellMode spelling_mode;
844 SPELLING_ON already handled in module_speak()
845 ECapLetRecogn cap_let_recogn;
846 RECOGN_NONE = 0,
847 RECOGN_SPELL = 1,
848 RECOGN_ICON = 2
849 */
850
851 if (!strncmp(pos, SD_SPEAK, strlen(SD_SPEAK))) {
852 DBG(DBG_MODNAME "Drop heading "SD_SPEAK".");
853 pos += strlen(SD_SPEAK);
854 }
855
856 part = next_part(pos, &mark_name);
857 if (NULL == part) {
858 DBG(DBG_MODNAME "Error getting next part of message.");
859 /* TODO: What to do here? */
860 break;
861 }
862 part_len = strlen(part);
863 if (part_len >= strlen(SD_ENDSPEAK) &&
864 !strncmp(part + part_len - strlen(SD_ENDSPEAK),
865 SD_ENDSPEAK, strlen(SD_ENDSPEAK))) {
866 DBG(DBG_MODNAME "Drop trailing "SD_ENDSPEAK".");
867 part_skip_end = strlen(SD_ENDSPEAK);
868 part[part_len - part_skip_end] = 0;
869 } else {
870 part_skip_end = 0;
871 }
872 pos += part_len;
873 ret = process_text_mark(part,
874 part_len - part_skip_end,
875 mark_name);
876 g_free(part);
877 part = NULL;
878 mark_name = NULL;
879 if (ret == 1)
880 pos += strlen(pos);
881 else if (ret > 1)
882 break;
883 }
884 }
885
886 DBG(DBG_MODNAME "Synthesis thread ended.......\n");
887
888 pthread_exit(NULL);
889 }
890
set_rate(signed int rate)891 static void set_rate(signed int rate)
892 {
893 DBG(DBG_MODNAME "ENTER %s", __func__);
894 /* Setting rate to midpoint is too fast. An eci value of 50 is "normal".
895 See chart on pg 38 of the ECI manual. */
896 assert(rate >= -100 && rate <= +100);
897 int speed;
898 /* Possible ECI range is 0 to 250. */
899 /* Map rate -100 to 100 onto speed 0 to 140. */
900 if (rate < 0)
901 /* Map -100 to 0 onto 0 to voice_speed */
902 speed = ((float)(rate + 100) * voice_speed) / (float)100;
903 else
904 /* Map 0 to 100 onto voice_speed to 140 */
905 speed =
906 (((float)rate * (140 - voice_speed)) / (float)100)
907 + voice_speed;
908 assert(speed >= 0 && speed <= 140);
909 int ret = eciSetVoiceParam(eciHandle, 0, eciSpeed, speed);
910 if (-1 == ret) {
911 DBG(DBG_MODNAME "Error setting rate %i.", speed);
912 log_eci_error();
913 } else
914 DBG(DBG_MODNAME "Rate set to %i.", speed);
915 }
916
set_volume(signed int volume)917 static void set_volume(signed int volume)
918 {
919 DBG(DBG_MODNAME "ENTER %s", __func__);
920 /* Setting volume to midpoint makes speech too soft. An eci value
921 of 90 to 100 is "normal".
922 See chart on pg 38 of the ECI manual.
923 TODO: Rather than setting volume in the synth, maybe control volume on playback? */
924 assert(volume >= -100 && volume <= +100);
925 int vol;
926 /* Possible ECI range is 0 to 100. */
927 if (volume < 0)
928 /* Map -100 to 0 onto 0 to 90 */
929 vol = (((float)volume + 100) * 90) / (float)100;
930 else
931 /* Map 0 to 100 onto 90 to 100 */
932 vol = ((float)(volume * 10) / (float)100) + 90;
933 assert(vol >= 0 && vol <= 100);
934 int ret = eciSetVoiceParam(eciHandle, 0, eciVolume, vol);
935 if (-1 == ret) {
936 DBG(DBG_MODNAME "Error setting volume %i.", vol);
937 log_eci_error();
938 } else
939 DBG(DBG_MODNAME "Volume set to %i.", vol);
940 }
941
set_pitch(signed int pitch)942 static void set_pitch(signed int pitch)
943 {
944 DBG(DBG_MODNAME "ENTER %s", __func__);
945 /* Setting pitch to midpoint is to low. eci values between 65 and 89
946 are "normal".
947 See chart on pg 38 of the ECI manual. */
948 assert(pitch >= -100 && pitch <= +100);
949 int pitchBaseline;
950 /* Possible range 0 to 100. */
951 if (pitch < 0)
952 /* Map -100 to 0 onto 0 to voice_pitch_baseline */
953 pitchBaseline =
954 ((float)(pitch + 100) * voice_pitch_baseline) /
955 (float)100;
956 else
957 /* Map 0 to 100 onto voice_pitch_baseline to 100 */
958 pitchBaseline =
959 (((float)pitch * (100 - voice_pitch_baseline)) /
960 (float)100)
961 + voice_pitch_baseline;
962 assert(pitchBaseline >= 0 && pitchBaseline <= 100);
963 int ret =
964 eciSetVoiceParam(eciHandle, 0, eciPitchBaseline, pitchBaseline);
965 if (-1 == ret) {
966 DBG(DBG_MODNAME "Error setting pitch %i.", pitchBaseline);
967 log_eci_error();
968 } else
969 DBG(DBG_MODNAME "Pitch set to %i.", pitchBaseline);
970 }
971
/* Queues an in-text ECI annotation selecting the punctuation mode.
   The annotation carries a numeric mode followed by the configured
   IbmttsPunctuationList. */
static void set_punctuation_mode(SPDPunctuation punct_mode)
{
	DBG(DBG_MODNAME "ENTER %s", __func__);
	int eci_mode;

	switch (punct_mode) {
	case SPD_PUNCT_ALL:
		eci_mode = 1;
		break;
	case SPD_PUNCT_SOME:
	case SPD_PUNCT_MOST:	/* XXX approximation: "most" is handled like "some" */
		eci_mode = 2;
		break;
	case SPD_PUNCT_NONE:
	default:
		eci_mode = 0;
		break;
	}

	char *annotation =
	    g_strdup_printf(" `Pf%d%s ", eci_mode, IbmttsPunctuationList);
	eciAddText(eciHandle, annotation);
	g_free(annotation);
}
999
1000 #ifdef VOXIN
set_capital_mode(SPDCapitalLetters cap_mode)1001 static void set_capital_mode(SPDCapitalLetters cap_mode)
1002 {
1003 DBG(DBG_MODNAME "ENTER %s", __func__);
1004 voxCapitalMode mode = voxCapitalNone;
1005
1006 switch (cap_mode) {
1007 case SPD_CAP_NONE:
1008 mode = voxCapitalNone;
1009 break;
1010 case SPD_CAP_SPELL:
1011 mode = voxCapitalSpell;
1012 break;
1013 case SPD_CAP_ICON:
1014 mode = voxCapitalSoundIcon;
1015 break;
1016 }
1017
1018 voxSetParam(eciHandle, VOX_CAPITALS, mode);
1019 }
1020 #else
set_capital_mode(SPDCapitalLetters cap_mode)1021 static void set_capital_mode(SPDCapitalLetters cap_mode){}
1022 #endif
1023
/* Returns a newly allocated string naming the given voice type
   ("male1", "child_female", ...), or "no voice" for any other value.
   The caller releases the result with g_free(). */
static char *voice_enum_to_str(SPDVoiceType voice_type)
{
	DBG(DBG_MODNAME "ENTER %s", __func__);
	/* TODO: Would be better to move this to module_utils.c. */
	const char *label = "no voice";

	switch (voice_type) {
	case SPD_MALE1:
		label = "male1";
		break;
	case SPD_MALE2:
		label = "male2";
		break;
	case SPD_MALE3:
		label = "male3";
		break;
	case SPD_FEMALE1:
		label = "female1";
		break;
	case SPD_FEMALE2:
		label = "female2";
		break;
	case SPD_FEMALE3:
		label = "female3";
		break;
	case SPD_CHILD_MALE:
		label = "child_male";
		break;
	case SPD_CHILD_FEMALE:
		label = "child_female";
		break;
	default:
		break;
	}
	return g_strdup(label);
}
1060
1061 /** Set voice parameters (if any are defined for this voice) */
set_voice_parameters(SPDVoiceType voice_type)1062 static void set_voice_parameters(SPDVoiceType voice_type)
1063 {
1064 char *voicename = voice_enum_to_str(voice_type);
1065 int eciVoice;
1066 int ret = -1;
1067
1068 TIbmttsVoiceParameters *params = g_hash_table_lookup(IbmttsVoiceParameters, voicename);
1069 if (NULL == params) {
1070 DBG(DBG_MODNAME "Setting default VoiceParameters for voice %s", voicename);
1071
1072 switch (voice_type) {
1073 case SPD_MALE1:
1074 eciVoice = 1;
1075 break; /* Adult Male 1 */
1076 case SPD_MALE2:
1077 eciVoice = 4;
1078 break; /* Adult Male 2 */
1079 case SPD_MALE3:
1080 eciVoice = 5;
1081 break; /* Adult Male 3 */
1082 case SPD_FEMALE1:
1083 eciVoice = 2;
1084 break; /* Adult Female 1 */
1085 case SPD_FEMALE2:
1086 eciVoice = 6;
1087 break; /* Adult Female 2 */
1088 case SPD_FEMALE3:
1089 eciVoice = 7;
1090 break; /* Elderly Female 1 */
1091 case SPD_CHILD_MALE:
1092 case SPD_CHILD_FEMALE:
1093 eciVoice = 3;
1094 break; /* Child */
1095 default:
1096 eciVoice = 1;
1097 break; /* Adult Male 1 */
1098 }
1099 ret = eciCopyVoice(eciHandle, eciVoice, 0);
1100 if (-1 == ret)
1101 DBG(DBG_MODNAME "ERROR: Setting default voice parameters (voice %i).", eciVoice);
1102 } else {
1103 DBG(DBG_MODNAME "Setting custom VoiceParameters for voice %s", voicename);
1104
1105 ret = eciSetVoiceParam(eciHandle, 0, eciGender, params->gender);
1106 if (-1 == ret)
1107 DBG(DBG_MODNAME "ERROR: Setting gender %i", params->gender);
1108
1109 ret = eciSetVoiceParam(eciHandle, 0, eciBreathiness, params->breathiness);
1110 if (-1 == ret)
1111 DBG(DBG_MODNAME "ERROR: Setting breathiness %i", params->breathiness);
1112
1113 ret = eciSetVoiceParam(eciHandle, 0, eciHeadSize, params->head_size);
1114 if (-1 == ret)
1115 DBG(DBG_MODNAME "ERROR: Setting head size %i", params->head_size);
1116
1117 ret = eciSetVoiceParam(eciHandle, 0, eciPitchBaseline, params->pitch_baseline);
1118 if (-1 == ret)
1119 DBG(DBG_MODNAME "ERROR: Setting pitch baseline %i", params->pitch_baseline);
1120
1121 ret = eciSetVoiceParam(eciHandle, 0, eciPitchFluctuation, params->pitch_fluctuation);
1122 if (-1 == ret)
1123 DBG(DBG_MODNAME "ERROR: Setting pitch fluctuation %i", params->pitch_fluctuation);
1124
1125 ret = eciSetVoiceParam(eciHandle, 0, eciRoughness, params->roughness);
1126 if (-1 == ret)
1127 DBG(DBG_MODNAME "ERROR: Setting roughness %i", params->roughness);
1128
1129 ret = eciSetVoiceParam(eciHandle, 0, eciSpeed, params->speed);
1130 if (-1 == ret)
1131 DBG(DBG_MODNAME "ERROR: Setting speed %i", params->speed);
1132 }
1133
1134 g_free(voicename);
1135 }
1136
1137 #ifdef VOXIN
1138 /*
1139 Convert the supplied arguments to the eciLanguageDialect value and
1140 sets the eciLanguageDialect parameter.
1141
1142 The arguments are used in this order:
1143 - find a matching voice name,
1144 - otherwise find the first matching language
1145
1146 EXAMPLES
1147 1. Using Orca 3.30.1:
1148 - lang="en", voice=1, name="zuzana"
1149 name ("zuzana") matches the installed voice Zuzana embedded-compact
1150
1151 - lang="en", voice=1, name="voxin default voice"
1152 name does not match any installed voice.
1153 The first English voice present is returned.
1154
1155
1156 2. Using spd-say (LC_ALL=C)
1157 - lang="c", voice=1, name="nathan-embedded-compact"
1158 name matches the installed voice Nathan embedded-compact
1159
1160 spd-say command:
1161 spd-say -o voxin -y nathan-embedded-compact hello
1162
1163 - lang="en-us", voice=1, name=
1164 The first American English voice present is returned.
1165
1166 spd-say command:
1167 spd-say -o voxin -l en-US hello
1168
1169 */
1170 #else
1171 /* Given a language, dialect and SD voice codes sets the IBM voice */
1172 #endif
/*
 * Selects the engine language/voice and refreshes the dependent state.
 *
 * lang:       requested language code (e.g. "en" or "en-US"); may be NULL.
 * voice_type: Speech Dispatcher voice type, forwarded to
 *             set_voice_parameters().
 * name:       requested voice name; may be NULL or empty.
 *
 * Selection order: exact (case-insensitive) voice name, then exact
 * language, then first voice sharing the base language, and finally the
 * first available voice.  On success the input encoding, sample rate,
 * locale index, voice parameters and the cached baseline pitch/speed are
 * updated.  Returns silently if no voice is available or the engine
 * rejects the language.
 */
static void set_language_and_voice(char *lang, SPDVoiceType voice_type, char *name)
{
	DBG(DBG_MODNAME "ENTER %s", __func__);
	int ret = -1;
	int i = 0, index = -1;

	/* lang may legitimately be NULL (it is tested below); never hand a
	   NULL pointer to a %s conversion. */
	DBG(DBG_MODNAME "%s, lang=%s, voice_type=%d, name=%s",
	    __FUNCTION__, lang ? lang : "", (int)voice_type, name ? name : "");

	assert(speechd_voice);

	/* 1. Look for a voice with the requested name. */
	if (name && *name) {
		for (i = 0; speechd_voice[i]; i++) {
			DBG("%d. name=%s", i, speechd_voice[i]->name);
			if (!strcasecmp(speechd_voice[i]->name, name)) {
				index = voice_index(i);
				break;
			}
		}
	}

	/* 2. Otherwise look for a voice speaking the requested language. */
	if ((index == -1) && lang) {
		char *langbase;	// requested base language + '-'
		char *dash = strchr(lang, '-');
		if (dash)
			langbase = g_strndup(lang, dash - lang + 1);
		else
			langbase = g_strdup_printf("%s-", lang);

		for (i = 0; speechd_voice[i]; i++) {
			DBG("%d. language=%s", i, speechd_voice[i]->language);
			if (!strcasecmp(speechd_voice[i]->language, lang)) {
				/* An exact match overrides any earlier base-language match. */
				DBG("strong match!");
				index = voice_index(i);
				break;
			}
			if (index == -1) {
				/* Try base language matching as fallback; keep
				   scanning in case an exact match follows. */
				if (!strncasecmp(speechd_voice[i]->language, langbase, strlen(langbase))) {
					DBG("match!");
					index = voice_index(i);
				}
			}
		}
		g_free(langbase);
	}

	/* 3. No matching voice: choose the first available voice. */
	if (index == -1) {
		if (!speechd_voice[0])
			return;
		/* Translate the speechd_voice position exactly like the
		   matching loops above do, so that the fallback designates
		   the first *available* voice. */
		index = voice_index(0);
	}

#ifdef VOXIN
	ret = eciSetParam(eciHandle, eciLanguageDialect, voices[index].id);
#else
	ret = eciSetParam(eciHandle, eciLanguageDialect, eciLocales[index].langID);
#endif
	if (ret == -1) {
		DBG(DBG_MODNAME "Unable to set language");
		log_eci_error();
		return;
	}

#ifdef VOXIN
	DBG(DBG_MODNAME "select speechd_voice[%d]: id=0x%x, name=%s (ret=%d)",
	    index, voices[index].id, voices[index].name, ret);

	input_encoding = voices[index].charset;
#else
	DBG(DBG_MODNAME "set langID=0x%x (ret=%d)",
	    eciLocales[index].langID, ret);

	input_encoding = eciLocales[index].charset;
#endif
	update_sample_rate();
	g_atomic_int_set(&locale_index_atomic, index);

	set_voice_parameters(voice_type);

	/* Retrieve the baseline pitch and speed of the voice. */
	voice_pitch_baseline = eciGetVoiceParam(eciHandle, 0, eciPitchBaseline);
	if (-1 == voice_pitch_baseline)
		DBG(DBG_MODNAME "Cannot get pitch baseline of voice.");

	voice_speed = eciGetVoiceParam(eciHandle, 0, eciSpeed);
	if (-1 == voice_speed)
		DBG(DBG_MODNAME "Cannot get speed of voice.");
}
1262
/* Re-selects the voice for a new voice type, keeping the language and
   voice name from msg_settings.  Does nothing while no language is set. */
static void set_voice_type(SPDVoiceType voice_type)
{
	DBG(DBG_MODNAME "ENTER %s", __func__);
	if (msg_settings.voice.language == NULL)
		return;

	set_language_and_voice(msg_settings.voice.language,
			       voice_type,
			       msg_settings.voice.name);
}
1270
set_language(char * lang)1271 static void set_language(char *lang)
1272 {
1273 DBG(DBG_MODNAME "ENTER %s", __func__);
1274 set_language_and_voice(lang, msg_settings.voice_type, msg_settings.voice.name);
1275 }
1276
1277 /* sets the voice according to its name.
1278
1279 If the voice name is not found, try to select the first available
1280 voice for the current language.
1281 */
set_synthesis_voice(char * synthesis_voice)1282 static void set_synthesis_voice(char *synthesis_voice)
1283 {
1284 if (synthesis_voice == NULL) {
1285 return;
1286 }
1287
1288 DBG(DBG_MODNAME "ENTER %s(%s)", __FUNCTION__, synthesis_voice);
1289
1290 set_language_and_voice(msg_settings.voice.language, msg_settings.voice_type, synthesis_voice);
1291 }
1292
log_eci_error()1293 static void log_eci_error()
1294 {
1295 DBG(DBG_MODNAME "ENTER %s", __func__);
1296 /* TODO: This routine is not working. Not sure why. */
1297 char buf[100];
1298 eciErrorMessage(eciHandle, buf);
1299 DBG(DBG_MODNAME "ECI Error Message: %s", buf);
1300 }
1301
1302 /* The text-to-speech calls back here when a chunk of audio is ready
1303 or an index mark has been reached. The good news is that it
1304 returns the audio up to each index mark or when the audio buffer is
1305 full. */
eciCallback(ECIHand hEngine,enum ECIMessage msg,long lparam,void * data)1306 static enum ECICallbackReturn eciCallback(ECIHand hEngine,
1307 enum ECIMessage msg,
1308 long lparam, void *data)
1309 {
1310 /* This callback is running in the same thread as called eciSynchronize(),
1311 i.e., the _synth() thread. */
1312
1313 /* If module_stop was called, discard any further callbacks until module_speak is called. */
1314 if (module_speak_queue_stop_requested()) {
1315 return eciDataProcessed;
1316 // TODO: try to use eciDataAbort to avoid continuing computing the synth?
1317 }
1318
1319 switch (msg) {
1320 case eciWaveformBuffer:
1321 DBG(DBG_MODNAME "%ld audio samples returned from TTS.", lparam);
1322 /* Add audio to output queue. */
1323 add_audio_to_playback_queue(audio_chunk, lparam);
1324 return eciDataProcessed;
1325
1326 case eciIndexReply:
1327 DBG(DBG_MODNAME "Index mark id %ld returned from TTS.", lparam);
1328 if (lparam == MSG_END_MARK) {
1329 add_end_to_playback_queue();
1330 } else {
1331 /* Add index mark to output queue. */
1332 add_mark_to_playback_queue(lparam);
1333 }
1334 return eciDataProcessed;
1335
1336 default:
1337 return eciDataProcessed;
1338 }
1339 }
1340
1341 /* Adds a chunk of pcm audio to the audio playback queue. */
add_audio_to_playback_queue(TEciAudioSamples * audio_chunk,long num_samples)1342 static gboolean add_audio_to_playback_queue(TEciAudioSamples * audio_chunk, long num_samples)
1343 {
1344 DBG(DBG_MODNAME "ENTER %s", __func__);
1345 AudioTrack track = {
1346 .bits = 16,
1347 .num_channels = 1,
1348 .sample_rate = eci_sample_rate,
1349 .num_samples = num_samples,
1350 .samples = audio_chunk,
1351 };
1352 #if defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
1353 AudioFormat format = SPD_AUDIO_BE;
1354 #else
1355 AudioFormat format = SPD_AUDIO_LE;
1356 #endif
1357
1358 return module_speak_queue_add_audio(&track, format);
1359 }
1360
1361 /* Adds an Index Mark to the audio playback queue. */
add_mark_to_playback_queue(long markId)1362 static void add_mark_to_playback_queue(long markId)
1363 {
1364 DBG(DBG_MODNAME "ENTER %s", __func__);
1365 /* Look up the index mark integer id in lookup table to
1366 find string name and emit that name. */
1367 char *mark_name = g_hash_table_lookup(index_mark_ht, &markId);
1368 if (NULL == mark_name) {
1369 DBG(DBG_MODNAME "markId %ld returned by TTS not found in lookup table.", markId);
1370 return;
1371 }
1372 DBG(DBG_MODNAME "reporting index mark |%s|.", mark_name);
1373 module_speak_queue_before_play();
1374 module_speak_queue_add_mark(mark_name);
1375 DBG(DBG_MODNAME "index mark reported.");
1376 }
1377
/* Signals the end of the message to the playback queue:
   calls module_speak_queue_before_play() and then
   module_speak_queue_add_end(). */
static void add_end_to_playback_queue(void)
{
	module_speak_queue_before_play();
	module_speak_queue_add_end();
}
1384
/* Meant to stop the synth; not implemented yet, so it currently does
   nothing. */
void module_speak_queue_cancel(void)
{
	/* TODO */
}
1390
1391 /* Add a sound icon to the playback queue. */
add_sound_icon_to_playback_queue(char * filename)1392 static gboolean add_sound_icon_to_playback_queue(char *filename)
1393 {
1394 return module_speak_queue_add_sound_icon(filename);
1395 }
1396
1397 /* Replaces all occurrences of "from" with "to" in msg.
1398 Returns count of replacements. */
replace(char * from,char * to,GString * msg)1399 static int replace(char *from, char *to, GString * msg)
1400 {
1401 DBG(DBG_MODNAME "ENTER %s", __func__);
1402 int count = 0;
1403 int pos;
1404 int from_len = strlen(from);
1405 int to_len = strlen(to);
1406 char *p = msg->str;
1407 while (NULL != (p = strstr(p, from))) {
1408 pos = p - msg->str;
1409 g_string_erase(msg, pos, from_len);
1410 g_string_insert(msg, pos, to);
1411 p = msg->str + pos + to_len;
1412 ++count;
1413 }
1414 return count;
1415 }
1416
/* List-iteration callback: applies one key substitution.
   data is a TIbmttsKeySubstitution, user_data the GString being edited. */
static void subst_keys_cb(gpointer data, gpointer user_data)
{
	DBG(DBG_MODNAME "ENTER %s", __func__);
	TIbmttsKeySubstitution *rule = data;
	GString *text = user_data;

	replace(rule->key, rule->newkey, text);
}
1424
1425 /* Given a Speech Dispatcher !KEY key sequence, replaces unspeakable
1426 or incorrectly spoken keys or characters with speakable ones.
1427 The subsitutions come from the KEY NAME SUBSTITUTIONS section of the
1428 config file.
1429 Caller is responsible for freeing returned string. */
subst_keys(char * key)1430 static char *subst_keys(char *key)
1431 {
1432 DBG(DBG_MODNAME "ENTER %s", __func__);
1433 GString *tmp = g_string_sized_new(30);
1434 g_string_append(tmp, key);
1435
1436 GList *keyTable = g_hash_table_lookup(IbmttsKeySubstitution,
1437 msg_settings.voice.language);
1438
1439 if (keyTable)
1440 g_list_foreach(keyTable, subst_keys_cb, tmp);
1441
1442 /* Hyphen hangs IBM TTS */
1443 if (0 == strcmp(tmp->str, "-"))
1444 g_string_assign(tmp, "hyphen");
1445
1446 return g_string_free(tmp, FALSE);
1447 }
1448
1449 /* Given a sound icon name, searches for a file to play and if found
1450 returns the filename. Returns NULL if none found. Caller is responsible
1451 for freeing the returned string. */
1452 /* TODO: These current assumptions should be dealt with:
1453 Sound icon files are in a single directory (IbmttsSoundIconFolder).
1454 The name of each icon is symlinked to a .wav file.
1455 If you have installed the free(b)soft sound-icons package under
1456 Debian, then these assumptions are true, but what about other distros
1457 and OSes? */
search_for_sound_icon(const char * icon_name)1458 static char *search_for_sound_icon(const char *icon_name)
1459 {
1460 DBG(DBG_MODNAME "ENTER %s", __func__);
1461 char *fn = NULL;
1462 if (0 == strlen(IbmttsSoundIconFolder))
1463 return fn;
1464 GString *filename = g_string_new(IbmttsSoundIconFolder);
1465 filename = g_string_append(filename, icon_name);
1466 if (g_file_test(filename->str, G_FILE_TEST_EXISTS))
1467 fn = filename->str;
1468 /*
1469 else {
1470 filename = g_string_assign(filename, g_utf8_strdown(filename->str, -1));
1471 if (g_file_test(filename->str, G_FILE_TEST_EXISTS))
1472 fn = filename->str;
1473 }
1474 */
1475
1476 /*
1477 * if the file was found, the pointer *fn points to the character data
1478 * of the string filename. In this situation the string filename must be
1479 * freed but its character data must be preserved.
1480 * If the file is not found, the pointer *fn contains NULL. In this
1481 * situation the string filename must be freed, including its character
1482 * data.
1483 */
1484 g_string_free(filename, (fn == NULL));
1485 return fn;
1486 }
1487
1488 #ifdef VOXIN
vox_to_spd_voice(vox_t * from,SPDVoice * to)1489 static gboolean vox_to_spd_voice(vox_t *from, SPDVoice *to)
1490 {
1491 DBG(DBG_MODNAME "ENTER %s", __func__);
1492 if (!from
1493 || !to
1494 || to->name || to->language || to->variant
1495 || from->name[sizeof(from->name)-1]
1496 || from->lang[sizeof(from->lang)-1]
1497 || from->variant[sizeof(from->variant)-1]
1498 ) {
1499 DBG(DBG_MODNAME "args error");
1500 return FALSE;
1501 }
1502
1503 { /* set name */
1504 int i;
1505 to->name = *from->quality ?
1506 g_strdup_printf("%s-%s", from->name, from->quality) :
1507 g_strdup(from->name);
1508 for (i=0; to->name[i]; i++) {
1509 to->name[i] = tolower(to->name[i]);
1510 }
1511 }
1512 { /* set language: language identifier (lower case) + variant/dialect (all caps) */
1513 if (*from->variant) {
1514 size_t len = strlen(from->lang);
1515 int i;
1516 to->language = g_strdup_printf("%s-%s", from->lang, from->variant);
1517 for (i=len; to->language[i]; i++) {
1518 to->language[i] = toupper(to->language[i]);
1519 }
1520 } else {
1521 to->language = g_strdup(from->lang);
1522 }
1523 }
1524 to->variant = g_strdup("none");
1525
1526 { /* log the 'from' argument */
1527 size_t size = 0;
1528 if (!voxToString(from, NULL, &size)) {
1529 gchar *str = g_malloc0(size);
1530 if (!voxToString(from, str, &size)) {
1531 DBG(DBG_MODNAME "from: %s", str);
1532 }
1533 g_free(str);
1534 }
1535 }
1536 DBG(DBG_MODNAME "to: name=%s, variant=%s, language=%s", to->name, to->variant, to->language);
1537 return TRUE;
1538 }
1539
alloc_voice_list()1540 static gboolean alloc_voice_list()
1541 {
1542 DBG(DBG_MODNAME "ENTER %s", __func__);
1543 int i = 0;
1544
1545 /* obtain the list of installed voices */
1546 number_of_voices = 0;
1547 if (voxGetVoices(NULL, &number_of_voices) || !number_of_voices) {
1548 return FALSE;
1549 }
1550
1551 voices = g_new0(vox_t, number_of_voices);
1552 if (voxGetVoices(voices, &number_of_voices) || !number_of_voices)
1553 goto exit0;
1554
1555 DBG(DBG_MODNAME "number_of_voices=%u", number_of_voices);
1556
1557 /* build speechd_voice */
1558 speechd_voice = g_new0(SPDVoice*, number_of_voices + 1);
1559 for (i = 0; i < number_of_voices; i++) {
1560 speechd_voice[i] = g_malloc0(sizeof(SPDVoice));
1561 if (!vox_to_spd_voice(voices+i, speechd_voice[i]))
1562 goto exit0;
1563 }
1564 speechd_voice[number_of_voices] = NULL;
1565
1566 for (i = 0; speechd_voice[i]; i++) {
1567 DBG(DBG_MODNAME "speechd_voice[%d]:name=%s, language=%s, variant=%s",
1568 i,
1569 speechd_voice[i]->name ? speechd_voice[i]->name : "null",
1570 speechd_voice[i]->language ? speechd_voice[i]->language : "null",
1571 speechd_voice[i]->variant ? speechd_voice[i]->variant : "null");
1572 }
1573
1574 DBG(DBG_MODNAME "LEAVE %s", __func__);
1575 return TRUE;
1576
1577 exit0:
1578 if (voices) {
1579 g_free(voices);
1580 voices = NULL;
1581 }
1582 free_voice_list();
1583 return FALSE;
1584 }
1585 #else
alloc_voice_list()1586 gboolean alloc_voice_list()
1587 {
1588 enum ECILanguageDialect aLanguage[MAX_NB_OF_LANGUAGES];
1589 int nLanguages = MAX_NB_OF_LANGUAGES;
1590 int i = 0;
1591
1592 if (eciGetAvailableLanguages(aLanguage, &nLanguages))
1593 return FALSE;
1594
1595 speechd_voice = g_malloc((nLanguages + 1) * sizeof(SPDVoice *));
1596 speechd_voice_index = g_malloc((nLanguages + 1) * sizeof(SPDVoice *));
1597 if (!speechd_voice)
1598 return FALSE;
1599
1600 DBG(DBG_MODNAME "nLanguages=%d/%lu", nLanguages, (unsigned long)MAX_NB_OF_LANGUAGES);
1601 for (i = 0; i < nLanguages; i++) {
1602 /* look for the language name */
1603 int j;
1604 speechd_voice[i] = g_malloc(sizeof(SPDVoice));
1605
1606 DBG(DBG_MODNAME "aLanguage[%d]=0x%08x", i, aLanguage[i]);
1607 for (j = 0; j < MAX_NB_OF_LANGUAGES; j++) {
1608 DBG(DBG_MODNAME "eciLocales[%d].langID=0x%08x", j,
1609 eciLocales[j].langID);
1610 if (eciLocales[j].langID == aLanguage[i]) {
1611 speechd_voice[i]->name = eciLocales[j].name;
1612 speechd_voice[i]->language =
1613 eciLocales[j].lang;
1614 speechd_voice[i]->variant =
1615 eciLocales[j].variant;
1616 speechd_voice_index[i] = j;
1617 DBG(DBG_MODNAME "alloc_voice_list %s",
1618 speechd_voice[i]->name);
1619 break;
1620 }
1621 }
1622 assert(j < MAX_NB_OF_LANGUAGES);
1623 }
1624 speechd_voice[nLanguages] = NULL;
1625 DBG(DBG_MODNAME "LEAVE %s", __func__);
1626
1627 return TRUE;
1628 }
1629 #endif
1630
free_voice_list()1631 static void free_voice_list()
1632 {
1633 DBG(DBG_MODNAME "ENTER %s", __func__);
1634 int i = 0;
1635
1636 #ifndef VOXIN
1637 if (speechd_voice_index) {
1638 g_free(speechd_voice_index);
1639 speechd_voice_index = NULL;
1640 }
1641 #endif
1642
1643 if (!speechd_voice)
1644 return;
1645
1646 for (i = 0; speechd_voice[i]; i++) {
1647 #ifdef VOXIN
1648 if (speechd_voice[i]->name) {
1649 g_free(speechd_voice[i]->name);
1650 speechd_voice[i]->name = NULL;
1651 }
1652 if (speechd_voice[i]->language) {
1653 g_free(speechd_voice[i]->language);
1654 speechd_voice[i]->language = NULL;
1655 }
1656 if (speechd_voice[i]->variant) {
1657 g_free(speechd_voice[i]->variant);
1658 speechd_voice[i]->variant = NULL;
1659 }
1660 #endif
1661 g_free(speechd_voice[i]);
1662 speechd_voice[i] = NULL;
1663 }
1664
1665 g_free(speechd_voice);
1666 speechd_voice = NULL;
1667 }
1668
load_user_dictionary()1669 static void load_user_dictionary()
1670 {
1671 DBG(DBG_MODNAME "ENTER %s", __func__);
1672 GString *dirname = NULL;
1673 GString *filename = NULL;
1674 int i = 0;
1675 int dictionary_is_present = 0;
1676 static guint old_index = G_MAXUINT;
1677 guint new_index;
1678 char *language = NULL;
1679 #ifdef VOXIN
1680 char *region = NULL;
1681 #else
1682 char *dash;
1683 #endif
1684 ECIDictHand eciDict = eciGetDict(eciHandle);
1685
1686 new_index = g_atomic_int_get(&locale_index_atomic);
1687 if (new_index >= MAX_NB_OF_LANGUAGES) {
1688 DBG(DBG_MODNAME "%s, unexpected index (0x%x)", __FUNCTION__,
1689 new_index);
1690 return;
1691 }
1692
1693 if (old_index == new_index) {
1694 DBG(DBG_MODNAME "LEAVE %s, no change", __FUNCTION__);
1695 return;
1696 }
1697
1698 #ifdef VOXIN
1699 language = g_strdup(voices[new_index].lang);
1700 region = voices[new_index].variant;
1701 #else
1702 language = g_strdup(eciLocales[new_index].lang);
1703 dash = strchr(language, '-');
1704 if (dash)
1705 *dash = '_';
1706 #endif
1707
1708 if (eciDict) {
1709 DBG(DBG_MODNAME "delete old dictionary");
1710 eciDeleteDict(eciHandle, eciDict);
1711 }
1712 eciDict = eciNewDict(eciHandle);
1713 if (eciDict) {
1714 old_index = new_index;
1715 } else {
1716 old_index = MAX_NB_OF_LANGUAGES;
1717 DBG(DBG_MODNAME "can't create new dictionary");
1718 g_free(language);
1719 return;
1720 }
1721
1722 /* Look for the dictionary directory */
1723 dirname = g_string_new(NULL);
1724 #ifdef VOXIN
1725 g_string_printf(dirname, "%s/%s_%s", IbmttsDictionaryFolder, language,
1726 region);
1727 if (!g_file_test(dirname->str, G_FILE_TEST_IS_DIR)) {
1728 DBG(DBG_MODNAME "%s is not a directory",
1729 dirname->str);
1730 g_string_printf(dirname, "%s/%s", IbmttsDictionaryFolder,
1731 language);
1732 #else
1733 g_string_printf(dirname, "%s/%s", IbmttsDictionaryFolder, language);
1734 if (!g_file_test(dirname->str, G_FILE_TEST_IS_DIR) && dash) {
1735 *dash = 0;
1736 g_string_printf(dirname, "%s/%s", IbmttsDictionaryFolder, language);
1737 #endif
1738 if (!g_file_test(dirname->str, G_FILE_TEST_IS_DIR)) {
1739 g_string_printf(dirname, "%s", IbmttsDictionaryFolder);
1740 if (!g_file_test(dirname->str, G_FILE_TEST_IS_DIR)) {
1741 DBG(DBG_MODNAME "%s is not a directory",
1742 dirname->str);
1743 g_free(language);
1744 return;
1745 }
1746 }
1747 }
1748 g_free(language);
1749
1750 DBG(DBG_MODNAME "Looking in dictionary directory %s", dirname->str);
1751 filename = g_string_new(NULL);
1752
1753 for (i = 0; i < NB_OF_DICTIONARY_FILENAMES; i++) {
1754 g_string_printf(filename, "%s/%s", dirname->str,
1755 dictionary_filenames[i]);
1756 if (g_file_test(filename->str, G_FILE_TEST_EXISTS)) {
1757 enum ECIDictError error =
1758 eciLoadDict(eciHandle, eciDict, i, filename->str);
1759 if (!error) {
1760 dictionary_is_present = 1;
1761 DBG(DBG_MODNAME "%s dictionary loaded",
1762 filename->str);
1763 } else {
1764 DBG(DBG_MODNAME "Can't load %s dictionary (%d)",
1765 filename->str, error);
1766 }
1767 } else {
1768 DBG(DBG_MODNAME "No %s dictionary", filename->str);
1769 }
1770 }
1771
1772 g_string_free(filename, TRUE);
1773 g_string_free(dirname, TRUE);
1774
1775 if (dictionary_is_present) {
1776 eciSetDict(eciHandle, eciDict);
1777 }
1778 }
1779 /* local variables: */
1780 /* c-basic-offset: 8 */
1781 /* end: */
1782