/*
 * Copyright (C) 2005 to 2013 by Jonathan Duddington
 * email: jonsd@users.sourceforge.net
 * Copyright (C) 2013-2017 Reece H. Dunn
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 */

#include "melder.h"
#include "config.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <locale.h>
//#include <sndfile.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
#include <wchar.h>

#ifdef HAVE_PCAUDIOLIB_AUDIO_H
#include <pcaudiolib/audio.h>
#endif

#if defined(_WIN32) || defined(_WIN64)
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#include <winreg.h>
#endif

#include "espeak_ng.h"
#include "speak_lib.h"
#include "encoding.h"

#include "speech.h"
#include "synthesize.h"
#include "translate.h"
#include "espeak_command.h"
#include "fifo.h"

unsigned char *outbuf = NULL;
int outbuf_size = 0;

espeak_EVENT *event_list = NULL;
int event_list_ix = 0;
int n_event_list;
long count_samples;

#ifdef HAVE_PCAUDIOLIB_AUDIO_H
struct audio_object *my_audio = NULL;
#endif

static const char *option_device = NULL;
static unsigned int my_unique_identifier = 0;
static void *my_user_data = NULL;
static espeak_ng_OUTPUT_MODE my_mode = ENOUTPUT_MODE_SYNCHRONOUS;
static int out_samplerate = 0;
static int voice_samplerate = 22050;
static espeak_ng_STATUS err = ENS_OK;

t_espeak_callback *synth_callback = NULL;
int (*uri_callback)(int, const char *, const char *) = NULL;
int (*phoneme_callback)(const char *) = NULL;

char path_home[N_PATH_HOME]; // this is the espeak-ng-data directory
extern int saved_parameters[N_SPEECH_PARAM]; // Parameters
saved on synthesis start void cancel_audio(void) { #ifdef HAVE_PCAUDIOLIB_AUDIO_H if ((my_mode & ENOUTPUT_MODE_SPEAK_AUDIO) == ENOUTPUT_MODE_SPEAK_AUDIO) { audio_object_flush(my_audio); } #endif } static int dispatch_audio(short *outbuffer, int length, espeak_EVENT *event) { int a_wave_can_be_played = 1; #ifdef USE_ASYNC if ((my_mode & ENOUTPUT_MODE_SYNCHRONOUS) == 0) a_wave_can_be_played = fifo_is_command_enabled(); #endif switch ((int)my_mode) { case ENOUTPUT_MODE_SPEAK_AUDIO: case ENOUTPUT_MODE_SPEAK_AUDIO | ENOUTPUT_MODE_SYNCHRONOUS: { int event_type = 0; if (event) event_type = event->type; if (event_type == espeakEVENT_SAMPLERATE) { voice_samplerate = event->id.number; if (out_samplerate != voice_samplerate) { #ifdef HAVE_PCAUDIOLIB_AUDIO_H if (out_samplerate != 0) { // sound was previously open with a different sample rate audio_object_close(my_audio); #ifdef HAVE_SLEEP sleep(1); #endif } #endif out_samplerate = voice_samplerate; #ifdef HAVE_PCAUDIOLIB_AUDIO_H int error = audio_object_open(my_audio, AUDIO_OBJECT_FORMAT_S16LE, voice_samplerate, 1); if (error != 0) { fprintf(stderr, "error: %s\n", audio_object_strerror(my_audio, error)); err = ENS_AUDIO_ERROR; return -1; } #endif #ifdef USE_ASYNC if ((my_mode & ENOUTPUT_MODE_SYNCHRONOUS) == 0) event_init(); #endif } } #ifdef HAVE_PCAUDIOLIB_AUDIO_H if (outbuffer && length && a_wave_can_be_played) { int error = audio_object_write(my_audio, (char *)outbuffer, 2*length); if (error != 0) fprintf(stderr, "error: %s\n", audio_object_strerror(my_audio, error)); } #endif #ifdef USE_ASYNC while (event && a_wave_can_be_played) { // TBD: some event are filtered here but some insight might be given // TBD: in synthesise.cpp for avoiding to create WORDs with size=0. // TBD: For example sentence "or ALT)." returns three words // "or", "ALT" and "". // TBD: the last one has its size=0. 
if ((event->type == espeakEVENT_WORD) && (event->length == 0)) break; if ((my_mode & ENOUTPUT_MODE_SYNCHRONOUS) == 0) { err = event_declare(event); if (err != ENS_EVENT_BUFFER_FULL) break; usleep(10000); a_wave_can_be_played = fifo_is_command_enabled(); } else break; } #endif } break; case 0: if (synth_callback) synth_callback(outbuffer, length, event); break; } return a_wave_can_be_played == 0; // 1 = stop synthesis, -1 = error } static int create_events(short *outbuffer, int length, espeak_EVENT *eventlist) { int finished; int i = 0; // The audio data are written to the output device. // The list of events in event_list (index: event_list_ix) is read: // Each event is declared to the "event" object which stores them internally. // The event object is responsible of calling the external callback // as soon as the relevant audio sample is played. do { // for each event espeak_EVENT *event; if (event_list_ix == 0) event = NULL; else event = eventlist + i; finished = dispatch_audio((short *)outbuffer, length, event); length = 0; // the wave data are played once. 
i++; } while ((i < event_list_ix) && !finished); return finished; } #ifdef USE_ASYNC int sync_espeak_terminated_msg(uint32_t unique_identifier, void *user_data) { int finished = 0; memset(event_list, 0, 2*sizeof(espeak_EVENT)); event_list[0].type = espeakEVENT_MSG_TERMINATED; event_list[0].unique_identifier = unique_identifier; event_list[0].user_data = user_data; event_list[1].type = espeakEVENT_LIST_TERMINATED; event_list[1].unique_identifier = unique_identifier; event_list[1].user_data = user_data; if (my_mode == ENOUTPUT_MODE_SPEAK_AUDIO) { while (1) { err = event_declare(event_list); if (err != ENS_EVENT_BUFFER_FULL) break; usleep(10000); } } else if (synth_callback) finished = synth_callback(NULL, 0, event_list); return finished; } #endif static int check_data_path(const char *path, int allow_directory) { if (!path) return 0; snprintf(path_home, sizeof(path_home), "%s/espeak-ng-data", path); if (GetFileLength(path_home) == -EISDIR) return 1; if (!allow_directory) return 0; snprintf(path_home, sizeof(path_home), "%s", path); return GetFileLength(path_home) == -EISDIR; } #pragma GCC visibility push(default) ESPEAK_NG_API espeak_ng_STATUS espeak_ng_InitializeOutput(espeak_ng_OUTPUT_MODE output_mode, int buffer_length, const char *device) { option_device = device; my_mode = output_mode; out_samplerate = 0; #ifdef HAVE_PCAUDIOLIB_AUDIO_H if (my_audio == NULL) my_audio = create_audio_device_object(device, "eSpeak", "Text-to-Speech"); #endif // buffer_length is in mS, allocate 2 bytes per sample if (buffer_length == 0) buffer_length = 60; outbuf_size = (buffer_length * samplerate)/500; out_start = (unsigned char *)realloc(outbuf, outbuf_size); if (out_start == NULL) return static_cast (ENOMEM); else outbuf = out_start; // allocate space for event list. Allow 200 events per second. 
// Add a constant to allow for very small buffer_length n_event_list = (buffer_length*200)/1000 + 20; espeak_EVENT *new_event_list = (espeak_EVENT *)realloc(event_list, sizeof(espeak_EVENT) * n_event_list); if (new_event_list == NULL) return static_cast (ENOMEM); event_list = new_event_list; return ENS_OK; } #if ! DATA_FROM_SOURCECODE_FILES int GetFileLength(const char *filename) { struct stat statbuf; if (stat(filename, &statbuf) != 0) return -errno; if (S_ISDIR(statbuf.st_mode)) return -EISDIR; return statbuf.st_size; } #endif ESPEAK_NG_API void espeak_ng_InitializePath(const char *path) { if (check_data_path(path, 1)) return; #ifdef PLATFORM_WINDOWS HKEY RegKey; unsigned long size; unsigned long var_type; unsigned char buf[sizeof(path_home)-13]; if (check_data_path(getenv("ESPEAK_DATA_PATH"), 1)) return; buf[0] = 0; RegOpenKeyExA(HKEY_LOCAL_MACHINE, "Software\\eSpeak NG", 0, KEY_READ, &RegKey); if (RegKey == NULL) RegOpenKeyExA(HKEY_LOCAL_MACHINE, "Software\\WOW6432Node\\eSpeak NG", 0, KEY_READ, &RegKey); size = sizeof(buf); var_type = REG_SZ; RegQueryValueExA(RegKey, "Path", 0, &var_type, buf, &size); if (check_data_path(buf, 1)) return; #elif !defined(PLATFORM_DOS) if (check_data_path(getenv("ESPEAK_DATA_PATH"), 1)) return; if (check_data_path(getenv("HOME"), 0)) return; #endif strcpy(path_home, PATH_ESPEAK_DATA); } ESPEAK_NG_API espeak_ng_STATUS espeak_ng_Initialize(espeak_ng_ERROR_CONTEXT *context) { int param; int srate = 22050; // default sample rate 22050 Hz // It seems that the wctype functions don't work until the locale has been set // to something other than the default "C". Then, not only Latin1 but also the // other characters give the correct results with iswalpha() etc. 
if (setlocale(LC_CTYPE, "C.UTF-8") == NULL) { if (setlocale(LC_CTYPE, "UTF-8") == NULL) { if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL) setlocale(LC_CTYPE, ""); } } fprintf (stderr, "Locale: %s\n", setlocale (LC_ALL, nullptr)); espeak_ng_STATUS result = LoadPhData(&srate, context); if (result != ENS_OK) return result; WavegenInit(srate, 0); LoadConfig(); memset(¤t_voice_selected, 0, sizeof(current_voice_selected)); SetVoiceStack(NULL, ""); SynthesizeInit(); InitNamedata(); VoiceReset(0); for (param = 0; param < N_SPEECH_PARAM; param++) param_stack[0].parameter[param] = saved_parameters[param] = param_defaults[param]; SetParameter(espeakRATE, 175, 0); SetParameter(espeakVOLUME, 100, 0); SetParameter(espeakCAPITALS, option_capitals, 0); SetParameter(espeakPUNCTUATION, option_punctuation, 0); SetParameter(espeakWORDGAP, 0, 0); #ifdef USE_ASYNC fifo_init(); #endif option_phonemes = 0; option_phoneme_events = 0; return ENS_OK; } ESPEAK_NG_API int espeak_ng_GetSampleRate(void) { return samplerate; } #pragma GCC visibility pop static espeak_ng_STATUS Synthesize(unsigned int unique_identifier, const void *text, int flags) { // Fill the buffer with output sound int length; int finished = 0; int count_buffers = 0; if ((outbuf == NULL) || (event_list == NULL)) return ENS_NOT_INITIALIZED; option_ssml = flags & espeakSSML; option_phoneme_input = flags & espeakPHONEMES; option_endpause = flags & espeakENDPAUSE; count_samples = 0; espeak_ng_STATUS status; if (translator == NULL) { status = espeak_ng_SetVoiceByName("en"); if (status != ENS_OK) return status; } if (p_decoder == NULL) p_decoder = create_text_decoder(); status = text_decoder_decode_string_multibyte(p_decoder, text, translator->encoding, flags); if (status != ENS_OK) return status; SpeakNextClause(0); for (;;) { out_ptr = outbuf; out_end = &outbuf[outbuf_size]; event_list_ix = 0; WavegenFill(); length = (out_ptr - outbuf)/2; count_samples += length; event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // 
indicates end of event list event_list[event_list_ix].unique_identifier = unique_identifier; event_list[event_list_ix].user_data = my_user_data; count_buffers++; if ((my_mode & ENOUTPUT_MODE_SPEAK_AUDIO) == ENOUTPUT_MODE_SPEAK_AUDIO) { finished = create_events((short *)outbuf, length, event_list); if (finished < 0) return ENS_AUDIO_ERROR; } else if (synth_callback) finished = synth_callback((short *)outbuf, length, event_list); if (finished) { SpeakNextClause(2); // stop return ENS_SPEECH_STOPPED; } if (Generate(phoneme_list, &n_phoneme_list, 1) == 0) { if (WcmdqUsed() == 0) { // don't process the next clause until the previous clause has finished generating speech. // This ensures that