/*
* Copyright (C) 2005 to 2013 by Jonathan Duddington
* email: jonsd@users.sourceforge.net
* Copyright (C) 2013-2017 Reece H. Dunn
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see: <http://www.gnu.org/licenses/>.
*/
#include "melder.h"
#include "config.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <locale.h>
//#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
#include <wchar.h>
#ifdef HAVE_PCAUDIOLIB_AUDIO_H
#include <pcaudiolib/audio.h>
#endif
#if defined(_WIN32) || defined(_WIN64)
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#include <winreg.h>
#endif
#include "espeak_ng.h"
#include "speak_lib.h"
#include "encoding.h"
#include "speech.h"
#include "synthesize.h"
#include "translate.h"
#include "espeak_command.h"
#include "fifo.h"
// ---- Module state shared by the synthesis entry points below ----

unsigned char *outbuf = NULL;    // sample output buffer, filled by WavegenFill()
int outbuf_size = 0;             // allocated size of outbuf, in bytes
espeak_EVENT *event_list = NULL; // events produced while filling the current buffer
int event_list_ix = 0;           // number of events currently stored in event_list
int n_event_list;                // allocated capacity of event_list (entries)
long count_samples;              // samples synthesized so far for the current text
#ifdef HAVE_PCAUDIOLIB_AUDIO_H
struct audio_object *my_audio = NULL; // pcaudiolib playback device handle
#endif
static const char *option_device = NULL; // audio device name passed to espeak_ng_InitializeOutput
static unsigned int my_unique_identifier = 0; // identifier of the message being synthesized
static void *my_user_data = NULL;        // opaque user data echoed back in generated events
static espeak_ng_OUTPUT_MODE my_mode = ENOUTPUT_MODE_SYNCHRONOUS; // current output-mode flags
static int out_samplerate = 0;           // rate the audio device is currently open at (0 = closed)
static int voice_samplerate = 22050;     // sample rate requested by the current voice
static espeak_ng_STATUS err = ENS_OK;    // sticky error set by audio/event dispatch
t_espeak_callback *synth_callback = NULL; // user callback receiving samples + events
int (*uri_callback)(int, const char *, const char *) = NULL; // SSML <audio> URI resolver callback
int (*phoneme_callback)(const char *) = NULL; // phoneme trace callback
char path_home[N_PATH_HOME]; // this is the espeak-ng-data directory
extern int saved_parameters[N_SPEECH_PARAM]; // Parameters saved on synthesis start
// Discard any sound that is still queued on the playback device.
// Has an effect only when speaking directly to the audio device.
void cancel_audio(void)
{
#ifdef HAVE_PCAUDIOLIB_AUDIO_H
	const int speaking_to_device =
	        (my_mode & ENOUTPUT_MODE_SPEAK_AUDIO) == ENOUTPUT_MODE_SPEAK_AUDIO;
	if (speaking_to_device)
		audio_object_flush(my_audio);
#endif
}
// Deliver one buffer of synthesized 16-bit samples, plus one event, to the
// current output sink: the audio device (ENOUTPUT_MODE_SPEAK_AUDIO) and/or
// the async event queue, or the user's synth callback (mode 0).
// Returns 1 when synthesis should stop (async command was cancelled),
// -1 on an audio-device error (err is set to ENS_AUDIO_ERROR), 0 otherwise.
static int dispatch_audio(short *outbuffer, int length, espeak_EVENT *event)
{
	int a_wave_can_be_played = 1;
#ifdef USE_ASYNC
	// In asynchronous mode, stop playing as soon as the pending command
	// has been cancelled/disabled in the fifo.
	if ((my_mode & ENOUTPUT_MODE_SYNCHRONOUS) == 0)
		a_wave_can_be_played = fifo_is_command_enabled();
#endif
	switch ((int)my_mode)
	{
	case ENOUTPUT_MODE_SPEAK_AUDIO:
	case ENOUTPUT_MODE_SPEAK_AUDIO | ENOUTPUT_MODE_SYNCHRONOUS:
	{
		int event_type = 0;
		if (event)
			event_type = event->type;
		if (event_type == espeakEVENT_SAMPLERATE) {
			// The voice's sample rate arrives as an event; (re)open the
			// audio device when it differs from the rate currently in use.
			voice_samplerate = event->id.number;
			if (out_samplerate != voice_samplerate) {
#ifdef HAVE_PCAUDIOLIB_AUDIO_H
				if (out_samplerate != 0) {
					// sound was previously open with a different sample rate
					audio_object_close(my_audio);
#ifdef HAVE_SLEEP
					// give the device time to settle before reopening
					sleep(1);
#endif
				}
#endif
				out_samplerate = voice_samplerate;
#ifdef HAVE_PCAUDIOLIB_AUDIO_H
				int error = audio_object_open(my_audio, AUDIO_OBJECT_FORMAT_S16LE, voice_samplerate, 1);
				if (error != 0) {
					fprintf(stderr, "error: %s\n", audio_object_strerror(my_audio, error));
					err = ENS_AUDIO_ERROR;
					return -1;
				}
#endif
#ifdef USE_ASYNC
				// restart the async event thread for the new stream
				if ((my_mode & ENOUTPUT_MODE_SYNCHRONOUS) == 0)
					event_init();
#endif
			}
		}
#ifdef HAVE_PCAUDIOLIB_AUDIO_H
		// Write the samples to the device: 2 bytes per 16-bit sample.
		if (outbuffer && length && a_wave_can_be_played) {
			int error = audio_object_write(my_audio, (char *)outbuffer, 2*length);
			if (error != 0)
				fprintf(stderr, "error: %s\n", audio_object_strerror(my_audio, error));
		}
#endif
#ifdef USE_ASYNC
		while (event && a_wave_can_be_played) {
			// TBD: some event are filtered here but some insight might be given
			// TBD: in synthesise.cpp for avoiding to create WORDs with size=0.
			// TBD: For example sentence "or ALT)." returns three words
			// "or", "ALT" and "".
			// TBD: the last one has its size=0.
			if ((event->type == espeakEVENT_WORD) && (event->length == 0))
				break;
			if ((my_mode & ENOUTPUT_MODE_SYNCHRONOUS) == 0) {
				// Hand the event to the async event thread; retry while its
				// buffer is full, unless the command gets cancelled meanwhile.
				err = event_declare(event);
				if (err != ENS_EVENT_BUFFER_FULL)
					break;
				usleep(10000);
				a_wave_can_be_played = fifo_is_command_enabled();
			} else
				break;
		}
#endif
	}
		break;
	case 0:
		// Not playing audio ourselves: forward buffer and event to the
		// user-supplied callback, if one is registered.
		if (synth_callback)
			synth_callback(outbuffer, length, event);
		break;
	}
	return a_wave_can_be_played == 0; // 1 = stop synthesis, -1 = error
}
static int create_events(short *outbuffer, int length, espeak_EVENT *eventlist)
{
int finished;
int i = 0;
// The audio data are written to the output device.
// The list of events in event_list (index: event_list_ix) is read:
// Each event is declared to the "event" object which stores them internally.
// The event object is responsible of calling the external callback
// as soon as the relevant audio sample is played.
do { // for each event
espeak_EVENT *event;
if (event_list_ix == 0)
event = NULL;
else
event = eventlist + i;
finished = dispatch_audio((short *)outbuffer, length, event);
length = 0; // the wave data are played once.
i++;
} while ((i < event_list_ix) && !finished);
return finished;
}
#ifdef USE_ASYNC
// Emit the pair of events that mark the end of a message:
// MSG_TERMINATED followed by LIST_TERMINATED.  In speak-audio mode they are
// queued on the async event object (retrying while its buffer is full);
// otherwise they are delivered through the user's synth callback.
// Returns the callback's result, or 0 when no callback was invoked.
int sync_espeak_terminated_msg(uint32_t unique_identifier, void *user_data)
{
	int finished = 0;
	int ix;

	memset(event_list, 0, 2*sizeof(espeak_EVENT));
	for (ix = 0; ix < 2; ix++) {
		event_list[ix].unique_identifier = unique_identifier;
		event_list[ix].user_data = user_data;
	}
	event_list[0].type = espeakEVENT_MSG_TERMINATED;
	event_list[1].type = espeakEVENT_LIST_TERMINATED;

	if (my_mode == ENOUTPUT_MODE_SPEAK_AUDIO) {
		for (;;) {
			err = event_declare(event_list);
			if (err != ENS_EVENT_BUFFER_FULL)
				break;
			usleep(10000); // event buffer full: wait 10 ms and retry
		}
	} else if (synth_callback)
		finished = synth_callback(NULL, 0, event_list);
	return finished;
}
#endif
// Test whether path contains the espeak-ng data.  Sets path_home to
// "path/espeak-ng-data" and returns 1 if that is a directory; otherwise,
// when allow_directory is set, accepts path itself if it is a directory.
// Returns 0 when path is NULL or no directory was found.
static int check_data_path(const char *path, int allow_directory)
{
	if (path == NULL)
		return 0;

	snprintf(path_home, sizeof(path_home), "%s/espeak-ng-data", path);
	if (GetFileLength(path_home) == -EISDIR)
		return 1;

	if (allow_directory == 0)
		return 0;

	snprintf(path_home, sizeof(path_home), "%s", path);
	return (GetFileLength(path_home) == -EISDIR) ? 1 : 0;
}
#pragma GCC visibility push(default)
// Prepare the output buffers and (optionally) the audio device for the
// requested output mode.  buffer_length is in milliseconds (0 selects the
// default of 60 mS); device names the audio device to open, or NULL for the
// default.  Returns ENS_OK, or ENOMEM (as an espeak_ng_STATUS) on
// allocation failure.
ESPEAK_NG_API espeak_ng_STATUS espeak_ng_InitializeOutput(espeak_ng_OUTPUT_MODE output_mode, int buffer_length, const char *device)
{
	option_device = device;
	my_mode = output_mode;
	out_samplerate = 0; // force the audio device to be (re)opened on first use

#ifdef HAVE_PCAUDIOLIB_AUDIO_H
	if (my_audio == NULL)
		my_audio = create_audio_device_object(device, "eSpeak", "Text-to-Speech");
#endif

	// buffer_length is in mS, allocate 2 bytes per sample
	if (buffer_length == 0)
		buffer_length = 60;
	outbuf_size = (buffer_length * samplerate)/500;
	out_start = (unsigned char *)realloc(outbuf, outbuf_size);
	if (out_start == NULL)
		// fix: the <espeak_ng_STATUS> template argument had been lost,
		// leaving "static_cast (ENOMEM)" which does not compile
		return static_cast<espeak_ng_STATUS> (ENOMEM);
	else
		outbuf = out_start;

	// allocate space for event list.  Allow 200 events per second.
	// Add a constant to allow for very small buffer_length
	n_event_list = (buffer_length*200)/1000 + 20;
	espeak_EVENT *new_event_list = (espeak_EVENT *)realloc(event_list, sizeof(espeak_EVENT) * n_event_list);
	if (new_event_list == NULL)
		return static_cast<espeak_ng_STATUS> (ENOMEM);
	event_list = new_event_list;

	return ENS_OK;
}
#if ! DATA_FROM_SOURCECODE_FILES
// Return the size of the file in bytes, -EISDIR when the path names a
// directory, or -errno when it cannot be stat'ed at all.
int GetFileLength(const char *filename)
{
	struct stat st;

	if (stat(filename, &st) != 0)
		return -errno;

	return S_ISDIR(st.st_mode) ? -EISDIR : (int)st.st_size;
}
#endif
// Locate the espeak-ng-data directory and record it in path_home.
// Search order: the caller-supplied path, the ESPEAK_DATA_PATH environment
// variable, (on Windows) the "Path" value under the eSpeak NG registry key,
// the user's HOME, and finally the compiled-in default PATH_ESPEAK_DATA.
ESPEAK_NG_API void espeak_ng_InitializePath(const char *path)
{
	if (check_data_path(path, 1))
		return;
#ifdef PLATFORM_WINDOWS
	HKEY RegKey;
	unsigned long size;
	unsigned long var_type;
	// NOTE(review): the -13 presumably reserves space for appending
	// "/espeak-ng-data" in check_data_path — confirm the arithmetic
	unsigned char buf[sizeof(path_home)-13];
	if (check_data_path(getenv("ESPEAK_DATA_PATH"), 1))
		return;
	buf[0] = 0;
	RegOpenKeyExA(HKEY_LOCAL_MACHINE, "Software\\eSpeak NG", 0, KEY_READ, &RegKey);
	if (RegKey == NULL)
		// fall back to the 32-bit registry view on 64-bit Windows
		RegOpenKeyExA(HKEY_LOCAL_MACHINE, "Software\\WOW6432Node\\eSpeak NG", 0, KEY_READ, &RegKey);
	size = sizeof(buf);
	var_type = REG_SZ;
	RegQueryValueExA(RegKey, "Path", 0, &var_type, buf, &size);
	if (check_data_path(buf, 1))
		return;
#elif !defined(PLATFORM_DOS)
	if (check_data_path(getenv("ESPEAK_DATA_PATH"), 1))
		return;
	if (check_data_path(getenv("HOME"), 0))
		return;
#endif
	strcpy(path_home, PATH_ESPEAK_DATA);
}
// Initialize the synthesizer: set a UTF-8 locale so the wctype functions
// classify non-Latin1 characters correctly, load the phoneme data and
// configuration, reset the voice, and set the default speech parameters.
// Returns ENS_OK, or the error reported by LoadPhData (with details in
// *context when provided).
ESPEAK_NG_API espeak_ng_STATUS espeak_ng_Initialize(espeak_ng_ERROR_CONTEXT *context)
{
	int param;
	int srate = 22050; // default sample rate 22050 Hz

	// It seems that the wctype functions don't work until the locale has been set
	// to something other than the default "C".  Then, not only Latin1 but also the
	// other characters give the correct results with iswalpha() etc.
	if (setlocale(LC_CTYPE, "C.UTF-8") == NULL) {
		if (setlocale(LC_CTYPE, "UTF-8") == NULL) {
			if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL)
				setlocale(LC_CTYPE, "");
		}
	}
	fprintf (stderr, "Locale: %s\n", setlocale (LC_ALL, nullptr));

	espeak_ng_STATUS result = LoadPhData(&srate, context);
	if (result != ENS_OK)
		return result;

	WavegenInit(srate, 0);
	LoadConfig();

	// fix: "&current_voice_selected" had been garbled to
	// "¤t_voice_selected" by an HTML-entity mangling of "&curren"
	memset(&current_voice_selected, 0, sizeof(current_voice_selected));
	SetVoiceStack(NULL, "");
	SynthesizeInit();
	InitNamedata();

	VoiceReset(0);

	// Start every parameter at its default, and mirror that into the
	// saved-parameter snapshot used when synthesis begins.
	for (param = 0; param < N_SPEECH_PARAM; param++)
		param_stack[0].parameter[param] = saved_parameters[param] = param_defaults[param];

	SetParameter(espeakRATE, 175, 0);
	SetParameter(espeakVOLUME, 100, 0);
	SetParameter(espeakCAPITALS, option_capitals, 0);
	SetParameter(espeakPUNCTUATION, option_punctuation, 0);
	SetParameter(espeakWORDGAP, 0, 0);

#ifdef USE_ASYNC
	fifo_init();
#endif

	option_phonemes = 0;
	option_phoneme_events = 0;

	return ENS_OK;
}
// Return the synthesizer's output sample rate in Hz (the global set when
// the phoneme data was loaded).
ESPEAK_NG_API int espeak_ng_GetSampleRate(void)
{
	return samplerate;
}
#pragma GCC visibility pop
static espeak_ng_STATUS Synthesize(unsigned int unique_identifier, const void *text, int flags)
{
// Fill the buffer with output sound
int length;
int finished = 0;
int count_buffers = 0;
if ((outbuf == NULL) || (event_list == NULL))
return ENS_NOT_INITIALIZED;
option_ssml = flags & espeakSSML;
option_phoneme_input = flags & espeakPHONEMES;
option_endpause = flags & espeakENDPAUSE;
count_samples = 0;
espeak_ng_STATUS status;
if (translator == NULL) {
status = espeak_ng_SetVoiceByName("en");
if (status != ENS_OK)
return status;
}
if (p_decoder == NULL)
p_decoder = create_text_decoder();
status = text_decoder_decode_string_multibyte(p_decoder, text, translator->encoding, flags);
if (status != ENS_OK)
return status;
SpeakNextClause(0);
for (;;) {
out_ptr = outbuf;
out_end = &outbuf[outbuf_size];
event_list_ix = 0;
WavegenFill();
length = (out_ptr - outbuf)/2;
count_samples += length;
event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list
event_list[event_list_ix].unique_identifier = unique_identifier;
event_list[event_list_ix].user_data = my_user_data;
count_buffers++;
if ((my_mode & ENOUTPUT_MODE_SPEAK_AUDIO) == ENOUTPUT_MODE_SPEAK_AUDIO) {
finished = create_events((short *)outbuf, length, event_list);
if (finished < 0)
return ENS_AUDIO_ERROR;
} else if (synth_callback)
finished = synth_callback((short *)outbuf, length, event_list);
if (finished) {
SpeakNextClause(2); // stop
return ENS_SPEECH_STOPPED;
}
if (Generate(phoneme_list, &n_phoneme_list, 1) == 0) {
if (WcmdqUsed() == 0) {
// don't process the next clause until the previous clause has finished generating speech.
// This ensures that