hts_engine_API-1.10/lib/HTS_engine.c

/* ----------------------------------------------------------------- */
/*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
/*           developed by HTS Working Group                          */
/*           http://hts-engine.sourceforge.net/                      */
/* ----------------------------------------------------------------- */
/*                                                                   */
/*  Copyright (c) 2001-2015  Nagoya Institute of Technology          */
/*                           Department of Computer Science          */
/*                                                                   */
/*                2001-2008  Tokyo Institute of Technology           */
/*                           Interdisciplinary Graduate School of    */
/*                           Science and Engineering                 */
/*                                                                   */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/* - Redistributions of source code must retain the above copyright  */
/*   notice, this list of conditions and the following disclaimer.   */
/* - Redistributions in binary form must reproduce the above         */
/*   copyright notice, this list of conditions and the following     */
/*   disclaimer in the documentation and/or other materials provided */
/*   with the distribution.                                          */
/* - Neither the name of the HTS working group nor the names of its  */
/*   contributors may be used to endorse or promote products derived */
/*   from this software without specific prior written permission.   */
/*                                                                   */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
/* POSSIBILITY OF SUCH DAMAGE.                                       */
/* ----------------------------------------------------------------- */

#ifndef HTS_ENGINE_C
#define HTS_ENGINE_C

#ifdef __cplusplus
#define HTS_ENGINE_C_START extern "C" {
#define HTS_ENGINE_C_END   }
#else
#define HTS_ENGINE_C_START
#define HTS_ENGINE_C_END
#endif                          /* __CPLUSPLUS */

HTS_ENGINE_C_START;

#include <stdlib.h>             /* for atof() */
#include <string.h>             /* for strcpy() */
#include <math.h>               /* for pow() */

/* hts_engine libraries */
#include "HTS_hidden.h"

/* HTS_Engine_initialize: initialize engine */
void HTS_Engine_initialize(HTS_Engine * engine)
{
   /* global */
   engine->condition.sampling_frequency = 0;
   engine->condition.fperiod = 0;
   engine->condition.audio_buff_size = 0;
   engine->condition.stop = FALSE;
   engine->condition.volume = 1.0;
   engine->condition.msd_threshold = NULL;
   engine->condition.gv_weight = NULL;

   /* duration */
   engine->condition.speed = 1.0;
   engine->condition.phoneme_alignment_flag = FALSE;

   /* spectrum */
   engine->condition.stage = 0;
   engine->condition.use_log_gain = FALSE;
   engine->condition.alpha = 0.0;
   engine->condition.beta = 0.0;

   /* log F0 */
   engine->condition.additional_half_tone = 0.0;

   /* interpolation weights */
   engine->condition.duration_iw = NULL;
   engine->condition.parameter_iw = NULL;
   engine->condition.gv_iw = NULL;

   /* initialize audio */
   HTS_Audio_initialize(&engine->audio);
   /* initialize model set */
   HTS_ModelSet_initialize(&engine->ms);
   /* initialize label list */
   HTS_Label_initialize(&engine->label);
   /* initialize state sequence set */
   HTS_SStreamSet_initialize(&engine->sss);
   /* initialize pstream set */
   HTS_PStreamSet_initialize(&engine->pss);
   /* initialize gstream set */
   HTS_GStreamSet_initialize(&engine->gss);
}

/* HTS_Engine_load: load HTS voices */
HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices, size_t num_voices)
{
   size_t i, j;
   size_t nstream;
   double average_weight;
   const char *option, *find;

   /* reset engine */
   HTS_Engine_clear(engine);

   /* load voices */
   if (HTS_ModelSet_load(&engine->ms, voices, num_voices) != TRUE) {
      HTS_Engine_clear(engine);
      return FALSE;
   }
   nstream = HTS_ModelSet_get_nstream(&engine->ms);
   average_weight = 1.0 / num_voices;

   /* global */
   engine->condition.sampling_frequency = HTS_ModelSet_get_sampling_frequency(&engine->ms);
   engine->condition.fperiod = HTS_ModelSet_get_fperiod(&engine->ms);
   engine->condition.msd_threshold = (double *) HTS_calloc(nstream, sizeof(double));
   for (i = 0; i < nstream; i++)
      engine->condition.msd_threshold[i] = 0.5;
   engine->condition.gv_weight = (double *) HTS_calloc(nstream, sizeof(double));
   for (i = 0; i < nstream; i++)
      engine->condition.gv_weight[i] = 1.0;

   /* spectrum */
   option = HTS_ModelSet_get_option(&engine->ms, 0);
   find = strstr(option, "GAMMA=");
   if (find != NULL)
      engine->condition.stage = (size_t) atoi(&find[strlen("GAMMA=")]);
   find = strstr(option, "LN_GAIN=");
   if (find != NULL)
      engine->condition.use_log_gain = atoi(&find[strlen("LN_GAIN=")]) == 1 ? TRUE : FALSE;
   find = strstr(option, "ALPHA=");
   if (find != NULL)
      engine->condition.alpha = atof(&find[strlen("ALPHA=")]);

   /* interpolation weights */
   engine->condition.duration_iw = (double *) HTS_calloc(num_voices, sizeof(double));
   for (i = 0; i < num_voices; i++)
      engine->condition.duration_iw[i] = average_weight;
   engine->condition.parameter_iw = (double **) HTS_calloc(num_voices, sizeof(double *));
   for (i = 0; i < num_voices; i++) {
      engine->condition.parameter_iw[i] = (double *) HTS_calloc(nstream, sizeof(double));
      for (j = 0; j < nstream; j++)
         engine->condition.parameter_iw[i][j] = average_weight;
   }
   engine->condition.gv_iw = (double **) HTS_calloc(num_voices, sizeof(double *));
   for (i = 0; i < num_voices; i++) {
      engine->condition.gv_iw[i] = (double *) HTS_calloc(nstream, sizeof(double));
      for (j = 0; j < nstream; j++)
         engine->condition.gv_iw[i][j] = average_weight;
   }

   return TRUE;
}

/* HTS_Engine_set_sampling_frequency: set sampling frequency */
void HTS_Engine_set_sampling_frequency(HTS_Engine * engine, size_t i)
{
   if (i < 1)
      i = 1;
   engine->condition.sampling_frequency = i;
   HTS_Audio_set_parameter(&engine->audio, engine->condition.sampling_frequency, engine->condition.audio_buff_size);
}

/* HTS_Engine_get_sampling_frequency: get sampling frequency */
size_t HTS_Engine_get_sampling_frequency(HTS_Engine * engine)
{
   return engine->condition.sampling_frequency;
}

/* HTS_Engine_set_fperiod: set frame period */
void HTS_Engine_set_fperiod(HTS_Engine * engine, size_t i)
{
   if (i < 1)
      i = 1;
   engine->condition.fperiod = i;
}

/* HTS_Engine_get_fperiod: get frame period */
size_t HTS_Engine_get_fperiod(HTS_Engine * engine)
{
   return engine->condition.fperiod;
}

/* HTS_Engine_set_audio_buff_size: set audio buffer size */
void HTS_Engine_set_audio_buff_size(HTS_Engine * engine, size_t i)
{
   engine->condition.audio_buff_size = i;
   HTS_Audio_set_parameter(&engine->audio, engine->condition.sampling_frequency, engine->condition.audio_buff_size);
}

/* HTS_Engine_get_audio_buff_size: get audio buffer size */
size_t HTS_Engine_get_audio_buff_size(HTS_Engine * engine)
{
   return engine->condition.audio_buff_size;
}

/* HTS_Engine_set_stop_flag: set stop flag */
void HTS_Engine_set_stop_flag(HTS_Engine * engine, HTS_Boolean b)
{
   engine->condition.stop = b;
}

/* HTS_Engine_get_stop_flag: get stop flag */
HTS_Boolean HTS_Engine_get_stop_flag(HTS_Engine * engine)
{
   return engine->condition.stop;
}

/* HTS_Engine_set_volume: set volume in db */
void HTS_Engine_set_volume(HTS_Engine * engine, double f)
{
   engine->condition.volume = exp(f * DB);
}

/* HTS_Engine_get_volume: get volume in db */
double HTS_Engine_get_volume(HTS_Engine * engine)
{
   return log(engine->condition.volume) / DB;
}

/* HTS_Egnine_set_msd_threshold: set MSD threshold */
void HTS_Engine_set_msd_threshold(HTS_Engine * engine, size_t stream_index, double f)
{
   if (f < 0.0)
      f = 0.0;
   if (f > 1.0)
      f = 1.0;
   engine->condition.msd_threshold[stream_index] = f;
}

/* HTS_Engine_get_msd_threshold: get MSD threshold */
double HTS_Engine_get_msd_threshold(HTS_Engine * engine, size_t stream_index)
{
   return engine->condition.msd_threshold[stream_index];
}

/* HTS_Engine_set_gv_weight: set GV weight */
void HTS_Engine_set_gv_weight(HTS_Engine * engine, size_t stream_index, double f)
{
   if (f < 0.0)
      f = 0.0;
   engine->condition.gv_weight[stream_index] = f;
}

/* HTS_Engine_get_gv_weight: get GV weight */
double HTS_Engine_get_gv_weight(HTS_Engine * engine, size_t stream_index)
{
   return engine->condition.gv_weight[stream_index];
}

/* HTS_Engine_set_speed: set speech speed */
void HTS_Engine_set_speed(HTS_Engine * engine, double f)
{
   if (f < 1.0E-06)
      f = 1.0E-06;
   engine->condition.speed = f;
}

/* HTS_Engine_set_phoneme_alignment_flag: set flag for using phoneme alignment in label */
void HTS_Engine_set_phoneme_alignment_flag(HTS_Engine * engine, HTS_Boolean b)
{
   engine->condition.phoneme_alignment_flag = b;
}

/* HTS_Engine_set_alpha: set alpha */
void HTS_Engine_set_alpha(HTS_Engine * engine, double f)
{
   if (f < 0.0)
      f = 0.0;
   if (f > 1.0)
      f = 1.0;
   engine->condition.alpha = f;
}

/* HTS_Engine_get_alpha: get alpha */
double HTS_Engine_get_alpha(HTS_Engine * engine)
{
   return engine->condition.alpha;
}

/* HTS_Engine_set_beta: set beta */
void HTS_Engine_set_beta(HTS_Engine * engine, double f)
{
   if (f < 0.0)
      f = 0.0;
   if (f > 1.0)
      f = 1.0;
   engine->condition.beta = f;
}

/* HTS_Engine_get_beta: get beta */
double HTS_Engine_get_beta(HTS_Engine * engine)
{
   return engine->condition.beta;
}

/* HTS_Engine_add_half_tone: add half tone */
void HTS_Engine_add_half_tone(HTS_Engine * engine, double f)
{
   engine->condition.additional_half_tone = f;
}

/* HTS_Engine_set_duration_interpolation_weight: set interpolation weight for duration */
void HTS_Engine_set_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index, double f)
{
   engine->condition.duration_iw[voice_index] = f;
}

/* HTS_Engine_get_duration_interpolation_weight: get interpolation weight for duration */
double HTS_Engine_get_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index)
{
   return engine->condition.duration_iw[voice_index];
}

/* HTS_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter */
void HTS_Engine_set_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f)
{
   engine->condition.parameter_iw[voice_index][stream_index] = f;
}

/* HTS_Engine_get_parameter_interpolation_weight: get interpolation weight for parameter */
double HTS_Engine_get_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index)
{
   return engine->condition.parameter_iw[voice_index][stream_index];
}

/* HTS_Engine_set_gv_interpolation_weight: set interpolation weight for GV */
void HTS_Engine_set_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f)
{
   engine->condition.gv_iw[voice_index][stream_index] = f;
}

/* HTS_Engine_get_gv_interpolation_weight: get interpolation weight for GV */
double HTS_Engine_get_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index)
{
   return engine->condition.gv_iw[voice_index][stream_index];
}

/* HTS_Engine_get_total_state: get total number of state */
size_t HTS_Engine_get_total_state(HTS_Engine * engine)
{
   return HTS_SStreamSet_get_total_state(&engine->sss);
}

/* HTS_Engine_set_state_mean: set mean value of state */
void HTS_Engine_set_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index, double f)
{
   HTS_SStreamSet_set_mean(&engine->sss, stream_index, state_index, vector_index, f);
}

/* HTS_Engine_get_state_mean: get mean value of state */
double HTS_Engine_get_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index)
{
   return HTS_SStreamSet_get_mean(&engine->sss, stream_index, state_index, vector_index);
}

/* HTS_Engine_get_state_duration: get state duration */
size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index)
{
   return HTS_SStreamSet_get_duration(&engine->sss, state_index);
}

/* HTS_Engine_get_nvoices: get number of voices */
size_t HTS_Engine_get_nvoices(HTS_Engine * engine)
{
   return HTS_ModelSet_get_nvoices(&engine->ms);
}

/* HTS_Engine_get_nstream: get number of stream */
size_t HTS_Engine_get_nstream(HTS_Engine * engine)
{
   return HTS_ModelSet_get_nstream(&engine->ms);
}

/* HTS_Engine_get_nstate: get number of state */
size_t HTS_Engine_get_nstate(HTS_Engine * engine)
{
   return HTS_ModelSet_get_nstate(&engine->ms);
}

/* HTS_Engine_get_fullcontext_label_format: get full context label format */
const char *HTS_Engine_get_fullcontext_label_format(HTS_Engine * engine)
{
   return HTS_ModelSet_get_fullcontext_label_format(&engine->ms);
}

/* HTS_Engine_get_fullcontext_label_version: get full context label version */
const char *HTS_Engine_get_fullcontext_label_version(HTS_Engine * engine)
{
   return HTS_ModelSet_get_fullcontext_label_version(&engine->ms);
}

/* HTS_Engine_get_total_frame: get total number of frame */
size_t HTS_Engine_get_total_frame(HTS_Engine * engine)
{
   return HTS_GStreamSet_get_total_frame(&engine->gss);
}

/* HTS_Engine_get_nsamples: get number of samples */
size_t HTS_Engine_get_nsamples(HTS_Engine * engine)
{
   return HTS_GStreamSet_get_total_nsamples(&engine->gss);
}

/* HTS_Engine_get_generated_parameter: output generated parameter */
double HTS_Engine_get_generated_parameter(HTS_Engine * engine, size_t stream_index, size_t frame_index, size_t vector_index)
{
   return HTS_GStreamSet_get_parameter(&engine->gss, stream_index, frame_index, vector_index);
}

/* HTS_Engine_get_generated_speech: output generated speech */
double HTS_Engine_get_generated_speech(HTS_Engine * engine, size_t index)
{
   return HTS_GStreamSet_get_speech(&engine->gss, index);
}

/* HTS_Engine_generate_state_sequence: genereate state sequence (1st synthesis step) */
static HTS_Boolean HTS_Engine_generate_state_sequence(HTS_Engine * engine)
{
   size_t i, state_index, model_index;
   double f;

   if (HTS_SStreamSet_create(&engine->sss, &engine->ms, &engine->label, engine->condition.phoneme_alignment_flag, engine->condition.speed, engine->condition.duration_iw, engine->condition.parameter_iw, engine->condition.gv_iw) != TRUE) {
      HTS_Engine_refresh(engine);
      return FALSE;
   }
   if (engine->condition.additional_half_tone != 0.0) {
      state_index = 0;
      model_index = 0;
      for (i = 0; i < HTS_Engine_get_total_state(engine); i++) {
         f = HTS_Engine_get_state_mean(engine, 1, i, 0);
         f += engine->condition.additional_half_tone * HALF_TONE;
         if (f < MIN_LF0)
            f = MIN_LF0;
         else if (f > MAX_LF0)
            f = MAX_LF0;
         HTS_Engine_set_state_mean(engine, 1, i, 0, f);
         state_index++;
         if (state_index >= HTS_Engine_get_nstate(engine)) {
            state_index = 0;
            model_index++;
         }
      }
   }
   return TRUE;
}

/* HTS_Engine_generate_state_sequence_from_fn: genereate state sequence from file name (1st synthesis step) */
HTS_Boolean HTS_Engine_generate_state_sequence_from_fn(HTS_Engine * engine, const char *fn)
{
   HTS_Engine_refresh(engine);
   HTS_Label_load_from_fn(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, fn);
   return HTS_Engine_generate_state_sequence(engine);
}

/* HTS_Engine_generate_state_sequence_from_strings: generate state sequence from strings (1st synthesis step) */
HTS_Boolean HTS_Engine_generate_state_sequence_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
{
   HTS_Engine_refresh(engine);
   HTS_Label_load_from_strings(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, lines, num_lines);
   return HTS_Engine_generate_state_sequence(engine);
}

/* HTS_Engine_generate_parameter_sequence: generate parameter sequence (2nd synthesis step) */
HTS_Boolean HTS_Engine_generate_parameter_sequence(HTS_Engine * engine)
{
   return HTS_PStreamSet_create(&engine->pss, &engine->sss, engine->condition.msd_threshold, engine->condition.gv_weight);
}

/* HTS_Engine_generate_sample_sequence: generate sample sequence (3rd synthesis step) */
HTS_Boolean HTS_Engine_generate_sample_sequence(HTS_Engine * engine)
{
   return HTS_GStreamSet_create(&engine->gss, &engine->pss, engine->condition.stage, engine->condition.use_log_gain, engine->condition.sampling_frequency, engine->condition.fperiod, engine->condition.alpha, engine->condition.beta, &engine->condition.stop, engine->condition.volume, engine->condition.audio_buff_size > 0 ? &engine->audio : NULL);
}

/* HTS_Engine_synthesize: synthesize speech */
static HTS_Boolean HTS_Engine_synthesize(HTS_Engine * engine)
{
   if (HTS_Engine_generate_state_sequence(engine) != TRUE) {
      HTS_Engine_refresh(engine);
      return FALSE;
   }
   if (HTS_Engine_generate_parameter_sequence(engine) != TRUE) {
      HTS_Engine_refresh(engine);
      return FALSE;
   }
   if (HTS_Engine_generate_sample_sequence(engine) != TRUE) {
      HTS_Engine_refresh(engine);
      return FALSE;
   }
   return TRUE;
}

/* HTS_Engine_synthesize_from_fn: synthesize speech from file name */
HTS_Boolean HTS_Engine_synthesize_from_fn(HTS_Engine * engine, const char *fn)
{
   HTS_Engine_refresh(engine);
   HTS_Label_load_from_fn(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, fn);
   return HTS_Engine_synthesize(engine);
}

/* HTS_Engine_synthesize_from_strings: synthesize speech from strings */
HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
{
   HTS_Engine_refresh(engine);
   HTS_Label_load_from_strings(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, lines, num_lines);
   return HTS_Engine_synthesize(engine);
}

/* HTS_Engine_save_information: save trace information */
void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
{
   size_t i, j, k, l, m, n;
   double temp;
   HTS_Condition *condition = &engine->condition;
   HTS_ModelSet *ms = &engine->ms;
   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   HTS_PStreamSet *pss = &engine->pss;

   /* global parameter */
   fprintf(fp, "[Global parameter]\n");
   fprintf(fp, "Sampring frequency                     -> %8lu(Hz)\n", (unsigned long) condition->sampling_frequency);
   fprintf(fp, "Frame period                           -> %8lu(point)\n", (unsigned long) condition->fperiod);
   fprintf(fp, "                                          %8.5f(msec)\n", 1e+3 * condition->fperiod / condition->sampling_frequency);
   fprintf(fp, "All-pass constant                      -> %8.5f\n", (float) condition->alpha);
   fprintf(fp, "Gamma                                  -> %8.5f\n", (float) (condition->stage == 0 ? 0.0 : -1.0 / condition->stage));
   if (condition->stage != 0) {
      if (condition->use_log_gain == TRUE)
         fprintf(fp, "Log gain flag                          ->     TRUE\n");
      else
         fprintf(fp, "Log gain flag                          ->    FALSE\n");
   }
   fprintf(fp, "Postfiltering coefficient              -> %8.5f\n", (float) condition->beta);
   fprintf(fp, "Audio buffer size                      -> %8lu(sample)\n", (unsigned long) condition->audio_buff_size);
   fprintf(fp, "\n");

   /* duration parameter */
   fprintf(fp, "[Duration parameter]\n");
   fprintf(fp, "Number of states                       -> %8lu\n", (unsigned long) HTS_ModelSet_get_nstate(ms));
   fprintf(fp, "         Interpolation size            -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
   /* check interpolation */
   for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
      temp += condition->duration_iw[i];
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      if (condition->duration_iw[i] != 0.0)
         condition->duration_iw[i] /= temp;
   for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
      fprintf(fp, "         Interpolation weight[%2lu]      -> %8.0f(%%)\n", (unsigned long) i, (float) (100 * condition->duration_iw[i]));
   fprintf(fp, "\n");

   fprintf(fp, "[Stream parameter]\n");
   for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
      /* stream parameter */
      fprintf(fp, "Stream[%2lu] vector length               -> %8lu\n", (unsigned long) i, (unsigned long) HTS_ModelSet_get_vector_length(ms, i));
      fprintf(fp, "           Dynamic window size         -> %8lu\n", (unsigned long) HTS_ModelSet_get_window_size(ms, i));
      /* interpolation */
      fprintf(fp, "           Interpolation size          -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
      for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
         temp += condition->parameter_iw[j][i];
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         if (condition->parameter_iw[j][i] != 0.0)
            condition->parameter_iw[j][i] /= temp;
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
         fprintf(fp, "           Interpolation weight[%2lu]    -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->parameter_iw[j][i]));
      /* MSD */
      if (HTS_ModelSet_is_msd(ms, i)) { /* for MSD */
         fprintf(fp, "           MSD flag                    ->     TRUE\n");
         fprintf(fp, "           MSD threshold               -> %8.5f\n", condition->msd_threshold[i]);
      } else {                  /* for non MSD */
         fprintf(fp, "           MSD flag                    ->    FALSE\n");
      }
      /* GV */
      if (HTS_ModelSet_use_gv(ms, i)) {
         fprintf(fp, "           GV flag                     ->     TRUE\n");
         fprintf(fp, "           GV weight                   -> %8.0f(%%)\n", (float) (100 * condition->gv_weight[i]));
         fprintf(fp, "           GV interpolation size       -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
         /* interpolation */
         for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
            temp += condition->gv_iw[j][i];
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            if (condition->gv_iw[j][i] != 0.0)
               condition->gv_iw[j][i] /= temp;
         for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
            fprintf(fp, "           GV interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->gv_iw[j][i]));
      } else {
         fprintf(fp, "           GV flag                     ->    FALSE\n");
      }
   }
   fprintf(fp, "\n");

   /* generated sequence */
   fprintf(fp, "[Generated sequence]\n");
   fprintf(fp, "Number of HMMs                         -> %8lu\n", (unsigned long) HTS_Label_get_size(label));
   fprintf(fp, "Number of stats                        -> %8lu\n", (unsigned long) HTS_Label_get_size(label) * HTS_ModelSet_get_nstate(ms));
   fprintf(fp, "Length of this speech                  -> %8.3f(sec)\n", (float) ((double) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod / condition->sampling_frequency));
   fprintf(fp, "                                       -> %8lu(frames)\n", (unsigned long) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod);

   for (i = 0; i < HTS_Label_get_size(label); i++) {
      fprintf(fp, "HMM[%2lu]\n", (unsigned long) i);
      fprintf(fp, "  Name                                 -> %s\n", HTS_Label_get_string(label, i));
      fprintf(fp, "  Duration\n");
      for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++) {
         fprintf(fp, "    Interpolation[%2lu]\n", (unsigned long) j);
         HTS_ModelSet_get_duration_index(ms, j, HTS_Label_get_string(label, i), &k, &l);
         fprintf(fp, "      Tree index                       -> %8lu\n", (unsigned long) k);
         fprintf(fp, "      PDF index                        -> %8lu\n", (unsigned long) l);
      }
      for (j = 0; j < HTS_ModelSet_get_nstate(ms); j++) {
         fprintf(fp, "  State[%2lu]\n", (unsigned long) j + 2);
         fprintf(fp, "    Length                             -> %8lu(frames)\n", (unsigned long) HTS_SStreamSet_get_duration(sss, i * HTS_ModelSet_get_nstate(ms) + j));
         for (k = 0; k < HTS_ModelSet_get_nstream(ms); k++) {
            fprintf(fp, "    Stream[%2lu]\n", (unsigned long) k);
            if (HTS_ModelSet_is_msd(ms, k)) {
               if (HTS_SStreamSet_get_msd(sss, k, i * HTS_ModelSet_get_nstate(ms) + j) > condition->msd_threshold[k])
                  fprintf(fp, "      MSD flag                         ->     TRUE\n");
               else
                  fprintf(fp, "      MSD flag                         ->    FALSE\n");
            }
            for (l = 0; l < HTS_ModelSet_get_nvoices(ms); l++) {
               fprintf(fp, "      Interpolation[%2lu]\n", (unsigned long) l);
               HTS_ModelSet_get_parameter_index(ms, l, k, j + 2, HTS_Label_get_string(label, i), &m, &n);
               fprintf(fp, "        Tree index                     -> %8lu\n", (unsigned long) m);
               fprintf(fp, "        PDF index                      -> %8lu\n", (unsigned long) n);
            }
         }
      }
   }
}

/* HTS_Engine_save_label: save label with time */
void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
{
   size_t i, j;
   size_t frame, state, duration;

   HTS_Label *label = &engine->label;
   HTS_SStreamSet *sss = &engine->sss;
   size_t nstate = HTS_ModelSet_get_nstate(&engine->ms);
   double rate = engine->condition.fperiod * 1.0e+07 / engine->condition.sampling_frequency;

   for (i = 0, state = 0, frame = 0; i < HTS_Label_get_size(label); i++) {
      for (j = 0, duration = 0; j < nstate; j++)
         duration += HTS_SStreamSet_get_duration(sss, state++);
      fprintf(fp, "%lu %lu %s\n", (unsigned long) (frame * rate), (unsigned long) ((frame + duration) * rate), HTS_Label_get_string(label, i));
      frame += duration;
   }
}

/* HTS_Engine_save_generated_parameter: save generated parameter */
void HTS_Engine_save_generated_parameter(HTS_Engine * engine, size_t stream_index, FILE * fp)
{
   size_t i, j;
   float temp;
   HTS_GStreamSet *gss = &engine->gss;

   for (i = 0; i < HTS_GStreamSet_get_total_frame(gss); i++)
      for (j = 0; j < HTS_GStreamSet_get_vector_length(gss, stream_index); j++) {
         temp = (float) HTS_GStreamSet_get_parameter(gss, stream_index, i, j);
         fwrite(&temp, sizeof(float), 1, fp);
      }
}

/* HTS_Engine_save_generated_speech: save generated speech */
void HTS_Engine_save_generated_speech(HTS_Engine * engine, FILE * fp)
{
   size_t i;
   double x;
   short temp;
   HTS_GStreamSet *gss = &engine->gss;

   for (i = 0; i < HTS_GStreamSet_get_total_nsamples(gss); i++) {
      x = HTS_GStreamSet_get_speech(gss, i);
      if (x > 32767.0)
         temp = 32767;
      else if (x < -32768.0)
         temp = -32768;
      else
         temp = (short) x;
      fwrite(&temp, sizeof(short), 1, fp);
   }
}

/* HTS_Engine_save_riff: save RIFF format file */
void HTS_Engine_save_riff(HTS_Engine * engine, FILE * fp)
{
   size_t i;
   double x;
   short temp;

   HTS_GStreamSet *gss = &engine->gss;
   char data_01_04[] = { 'R', 'I', 'F', 'F' };
   int data_05_08 = HTS_GStreamSet_get_total_nsamples(gss) * sizeof(short) + 36;
   char data_09_12[] = { 'W', 'A', 'V', 'E' };
   char data_13_16[] = { 'f', 'm', 't', ' ' };
   int data_17_20 = 16;
   short data_21_22 = 1;        /* PCM */
   short data_23_24 = 1;        /* monoral */
   int data_25_28 = engine->condition.sampling_frequency;
   int data_29_32 = engine->condition.sampling_frequency * sizeof(short);
   short data_33_34 = sizeof(short);
   short data_35_36 = (short) (sizeof(short) * 8);
   char data_37_40[] = { 'd', 'a', 't', 'a' };
   int data_41_44 = HTS_GStreamSet_get_total_nsamples(gss) * sizeof(short);

   /* write header */
   HTS_fwrite_little_endian(data_01_04, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(&data_05_08, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(data_09_12, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(data_13_16, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(&data_17_20, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(&data_21_22, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(&data_23_24, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(&data_25_28, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(&data_29_32, sizeof(int), 1, fp);
   HTS_fwrite_little_endian(&data_33_34, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(&data_35_36, sizeof(short), 1, fp);
   HTS_fwrite_little_endian(data_37_40, sizeof(char), 4, fp);
   HTS_fwrite_little_endian(&data_41_44, sizeof(int), 1, fp);
   /* write data */
   for (i = 0; i < HTS_GStreamSet_get_total_nsamples(gss); i++) {
      x = HTS_GStreamSet_get_speech(gss, i);
      if (x > 32767.0)
         temp = 32767;
      else if (x < -32768.0)
         temp = -32768;
      else
         temp = (short) x;
      HTS_fwrite_little_endian(&temp, sizeof(short), 1, fp);
   }
}

/* HTS_Engine_refresh: free model per one time synthesis */
void HTS_Engine_refresh(HTS_Engine * engine)
{
   /* free generated parameter stream set */
   HTS_GStreamSet_clear(&engine->gss);
   /* free parameter stream set */
   HTS_PStreamSet_clear(&engine->pss);
   /* free state stream set */
   HTS_SStreamSet_clear(&engine->sss);
   /* free label list */
   HTS_Label_clear(&engine->label);
   /* stop flag */
   engine->condition.stop = FALSE;
}

/* HTS_Engine_clear: free engine */
void HTS_Engine_clear(HTS_Engine * engine)
{
   size_t i;

   if (engine->condition.msd_threshold != NULL)
      HTS_free(engine->condition.msd_threshold);
   if (engine->condition.duration_iw != NULL)
      HTS_free(engine->condition.duration_iw);
   if (engine->condition.gv_weight != NULL)
      HTS_free(engine->condition.gv_weight);
   if (engine->condition.parameter_iw != NULL) {
      for (i = 0; i < HTS_ModelSet_get_nvoices(&engine->ms); i++)
         HTS_free(engine->condition.parameter_iw[i]);
      HTS_free(engine->condition.parameter_iw);
   }
   if (engine->condition.gv_iw != NULL) {
      for (i = 0; i < HTS_ModelSet_get_nvoices(&engine->ms); i++)
         HTS_free(engine->condition.gv_iw[i]);
      HTS_free(engine->condition.gv_iw);
   }

   HTS_ModelSet_clear(&engine->ms);
   HTS_Audio_clear(&engine->audio);
   HTS_Engine_initialize(engine);
}

HTS_ENGINE_C_END;

#endif                          /* !HTS_ENGINE_C */