modules/donovan/t2s.h

/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                       Copyright (c) 1996,1997                         */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission is hereby granted, free of charge, to use and distribute  */
/*  this software and its documentation without restriction, including   */
/*  without limitation the rights to use, copy, modify, merge, publish,  */
/*  distribute, sublicense, and/or sell copies of this work, and to      */
/*  permit persons to whom this work is furnished to do so, subject to   */
/*  the following conditions:                                            */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*   4. The authors' names are not used to endorse or promote products   */
/*      derived from this software without specific prior written        */
/*      permission.                                                      */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*             Author :  Alistair Conkie and Steve Isard                 */
/*-----------------------------------------------------------------------*/

#ifndef _T2S_H_
#define _T2S_H_
/*
 * Diphone database dimensions.
 * NDIPHS and NFRAMES are the dimensions used by the indx[] and dico[]
 * extern declarations that are commented out further down in this header;
 * FR_DATA is the element count of FRAME.frame.
 */
#define NDIPHS 3000
#define NFRAMES 23000
#define FR_DATA 16  /* shorts per frame, coeffs + assorted  */

/*
 * Small integer tags.
 * NOTE(review): presumably the values stored in LING.type (function word,
 * content word, punctuation) -- confirm against the code that sets it.
 */
#define FW 1
#define CW 2
#define PUNCT 3

/* Signal parameters. */
/* NOTE(review): DEF_F0 is presumably the default fundamental frequency in Hz -- confirm. */
#define DEF_F0 125
#define SR 10000	/* sample rate (presumably in Hz -- confirm)  */
#define FR_SZ 132	/* standard frame size (presumably in samples -- confirm)  */

/*
 * malloc defaults.
 * NOTE(review): presumably the initial element counts handed to the
 * *_malloc() routines declared under "space.c" in this header -- confirm
 * in space.c.
 */
#define DEF_BUFFER 1024
#define DEF_LING_LIST 100
#define DEF_SPL 100
#define DEF_PHONS 100
#define DEF_TARGS 100
#define DEF_FRAMES 100
#define DEF_PM 100

/*
 * Fixed sizes.
 * PHON_SZ and DIPH_SZ have the same values as the literal array lengths of
 * SPROSOD.phoneme (5) and ENTRY.diph (10) in this header.  The structs do
 * not use the macros, so if that correspondence is intended the two must
 * be kept in step by hand.
 */
#define PHON_SZ 5
#define DIPH_SZ 10

/* NOTE(review): presumably the size of the output sample buffer -- confirm in the audio/makewave code. */
#define OUT_BUF 2048

/* NOTE(review): presumably the number of filter coefficients per frame (cf. the FR_DATA comment) -- confirm. */
#define NCOEFFS 12

/* non-rhotic vowel classification for assim.c */
/* NOTE(review): what each class means is decided by assim.c, which is not visible from this header. */
#define V_DEL_R 0
#define V_AIR 1
#define V_EER 2
#define V_OOR 3
#define V_R2SCHWA 4

/* various typedefs  */

/*
 * CONFIG has FILE * members, so include <stdio.h> here instead of relying
 * on every includer having pulled it in before this header.  For code that
 * already compiles this is a no-op thanks to the system header's own guard.
 */
#include <stdio.h>

/*
 * CONFIG -- per-run settings and open handles, passed to most of the
 * routines declared in this header.
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names only; confirm against the code that fills the structure in.
 */
typedef struct {
	char *input_file;	/* presumably the input path */
	char *output_file;	/* presumably the output path */
	char *index_file;	/* presumably the diphone index path */
	char *diphone_file;	/* presumably the diphone data path */
	char *hash_file;	/* presumably the hash/dictionary path */
	char *format;		/* output format, as a string */
	int type;	/* format by any other name  */
	FILE *ifd;	/* presumably the stream for input_file */
	FILE *ofd;	/* presumably the stream for output_file */
	FILE *xfd;	/* presumably the stream for index_file */
	FILE *dfd;	/* presumably the stream for diphone_file */
	void *db;	/* opaque handle; its real type is not visible here */
	/*
	 * NOTE(review): presumably the entry counts of the fw[], son[] and
	 * dur0[] KEY tables declared in this header -- confirm.
	 */
	int fw_num;
	int sonority_num;
	int dur0_num;
} CONFIG;

/*
 * BUFFER -- descriptor for a block of characters.
 *
 * NOTE(review): presumably `max` is the allocated capacity and `sz` the
 * amount in use; see buffer_malloc()/buffer_realloc() in space.c to confirm.
 */
typedef struct {
	int max, sz;
	char *ptr;	/* the character data */
} BUFFER;

/*
 * LING -- one entry of a LING_LIST: a word, an integer type tag and a
 * transcription string.
 *
 * NOTE(review): `type` presumably holds one of FW / CW / PUNCT -- confirm
 * against the code that sets it.
 */
typedef struct {
	char *word;
	int type;
	char *transcription;
} LING;

/*
 * LING_LIST -- array of pointers to LING entries.
 *
 * NOTE(review): presumably `max` is the allocated slot count and `sz` the
 * number in use; see ling_list_malloc()/ling_list_realloc() to confirm.
 */
typedef struct {
	int max, sz;
	LING **text;
} LING_LIST;

/*
 * KEY -- a keyword string paired with an integer.  This is the element
 * type of the fw[], son[] and dur0[] tables and of the table argument of
 * binary(), all declared in this header.
 */
struct key {
	char *keyword;
	int keycount;
};
typedef struct key KEY;

/*
 * SPROSOD -- one phoneme entry of a SPROSOD_LIST.
 *
 * NOTE(review): the (sprosod1, strength1) / (sprosod2, strength2) pairing
 * is presumably what spl_cpy() and spl_cat() fill in respectively --
 * confirm in conv.c.
 */
typedef struct {
	char phoneme[5];	/* same length as PHON_SZ */
	int syll, dur;
	char *sprosod1, *sprosod2;
	float strength1, strength2;	/* strength2: for combined elements  */
} SPROSOD;

/*
 * SPROSOD_LIST -- array of pointers to SPROSOD entries.
 *
 * NOTE(review): presumably `max` is the allocated slot count and `sz` the
 * number in use; see spl_malloc()/spl_realloc() to confirm.
 */
typedef struct {
	int max, sz;
	SPROSOD **phoneme;
} SPROSOD_LIST;

/*
 * ENTRY -- one diphone index record: a name plus three integer positions.
 *
 * NOTE(review): beg/mid/end are presumably frame numbers into the diphone
 * frame data -- confirm in load_diphs.c.
 */
typedef struct {
	char diph[10];	/* same length as DIPH_SZ */
	int beg, mid, end;
} ENTRY;

/*
 * FRAME -- one frame of stored speech data: FR_DATA shorts
 * ("coeffs + assorted", per the FR_DATA definition).
 */
typedef struct {
        short frame[FR_DATA];
} FRAME;

/*
 * SPN -- a set of parallel arrays with two size/capacity pairs.
 *
 * NOTE(review): judging by ps_malloc(int nphons, int ntargs, SPN *ps) the
 * p_* pair presumably counts phones and the t_* pair pitch targets, with
 * each array dimensioned by one of the two -- confirm in space.c which
 * array belongs to which.
 */
typedef struct {
	int p_sz, p_max;
	int t_sz, t_max;
	char **phons;
	int *duration, *cum_dur;
	int *pc_targs, *targ_phon, *targ_freq;
	int *abs_targ; /* maybe in samples  */
	int *pb;
	float *scale;
	char **diphs;
} SPN;

/*
 * ACOUSTIC -- frame pointers plus per-item duration and pitch arrays, with
 * two size/capacity pairs.
 *
 * NOTE(review): judging by as_malloc(int nframes, int npp, ACOUSTIC *as)
 * the f_* pair presumably counts frames and the p_* pair pitch values --
 * confirm in space.c.
 */
typedef struct {
	int f_sz, p_sz;
	int f_max, p_max;
	FRAME **mcebuf;
	short *duration; /* since variants may be required  */
	short *pitch;
} ACOUSTIC;


/*
 * KEY tables defined elsewhere (array sizes are not visible here).
 * NOTE(review): presumably searched with binary(), with their lengths held
 * in CONFIG.fw_num / sonority_num / dur0_num -- confirm.
 */
extern KEY fw[];
extern KEY son[];
extern KEY dur0[];

/* now declarations of global data (extern only; the objects are defined elsewhere)  */

/* awb -- deleted */
/* extern ENTRY indx[NDIPHS]; */
/* extern FRAME dico[NFRAMES]; */
/* NOTE(review): nindex is presumably the number of diphone index entries loaded, and dbName the database name -- confirm in load_diphs.c. */
extern int nindex;
extern char *dbName;

/* program prototypes  */

/*
 * audio.c
 * NOTE(review): only the signatures are visible here; what `sz` and
 * `number` mean for audio_play() must be confirmed in audio.c.
 */
void audio_open(CONFIG *config);
void audio_play(short *start, int sz, int number, CONFIG *config);
void audio_close(CONFIG *config);
void audio_flush(CONFIG *config);

/* makewave.c  */
/* NOTE(review): presumably turns the acoustic data in `as` into output samples -- confirm in makewave.c. */
void makewave(CONFIG *config, ACOUSTIC *as);

/* coeffs.c  */
/* NOTE(review): by its name, presumably converts reflection coefficients in `buf` to LPC coefficients in place -- confirm in coeffs.c. */
void rfctolpc(float *buf);

/*
 * conv.c
 * NOTE(review): conv() presumably fills `spl` from `ling_list`, with
 * spl_cpy()/spl_cat() writing entry `index` of `spl` -- confirm in conv.c.
 * vowel() returns an int for a phoneme string; its exact meaning is not
 * visible here.
 */
void conv(CONFIG *config, LING_LIST *ling_list, SPROSOD_LIST *spl);
void spl_cpy(int index, int syll, char *phon, int dur, char *type,
             float strength, SPROSOD_LIST *spl);
void spl_cat(int index, char *type, float strength, SPROSOD_LIST *spl);
int vowel(char *ph);

/* durations.c  */
/* NOTE(review): presumably computes duration values from `ps` into `as` -- confirm in durations.c. */
void durations(SPN *ps, ACOUSTIC *as);

/* excitation.c  */
/* NOTE(review): presumably returns the next excitation sample, with `voiced` selecting the source and `wkspace` holding caller-owned state -- confirm in excitation.c. */
float iexc(short voiced, ACOUSTIC *as, short *wkspace);

/*
 * go.c
 * Takes the same six working structures as init(), terminate() and
 * process_sentence().
 * NOTE(review): presumably the top-level processing loop -- confirm in go.c.
 */
void go(CONFIG *config, BUFFER *buffer, LING_LIST *ling_list,
        SPROSOD_LIST *spl, SPN *ps, ACOUSTIC *as);

/* grammar.c  */
/* NOTE(review): operates on `ling_list` only; what it changes there must be confirmed in grammar.c. */
void grammar(LING_LIST *ling_list);

/* interface.c  */
/* NOTE(review): presumably applies letter-to-sound rules to `in`; who owns the returned string is not visible here -- confirm in interface.c. */
char *nrl_rules(char *in);

/* load_diphs.c  */
/*
 * NOTE(review): load_speech() presumably loads the diphone index and frame
 * data named in `config`, and lookup() presumably returns the index of
 * `diph`; the meaning of both int return values (including any failure
 * value) must be confirmed in load_diphs.c.
 */
int load_speech(CONFIG *config);
int lookup(char *diph);
void phonstoframes(SPN *ps, ACOUSTIC *as);

/* nrl_edin.c  */
/* NOTE(review): which of `str` / `str2` is input and which is output, and any required buffer size, must be confirmed in nrl_edin.c. */
void nrl_edin_conv(char *str, char *str2);

/* pitch.c  */
/* NOTE(review): presumably derives the pitch values in `as` from the targets in `ps` -- confirm in pitch.c. */
void calc_pitch(SPN *ps, ACOUSTIC *as);

/* prosody.c  */
/* NOTE(review): presumably fills `ps` from the prosodic markup in `spl` -- confirm in prosody.c. */
void prosody(SPROSOD_LIST *spl, SPN *ps);

/*
 * space.c
 * init()/terminate() take all six working structures; the remaining
 * routines come in malloc/realloc/free triples, one triple per structure.
 * NOTE(review): the int arguments are presumably element counts (cf. the
 * DEF_* "malloc defaults") -- confirm in space.c, along with behaviour on
 * allocation failure.
 */
void init(CONFIG *config, BUFFER *buffer, LING_LIST *ling_list,
          SPROSOD_LIST *spl, SPN *ps, ACOUSTIC *as);
void terminate(CONFIG *config, BUFFER *buffer, LING_LIST *ling_list,
               SPROSOD_LIST *spl, SPN *ps, ACOUSTIC *as);

void buffer_malloc(int num, BUFFER *buffer);
void buffer_realloc(int num, BUFFER *buffer);
void buffer_free(BUFFER *buffer);

void ling_list_malloc(int num, LING_LIST *ling_list);
void ling_list_realloc(int num, LING_LIST *ling_list);
void ling_list_free(LING_LIST *ling_list);

void spl_malloc(int num, SPROSOD_LIST *spl);
void spl_realloc(int num, SPROSOD_LIST *spl);
void spl_free(SPROSOD_LIST *spl);

void ps_malloc(int nphons, int ntargs, SPN *ps);
void ps_realloc(int nphons, int ntargs, SPN *ps);
void ps_free(SPN *ps);

void as_malloc(int nframes, int npp, ACOUSTIC *as);
void as_realloc(int nframes, int npp, ACOUSTIC *as);
void as_free(ACOUSTIC *as);

/* syllab.c  */
/* NOTE(review): both return a char *; whether it is newly allocated or the input modified in place must be confirmed in syllab.c. */
char *syllabify(char *string, CONFIG *config);
char *stress(char *input);

/*
 * t2s.c
 * Takes the same six working structures as init(), terminate() and go().
 * NOTE(review): presumably runs the pipeline for one sentence -- confirm
 * in t2s.c.
 */
void process_sentence(CONFIG *config, BUFFER *buffer, LING_LIST *ling_list,
                      SPROSOD_LIST *spl, SPN *ps, ACOUSTIC *as);

/* tags.c  */
/* NOTE(review): presumably builds `ling_list` from the text in `buffer` -- confirm in tags.c. */
void tags(CONFIG *config, BUFFER *buffer, LING_LIST *ling_list);

/* transcribe.c  */
/* NOTE(review): presumably fills in LING.transcription for the entries of `ling_list` -- confirm in transcribe.c. */
void transcribe(CONFIG *config, LING_LIST *ling_list);

/* utils.c  */
/*
 * NOTE(review): split() presumably returns an array of strings that
 * tidy_split() releases, and binary() presumably binary-searches the `n`
 * entries of `tab` for `word`, returning the match or NULL -- confirm all
 * of this in utils.c.
 */
char **split(char *in);
void tidy_split(char **root);
KEY *binary(char *word, KEY tab[], int n);

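/* library prototypes -- deliberately commented out and kept for reference
   only; use the declarations from the system headers (<stdio.h>, <unistd.h>)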
int fprintf(FILE *stream, char *format, ...  );
int printf(const char *format, ...  );
int getopt(int argc,char **argv, char *optstring);
int sscanf(char *s,char * format, ... );
int fread (char *ptr, int size, int nitems, FILE *stream);
int fwrite (char *ptr, int size, int nitems, FILE *stream);
int fclose(FILE *stream);
*/


#endif /* _T2S_H_ */