vocabulary.h - OpenGrok cross reference for /dports/audio/julius/julius-4.1.2/libsent/include/sent/vocabulary.h

/**
 * @file   vocabulary.h
 *
 * <JA>
 * @brief  ñ�켭��ι�¤�����
 *
 * ���Υե������ǧ�����Ѥ�����ñ�켭���������ޤ���ñ�켭���ñ����ɤߡ�
 * ����ʸ�����������¾�ˡ�ʸ�γ��ϡ���λñ���Ʃ��ñ�������ݻ����ޤ���
 *
 * N-gram�˽и�������äμ���� NGRAM_INFO �˳�Ǽ���졤����ǧ����ñ�켭��Ȥ�
 * ���̤���뤳�Ȥ���դ��Ʋ�������ñ�켭����N-gram�θ��äؤΥޥåԥ���
 * WORD_INFO ��� wton[] �ˤ�äƹԤʤ��ޤ����ޤ�DFA�ξ�硤wton ��
 * ����ñ�줬°����DFA_INFO ��Υ��ƥ����ֹ��ޤߤޤ���
 * </JA>
 * <EN>
 * @brief  Word dictionary for recognition
 *
 * This file defines data structure for word dictionary used in recognition.
 * It stores word's string, output string, phoneme sequence, transparency.
 * Beginning-of-sentence word and End-of-sentence word guessed from runtime
 * environment is also stored here.
 *
 * Please note that the N-gram vocabulary is stored in NGRAM_INFO and it
 * can differ from this word dictionary.  The reference from the word
 * dictionary to a N-gram vocabulary is done by wton[] member in WORD_INFO.
 * When used with DFA, the wton[] holds a category number to which each word
 * belongs.
 * </EN>
 *
 * @author Akinobu LEE
 * @date   Sat Feb 12 12:38:13 2005
 *
 * $Revision: 1.5 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#ifndef __SENT_VOCA_H__
#define __SENT_VOCA_H__

#include <sent/stddefs.h>
#include <sent/htk_hmm.h>

/// Memory allocation step in number of words when loading a word dictionary
#define	MAXWSTEP 4000

/// Word dictionary structure to hold vocabulary
typedef struct {
  WORD_ID	maxnum;		///< Allocated number of word space
  WORD_ID	num;		///< Number of words
  WORD_ID	errnum;		///< Number of error words that were skipped when reading dictionary
  WORD_ID	linenum;	///< Current line number while loading
  boolean	do_conv;	///< TRUE if needs conversion while loading
  boolean	ok_flag;	///< FALSE if any error occur while loading

  unsigned char	*wlen;		///< Number of phonemes for each word [wid]

  char		**wname;	///< Word name string for each word [wid].  With DFA, it's category ID.  With N-gram, it's N-gram entry name.
  char		**woutput;	///< Word output string that will be output as recognition result for each word [wid]
  HMM_Logical   ***wseq;	///< Phone sequence of each word [wid][0..wlen[wid]-1]
  WORD_ID	*wton;		///< Reference to N-gram/category ID of each word ID [wid]
#ifdef CLASS_NGRAM
  LOGPROB	*cprob;		///< Class probability of each word [wid]
  WORD_ID	cwnum;		///< Number of words whose class prob is specified (just for a statistic)
#endif
  WORD_ID	head_silwid;	///< Word ID of beginning-of-sentence silence
  WORD_ID	tail_silwid;	///< Word ID of end-of-sentence silence
  short		maxwn;		///< Maximum number of %HMM states per word (statistic)
  short         maxwlen;	///< Maximum number of phones in a word (statistic)
  int		totalstatenum;  ///< Total number of HMM states
  int		totalmodelnum;  ///< Total number of models (phonemes)
  int		totaltransnum;  ///< Total number of state transitions
  boolean	*is_transparent; ///< TRUE if the word can be treated as transparent [wid]
  APATNODE	*errph_root; ///< Root node of index tree for gathering error %HMM name appeared when reading the dictionary
  BMALLOC_BASE *mroot;		///< Pointer for block memory allocation
} WORD_INFO;

WORD_INFO *word_info_new();
void word_info_free(WORD_INFO *winfo);
void winfo_init(WORD_INFO *winfo);
boolean winfo_expand(WORD_INFO *winfo);
boolean init_voca(WORD_INFO *winfo, char *filename, HTK_HMM_INFO *hmminfo, boolean, boolean);
boolean init_wordlist(WORD_INFO *winfo, char *filename, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone, boolean force_dict);
void voca_set_stats(WORD_INFO *winfo);
void voca_load_start(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv);
boolean voca_load_line(char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo);
boolean voca_load_end(WORD_INFO *winfo);
boolean voca_load_htkdict(FILE *, WORD_INFO *, HTK_HMM_INFO *, boolean);
boolean	voca_load_htkdict_fp(FILE *, WORD_INFO *, HTK_HMM_INFO *, boolean);
boolean voca_append_htkdict(char *entry, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean ignore_tri_conv);
boolean voca_append(WORD_INFO *dstinfo, WORD_INFO *srcinfo, int coffset, int woffset);
boolean voca_load_htkdict_line(char *buf, WORD_ID *vnum, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag);

boolean voca_load_word_line(char *buf, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailpohone, char *contextphone);
boolean voca_load_wordlist(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone);
boolean voca_load_wordlist_fp(FILE *fp, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, char *headphone, char *tailphone, char *contextphone);
boolean voca_load_wordlist_line(char *buf, WORD_ID *vnum, int linenum, WORD_INFO *winfo, HTK_HMM_INFO *hmminfo, boolean do_conv, boolean *ok_flag, char *headphone, char *tailphone, char *contextphone);
boolean voca_mono2tri(WORD_INFO *winfo, HTK_HMM_INFO *hmminfo);
WORD_ID voca_lookup_wid(char *, WORD_INFO *);
WORD_ID *new_str2wordseq(WORD_INFO *, char *, int *);
char *cycle_triphone(char *p);
char *cycle_triphone_flush();
void print_voca_info(FILE *fp, WORD_INFO *);
void put_voca(FILE *fp, WORD_INFO *winfo, WORD_ID wid);

/* HMMList check */
boolean make_base_phone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo);
void print_phone_info(FILE *fp, HTK_HMM_INFO *hmminfo);
void print_all_basephone_detail(HMM_basephone *base);
void print_all_basephone_name(HMM_basephone *base);
void test_interword_triphone(HTK_HMM_INFO *hmminfo, WORD_INFO *winfo);

#endif /* __SENT_VOCA_H__ */