1 /*************************************************************************/ 2 /* */ 3 /* Language Technologies Institute */ 4 /* Carnegie Mellon University */ 5 /* Copyright (c) 1999 */ 6 /* All Rights Reserved. */ 7 /* */ 8 /* Permission is hereby granted, free of charge, to use and distribute */ 9 /* this software and its documentation without restriction, including */ 10 /* without limitation the rights to use, copy, modify, merge, publish, */ 11 /* distribute, sublicense, and/or sell copies of this work, and to */ 12 /* permit persons to whom this work is furnished to do so, subject to */ 13 /* the following conditions: */ 14 /* 1. The code must retain the above copyright notice, this list of */ 15 /* conditions and the following disclaimer. */ 16 /* 2. Any modifications must be clearly marked as such. */ 17 /* 3. Original authors' names are not deleted. */ 18 /* 4. The authors' names are not used to endorse or promote products */ 19 /* derived from this software without specific prior written */ 20 /* permission. */ 21 /* */ 22 /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */ 23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */ 24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */ 25 /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */ 26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */ 27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */ 28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */ 29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */ 30 /* THIS SOFTWARE. */ 31 /* */ 32 /*************************************************************************/ 33 /* Author: Alan W Black (awb@cs.cmu.edu) */ 34 /* Date: December 1999 */ 35 /*************************************************************************/ 36 /* */ 37 /* Lexicon related functions */ 38 /* */ 39 /*************************************************************************/ 40 #ifndef _CST_LEXICON_H__ 41 #define _CST_LEXICON_H__ 42 43 #include "cst_item.h" 44 #include "cst_lts.h" 45 46 typedef struct lexicon_struct { 47 const char *name; 48 int num_entries; 49 /* Entries are centered around bytes with value 255 */ 50 /* entries and forward (compressed) pronunciations and backwards */ 51 /* each are terminated (preceeded in pron case) by 0 */ 52 /* This saves 4 bytes per entry for an index */ 53 unsigned char *data; /* the entries and phone strings */ 54 int num_bytes; /* the number of bytes in the data */ 55 char **phone_table; 56 57 cst_lts_rules *lts_rule_set; 58 59 int (*syl_boundary)(const cst_item *i,const cst_val *p); 60 61 cst_val *(*lts_function)(const struct lexicon_struct *l, const char *word, const char *pos, const cst_features *feats); 62 63 char ***addenda; 64 /* ngram frequency table used for packed entries */ 65 const char * const *phone_hufftable; 66 const char * const *entry_hufftable; 67 68 cst_utterance *(*postlex)(cst_utterance *u); 69 70 cst_val *lex_addenda; /* For pronunciations added at run time */ 71 72 } cst_lexicon; 73 74 cst_lexicon *new_lexicon(); 75 void delete_lexicon(cst_lexicon *lex); 76 77 cst_val *cst_lex_make_entry(const cst_lexicon *lex, 78 const cst_string *entry); 79 cst_val *cst_lex_load_addenda(const cst_lexicon *lex, 80 const char *lexfile); 81 82 cst_val *lex_lookup(const cst_lexicon *l, const char *word, const char *pos, 83 const cst_features *feats); 84 int in_lex(const cst_lexicon *l, const char *word, const char *pos, 85 const cst_features *feats); 86 87 CST_VAL_USER_TYPE_DCLS(lexicon,cst_lexicon) 88 89 #endif 90