1 /*************************************************************************/
2 /*                                                                       */
3 /*                  Language Technologies Institute                      */
4 /*                     Carnegie Mellon University                        */
5 /*                        Copyright (c) 1999                             */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*             Author:  Alan W Black (awb@cs.cmu.edu)                    */
34 /*               Date:  December 1999                                    */
35 /*************************************************************************/
36 /*                                                                       */
37 /*  Lexicon related functions                                            */
38 /*                                                                       */
39 /*************************************************************************/
40 #ifndef _CST_LEXICON_H__
41 #define _CST_LEXICON_H__
42 
43 #include "cst_item.h"
44 #include "cst_lts.h"
45 
46 typedef struct lexicon_struct {
47     const char *name;
48     int num_entries;
49     /* Entries are centered around bytes with value 255 */
50     /* entries and forward (compressed) pronunciations and backwards */
51     /* each are terminated (preceeded in pron case) by 0 */
52     /* This saves 4 bytes per entry for an index */
53     unsigned char *data; /* the entries and phone strings */
54     int num_bytes;       /* the number of bytes in the data */
55     char **phone_table;
56 
57     cst_lts_rules *lts_rule_set;
58 
59     int (*syl_boundary)(const cst_item *i,const cst_val *p);
60 
61     cst_val *(*lts_function)(const struct lexicon_struct *l, const char *word, const char *pos, const cst_features *feats);
62 
63     char ***addenda;
64     /* ngram frequency table used for packed entries */
65     const char * const *phone_hufftable;
66     const char * const *entry_hufftable;
67 
68     cst_utterance *(*postlex)(cst_utterance *u);
69 
70     cst_val *lex_addenda;  /* For pronunciations added at run time */
71 
72 } cst_lexicon;
73 
74 cst_lexicon *new_lexicon();
75 void delete_lexicon(cst_lexicon *lex);
76 
77 cst_val *cst_lex_make_entry(const cst_lexicon *lex,
78                             const cst_string *entry);
79 cst_val *cst_lex_load_addenda(const cst_lexicon *lex,
80                               const char *lexfile);
81 
82 cst_val *lex_lookup(const cst_lexicon *l, const char *word, const char *pos,
83                     const cst_features *feats);
84 int in_lex(const cst_lexicon *l, const char *word, const char *pos,
85            const cst_features *feats);
86 
87 CST_VAL_USER_TYPE_DCLS(lexicon,cst_lexicon)
88 
89 #endif
90