1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : June 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* Probabilistic parser for (S)CFG */
38 /* */
39 /*=======================================================================*/
40 #include <cmath>
41 #include "festival.h"
42 #include "parser.h"
43 #include "EST_SCFG_Chart.h"
44
FT_PParse_Generalized_Utt(LISP args,LISP env)45 LISP FT_PParse_Generalized_Utt(LISP args, LISP env) {
46 LISP utt; // Utterance to parse
47 LISP gram; // Grammar Name
48 LISP prel; // Relation Name to parse over
49 LISP pfeat; // Feature Name to parse over
50 LISP crel; // Relation to create parse in
51
52 args = leval(car(args), env);
53 utt = car(args);
54 gram = car(cdr(args));
55 prel = car(cdr(cdr(args)));
56 pfeat = car(cdr(cdr(cdr(args))));
57 crel = car(cdr(cdr(cdr(cdr(args)))));
58
59 // Parse utt using items in prel, using features in pfeat, and store
60 // the parser output into crel
61
62 EST_Utterance *u;
63 LISP rules;
64 const char *gram_name, *prel_name, *pfeat_name, *crel_name;
65
66 u = get_c_utt(utt);
67 gram_name = get_c_string(gram);
68 prel_name = get_c_string(prel);
69 pfeat_name = get_c_string(pfeat);
70 crel_name = get_c_string(crel);
71
72 rules = siod_get_lval(gram_name, NULL);
73 if (rules == NULL)
74 return utt;
75
76 EST_SCFG grammar(rules);
77
78 scfg_parse(u->relation(prel_name), pfeat_name,
79 u->create_relation(crel_name), grammar);
80
81 return utt;
82 }
83
FT_PParse_Utt(LISP utt)84 LISP FT_PParse_Utt(LISP utt)
85 {
86 // Parse Words (using part of speech tags) using given
87 // probabilistic grammar
88 EST_Utterance *u = get_c_utt(utt);
89 LISP rules;
90
91 rules = siod_get_lval("scfg_grammar", NULL);
92 if (rules == NULL)
93 return utt;
94
95 EST_SCFG grammar(rules);
96
97 scfg_parse(u->relation("Word"),"phr_pos",
98 u->create_relation("Syntax"),grammar);
99
100 return utt;
101 }
102
FT_MultiParse_Utt(LISP utt)103 LISP FT_MultiParse_Utt(LISP utt)
104 {
105 // You give them a parser and they just want more ...
106 // Because in some modes utterance may contain multiple sentences
107 // and the grammars we have only have only deal in more
108 // traditional sentences this tries to split the utterance into
109 // sentences and parse them individualls and add them to
110 // a single Syntax relation as a list of trees.
111 EST_Utterance *u = get_c_utt(utt);
112 LISP rules, eos_tree;
113 EST_Item *s,*e,*st,*et;
114
115 rules = siod_get_lval("scfg_grammar", NULL);
116 if (rules == NULL)
117 return utt;
118 eos_tree = siod_get_lval("scfg_eos_tree",NULL);
119 u->create_relation("Syntax");
120 EST_SCFG_Chart chart;
121 chart.set_grammar_rules(rules);
122
123 for (st=u->relation("Token")->head(); st; st = st->next())
124 {
125 for (et=st->next(); et; et=et->next())
126 if (wagon_predict(et,eos_tree) != 0)
127 break;
128 // Now find related words
129 s = first_leaf(st)->as_relation("Word");
130 e = first_leaf(et->next())->as_relation("Word");
131 chart.setup_wfst(s,e,"phr_pos");
132 chart.parse();
133 chart.extract_parse(u->relation("Syntax"),s,e,TRUE);
134 st = et;
135 }
136
137 return utt;
138 }
139
MultiParse(EST_Utterance & u)140 void MultiParse(EST_Utterance &u)
141 {
142 // You give them a parser and they just want more ...
143 // Because in some modes utterance may contain multiple sentences
144 // and the grammars we have only have only deal in more
145 // traditional sentences this tries to split the utterance into
146 // sentences and parse them individualls and add them to
147 // a single Syntax release as a list of trees.
148 LISP rules, eos_tree;
149 EST_Item *s, *w;
150
151 rules = siod_get_lval("scfg_grammar", NULL);
152 if (rules == NULL)
153 EST_error("Couldn't find grammar rules\n");
154 eos_tree = siod_get_lval("scfg_eos_tree",NULL);
155 u.create_relation("Syntax");
156 EST_SCFG_Chart chart;
157 chart.set_grammar_rules(rules);
158
159 // produce a parse wherever there is a sentence end marker or
160 // the end of utterance.
161
162 for (w = s = u.relation("Word")->head(); w; w = w->next())
163 if (w->f_present("sentence_end") || (w->next() == 0))
164 {
165 chart.setup_wfst(s, w->next(), "phr_pos");
166 chart.parse();
167 chart.extract_parse(u.relation("Syntax"), s, w->next(), TRUE);
168 s = w->next();
169 }
170 }
171
festival_parser_init(void)172 void festival_parser_init(void)
173 {
174 proclaim_module("parser");
175
176 festival_def_utt_module("ProbParse",FT_PParse_Utt,
177 "(ProbParse UTT)\n\
178 Parse part of speech tags in Word relation. Loads the grammar \n\
179 from scfg_grammar_filename and saves the best parse\n\
180 in the Syntax Relation.");
181
182 init_fsubr("ProbParseGeneralized", FT_PParse_Generalized_Utt,
183 "(ProbParseGeneralized (list utt gram prel pfeat crel))\n"
184 "Parse utt over the prel relation using its pfeat feature\n"
185 "Load grammar from gram, and save parse in relation crel");
186
187 festival_def_utt_module("MultiProbParse",FT_MultiParse_Utt,
188 "(MultiProbParse UTT)\n\
189 Parse part of speech tags in Word relation. Unlike ProbParse this \n\
190 allows multiple sentences to appear in the one utterance. The CART \n\
191 tree in eos_tree is used to define end of sentence. Loads the \n\
192 grammar from scfg_grammar_filename and saves the best parse\n\
193 in the Syntax Relation.");
194 }
195