1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                       Copyright (c) 1996,1997                         */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*             Author :  Alan W Black                                    */
34 /*             Date   :  June 1997                                       */
35 /*-----------------------------------------------------------------------*/
36 /*                                                                       */
37 /*  Probabilistic parser for (S)CFG                                      */
38 /*                                                                       */
39 /*=======================================================================*/
40 #include <cmath>
41 #include "festival.h"
42 #include "parser.h"
43 #include "EST_SCFG_Chart.h"
44 
FT_PParse_Generalized_Utt(LISP args,LISP env)45 LISP FT_PParse_Generalized_Utt(LISP args, LISP env) {
46   LISP utt;    // Utterance to parse
47   LISP gram;   // Grammar Name
48   LISP prel;   // Relation Name to parse over
49   LISP pfeat;  // Feature Name to parse over
50   LISP crel;   // Relation to create parse in
51 
52   args = leval(car(args), env);
53   utt = car(args);
54   gram = car(cdr(args));
55   prel = car(cdr(cdr(args)));
56   pfeat = car(cdr(cdr(cdr(args))));
57   crel = car(cdr(cdr(cdr(cdr(args)))));
58 
59   // Parse utt using items in prel, using features in pfeat, and store
60   // the parser output into crel
61 
62   EST_Utterance *u;
63   LISP rules;
64   const char *gram_name, *prel_name, *pfeat_name, *crel_name;
65 
66   u = get_c_utt(utt);
67   gram_name = get_c_string(gram);
68   prel_name = get_c_string(prel);
69   pfeat_name = get_c_string(pfeat);
70   crel_name = get_c_string(crel);
71 
72   rules = siod_get_lval(gram_name, NULL);
73   if (rules == NULL)
74     return utt;
75 
76   EST_SCFG grammar(rules);
77 
78   scfg_parse(u->relation(prel_name), pfeat_name,
79              u->create_relation(crel_name), grammar);
80 
81   return utt;
82 }
83 
FT_PParse_Utt(LISP utt)84 LISP FT_PParse_Utt(LISP utt)
85 {
86     // Parse Words (using part of speech tags) using given
87     // probabilistic grammar
88     EST_Utterance *u = get_c_utt(utt);
89     LISP rules;
90 
91     rules = siod_get_lval("scfg_grammar", NULL);
92     if (rules == NULL)
93 	return utt;
94 
95     EST_SCFG grammar(rules);
96 
97     scfg_parse(u->relation("Word"),"phr_pos",
98 	       u->create_relation("Syntax"),grammar);
99 
100     return utt;
101 }
102 
FT_MultiParse_Utt(LISP utt)103 LISP FT_MultiParse_Utt(LISP utt)
104 {
105     // You give them a parser and they just want more ...
106     // Because in some modes utterance may contain multiple sentences
107     // and the grammars we have only have only deal in more
108     // traditional sentences this tries to split the utterance into
109     // sentences and parse them individualls and add them to
110     // a single Syntax relation as a list of trees.
111     EST_Utterance *u = get_c_utt(utt);
112     LISP rules, eos_tree;
113     EST_Item *s,*e,*st,*et;
114 
115     rules = siod_get_lval("scfg_grammar", NULL);
116     if (rules == NULL)
117 	return utt;
118     eos_tree = siod_get_lval("scfg_eos_tree",NULL);
119     u->create_relation("Syntax");
120     EST_SCFG_Chart chart;
121     chart.set_grammar_rules(rules);
122 
123     for (st=u->relation("Token")->head(); st; st = st->next())
124     {
125 	for (et=st->next(); et; et=et->next())
126 	    if (wagon_predict(et,eos_tree) != 0)
127 		break;
128 	// Now find related words
129 	s = first_leaf(st)->as_relation("Word");
130 	e = first_leaf(et->next())->as_relation("Word");
131 	chart.setup_wfst(s,e,"phr_pos");
132 	chart.parse();
133 	chart.extract_parse(u->relation("Syntax"),s,e,TRUE);
134 	st = et;
135     }
136 
137     return utt;
138 }
139 
MultiParse(EST_Utterance & u)140 void MultiParse(EST_Utterance &u)
141 {
142     // You give them a parser and they just want more ...
143     // Because in some modes utterance may contain multiple sentences
144     // and the grammars we have only have only deal in more
145     // traditional sentences this tries to split the utterance into
146     // sentences and parse them individualls and add them to
147     // a single Syntax release as a list of trees.
148     LISP rules, eos_tree;
149     EST_Item *s, *w;
150 
151     rules = siod_get_lval("scfg_grammar", NULL);
152     if (rules == NULL)
153 	EST_error("Couldn't find grammar rules\n");
154     eos_tree = siod_get_lval("scfg_eos_tree",NULL);
155     u.create_relation("Syntax");
156     EST_SCFG_Chart chart;
157     chart.set_grammar_rules(rules);
158 
159     // produce a parse wherever there is a sentence end marker or
160     // the end of utterance.
161 
162     for (w = s = u.relation("Word")->head(); w; w = w->next())
163 	if (w->f_present("sentence_end") || (w->next() == 0))
164 	{
165 	    chart.setup_wfst(s, w->next(), "phr_pos");
166 	    chart.parse();
167 	    chart.extract_parse(u.relation("Syntax"), s, w->next(), TRUE);
168 	    s = w->next();
169 	}
170 }
171 
festival_parser_init(void)172 void festival_parser_init(void)
173 {
174     proclaim_module("parser");
175 
176     festival_def_utt_module("ProbParse",FT_PParse_Utt,
177     "(ProbParse UTT)\n\
178   Parse part of speech tags in Word relation.  Loads the grammar \n\
179   from scfg_grammar_filename and saves the best parse\n\
180   in the Syntax Relation.");
181 
182     init_fsubr("ProbParseGeneralized", FT_PParse_Generalized_Utt,
183                 "(ProbParseGeneralized (list utt gram prel pfeat crel))\n"
184                 "Parse utt over the prel relation using its pfeat feature\n"
185                 "Load grammar from gram, and save parse in relation crel");
186 
187     festival_def_utt_module("MultiProbParse",FT_MultiParse_Utt,
188     "(MultiProbParse UTT)\n\
189   Parse part of speech tags in Word relation.  Unlike ProbParse this \n\
190   allows multiple sentences to appear in the one utterance.  The CART \n\
191   tree in eos_tree is used to define end of sentence.  Loads the \n\
192   grammar from scfg_grammar_filename and saves the best parse\n\
193   in the Syntax Relation.");
194 }
195