1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                       Copyright (c) 1996,1997                         */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*             Author :  Alan W Black                                    */
34 /*             Date   :  August 1997                                     */
35 /*-----------------------------------------------------------------------*/
36 /*                                                                       */
/*  There are just too many different versions of SGML-based mark up     */
/*  and none of them are stable, so this allows arbitrary mapping        */
/*  of tags to Lisp functions so any of them can be implemented in Lisp. */
/*  That is, people can worry about the actual content later and do not  */
/*  need to change the C++.                                              */
42 /*                                                                       */
43 /*  Of course once I give you this functionality you'll just want more   */
44 /*                                                                       */
45 /*=======================================================================*/
46 #include "EST_unix.h"
47 #include "festival.h"
48 #include "text.h"
49 #include "lexicon.h"
50 
51 static LISP xxml_get_attribute(const EST_String &remainder);
52 static char *xxml_process_line(const char *line);
53 static void tts_xxml_token(EST_Item *t);
54 static void tts_xxml_utt(LISP lutt);
55 
56 static LISP xxml_word_features = NIL;
57 static LISP xxml_token_hooks = NIL;
58 
tts_file_xxml(LISP filename)59 void tts_file_xxml(LISP filename)
60 {
61     // For stml ssml rsml and jsml etc
62     // filename contains *output* from something like nsgml
63     EST_String inname = get_c_string(filename);
64     EST_String line, type, remainder;
65     EST_TokenStream ts;
66     LISP atts, element_defs;
67     LISP utt = NIL;   // for cummulation of tokens
68 
69     if (ts.open(inname) == -1)
70     {
71 	cerr << "xxml: unable to open output from SGML parser" << endl;
72 	festival_error();
73     }
74     ts.set_WhiteSpaceChars(" \t\r\n");
75     ts.set_SingleCharSymbols("");
76     ts.set_PunctuationSymbols("");
77     ts.set_PrePunctuationSymbols("");
78 
79     element_defs = siod_get_lval("xxml_elements",NULL);
80     atts = NIL;
81 
82     if (ts.peek() != get_c_string(car(car(element_defs))))
83     {
84 	cerr << "xxml parse error: " << get_c_string(filename) <<
85 	    " Expected " << get_c_string(car(car(element_defs)))
86 		<< " but found " << ts.peek() << endl;
87 	festival_error();
88     }
89     while (ts.peek() != get_c_string(car(car(cdr(element_defs)))))
90     {
91 	if (ts.eof())
92 	{
93 	    cerr << "xxml parse error: unexpected end of file \n";
94 	    festival_error();
95 	}
96 	line = (EST_String)ts.get_upto_eoln();
97 	type = line.at(0,1);
98 	remainder = line.after(0);
99 	if (type == "-")
100 	{   //  Segments into utterances as it goes along
101 	    utt = xxml_get_tokens(remainder,
102 				  siod_get_lval("xxml_word_features",NULL),
103 				  utt);
104 	}
105 	else if (type == "A")        // general attribute
106 	{
107 	    atts = cons(xxml_get_attribute(remainder),atts);
108 	}
109 	else if ((type == "(") || (type == ")"))
110 	{
111 	    utt = xxml_call_element_function(type+remainder,atts,
112 					     element_defs,utt);
113 	    atts = NIL;
114 	}
115 	else
116 	{
117 	    cerr << "xxml parse error: unexpected token found "
118 		<< line << endl;
119 	    festival_error();
120 	}
121     }
122     // Last call (should synthesize trailing tokens
123     utt = xxml_call_element_function(ts.get().string(),atts,element_defs,utt);
124 
125     ts.close();
126 }
127 
xxml_call_element_function(const EST_String & element,LISP atts,LISP elements,LISP utt)128 LISP xxml_call_element_function(const EST_String &element,
129 				LISP atts, LISP elements, LISP utt)
130 {
131     // Form the call to the defined element function, with the attributes
132     // and the utterance, returns the utterance
133     LISP def,l;
134 
135     def = siod_assoc_str(element,elements);
136 
137     if (def != NIL)
138     {
139 	// You get two arguments, ATTLIST and UTTERANCE
140 	l = cons(
141 	    make_param_lisp("ATTLIST",
142 			    cons(rintern("quote"),cons(atts,NIL))),
143 	    cons(
144 	    make_param_lisp("UTT",
145 			    cons(rintern("quote"),cons(utt,NIL))),
146 		 NIL));
147 	return leval(cons(rintern("let"),
148 			  cons(l,cdr(cdr(def)))),NIL);
149     }
150     else   // no definition to do nothing
151 	return utt;
152 }
153 
xxml_get_attribute(const EST_String & remainder)154 static LISP xxml_get_attribute(const EST_String &remainder)
155 {
156     EST_TokenStream ts;
157     LISP tokens=NIL,att=NIL;
158     EST_String name;
159     EST_Token t;
160 
161     ts.open_string(remainder);
162     name = (EST_String)ts.get();
163     if ((t=ts.get()) == "IMPLIED")
164 	att = cons(rintern(name),cons(NIL,NIL));
165     else if (t == "TOKEN")
166     {
167 	EST_Token v = ts.get();
168 	att = cons(rintern(name),cons(cons(rintern(v.string()),NIL),NIL));
169     }
170     else if (t == "CDATA")
171     {
172 	while (!ts.eof())
173 	    tokens = cons(rintern(ts.get().string()),tokens);
174 	att = cons(rintern(name),cons(reverse(tokens),NIL));
175     }
176     else
177     {
178 	cerr << "XXML: unknow attribute type " << remainder << endl;
179 	festival_error();
180     }
181 
182     ts.close();
183     return att;
184 }
185 
/* Undo the escapes found in STML (sgml) data lines.
 *
 * Returns a newly walloc'ed, NUL terminated copy of LINE with
 *     \n      replaced by a newline
 *     \\      replaced by a single backslash
 *     \0dd    replaced by the character with that octal code; only
 *     \1dd    escapes starting with 0 or 1 are decoded, and at most
 *             three octal digits are read
 * Any other backslash, including one at the very end of the line, is
 * copied through unchanged.  The result is never longer than LINE.  An
 * escape that decodes to zero puts a NUL in the middle of the result.
 * The caller owns the returned buffer.
 */
static char *xxml_process_line(const char *line)
{
    char *procline = walloc(char,strlen(line)+1);
    size_t i = 0, j = 0;

    // Each pass consumes at least one input character and writes exactly
    // one output character, so j can never run past strlen(line)
    while (line[i] != '\0')
    {
	if (line[i] != '\\')
	{
	    procline[j++] = line[i++]; // no change
	    continue;
	}

	// line[i] is a backslash, so line[i+1] is at worst the final NUL
	char esc = line[i+1];
	if (esc == 'n')
	{
	    procline[j++] = '\n';
	    i += 2;
	}
	else if (esc == '\\')
	{
	    procline[j++] = '\\';
	    i += 2;
	}
	else if ((esc == '0') || (esc == '1')) // its an octal number
	{
	    int k,oct = 0;
	    i++;
	    // Stop at the first non octal digit so that a short escape
	    // cannot carry the scan beyond the end of the line
	    for (k=0; (k < 3) && (line[i] >= '0') && (line[i] <= '7'); k++,i++)
		oct = (oct*8)+(line[i]-'0');
	    procline[j++] = (char)oct;
	}
	else
	{
	    // Not an escape we know: keep the backslash itself and let
	    // the following character be handled on the next pass
	    procline[j++] = '\\';
	    i++;
	}
    }
    procline[j] = '\0';
    return procline;
}
223 
tts_xxml_token(EST_Item * t)224 static void tts_xxml_token(EST_Item *t)
225 {
226     // Add xxml_word features to t
227     LISP a;
228 
229     for (a=xxml_word_features; a != NIL; a=cdr(a))
230 	if ((car(cdr(car(a))) != NIL) &&
231 	    (!streq("NAME",get_c_string(car(car(a))))))
232 	{
233 	    if (cdr(cdr(car(a))) == NIL)
234 		t->set(get_c_string(car(car(a))),
235 		       get_c_string(car(cdr(car(a)))));
236 	    else
237 	    {
238 		// Its more complex than a single atom so save the list
239 		t->set(get_c_string(car(car(a))),
240 		       siod_sprint(car(cdr(car(a)))));
241 	    }
242 	}
243 
244     apply_hooks(xxml_token_hooks,siod(t));
245 }
246 
xxml_get_tokens(const EST_String & line,LISP feats,LISP utt)247 LISP xxml_get_tokens(const EST_String &line,LISP feats,LISP utt)
248 {
249     // Read from here until end of line collects all the tokens
250     // Note tokens are in reverse order until they are made into an
251     // utterance
252     EST_TokenStream ls;
253     EST_Token t;
254     LISP eou_tree;
255     char *processed_line;
256     processed_line = xxml_process_line(line);
257     ls.open_string(processed_line);
258     ls.set_SingleCharSymbols(
259         get_c_string(siod_get_lval("token.singlecharsymbols",
260 				   "token.singlecharsymbols unset")));
261     ls.set_PunctuationSymbols(
262         get_c_string(siod_get_lval("token.punctuation",
263 				   "token.punctuation unset")));
264     ls.set_PrePunctuationSymbols(
265         get_c_string(siod_get_lval("token.prepunctuation",
266 				   "token.prepunctuation unset")));
267     ls.set_WhiteSpaceChars(
268         get_c_string(siod_get_lval("token.whitespace",
269 				   "token.whitespace unset")));
270 
271     eou_tree = siod_get_lval("eou_tree","No end of utterance tree set");
272 
273     xxml_word_features = feats;
274     xxml_token_hooks = siod_get_lval("xxml_token_hooks",NULL);
275 
276     // Segment and synth as much as appropriate
277     utt = tts_chunk_stream(ls,tts_xxml_token,tts_xxml_utt,eou_tree,utt);
278 
279     return utt;
280 }
281 
tts_xxml_utt(LISP lutt)282 static void tts_xxml_utt(LISP lutt)
283 {
284     // Build and utterance with these tokens and apply xxml synth function
285 
286     if ((lutt == NIL) ||
287 	(get_c_utt(lutt)->relation("Token")->length() == 0))
288 	return;   // in this case do nothing.
289 
290     leval(cons(rintern("xxml_synth"),
291 	       cons(quote(lutt),NIL)),NIL);
292 }
293 
294