1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : August 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* There are just too many different versions of sgml-based mark up */
/* and none of them are stable, so this allows an arbitrary mapping */
/* of tags to Lisp functions so any of them can be implemented in Lisp */
40 /* That is people can worry about the actual content later and do not */
41 /* need to change the C++ */
42 /* */
43 /* Of course once I give you this functionality you'll just want more */
44 /* */
45 /*=======================================================================*/
46 #include "EST_unix.h"
47 #include "festival.h"
48 #include "text.h"
49 #include "lexicon.h"
50
// Forward declarations of the file-local helpers defined further down.
static LISP xxml_get_attribute(const EST_String &remainder);
static char *xxml_process_line(const char *line);
static void tts_xxml_token(EST_Item *t);
static void tts_xxml_utt(LISP lutt);

// State handed from xxml_get_tokens() to the tts_xxml_token() callback:
// xxml_get_tokens() assigns both before it starts chunking, and
// tts_xxml_token() reads them for every token it is given.
static LISP xxml_word_features = NIL;
static LISP xxml_token_hooks = NIL;
58
tts_file_xxml(LISP filename)59 void tts_file_xxml(LISP filename)
60 {
61 // For stml ssml rsml and jsml etc
62 // filename contains *output* from something like nsgml
63 EST_String inname = get_c_string(filename);
64 EST_String line, type, remainder;
65 EST_TokenStream ts;
66 LISP atts, element_defs;
67 LISP utt = NIL; // for cummulation of tokens
68
69 if (ts.open(inname) == -1)
70 {
71 cerr << "xxml: unable to open output from SGML parser" << endl;
72 festival_error();
73 }
74 ts.set_WhiteSpaceChars(" \t\r\n");
75 ts.set_SingleCharSymbols("");
76 ts.set_PunctuationSymbols("");
77 ts.set_PrePunctuationSymbols("");
78
79 element_defs = siod_get_lval("xxml_elements",NULL);
80 atts = NIL;
81
82 if (ts.peek() != get_c_string(car(car(element_defs))))
83 {
84 cerr << "xxml parse error: " << get_c_string(filename) <<
85 " Expected " << get_c_string(car(car(element_defs)))
86 << " but found " << ts.peek() << endl;
87 festival_error();
88 }
89 while (ts.peek() != get_c_string(car(car(cdr(element_defs)))))
90 {
91 if (ts.eof())
92 {
93 cerr << "xxml parse error: unexpected end of file \n";
94 festival_error();
95 }
96 line = (EST_String)ts.get_upto_eoln();
97 type = line.at(0,1);
98 remainder = line.after(0);
99 if (type == "-")
100 { // Segments into utterances as it goes along
101 utt = xxml_get_tokens(remainder,
102 siod_get_lval("xxml_word_features",NULL),
103 utt);
104 }
105 else if (type == "A") // general attribute
106 {
107 atts = cons(xxml_get_attribute(remainder),atts);
108 }
109 else if ((type == "(") || (type == ")"))
110 {
111 utt = xxml_call_element_function(type+remainder,atts,
112 element_defs,utt);
113 atts = NIL;
114 }
115 else
116 {
117 cerr << "xxml parse error: unexpected token found "
118 << line << endl;
119 festival_error();
120 }
121 }
122 // Last call (should synthesize trailing tokens
123 utt = xxml_call_element_function(ts.get().string(),atts,element_defs,utt);
124
125 ts.close();
126 }
127
xxml_call_element_function(const EST_String & element,LISP atts,LISP elements,LISP utt)128 LISP xxml_call_element_function(const EST_String &element,
129 LISP atts, LISP elements, LISP utt)
130 {
131 // Form the call to the defined element function, with the attributes
132 // and the utterance, returns the utterance
133 LISP def,l;
134
135 def = siod_assoc_str(element,elements);
136
137 if (def != NIL)
138 {
139 // You get two arguments, ATTLIST and UTTERANCE
140 l = cons(
141 make_param_lisp("ATTLIST",
142 cons(rintern("quote"),cons(atts,NIL))),
143 cons(
144 make_param_lisp("UTT",
145 cons(rintern("quote"),cons(utt,NIL))),
146 NIL));
147 return leval(cons(rintern("let"),
148 cons(l,cdr(cdr(def)))),NIL);
149 }
150 else // no definition to do nothing
151 return utt;
152 }
153
xxml_get_attribute(const EST_String & remainder)154 static LISP xxml_get_attribute(const EST_String &remainder)
155 {
156 EST_TokenStream ts;
157 LISP tokens=NIL,att=NIL;
158 EST_String name;
159 EST_Token t;
160
161 ts.open_string(remainder);
162 name = (EST_String)ts.get();
163 if ((t=ts.get()) == "IMPLIED")
164 att = cons(rintern(name),cons(NIL,NIL));
165 else if (t == "TOKEN")
166 {
167 EST_Token v = ts.get();
168 att = cons(rintern(name),cons(cons(rintern(v.string()),NIL),NIL));
169 }
170 else if (t == "CDATA")
171 {
172 while (!ts.eof())
173 tokens = cons(rintern(ts.get().string()),tokens);
174 att = cons(rintern(name),cons(reverse(tokens),NIL));
175 }
176 else
177 {
178 cerr << "XXML: unknow attribute type " << remainder << endl;
179 festival_error();
180 }
181
182 ts.close();
183 return att;
184 }
185
static char *xxml_process_line(const char *line)
{
    // STML (sgml) data lines carry a number of backslash escapes; this
    // undoes them:
    //   backslash + 'n'         -> a newline character
    //   backslash + backslash   -> a single backslash
    //   backslash + 0 or 1, followed by up to two more octal digits
    //                           -> the character with that octal code
    // Any other backslash (including one at the very end of the line)
    // is copied through unchanged.
    //
    // Returns a newly walloc'ed, NUL terminated string which the caller
    // owns.  Every output character consumes at least one input
    // character, so strlen(line)+1 bytes are always enough.
    char *procline = walloc(char,strlen(line)+1);
    size_t in = 0, out = 0;

    while (line[in] != '\0')
    {
        if (line[in] != '\\')
        {
            procline[out++] = line[in++];    // ordinary character
            continue;
        }

        // line[in] is a backslash; line[in+1] is at worst the terminator
        char next = line[in+1];

        if (next == 'n')
        {
            procline[out++] = '\n';
            in += 2;
        }
        else if (next == '\\')
        {
            procline[out++] = '\\';
            in += 2;
        }
        else if ((next == '0') || (next == '1'))   // its an octal number
        {
            int k, oct = 0;

            in++;                                   // step over the backslash
            // Take at most three digits and stop at the first character
            // that is not an octal digit, so a short escape near the
            // end of the line can never run past the terminator.
            for (k = 0;
                 (k < 3) && (line[in] >= '0') && (line[in] <= '7');
                 k++, in++)
                oct = (oct*8)+(line[in]-'0');
            procline[out++] = (char)oct;
        }
        else
        {
            // Not an escape we know: keep the backslash itself.  The
            // character after it is copied on the next trip round.
            procline[out++] = '\\';
            in++;
        }
    }
    procline[out] = '\0';
    return procline;
}
223
tts_xxml_token(EST_Item * t)224 static void tts_xxml_token(EST_Item *t)
225 {
226 // Add xxml_word features to t
227 LISP a;
228
229 for (a=xxml_word_features; a != NIL; a=cdr(a))
230 if ((car(cdr(car(a))) != NIL) &&
231 (!streq("NAME",get_c_string(car(car(a))))))
232 {
233 if (cdr(cdr(car(a))) == NIL)
234 t->set(get_c_string(car(car(a))),
235 get_c_string(car(cdr(car(a)))));
236 else
237 {
238 // Its more complex than a single atom so save the list
239 t->set(get_c_string(car(car(a))),
240 siod_sprint(car(cdr(car(a)))));
241 }
242 }
243
244 apply_hooks(xxml_token_hooks,siod(t));
245 }
246
xxml_get_tokens(const EST_String & line,LISP feats,LISP utt)247 LISP xxml_get_tokens(const EST_String &line,LISP feats,LISP utt)
248 {
249 // Read from here until end of line collects all the tokens
250 // Note tokens are in reverse order until they are made into an
251 // utterance
252 EST_TokenStream ls;
253 EST_Token t;
254 LISP eou_tree;
255 char *processed_line;
256 processed_line = xxml_process_line(line);
257 ls.open_string(processed_line);
258 ls.set_SingleCharSymbols(
259 get_c_string(siod_get_lval("token.singlecharsymbols",
260 "token.singlecharsymbols unset")));
261 ls.set_PunctuationSymbols(
262 get_c_string(siod_get_lval("token.punctuation",
263 "token.punctuation unset")));
264 ls.set_PrePunctuationSymbols(
265 get_c_string(siod_get_lval("token.prepunctuation",
266 "token.prepunctuation unset")));
267 ls.set_WhiteSpaceChars(
268 get_c_string(siod_get_lval("token.whitespace",
269 "token.whitespace unset")));
270
271 eou_tree = siod_get_lval("eou_tree","No end of utterance tree set");
272
273 xxml_word_features = feats;
274 xxml_token_hooks = siod_get_lval("xxml_token_hooks",NULL);
275
276 // Segment and synth as much as appropriate
277 utt = tts_chunk_stream(ls,tts_xxml_token,tts_xxml_utt,eou_tree,utt);
278
279 return utt;
280 }
281
tts_xxml_utt(LISP lutt)282 static void tts_xxml_utt(LISP lutt)
283 {
284 // Build and utterance with these tokens and apply xxml synth function
285
286 if ((lutt == NIL) ||
287 (get_c_utt(lutt)->relation("Token")->length() == 0))
288 return; // in this case do nothing.
289
290 leval(cons(rintern("xxml_synth"),
291 cons(quote(lutt),NIL)),NIL);
292 }
293
294