1 /*************************************************************************/
2 /*                                                                       */
3 /*                  Language Technologies Institute                      */
4 /*                     Carnegie Mellon University                        */
5 /*                         Copyright (c) 2000                            */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*             Author:  Alan W Black (awb@cs.cmu.edu)                    */
34 /*               Date:  September 2000                                   */
35 /*************************************************************************/
36 /*                                                                       */
37 /*  General synthesis control                                            */
38 /*                                                                       */
39 /*************************************************************************/
40 
41 #include "cst_hrg.h"
42 #include "cst_cart.h"
43 #include "cst_tokenstream.h"
44 #include "cst_utt_utils.h"
45 #include "cst_lexicon.h"
46 #include "cst_units.h"
47 #include "cst_synth.h"
48 #include "cst_phoneset.h"
49 
/* Instantiates CST_VAL_REGISTER_FUNCPTR for the name "breakfunc" and the
   C type cst_breakfunc.
   NOTE(review): presumably this generates the cst_val wrapper/accessor
   functions for utterance-break callbacks -- confirm against the macro
   definition, which is not visible in this file. */
CST_VAL_REGISTER_FUNCPTR(breakfunc,cst_breakfunc)
51 
/* Debug tracing for the synthesis modules.                              */
/* SYNTH_MODULES_DEBUG defaults to 0, which compiles all tracing out.    */
/* DPRINTF(l,x) passes the parenthesised argument list x to cst_dbgmsg   */
/* when SYNTH_MODULES_DEBUG is greater than l.  Both forms expand to a   */
/* single do/while(0) statement so the macro is safe inside unbraced     */
/* if/else bodies (a bare `if' expansion would capture a following else) */
#ifndef SYNTH_MODULES_DEBUG
#define SYNTH_MODULES_DEBUG 0
#endif

#if SYNTH_MODULES_DEBUG > 0
#define DPRINTF(l,x) do { if (SYNTH_MODULES_DEBUG > (l)) cst_dbgmsg x; } while (0)
#else
#define DPRINTF(l,x) do { } while (0)
#endif
61 
/* Forward declaration: tokentosegs() is defined further down in this   */
/* file but is referenced by the synth_method_phones table below.       */
static cst_utterance *tokentosegs(cst_utterance *u);
63 
/* Stage table run by utt_synth() through apply_synth_method().         */
/* Each entry pairs the name of an utterance feature that may hold an   */
/* overriding function with the default function to call when that     */
/* feature is unset; a NULL default makes the stage a no-op unless an   */
/* override is present (see apply_synth_module()).  Stages run in the   */
/* order listed, starting with tokenization of the input text, and the  */
/* { NULL, NULL } entry terminates the table.                           */
static const cst_synth_module synth_method_text[] = {
    { "tokenizer_func", default_tokenization },
    { "textanalysis_func", default_textanalysis },
    { "pos_tagger_func", default_pos_tagger },
    { "phrasing_func", default_phrasing },
    { "lexical_insertion_func", default_lexical_insertion },
    { "pause_insertion_func", default_pause_insertion },
    { "intonation_func", cart_intonation },
    { "postlex_func", NULL },
    { "duration_model_func", cart_duration },
    { "f0_model_func", NULL },
    { "wave_synth_func", NULL },
    { "post_synth_hook_func", NULL },
    { NULL, NULL }
};
79 
/* Stage table run by utt_synth_text2segs().  It lists the same first   */
/* six stages as synth_method_text and stops after pause insertion, so  */
/* no intonation, duration, F0 or waveform stage is applied.            */
/* The { NULL, NULL } entry terminates the table.                       */
static const cst_synth_module synth_method_text2segs[] = {
    { "tokenizer_func", default_tokenization },
    { "textanalysis_func", default_textanalysis },
    { "pos_tagger_func", default_pos_tagger },
    { "phrasing_func", default_phrasing },
    { "lexical_insertion_func", default_lexical_insertion },
    { "pause_insertion_func", default_pause_insertion },
    { NULL, NULL }
};
89 
/* Stage table run by utt_synth_tokens().  Identical to                 */
/* synth_method_text except that there is no tokenizer stage; the first */
/* stage (default_textanalysis) reads the utterance's "Token" relation, */
/* so that relation has to exist before this method is applied.         */
/* The { NULL, NULL } entry terminates the table.                       */
static const cst_synth_module synth_method_tokens[] = {
    { "textanalysis_func", default_textanalysis },
    { "pos_tagger_func", default_pos_tagger },
    { "phrasing_func", default_phrasing },
    { "lexical_insertion_func", default_lexical_insertion },
    { "pause_insertion_func", default_pause_insertion },
    { "intonation_func", cart_intonation },
    { "postlex_func", NULL },
    { "duration_model_func", cart_duration },
    { "f0_model_func", NULL },
    { "wave_synth_func", NULL },
    { "post_synth_hook_func", NULL },
    { NULL, NULL }
};
104 
/* Stage table run by utt_synth_phones().  The input text is tokenized  */
/* as usual, but the default text-analysis stage is tokentosegs(),      */
/* which treats each token as a phone name, and the default F0 stage is */
/* flat_prosody().  There are no phrasing, lexical-insertion or         */
/* pause-insertion stages in this method.                               */
/* The { NULL, NULL } entry terminates the table.                       */
static const cst_synth_module synth_method_phones[] = {
    { "tokenizer_func", default_tokenization },
    { "textanalysis_func", tokentosegs },
    { "pos_tagger_func", default_pos_tagger },
    { "intonation_func", NULL },
    { "duration_model_func", cart_duration },
    { "f0_model_func", flat_prosody },
    { "wave_synth_func", NULL },
    { "post_synth_hook_func", NULL },
    { NULL, NULL }
};
116 
utt_synth_wave(cst_wave * w,cst_voice * v)117 cst_utterance *utt_synth_wave(cst_wave *w,cst_voice *v)
118 {
119     /* Create an utterance with a wave in it as if we've synthesized it */
120     /* Put it through streaming if that is require */
121     cst_utterance *u;
122     const cst_val *streaming_info_val;
123     cst_audio_streaming_info *asi = NULL;
124 
125     u = new_utterance();
126     utt_init(u,v);
127     utt_set_wave(u,w);
128 
129     streaming_info_val=get_param_val(u->features,"streaming_info",NULL);
130     if (streaming_info_val)
131     {
132         asi = val_audio_streaming_info(streaming_info_val);
133         asi->utt = u;
134     }
135 
136     if (!asi) return u;  /* no stream */
137 
138     /* Do streaming */
139     (*asi->asc)(w,0,w->num_samples,1,asi);
140 
141     return u;
142 }
143 
/* Run one synthesis stage on u.  A function stored in the utterance    */
/* features under mod->hookname takes precedence; otherwise the stage's */
/* default hook is used; with neither, u is returned untouched.         */
cst_utterance *apply_synth_module(cst_utterance *u,
                                  const cst_synth_module *mod)
{
    const cst_val *override = feat_val(u->features, mod->hookname);

    if (override != NULL)
        return (*val_uttfunc(override))(u);

    return mod->defhook ? (*mod->defhook)(u) : u;
}
156 
/* Run every stage of the NULL-hookname-terminated table meth on u, in  */
/* order.  Stops and returns NULL as soon as a stage returns NULL;      */
/* otherwise returns the utterance produced by the last stage.          */
cst_utterance *apply_synth_method(cst_utterance *u,
                                  const cst_synth_module meth[])
{
    const cst_synth_module *stage;

    for (stage = meth; stage->hookname != NULL; stage++)
    {
        u = apply_synth_module(u, stage);
        if (u == NULL)
            return NULL;
    }

    return u;
}
169 
/* Attach utterance u to voice vox and return u.                        */
cst_utterance *utt_init(cst_utterance *u, cst_voice *vox)
{
    /* Chain the voice's features and feature functions behind the      */
    /* utterance's own, so lookups fall through to the voice            */
    feat_link_into(vox->features, u->features);
    feat_link_into(vox->ffunctions, u->ffunctions);

    /* Voice-specific initialization is optional */
    if (vox->utt_init != NULL)
        (*vox->utt_init)(u, vox);

    return u;
}
183 
/* Synthesize u with the synth_method_text stage table.                 */
cst_utterance *utt_synth(cst_utterance *u)
{
    cst_utterance *result;

    result = apply_synth_method(u, synth_method_text);
    return result;
}
188 
/* Synthesize u with the synth_method_tokens stage table (no tokenizer  */
/* stage).                                                              */
cst_utterance *utt_synth_tokens(cst_utterance *u)
{
    cst_utterance *result;

    result = apply_synth_method(u, synth_method_tokens);
    return result;
}
193 
/* Process u with the synth_method_text2segs stage table, which ends    */
/* after pause insertion.                                               */
cst_utterance *utt_synth_text2segs(cst_utterance *u)
{
    cst_utterance *result;

    result = apply_synth_method(u, synth_method_text2segs);
    return result;
}
198 
/* Synthesize u with the synth_method_phones stage table.               */
cst_utterance *utt_synth_phones(cst_utterance *u)
{
    cst_utterance *result;

    result = apply_synth_method(u, synth_method_phones);
    return result;
}
203 
/* Split the utterance's input text into tokens and store them in a new */
/* "Token" relation.  The character classes for the tokenizer are read  */
/* from the "text_*" utterance features.  Each non-empty token becomes  */
/* one item carrying its name, surrounding whitespace and punctuation,  */
/* and its file position / line number.  Returns u.                     */
cst_utterance *default_tokenization(cst_utterance *u)
{
    const char *input, *tok;
    const char *ws_chars, *single_chars, *prepunc_chars, *postpunc_chars;
    cst_relation *tokens;
    cst_tokenstream *ts;
    cst_item *item;

    input = utt_input_text(u);
    tokens = utt_relation_create(u, "Token");

    ws_chars = get_param_string(u->features, "text_whitespace", NULL);
    single_chars = get_param_string(u->features, "text_singlecharsymbols", NULL);
    prepunc_chars = get_param_string(u->features, "text_prepunctuation", NULL);
    postpunc_chars = get_param_string(u->features, "text_postpunctuation", NULL);

    ts = ts_open_string(input, ws_chars, single_chars,
                        prepunc_chars, postpunc_chars);

    while (!ts_eof(ts))
    {
        tok = ts_get(ts);
        if (cst_strlen(tok) == 0)
            continue;           /* empty tokens are not recorded */

        item = relation_append(tokens, NULL);
        item_set_string(item, "name", tok);
        item_set_string(item, "whitespace", ts->whitespace);
        item_set_string(item, "prepunctuation", ts->prepunctuation);
        item_set_string(item, "punc", ts->postpunctuation);
        item_set_int(item, "file_pos", ts->file_pos);
        item_set_int(item, "line_number", ts->line_number);
    }

    ts_close(ts);

    return u;
}
238 
default_tokentowords(cst_item * i)239 cst_val *default_tokentowords(cst_item *i)
240 {
241     return cons_val(string_val(item_feat_string(i,"name")), NULL);
242 }
243 
/* Expand every item of the "Token" relation into words.  The word list */
/* for a token comes from the "tokentowords_func" feature when set,     */
/* otherwise from default_tokentowords().  Each word is added as a      */
/* daughter of its token and appended to a new "Word" relation.  A list */
/* element may be a plain string (the word name) or a cons of name and  */
/* extra features, which are copied onto the word.  Returns u.          */
cst_utterance *default_textanalysis(cst_utterance *u)
{
    cst_relation *words_rel;
    const cst_val *custom_ttw;
    cst_item *tok, *word_item;
    cst_val *word_list;
    const cst_val *cell, *entry;

    words_rel = utt_relation_create(u, "Word");
    custom_ttw = feat_val(u->features, "tokentowords_func");

    tok = relation_head(utt_relation(u, "Token"));
    while (tok != NULL)
    {
        word_list = custom_ttw
            ? (cst_val *)(*val_itemfunc(custom_ttw))(tok)
            : default_tokentowords(tok);

        for (cell = word_list; cell != NULL; cell = val_cdr(cell))
        {
            word_item = item_add_daughter(tok, NULL);
            entry = val_car(cell);
            if (!cst_val_consp(entry))
            {
                item_set_string(word_item, "name", val_string(entry));
            }
            else
            {   /* (name . features): copy the extra features too */
                item_set_string(word_item, "name", val_string(val_car(entry)));
                feat_copy_into(val_features(val_cdr(entry)),
                               item_feats(word_item));
            }
            relation_append(words_rel, word_item);
        }
        delete_val(word_list);

        tok = item_next(tok);
    }

    return u;
}
280 
/* Group the items of the "Word" relation into a new "Phrase" relation. */
/* A phrase named "B" is opened for the first word and after every word */
/* for which the optional "phrasing_cart" tree answers "BB".  Without a */
/* tree all words end up in a single phrase.  Finally, if the last      */
/* phrase is not the only one it is renamed "BB".  Returns u.           */
cst_utterance *default_phrasing(cst_utterance *u)
{
    cst_relation *phrases;
    cst_item *word;
    cst_item *open_phrase = NULL;   /* phrase currently collecting words */
    cst_item *last_phrase = NULL;   /* most recently created phrase */
    cst_cart *break_cart = NULL;
    const cst_val *decision;

    phrases = utt_relation_create(u, "Phrase");
    if (feat_present(u->features, "phrasing_cart"))
        break_cart = val_cart(feat_val(u->features, "phrasing_cart"));

    word = relation_head(utt_relation(u, "Word"));
    while (word != NULL)
    {
        if (open_phrase == NULL)
        {
            open_phrase = relation_append(phrases, NULL);
            last_phrase = open_phrase;
            item_set_string(open_phrase, "name", "B");
        }
        item_add_daughter(open_phrase, word);

        if (break_cart != NULL)
        {
            decision = cart_interpret(word, break_cart);
            if (cst_streq(val_string(decision), "BB"))
                open_phrase = NULL;     /* next word starts a new phrase */
        }
        word = item_next(word);
    }

    /* follow festival: mark the final phrase when there are several */
    if ((last_phrase != NULL) && (item_prev(last_phrase) != NULL))
        item_set_string(last_phrase, "name", "BB");

    return u;
}
316 
/* Insert silence segments: one at the very start of the "Segment"      */
/* relation and one after the final segment of every phrase.  The       */
/* silence phone name is taken from the "silence" feature.  Returns u.  */
cst_utterance *default_pause_insertion(cst_utterance *u)
{
    const char *sil_name;
    cst_relation *segments;
    const cst_item *word;
    cst_item *phrase, *first, *last_seg, *pause;

    sil_name = val_string(feat_val(u->features, "silence"));

    /* Leading silence: prepend to the first segment, or start the      */
    /* relation if it is still empty                                    */
    segments = utt_relation(u, "Segment");
    first = relation_head(segments);
    pause = first ? item_prepend(first, NULL)
                  : relation_append(segments, NULL);
    item_set_string(pause, "name", sil_name);

    for (phrase = relation_head(utt_relation(u, "Phrase"));
         phrase != NULL;
         phrase = item_next(phrase))
    {
        /* Walk backwards from the last word to the first one that      */
        /* actually has a final segment                                 */
        last_seg = NULL;
        for (word = item_last_daughter(phrase);
             (word != NULL) && (last_seg == NULL);
             word = item_prev(word))
        {
            last_seg = path_to_item(word,
                         "R:SylStructure.daughtern.daughtern.R:Segment");
        }

        if (last_seg != NULL)
        {
            pause = item_append(last_seg, NULL);
            item_set_string(pause, "name", sil_name);
        }
    }

    return u;
}
350 
/* Predict accents and end tones for every item of the "Syllable"       */
/* relation using the "int_cart_accents" and "int_cart_tones" trees.    */
/* A prediction of "NONE" leaves the corresponding feature unset.       */
/* Does nothing when "no_intonation_accent_model" is present.           */
/* Returns u.                                                           */
cst_utterance *cart_intonation(cst_utterance *u)
{
    cst_cart *accent_tree, *tone_tree;
    cst_item *syl;
    const char *label;

    /* not all languages have intonation models */
    if (feat_present(u->features, "no_intonation_accent_model"))
        return u;

    accent_tree = val_cart(feat_val(u->features, "int_cart_accents"));
    tone_tree = val_cart(feat_val(u->features, "int_cart_tones"));

    syl = relation_head(utt_relation(u, "Syllable"));
    while (syl != NULL)
    {
        label = val_string(cart_interpret(syl, accent_tree));
        if (!cst_streq("NONE", label))
            item_set_string(syl, "accent", label);

        label = val_string(cart_interpret(syl, tone_tree));
        if (!cst_streq("NONE", label))
            item_set_string(syl, "endtone", label);

        DPRINTF(0,("word %s gpos %s stress %s ssyl_in %s ssyl_out %s accent %s endtone %s\n",
                   ffeature_string(syl,"R:SylStructure.parent.name"),
                   ffeature_string(syl,"R:SylStructure.parent.gpos"),
                   ffeature_string(syl,"stress"),
                   ffeature_string(syl,"ssyl_in"),
                   ffeature_string(syl,"ssyl_out"),
                   ffeature_string(syl,"accent"),
                   ffeature_string(syl,"endtone")));

        syl = item_next(syl);
    }

    return u;
}
383 
/* Instantiates CST_VAL_REGISTER_TYPE_NODEL for the dur_stats type.     */
/* NOTE(review): presumably this provides the val_dur_stats() accessor  */
/* used by cart_duration() and registers the type without a delete      */
/* function -- confirm against the macro definition.                    */
CST_VAL_REGISTER_TYPE_NODEL(dur_stats,dur_stats)
385 
386 const dur_stat *phone_dur_stat(const dur_stats *ds,const char *ph)
387 {
388     int i;
389     for (i=0; ds[i]; i++)
390 	if (cst_streq(ph,ds[i]->phone))
391             return ds[i];
392 
393     return ds[0];
394 }
395 
/* Assign an "end" time to every item of the "Segment" relation.        */
/* The "dur_cart" tree predicts a z-score per segment, which is scaled  */
/* by the phone's stddev/mean from "dur_stats" and by the global        */
/* "duration_stretch" (default 1.0), optionally multiplied by a         */
/* token-level "local_duration_stretch".  End times accumulate from 0.  */
/* Does nothing when "no_segment_duration_model" is present.            */
/* Returns u.                                                           */
cst_utterance *cart_duration(cst_utterance *u)
{
    cst_cart *tree;
    cst_item *seg;
    dur_stats *stats_table;
    const dur_stat *stat;
    float global_stretch, token_stretch, stretch;
    float zscore, dur;
    float end = 0;

    /* not all methods need segment durations */
    if (feat_present(u->features, "no_segment_duration_model"))
        return u;

    tree = val_cart(feat_val(u->features, "dur_cart"));
    global_stretch = get_param_float(u->features, "duration_stretch", 1.0);
    stats_table = val_dur_stats(feat_val(u->features, "dur_stats"));

    seg = relation_head(utt_relation(u, "Segment"));
    while (seg != NULL)
    {
        zscore = val_float(cart_interpret(seg, tree));
        stat = phone_dur_stat(stats_table, item_name(seg));

        /* A zero local stretch means "not set": use the global one only */
        token_stretch = ffeature_float(seg, "R:SylStructure.parent.parent."
                                       "R:Token.parent.local_duration_stretch");
        if (token_stretch)
            stretch = token_stretch * global_stretch;
        else
            stretch = global_stretch;

        dur = stretch * ((zscore*stat->stddev)+stat->mean);
        DPRINTF(0,("phone %s accent %s stress %s pdur %f stretch %f mean %f std %f dur %f\n",
                   item_name(seg),
                   ffeature_string(seg,"R:SylStructure.parent.accented"),
                   ffeature_string(seg,"R:SylStructure.parent.stress"),
                   zscore, stretch, stat->mean,
                   stat->stddev, dur));

        end += dur;
        item_set_float(seg, "end", end);

        seg = item_next(seg);
    }

    return u;
}
438 
/* Tag every item of the "Word" relation with a "pos" feature predicted */
/* by the "pos_tagger_cart" tree.  Without that feature the utterance   */
/* is returned unchanged.                                               */
cst_utterance *default_pos_tagger(cst_utterance *u)
{
    const cst_val *cart_val, *tag;
    const cst_cart *tagger;
    cst_item *w;

    cart_val = get_param_val(u->features, "pos_tagger_cart", NULL);
    if (cart_val == NULL)
        return u;               /* no tagger configured */

    tagger = val_cart(cart_val);

    w = relation_head(utt_relation(u, "Word"));
    while (w != NULL)
    {
        tag = cart_interpret(w, tagger);
        item_set_string(w, "pos", val_string(tag));
        w = item_next(w);
    }

    return u;
}
459 
/* Build the "Syllable", "SylStructure" and "Segment" relations from    */
/* the "Word" relation.  For each word the phone list comes from, in    */
/* order of preference: an explicit "phones" feature on the parent      */
/* token, the lexicon addenda, or lex_lookup().  Phones are appended as */
/* segments and grouped into syllables using the lexicon's syl_boundary */
/* function; a trailing '1' or '0' on a phone name is stripped and      */
/* recorded as the syllable's "stress".  Returns u.                     */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word, *token;
    cst_relation *sylstructure, *seg, *syl;
    cst_lexicon *lex;
    const cst_val *lex_addenda;
    const cst_val *p, *wp, *vpn;
    char *phone_name;
    int phone_len;
    const char *stress = "0";
    const char *pos;
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;
    int dp;  /* non-zero when the phone list was created here and so */
             /* has to be deleted after use                          */

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    lex_addenda = lex->lex_addenda;   /* may be NULL */

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        pos = ffeature_string(word,"pos");
        phones = NULL;
        dp = 0;
        token = item_parent(item_as(word, "Token"));

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        if (item_feat_present(token, "phones"))
        {
            vpn = item_feat(token, "phones");
            if (cst_val_consp(vpn))
            {   /* for SAPI ?? */
                /* awb oct11: this seems wrong -- */
                /* not sure SAPI still (ever) works Oct11 */
                phones = (cst_val *) vpn;   /* borrowed, not deleted */
            }
            else
            {
                dp = 1;
                if (cst_streq(val_string(vpn),
                              ffeature_string(word,"p.R:Token.parent.phones")))
                    phones = NULL; /* Already given these phones */
                else
                    phones = val_readlist_string(val_string(vpn));
            }
        }
        else
        {
            wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda);
            if (wp)
                phones = (cst_val *)val_cdr(val_cdr(wp)); /* borrowed */
            else
            {
                dp = 1;
                phones = lex_lookup(lex,item_feat_string(word,"name"),pos,
                                    u->features);
            }
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {   /* start a new syllable */
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);
            phone_name = cst_strdup(val_string(val_car(p)));

            /* Strip a trailing stress digit.  The length check keeps   */
            /* an empty phone name from being indexed at position -1.   */
            phone_len = (int)cst_strlen(phone_name);
            if ((phone_len > 0) &&
                ((phone_name[phone_len-1] == '1') ||
                 (phone_name[phone_len-1] == '0')))
            {
                stress = (phone_name[phone_len-1] == '1') ? "1" : "0";
                phone_name[phone_len-1] = '\0';
            }

            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);

            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {   /* syllable ends here: record its stress */
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }

        if (dp)
        {
            delete_val(phones);
            phones = NULL;
        }
    }

    return u;
}
577 
578 /* Dummy F0 modelling for phones, copied directly from us_f0_model.c */
flat_prosody(cst_utterance * u)579 cst_utterance *flat_prosody(cst_utterance *u)
580 {
581     /* F0 target model */
582     cst_item *s,*t;
583     cst_relation *targ_rel;
584     float mean, stddev;
585 
586     targ_rel = utt_relation_create(u,"Target");
587     mean = get_param_float(u->features,"target_f0_mean", 100.0);
588     mean *= get_param_float(u->features,"f0_shift", 1.0);
589     stddev = get_param_float(u->features,"target_f0_stddev", 12.0);
590 
591     s=relation_head(utt_relation(u,"Segment"));
592     t = relation_append(targ_rel,NULL);
593     item_set_float(t,"pos",0.0);
594     item_set_float(t,"f0",mean+stddev);
595 
596     s=relation_tail(utt_relation(u,"Segment"));
597     t = relation_append(targ_rel,NULL);
598 
599     item_set_float(t,"pos",item_feat_float(s,"end"));
600     item_set_float(t,"f0",mean-stddev);
601 
602     return u;
603 }
604 
/* Text-analysis stage for phone-string input: every token is taken to  */
/* be a phone name.  All phones hang off a single word named            */
/* "phonestring"; the token "-" marks a syllable break, and a trailing  */
/* '1' or '0' on a phone is stripped and stored as the syllable's       */
/* "stress".  A phone that is not in the utterance's phoneset is        */
/* reported through cst_errmsg()/cst_error().  Returns u.               */
static cst_utterance *tokentosegs(cst_utterance *u)
{
    cst_item *t;
    cst_relation *seg, *syl, *sylstructure, *word;
    cst_item *sylitem, *sylstructureitem, *worditem, *sssyl;
    cst_phoneset *ps;

    ps = val_phoneset(utt_feat_val(u, "phoneset"));
    /* Just copy tokens into the Segment relation */
    seg = utt_relation_create(u, "Segment");
    syl = utt_relation_create(u, "Syllable");
    word = utt_relation_create(u, "Word");
    sylstructure = utt_relation_create(u, "SylStructure");
    sssyl = sylitem = worditem = sylstructureitem = NULL;
    for (t = relation_head(utt_relation(u, "Token")); t; t = item_next(t))
    {
        const char *pname = item_feat_string(t, "name");
        char *name = cst_strdup(pname);
        int len = (int)cst_strlen(name);
        int is_break, is_unknown;

        if (worditem == NULL)
        {   /* the single word that owns every syllable */
            worditem = relation_append(word,NULL);
            item_set_string(worditem, "name", "phonestring");
            sylstructureitem = relation_append(sylstructure,worditem);
        }
        if (sylitem == NULL)
        {
            sylitem = relation_append(syl,NULL);
            sssyl = item_add_daughter(sylstructureitem,sylitem);
        }

        /* Strip a trailing stress digit; the length check keeps an     */
        /* empty name from being indexed at position -1                 */
        if ((len > 0) && ((name[len-1] == '1') || (name[len-1] == '0')))
        {
            item_set_string(sssyl,"stress",(name[len-1] == '1') ? "1" : "0");
            name[len-1] = '\0';
        }

        is_break = cst_streq(name,"-");
        is_unknown = !is_break && (phone_id(ps, name) == -1);

        if (is_break)
        {
            sylitem = NULL;  /* syllable break */
        }
        else if (!is_unknown)
        {
            /* Only real phones become segments, so a "-" token no      */
            /* longer leaves a nameless item in the Segment relation    */
            cst_item *segitem = relation_append(seg, NULL);
            item_add_daughter(sssyl,segitem);
            item_set_string(segitem, "name", name);
        }

        /* Release the copy before any error exit; pname still points   */
        /* at the token's own string                                    */
        cst_free(name);

        if (is_unknown)
        {
            cst_errmsg("Phone `%s' not in phoneset\n", pname);
            cst_error();
        }
    }

    return u;
}
668 
/* Membership test used by default_utt_break(): non-zero when c is a    */
/* character of set.  Unlike a bare strchr(), the NUL character never   */
/* matches, so the first character of an empty string is not mistaken   */
/* for a letter.                                                        */
static int utt_break_char_in(const char *set, char c)
{
    return (c != '\0') && (strchr(set, c) != NULL);
}

/* This is the default utterance-break function; languages may override */
/* it.  It will be ok for some latin based languages.                   */
/*   ts     - token stream; ts->whitespace is the whitespace examined   */
/*            by the newline and length checks below                    */
/*   token  - the token just read (not yet in `tokens')                 */
/*   tokens - tokens collected so far; only the last item is inspected  */
/* Returns TRUE when an utterance break should be placed before         */
/* `token', FALSE otherwise.                                            */
int default_utt_break(cst_tokenstream *ts,
                      const char *token,
                      cst_relation *tokens)
{
    const char *upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const char *lower = "abcdefghijklmnopqrstuvwxyz";
    const char *postpunct = item_feat_string(relation_tail(tokens), "punc");
    const char *ltoken = item_name(relation_tail(tokens));
    int ltoken_len;
    char ltoken_last;

    if (cst_strchr(ts->whitespace,'\n') != cst_strrchr(ts->whitespace,'\n'))
        /* contains two new lines */
        return TRUE;

    /* Well, this is a little specific isn't it. */
    if (((cst_streq(ltoken,"Yahoo")) ||
         (cst_streq(ltoken,"YAHOO")) ||
         (cst_streq(ltoken,"yahoo"))) &&
        strchr(postpunct,'!') &&
        utt_break_char_in(lower,token[0]))
        return FALSE;

    if (strchr(postpunct,':') ||
        strchr(postpunct,'?') ||
        strchr(postpunct,'!'))
        return TRUE;

    /* The remaining rules need a full stop followed by a capital */
    if (!strchr(postpunct,'.') || !utt_break_char_in(upper,token[0]))
        return FALSE;

    if (cst_strlen(ts->whitespace) > 1)
        return TRUE;

    /* Single whitespace character: break unless the last word looks    */
    /* like an abbreviation (ends in a capital, or is shorter than four */
    /* characters and starts with a capital)                            */
    ltoken_len = (int)cst_strlen(ltoken);
    ltoken_last = (ltoken_len > 0) ? ltoken[ltoken_len-1] : '\0';
    if (!(utt_break_char_in(upper,ltoken_last) ||
          ((ltoken_len < 4) && utt_break_char_in(upper,ltoken[0]))))
        return TRUE;

    return FALSE;
}
707