1 /*************************************************************************/
2 /* */
3 /* Language Technologies Institute */
4 /* Carnegie Mellon University */
5 /* Copyright (c) 2000 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author: Alan W Black (awb@cs.cmu.edu) */
34 /* Date: September 2000 */
35 /*************************************************************************/
36 /* */
37 /* General synthesis control */
38 /* */
39 /*************************************************************************/
40
41 #include "cst_hrg.h"
42 #include "cst_cart.h"
43 #include "cst_tokenstream.h"
44 #include "cst_utt_utils.h"
45 #include "cst_lexicon.h"
46 #include "cst_units.h"
47 #include "cst_synth.h"
48 #include "cst_phoneset.h"
49
/* Register the cst_breakfunc function-pointer type with the cst_val   */
/* system under the name "breakfunc".                                  */
/* NOTE(review): presumably this generates the breakfunc_val() /       */
/* val_breakfunc() accessors -- confirm against cst_val.h.             */
CST_VAL_REGISTER_FUNCPTR(breakfunc,cst_breakfunc)
51
/* Compile-time debug tracing for the synthesis modules.               */
/* Build with -DSYNTH_MODULES_DEBUG=<n> (n > 0) to enable it;          */
/* DPRINTF(l, (args)) then calls cst_dbgmsg(args) whenever n > l.      */
/* `x` must be a complete parenthesised argument list, e.g.            */
/*     DPRINTF(0, ("dur %f\n", d));                                    */
/* With tracing disabled DPRINTF expands to nothing.                   */
#ifndef SYNTH_MODULES_DEBUG
#define SYNTH_MODULES_DEBUG 0
#endif

#if SYNTH_MODULES_DEBUG > 0
/* do/while(0) makes the expansion a single statement so that it       */
/* cannot capture an `else` that follows the call site.                */
#define DPRINTF(l,x) do { if (SYNTH_MODULES_DEBUG > (l)) cst_dbgmsg x; } while (0)
#else
#define DPRINTF(l,x)
#endif
61
/* Forward declaration: tokentosegs() is defined near the end of this  */
/* file but is referenced earlier by the synth_method_phones table.    */
static cst_utterance *tokentosegs(cst_utterance *u);
63
/* Full text-to-speech pipeline, run in order by utt_synth().          */
/* Each entry pairs a feature name with a default stage function.      */
/* apply_synth_module() first looks for a function stored under that   */
/* name in the utterance features and only falls back to the default   */
/* given here; a NULL default therefore means the stage does nothing   */
/* unless such a feature is present.  The { NULL, NULL } entry         */
/* terminates the list.                                                */
static const cst_synth_module synth_method_text[] = {
    { "tokenizer_func", default_tokenization },
    { "textanalysis_func", default_textanalysis },
    { "pos_tagger_func", default_pos_tagger },
    { "phrasing_func", default_phrasing },
    { "lexical_insertion_func", default_lexical_insertion },
    { "pause_insertion_func", default_pause_insertion },
    { "intonation_func", cart_intonation },
    { "postlex_func", NULL },
    { "duration_model_func", cart_duration },
    { "f0_model_func", NULL },
    { "wave_synth_func", NULL },
    { "post_synth_hook_func", NULL },
    { NULL, NULL }
};
79
/* Text-to-segments pipeline, run in order by utt_synth_text2segs().   */
/* It has the same leading stages as synth_method_text but stops after */
/* pause insertion: there are no intonation, duration, F0 or waveform  */
/* entries.  The { NULL, NULL } entry terminates the list.             */
static const cst_synth_module synth_method_text2segs[] = {
    { "tokenizer_func", default_tokenization },
    { "textanalysis_func", default_textanalysis },
    { "pos_tagger_func", default_pos_tagger },
    { "phrasing_func", default_phrasing },
    { "lexical_insertion_func", default_lexical_insertion },
    { "pause_insertion_func", default_pause_insertion },
    { NULL, NULL }
};
89
/* Pipeline run in order by utt_synth_tokens().  Identical to          */
/* synth_method_text except that it has no "tokenizer_func" entry, so  */
/* the first stage is text analysis, which reads the "Token" relation  */
/* of the utterance.  Entries with a NULL default do nothing unless a  */
/* function is stored under that name in the utterance features.       */
/* The { NULL, NULL } entry terminates the list.                       */
static const cst_synth_module synth_method_tokens[] = {
    { "textanalysis_func", default_textanalysis },
    { "pos_tagger_func", default_pos_tagger },
    { "phrasing_func", default_phrasing },
    { "lexical_insertion_func", default_lexical_insertion },
    { "pause_insertion_func", default_pause_insertion },
    { "intonation_func", cart_intonation },
    { "postlex_func", NULL },
    { "duration_model_func", cart_duration },
    { "f0_model_func", NULL },
    { "wave_synth_func", NULL },
    { "post_synth_hook_func", NULL },
    { NULL, NULL }
};
104
/* Pipeline run in order by utt_synth_phones(), for input that is a    */
/* string of phone names.  The text-analysis default is tokentosegs(), */
/* which turns tokens directly into segments, and the F0 default is    */
/* flat_prosody().  There are no phrasing, lexical insertion, pause    */
/* insertion or postlex entries, and intonation defaults to nothing.   */
/* The { NULL, NULL } entry terminates the list.                       */
static const cst_synth_module synth_method_phones[] = {
    { "tokenizer_func", default_tokenization },
    { "textanalysis_func", tokentosegs },
    { "pos_tagger_func", default_pos_tagger },
    { "intonation_func", NULL },
    { "duration_model_func", cart_duration },
    { "f0_model_func", flat_prosody },
    { "wave_synth_func", NULL },
    { "post_synth_hook_func", NULL },
    { NULL, NULL }
};
116
utt_synth_wave(cst_wave * w,cst_voice * v)117 cst_utterance *utt_synth_wave(cst_wave *w,cst_voice *v)
118 {
119 /* Create an utterance with a wave in it as if we've synthesized it */
120 /* Put it through streaming if that is require */
121 cst_utterance *u;
122 const cst_val *streaming_info_val;
123 cst_audio_streaming_info *asi = NULL;
124
125 u = new_utterance();
126 utt_init(u,v);
127 utt_set_wave(u,w);
128
129 streaming_info_val=get_param_val(u->features,"streaming_info",NULL);
130 if (streaming_info_val)
131 {
132 asi = val_audio_streaming_info(streaming_info_val);
133 asi->utt = u;
134 }
135
136 if (!asi) return u; /* no stream */
137
138 /* Do streaming */
139 (*asi->asc)(w,0,w->num_samples,1,asi);
140
141 return u;
142 }
143
/* Run one pipeline stage on u.  A function stored in the utterance    */
/* features under mod->hookname takes precedence; otherwise the        */
/* stage's default hook is used; with neither, u is returned as is.    */
cst_utterance *apply_synth_module(cst_utterance *u,
                                  const cst_synth_module *mod)
{
    const cst_val *override = feat_val(u->features, mod->hookname);

    if (override != NULL)
        return (*val_uttfunc(override))(u);

    return mod->defhook ? (*mod->defhook)(u) : u;
}
156
/* Run every stage of meth (a list terminated by a NULL hookname) on   */
/* u in order, feeding each stage the result of the previous one.      */
/* Stops and returns NULL as soon as a stage returns NULL.             */
cst_utterance *apply_synth_method(cst_utterance *u,
                                  const cst_synth_module meth[])
{
    const cst_synth_module *stage;

    for (stage = meth; stage->hookname != NULL; stage++)
    {
        u = apply_synth_module(u, stage);
        if (u == NULL)
            return NULL;
    }

    return u;
}
169
/* Attach voice vox to utterance u: the voice's features and feature   */
/* functions are linked into the utterance's, so lookups on the        */
/* utterance fall through to the voice.  The voice's own utt_init      */
/* hook, if it has one, is then called.  Returns u.                    */
cst_utterance *utt_init(cst_utterance *u, cst_voice *vox)
{
    feat_link_into(vox->features,u->features);
    feat_link_into(vox->ffunctions,u->ffunctions);

    if (vox->utt_init != NULL)
        (*vox->utt_init)(u, vox);   /* voice-specific initialization */

    return u;
}
183
/* Synthesize u with the full text pipeline (synth_method_text).       */
cst_utterance *utt_synth(cst_utterance *u)
{
    cst_utterance *result;

    result = apply_synth_method(u, synth_method_text);
    return result;
}
188
/* Synthesize u with the token pipeline (synth_method_tokens), which   */
/* has no tokenizer stage.                                             */
cst_utterance *utt_synth_tokens(cst_utterance *u)
{
    cst_utterance *result;

    result = apply_synth_method(u, synth_method_tokens);
    return result;
}
193
/* Process u with the text-to-segments pipeline                        */
/* (synth_method_text2segs), which ends after pause insertion.         */
cst_utterance *utt_synth_text2segs(cst_utterance *u)
{
    cst_utterance *result;

    result = apply_synth_method(u, synth_method_text2segs);
    return result;
}
198
/* Synthesize u with the phone-string pipeline (synth_method_phones).  */
cst_utterance *utt_synth_phones(cst_utterance *u)
{
    cst_utterance *result;

    result = apply_synth_method(u, synth_method_phones);
    return result;
}
203
/* Default tokenizer stage.  Splits the utterance's input text with a  */
/* token stream configured from the "text_whitespace",                 */
/* "text_singlecharsymbols", "text_prepunctuation" and                 */
/* "text_postpunctuation" parameters, and appends one item per         */
/* non-empty token to a new "Token" relation.  Each item records the   */
/* token name, its surrounding whitespace and punctuation, and its     */
/* file position and line number.  Returns u.                          */
cst_utterance *default_tokenization(cst_utterance *u)
{
    const char *input, *tok;
    const char *ws, *single_chars, *pre_punc, *post_punc;
    cst_relation *tok_rel;
    cst_tokenstream *ts;
    cst_item *item;

    input = utt_input_text(u);
    tok_rel = utt_relation_create(u,"Token");

    ws = get_param_string(u->features,"text_whitespace",NULL);
    single_chars = get_param_string(u->features,"text_singlecharsymbols",NULL);
    pre_punc = get_param_string(u->features,"text_prepunctuation",NULL);
    post_punc = get_param_string(u->features,"text_postpunctuation",NULL);
    ts = ts_open_string(input, ws, single_chars, pre_punc, post_punc);

    while (!ts_eof(ts))
    {
        tok = ts_get(ts);
        if (!(cst_strlen(tok) > 0))
            continue;                   /* skip empty tokens */

        item = relation_append(tok_rel,NULL);
        item_set_string(item,"name",tok);
        item_set_string(item,"whitespace",ts->whitespace);
        item_set_string(item,"prepunctuation",ts->prepunctuation);
        item_set_string(item,"punc",ts->postpunctuation);
        item_set_int(item,"file_pos",ts->file_pos);
        item_set_int(item,"line_number",ts->line_number);
    }

    ts_close(ts);

    return u;
}
238
default_tokentowords(cst_item * i)239 cst_val *default_tokentowords(cst_item *i)
240 {
241 return cons_val(string_val(item_feat_string(i,"name")), NULL);
242 }
243
/* Default text-analysis stage.  Expands every item of the "Token"     */
/* relation into words, using the "tokentowords_func" feature when it  */
/* is set and default_tokentowords() otherwise.  Each word becomes a   */
/* daughter of its token and is appended to a new "Word" relation.     */
/* A list element that is itself a cons is read as (name . features):  */
/* its car names the word and its cdr's features are copied onto the   */
/* word.  The word list is deleted after use.  Returns u.              */
cst_utterance *default_textanalysis(cst_utterance *u)
{
    cst_relation *words_rel;
    const cst_val *custom_ttw;
    cst_item *tok;

    words_rel = utt_relation_create(u,"Word");
    custom_ttw = feat_val(u->features, "tokentowords_func");

    for (tok = relation_head(utt_relation(u,"Token")); tok != NULL;
         tok = item_next(tok))
    {
        cst_val *expansion;
        const cst_val *cell;

        if (custom_ttw != NULL)
            expansion = (cst_val *)(*val_itemfunc(custom_ttw))(tok);
        else
            expansion = default_tokentowords(tok);

        for (cell = expansion; cell != NULL; cell = val_cdr(cell))
        {
            cst_item *wd = item_add_daughter(tok,NULL);
            const cst_val *entry = val_car(cell);

            if (cst_val_consp(entry))
            {
                /* entry is (name . features) */
                item_set_string(wd,"name",val_string(val_car(entry)));
                feat_copy_into(val_features(val_cdr(entry)),
                               item_feats(wd));
            }
            else
            {
                item_set_string(wd,"name",val_string(entry));
            }
            relation_append(words_rel,wd);
        }
        delete_val(expansion);
    }

    return u;
}
280
/* Default phrasing stage.  Groups the "Word" relation into items of   */
/* a new "Phrase" relation, each named "B".  When a "phrasing_cart"    */
/* feature is present, a word for which the tree predicts "BB" closes  */
/* the current phrase; without a tree all words share one phrase.      */
/* The most recently opened phrase is renamed "BB" if another phrase   */
/* precedes it (this follows Festival).  Returns u.                    */
cst_utterance *default_phrasing(cst_utterance *u)
{
    cst_relation *phrase_rel;
    cst_cart *break_tree = NULL;
    cst_item *wd;
    cst_item *cur = NULL;       /* phrase currently being filled */
    cst_item *last = NULL;      /* most recently opened phrase */
    const cst_val *pred;

    phrase_rel = utt_relation_create(u,"Phrase");
    if (feat_present(u->features,"phrasing_cart"))
        break_tree = val_cart(feat_val(u->features,"phrasing_cart"));

    wd = relation_head(utt_relation(u,"Word"));
    while (wd != NULL)
    {
        if (cur == NULL)
        {
            cur = relation_append(phrase_rel,NULL);
            last = cur;
            item_set_string(cur,"name","B");
        }
        item_add_daughter(cur,wd);

        if (break_tree != NULL)
        {
            pred = cart_interpret(wd,break_tree);
            if (cst_streq(val_string(pred),"BB"))
                cur = NULL;     /* next word opens a new phrase */
        }
        wd = item_next(wd);
    }

    if (last != NULL && item_prev(last) != NULL)
        item_set_string(last,"name","BB");

    return u;
}
316
/* Default pause-insertion stage.  Puts a segment named by the         */
/* "silence" feature at the very start of the "Segment" relation and   */
/* one after the final segment of each phrase.  Within a phrase the    */
/* words are searched from last to first for one that has a final      */
/* segment; a phrase with none gets no pause.  Returns u.              */
cst_utterance *default_pause_insertion(cst_utterance *u)
{
    const char *sil;
    const cst_item *word;
    cst_item *phrase, *seg;

    sil = val_string(feat_val(u->features,"silence"));

    /* Leading silence: before the first segment, or as the only one */
    seg = relation_head(utt_relation(u,"Segment"));
    if (seg != NULL)
        seg = item_prepend(seg,NULL);
    else
        seg = relation_append(utt_relation(u,"Segment"),NULL);
    item_set_string(seg,"name",sil);

    for (phrase = relation_head(utt_relation(u,"Phrase")); phrase != NULL;
         phrase = item_next(phrase))
    {
        /* Walk backwards to the last word that actually has segments */
        seg = NULL;
        word = item_last_daughter(phrase);
        while (word != NULL)
        {
            seg = path_to_item(word,"R:SylStructure.daughtern.daughtern.R:Segment");
            if (seg != NULL)
                break;
            word = item_prev(word);
        }

        if (seg != NULL)
        {
            seg = item_append(seg,NULL);
            item_set_string(seg,"name",sil);
        }
    }

    return u;
}
350
/* CART-based intonation stage.  For every item in the "Syllable"      */
/* relation the "int_cart_accents" and "int_cart_tones" trees are      */
/* interpreted; any prediction other than "NONE" is stored on the      */
/* syllable as "accent" or "endtone" respectively.  Does nothing when  */
/* the "no_intonation_accent_model" feature is present.  Returns u.    */
cst_utterance *cart_intonation(cst_utterance *u)
{
    cst_cart *accent_tree, *tone_tree;
    cst_item *syl;
    const char *label;

    /* not all languages have intonation models */
    if (feat_present(u->features,"no_intonation_accent_model"))
        return u;

    accent_tree = val_cart(feat_val(u->features,"int_cart_accents"));
    tone_tree = val_cart(feat_val(u->features,"int_cart_tones"));

    syl = relation_head(utt_relation(u,"Syllable"));
    while (syl != NULL)
    {
        label = val_string(cart_interpret(syl,accent_tree));
        if (!cst_streq("NONE",label))
            item_set_string(syl,"accent",label);

        label = val_string(cart_interpret(syl,tone_tree));
        if (!cst_streq("NONE",label))
            item_set_string(syl,"endtone",label);

        DPRINTF(0,("word %s gpos %s stress %s ssyl_in %s ssyl_out %s accent %s endtone %s\n",
                   ffeature_string(syl,"R:SylStructure.parent.name"),
                   ffeature_string(syl,"R:SylStructure.parent.gpos"),
                   ffeature_string(syl,"stress"),
                   ffeature_string(syl,"ssyl_in"),
                   ffeature_string(syl,"ssyl_out"),
                   ffeature_string(syl,"accent"),
                   ffeature_string(syl,"endtone")));

        syl = item_next(syl);
    }

    return u;
}
383
/* Register dur_stats as a cst_val type so that duration statistics    */
/* can be stored in a feature set; cart_duration() reads them back     */
/* with val_dur_stats().                                               */
/* NOTE(review): the _NODEL variant presumably means the val does not  */
/* free the table when deleted -- confirm against cst_val.h.           */
CST_VAL_REGISTER_TYPE_NODEL(dur_stats,dur_stats)
385
386 const dur_stat *phone_dur_stat(const dur_stats *ds,const char *ph)
387 {
388 int i;
389 for (i=0; ds[i]; i++)
390 if (cst_streq(ph,ds[i]->phone))
391 return ds[i];
392
393 return ds[0];
394 }
395
/* CART-based duration stage.  For each item of the "Segment"          */
/* relation the "dur_cart" tree predicts a z-score, which is scaled    */
/* by the phone's stddev and offset by its mean (from "dur_stats").    */
/* The result is multiplied by the "duration_stretch" parameter        */
/* (default 1.0) and, when non-zero, by the enclosing token's          */
/* "local_duration_stretch".  The running total is stored on each      */
/* segment as "end".  Does nothing when the                            */
/* "no_segment_duration_model" feature is present.  Returns u.         */
cst_utterance *cart_duration(cst_utterance *u)
{
    cst_cart *tree;
    dur_stats *stats;
    const dur_stat *ph_stat;
    cst_item *seg;
    float zscore, global_stretch, stretch, seg_dur;
    float end_time = 0;

    /* not all methods need segment durations */
    if (feat_present(u->features,"no_segment_duration_model"))
        return u;

    tree = val_cart(feat_val(u->features,"dur_cart"));
    global_stretch = get_param_float(u->features,"duration_stretch", 1.0);
    stats = val_dur_stats(feat_val(u->features,"dur_stats"));

    seg = relation_head(utt_relation(u,"Segment"));
    while (seg != NULL)
    {
        zscore = val_float(cart_interpret(seg,tree));
        ph_stat = phone_dur_stat(stats,item_name(seg));

        stretch = ffeature_float(seg, "R:SylStructure.parent.parent."
                                      "R:Token.parent.local_duration_stretch");
        if (stretch != 0)
            stretch = stretch * global_stretch;
        else
            stretch = global_stretch;   /* no token-level stretch set */

        seg_dur = stretch * ((zscore*ph_stat->stddev)+ph_stat->mean);
        DPRINTF(0,("phone %s accent %s stress %s pdur %f stretch %f mean %f std %f dur %f\n",
                   item_name(seg),
                   ffeature_string(seg,"R:SylStructure.parent.accented"),
                   ffeature_string(seg,"R:SylStructure.parent.stress"),
                   zscore, stretch, ph_stat->mean,
                   ph_stat->stddev, seg_dur));

        end_time += seg_dur;
        item_set_float(seg,"end",end_time);

        seg = item_next(seg);
    }

    return u;
}
438
/* Default part-of-speech stage.  When a "pos_tagger_cart" parameter   */
/* is available, the tree is interpreted for each item of the "Word"   */
/* relation and the prediction stored as its "pos" feature; without    */
/* the parameter the utterance is left untouched.  Returns u.          */
cst_utterance *default_pos_tagger(cst_utterance *u)
{
    const cst_val *tree_val, *tag;
    const cst_cart *tagger;
    cst_item *wd;

    tree_val = get_param_val(u->features,"pos_tagger_cart",NULL);
    if (tree_val == NULL)
        return u;                       /* no tagger configured */
    tagger = val_cart(tree_val);

    wd = relation_head(utt_relation(u,"Word"));
    while (wd != NULL)
    {
        tag = cart_interpret(wd,tagger);
        item_set_string(wd,"pos",val_string(tag));
        wd = item_next(wd);
    }

    return u;
}
459
/* Default lexical-insertion stage.  Creates the "Syllable",           */
/* "SylStructure" and "Segment" relations and fills them from the      */
/* "Word" relation.  The phone list for a word comes from, in order:   */
/*   1. a "phones" feature on the word's parent token,                 */
/*   2. an entry for the word in the lexicon's addenda,                */
/*   3. lex_lookup() on the word name and its "pos".                   */
/* A trailing '1' or '0' on a phone name is removed and remembered as  */
/* stress; the stress is written to the syllable when the lexicon's    */
/* syl_boundary() callback reports that the syllable ends.             */
/* Returns u.                                                          */
cst_utterance *default_lexical_insertion(cst_utterance *u)
{
    cst_item *word;
    cst_relation *sylstructure,*seg,*syl;
    cst_lexicon *lex;
    const cst_val *lex_addenda = NULL;
    const cst_val *p, *wp = NULL;
    char *phone_name, *last_ch;
    const char *stress = "0";
    const char *pos;
    cst_val *phones;
    cst_item *ssword, *sssyl, *segitem, *sylitem, *seg_in_syl;
    const cst_val *vpn;
    int dp = 0;

    lex = val_lexicon(feat_val(u->features,"lexicon"));
    if (lex->lex_addenda)
        lex_addenda = lex->lex_addenda;

    syl = utt_relation_create(u,"Syllable");
    sylstructure = utt_relation_create(u,"SylStructure");
    seg = utt_relation_create(u,"Segment");

    for (word=relation_head(utt_relation(u,"Word"));
         word; word=item_next(word))
    {
        ssword = relation_append(sylstructure,word);
        pos = ffeature_string(word,"pos");
        phones = NULL;
        wp = NULL;
        dp = 0;  /* should the phones get deleted or not */

        /* FIXME: need to make sure that textanalysis won't split
           tokens with explicit pronunciation (or that it will
           propagate such to words, then we can remove the path here) */
        if (item_feat_present(item_parent(item_as(word, "Token")), "phones"))
        {
            vpn = item_feat(item_parent(item_as(word, "Token")), "phones");
            if (cst_val_consp(vpn))
            {   /* for SAPI ?? */
                /* awb oct11: this seems wrong -- */
                /* not sure SAPI still (ever) works Oct11 */
                phones = (cst_val *) vpn;   /* borrowed, not deleted */
            }
            else
            {
                dp = 1;
                if (cst_streq(val_string(vpn),
                              ffeature_string(word,"p.R:Token.parent.phones")))
                    phones = NULL;  /* Already given these phones */
                else
                    phones = val_readlist_string(val_string(vpn));
            }
        }
        else
        {
            wp = val_assoc_string(item_feat_string(word, "name"),lex_addenda);
            if (wp)
                phones = (cst_val *)val_cdr(val_cdr(wp)); /* borrowed */
            else
            {
                dp = 1;
                phones = lex_lookup(lex,item_feat_string(word,"name"),pos,
                                    u->features);
            }
        }

        for (sssyl=NULL,sylitem=NULL,p=phones; p; p=val_cdr(p))
        {
            if (sylitem == NULL)
            {
                sylitem = relation_append(syl,NULL);
                sssyl = item_add_daughter(ssword,sylitem);
                stress = "0";
            }
            segitem = relation_append(seg,NULL);
            phone_name = cst_strdup(val_string(val_car(p)));

            /* Strip a trailing stress digit.  The length is taken once
               and an empty name is skipped so that we never index
               before the start of the buffer. */
            last_ch = phone_name + cst_strlen(phone_name);
            if (last_ch != phone_name)
            {
                --last_ch;
                if (*last_ch == '1')
                {
                    stress = "1";
                    *last_ch = '\0';
                }
                else if (*last_ch == '0')
                {
                    stress = "0";
                    *last_ch = '\0';
                }
            }

            item_set_string(segitem,"name",phone_name);
            seg_in_syl = item_add_daughter(sssyl,segitem);
            if ((lex->syl_boundary)(seg_in_syl,val_cdr(p)))
            {
                /* Syllable is complete: next phone opens a new one */
                sylitem = NULL;
                if (sssyl)
                    item_set_string(sssyl,"stress",stress);
            }
            cst_free(phone_name);
        }
        if (dp)
        {
            delete_val(phones);
            phones = NULL;
        }
    }

    return u;
}
577
578 /* Dummy F0 modelling for phones, copied directly from us_f0_model.c */
flat_prosody(cst_utterance * u)579 cst_utterance *flat_prosody(cst_utterance *u)
580 {
581 /* F0 target model */
582 cst_item *s,*t;
583 cst_relation *targ_rel;
584 float mean, stddev;
585
586 targ_rel = utt_relation_create(u,"Target");
587 mean = get_param_float(u->features,"target_f0_mean", 100.0);
588 mean *= get_param_float(u->features,"f0_shift", 1.0);
589 stddev = get_param_float(u->features,"target_f0_stddev", 12.0);
590
591 s=relation_head(utt_relation(u,"Segment"));
592 t = relation_append(targ_rel,NULL);
593 item_set_float(t,"pos",0.0);
594 item_set_float(t,"f0",mean+stddev);
595
596 s=relation_tail(utt_relation(u,"Segment"));
597 t = relation_append(targ_rel,NULL);
598
599 item_set_float(t,"pos",item_feat_float(s,"end"));
600 item_set_float(t,"f0",mean-stddev);
601
602 return u;
603 }
604
/* Text-analysis stage for phone-string input: every token is taken    */
/* to be a phone name and copied into the "Segment" relation.  All     */
/* segments hang off a single word named "phonestring"; a "-" token    */
/* ends the current syllable.  A trailing '1' or '0' on a token is     */
/* removed and stored as the syllable's "stress".  A token that is     */
/* not in the utterance's "phoneset" is reported through cst_errmsg()  */
/* and cst_error().                                                    */
/*                                                                     */
/* A Segment item is created only for a validated phone, so "-" and    */
/* rejected tokens never leave a nameless item in the Segment          */
/* relation.  Returns u.                                               */
static cst_utterance *tokentosegs(cst_utterance *u)
{
    cst_item *t;
    cst_relation *seg, *syl, *sylstructure, *word;
    cst_item *sylitem, *sylstructureitem, *worditem, *sssyl;
    cst_phoneset *ps;

    ps = val_phoneset(utt_feat_val(u, "phoneset"));
    seg = utt_relation_create(u, "Segment");
    syl = utt_relation_create(u, "Syllable");
    word = utt_relation_create(u, "Word");
    sylstructure = utt_relation_create(u, "SylStructure");
    sssyl = sylitem = worditem = sylstructureitem = NULL;

    for (t = relation_head(utt_relation(u, "Token")); t; t = item_next(t))
    {
        cst_item *segitem;
        char const *pname = item_feat_string(t, "name");
        char *name = cst_strdup(pname);
        char *last_ch;

        if (worditem == NULL)
        {
            /* One word holds the whole phone string */
            worditem = relation_append(word,NULL);
            item_set_string(worditem, "name", "phonestring");
            sylstructureitem = relation_append(sylstructure,worditem);
        }
        if (sylitem == NULL)
        {
            sylitem = relation_append(syl,NULL);
            sssyl = item_add_daughter(sylstructureitem,sylitem);
        }

        /* Strip a trailing stress digit; an empty name is skipped so
           that we never index before the start of the buffer. */
        last_ch = name + cst_strlen(name);
        if (last_ch != name)
        {
            --last_ch;
            if (*last_ch == '1')
            {
                item_set_string(sssyl,"stress","1");
                *last_ch = '\0';
            }
            else if (*last_ch == '0')
            {
                item_set_string(sssyl,"stress","0");
                *last_ch = '\0';
            }
        }

        if (cst_streq(name,"-"))
        {
            sylitem = NULL;  /* syllable break */
        }
        else if (phone_id(ps, name) == -1)
        {
            cst_errmsg("Phone `%s' not in phoneset\n", pname);
            /* Release our copy first: cst_error() is not expected to
               return -- NOTE(review): confirm against cst_error.h */
            cst_free(name);
            cst_error();
            continue;   /* only reached if cst_error() does return */
        }
        else
        {
            segitem = relation_append(seg, NULL);
            item_add_daughter(sssyl,segitem);
            item_set_string(segitem, "name", name);
        }

        cst_free(name);
    }

    return u;
}
668
default_utt_break(cst_tokenstream * ts,const char * token,cst_relation * tokens)669 int default_utt_break(cst_tokenstream *ts,
670 const char *token,
671 cst_relation *tokens)
672 {
673 /* This is the default utt break functions, languages may override this */
674 /* This will be ok for some latin based languages */
675 const char *postpunct = item_feat_string(relation_tail(tokens), "punc");
676 const char *ltoken = item_name(relation_tail(tokens));
677
678 if (cst_strchr(ts->whitespace,'\n') != cst_strrchr(ts->whitespace,'\n'))
679 /* contains two new lines */
680 return TRUE;
681 /* Well, this is a little specific isn't it. */
682 else if (((cst_streq(ltoken,"Yahoo")) ||
683 (cst_streq(ltoken,"YAHOO")) ||
684 (cst_streq(ltoken,"yahoo"))) &&
685 strchr(postpunct,'!') &&
686 strchr("abcdefghijklmnopqrstuvwxyz",token[0]))
687 return FALSE;
688 else if (strchr(postpunct,':') ||
689 strchr(postpunct,'?') ||
690 strchr(postpunct,'!'))
691 return TRUE;
692 else if (strchr(postpunct,'.') &&
693 (cst_strlen(ts->whitespace) > 1) &&
694 strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",token[0]))
695 return TRUE;
696 else if (strchr(postpunct,'.') &&
697 /* next word starts with a capital */
698 strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",token[0]) &&
699 /* last word isn't an abbreviation */
700 !(strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",ltoken[cst_strlen(ltoken)-1])||
701 ((cst_strlen(ltoken) < 4) &&
702 strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ",ltoken[0]))))
703 return TRUE;
704 else
705 return FALSE;
706 }
707