1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;;;                                                                       ;;
3;;;                Centre for Speech Technology Research                  ;;
4;;;                     University of Edinburgh, UK                       ;;
5;;;                       Copyright (c) 1996,1997                         ;;
6;;;                        All Rights Reserved.                           ;;
7;;;                                                                       ;;
8;;;  Permission is hereby granted, free of charge, to use and distribute  ;;
9;;;  this software and its documentation without restriction, including   ;;
10;;;  without limitation the rights to use, copy, modify, merge, publish,  ;;
11;;;  distribute, sublicense, and/or sell copies of this work, and to      ;;
12;;;  permit persons to whom this work is furnished to do so, subject to   ;;
13;;;  the following conditions:                                            ;;
14;;;   1. The code must retain the above copyright notice, this list of    ;;
15;;;      conditions and the following disclaimer.                         ;;
16;;;   2. Any modifications must be clearly marked as such.                ;;
17;;;   3. Original authors' names are not deleted.                         ;;
18;;;   4. The authors' names are not used to endorse or promote products   ;;
19;;;      derived from this software without specific prior written        ;;
20;;;      permission.                                                      ;;
21;;;                                                                       ;;
22;;;  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        ;;
23;;;  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ;;
24;;;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ;;
25;;;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     ;;
26;;;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ;;
27;;;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ;;
28;;;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ;;
29;;;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ;;
30;;;  THIS SOFTWARE.                                                       ;;
31;;;                                                                       ;;
32;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33;;;
34;;;  This contains an example of how to use Festival with a
35;;;  a talking head.  This is a combination of various answers given
36;;;  to different groups who have been using Festival with talking
37;;;  heads
38;;;
39;;;  This version has not actually been used by any talking head
40;;;  but just serves as an example.
41;;;
42;;;  The basic mode produces /tmp/th.ph (phone info) and /tmp/th.com
43;;;  (commands: smile frown) for each utterance in the file.  These
44;;;  files are produced and the program makefaces called before
45;;;  waveform synthesis for each utterance.  The play command then
46;;;  calls xanim with the generate animation and waveform.
47;;;
48;;;  There are probabaly better way to do this.  Using Festival as a
49;;;  server to generate the phone and command files might
50;;;  be more reasonable.  Note festival not supports the returning
51;;;  of Lisp data to the client as well as waveform data.
52;;;  In that case you'd want to change th_output_info to use
53;;;  the send_cleint command and package the phone info into an
54;;;  s-expression.
55
56(defvar th-prepare-prog "makefaces"
57  " A program that takes phones and other data and produces the
58animated face.")
59
60(define (utt.save.phonedata utt filename)
61"(utt.save.mydata UTT FILE)
62  Saves phone, duration, stress, F0 word pos."
63  (let ((fd (fopen filename "w")))
64    (mapcar
65     (lambda (seg)
66       (format fd "%s %2.4f %s %s"
67	       (item.feat seg "name")
68	       (item.feat seg "segment_duration")
69	       (item.feat seg "R:SylStructure.parent.stress")
70	       (item.feat seg "R:Target.daughter1.name"))
71       ;; output word name and part of speech if start of word
72       (if (and (not (item.relation.next seg "SylStructure"))
73		(not (item.next
74		      (item.relation.parent seg "SylStructure"))))
75	   (format fd " %s %s"
76		   (item.feat seg "R:SylStructure.parent.parent.name")
77		   (item.feat seg "R:SylStructure.parent.parent.pos")))
78       (format fd "\n"))
79     (utt.relation.items utt 'Segment))
80    (fclose fd)
81    utt))
82
83(define (utt.save.commands utt filename)
84"(utt.save.commands UTT FILE)
85  Save commands with time stamps.  Commands are those tokens which
86start and end with an asterisk."
87  (let ((fd (fopen filename "w")))
88    (format fd "#\n")
89    (mapcar
90     (lambda (tok_item)
91       (if (string-matches (item.name tok_item) "\\*.+\\*")
92	   (format fd "%2.4f 100 %s\n"
93		   (find_com_time utt tok_item)
94		   (item.name tok_item))))
95     (utt.relation.items utt 'Token))
96    (fclose fd)
97    utt))
98
99(define (find_com_time utt tok_item)
100"Returns time of tok_item.  Looks backward for first token that
101is related to a word and returns the end time of that word."
102  (cond
103   ((item.daughtern tok_item)
104    (item.feat (item.daughtern tok_item) "word_end"))
105   ((not (item.prev tok_item))  ;; start of stream
106    0.0)
107   (t
108    (find_com_time utt (item.prev tok_item)))))
109
110(define (th_output_info utt)
111  "(th_output_info utt)
112This is called after linguistic analysis but before waveform synthesis.
113It collects the phone and duration data and also any th commands
114found in the utterance.  The file names are then passed to some
115external program which will process them for the talking head."
116  (set! th-current-file "/tmp/th") ;; this should have a process id in it
117  (utt.save.phonedata utt (string-append th-current-file ".ph"))
118  (utt.save.commands utt (string-append th-current-file ".com"))
119  ;; It would be good to background this process as long as you
120  ;; resync at play time
121  (system (format nil "%s %s %s"
122		  th-prepare-prog
123		  (string-append th-current-file ".ph")
124		  (string-append th-current-file ".ph")))
125  utt)
126
127;;;
128;;;  Define a new text mode for talking heads
129;;;
130
131(define (th_init_func)
132 "Called on starting talking head text mode."
133 (set! th_previous_t2w_func token_to_words)
134 (set! th_previous_after_analysis_hooks after_analysis_hooks)
135 (set! after_analysis_hooks (list th_output_info))
136 (set! english_token_to_words th_token_to_words)
137 (set! token_to_words th_token_to_words)
138
139 ;; We assume the prepare talking head program generates a movie
140 ;; that can be played by something, so we redefie the audio
141 ;; player to play the generated animation and waveform.
142 (set! th_previous_Parameter Parameter)
143 (audio_mode 'sync)  ;; ensure new Audio command gets passed to new audiosp
144 (Parameter.set 'Audio_Required_Format 'riff)
145 (Parameter.set 'Audio_Command "xanim /tmp/th.anime $FILE")
146 (Parameter.set 'Audio_Method 'Audio_Command)
147 (audio_mode 'async)
148)
149
150(define (th_exit_func)
151 "Called on exit talking head text mode."
152 (set! token_to_words th_previous_t2w_func)
153 (set! english_token_to_words th_previous_t2w_func)
154 (set! after_analysis_hooks th_previous_after_analysis_hooks)
155
156 (audio_mode 'sync) ;; so we can reset the audio
157 (set! Parameter th_previous_Parameter)
158)
159
160(define (th_token_to_words token name)
161"(th_token_to_words TOKEN NAME)
162Talking head specific token to word rules."
163 (cond
164  ((string-matches name "\\*.*\\*")
165   ;;  Symbols started and ended with an asterisk as treated as commands
166   ;;  and not rendered as speech
167   nil)
168  (t
169   (th_previous_t2w_func token name))))
170
171(set! tts_text_modes
172   (cons
173    (list
174      'th           ;; mode name
175      (list         ;; ogimarkup mode params
176       (list 'init_func th_init_func)
177       (list 'exit_func th_exit_func)))
178    tts_text_modes))
179
180(provide 'th-mode)
181