1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;;;                                                                       ;;
3;;;                Centre for Speech Technology Research                  ;;
4;;;                     University of Edinburgh, UK                       ;;
5;;;                         Copyright (c) 1997                            ;;
6;;;                        All Rights Reserved.                           ;;
7;;;                                                                       ;;
8;;;  Permission is hereby granted, free of charge, to use and distribute  ;;
9;;;  this software and its documentation without restriction, including   ;;
10;;;  without limitation the rights to use, copy, modify, merge, publish,  ;;
11;;;  distribute, sublicense, and/or sell copies of this work, and to      ;;
12;;;  permit persons to whom this work is furnished to do so, subject to   ;;
13;;;  the following conditions:                                            ;;
14;;;   1. The code must retain the above copyright notice, this list of    ;;
15;;;      conditions and the following disclaimer.                         ;;
16;;;   2. Any modifications must be clearly marked as such.                ;;
17;;;   3. Original authors' names are not deleted.                         ;;
18;;;   4. The authors' names are not used to endorse or promote products   ;;
19;;;      derived from this software without specific prior written        ;;
20;;;      permission.                                                      ;;
21;;;                                                                       ;;
22;;;  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        ;;
23;;;  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ;;
24;;;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ;;
25;;;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     ;;
26;;;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ;;
27;;;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ;;
28;;;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ;;
29;;;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ;;
30;;;  THIS SOFTWARE.                                                       ;;
31;;;                                                                       ;;
32;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33;;;  Set up kd_diphones using the standard diphone synthesizer
34;;;
35;;;  Kurt diphones: male American English collected Spring 1997
36;;;
37
;; Locate the voice's installation directory through the voice-locations
;; table, unless ked_diphone_dir has already been given a value.
(defvar ked_diphone_dir
  (cdr (assoc 'ked_diphone voice-locations))
  "ked_diphone_dir
  Directory holding the ked diphone database.  Defaults to the entry
  for ked_diphone in voice-locations.")

;; Put the voice's own festvox/ directory at the front of load-path so
;; that files required from it are found.
(let ((festvox_dir (path-append ked_diphone_dir "festvox/")))
  (set! load-path (cons festvox_dir load-path)))
42
43(require 'radio_phones)
44(require_module 'UniSyn)
45
;; Signal processing method: set this to lpc or psola.
(defvar ked_sigpr 'lpc)
;; Set this to ungroup for the ungrouped version of the database.
(defvar ked_groupungroup 'group)

;; Grouped index file: use the 16k group file when it is present on
;; disk, otherwise fall back to the 8k one (whose existence is not
;; checked here).
(defvar ked_index_file
  (if (probe_file (path-append ked_diphone_dir "group/kedlpc16k.group"))
      (path-append ked_diphone_dir "group/kedlpc16k.group")
      (path-append ked_diphone_dir "group/kedlpc8k.group")))
56
;; Parameters for the ungrouped LPC database: a diphone index under
;; dic/, with coefficient (.lpc) and residual (.res) files both read
;; from the lpc/ directory.
(set! ked_lpc_sep
      (let ((lpc_dir (path-append ked_diphone_dir "lpc")))
        (list
         (list 'name "ked_lpc_sep")
         (list 'index_file
               (path-append ked_diphone_dir "dic/diphdic_full.est"))
         (list 'grouped "false")
         (list 'coef_dir lpc_dir)
         (list 'sig_dir lpc_dir)
         (list 'coef_ext ".lpc")
         (list 'sig_ext ".res"))))
67
;; Parameters for the grouped LPC database, read from the single group
;; file named by ked_index_file.  The alternates lists give substitute
;; phone names for the left and right halves of a diphone, and
;; default_diphone names the fallback diphone.
;; NOTE(review): exactly when UniSyn consults the alternates and the
;; default diphone is not visible in this file -- confirm against the
;; UniSyn documentation.
(set! ked_lpc_group
      (list
       (list 'name "ked_lpc_group")
       (list 'index_file ked_index_file)
       (list 'grouped "true")
       (list 'alternates_left '((ah ax)))
       (list 'alternates_right
             '(($p p) ($k k) ($g g) ($d d) ($b b) ($t t)
               (aor ao) (y ih) (ax ah) (ll l)))
       (list 'default_diphone "pau-pau")))
78
;; Initialise the diphone database selected by ked_sigpr and
;; ked_groupungroup, and remember its name in ked_db_name for
;; voice_ked_diphone to select later.
;;
;; Only the LPC parameter sets (ked_lpc_group, ked_lpc_sep) are defined
;; in this file.  The PSOLA ones (ked_psola_group, ked_psola_sep) are
;; used only if something else has defined them before this file is
;; loaded; otherwise a clear error is raised instead of an unbound
;; variable error.  Any other combination of settings is reported on
;; stderr rather than silently leaving ked_db_name unset.
(cond
 ((and (eq ked_sigpr 'lpc)
       (eq ked_groupungroup 'group))
  (set! ked_db_name (us_diphone_init ked_lpc_group)))
 ((and (eq ked_sigpr 'lpc)
       (eq ked_groupungroup 'ungroup))
  (set! ked_db_name (us_diphone_init ked_lpc_sep)))
 ((and (eq ked_sigpr 'psola)
       (eq ked_groupungroup 'group)
       (symbol-bound? 'ked_psola_group))
  (set! ked_db_name (us_diphone_init ked_psola_group)))
 ((and (eq ked_sigpr 'psola)
       (eq ked_groupungroup 'ungroup)
       (symbol-bound? 'ked_psola_sep))
  (set! ked_db_name (us_diphone_init ked_psola_sep)))
 ((and (eq ked_sigpr 'psola)
       (or (eq ked_groupungroup 'group)
           (eq ked_groupungroup 'ungroup)))
  ;; psola was requested but its database description does not exist.
  (error
   "ked_diphone: ked_sigpr is psola but no PSOLA database description is defined for"
   ked_groupungroup))
 (t
  (format stderr
          "ked_diphone: unsupported ked_sigpr/ked_groupungroup setting (%l %l); no diphone database initialised\n"
          ked_sigpr ked_groupungroup)))
92
;;;;
;;;;  Our general diphone scheme allows identification of consonant
;;;;  clusters etc.  The following rules should work for American English.
;;;;
(define (ked_diphone_const_clusters utt)
"(ked_diphone_const_clusters UTT)
Run ked_diphone_fix_phone_name over every item in the Segment relation
of UTT so that consonant clusters, dark ls etc are marked ready for
diphone selection, then return UTT.  This voice installs it through
UniSyn_module_hooks; it may also be called as a post lexical rule
through postlex_rules_hooks."
  ;; The segment list is collected before any renaming starts; mapcar is
  ;; used only for its side effects and its result is discarded.
  (let ((segments (utt.relation.items utt 'Segment)))
    (mapcar
     (lambda (segment)
       (ked_diphone_fix_phone_name utt segment))
     segments))
  utt)
106
(define (ked_diphone_fix_phone_name utt seg)
"(ked_diphone_fix_phone_name UTT SEG)
Set the features us_diphone, us_diphone_left and us_diphone_right on
segment SEG where its context calls for a diphone name other than the
plain phone name.  Consonants get a _ on the side that joins another
consonant of the same cluster, stops get a leading $ after a vowel
when they have no predecessor in SylStructure, and l after a vowel
becomes ll.  ao before r, ah, and er are handled specially; for er an
extra r segment is inserted after SEG."
  (let ((ph (item.name seg))
        ;; Phone classes shared by the cluster rules below.
        (stops '(p t k b d g))
        (liquids_glides '(r w y l))
        (s_partners '(w y l m n p t k)))
    (cond
     ;; Silence keeps its own name.
     ((string-equal ph "pau")
      t)

     ;; Consonants.  Each rule below is tried in turn; a later rule that
     ;; fires overwrites a feature set by an earlier one.
     ((string-equal "-" (item.feat seg "ph_vc"))
      ;; Liquid/glide after a stop, with a predecessor in SylStructure.
      (if (and (member_string ph liquids_glides)
               (member_string (item.feat seg "p.name") stops)
               (item.relation.prev seg "SylStructure"))
          (item.set_feat seg "us_diphone_right" (format nil "_%s" ph)))
      ;; Consonant after s, with a predecessor in SylStructure.
      (if (and (member_string ph s_partners)
               (string-equal (item.feat seg "p.name") "s")
               (item.relation.prev seg "SylStructure"))
          (item.set_feat seg "us_diphone_right" (format nil "_%s" ph)))
      ;; s before one of its cluster partners, with a successor in
      ;; SylStructure.
      (if (and (string-equal ph "s")
               (member_string (item.feat seg "n.name") s_partners)
               (item.relation.next seg "SylStructure"))
          (item.set_feat seg "us_diphone_left" (format nil "%s_" ph)))
      ;; Stop before a liquid/glide, with a successor in SylStructure.
      (if (and (member_string ph stops)
               (member_string (item.feat seg "n.name") liquids_glides)
               (item.relation.next seg "SylStructure"))
          (item.set_feat seg "us_diphone_left" (format nil "%s_" ph)))
      ;; Stop (t is not in this list) after a vowel other than @, aa or
      ;; o, and with no predecessor in SylStructure: mark with $.
      (if (and (member_string ph '(p k b d g))
               (string-equal "+" (item.feat seg "p.ph_vc"))
               (not (member_string (item.feat seg "p.name") '(@ aa o)))
               (not (item.relation.prev seg "SylStructure")))
          (item.set_feat seg "us_diphone_right" (format nil "$%s" ph)))
      ;; l after a vowel whose ph_vlng is not "a", with a predecessor in
      ;; SylStructure: use ll.
      (if (and (string-equal "l" ph)
               (string-equal "+" (item.feat seg "p.ph_vc"))
               (not (string-equal "a" (item.feat seg "p.ph_vlng")))
               (item.relation.prev seg "SylStructure"))
          (item.set_feat seg "us_diphone_right" "ll"))
      ;; ch and jh after a vowel are named t on the right.
      (if (and (member_string ph '(ch jh))
               (string-equal "+" (item.feat seg "p.ph_vc")))
          (item.set_feat seg "us_diphone_right" "t")))

     ;; ao immediately before r.
     ((and (string-equal "ao" ph)
           (string-equal "r" (item.feat seg "n.name")))
      (item.set_feat seg "us_diphone_right" "aor"))

     ;; ah is looked up as ax.
     ((string-equal "ah" ph)
      (item.set_feat seg "us_diphone" "ax"))

     ;; Because the diphones have er-r for er, an r segment is inserted
     ;; after SEG.  This is a hack and leaves the utterance somewhat
     ;; tainted (an extra phone has appeared).  The new r takes over
     ;; SEG's end time and SEG is cut back to end at its own midpoint.
     ((string-equal "er" ph)
      (let ((extra_r (item.insert seg (list 'r) 'after)))
        (let ((old_end (item.feat seg "end")))
          (item.set_feat extra_r "end" old_end)
          (item.set_feat seg "end"
                         (/ (+ (item.feat seg "segment_start") old_end)
                            2))))))))
163
;;;  Set up the CMU lexicon.  This runs once, when this file is loaded;
;;;  voice_ked_diphone later selects a lexicon by the name "cmu".
;;;  NOTE(review): setup_cmu_lex is defined outside this file --
;;;  presumably it is what defines that "cmu" lexicon; confirm.
(setup_cmu_lex)
166
(define (voice_ked_diphone)
"(voice_ked_diphone)
 Make the male American English (Kurt) diphone voice the current
 voice: reset the voice state, then configure phone set, tokenization,
 POS tagging, lexicon, phrasing, intonation, duration and the UniSyn
 diphone synthesizer, and finally set current-voice to ked_diphone."
  (voice_reset)
  (Parameter.set 'Language 'americanenglish)

  ;; Phone set: radio
  (require 'radio_phones)
  (Parameter.set 'PhoneSet 'radio)
  (PhoneSet.select 'radio)

  ;; Token to word rules
  (set! token_to_words english_token_to_words)

  ;; Part of speech tagging
  (require 'pos)
  (set! pos_lex_name "english_poslex")
  (set! pos_ngram_name 'english_pos_ngram)
  (set! pos_supported t)
  ;; guess_pos is needed for accent prediction
  (set! guess_pos english_guess_pos)

  ;; Lexicon and post lexical rules
  (lex.select "cmu")
  (set! postlex_rules_hooks (list postlex_apos_s_check))

  ;; Phrase break prediction
  (require 'phrase)
  (Parameter.set 'Phrase_Method 'prob_models)
  (set! phr_break_params english_phr_break_params)

  ;; Accent and tone prediction, using the f2b trees
  (require 'tobi)
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)

  ;; Vowel reduction tree
  (set! postlex_vowel_reduce_cart_tree
        postlex_vowel_reduce_cart_data)

  ;; F0 prediction: f2b linear regression models, with the model's F0
  ;; mean and standard deviation mapped onto this voice's targets
  (require 'f2bf0lr)
  (set! f0_lr_end f2b_f0_lr_end)
  (set! f0_lr_mid f2b_f0_lr_mid)
  (set! f0_lr_start f2b_f0_lr_start)
  (Parameter.set 'Int_Method Intonation_Tree)
  (set! int_lr_params
        '((target_f0_mean 105)
          (target_f0_std 15)
          (model_f0_mean 170)
          (model_f0_std 34)))
  (Parameter.set 'Int_Target_Method Int_Targets_LR)

  ;; Duration prediction: kd tree and phone duration table
  (require 'kddurtreeZ)
  (set! duration_ph_info kd_durs)
  (set! duration_cart_tree kd_duration_cart_tree)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  (Parameter.set 'Duration_Stretch 1.1)

  ;; Waveform synthesis with the ked diphones through UniSyn.  The hook
  ;; assigns diphone names from the segments' context (_ $ etc).
  (set! UniSyn_module_hooks (list ked_diphone_const_clusters))
  (set! us_gain 0.9)
  (set! window_factor 1.0)
  (set! us_abs_offset 0.0)
  (set! us_rel_offset 0.0)

  (Parameter.set 'Synth_Method 'UniSyn)
  (Parameter.set 'us_sigpr ked_sigpr)
  (us_db_select ked_db_name)

  ;; Record which voice is now active.
  (set! current-voice 'ked_diphone)
)
229
;; Register the voice under the name ked_diphone, together with its
;; language, gender, dialect and a human-readable description.
(proclaim_voice
 'ked_diphone
 (list
  '(language english)
  '(gender male)
  '(dialect american)
  (list
   'description
    "This voice provides an American English male voice using a
     residual excited LPC diphone synthesis method.  It uses the
     CMU Lexicon for pronunciations.  Prosodic phrasing is provided
     by a statistically trained model using part of speech and local
     distribution of breaks.  Intonation is provided by a CART tree
     predicting ToBI accents and an F0 contour generated from a model
     trained from natural speech.  The duration model is also trained
     from data using a CART tree.")))
244
245(provide 'ked_diphone)
246