;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                       ;;
;;;                Centre for Speech Technology Research                  ;;
;;;                     University of Edinburgh, UK                       ;;
;;;                       Copyright (c) 1997                              ;;
;;;                        All Rights Reserved.                           ;;
;;;                                                                       ;;
;;;  Permission is hereby granted, free of charge, to use and distribute  ;;
;;;  this software and its documentation without restriction, including   ;;
;;;  without limitation the rights to use, copy, modify, merge, publish,  ;;
;;;  distribute, sublicense, and/or sell copies of this work, and to      ;;
;;;  permit persons to whom this work is furnished to do so, subject to   ;;
;;;  the following conditions:                                            ;;
;;;   1. The code must retain the above copyright notice, this list of    ;;
;;;      conditions and the following disclaimer.                         ;;
;;;   2. Any modifications must be clearly marked as such.                ;;
;;;   3. Original authors' names are not deleted.                         ;;
;;;   4. The authors' names are not used to endorse or promote products   ;;
;;;      derived from this software without specific prior written        ;;
;;;      permission.                                                      ;;
;;;                                                                       ;;
;;;  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        ;;
;;;  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ;;
;;;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ;;
;;;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     ;;
;;;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ;;
;;;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ;;
;;;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ;;
;;;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ;;
;;;  THIS SOFTWARE.                                                       ;;
;;;                                                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;  Set up ked_diphone using the standard diphone synthesizer
;;;
;;;  Kurt diphones: male American English collected Spring 1997
;;;

(defvar ked_diphone_dir (cdr (assoc 'ked_diphone voice-locations))
  "ked_diphone_dir
  The default directory for the ked diphone database.")
;; Make the voice's own festvox/ directory searchable by `require'.
(set! load-path (cons (path-append ked_diphone_dir "festvox/") load-path))

(require 'radio_phones)
(require_module 'UniSyn)

;; Set this to lpc or psola
(defvar ked_sigpr 'lpc)
;; Set this to ungroup for the ungrouped version
(defvar ked_groupungroup 'group)

;; Use the 16k group file when it exists on disk, otherwise fall back to
;; the 8k group file.
(if (probe_file (path-append ked_diphone_dir "group/kedlpc16k.group"))
    (defvar ked_index_file
      (path-append ked_diphone_dir "group/kedlpc16k.group"))
    (defvar ked_index_file
      (path-append ked_diphone_dir "group/kedlpc8k.group")))

;; Description of the ungrouped (separate files) LPC database, as passed
;; to us_diphone_init below.
(set! ked_lpc_sep
      (list
       '(name "ked_lpc_sep")
       (list 'index_file (path-append ked_diphone_dir "dic/diphdic_full.est"))
       '(grouped "false")
       (list 'coef_dir (path-append ked_diphone_dir "lpc"))
       (list 'sig_dir (path-append ked_diphone_dir "lpc"))
       '(coef_ext ".lpc")
       '(sig_ext ".res")
       ))

;; Description of the grouped LPC database, as passed to us_diphone_init
;; below.
(set! ked_lpc_group
      (list
       '(name "ked_lpc_group")
       (list 'index_file ked_index_file)
       '(grouped "true")
       '(alternates_left ((ah ax)))
       '(alternates_right (($p p) ($k k) ($g g) ($d d) ($b b) ($t t)
                           (aor ao) (y ih) (ax ah) (ll l)))

       '(default_diphone "pau-pau")))

;; Initialise the diphone database selected by ked_sigpr and
;; ked_groupungroup and remember the result in ked_db_name.
;; NOTE(review): ked_psola_group and ked_psola_sep are not defined in this
;; file; the psola settings presumably rely on them being defined
;; elsewhere -- confirm before switching ked_sigpr to psola.
;; NOTE(review): there is no fallback clause, so any other combination of
;; the two settings leaves ked_db_name unset by this form.
(cond
 ((and (eq ked_sigpr 'psola)
       (eq ked_groupungroup 'group))
  (set! ked_db_name (us_diphone_init ked_psola_group)))
 ((and (eq ked_sigpr 'psola)
       (eq ked_groupungroup 'ungroup))
  (set! ked_db_name (us_diphone_init ked_psola_sep)))
 ((and (eq ked_sigpr 'lpc)
       (eq ked_groupungroup 'group))
  (set! ked_db_name (us_diphone_init ked_lpc_group)))
 ((and (eq ked_sigpr 'lpc)
       (eq ked_groupungroup 'ungroup))
  (set! ked_db_name (us_diphone_init ked_lpc_sep))))

;;;;
;;;;  Our general diphone scheme allows identification of consonant
;;;;  clusters etc.  The following rules should work for American English.
;;;;
(define (ked_diphone_const_clusters utt)
"(ked_diphone_const_clusters UTT)
Identify consonant clusters, dark ls etc in the segment stream
ready for diphone resynthesis.  Applies ked_diphone_fix_phone_name to
every item in the Segment relation and returns UTT.  This may be called
as a post lexical rule through postlex_rules_hooks; voice_ked_diphone
installs it through UniSyn_module_hooks."
  ;; mapcar is used only for its side effects on the segments; the
  ;; resulting list is discarded.
  (mapcar
   (lambda (s) (ked_diphone_fix_phone_name utt s))
   (utt.relation.items utt 'Segment))
  utt)

(define (ked_diphone_fix_phone_name utt seg)
"(ked_diphone_fix_phone_name UTT SEG)
Set the features us_diphone_left, us_diphone_right or us_diphone on the
given segment with the appropriate name for constructing a diphone.
Basically adds _ if either side is part of the same consonant cluster,
adds $ if in a different syllable from the preceding vowel, marks dark
l as ll, and treats ao before r, ah, and er specially."
  (let ((name (item.name seg)))
    (cond
     ;; Silence needs no renaming.
     ((string-equal name "pau") t)
     ;; Segments whose ph_vc feature is "-".  The tests below are
     ;; independent and run in order, so a later rule that sets the same
     ;; feature overwrites an earlier one.
     ((string-equal "-" (item.feat seg 'ph_vc))
      ;; r/w/y/l after a stop, with a previous item in SylStructure
      (if (and (member_string name '(r w y l))
               (member_string (item.feat seg "p.name") '(p t k b d g))
               (item.relation.prev seg "SylStructure"))
          (item.set_feat seg "us_diphone_right" (format nil "_%s" name)))
      ;; consonant after s, with a previous item in SylStructure
      (if (and (member_string name '(w y l m n p t k))
               (string-equal (item.feat seg "p.name") 's)
               (item.relation.prev seg "SylStructure"))
          (item.set_feat seg "us_diphone_right" (format nil "_%s" name)))
      ;; s before one of those consonants, with a next item in SylStructure
      (if (and (string-equal name 's)
               (member_string (item.feat seg "n.name") '(w y l m n p t k))
               (item.relation.next seg "SylStructure"))
          (item.set_feat seg "us_diphone_left" (format nil "%s_" name)))
      ;; stop before r/w/y/l, with a next item in SylStructure
      (if (and (member_string name '(p t k b d g))
               (member_string (item.feat seg "n.name") '(r w y l))
               (item.relation.next seg "SylStructure"))
          (item.set_feat seg "us_diphone_left" (format nil "%s_" name)))
      ;; stop with no previous item in SylStructure, preceded by a segment
      ;; whose ph_vc is "+" and whose name is not @, aa or o
      (if (and (member_string name '(p k b d g))
               (string-equal "+" (item.feat seg 'p.ph_vc))
               (not (member_string (item.feat seg "p.name") '(@ aa o)))
               (not (item.relation.prev seg "SylStructure")))
          (item.set_feat seg "us_diphone_right" (format nil "$%s" name)))
      ;; l after a segment whose ph_vc is "+" and whose ph_vlng is not
      ;; "a", with a previous item in SylStructure: use ll
      (if (and (string-equal "l" name)
               (string-equal "+" (item.feat seg "p.ph_vc"))
               (not (string-equal "a" (item.feat seg "p.ph_vlng")))
               (item.relation.prev seg 'SylStructure))
          (item.set_feat seg "us_diphone_right" "ll"))
      ;; ch/jh after a segment whose ph_vc is "+": right name is t
      (if (and (member_string name '(ch jh))
               (string-equal "+" (item.feat seg 'p.ph_vc)))
          (item.set_feat seg "us_diphone_right" "t"))
      )
     ;; ao followed by r
     ((and (string-equal "ao" (item.name seg))
           (string-equal "r" (item.feat seg 'n.name)))
      (item.set_feat seg "us_diphone_right" "aor"))
     ;; ah is named ax on both sides
     ((string-equal "ah" (item.name seg))
      (item.set_feat seg "us_diphone" "ax"))
     ((string-equal "er" (item.name seg))
      ;; Because the diphones have er-r for er, we insert an r segment.
      ;; This is a hack and leaves the utterance somewhat tainted (an
      ;; extra phone has appeared).  The new r takes over the original
      ;; end time of the er, and the er is shortened to end at the
      ;; midpoint between its segment_start and its original end.
      (let ((newr (item.insert seg (list 'r) 'after)))
        (item.set_feat newr "end" (item.feat seg "end"))
        (item.set_feat seg "end"
                       (/ (+ (item.feat seg "segment_start")
                             (item.feat seg "end"))
                          2))))
     )))

;;;  Set up the CMU lexicon
(setup_cmu_lex)

(define (voice_ked_diphone)
"(voice_ked_diphone)
 Set up the current voice to be male American English (Kurt) using
 the standard diphone corpus."
  ;; Phone set
  (voice_reset)
  (Parameter.set 'Language 'americanenglish)
  (require 'radio_phones)
  (Parameter.set 'PhoneSet 'radio)
  (PhoneSet.select 'radio)
  ;; Tokenization rules
  (set! token_to_words english_token_to_words)
  ;; POS tagger
  (require 'pos)
  (set! pos_lex_name "english_poslex")
  (set! pos_ngram_name 'english_pos_ngram)
  (set! pos_supported t)
  (set! guess_pos english_guess_pos)   ;; need this for accents
  ;; Lexicon selection
  (lex.select "cmu")
  (set! postlex_rules_hooks (list postlex_apos_s_check))
  ;; Phrase prediction
  (require 'phrase)
  (Parameter.set 'Phrase_Method 'prob_models)
  (set! phr_break_params english_phr_break_params)
  ;; Accent and tone prediction
  (require 'tobi)
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)

  (set! postlex_vowel_reduce_cart_tree
        postlex_vowel_reduce_cart_data)
  ;; F0 prediction
  (require 'f2bf0lr)
  (set! f0_lr_start f2b_f0_lr_start)
  (set! f0_lr_mid f2b_f0_lr_mid)
  (set! f0_lr_end f2b_f0_lr_end)
  (Parameter.set 'Int_Method Intonation_Tree)
  (set! int_lr_params
        '((target_f0_mean 105) (target_f0_std 15)
          (model_f0_mean 170) (model_f0_std 34)))
  (Parameter.set 'Int_Target_Method Int_Targets_LR)
  ;; Duration prediction
  (require 'kddurtreeZ)
  (set! duration_cart_tree kd_duration_cart_tree)
  (set! duration_ph_info kd_durs)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  (Parameter.set 'Duration_Stretch 1.1)
  ;; Waveform synthesizer: ked diphones
  ;; This assigns the diphone names from their context (_ $ etc)
  (set! UniSyn_module_hooks (list ked_diphone_const_clusters ))
  (set! us_abs_offset 0.0)
  (set! window_factor 1.0)
  (set! us_rel_offset 0.0)
  (set! us_gain 0.9)

  (Parameter.set 'Synth_Method 'UniSyn)
  (Parameter.set 'us_sigpr ked_sigpr)
  ;; ked_db_name is set at load time by the cond earlier in this file.
  (us_db_select ked_db_name)

  (set! current-voice 'ked_diphone)
)

(proclaim_voice
 'ked_diphone
 '((language english)
   (gender male)
   (dialect american)
   (description
    "This voice provides an American English male voice using a
    residual excited LPC diphone synthesis method.  It uses the
    CMU Lexicon for pronunciations.  Prosodic phrasing is provided
    by a statistically trained model using part of speech and local
    distribution of breaks.  Intonation is provided by a CART tree
    predicting ToBI accents and an F0 contour generated from a model
    trained from natural speech.  The duration model is also trained
    from data using a CART tree.")))

(provide 'ked_diphone)