1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;;;                                                                       ;;
3;;;                     Carnegie Mellon University                        ;;
4;;;                      Copyright (c) 2005-2006                          ;;
5;;;                        All Rights Reserved.                           ;;
6;;;                                                                       ;;
7;;;  Permission is hereby granted, free of charge, to use and distribute  ;;
8;;;  this software and its documentation without restriction, including   ;;
9;;;  without limitation the rights to use, copy, modify, merge, publish,  ;;
10;;;  distribute, sublicense, and/or sell copies of this work, and to      ;;
11;;;  permit persons to whom this work is furnished to do so, subject to   ;;
12;;;  the following conditions:                                            ;;
13;;;   1. The code must retain the above copyright notice, this list of    ;;
14;;;      conditions and the following disclaimer.                         ;;
15;;;   2. Any modifications must be clearly marked as such.                ;;
16;;;   3. Original authors' names are not deleted.                         ;;
17;;;   4. The authors' names are not used to endorse or promote products   ;;
18;;;      derived from this software without specific prior written        ;;
19;;;      permission.                                                      ;;
20;;;                                                                       ;;
21;;;  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         ;;
22;;;  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ;;
23;;;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO         ;;
24;;;  EVENT SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE       ;;
25;;;  LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY     ;;
26;;;  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,      ;;
27;;;  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS       ;;
28;;;  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR              ;;
29;;;  PERFORMANCE OF THIS SOFTWARE.                                        ;;
30;;;                                                                       ;;
31;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32;;;                                                                       ;;
33;;;  Author: Alan W Black (awb@cs.cmu.edu) Nov 2005                       ;;
34;;;                                                                       ;;
35;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
36;;;                                                                       ;;
37;;;  Run Time Synthesis support for clustergen (HMM-generation) voices    ;;
38;;;                                                                       ;;
39;;;  This is voice-independant, and should be in festival/lib but is      ;;
40;;;  currently copied into each voice                                     ;;
41;;;                                                                       ;;
42;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43
44(defvar cluster_synth_pre_hooks nil)
45(defvar cluster_synth_post_hooks nil)
46(defvar clustergen_mcep_trees nil)
47(defvar cg:frame_shift 0.005)
48
49(defSynthType ClusterGen
50
51    (apply_hooks cluster_synth_pre_hooks utt)
52
53    (set! clustergen_utt utt) ;; for debugging
54
55    ;; Build the state relation
56    (ClusterGen_make_HMMstate utt)
57    ;; Predict number of frames, then predict the frame values
58    (ClusterGen_make_mcep utt) ;; durations for # of vectors
59    (if (assoc 'cg::trajectory clustergen_mcep_trees)
60        (ClusterGen_predict_trajectory utt) ;; predict trajectory
61        (ClusterGen_predict_mcep utt) ;; predict vector types
62        )
63
64    ;; Convert predicted mcep track into a waveform
65    (cluster_synth_method utt)
66
67    (apply_hooks cluster_synth_post_hooks utt)
68    utt
69)
70
71(define (cg_wave_synth_external utt)
72  ;; before we had it built-in to Festival
73  (let ((trackname (make_tmp_filename))
74        (wavename (make_tmp_filename))
75        )
76    (track.save (utt.feat utt "param_track") trackname "est")
77    (system
78     (format nil "$FESTVOXDIR/src/clustergen/cg_resynth %s %s"
79             trackname wavename))
80    (utt.import.wave utt wavename)
81;;    (delete-file trackname)
82    (delete-file wavename)
83    utt)
84)
85
86(define (cg_duration i)
87  (if (item.prev i)
88      (- (item.feat i "end") (item.feat i "p.end"))
89      (item.feat i "end")))
90
91(define (cg_state_pos i)
92  (let ((n (item.name i)))
93  (cond
94   ((not (string-equal n (item.feat i "p.name")))
95    "b")
96   ((string-equal n (item.feat i "n.name"))
97    "m")
98   (t
99    "e"))))
100
101(define (cg_wave_synth utt)
102    (utt.relation.create utt 'Wave)
103    (item.set_feat
104     (utt.relation.append utt 'Wave)
105     "wave"
106     (mlsa_resynthesis (utt.feat utt "param_track")))
107    utt)
108
109(define (ClusterGen_predict_states seg)
110  ;; The names may change
111  (cdr (assoc_string (item.name seg) phone_to_states)))
112
113(define (ClusterGen_make_HMMstate utt)
114  (let ((states)
115        (segstate)
116        (statepos))
117    ;; Make HMMstate relation and items (three per phone)
118    (utt.relation.create utt "HMMstate")
119    (utt.relation.create utt "segstate")
120
121    (mapcar
122     (lambda (seg)
123       (set! statepos 1)
124       (set! states (ClusterGen_predict_states seg))
125       (set! segstate (utt.relation.append utt 'segstate seg))
126       (while states
127          (set! state (utt.relation.append utt 'HMMstate))
128          (item.append_daughter segstate state)
129          (item.set_feat state "name" (car states))
130          (item.set_feat state "statepos" statepos)
131          (set! statepos (+ 1 statepos))
132          (set! states (cdr states)))
133       )
134     (utt.relation.items utt 'Segment))
135    )
136)
137
138(define (ClusterGen_state_duration state)
139  (let ((zdur (wagon_predict state duration_cart_tree_cg))
140        (ph_info (assoc_string (item.name state) duration_ph_info_cg))
141        (seg_stretch (item.feat state "R:segstate.parent.dur_stretch"))
142        (syl_stretch (item.feat state "R:segstate.parent.R:SylStructure.parent.dur_stretch"))
143        (tok_stretch (item.feat state "R:segstate.parent.R:SylStructure.parent.parent.R:Token.parent.dur_stretch"))
144        (global_stretch (Parameter.get 'Duration_Stretch))
145        (stretch 1.0))
146    (if (string-matches (item.name state) "#_.*")
147        ;; Its a pau so explicitly set the duration
148        ;; Note we want sentence internal pauses to be about 100ms
149        ;; and sentence final pauses to be 150ms, but there will also
150        ;; sentence initial pauses of 150ms so we can treat all pauses as
151        ;; 100ms, there are three states so we use 50ms
152        (set! zdur
153              (/ (- 0.05 (car (cdr ph_info)))
154                 (car (cdr (cdr ph_info))))))
155    (if (not (string-equal seg_stretch "0"))
156        (setq stretch (* stretch seg_stretch)))
157    (if (not (string-equal syl_stretch "0"))
158        (setq stretch (* stretch syl_stretch)))
159    (if (not (string-equal tok_stretch "0"))
160        (setq stretch (* stretch tok_stretch)))
161    (if (not (string-equal global_stretch "0"))
162        (setq stretch (* stretch global_stretch)))
163    (if ph_info
164        (* stretch
165           (+ (car (cdr ph_info)) ;; mean
166              (* (car (cdr (cdr ph_info))) ;; stddev
167                 zdur)))
168        (begin
169          (format t "ClusterGen_state_duration: no dur phone info for %s\n"
170                  (item.name state))
171          0.1))))
172
173(define (ClusterGen_make_mcep utt)
174  (let ((num_frames 0)
175        (frame_advance cg:frame_shift)
176        (end 0.0))
177
178    ;; Make HMMstate relation and items (three per phone)
179    (utt.relation.create utt "mcep")
180    (utt.relation.create utt "mcep_link")
181    (mapcar
182     (lambda (state)
183       ;; Predict Duration
184       (set! start end)
185       (set! end (+ start (ClusterGen_state_duration state)))
186       (item.set_feat state "end" end)
187       ;; create that number of mcep frames up to state end
188       (set! mcep_parent (utt.relation.append utt 'mcep_link state))
189       (while (<= (* num_frames frame_advance) end)
190              (set! mcep_frame (utt.relation.append utt 'mcep))
191              (item.append_daughter mcep_parent mcep_frame)
192              (item.set_feat mcep_frame "frame_number" num_frames)
193              (item.set_feat mcep_frame "name" (item.name mcep_parent))
194              (set! num_frames (+ 1 num_frames))
195              )
196       )
197     (utt.relation.items utt 'HMMstate))
198
199    ;; Copy the final state end back up on to the segment for consistency
200    (mapcar
201     (lambda (seg)
202       (item.set_feat seg "end" (item.feat seg "R:segstate.daughtern.end")))
203     (utt.relation.items utt 'Segment))
204
205    (utt.set_feat utt "param_track_num_frames" num_frames)
206    utt)
207)
208
209(define (cg_F0_smooth track j)
210  (let ((p 0.0)
211        (i 0)
212        (num_frames (- (track.num_frames track) 1)))
213
214    (set! i 1)
215    (while (< i num_frames)
216      (set! this (track.get track i j))
217      (set! next (track.get track (+ i 1) j))
218      (if (> this 0.0)
219          (track.set
220           track i j
221           (/ (+ (if (> p 0.0) p this)
222                 this
223                 (if (> next 0.0) next this))
224              3.0)))
225      (set! p this)
226      (set! i (+ 1 i)))
227    )
228)
229
230(define (cg_mcep_smooth track j)
231  (let ((p 0.0)
232        (i 0)
233        (num_frames (- (track.num_frames track) 1)))
234
235    (set! i 1)
236    (while (< i num_frames)
237      (set! this (track.get track i j))
238      (set! next (track.get track (+ i 1) j))
239      (track.set
240       track i j
241       (/ (+ p this next) 3.0))
242      (set! p this)
243      (set! i (+ 1 i)))
244    )
245)
246
247;; For normal synthesis make unvoiced states unvoiced, but we don't
248;; do this during testing
249(defvar cg_predict_unvoiced t)
250
251(define (ClusterGen_predict_F0 mcep npow_val f0_val param_track)
252  "(ClusterGen_predict_F0 mcep npow_val f0_val param_track)
253Predict the F0 (or not)."
254  (if (and cg_predict_unvoiced
255           (string-equal "-"
256            (item.feat
257             mcep "R:mcep_link.parent.R:segstate.parent.ph_vc"))
258           (or (string-equal "-"
259            (item.feat
260             mcep "R:mcep_link.parent.R:segstate.parent.ph_cvox"))
261	    (and (string-equal "+"
262            (item.feat
263             mcep "R:mcep_link.parent.R:segstate.parent.ph_cvox"))
264           (< npow_val 0.0))))
265      (track.set param_track i 0 0.0) ;; make it unvoiced
266      (track.set param_track i 0 f0_val)) ;; make it voiced
267      (format stderr "%d\n" i)
268      (track.set param_track i 0 0.0) ;; make it unvoiced
269  )
270
271(define (ClusterGen_predict_mcep utt)
272  (let ((param_track nil)
273        (frame_advance cg:frame_shift)
274        (frame nil) (f nil) (f0_val)
275        (num_channels (/ (track.num_channels clustergen_param_vectors) 2))
276        )
277
278    ;; Predict mcep values
279    (set! i 0)
280    (set! param_track
281          (track.resize nil
282           (utt.feat utt "param_track_num_frames")
283           num_channels))
284    (utt.set_feat utt "param_track" param_track)
285    (mapcar
286     (lambda (mcep)
287       ;; Predict mcep frame
288       (let ((mcep_tree (assoc_string (item.name mcep) clustergen_mcep_trees))
289             (f0_tree (assoc_string (item.name mcep) clustergen_f0_trees))
290             (npow_tree (assoc_string (item.name mcep) clustergen_npow_trees)))
291         (if (null mcep_tree)
292             (format t "ClusterGen: can't find cluster tree for %s\n"
293                     (item.name mcep))
294             (begin
295               ;; F0 prediction
296
297               (set! f0_val (wagon mcep (cadr f0_tree)))
298               (set! npow_val (wagon mcep (cadr npow_tree)))
299
300               (ClusterGen_predict_F0 mcep (cadr npow_val) (cadr f0_val) param_track)
301
302               ;; MCEP prediction
303               (set! frame (wagon mcep (cadr mcep_tree)))
304               (set! j 1)
305               (set! f (car frame))
306               (while (< j num_channels)
307                  (track.set param_track i j
308                    (track.get clustergen_param_vectors f (* 2 j)))
309                  (set! j (+ 1 j)))))
310
311         (track.set_time param_track i (* i frame_advance))
312         (set! i (+ 1 i))))
313     (utt.relation.items utt 'mcep))
314    (cg_F0_smooth param_track 0)
315    (mapcar
316     (lambda (x)
317       (cg_mcep_smooth param_track x))
318     '( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25))
319    utt
320  )
321)
322
323(define (cg_voiced state)
324  "(cg_voiced state)
325t if this state is voices, nil otherwise."
326  (if (and cg_predict_unvoiced
327           (string-equal "-" (item.feat state "R:segstate.parent.ph_vc"))
328           (string-equal "-" (item.feat state "R:segstate.parent.ph_cvox")))
329      nil
330      t))
331
332(define (ClusterGen_predict_trajectory utt)
333  (let ((param_track nil)
334        (frame_advance cg:frame_shift)
335        (frame nil) (f nil) (f0_val)
336        (num_channels (track.num_channels clustergen_param_vectors))
337;        (num_channels (track.num_channels traj::clustergen_param_vectors))
338        )
339
340    ;; Predict mcep values
341    (set! i 0)
342    (set! param_track
343          (track.resize nil
344           (utt.feat utt "param_track_num_frames")
345           num_channels))
346    (utt.set_feat utt "param_track" param_track)
347;    (set! param_track (utt.feat utt "param_track"))
348    (mapcar
349     (lambda (state)
350       ;; Predict mcep frame
351       (let ((mcep_tree (assoc_string (item.name state) clustergen_mcep_trees))
352             (f0_tree (assoc_string (item.name state) clustergen_f0_trees))
353             (npow_tree (assoc_string (item.name state) clustergen_npow_trees))
354             )
355         (set! f0_val (wagon state (cadr f0_tree)))
356         (set! npow_val (wagon state (cadr npow_tree)))
357;	  (set! npow_val '(0 0))
358;	  (set! f0_val '(0 0))
359         (if (null mcep_tree)
360             (format t "ClusterGen: can't find cluster tree for %s\n"
361                     (item.name state))
362             (begin
363               ;; feature prediction (F0 and mcep)
364               (set! trajectory (wagon state (cadr mcep_tree)))
365               (if (item.relation.daughters state 'mcep_link)
366                   (begin
367                    (if (assoc 'cg::trajectory_ola clustergen_mcep_trees)
368                     (cg:add_trajectory_ola
369                      (caar trajectory)
370                      (cadr (car trajectory))
371                      state
372                      num_channels
373                      param_track
374                      frame_advance)
375                     (cg:add_trajectory
376                      (caar trajectory)
377                      (cadr (car trajectory))
378                      state
379                      num_channels
380                      param_track
381                      frame_advance
382                      (cadr f0_val)
383                      (cadr npow_val))))
384
385                      )))
386
387                      ))
388     (utt.relation.items utt 'HMMstate))
389
390;    (track.save param_track "trajectory.track")
391
392     (cg_F0_smooth param_track 0)
393     (mapcar
394     (lambda (x)
395       (cg_mcep_smooth param_track x))
396;       '( 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25)
397      '( 1 2 3 )
398     )
399    utt
400  )
401)
402
403(define (cg:add_trajectory s_start s_frames state num_channels
404                           param_track frame_advance f0_val npow_val)
405"(cg:add_trajectory start n state num_channels)
406Add trajectory to daughters of state, interpolating as necessary."
407  (let ((j 0) (i 0)
408        (mceps (item.relation.daughters state 'mcep_link)))
409
410    (set! t_start (item.feat (car mceps) "frame_number"))
411    (set! t_frames (length mceps))
412    (set! m (/ s_frames t_frames))
413    (set! f 0)
414
415    (while (< i t_frames)
416       ;; find f
417       (set! s_pos (+ s_start f))
418
419  (if (and cg_predict_unvoiced
420	   (string-equal "-"
421            (item.feat
422             state "R:segstate.parent.ph_vc"))
423           (or (string-equal "-"
424            (item.feat
425             state "R:segstate.parent.ph_cvox"))
426	    (and (string-equal "+"
427            (item.feat
428             state "R:segstate.parent.ph_cvox"))
429           (< npow_val -7.0))))
430      (track.set param_track (+ i t_start) 0 0.0) ;; make it unvoiced
431      (track.set param_track (+ i t_start) 0 f0_val)) ;; make it voiced
432
433;;       (if (cg_voiced state)
434;;       (track.set param_track (+ i t_start) 0
435;;                      (track.get clustergen_param_vectors s_pos 0)))
436
437       (set! j 1)
438       (while (< j num_channels)
439              (track.set param_track (+ i t_start) j
440                         (track.get clustergen_param_vectors
441                                    s_pos j))
442              (set! j (+ 1 j)))
443       (set! f (+ m f))
444       (track.set_time param_track
445                       (+ i t_start) (* (+ i t_start) frame_advance))
446       (set! i (+ i 1))
447       )
448    )
449  )
450
451(define (cg:add_trajectory_ola s_start s_frames state num_channels
452                           param_track frame_advance)
453"(cg:add_trajectory start n state num_channels)
454Add trajectory to daughters of state, interpolating as necessary."
455  (let ((j 0) (i 0) (s1l 0) (s2l 0) (m 0.0) (w 0.0)
456        (t_start 0) (t_frames 0) (s_offset 0)
457        (mceps1 nil) (mceps2 nil))
458
459    (set! i 0)
460    (while (< i s_frames)
461     (if (equal? -1.0 (track.get clustergen_param_vectors (+ s_start i) 0))
462         (set! s1l i))
463     (set! i (+ i 1)))
464
465    (if (and (item.prev state)
466             (item.relation.daughters (item.prev state) 'mcep_link)
467             (> s1l 0))
468        (begin ;; do overlap on previous
469          (set! mceps1 (item.relation.daughters (item.prev state) 'mcep_link))
470          (set! first_half_delta (/ 1.0 (length mceps1)))
471          (set! t_start (item.feat (car mceps1) "frame_number"))
472          (set! t_frames (length mceps1))
473          (set! m (/ s1l t_frames))
474          (set! i 0)
475          (set! w 0.0)
476          (while (< i t_frames)
477           (set! s_offset (nint (* i m)))
478           (if (not (< s_offset s1l))
479               (begin
480;                 (format t "boing pre\n")
481                 (set! s_offset (- s1l 1))))
482           (set! s_pos (+ s_start s_offset))
483           (if (< (track.get clustergen_param_vectors s_pos 0) 0)
484               (format t "assigning pre -1/-2 %d %d %f\n" s_pos i m))
485           ;; F0 Prediction
486           (if (cg_voiced state)
487               (track.set param_track (+ i t_start) 0
488                (+ (* (- 1.0 w) (track.get param_track (+ i t_start) 0))
489                   (* w (track.get clustergen_param_vectors s_pos 0)))))
490
491           ;; MCEP Prediction
492           (set! j 1)
493           (while (< j num_channels)
494             (track.set param_track (+ i t_start) j
495              (+ (* (- 1.0 w) (track.get param_track (+ i t_start) j))
496                 (* w
497                    (track.get clustergen_param_vectors s_pos (* 2 j))
498                    )
499                 )
500              )
501             (set! j (+ 1 j)))
502           (set! i (+ 1 i))
503           (set! w (+ w first_half_delta))
504           (if (> w 1.0) (set! w 1.0))
505           )
506          ))
507
508    ;; do assignment on current unit
509    (set! mceps2 (item.relation.daughters state 'mcep_link))
510    (set! t_start (item.feat (car mceps2) "frame_number"))
511    (set! t_frames (length mceps2))
512    (set! s2l (- s_frames (+ s1l 2)))
513    (set! s2_start (+ s_start s1l 1))
514    (set! m (/ s2l t_frames))
515    (set! i 0)
516    (while (< i t_frames)
517     (set! s_offset (nint (* i m)))
518     (if (not (< s_offset s2l))
519         (set! s_offset (- s2l 1)))
520     (set! s_pos (+ s2_start s_offset))
521     (if (< (track.get clustergen_param_vectors s_pos 0) 0)
522         (format t "assigning -1/-2 %d %d %f %f\n" s_pos i m
523                 (track.get clustergen_param_vectors s_pos 0)))
524     ;; F0 Prediction
525     (if (cg_voiced state)
526         (track.set param_track (+ i t_start) 0
527                    (track.get clustergen_param_vectors s_pos 0)))
528     ;; MCEP Prediction
529     (set! j 1)
530     (while (< j num_channels)
531      (track.set param_track (+ i t_start) j
532                 (track.get clustergen_param_vectors s_pos (* 2 j)))
533      (set! j (+ 1 j)))
534     (track.set_time param_track
535                     (+ i t_start) (* (+ i t_start) frame_advance))
536     (set! i (+ 1 i))
537    )
538  )
539)
540
541;; Sort of historical it should be set in INST_LANG_VOX_cg.scm
542;; but maybe not in old instantiations
543(defvar cluster_synth_method
544  (if (boundp 'mlsa_resynthesis)
545      cg_wave_synth
546      cg_wave_synth_external ))
547
548(provide 'clustergen)
549