1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2;;;                                                                     ;;;
3;;;                     Carnegie Mellon University                      ;;;
4;;;                  and Alan W Black and Kevin Lenzo                   ;;;
5;;;                      Copyright (c) 1998-2000                        ;;;
6;;;                        All Rights Reserved.                         ;;;
7;;;                                                                     ;;;
8;;; Permission is hereby granted, free of charge, to use and distribute ;;;
9;;; this software and its documentation without restriction, including  ;;;
10;;; without limitation the rights to use, copy, modify, merge, publish, ;;;
11;;; distribute, sublicense, and/or sell copies of this work, and to     ;;;
12;;; permit persons to whom this work is furnished to do so, subject to  ;;;
13;;; the following conditions:                                           ;;;
14;;;  1. The code must retain the above copyright notice, this list of   ;;;
15;;;     conditions and the following disclaimer.                        ;;;
16;;;  2. Any modifications must be clearly marked as such.               ;;;
17;;;  3. Original authors' names are not deleted.                        ;;;
18;;;  4. The authors' names are not used to endorse or promote products  ;;;
19;;;     derived from this software without specific prior written       ;;;
20;;;     permission.                                                     ;;;
21;;;                                                                     ;;;
22;;; CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK        ;;;
23;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     ;;;
24;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  ;;;
25;;; SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE     ;;;
26;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   ;;;
27;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  ;;;
28;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         ;;;
29;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      ;;;
30;;; THIS SOFTWARE.                                                      ;;;
31;;;                                                                     ;;;
32;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34;;;                                                                     ;;;
35;;; Code for building data for prompts, aligning and unit selection     ;;;
36;;; synthesizer                                                         ;;;
37;;;                                                                     ;;;
38;;; This file is only used at database build time                       ;;;
39;;;                                                                     ;;;
40;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
41
42(defvar cmu_us_rms_arctic::clunits_dir ".")
43
44(require 'clunits_build)
45
46;;; Basic voice definition file with voice defines and clunit
47;;; parameter definition for run time.
48(load "festvox/cmu_us_rms_arctic_clunits.scm")
49
50;;; Add Build time parameters
51(set! cmu_us_rms_arctic::dt_params
52      (cons
53       ;; in case cmu_us_rms_arctic_clunits defines this too, put this at start
54       (list 'db_dir (string-append cmu_us_rms_arctic::clunits_dir "/"))
55       (append
56	cmu_us_rms_arctic::dt_params
57	(list
58	;;; In cmu_us_rms_arctic_clunits.scm
59	 ;;'(coeffs_dir "lpc/")
60	 ;;'(coeffs_ext ".lpc")
61	 '(disttabs_dir "festival/disttabs/")
62	 '(utts_dir "festival/utts/")
63	 '(utts_ext ".utt")
64	 '(dur_pen_weight 0.0)
65	 '(f0_pen_weight 0.0)
66	 '(get_stds_per_unit t)
67	 '(ac_left_context 0.8)
68	 '(ac_weights
69	   (0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5))
70	 ;; Join weights in cmu_us_rms_arctic_clunits.scm
71	 ;; Features for extraction
72	 '(feats_dir "festival/feats/")
73	 '(feats
74	   (occurid
75	    p.name p.ph_vc p.ph_ctype
76	    p.ph_vheight p.ph_vlng
77	    p.ph_vfront  p.ph_vrnd
78	    p.ph_cplace  p.ph_cvox
79	    n.name n.ph_vc n.ph_ctype
80	    n.ph_vheight n.ph_vlng
81	    n.ph_vfront  n.ph_vrnd
82	    n.ph_cplace  n.ph_cvox
83	    segment_duration
84	    seg_pitch p.seg_pitch n.seg_pitch
85	    R:SylStructure.parent.stress
86	    seg_onsetcoda n.seg_onsetcoda p.seg_onsetcoda
87	    R:SylStructure.parent.accented
88	    pos_in_syl
89	    syl_initial
90	    syl_final
91	    R:SylStructure.parent.syl_break
92	    R:SylStructure.parent.R:Syllable.p.syl_break
93	    R:SylStructure.parent.position_type
94	    pp.name pp.ph_vc pp.ph_ctype
95	    pp.ph_vheight pp.ph_vlng
96	    pp.ph_vfront  pp.ph_vrnd
97	    pp.ph_cplace pp.ph_cvox
98            n.lisp_is_pau
99            p.lisp_is_pau
100	    R:SylStructure.parent.parent.gpos
101	    R:SylStructure.parent.parent.R:Word.p.gpos
102	    R:SylStructure.parent.parent.R:Word.n.gpos
103	    ))
104	 ;; Wagon tree building params
105;	 (trees_dir "festvox/")  ;; in cmu_us_rms_arctic_clunits.scm
106	 '(wagon_field_desc "festival/clunits/all.desc")
107	 '(wagon_progname "$ESTDIR/bin/wagon")
108	 '(wagon_cluster_size 20)
109	 '(prune_reduce 0)
110	 '(cluster_prune_limit 40)
111	 ;; The dictionary of units used at run time
112;	 (catalogue_dir "festvox/")   ;; in cmu_us_rms_arctic_clunits.scm
113	 ;;  Run time parameters
114	 ;; all in cmu_us_rms_arctic_clunits.scm
115	 ;; Files in db, filled in at build_clunits time
116	 ;; (files ("time0001" "time0002" ....))
117))))
118
119(define (build_clunits file)
120  "(build_clunits file)
121Build cluster synthesizer for the given recorded data and domain."
122  (build_clunits_init file)
123  (do_all)  ;; someday I'll change the name of this function
124)
125
126(define (build_clunits_init file)
127  "(build_clunits_init file)
128Get setup ready for (do_all) (or (do_init))."
129  (eval (list cmu_us_rms_arctic::closest_voice))
130
131  ;; Add specific fileids to the list for this run
132  (set! cmu_us_rms_arctic::dt_params
133	(append
134	 cmu_us_rms_arctic::dt_params
135	 (list
136	  (list
137	   'files
138	   (mapcar car (load file t))))))
139
140  (set! dt_params cmu_us_rms_arctic::dt_params)
141  (set! clunits_params cmu_us_rms_arctic::dt_params)
142)
143
144(define (do_prompt name text)
145  "(do_prompt name text)
146Synthesize given text and save waveform and labels for prompts."
147  (let ((utt1 (utt.synth (eval (list 'Utterance 'Text text)))))
148    (utt.save utt1 (format nil "prompt-utt/%s.utt" name))
149    (utt.save.segs utt1 (format nil "prompt-lab/%s.lab" name))
150    (utt.save.wave utt1 (format nil "prompt-wav/%s.wav" name))
151    t))
152
153(define (build_prompts file)
154  "(build_prompt file)
155For each utterances in prompt file, synth and save waveform and
156labels for prompts and aligning."
157  (set! cmu_us_rms_arctic::clunits_prompting_stage t)
158  (voice_cmu_us_rms_arctic_clunits)
159 (let ((p (load file t)))
160    (mapcar
161     (lambda (l)
162       (format t "%s PROMPTS\n" (car l))
163       (do_prompt (car l) (cadr l))
164       t)
165     p)
166    t))
167
168(define (build_utts file)
169  "(build_utts file)
170For each utterances in prompt file, synthesize and merge aligned labels
171to predicted labels building a new utetrances and saving it."
172  (set! cmu_us_rms_arctic::clunits_prompting_stage t)
173  (voice_cmu_us_rms_arctic_clunits)
174  (let ((p (load file t)))
175    (mapcar
176     (lambda (l)
177       (format t "%s UTTS\n" (car l))
178       (align_utt (car l) (cadr l))
179       t)
180     p)
181    t))
182
183(define (align_utt name text)
184  "(align_utts file)
185Synth an utterance and load in the actualed aligned segments and merge
186them into the synthesizer utterance."
187  (let (; (utt1 (utt.load nil (format nil "prompt-utt/%s.utt" name)))
188	(utt1 (utt.synth (eval (list 'Utterance 'Text text))))
189	(silence (car (cadr (car (PhoneSet.description '(silences))))))
190	segments actual-segments)
191
192    (utt.relation.load utt1 'actual-segment
193		       (format nil "lab/%s.lab" name))
194    (set! segments (utt.relation.items utt1 'Segment))
195    (set! actual-segments (utt.relation.items utt1 'actual-segment))
196
197    ;; These should align, but if the labels had to be hand edited
198    ;; then they may not, we cater here for insertions and deletions
199    ;; of silences int he corrected hand labelled files (actual-segments)
200    ;; If you need to something more elaborate you'll have to change the
201    ;; code below.
202    (while (and segments actual-segments)
203      (cond
204       ((string-equal (string-append "#" (item.name (car segments)))
205                      (item.name (car actual-segments)))
206        ;; junk unit that is to be ignored
207        (item.set_feat (car segments) "end"
208                       (item.feat (car actual-segments) "end"))
209        (item.set_feat (car segments) "ignore" "1")
210        (set! segments (cdr segments))
211        (set! actual-segments (cdr actual-segments)))
212       ((and (not (string-equal (item.name (car segments))
213				(item.name (car actual-segments))))
214	     (or (string-equal (item.name (car actual-segments)) silence)
215		 (string-equal (item.name (car actual-segments)) "H#")
216		 (string-equal (item.name (car actual-segments)) "h#")))
217	(item.insert
218	 (car segments)
219	 (list silence (list (list "end" (item.feat
220					(car actual-segments) "end"))))
221	 'before)
222	(set! actual-segments (cdr actual-segments)))
223       ((and (not (string-equal (item.name (car segments))
224				(item.name (car actual-segments))))
225	     (string-equal (item.name (car segments)) silence))
226	(item.delete (car segments))
227	(set! segments (cdr segments)))
228       ((string-equal (item.name (car segments))
229		      (item.name (car actual-segments)))
230	(item.set_feat (car segments) "end"
231		       (item.feat (car actual-segments) "end"))
232	(set! segments (cdr segments))
233	(set! actual-segments (cdr actual-segments)))
234       (t
235	(format stderr
236		"align missmatch at %s (%f) %s (%f)\n"
237		(item.name (car segments))
238		(item.feat (car segments) "end")
239		(item.name (car actual-segments))
240		(item.feat (car actual-segments) "end"))
241	(error)))
242      )
243
244    (mapcar
245     (lambda (a)
246      ;; shorten and split sliences
247      (while (and (string-equal (item.name a) silence)
248		  (> (item.feat a "segment_duration") 0.300))
249;              (format t "splitting %s silence of %f at %f\n"
250;		      (item.name a)
251;                      (item.feat a "segment_duration")
252;                      (item.feat a "end"))
253              (cond
254               ((string-equal "h#" (item.feat a "p.name"))
255                (item.set_feat (item.prev a) "end"
256                               (+ 0.150 (item.feat a "p.end"))))
257               ((and (string-equal silence (item.feat a "p.name"))
258                     (string-equal silence (item.feat a "p.p.name")))
259                (item.set_feat (item.prev a) "end"
260                               (+ 0.150 (item.feat a "p.end")))
261                (item.set_feat (item.prev a) "name" silence))
262               (t
263                (item.insert a
264                             (list silence
265                                   (list
266                                    (list "end"
267				      (+ 0.150
268					(item.feat a "p.end")))))
269                             'before)))))
270     (utt.relation.items utt1 'Segment))
271
272    (utt.relation.delete utt1 'actual-segment)
273    (utt.set_feat utt1 "fileid" name)
274    ;; If we have an F0 add in targets too
275    (if (probe_file (format nil "f0/%s.f0" name))
276	(build::add_targets utt1))
277    (utt.save utt1 (format nil "festival/utts/%s.utt" name))
278    t))
279
280;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
281;;;  Some prosody modelling code
282;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
283
284(define (build::add_targets utt)
285  "(build::add_targets utt)
286Adds targets based on the F0 in f0/*.f0.  Adds a point to each vowel."
287  (let ((fileid (utt.feat utt "fileid"))
288	(f0_points))
289    (set! f0_points (build::load_f0_points fileid))
290    (set! awb_f0_points f0_points)
291    ;; Get rid of the old one
292    (utt.relation.delete utt 'Target)
293    ;; Create a new one
294    (utt.relation.create utt 'Target)
295    (build::add_target
296     utt
297     f0_points)
298    utt))
299
300(define (build::add_target utt f0_points)
301  "(build::add_target utt f0_points)
302Add F0 points at start or syllable, mid point of each vowel, and
303last segment before silence.  The F0 continued over non-voiced
304periods is such a naive and hopless way its embarrassing."
305  (let ((s (utt.relation.first utt 'Segment))
306	(f0s f0_points)
307	targ)
308    (while s
309     (if (and (not (member_string
310		    (item.name s)
311		    (cadr (car (PhoneSet.description '(silences))))))
312	      (or (string-equal "1" (item.feat s "syl_initial"))
313		  (string-equal "+" (item.feat s "ph_vc"))
314		  (member_string
315		   (item.feat s "n.name")
316		   (cadr (car (PhoneSet.description '(silences)))))))
317	 (begin
318	   (set! targ (utt.relation.append utt 'Target s))
319	   (if (string-equal "1" (item.feat s "syl_initial"))
320	       (item.relation.append_daughter
321		targ
322		'Target
323		(list
324		 "0"
325		 (list
326		  (list 'f0 (build::get_f0_at f0s (item.feat s "segment_start")))
327		  (list 'pos (item.feat s "segment_start"))))))
328	   (if (string-equal "+" (item.feat s "ph_vc"))
329	       (item.relation.append_daughter
330		targ
331		'Target
332		(list
333		 "0"
334		 (list
335		  (list 'f0 (build::get_f0_at f0s (item.feat s "segment_mid")))
336		  (list 'pos (item.feat s "segment_mid"))))))
337	   (if (member_string
338		(item.feat s "n.name")
339		(cadr (car (PhoneSet.description '(silences)))))
340	       (item.relation.append_daughter
341		targ
342		'Target
343		(list
344		 "0"
345		 (list
346		  (list 'f0 (build::get_f0_at f0s (item.feat s "segment_end")))
347		  (list 'pos (item.feat s "segment_end"))))))))
348     (set! s (item.next s))
349     ))
350)
351
352(define (build::get_f0_at f0s position)
353  "(build::get_f0_at f0s position)
354Returns the non-zero F0 nearest to position."
355  (build::get_f0_at_2
356   -1
357   f0s
358   position))
359
360(define (build::get_f0_at_2 f0 f0s position)
361  "(build::get_f0_at f0 f0s position)
362Returns the non-zero F0 nearest to position."
363  (cond
364   ((null f0s)
365    (if (> f0 0)
366	f0
367	110 ;; aint nothing there at all at all
368	))
369   (t
370    (if (> 0 (cadr (car f0s)))
371	(set! f0 (cadr (car f0s))))
372    (cond
373     ((and (>= position (car (car f0s)))
374	   (<= position (car (cadr f0s))))
375      (if (< f0 1)
376	  (build::find_first_f0 f0s)
377	  f0))
378     (t
379      (build::get_f0_at_2 f0 (cdr f0s) position))))))
380
381(define (build::find_first_f0 f0s)
382  (cond
383   ((null f0s)
384    110  ;; last resort
385    )
386   ((> (cadr (car f0s)) 0)
387    (cadr (car f0s)))
388   (t
389    (build::find_first_f0 (cdr f0s)))))
390
391(define (build::load_f0_points fileid)
392  "(build::load_f0_points fileid)
393Extract F0 as ascii times and values from the F0 file and load
394it as a simple assoc list."
395  (let ((f0asciifile (make_tmp_filename))
396	f0fd point points
397	(time 0))
398    (system
399     (format nil "$EST%s/bin/ch_track -otype ascii -o %s f0/%s.f0"
400	     "DIR"  ;; to stop that var name being mapped.
401	     f0asciifile
402	     fileid))
403    (set! f0fd (fopen f0asciifile "r"))
404    (while (not (equal? (set! point (readfp f0fd)) (eof-val)))
405      (set! points
406	    (cons
407	     (list time point) points))
408      (set! time (+ 0.005 time))
409      ;; skip the second field.
410      (readfp f0fd))
411    (fclose f0fd)
412    (delete-file f0asciifile)
413    (reverse points)))
414
415;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
416;;;  Code to try to find bad labelling by looking at duration distribution
417;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
418
419;;;  A simple sufficient statistics class
420(define (suffstats.new)
421  (list
422   0    ;; n
423   0    ;; sum
424   0    ;; sumx
425   ))
426
427(define (suffstats.set_n x n)
428  (set-car! x n))
429(define (suffstats.set_sum x sum)
430  (set-car! (cdr x) sum))
431(define (suffstats.set_sumx x sumx)
432  (set-car! (cdr (cdr x)) sumx))
433(define (suffstats.n x)
434  (car x))
435(define (suffstats.sum x)
436  (car (cdr x)))
437(define (suffstats.sumx x)
438  (car (cdr (cdr x))))
439(define (suffstats.reset x)
440  (suffstats.set_n x 0)
441  (suffstats.set_sum x 0)
442  (suffstats.set_sumx x 0))
443(define (suffstats.add x d)
444  (suffstats.set_n x (+ (suffstats.n x) 1))
445  (suffstats.set_sum x (+ (suffstats.sum x) d))
446  (suffstats.set_sumx x (+ (suffstats.sumx x) (* d d)))
447)
448
449(define (suffstats.mean x)
450  (/ (suffstats.sum x) (suffstats.n x)))
451(define (suffstats.variance x)
452  (/ (- (* (suffstats.n x) (suffstats.sumx x))
453        (* (suffstats.sum x) (suffstats.sum x)))
454     (* (suffstats.n x) (- (suffstats.n x) 1))))
455(define (suffstats.stddev x)
456  (sqrt (suffstats.variance x)))
457
458(define (cummulate_stats stats phone duration)
459  (let ((pstat (car (cdr (assoc_string phone stats))))
460	(newstats stats))
461    (if (null pstat)
462	(begin
463	  (set! pstat (suffstats.new))
464	  (set! newstats (cons (list phone pstat) stats))))
465    (suffstats.add pstat duration)
466    newstats))
467
468(define (collect_dur_stats utts)
469  (let ((stats nil))
470    (mapcar
471     (lambda (u)
472       (mapcar
473	(lambda (s)
474	  (set! stats (cummulate_stats
475		       stats
476		       (item.name s)
477		       (item.feat s "segment_duration"))))
478	(utt.relation.items u 'Segment)))
479     utts)
480    stats))
481
482(define (score_utts utts durstats ofile)
483  (let ((ofd (fopen ofile "w")))
484    (mapcar
485     (lambda (u)
486       (let ((score 0) (tot 0))
487	 (format ofd "%s " (utt.feat u "fileid"))
488	 (mapcar
489	  (lambda (s)
490	    (let ((stats (car (cdr (assoc_string (item.name s) durstats))))
491		  (dur (item.feat s "segment_duration"))
492		  (zscore))
493	      (set! tot (+ 1 tot))
494	      (set! zscore (/ (- dur (suffstats.mean stats))
495			      (suffstats.stddev stats)))
496	      (if (< zscore 0)
497		  (set! zscore (* -1 zscore)))
498	      (if (or (< dur 0.011)
499		      (> zscore 3))
500		  (set! score (+ 1 score)))))
501	  (utt.relation.items u 'Segment))
502	 (format ofd "%0.4f %d %d\n"
503		 (/ score tot)
504		 score
505		 tot)))
506     utts)))
507
508(define (make_simple_utt fileid)
509  (let ((utt (Utterance Text "")))
510    (utt.relation.load utt 'Segment
511		       (format nil "lab/%s.lab" fileid))
512    (utt.set_feat utt "fileid" fileid)
513    utt))
514
515(define (find_outlier_utts file ofile)
516  (voice_kal_diphone)
517  (let ((p (load file t))
518	utts dur_states)
519    (set! utts (mapcar (lambda (l) (make_simple_utt (car l))) p))
520    (set! dur_stats (collect_dur_stats utts))
521    (score_utts utts dur_stats ofile)
522    t))
523
524(provide 'build_clunits)
525
526