1#!/bin/sh
2###########################################################################
3##                                                                       ##
4##                   Carnegie Mellon University and                      ##
5##                   Alan W Black and Kevin A. Lenzo                     ##
6##                      Copyright (c) 1998-2000                          ##
7##                        All Rights Reserved.                           ##
8##                                                                       ##
9##  Permission is hereby granted, free of charge, to use and distribute  ##
10##  this software and its documentation without restriction, including   ##
11##  without limitation the rights to use, copy, modify, merge, publish,  ##
12##  distribute, sublicense, and/or sell copies of this work, and to      ##
13##  permit persons to whom this work is furnished to do so, subject to   ##
14##  the following conditions:                                            ##
15##   1. The code must retain the above copyright notice, this list of    ##
16##      conditions and the following disclaimer.                         ##
17##   2. Any modifications must be clearly marked as such.                ##
18##   3. Original authors' names are not deleted.                         ##
19##   4. The authors' names are not used to endorse or promote products   ##
20##      derived from this software without specific prior written        ##
21##      permission.                                                      ##
22##                                                                       ##
23##  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         ##
24##  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ##
25##  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ##
26##  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      ##
27##  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ##
28##  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ##
29##  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ##
30##  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ##
31##  THIS SOFTWARE.                                                       ##
32##                                                                       ##
33###########################################################################
34##                                                                       ##
35##  Build a duration model                                               ##
36##                                                                       ##
##  Many parameterizations and training techniques are possible, many of ##
##  which will be better than what is here, but from experience this     ##
##  gives a model that is substantially better than simple mean durations##
##  with hand-specified modification factors at the phrasal boundaries   ##
41##                                                                       ##
42##  Builds CART tree that predicts zscores of durations                  ##
43##                                                                       ##
##  This is the *whole* thing; you probably want to actually do each     ##
##  stage by hand (the training itself can take days)                    ##
46##                                                                       ##
47###########################################################################
48
49#  if [ $# != 3 ]
50#  then
51#      echo "Build a duration model, requires basic utterances to be pre-built"
52#      echo "Usage: make_dur_model VOX PHONESET SILENCENAME"
#      echo "INST is the institute building the language, e.g. cmu, cstr, ogi"
54#      echo "     if there isn't an appropriate institute use, net."
55#      echo "LANG is language identifier, e.g. japan, scotsgaelic"
56#      echo "VOX is speaker identifier e.g kal, awb"
57#      exit 1
58#  fi
59
## ESTDIR is used below to locate wagon, wagon_test, make_wagon_desc and the
## festival example scripts, so refuse to continue when it is unset or empty.
## The explanation is a diagnostic, so it goes to stderr rather than stdout.
if [ -z "$ESTDIR" ]
then
   cat >&2 <<'EOF'
environment variable ESTDIR is unset
set it to your local speech tools directory e.g.
   bash$ export ESTDIR=/home/awb/projects/speech_tools/
or
   csh% setenv ESTDIR /home/awb/projects/speech_tools/
EOF
   exit 1
fi
69
## Voice-specific settings.  Each one may be overridden by exporting a
## variable of the same name before running this script; when unset (or
## empty) the original kal diphone values are used.
##   SILENCENAME  segments whose second feature field equals this name are
##                dropped when the training data is collected
##   VOICENAME    expression handed to dumpfeats (-eval) and to festival
##   MODELNAME    name passed to finalize_dur_model
SILENCENAME="${SILENCENAME:-pau}"
VOICENAME="${VOICENAME:-(voice_kal_diphone)}"
MODELNAME="${MODELNAME:-cmu_us_kal}"

## Tool locations, all derived from ESTDIR (festival is expected to sit
## next to the speech tools directory).
DURMEANSTD="$ESTDIR/../festival/examples/durmeanstd"
DUMPFEATS="$ESTDIR/../festival/examples/dumpfeats"
WAGON="$ESTDIR/bin/wagon"
WAGON_TEST="$ESTDIR/bin/wagon_test"
78
## Find the mean and standard deviation of the durations in the database.
## Reads every utterance in festival/utts and writes the statistics to
## festival/dur/etc/durs.meanstd.  The tool path is quoted so that an ESTDIR
## containing whitespace does not get split into several words.
echo ";;; Finding mean durations and standard deviation of each phone type"
"$DURMEANSTD" -output festival/dur/etc/durs.meanstd festival/utts/*.utt
82
## Extract the features listed in festival/dur/etc/dur.feats from the Segment
## relation of every utterance, one output file per utterance (the %s in the
## -output pattern).  The voice expression and festival/dur/etc/logdurn.scm
## are both passed with -eval, in that order.  Expansions are quoted so
## that whitespace in ESTDIR or in the voice expression cannot split them.
echo ";;; Extracting features from utterances"
"$DUMPFEATS" -relation Segment -eval "$VOICENAME" \
    -feats festival/dur/etc/dur.feats \
    -output festival/dur/feats/%s.feats \
    -eval festival/dur/etc/logdurn.scm \
    festival/utts/*.utt
86
## Save all features in one file, removing silence phones: every line whose
## second field equals $SILENCENAME is dropped.
## The name is handed to awk with -v instead of being pasted into the program
## text, so it cannot be word-split by the shell or alter the awk program;
## the (sil "") concatenation forces a plain string comparison.
echo ";;; Collecting features in training and test data"
awk -v sil="$SILENCENAME" '$2 != (sil "")' festival/dur/feats/*.feats \
    >festival/dur/data/dur.data

## Split twice.  NOTE(review): bin/traintest is not visible here; it presumably
## writes <file>.train and <file>.test, since the later steps read
## dur.data.train.train, dur.data.train.test and dur.data.test.
bin/traintest festival/dur/data/dur.data
bin/traintest festival/dur/data/dur.data.train
92
# Build description file
# make_wagon_desc is given the collected data, the feature list and the
# target festival/dur/etc/dur.desc; festival is then run in batch mode on
# festvox/build_prosody.scm to evaluate the voice expression followed by
# (build_dur_feats_desc).  Expansions are quoted so that whitespace in ESTDIR
# or in the voice expression does not split the command line.
echo ";;; Build feature description file"
"$ESTDIR/bin/make_wagon_desc" festival/dur/data/dur.data festival/dur/etc/dur.feats festival/dur/etc/dur.desc
festival -b --heap 2000000 festvox/build_prosody.scm "$VOICENAME" '(build_dur_feats_desc)'
97
98# emacs festival/dur/etc/dur.desc
99
# STOP is the value given to wagon's -stop option and PREF a file name prefix;
# both are embedded in the tree and log file names below.
STOP=50
PREF=dur

# Train on dur.data.train.train (with dur.data.train.test as wagon's -test
# set), then run wagon_test on dur.data.test.  Standard output of both
# stages is shown and also saved in dur.$PREF.S$STOP.out by tee.
# All expansions are quoted so paths containing whitespace stay intact.
{
  echo ";;; Build the duration model itself"
  "$WAGON" -data festival/dur/data/dur.data.train.train \
      -desc festival/dur/etc/dur.desc \
      -test festival/dur/data/dur.data.train.test \
      -stop "$STOP" \
      -output "festival/dur/tree/${PREF}.S${STOP}.tree" \
      -stepwise

  echo ";;; Test the duration model"
  "$WAGON_TEST" -heap 2000000 \
      -data festival/dur/data/dur.data.test \
      -desc festival/dur/etc/dur.desc \
      -tree "festival/dur/tree/${PREF}.S${STOP}.tree"
} |
tee "dur.${PREF}.S${STOP}.out"
110
# Run festival in batch mode on festvox/build_prosody.scm, evaluating the
# voice expression and then (finalize_dur_model "<model name>" "<tree file>").
# The expression is built inside one double-quoted word so that the model
# name and tree name can never be word-split or glob-expanded by the shell.
echo ";;; Constructing the duration model as a loadable scheme file"
festival -b --heap 2000000 festvox/build_prosody.scm "$VOICENAME" \
    "(finalize_dur_model \"$MODELNAME\" \"$PREF.S$STOP.tree\")"
113
114exit
115
116